#本项目下所有新建文件必须按 CDTR 规范组织,不要平铺在根目录。
#- C_context/ # 背景资料、需求文档、参考资料(作为输入提供的材料)
#- D_deliverables/ # 我真的会拿出去用的最终产物(不超过 3-5 个)
#- R_raw/ # 中间产物、实验、草稿(过程混乱但不丢弃)
#- T_tools/ # 过程中抽出来的可复用脚本/小工具
| """ | |
| The most atomic way to train and run inference for a GPT in pure, dependency-free Python. | |
| This file is the complete algorithm. | |
| Everything else is just efficiency. | |
| @karpathy | |
| """ | |
| import os # os.path.exists | |
| import math # math.log, math.exp |
| package SparkStreaming; | |
| import java.util.Arrays; | |
| import java.util.Iterator; | |
| import java.util.List; | |
| import org.apache.spark.SparkConf; | |
| import org.apache.spark.SparkContext; |
| public class IOUtils { | |
| /** | |
| * 解压缩本地文件到hdfs。 | |
| * @param srcPath | |
| * @param tgtPath | |
| */ | |
| public static void gzipFileToHdfs(String srcPath,String tgtPath){ | |
| Configuration conf=new Configuration(); | |
| GzipCodec zip=new GzipCodec(); |
| awk -f file.dat.awk file.dat > new_file.dat | |
| ## file.dat.awk | |
| begin | |
| { | |
| print substr($0,1,254)"**************""*****"substr($0,19) | |
| } | |
| dbaccess etl_db<<! | |
| call test1(); | |
| ! |
| package hadoopDemo; | |
| import java.io.File; | |
| import java.net.URL; | |
| import org.apache.avro.Protocol; | |
| import org.apache.avro.generic.GenericData; | |
| import org.apache.avro.generic.GenericRecord; | |
| import org.apache.avro.ipc.HttpTransceiver; | |
| import org.apache.avro.ipc.Transceiver; |
| package hadoopDemo; | |
| import java.io.File; | |
| import org.apache.avro.Protocol; | |
| import org.apache.avro.Protocol.Message; | |
| import org.apache.avro.generic.GenericData; | |
| import org.apache.avro.generic.GenericRecord; | |
| import org.apache.avro.ipc.HttpServer; | |
| import org.apache.avro.ipc.Server; |
| package hadoopDemo; | |
| import java.io.File; | |
| import java.io.FileOutputStream; | |
| import java.io.IOException; | |
| import org.apache.avro.Schema; | |
| import org.apache.avro.file.DataFileReader; | |
| import org.apache.avro.file.DataFileWriter; | |
| import org.apache.avro.generic.GenericData; |
| # -*- coding:utf-8 -*- | |
| from datetime import datetime | |
| class Get(object): | |
| """ | |
| Lazy value calculate for object | |
| """ | |
| def __init__(self, getter): | |
| self.getter = getter |