Skip to content

Instantly share code, notes, and snippets.

@xiyoulaoyuanjia
xiyoulaoyuanjia / gist:98ec380dc264edcd166a95405862fa17
Created August 22, 2019 12:05
sparkingStreamingPairTest.java
package SparkStreaming;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
@xiyoulaoyuanjia
xiyoulaoyuanjia / spark common code
Last active February 26, 2019 11:35
解压文件到hdfs
public class IOUtils {
/**
* 解压缩本地文件到hdfs。
* @param srcPath
* @param tgtPath
*/
public static void gzipFileToHdfs(String srcPath,String tgtPath){
Configuration conf=new Configuration();
GzipCodec zip=new GzipCodec();
# Run the awk program stored in file.dat.awk over file.dat and redirect the output to new_file.dat.
awk -f file.dat.awk file.dat > new_file.dat
## file.dat.awk
# NOTE(review): lowercase `begin` is not awk's special BEGIN pattern; on its own line it
# reads as an ordinary pattern-only rule — confirm this is intentional.
begin
# For each input record, print: its first 254 characters, then the two literal asterisk
# strings, then the same record again from character 19 to the end.
# NOTE(review): presumably this masks a field after column 254 — confirm that restarting
# the tail at character 19 (rather than after the masked span) is what is wanted.
{
print substr($0,1,254)"**************""*****"substr($0,19)
}
# Feed the statement below to dbaccess on standard input via a here-document delimited
# by `!`; etl_db is passed as the argument (presumably the database name — verify).
dbaccess etl_db<<!
call test1();
!
package hadoopDemo;
import java.io.File;
import java.net.URL;
import org.apache.avro.Protocol;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.ipc.HttpTransceiver;
import org.apache.avro.ipc.Transceiver;
package hadoopDemo;
import java.io.File;
import org.apache.avro.Protocol;
import org.apache.avro.Protocol.Message;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.ipc.HttpServer;
import org.apache.avro.ipc.Server;
package hadoopDemo;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
@xiyoulaoyuanjia
xiyoulaoyuanjia / get.py
Last active May 9, 2016 16:42 — forked from podhmo/get.py
# -*- coding:utf-8 -*-
from datetime import datetime
class Get(object):
"""
Lazy value calculate for object
"""
def __init__(self, getter):
self.getter = getter
@xiyoulaoyuanjia
xiyoulaoyuanjia / uri.js
Created March 26, 2016 14:59 — forked from jlong/uri.js
URI Parsing with Javascript
// Split a URL into its components by assigning it to the `href` of an <a>
// element and reading that element's URL properties. Relies on a global
// `document` being available. The value after each `// =>` is the result
// shown for the sample URL assigned below.
var parser = document.createElement('a');
parser.href = "http://example.com:3000/pathname/?search=test#hash";
parser.protocol; // => "http:"
parser.hostname; // => "example.com"
parser.port; // => "3000"
parser.pathname; // => "/pathname/"
parser.search; // => "?search=test"
parser.hash; // => "#hash"
parser.host; // => "example.com:3000"
@xiyoulaoyuanjia
xiyoulaoyuanjia / request.py
Created October 9, 2015 14:26
Timeout for python requests.get entire response
# Put an overall time limit on a requests.get call by running it under an
# eventlet timeout.
import eventlet

# Patch the standard library BEFORE `requests` is imported. eventlet's
# documentation says to call monkey_patch() as early as possible, so that the
# networking modules requests pulls in are already the cooperative versions
# that eventlet.Timeout is able to interrupt.
eventlet.monkey_patch()

import requests  # noqa: E402 - deliberately imported after monkey_patch()

# If the block has not finished within 10 seconds, eventlet raises
# eventlet.Timeout inside it. The exception is not caught here, so it
# propagates and ends the script.
with eventlet.Timeout(10):
    requests.get("http://ipv4.download.thinkbroadband.com/1GB.zip", verify=False)

# Alternative kept for comparison: requests' own `timeout` argument. Per the
# requests documentation it is not a limit on the entire response download,
# which is why the eventlet wrapper above is used instead.
##### r = requests.get("http://ipv4.download.thinkbroadband.com/1GB.zip", verify=False, timeout=10)