sqoop job --create test_job1 -- import --connect "jdbc:sqlserver://192.168.1.206;database=wells-fargo" --username sa --password Hadoop123 --driver com.microsoft.sqlserver.jdbc.SQLServerDriver --query 'select * from dbo.tVulnMgmt_DeltaRiskAGGHist where $CONDITIONS' --target-dir "/tmp/dbo-delta2" -m 1
sqoop job --list
sqoop job --exec test_job1
sqoop import --connect "jdbc:sqlserver://192.168.1.206;database=wells-fargo" --username sa --password Hadoop123 --driver com.microsoft.sqlserver.jdbc.SQLServerDriver --query "select * from dbo.tVulnMgmt_DeltaRiskAGGHist WHERE \$CONDITIONS" --target-dir "/tmp/dbo-delta" --split-by "Date"
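Both commands above pass the database password in plain text on the command line. As a sketch of an alternative (assuming a password file has already been staged at the hypothetical HDFS path /user/prithvi/sqoop.pwd), Sqoop can read the password with --password-file instead:
sqoop import --connect "jdbc:sqlserver://192.168.1.206;database=wells-fargo" --username sa --password-file /user/prithvi/sqoop.pwd --driver com.microsoft.sqlserver.jdbc.SQLServerDriver --query "select * from dbo.tVulnMgmt_DeltaRiskAGGHist WHERE \$CONDITIONS" --target-dir "/tmp/dbo-delta" --split-by "Date"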
spark-submit --class org.apache.spark.examples.SparkPi --deploy-mode client --master yarn spark-examples_2.11-2.1.1.2.6.1.0-129.jar
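SparkPi prints its estimate on the driver, so in client mode the result appears directly on the console. If the job were submitted with --deploy-mode cluster instead, the driver output would have to be pulled from the YARN logs, roughly like this (application id taken from the spark-submit output):
yarn logs -applicationId <application_id> | grep "Pi is roughly"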
hive.optimize.reducededuplication.min.reducer=4
hive.optimize.reducededuplication=true
hive.merge.mapfiles=true
hive.merge.mapredfiles=false
hive.merge.smallfiles.avgsize=16000000
hive.merge.size.per.task=256000000
hive.merge.sparkfiles=true
hive.auto.convert.join=true
hive.auto.convert.join.noconditionaltask=true
hive.auto.convert.join.noconditionaltask.size=20971520 (20 MB; might need to increase for Spark, e.g. 200 MB)
hive.optimize.bucketmapjoin.sortedmerge=false
hive.map.aggr.hash.percentmemory=0.5
hive.map.aggr=true
hive.optimize.sort.dynamic.partition=false
hive.stats.autogather=true
hive.stats.fetch.column.stats=true
hive.compute.query.using.stats=true
hive.limit.pushdown.memory.usage=0.4 (MR and Spark)
hive.optimize.index.filter=true
hive.exec.reducers.bytes.per.reducer=67108864
hive.smbjoin.cache.rows=10000
hive.fetch.task.conversion=more
hive.fetch.task.conversion.threshold=1073741824
hive.optimize.ppd=true
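These properties can also be tried per session rather than cluster-wide, e.g. from beeline or the hive shell before running a query (a small sketch using two of the values above against a table from this gist):
set hive.auto.convert.join=true;
set hive.exec.reducers.bytes.per.reducer=67108864;
select count(*) from wells_poc.oozie_test2;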
nameNode=hdfs://compute-77.cloudwickdc.local:8020
jobTracker=compute-77.cloudwickdc.local:8050
queueName=default
examplesRoot=examples
oozie.use.system.libpath=true
oozie.wf.application.path=${nameNode}/user/prithvi
drop table if exists wells_poc.oozie_test2;
create table wells_poc.oozie_test2 as select * from atlas_test_table_sqoop1;
<workflow-app xmlns="uri:oozie:workflow:0.4" name="hive-wf">
    <credentials>
        <credential name='hive_credentials' type='hcat'>
            <property>
                <name>hcat.metastore.uri</name>
                <value>thrift://compute-79.cloudwickdc.local:9083</value>
            </property>
            <property>
                <name>hcat.metastore.principal</name>
                <value>hive/[email protected]</value>
            </property>
        </credential>
    </credentials>
    <start to="hive-node"/>
    <action cred='hive_credentials' name="hive-node">
        <hive xmlns="uri:oozie:hive-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <script>tt.hql</script>
        </hive>
        <ok to="end"/>
        <error to="fail"/>
    </action>
    <kill name="fail">
        <message>Hive failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <end name="end"/>
</workflow-app>
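The workflow definition and the tt.hql script it references have to sit in the application path from the properties file before the job is submitted; a minimal sketch, assuming both files are in the current local directory:
hdfs dfs -mkdir -p /user/prithvi
hdfs dfs -put -f workflow.xml tt.hql /user/prithvi/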
export OOZIE_URL=http://compute-80.cloudwickdc.local:11000/oozie
oozie job -config test.properties -run
oozie job -info <oozie_id>
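Two more sub-commands of the same CLI that are handy while debugging the workflow (OOZIE_URL still exported as above):
oozie job -log <oozie_id>
oozie job -kill <oozie_id>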
%livy2.spark
val hiveContext = new org.apache.spark.sql.hive.HiveContext(sc)
hiveContext.sql("select count(*) from wells_poc.oozie_test2").collect()
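HiveContext is the Spark 1.x entry point; on a Spark 2.x Livy interpreter the same check can also be written against the predefined SparkSession (a sketch, assuming the livy2 interpreter exposes the usual spark variable with Hive support enabled):
%livy2.spark
spark.sql("select count(*) from wells_poc.oozie_test2").show()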