Skip to content

Instantly share code, notes, and snippets.

View prithvi514's full-sized avatar

Prithvi D prithvi514

View GitHub Profile
# Define a saved Sqoop job named test_job1: a free-form query import from
# SQL Server into /tmp/dbo-delta2 using a single mapper (-m 1).
# The query is single-quoted so the shell passes $CONDITIONS to Sqoop verbatim.
# NOTE(review): the password is given in cleartext on the command line;
# consider -P or --password-file instead.
sqoop job --create test_job1 -- import --connect "jdbc:sqlserver://192.168.1.206;database=wells-fargo" --username sa --password Hadoop123 --driver com.microsoft.sqlserver.jdbc.SQLServerDriver --query 'select * from dbo.tVulnMgmt_DeltaRiskAGGHist where $CONDITIONS' --target-dir "/tmp/dbo-delta2" -m 1
# List the saved jobs.
sqoop job --list
# Run the saved job defined above.
sqoop job --exec test_job1
# Free-form query import from SQL Server into /tmp/dbo-delta, split across
# mappers on the "Date" column.
# The query must be single-quoted (or written as \$CONDITIONS inside double
# quotes): otherwise the shell expands $CONDITIONS to an empty string and
# Sqoop never sees the placeholder it requires for free-form queries.
# NOTE(review): the password is given in cleartext on the command line;
# consider -P or --password-file instead.
sqoop import --connect "jdbc:sqlserver://192.168.1.206;database=wells-fargo" --username sa --password Hadoop123 --driver com.microsoft.sqlserver.jdbc.SQLServerDriver --query 'select * from dbo.tVulnMgmt_DeltaRiskAGGHist WHERE $CONDITIONS' --target-dir "/tmp/dbo-delta" --split-by "Date"
# Submit the SparkPi example class from the Spark examples jar to YARN,
# with the driver in client deploy mode.
spark-submit --class org.apache.spark.examples.SparkPi --deploy-mode client --master yarn spark-examples_2.11-2.1.1.2.6.1.0-129.jar
hive.optimize.reducededuplication.min.reducer=4
@prithvi514
prithvi514 / EmrLauncher
Created March 2, 2017 00:44 — forked from amalgjose/EmrLauncher
Python code for launching an EMR cluster
__author__ = 'Amal G Jose'
import time
import logging
from boto.emr.connection import EmrConnection
from boto.emr.bootstrap_action import BootstrapAction
from boto.emr.step import InstallHiveStep
from boto.emr.step import InstallPigStep
from boto.regioninfo import RegionInfo