Created
July 13, 2019 07:28
-
-
Save tilakpatidar/87f61c8ac62f018110db5f3a4db4db0e to your computer and use it in GitHub Desktop.
Import data from postgres table to parquet using sqoop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
#https://www.datageekinme.com/setup/setting-up-my-mac-sqoop/ | |
# Installation on mac | |
brew install sqoop | |
sudo mkdir /var/lib/accumulo | |
export ACCUMULO_HOME='/var/lib/accumulo' | |
export SQOOP_VERSION=1.4.6_1 | |
export SQOOP_HOME=/usr/local/Cellar/sqoop/1.4.6_1/libexec | |
export SQOOP_CONF_DIR=$SQOOP_HOME/conf | |
export SQOOP_LIB_DIR=$SQOOP_HOME/lib | |
export PATH=$SQOOP_HOME/bin:$PATH | |
#set -Ux fish_user_paths "$SQOOP_HOME/bin" | |
# Install mysql for metastore | |
wget https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-8.0.16.tar.gz | |
tar -xvf mysql-connector-java-8.0.16.tar.gz | |
cp mysql-connector-java-8.0.16/mysql-connector-java-8.0.16.jar $SQOOP_HOME/lib/ | |
wget https://jdbc.postgresql.org/download/postgresql-42.2.6.jar | |
cp postgresql-42.2.6.jar $SQOOP_HOME/lib/ | |
mysql -u root -p -D sqoop -e "INSERT INTO SQOOP_ROOT VALUES(NULL,'sqoop.hsqldb.job.storage.version','0');" | |
# List tables in source database | |
sqoop list-tables \ | |
--connect jdbc:postgresql://localhost:5432/datamart \ | |
--username tilak \ | |
--password rootthegroot\ | |
--verbose | |
# Cleanup before import command | |
mkdir libjars | |
rm -rf $SQOOP_HOME/lib/products.* | |
# Import command | |
sqoop import \ | |
-fs file:/// -jt local \ | |
--connect jdbc:postgresql://localhost:5432/datamart \ | |
--username tilak \ | |
--password rootthegroot\ | |
--table products \ | |
--target-dir /tmp/test_sqoop/ \ | |
--as-sequencefile \ | |
--delete-target-dir \ | |
--verbose \ | |
--bindir /usr/local/Cellar/sqoop/1.4.6_1/libexec/lib | |
# Below options require hadoop cluster you cannot run them in local mode | |
# --split-by region_code\ | |
# --num-mappers 5\ | |
# --query 'SELECT * FROM products WHERE $CONDITIONS'\ | |
# --boundary-query "SELECT i, i + 1 from generate_series((SELECT MIN(region_code) FROM products), (SELECT MAX(region_code) FROM products)) s(i);"\ | |
# --direct |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment