Last active
November 29, 2016 14:23
-
-
Save bliz937/cab29ba333e3c10f1f9bd4e46af043c4 to your computer and use it in GitHub Desktop.
A script that I used to set up Hadoop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Download, install, and configure Hadoop 2.7.2 for pseudo-distributed use.
# Must be run as root (creates a user, writes under /opt and /home).

set -u           # fail on unset variables
set -o pipefail  # a pipeline fails if any stage fails

# Hadoop HTTP download location
readonly hadoop_http="https://archive.apache.org/dist/hadoop/core/hadoop-2.7.2/hadoop-2.7.2.tar.gz"
# Location to install hadoop
readonly HADOOP_INSTALL=/opt/hadoop/
# User to run hadoop under
readonly HADOOP_USER=hadoop
# hostname or IP of master hdfs
readonly HADOOP_HDFS_MASTER=nyancat
# hostname of mapred host
readonly HADOOP_MAPRED_MASTER=nyancat
# location of tools (absolute paths so the script does not depend on PATH)
readonly WGET=/usr/bin/wget
readonly PRINTF=/usr/bin/printf
readonly TAR=/usr/bin/tar
readonly SED=/usr/bin/sed
# Verify prerequisites: a JDK (javac on PATH) and a running SSH daemon.
# Exits non-zero if either is missing.
function prereq {
  if ! command -v javac >/dev/null 2>&1; then
    $PRINTF "Install Java first!\n" >&2
    exit 1
  fi
  # The original test was [ -z "ps aux | grep ..." ], which checks a literal
  # string (always non-empty) and therefore never fired. pgrep actually
  # looks for a running sshd process.
  if ! pgrep -x sshd >/dev/null 2>&1; then
    $PRINTF "Install and run SSH server!\n" >&2
    exit 1
  fi
}
# Download the Hadoop tarball, extract it into $HADOOP_INSTALL, and point
# hadoop-env.sh at the local JDK.
# Globals read: hadoop_http, HADOOP_INSTALL, WGET, PRINTF, TAR, SED.
function download_extract_hadoop {
  $PRINTF "download_extract_hadoop start\n\n"
  cd /tmp/ || exit 1
  if ! $WGET "$hadoop_http"; then
    $PRINTF "Failed to download hadoop from %s\n" "$hadoop_http" >&2
    exit 1
  fi
  # Filename of the downloaded archive, e.g. hadoop-2.7.2.tar.gz.
  local HADOOP_TAR
  HADOOP_TAR=$(basename "$hadoop_http")
  # Name of the directory the archive extracts to; strip the .tar.gz suffix
  # with parameter expansion instead of spawning printf | sed.
  local HADOOP_TARLESS=${HADOOP_TAR%.tar.gz}
  # Guard BEFORE any destructive step: an empty name would make the mv below
  # (and the quoting of rm) meaningless.
  if [ -z "$HADOOP_TARLESS" ]; then
    $PRINTF "Variable HADOOP_TARLESS is empty!!\n" >&2
    exit 1   # exit takes 0-255; the original 'exit -1' is out of range
  fi
  rm -rf "$HADOOP_INSTALL"
  mkdir -p "$HADOOP_INSTALL"
  $TAR -xzvf "$HADOOP_TAR" || exit 1
  mv "$HADOOP_TARLESS"/* "$HADOOP_INSTALL"
  local HADOOP_ENV_FILE="$HADOOP_INSTALL/etc/hadoop/hadoop-env.sh"
  backup_file "$HADOOP_ENV_FILE"
  # Replace the literal ${JAVA_HOME} placeholder in hadoop-env.sh with the
  # real JDK root (javac -> resolve symlinks -> strip /bin/javac).
  $SED -i "s#\${JAVA_HOME}#$(dirname "$(dirname "$(readlink -f "$(which javac)")")")#" "$HADOOP_ENV_FILE"
  $PRINTF "download_extract_hadoop done...\n\n"
}
# Create the dedicated hadoop user and wire up its environment: append
# PATH (hadoop bin/sbin) and JAVA_HOME to its .bashrc, and export the same
# values into the current shell for the remaining steps.
# Globals read: HADOOP_USER, HADOOP_INSTALL, PRINTF.
function create_user {
  $PRINTF "create_user start\n\n"
  if ! useradd -m "$HADOOP_USER"; then
    $PRINTF "Failed to create user %s\n" "$HADOOP_USER" >&2
    exit 1
  fi
  # Compute the JDK root once instead of re-deriving it per line.
  local java_home
  java_home=$(dirname "$(dirname "$(readlink -f "$(which javac)")")")
  touch "/home/$HADOOP_USER/.bashrc"
  # Fixed '%s' format: a '%' inside PATH must not be treated as a directive.
  $PRINTF 'export PATH=%s\n' "$PATH:$HADOOP_INSTALL/bin/:$HADOOP_INSTALL/sbin/:" >> "/home/$HADOOP_USER/.bashrc"
  $PRINTF 'export JAVA_HOME=%s\n' "$java_home" >> "/home/$HADOOP_USER/.bashrc"
  export PATH="$PATH:$HADOOP_INSTALL/bin/:$HADOOP_INSTALL/sbin/:"
  export JAVA_HOME="$java_home"
  chown -R "$HADOOP_USER" "/home/$HADOOP_USER/"
  chown -R "$HADOOP_USER" "$HADOOP_INSTALL"
  $PRINTF "create_user done...\n\n"
}
# Smoke-test the standalone installation by running the bundled grep example
# over Hadoop's own config files, then pause so the operator can inspect
# the output.
# Globals read: HADOOP_USER, HADOOP_INSTALL, PRINTF.
function run_standalone {
  $PRINTF "standalone start\n\n"
  cd "/home/$HADOOP_USER" || exit 1
  rm -rf input
  mkdir input
  cp "$HADOOP_INSTALL"/etc/hadoop/*.xml input
  # NOTE(review): jar version is hard-coded; keep in sync with $hadoop_http.
  hadoop jar "$HADOOP_INSTALL/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.2.jar" grep input output 'dfs[a-z.]+'
  cat output/*
  read -rsp $'Check output above.\n\nPress any key to continue...\n' -n1 key
  $PRINTF "standalone done...\n\n"
}
# Configure pseudo/cluster mode: create the local HDFS storage directories
# and inject the required <property> entries into core-site.xml,
# yarn-site.xml, hdfs-site.xml and mapred-site.xml.
# Globals read: HADOOP_USER, HADOOP_INSTALL, HADOOP_HDFS_MASTER,
#               HADOOP_MAPRED_MASTER, PRINTF, SED.
# The XML snippets below hold literal \n sequences (backslash-newline is a
# line continuation inside double quotes); GNU sed's 'a\' command expands
# those \n into real newlines when appending after <configuration>.
function configure_psu {
  $PRINTF "configure_psu start\n\n"
  # Local storage for the namenode, datanode and secondary namenode.
  mkdir -p "/home/$HADOOP_USER/fs/"{namenode,datanode,namesecondary}
  chown -R "$HADOOP_USER" "/home/$HADOOP_USER/"
  local HADOOP_CORE_SITE_FILE="$HADOOP_INSTALL/etc/hadoop/core-site.xml"
  backup_file "$HADOOP_CORE_SITE_FILE"
  local HADOOP_CORE_SITE="<property>\n\
<name>fs.defaultFS</name>\n\
<value>hdfs://$HADOOP_HDFS_MASTER:8020</value>\n\
</property>\n\
<property>\n\
<name>io.file.buffer.size</name>\n\
<value>131072</value>\n\
</property>"
  $SED -i "/<configuration>/a\ $HADOOP_CORE_SITE" "$HADOOP_CORE_SITE_FILE"
  local HADOOP_YARN_SITE_FILE="$HADOOP_INSTALL/etc/hadoop/yarn-site.xml"
  backup_file "$HADOOP_YARN_SITE_FILE"
  local HADOOP_YARN_SITE="<property>\n\
<name>yarn.resourcemanager.hostname</name>\n\
<value>$HADOOP_MAPRED_MASTER</value>\n\
</property>\n\
<property>\n\
<name>yarn.nodemanager.aux-services</name>\n\
<value>mapreduce_shuffle</value>\n\
</property>"
  $SED -i "/<configuration>/a\ $HADOOP_YARN_SITE" "$HADOOP_YARN_SITE_FILE"
  local HADOOP_HDFS_SITE_FILE="$HADOOP_INSTALL/etc/hadoop/hdfs-site.xml"
  backup_file "$HADOOP_HDFS_SITE_FILE"
  local HADOOP_HDFS_SITE="<property>\n\
<name>dfs.replication</name>\n\
<value>3</value>\n\
</property>\n\
<property>\n\
<name>dfs.namenode.name.dir</name>\n\
<value>file:/home/$HADOOP_USER/fs/namenode</value>\n\
</property>\n\
<property>\n\
<name>dfs.datanode.data.dir</name>\n\
<value>file:/home/$HADOOP_USER/fs/datanode</value>\n\
</property>\n\
<property>\n\
<name>dfs.namenode.checkpoint.dir</name>\n\
<value>file:/home/$HADOOP_USER/fs/namesecondary</value>\n\
</property>\n\
<property>\n\
<name>dfs.block.size</name>\n\
<value>134217728</value>\n\
</property>"
  $SED -i "/<configuration>/a\ $HADOOP_HDFS_SITE" "$HADOOP_HDFS_SITE_FILE"
  local HADOOP_MAPRED_FILE="$HADOOP_INSTALL/etc/hadoop/mapred-site.xml"
  # Hadoop ships only the .template; activate it before editing.
  mv "$HADOOP_MAPRED_FILE.template" "$HADOOP_MAPRED_FILE" || exit 1
  backup_file "$HADOOP_MAPRED_FILE"
  local HADOOP_MAPRED="<property>\n\
<name>mapred.job.tracker</name>\n\
<value>$HADOOP_MAPRED_MASTER:54311</value>\n\
</property>\n\
<property>\n\
<name>mapreduce.framework.name</name>\n\
<value>yarn</value>\n\
</property>\n\
<property>\n\
<name>mapreduce.jobhistory.address</name>\n\
<value>$HADOOP_MAPRED_MASTER:10020</value>\n\
</property>\n\
<property>\n\
<name>mapreduce.jobhistory.webapp.address</name>\n\
<value>$HADOOP_MAPRED_MASTER:19888</value>\n\
</property>"
  $SED -i "/<configuration>/a\ $HADOOP_MAPRED" "$HADOOP_MAPRED_FILE"
  # Fixed '%s' format: never use data as the printf format string.
  $PRINTF '%s' "$HADOOP_MAPRED_MASTER" > "$HADOOP_INSTALL/etc/hadoop/masters"
  chown -R "$HADOOP_USER" "/home/$HADOOP_USER/"
  $PRINTF "configure_psu done...\n\n"
}
# Copy "$1" to "$1.original" as a one-time backup before the file is edited.
# Warns on stderr and returns non-zero (instead of silently succeeding)
# when no path is given.
function backup_file {
  if [ -z "${1:-}" ]; then
    $PRINTF "backup_file argument is empty!\n\n" >&2
    return 1
  fi
  cp -- "$1" "$1.original"
}
# Remind the operator to enable public-key SSH auth for both accounts,
# then block until a key is pressed.
function todo_pubkey {
  local msg="Make this account, and $HADOOP_USER, authenticate with public keys."
  $PRINTF '%s\n\n' "$msg"
  read -rsp $'Press any key to continue...\n' -n1 key
}
# Print the remaining manual steps and re-assert ownership of the install
# tree for the hadoop user.
function to_do {
  $PRINTF "TO DO:\n\n"
  $PRINTF "Generate ssh keys for pub key auth on master to slave.\n\n"
  chown -R "$HADOOP_USER" "$HADOOP_INSTALL"
}
# Entry point: run the full installation sequence in order.
main() {
  prereq
  download_extract_hadoop
  create_user
  run_standalone
  configure_psu
  todo_pubkey
  # Initialize the HDFS namenode storage directory before first start.
  hdfs namenode -format
  to_do
}
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment