A script I used to set up Hadoop.
#!/bin/bash
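# Sets up a single-node Hadoop 2.7.2 installation under /opt/hadoop,
# creates a dedicated hadoop user, runs the bundled standalone example
# as a smoke test, then writes pseudo-/fully-distributed configuration.
# Must be run as root (it calls useradd and chown and writes to /opt).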
# Hadoop http download location
hadoop_http="https://archive.apache.org/dist/hadoop/core/hadoop-2.7.2/hadoop-2.7.2.tar.gz"
# Location to install hadoop
HADOOP_INSTALL=/opt/hadoop/
# User to run hadoop under
HADOOP_USER=hadoop
# hostname or IP of the HDFS master (NameNode)
HADOOP_HDFS_MASTER=nyancat
# hostname of the MapReduce/YARN master
HADOOP_MAPRED_MASTER=nyancat
# location of tools
WGET=/usr/bin/wget
PRINTF=/usr/bin/printf
TAR=/usr/bin/tar
SED=/usr/bin/sed
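# prereq: bail out unless a JDK (javac) and a running sshd are present.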
function prereq {
    if [ -z "$(which javac)" ]; then
        $PRINTF "Install Java first!\n"
        exit 1
    fi
    # wrap the pipeline in $(...); a literal string is never empty,
    # so the original test always passed
    if [ -z "$(ps aux | grep -v grep | grep sshd)" ]; then
        $PRINTF "Install and run an SSH server first!\n"
        exit 1
    fi
}
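# download_extract_hadoop: fetch the Hadoop tarball into /tmp, unpack it
# into $HADOOP_INSTALL, and point JAVA_HOME in hadoop-env.sh at the JDK
# that provides javac.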
function download_extract_hadoop {
    $PRINTF "download_extract_hadoop start\n\n"
    cd /tmp/
    $WGET "$hadoop_http"
    if [ $? -ne 0 ]; then
        $PRINTF "Failed to download hadoop from $hadoop_http\n"
        exit 1
    fi
    # the filename of the downloaded archive
    HADOOP_TAR=$(basename "$hadoop_http")
    # name of the directory the archive extracts to (escape the dots so
    # sed matches ".tar.gz" literally)
    HADOOP_TARLESS=$($PRINTF '%s' "$HADOOP_TAR" | $SED 's/\.tar\.gz$//')
    if [ -z "$HADOOP_TARLESS" ]; then
        $PRINTF "Variable HADOOP_TARLESS is empty!\n"
        exit 1
    fi
    rm -rf "$HADOOP_INSTALL"
    mkdir "$HADOOP_INSTALL"
    $TAR -xzvf "$HADOOP_TAR"
    mv "$HADOOP_TARLESS"/* "$HADOOP_INSTALL"
    HADOOP_ENV_FILE="$HADOOP_INSTALL/etc/hadoop/hadoop-env.sh"
    backup_file "$HADOOP_ENV_FILE"
    $SED -i "s#\${JAVA_HOME}#$(dirname $(dirname $(readlink -f $(which javac))))#" "$HADOOP_ENV_FILE"
    $PRINTF "download_extract_hadoop done...\n\n"
}
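# create_user: add the $HADOOP_USER account and put Hadoop's bin/ and
# sbin/ directories plus JAVA_HOME on its PATH (and the current shell's,
# so the standalone test below can find the hadoop command).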
function create_user {
    $PRINTF "create_user start\n\n"
    useradd -m "$HADOOP_USER"
    if [ $? -ne 0 ]; then
        $PRINTF "Failed to create user $HADOOP_USER\n"
        exit 1
    fi
    touch /home/$HADOOP_USER/.bashrc
    # single-quote the format string so $PATH expands at login time, not
    # at install time, and drop the trailing ':' (an empty PATH entry
    # means the current directory)
    $PRINTF 'export PATH=$PATH:%s/bin:%s/sbin\n' "$HADOOP_INSTALL" "$HADOOP_INSTALL" >> /home/$HADOOP_USER/.bashrc
    $PRINTF 'export JAVA_HOME=%s\n' "$(dirname $(dirname $(readlink -f $(which javac))))" >> /home/$HADOOP_USER/.bashrc
    export PATH=$PATH:$HADOOP_INSTALL/bin:$HADOOP_INSTALL/sbin
    export JAVA_HOME=$(dirname $(dirname $(readlink -f $(which javac))))
    chown -R $HADOOP_USER /home/$HADOOP_USER/
    chown -R $HADOOP_USER "$HADOOP_INSTALL"
    $PRINTF "create_user done...\n\n"
}
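# run_standalone: smoke-test the install by running the bundled grep
# example in local (standalone) mode over the stock config files.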
function run_standalone {
    $PRINTF "run_standalone start\n\n"
    cd /home/$HADOOP_USER
    # remove output too; the example job fails if the output dir exists
    rm -rf input output
    mkdir input
    cp $HADOOP_INSTALL/etc/hadoop/*.xml input
    hadoop jar $HADOOP_INSTALL/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.2.jar grep input output 'dfs[a-z.]+'
    cat output/*
    read -rsp $'Check the output above.\n\nPress any key to continue...\n' -n1 key
    $PRINTF "run_standalone done...\n\n"
}
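# configure_psu: back up and populate core-site.xml, yarn-site.xml,
# hdfs-site.xml and mapred-site.xml for pseudo-/fully-distributed use.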
function configure_psu {
    $PRINTF "configure_psu start\n\n"
    # local directories backing HDFS
    mkdir -p /home/$HADOOP_USER/fs/{namenode,datanode,namesecondary}
    chown -R $HADOOP_USER /home/$HADOOP_USER/
    # core-site.xml: default filesystem URI and I/O buffer size
    HADOOP_CORE_SITE_FILE=$HADOOP_INSTALL/etc/hadoop/core-site.xml
    backup_file $HADOOP_CORE_SITE_FILE
    HADOOP_CORE_SITE="<property>\n\
<name>fs.defaultFS</name>\n\
<value>hdfs://$HADOOP_HDFS_MASTER:8020</value>\n\
</property>\n\
<property>\n\
<name>io.file.buffer.size</name>\n\
<value>131072</value>\n\
</property>"
    $SED -i "/<configuration>/a\ $HADOOP_CORE_SITE" $HADOOP_CORE_SITE_FILE
    # yarn-site.xml: ResourceManager host and the MapReduce shuffle service
    HADOOP_YARN_SITE_FILE=$HADOOP_INSTALL/etc/hadoop/yarn-site.xml
    backup_file $HADOOP_YARN_SITE_FILE
    HADOOP_YARN_SITE="<property>\n\
<name>yarn.resourcemanager.hostname</name>\n\
<value>$HADOOP_MAPRED_MASTER</value>\n\
</property>\n\
<property>\n\
<name>yarn.nodemanager.aux-services</name>\n\
<value>mapreduce_shuffle</value>\n\
</property>"
    $SED -i "/<configuration>/a\ $HADOOP_YARN_SITE" $HADOOP_YARN_SITE_FILE
    # hdfs-site.xml: replication factor, block size, and the local dirs created above
    HADOOP_HDFS_SITE_FILE=$HADOOP_INSTALL/etc/hadoop/hdfs-site.xml
    backup_file $HADOOP_HDFS_SITE_FILE
    HADOOP_HDFS_SITE="<property>\n\
<name>dfs.replication</name>\n\
<value>3</value>\n\
</property>\n\
<property>\n\
<name>dfs.namenode.name.dir</name>\n\
<value>file:/home/$HADOOP_USER/fs/namenode</value>\n\
</property>\n\
<property>\n\
<name>dfs.datanode.data.dir</name>\n\
<value>file:/home/$HADOOP_USER/fs/datanode</value>\n\
</property>\n\
<property>\n\
<name>dfs.namenode.checkpoint.dir</name>\n\
<value>file:/home/$HADOOP_USER/fs/namesecondary</value>\n\
</property>\n\
<property>\n\
<name>dfs.block.size</name>\n\
<value>134217728</value>\n\
</property>"
    $SED -i "/<configuration>/a\ $HADOOP_HDFS_SITE" $HADOOP_HDFS_SITE_FILE
    # mapred-site.xml: run MapReduce on YARN and point clients at the job history server
    HADOOP_MAPRED_FILE=$HADOOP_INSTALL/etc/hadoop/mapred-site.xml
    mv $HADOOP_MAPRED_FILE.template $HADOOP_MAPRED_FILE
    backup_file $HADOOP_MAPRED_FILE
    HADOOP_MAPRED="<property>\n\
<name>mapred.job.tracker</name>\n\
<value>$HADOOP_MAPRED_MASTER:54311</value>\n\
</property>\n\
<property>\n\
<name>mapreduce.framework.name</name>\n\
<value>yarn</value>\n\
</property>\n\
<property>\n\
<name>mapreduce.jobhistory.address</name>\n\
<value>$HADOOP_MAPRED_MASTER:10020</value>\n\
</property>\n\
<property>\n\
<name>mapreduce.jobhistory.webapp.address</name>\n\
<value>$HADOOP_MAPRED_MASTER:19888</value>\n\
</property>"
    $SED -i "/<configuration>/a\ $HADOOP_MAPRED" $HADOOP_MAPRED_FILE
    # record the master hostname in the masters file
    $PRINTF "$HADOOP_MAPRED_MASTER" > $HADOOP_INSTALL/etc/hadoop/masters
    chown -R $HADOOP_USER /home/$HADOOP_USER/
    $PRINTF "configure_psu done...\n\n"
}
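# backup_file: keep a pristine copy of a config file as <file>.original.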
function backup_file {
    if [ -z "$1" ]; then
        $PRINTF "backup_file argument is empty!\n\n"
        return
    fi
    cp "$1" "$1.original"
}
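# todo_pubkey: pause so the operator can set up passwordless SSH, which
# the start-dfs.sh/start-yarn.sh scripts rely on.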
function todo_pubkey {
    $PRINTF "Set up public-key SSH authentication for this account and for $HADOOP_USER.\n\n"
    read -rsp $'Press any key to continue...\n' -n1 key
}
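# to_do: print the remaining manual steps and make sure $HADOOP_USER
# owns the install tree.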
function to_do {
    $PRINTF "TO DO:\n\n"
    $PRINTF "Generate SSH keys for public-key auth from the master to each slave.\n\n"
    chown -R $HADOOP_USER "$HADOOP_INSTALL"
}
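# Main sequence. The public-key setup the prompts refer to is roughly
# (run as $HADOOP_USER; 'slave1' is a placeholder hostname):
#   ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa
#   ssh-copy-id $HADOOP_USER@slave1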
prereq
download_extract_hadoop
create_user
run_standalone
configure_psu
todo_pubkey
# format HDFS as $HADOOP_USER so the namenode directory stays owned by that user
su - $HADOOP_USER -c "$HADOOP_INSTALL/bin/hdfs namenode -format"
to_do
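# Once key-based SSH is in place, the daemons can be started as the
# hadoop user and checked with jps (a sketch, not run by this script):
#   su - hadoop
#   /opt/hadoop/sbin/start-dfs.sh
#   /opt/hadoop/sbin/start-yarn.sh
#   jps   # expect NameNode, DataNode, SecondaryNameNode, ResourceManager, NodeManager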