Last active
December 30, 2015 09:39
-
-
Save oschrenk/7810631 to your computer and use it in GitHub Desktop.
Configuring Hadoop on OS X for the first time
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# First-time Hadoop setup for OS X using Homebrew.
# Needs bash rather than plain sh: the ERR trap installed below is a bash
# feature.

#######################################
# Report the location and status of a failed command.
# Arguments:
#   $1 - line number of the command that failed
#   $2 - exit status of that command
# Outputs:
#   Two diagnostic lines on stderr.
#######################################
error_handler() {
  printf 'Error occurred in script at line: %s.\n' "${1}" >&2
  printf 'Line exited with status: %s\n' "${2}" >&2
}

# Single quotes defer expansion until the trap fires, so LINENO and $?
# describe the failing command rather than this line.
trap 'error_handler "${LINENO}" "$?"' ERR
set -o errexit # stop at the first command that fails outside a condition
# Section banner: a blank line, the title, then an underline.
printf '\n%s\n%s\n' "Installing Dependencies" "-----------------"
# xmlstarlet is the command-line XML tool this script uses further down to
# inspect and edit the Hadoop *-site.xml files.
brew install xmlstarlet
echo ""
echo "Installing Hadoop"
echo "-----------------"
brew install hadoop

# Ask Homebrew which version is installed instead of scraping `brew info`
# with a regex. `brew list --versions hadoop` prints "hadoop <version>..."
# and the last field is used as the Cellar directory name, including any
# revision suffix.
HADOOP_VERSION=$(brew list --versions hadoop | awk '{ print $NF }')
if [ -z "$HADOOP_VERSION" ]; then
  echo "Could not determine the installed Hadoop version." >&2
  exit 1
fi

HADOOP_LIBEXEC_PATH="$(brew --prefix)/Cellar/hadoop/$HADOOP_VERSION/libexec"
# All following steps use paths relative to this directory (conf/..., bin/...).
# Quoted so that an unexpected value can never turn this into a bare `cd`.
cd -- "$HADOOP_LIBEXEC_PATH"
# Section banner: a blank line, the title, then an underline.
printf '\n%s\n%s\n' "Configuring Hadoop" "------------------"

# conf/hadoop-env.sh
file="conf/hadoop-env.sh"
# The pipeline prints every line mentioning HADOOP_OPTS that is not a comment
# (leading spaces are stripped first so indented comments are recognised).
# No output means the option is not set yet, so append it.
if [ -z "$(grep HADOOP_OPTS "$file" | sed 's/^ *//g' | grep -v "^#")" ]; then
  echo " Configuring $file."
  # Appends a HADOOP_OPTS assignment that prefers the IPv4 stack and sets
  # the Kerberos realm/KDC system properties.
  printf "\n\nHADOOP_OPTS=\"-Djava.net.preferIPv4Stack=true -Djava.security.krb5.realm=OX.AC.UK -Djava.security.krb5.kdc=kdc0.ox.ac.uk:kdc1.ox.ac.uk\"" >> "$file"
else
  echo " WARN HADOOP_OPTS already set, not changing file."
fi
# core-site.xml
# Adds the default filesystem URI and the Hadoop temp directory, unless the
# file already contains a non-empty <property>.
file="conf/core-site.xml"
if [ -n "$(xmlstarlet sel -T -t -v "/configuration/property" "$file")" ]; then
  echo " WARN $file already configured, not changing file."
else
  echo " Configuring $file"
  # A single xmlstarlet run applies the edits in order and rewrites the file
  # once, so an abort cannot leave a half-built property behind.
  # property[last()] targets only the element that was just appended.
  # The single quotes around the tmp dir keep ${user.name} literal for the
  # shell (presumably Hadoop substitutes it at runtime - verify).
  xmlstarlet ed -L \
    -s /configuration -t elem -n property -v "" \
    -s '/configuration/property[last()]' -t elem -n name -v "fs.default.name" \
    -s '/configuration/property[last()]' -t elem -n value -v "hdfs://localhost:9000" \
    -s /configuration -t elem -n property -v "" \
    -s '/configuration/property[last()]' -t elem -n name -v "hadoop.tmp.dir" \
    -s '/configuration/property[last()]' -t elem -n value -v '/Users/${user.name}/.hadoop-store' \
    "$file"
fi
# conf/hdfs-site.xml
# Sets dfs.replication to 1, unless the file already contains a non-empty
# <property>.
file="conf/hdfs-site.xml"
if [ -n "$(xmlstarlet sel -T -t -v "/configuration/property" "$file")" ]; then
  echo " WARN $file already configured, not changing file."
else
  echo " Configuring $file"
  # One invocation, one rewrite of the file: either the complete property is
  # written or nothing is. property[last()] targets only the new element.
  xmlstarlet ed -L \
    -s /configuration -t elem -n property -v "" \
    -s '/configuration/property[last()]' -t elem -n name -v "dfs.replication" \
    -s '/configuration/property[last()]' -t elem -n value -v "1" \
    "$file"
fi
# conf/mapred-site.xml
# Sets the job tracker address and the maximum number of map tasks per task
# tracker, unless the file already contains a non-empty <property>.
file="conf/mapred-site.xml"
if [ -n "$(xmlstarlet sel -T -t -v "/configuration/property" "$file")" ]; then
  echo " WARN $file already configured, not changing file."
else
  echo " Configuring $file"
  # Resolved up front so that a failing sysctl aborts the script instead of
  # silently writing an empty value.
  physical_cpus=$(sysctl -n hw.physicalcpu)
  # Each name/value pair is attached to property[last()], i.e. only to the
  # element appended just before it. Addressing plain /configuration/property
  # here would also match the first property and add a second name/value
  # pair to it.
  xmlstarlet ed -L \
    -s /configuration -t elem -n property -v "" \
    -s '/configuration/property[last()]' -t elem -n name -v "mapred.job.tracker" \
    -s '/configuration/property[last()]' -t elem -n value -v "localhost:9001" \
    -s /configuration -t elem -n property -v "" \
    -s '/configuration/property[last()]' -t elem -n name -v "mapred.tasktracker.map.tasks.maximum" \
    -s '/configuration/property[last()]' -t elem -n value -v "$physical_cpus" \
    "$file"
fi
echo ""
echo "Configuring SSH"
echo "---------------"
echo " Hadoop manages all its nodes using ssh"
echo ""
echo " Enabling Remote Login on OS X"
sudo systemsetup -setremotelogin on

# Dedicated key pair for Hadoop, created with an empty passphrase.
SSH_KEY_FILENAME=$HOME/.ssh/id_rsa_hadoop
if [ -f "$SSH_KEY_FILENAME" ]; then
  echo " WARN $SSH_KEY_FILENAME already exists"
else
  echo " Creating passwordless SSH key for localhost at $SSH_KEY_FILENAME"
  ssh-keygen -t rsa -f "$SSH_KEY_FILENAME" -P ""
fi

# Field 2 of the .pub line is the key material. Reading it in a separate
# assignment makes a missing .pub file abort the script; passed inline, an
# empty result would leave grep without a pattern, waiting on stdin.
public_key=$(awk '{ print $2 }' "$SSH_KEY_FILENAME.pub")
if [ -z "$public_key" ]; then
  echo "No public key found in $SSH_KEY_FILENAME.pub" >&2
  exit 1
fi

# -F matches the key as a fixed string rather than a regular expression.
# stderr is silenced on purpose: authorized_keys may not exist yet, which
# simply means the key is not authorized.
if grep -qF -- "$public_key" "$HOME/.ssh/authorized_keys" 2>/dev/null; then
  echo " INFO SSH key already authorized"
else
  echo " Authorizing SSH key"
  cat "$SSH_KEY_FILENAME.pub" >> "$HOME/.ssh/authorized_keys"
fi
# Make ssh use the Hadoop key when connecting to localhost, unless the SSH
# config already mentions localhost.
# stderr is silenced on purpose: a missing config file just means there is
# nothing configured yet.
if grep -q localhost "$HOME/.ssh/config" 2>/dev/null; then
  echo " WARN localhost is already configured in SSH config"
else
  echo " Changing SSH config to use $SSH_KEY_FILENAME for localhost"
  CURRENT_USER=$(whoami)
  # Values are passed as %s arguments, not spliced into the format string,
  # so a '%' or backslash in the user name or path is written verbatim.
  printf '\n\nHost localhost\n\tUser %s\n\tIdentityFile %s\n' \
    "$CURRENT_USER" "$SSH_KEY_FILENAME" >> "$HOME/.ssh/config"
fi
echo ""
echo "Formatting HDFS"
echo "---------------"
echo ""
# NOTE(review): this runs unconditionally, unlike the guarded steps above;
# confirm how it behaves when the script is re-run on an existing install.
bin/hadoop namenode -format

echo ""
echo "Launching Hadoop"
echo "---------------"
echo ""
bin/start-all.sh

# The home directory is created only after the daemons are up: `hadoop fs`
# talks to the NameNode configured in core-site.xml, so running it before
# start-all.sh fails and, with errexit, would stop the script before Hadoop
# is ever launched. Waiting for safe mode to end avoids racing the startup.
bin/hadoop dfsadmin -safemode wait
echo " Creating /user/$(whoami) directory so that hadoop fs -ls works as expected"
bin/hadoop fs -mkdir /user
bin/hadoop fs -mkdir "/user/$(whoami)"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment