Skip to content

Instantly share code, notes, and snippets.

@oschrenk
Last active December 30, 2015 09:39

Revisions

  1. oschrenk revised this gist Dec 5, 2013. 1 changed file with 4 additions and 0 deletions.
    4 changes: 4 additions & 0 deletions configure-hadoop.sh
    Original file line number Diff line number Diff line change
    @@ -115,6 +115,10 @@ echo "---------------"
    echo ""
    # Format HDFS through the NameNode.
    bin/hadoop namenode -format

    # Create /user and the current user's directory below it in the Hadoop
    # filesystem.
    # NOTE(review): in this hunk the mkdir calls come before the "Launching
    # Hadoop" step; presumably they need the filesystem daemons to be running
    # already - confirm the ordering.
    # NOTE(review): these calls use `hadoop` from PATH while the format step
    # above uses the relative bin/hadoop - confirm both resolve to the same
    # installation.
    echo " Creating /user/$(whoami) directory so that hadoop fs -ls works as expected"
    hadoop fs -mkdir /user
    hadoop fs -mkdir /user/$(whoami)

    echo ""
    echo "Launching Hadoop"
    echo "---------------"
  2. oschrenk created this gist Dec 5, 2013.
    123 changes: 123 additions & 0 deletions configure-hadoop.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,123 @@
    #!/bin/bash
    #
    # Installs Hadoop through Homebrew and configures it to run against
    # localhost. Runs under bash because the ERR trap below is a bash extension.

    # Reports a failed command on stderr.
    # Arguments:
    #   $1 - line number at which the failure was trapped
    #   $2 - exit status of the failed command
    error_handler() {
      printf 'Error occurred in script at line: %s.\n' "${1}" >&2
      printf 'Line exited with status: %s\n' "${2}" >&2
    }

    # Single quotes defer the expansion of LINENO and $? until the trap fires.
    trap 'error_handler "${LINENO}" "$?"' ERR

    # Abort on the first command that exits with a non-zero status.
    set -o errexit

    # Prints the first dotted version number (e.g. 1.2.1) found on the
    # "hadoop:" line of `brew info hadoop` output supplied on stdin.
    # Dots are matched literally and every component may have several digits,
    # so a version such as 3.3.10 is not truncated; only the first match is
    # printed so the result is always a single line.
    hadoop_version_from_brew_info() {
      grep "hadoop:" | grep -Eo '[0-9]+(\.[0-9]+)+' | head -n 1
    }

    echo ""
    echo "Installing Dependencies"
    echo "-----------------"
    # xmlstarlet is used further down to inspect and edit Hadoop's XML config.
    brew install xmlstarlet

    echo ""
    echo "Installing Hadoop"
    echo "-----------------"
    brew install hadoop
    HADOOP_VERSION=$(brew info hadoop | hadoop_version_from_brew_info)
    HADOOP_LIBEXEC_PATH="$(brew --prefix)/Cellar/hadoop/${HADOOP_VERSION}/libexec"
    # Later steps use paths relative to this directory (conf/..., bin/...).
    cd "$HADOOP_LIBEXEC_PATH"

    printf '\n%s\n%s\n' "Configuring Hadoop" "------------------"

    # conf/hadoop-env.sh: append a HADOOP_OPTS assignment unless a line that is
    # not a comment already mentions HADOOP_OPTS.
    file="conf/hadoop-env.sh"
    if grep HADOOP_OPTS "$file" | grep -qv '^ *#'; then
      echo " WARN HADOOP_OPTS already set, not changing file."
    else
      echo " Configuring $file."
      printf '\n\n%s' 'HADOOP_OPTS="-Djava.net.preferIPv4Stack=true -Djava.security.krb5.realm=OX.AC.UK -Djava.security.krb5.kdc=kdc0.ox.ac.uk:kdc1.ox.ac.uk"' >> "$file"
    fi

    # Appends <property><name>NAME</name><value>VALUE</value></property> to the
    # <configuration> root of a Hadoop XML config file, editing it in place.
    # Every step after the first addresses property[last()], i.e. only the
    # element that was just added, so properties already present in the file
    # are left untouched.
    # Arguments:
    #   $1 - path of the XML file
    #   $2 - property name
    #   $3 - property value
    # Returns:
    #   the status of the first xmlstarlet edit that fails, otherwise 0
    add_hadoop_property() {
      # "|| return" hands a failure back to the call site, where the script's
      # errexit setting and ERR trap apply.
      xml ed -L -s /configuration -t elem -n property -v "" "$1" || return
      xml ed -L -s '/configuration/property[last()]' -t elem -n name -v "$2" "$1" || return
      xml ed -L -a '/configuration/property[last()]/name' -t elem -n value -v "$3" "$1" || return
    }

    # Succeeds when the XML file $1 already yields text for
    # /configuration/property, which is taken to mean it was configured before.
    hadoop_site_configured() {
      [ -n "$(xmlstarlet sel -T -t -v "/configuration/property" "$1")" ]
    }

    # core-site.xml
    file="conf/core-site.xml"
    if hadoop_site_configured "$file"; then
      echo " WARN $file already configured, not changing file."
    else
      echo " Configuring $file"
      add_hadoop_property "$file" "fs.default.name" "hdfs://localhost:9000"
      # Single quotes: ${user.name} must reach the file literally rather than
      # be expanded by the shell.
      add_hadoop_property "$file" "hadoop.tmp.dir" '/Users/${user.name}/.hadoop-store'
    fi

    # conf/hdfs-site.xml
    file="conf/hdfs-site.xml"
    if hadoop_site_configured "$file"; then
      echo " WARN $file already configured, not changing file."
    else
      echo " Configuring $file"
      add_hadoop_property "$file" "dfs.replication" "1"
    fi

    # conf/mapred-site.xml
    file="conf/mapred-site.xml"
    if hadoop_site_configured "$file"; then
      echo " WARN $file already configured, not changing file."
    else
      echo " Configuring $file"
      add_hadoop_property "$file" "mapred.job.tracker" "localhost:9001"
      # The map task limit is set to the value reported by hw.physicalcpu.
      add_hadoop_property "$file" "mapred.tasktracker.map.tasks.maximum" "$(sysctl -n hw.physicalcpu)"
    fi

    echo ""
    echo "Configuring SSH"
    echo "---------------"
    echo " Hadoop manages all its nodes using ssh"
    echo ""
    echo " Enabling Remote Login on OS X"
    sudo systemsetup -setremotelogin on

    # Dedicated key pair used for the passwordless login to localhost.
    SSH_KEY_FILENAME="$HOME/.ssh/id_rsa_hadoop"

    # The path is quoted throughout so that a home directory containing
    # whitespace does not split into several words.
    if [ -f "$SSH_KEY_FILENAME" ]; then
      echo " WARN $SSH_KEY_FILENAME already exists"
    else
      echo " Creating passwordless SSH key for localhost at $SSH_KEY_FILENAME"
      # -P "" gives the new key an empty passphrase.
      ssh-keygen -t rsa -f "$SSH_KEY_FILENAME" -P ""
    fi

    # Authorize the Hadoop key for logins to this account unless its key
    # material is already listed in authorized_keys.
    # Field 2 of the .pub file is the key body; reading it in a separate
    # assignment lets errexit stop the script when the file cannot be read.
    hadoop_pubkey=$(awk '{print $2}' "$SSH_KEY_FILENAME.pub")
    # grep -F compares the key as a fixed string rather than a regular
    # expression; -q only reports whether a match exists.
    if [ -n "$hadoop_pubkey" ] && grep -qF -- "$hadoop_pubkey" ~/.ssh/authorized_keys; then
      echo " INFO SSH key already authorized"
    else
      echo " Authorizing SSH key"
      cat "$SSH_KEY_FILENAME.pub" >> ~/.ssh/authorized_keys
    fi

    # Point ssh at the Hadoop key for localhost unless ~/.ssh/config already
    # mentions localhost.
    if grep -q localhost ~/.ssh/config; then
      echo " WARN localhost is already configured in SSH config"
    else
      echo " Changing SSH config to use $SSH_KEY_FILENAME for localhost"
      CURRENT_USER=$(whoami)
      # The values are passed as %s arguments so that a '%' or backslash in
      # them is written verbatim instead of being interpreted by printf.
      printf '\n\nHost localhost\n\tUser %s\n\tIdentityFile %s\n' "$CURRENT_USER" "$SSH_KEY_FILENAME" >> "$HOME/.ssh/config"
    fi

    # Format HDFS through the NameNode.
    printf '\n%s\n%s\n\n' "Formatting HDFS" "---------------"
    bin/hadoop namenode -format

    # Launch Hadoop through its start-all.sh script.
    printf '\n%s\n%s\n\n' "Launching Hadoop" "---------------"
    bin/start-all.sh