Created
June 18, 2012 15:39
-
-
Save justinkamerman/2948974 to your computer and use it in GitHub Desktop.
Cloud-init scripts for configuring an Ubuntu image for Hadoop
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Assemble the cloud-init user-data file by wrapping the cloud-config and
# the setup script into a single MIME multipart archive.
# := (simple expansion) and an underscore name: hyphens in Make variable
# names are legal but fragile and inconsistent with convention.
WRITE_MIME_MULTIPART := ./bin/write-mime-multipart

.PHONY: all clean
.DELETE_ON_ERROR:

# Default goal still builds cloud-config.txt, as before.
all: cloud-config.txt

# $@ = cloud-config.txt, $^ = ubuntu-config.txt hadoop-setup.sh (in order)
cloud-config.txt: ubuntu-config.txt hadoop-setup.sh
	$(WRITE_MIME_MULTIPART) --output=$@ $^

clean:
	$(RM) cloud-config.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Install and configure Hadoop (pseudo-distributed, single node) on this
# machine.
#
# This script will be run as root (by cloud-init on first boot).
#
CONFIGURED_USER=ubuntu                # login user that gets an HDFS home dir
HADOOP_HOME=/usr/local/hadoop         # home directory of the hadoop user
HADOOP_VERSION=0.20.203.0
# Exported once here; the original defined HADOOP_INSTALL twice with the
# same value (plain, then exported) -- collapsed to a single definition.
export HADOOP_INSTALL=${HADOOP_HOME}/hadoop-${HADOOP_VERSION}
HADOOP_TMP_DIR=${HADOOP_INSTALL}/datastore   # backs hadoop.tmp.dir (core-site.xml)
export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-amd64
# NOTE(review): -m32 forces a 32-bit build of the hadoop-lzo native code,
# but JAVA_HOME points at the amd64 JDK -- confirm the resulting JNI
# library actually loads on a 64-bit host.
export CFLAGS=-m32
export CXXFLAGS=-m32
export PATH=${JAVA_HOME}/bin:${HADOOP_INSTALL}/bin:${PATH}
# Instance meta-data

# Add profile options: publish the Hadoop/Java environment system-wide.
# Unescaped ${...} values are expanded NOW (baked into the file); escaped
# \$... values are left for the login shell to expand later.  The original
# left $LD_LIBRARY_PATH unescaped, so it was expanded at write time (empty
# when this script runs), leaving a dangling ':' in the written line and
# discarding any runtime value -- now escaped like the PATH line below.
cat >>/etc/bash.bashrc <<EOF
# Added by cloud-init
export JAVA_HOME=${JAVA_HOME}
export LD_LIBRARY_PATH=${JAVA_HOME}/lib:\$LD_LIBRARY_PATH
export HADOOP_HOME=${HADOOP_HOME}
export HADOOP_INSTALL=${HADOOP_INSTALL}
export PATH=\$JAVA_HOME/bin:\$HADOOP_INSTALL/bin:\$PATH
EOF

# Create hadoop user (home directory doubles as the install root)
useradd -s /bin/bash -c "Hadoop user" -m -d ${HADOOP_HOME} hadoop

# Download hadoop distribution and unpack it under HADOOP_HOME.
# URL path segment now derived from HADOOP_VERSION instead of being
# hard-coded (and the accidental 'apache//hadoop' double slash dropped).
wget http://mirror.csclub.uwaterloo.ca/apache/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}rc1.tar.gz -O /mnt/hadoop-${HADOOP_VERSION}rc1.tar.gz
tar zxvf /mnt/hadoop-${HADOOP_VERSION}rc1.tar.gz -C ${HADOOP_HOME}
# Hadoop config: pseudo cluster (all daemons on localhost)
# Append JAVA_HOME so the Hadoop daemon launcher scripts can find the JVM.
cat >>${HADOOP_INSTALL}/conf/hadoop-env.sh <<EOF
# Added by cloud-init
export JAVA_HOME=${JAVA_HOME}
EOF
# core-site.xml: register compression codecs (including the LZO codecs
# built later in this script), point the default filesystem at a local
# namenode on port 9000, and root hadoop's working state in the datastore
# directory.  The heredoc delimiter is quoted (<<\EOF) so ${user.name}
# passes through verbatim for Hadoop to substitute at runtime.
cat >${HADOOP_INSTALL}/conf/core-site.xml <<\EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://localhost:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop/datastore/hadoop-${user.name}</value>
</property>
</configuration>
EOF
# mapred-site.xml: jobtracker on localhost:9001; per-user staging under /user.
cat >${HADOOP_INSTALL}/conf/mapred-site.xml <<\EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>localhost:9001</value>
</property>
<property>
<name>mapreduce.jobtracker.staging.root.dir</name>
<value>/user</value>
</property>
</configuration>
EOF
# hdfs-site.xml: replication factor 1 -- single-node cluster, no redundancy.
cat >${HADOOP_INSTALL}/conf/hdfs-site.xml <<\EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
EOF
# Download, build, and install the hadoop-lzo native compression library.
echo "### Downloading hadoop-lzo distribution"
# NOTE(review): fixed 5-minute pause -- presumably waiting for cloud-init's
# package installation (ant, git-core, liblzo2-dev) to finish before the
# build below; confirm, and replace with an explicit readiness check.
sleep 300
# Platform string (e.g. the OS/arch tuple Hadoop reports) used to select
# the native library install directory below.
HADOOP_PLATFORM=$(hadoop org.apache.hadoop.util.PlatformName)
pushd /mnt
git clone https://github.com/kevinweil/hadoop-lzo.git
cd hadoop-lzo
ant compile-native tar
cp build/hadoop-lzo-*.jar ${HADOOP_INSTALL}/lib
cp -r build/native/${HADOOP_PLATFORM}/lib/libgplcompression.* ${HADOOP_INSTALL}/lib/native/${HADOOP_PLATFORM}
popd
# Change file ownership to hadoop
chown -R hadoop:hadoop ${HADOOP_INSTALL}
# Set up Hadoop tmp dir (-p: tolerate re-runs / pre-existing directory)
mkdir -p ${HADOOP_TMP_DIR}
chown hadoop:hadoop ${HADOOP_TMP_DIR}
# Clean up
#echo "### Cleaning up hadoop tarball"
#rm -rf /mnt/hadoop-${HADOOP_VERSION}rc1.tar.gz
#rm -rf /mnt/hadoop-lzo
# Get mapreduce code
#su -l ${CONFIGURED_USER} -c 'git clone git://github.com/justinkamerman/fiddlenet.git'
# Set up passphraseless ssh so the start/stop scripts can ssh to localhost
# as the hadoop user without prompting.
echo "### Setting up passphraseless ssh"
mkdir ${HADOOP_HOME}/.ssh
cat >${HADOOP_HOME}/.ssh/config <<EOF
Host *
StrictHostKeyChecking no
EOF
# SECURITY NOTE(review): this bakes a fixed private key into a publicly
# visible provisioning script -- anyone holding this key can log in as
# hadoop.  Generate a fresh per-instance keypair instead, e.g.:
#   ssh-keygen -t rsa -N '' -f ${HADOOP_HOME}/.ssh/id_rsa
# The key below is an RSA key; it was previously written to id_dsa, which
# mislabels it -- stored as id_rsa (one of ssh's default identity files).
# NOTE(review): the final base64 line looks truncated ('...pbcN==' is not
# valid padding) -- the key material may be corrupt; regenerate regardless.
cat >${HADOOP_HOME}/.ssh/id_rsa <<EOF
-----BEGIN RSA PRIVATE KEY-----
MIIEpAIBAAKCAQEAqq3q4eQi9xqPRoTa+gsNIfnB1tIvnNIu6P4B/rejcAgmfxNa
TLILqSDBWYwI6xsL3MaH0hBY+c/SghcxrT3JL2z0TKfmexaUtC3+js8h++F+vPv9
fZaXojelNyXsOlgTBa+e4thIn7j/44ozMs/Uvy6PkGVukrPXRV57b9KEriAtEYJF
3DdW/Y6S89+gsV7Scwty3pOJ7eeNJEirTCdl0ox+/aNFNxlM29sOqqB43q1fKLZx
223CginVtd+fO8VLzU0+PdyxFZ9qCMVPuEP+bl3uSWCdlU9cfj+LY5cXch+kFdoN
Key7VqH2aTxXBDL8t1dwssUnlSkwtOt4Wg1qzQIDAQABAoIBAFpFXeNXa/7Rd1HO
1ppE2g9ML29U/4WrzM/B+IAl1DVeui2fqLTDvlMXVevsmpLuXRnJjvBVYRnPBwFz
Dv0Xnp6Mu7EHZGlZihC5+tbBSrITk5qUlH+l9FEBqUo/rm81QepR9nD3/4EqsXxB
Dc8kCNuM3rV6UD8bCxJPZG3CJBaLZId2xueaejWgAnx9L6bx4G8hTfBGgGpPeZ6t
sfCtukMZnAAADZRImjxfIUjDTNbdGVTCmYgWL9cg7j8VsFa7nISbPCmmyyTU6wQa
6nXEdeazejYmGcjZopPlojcQJ6V6uQMljIrI+3mx7UzQuEz/Fjqa4vvTV7H+t6fJ
qSSErgECgYEA3f092vTWF3yRRVV6MSyrvvw4DfPgFZs1IXIP72WxmK4SfWY1vwEe
/pQprSvctd6jW71G9K3SZsFoDQmuDrYmda/66YC61jDgeInbTxb9Cc9Wiy11D4v1
ddeqkKX2yVBUAhd80bhAXFJNDWhesd6nXzgB9QuPHKnmLYSr1WMwO8ECgYEAxNQ5
cJpLH0W2AG94cfRjYAfdxT421tsYX0W/MRtq3Rqj0L74UVeYgG+YOl30GJkg5H1i
D04EWk94hfNviLg6HfIDo5iFqIbFWuzXjI194JzGAMPaEPJi3w/PgJAVc1QiDjkq
DV7BQXgvz/InTQnWznOVIp8sU6SBLB2kUzpa4g0CgYEA3d7kWdlnuaW5FFEwhcGe
Do7L/7YF+9JasgjswFslu/IPbOIhSbx3G/8+AGTcfbH+GAz/xEGPD0CzHITWQMHx
gqLW51bQZpAHarJuTYgudAWU/Bn87AL43EUnptcZ52+v5z9Oc9XyDdP8SzBLpP9i
zZqO6joZWY6+DjSSAf7XEIECgYEAifFaGCpqP45xkTiOJv7prmGU8Sk68bU3DX4q
ElZuvGpxKFjOWuOTA2AyRaWW7q5SuQ+Oa793mXtcsjP7lMvYHyh/mGXKNmPNaH3Y
Sq7W61W0BtE7wOi+linUePuBrQPnoiQ57ojb0/BRQeEp3fnpS2MBv/Ph8vS1ep+D
jLi2/PkCgYAzqknNfAkItNFRljQ1VDRpq8vrCANvQ1OaRa22zuUe7g8Vi6qr8uiB
eAWfWirpwIB2YnvKcpKlgM6DASO5a8t6tyCC6l1iV3BMgXLcUeMwmbi5ocwpCVoy
8mlCSVDCFzkwPSPr7h2OFI+fYPery0y9L3Vs19m+pbcN==
-----END RSA PRIVATE KEY-----
EOF
cat >${HADOOP_HOME}/.ssh/authorized_keys <<EOF
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCqrerh5CL3Go9GhNr6Cw0h+cHW0i+c0i7o/gH+t6NwCCZ/E1pMsgupIMFZjAjrGwvcxofSEFj5z9KCFzGtPckvbPRMp+Z7FpS0Lf6OzyH74X68+/19lpeiN6U3Jew6WBMFr57i2EifuP/jijMyz9S/Lo+QZW6Ss9dFXntv0oSuIC0RgkXcN1b9jpLz36CxXtJzC3Lek4nt540kSKtMJ2XSjH79o0U3GUzb2w6qoHjerV8otnHbbcKCKdW13587xUvNTT493LEVn2oIxU+4Q/5uXe5JYJ2VT1x+P4tjlxdyH6QV2g0p7LtWofZpPFcEMvy3V3CyxSeVKT
EOF
# Lock down permissions: sshd refuses keys in group/world-accessible dirs.
chown -R hadoop:hadoop ${HADOOP_HOME}/.ssh
chmod 700 ${HADOOP_HOME}/.ssh
chmod 600 ${HADOOP_HOME}/.ssh/id_rsa
# Disable ipv6
# NOTE(review): presumably done because Hadoop 0.20.x daemons bind/resolve
# badly with IPv6 enabled -- confirm this is still required.
echo "### Disabling ipv6 stack"
cat >>/etc/sysctl.conf <<EOF
# IPv6
net.ipv6.conf.all.disable_ipv6 = 1
net.ipv6.conf.default.disable_ipv6 = 1
net.ipv6.conf.lo.disable_ipv6 = 1
EOF
# Apply the sysctl changes immediately (no reboot needed).
sysctl -p
# Namenode
# Format hdfs filesystem (as the hadoop user, so HDFS state is owned by it)
echo "### Formatting hdfs filesystem"
su hadoop -c "${HADOOP_INSTALL}/bin/hadoop namenode -format"
# Start hadoop daemons (namenode, datanode, jobtracker, tasktracker)
echo "### Starting Hadoop daemons"
su hadoop -c "${HADOOP_INSTALL}/bin/start-all.sh"
# Create user directory in HDFS for the configured login user
echo "### Creating hdfs directory for user ${CONFIGURED_USER}"
su hadoop -c "${HADOOP_INSTALL}/bin/hadoop fs -mkdir /user/${CONFIGURED_USER}"
# Change ownership of user directory so that user can write to it
echo "### Changing ownership of hdfs directory for user ${CONFIGURED_USER}"
su hadoop -c "${HADOOP_INSTALL}/bin/hadoop fs -chown ${CONFIGURED_USER} /user/${CONFIGURED_USER}"
1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#cloud-config
#
# cloud-init configuration for the Hadoop node; combined with
# hadoop-setup.sh into a MIME multipart user-data archive.
#
# (Indentation restored: the block-scalar content under
# debconf_selections and the nested 'output' mapping must be indented
# to parse as intended.)

# Provide debconf answers (non-interactive package configuration)
debconf_selections: |
  debconf debconf/priority select low
  debconf debconf/frontend select readline

# Update apt database on first boot
apt_update: true

# Upgrade the instance on first boot
apt_upgrade: true

# Install additional packages on first boot
# (lzo libs + ant/make for the hadoop-lzo native build, JDK for Hadoop)
packages:
  - lzop
  - liblzo2-2
  - liblzo2-dev
  - git-core
  - openjdk-6-jre-headless
  - openjdk-6-jdk
  - ant
  - make
  - jflex

# timezone: set the timezone for this instance
# the value of 'timezone' must exist in /usr/share/zoneinfo
timezone: America/Halifax

# manage_etc_hosts:
# default: false
# Setting this config variable to 'true' will mean that on every
# boot, /etc/hosts will be re-written from /etc/cloud/templates/hosts.tmpl
# The strings '$hostname' and '$fqdn' are replaced in the template
# with the appropriate values
manage_etc_hosts: true

# Print message at the end of cloud-init job
final_message: "The system is finally up, after $UPTIME seconds"

# configure where output will go
# 'output' entry is a dict with 'init', 'config', 'final' or 'all'
# entries. Each one defines where
# cloud-init, cloud-config, cloud-config-final or all output will go
# each entry in the dict can be a string, list or dict.
# if it is a string, it refers to stdout and stderr
# if it is a list, entry 0 is stdout, entry 1 is stderr
# if it is a dict, it is expected to have 'output' and 'error' fields
# default is to write to console only
# the special entry "&1" for an error means "same location as stdout"
# (Note, that '&1' has meaning in yaml, so it must be quoted)
output:
  init: "> /tmp/cloud-init.out"
  config: [ "> /tmp/cloud-config.out", "> /tmp/cloud-config.err" ]
  final:
    output: "> /tmp/cloud-final.out"
    error: "&1"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment