    # Installing CDH4 on a Single Linux Node in Pseudo-distributed Mode
    # https://ccp.cloudera.com/display/CDH4DOC/Installing+CDH4+on+a+Single+Linux+Node+in+Pseudo-distributed+Mode

    # Installing CDH4 with MRv1 on a Single Linux Node in Pseudo-distributed mode
    # On Ubuntu and other Debian systems
    nipra@lambda:Downloads$ wget -cv http://archive.cloudera.com/cdh4/one-click-install/precise/amd64/cdh4-repository_1.0_all.deb
nipra@lambda:Downloads$ sudo dpkg -i cdh4-repository_1.0_all.deb # Adds /etc/apt/sources.list.d/cloudera-cdh4.list
    nipra@lambda:Downloads$ dpkg -L cdh4-repository # To view the files on Ubuntu systems

    # Install CDH4
    # For Ubuntu Precise systems
    # nipra@lambda:~$ lsb_release -c
    $ curl -s http://archive.cloudera.com/cdh4/ubuntu/precise/amd64/cdh/archive.key | sudo apt-key add -
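
# The config package itself still needs installing (per the Cloudera doc linked above):
$ sudo apt-get update
$ sudo apt-get install hadoop-0.20-conf-pseudo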

    # To view the files on Ubuntu systems
    $ dpkg -L hadoop-0.20-conf-pseudo

    # Step 1: Format the NameNode.
    # Got the error: Error: JAVA_HOME is not set and could not be found.
    # Even though ~/.bashrc has ``export JAVA_HOME=/usr/lib/jvm/jdk1.7.0''
    # FIX: [ https://groups.google.com/a/cloudera.org/d/msg/cdh-user/x0KLPPHiibU/twracHL-Rd0J ]
    # $ sudo visudo
    # Add ``Defaults env_keep+=JAVA_HOME''
    $ sudo -u hdfs hdfs namenode -format
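# Alternative that avoids editing sudoers: set JAVA_HOME just for this one
# command (JDK path taken from the note above):
# $ sudo -u hdfs sh -c 'JAVA_HOME=/usr/lib/jvm/jdk1.7.0 hdfs namenode -format'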

    # Step 2: Start HDFS
    # http://localhost:50070/
    $ for service in /etc/init.d/hadoop-hdfs-*
    > do
    > sudo $service start
    > done

    # Step 3: Create the /tmp Directory
    $ sudo -u hdfs hadoop fs -mkdir /tmp
    $ sudo -u hdfs hadoop fs -chmod -R 1777 /tmp

    # Step 4: Create the MapReduce system directories
    sudo -u hdfs hadoop fs -mkdir /var
    sudo -u hdfs hadoop fs -mkdir /var/lib
    sudo -u hdfs hadoop fs -mkdir /var/lib/hadoop-hdfs
    sudo -u hdfs hadoop fs -mkdir /var/lib/hadoop-hdfs/cache
    sudo -u hdfs hadoop fs -mkdir /var/lib/hadoop-hdfs/cache/mapred
    sudo -u hdfs hadoop fs -mkdir /var/lib/hadoop-hdfs/cache/mapred/mapred
    sudo -u hdfs hadoop fs -mkdir /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
    sudo -u hdfs hadoop fs -chmod 1777 /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
    sudo -u hdfs hadoop fs -chown -R mapred /var/lib/hadoop-hdfs/cache/mapred
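# The mkdir chain can likely be collapsed, assuming this hadoop version's
# ``fs -mkdir'' supports -p:
# sudo -u hdfs hadoop fs -mkdir -p /var/lib/hadoop-hdfs/cache/mapred/mapred/staging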

    # Step 5: Verify the HDFS File Structure
    $ sudo -u hdfs hadoop fs -ls -R /
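# Given the commands above, the listing should look roughly like (sizes and
# timestamps elided):
# drwxrwxrwt   - hdfs   supergroup  ... /tmp
# drwxr-xr-x   - hdfs   supergroup  ... /var/lib/hadoop-hdfs/cache
# drwxr-xr-x   - mapred supergroup  ... /var/lib/hadoop-hdfs/cache/mapred
# drwxrwxrwt   - mapred supergroup  ... /var/lib/hadoop-hdfs/cache/mapred/mapred/staging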

    # Step 6: Start MapReduce
    # http://localhost:50030/
$ for service in /etc/init.d/hadoop-0.20-mapreduce-*
    > do
    > sudo $service start
    > done

    # Step 7: Create User Directories
    $ sudo -u hdfs hadoop fs -mkdir /user/<user>
    $ sudo -u hdfs hadoop fs -chown <user> /user/<user>
# where <user> is the Linux username of each user.

    sudo -u hdfs hadoop fs -mkdir /user/$USER
    sudo -u hdfs hadoop fs -chown $USER /user/$USER
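
# Smoke test as your own user; the examples jar path is assumed for CDH4 MRv1:
$ hadoop jar /usr/lib/hadoop-0.20-mapreduce/hadoop-examples.jar pi 2 100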

    # For stopping daemons
    $ for service in /etc/init.d/hadoop-hdfs-* /etc/init.d/hadoop-0.20-mapreduce-*
    > do
    > sudo $service stop
    > done

    # HBase
    nipra@lambda:~$ sudo apt-get install hbase
    nipra@lambda:~$ sudo apt-get install hbase-master
nipra@lambda:~$ sudo jps # HMaster should be listed
nipra@lambda:~$ sudo /etc/init.d/hbase-master stop # stop before reconfiguring below

    # To enable pseudo-distributed mode, you must first make some configuration
    # changes. Open /etc/hbase/conf/hbase-site.xml
<configuration>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://localhost:8020/hbase</value>
  </property>
</configuration>
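# Note: the hbase.rootdir host/port must match the NameNode address
# (fs.default.name in /etc/hadoop/conf/core-site.xml); hdfs://localhost:8020
# is the CDH4 pseudo-distributed default.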

    # Creating the /hbase Directory in HDFS
    $ sudo -u hdfs hadoop fs -mkdir /hbase
    $ sudo -u hdfs hadoop fs -chown hbase /hbase

# Installing and Starting ZooKeeper Server
# Note: starting the server before running ``init'' fails with:
#   Using config: /etc/zookeeper/conf/zoo.cfg
#   ZooKeeper data directory is missing at /var/lib/zookeeper fix the path or run initialize
#   invoke-rc.d: initscript zookeeper-server, action "start" failed.

sudo apt-get install zookeeper-server
nipra@lambda:~$ sudo /etc/init.d/zookeeper-server init
nipra@lambda:~$ sudo /etc/init.d/zookeeper-server start
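
# Sanity check: ZooKeeper should answer ``imok'' on its client port (2181 by default):
$ echo ruok | nc localhost 2181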

    nipra@lambda:~$ sudo /etc/init.d/hbase-master start

    # To enable the HBase Region Server on Ubuntu and Debian systems
    $ sudo apt-get install hbase-regionserver
# Master web UI: http://localhost:60010/ (region server web UI: http://localhost:60030/)
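
# Quick smoke test from the HBase shell (table and column family names are only examples):
$ hbase shell
hbase> create 'test', 'cf'
hbase> put 'test', 'row1', 'cf:a', 'value1'
hbase> scan 'test'
hbase> disable 'test'
hbase> drop 'test'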

    # Installing and Starting the HBase Thrift Server
    sudo apt-get install hbase-thrift
    # Installing and Configuring REST
    sudo apt-get install hbase-rest
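
# Both are started via their init.d scripts (see start-hbase-all.sh below).
# A hedged check, assuming REST's default port 8080:
$ curl http://localhost:8080/version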

    # ~/bin/start-hadoop-all.sh
    for service in /etc/init.d/hadoop-hdfs-* /etc/init.d/hadoop-0.20-mapreduce-*
    do
    sudo $service start
    done

    # ~/bin/start-hbase-all.sh
    for service in /etc/init.d/zookeeper-server /etc/init.d/hbase-master /etc/init.d/hbase-regionserver /etc/init.d/hbase-rest /etc/init.d/hbase-thrift
    do
    sudo $service start
    done

    # ~/bin/stop-hbase-all.sh
    for service in /etc/init.d/hbase-thrift /etc/init.d/hbase-rest /etc/init.d/hbase-regionserver /etc/init.d/hbase-master /etc/init.d/zookeeper-server
    do
    sudo $service stop
    done

    # ~/bin/stop-hadoop-all.sh
    for service in /etc/init.d/hadoop-0.20-mapreduce-* /etc/init.d/hadoop-hdfs-*
    do
    sudo $service stop
    done

    # ~/bin/start-all.sh
    ~/bin/start-hadoop-all.sh
    ~/bin/start-hbase-all.sh

    # ~/bin/stop-all.sh
~/bin/stop-hbase-all.sh
~/bin/stop-hadoop-all.sh
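
# Make the helper scripts above executable:
chmod +x ~/bin/*-all.sh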