Vagrantfile for Pivotal HD 1.0.1 CE + HAWQ 1.1.0-8
* Vagrantfile - Defines the VMs required and how to configure and provision them.
* pcc_provision.sh - Provisioning is applied only to the pcc VM. It follows the PHD_10_Guide.pdf instructions to install PCC on the pcc VM and PHD on the phd[1..3] VMs. Annotations in the script provide additional details about each step.
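A minimal usage sketch, assuming VirtualBox (or VMware Fusion) is installed, a base box named CentOS-6.2-x86_64 has already been added to Vagrant, and the PCC, PHD and PADS tarballs referenced in pcc_provision.sh sit next to the Vagrantfile (the <box-url> below is a placeholder, not part of this gist):

    # from the directory containing the Vagrantfile and pcc_provision.sh
    vagrant box add CentOS-6.2-x86_64 <box-url>   # only needed if the box is not present yet
    vagrant up                                    # boots phd1, phd2, phd3 and pcc; pcc_provision.sh runs on the pcc VM last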
pcc_provision.sh:
#!/bin/bash
# Note: The default pwd is /home/vagrant.
# Note: 'root' is the default user. You cannot switch users inside this script: "sudo su - gpadmin" will not work!
# Use the inline syntax instead: su - -c "some command" gpadmin
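# Illustrative example only (not part of the installation flow): the inline syntax runs a single
# command in gpadmin's login environment and then returns control to root, e.g.:
#   su - -c "whoami" gpadmin    # prints: gpadmin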
CLUSTER_NAME=PHD_C1
# Note that Hive is disabled. To enable Hive along with all the other services you need at least 2GB of memory per node in the cluster.
# Currently phd2 and phd3 are set to 1GB of memory because my laptop has only 8GB of memory.
# If you have enough physical memory, set phd2 and phd3 to 2048MB (see the Vagrantfile) and add 'hive' to the SERVICES variable.
# If you do not have enough memory, you can remove phd3 from your cluster configuration: comment it out in the Vagrantfile and
# edit the MASTER_AND_SLAVES variable (see the example after the variable definitions below).
#SERVICES=hdfs,yarn,pig,zookeeper,hbase,hive,gpxf,hawq   # full service list including Hive
SERVICES=hdfs,yarn,pig,zookeeper,hbase,gpxf,hawq
MASTER_NODE=phd1.localdomain
MASTER_AND_SLAVES=$MASTER_NODE,phd2.localdomain,phd3.localdomain
HAWQ_SEGMENT_HOSTS=$MASTER_AND_SLAVES
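# Illustrative two-node variant (assumption: the phd3 block in the Vagrantfile is commented out
# as described above); not enabled by default:
#   MASTER_AND_SLAVES=$MASTER_NODE,phd2.localdomain
#   HAWQ_SEGMENT_HOSTS=$MASTER_AND_SLAVES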
ROOT_PASSWORD=vagrant
GPADMIN_PASSWORD=gpadmin
echo "********************************************************************************"
echo "* Prepare PCC (Pivotal Control Center) Prerequisites "
echo "********************************************************************************"
# If missing, try to download the Oracle JDK6 installation binary.
if [ ! -f /vagrant/jdk-6u45-linux-x64-rpm.bin ];
then
cd /vagrant; wget -O jdk-6u45-linux-x64-rpm.bin --no-cookies --header "Cookie: gpw_e24=http%3A%2F%2Fwww.oracle.com" "http://download.oracle.com/otn-pub/java/jdk/6u45-b06/jdk-6u45-linux-x64-rpm.bin" --no-check-certificate; cd ~
fi
# Ensure that all installation packages are available in the folder where 'vagrant up' is executed.
[ ! -f /vagrant/jdk-6u45-linux-x64-rpm.bin ] && { echo "Cannot find jdk-6u45-linux-x64-rpm.bin in the vagrant startup directory"; exit 1; }
[ ! -f /vagrant/PCC-2.0.1.84.121.163.x86_64.tar.gz ] && { echo "Cannot find PCC-2.0.1.84.121.163.x86_64.tar.gz in the vagrant startup directory"; exit 1; }
[ ! -f /vagrant/PHD-1.0.1.0-19.tar.gz ] && { echo "Cannot find PHD-1.0.1.0-19.tar.gz in the vagrant startup directory"; exit 1; }
# Disable security.
sestatus; chkconfig iptables off; service iptables stop; service iptables status
# Install required packages.
yum -y install httpd mod_ssl postgresql postgresql-devel postgresql-server compat-readline5 createrepo sigar nc expect
# Install Oracle Java 6 on the PCC (i.e. Admin) node.
cp /vagrant/jdk-6u45-linux-x64-rpm.bin .; chmod a+x ./jdk-6u45-linux-x64-rpm.bin; sudo ./jdk-6u45-linux-x64-rpm.bin; java -version
echo "********************************************************************************" | |
echo "* Install PCC (Pivotal Control Center) " | |
echo "********************************************************************************" | |
service commander stop | |
# Copy, uncompress and enter the PCC package folder | |
cp /vagrant/PCC-2.0.1.84.121.163.x86_64.tar.gz* /home/vagrant/; tar --no-same-owner -xzvf ./PCC-2.0.1.84.121.163.x86_64.tar.gz; cd /home/vagrant/PCC-2.0.1.84 | |
# Install PCC as root using root with root's login shell (Note: will not work without the '-' option) | |
su - -c "cd /home/vagrant/PCC-2.0.1.84; ./install" root | |
echo "********************************************************************************" | |
echo "* Prepare Hosts for Cluster: $CLUSTER_NAME " | |
echo "********************************************************************************" | |
# Add Sun Java JDK RPM installer binary (jdk-6u45-linux-x64-rpm.bin). Will be installed on all cluster hosts automatically. | |
# The JDK rpm file need to have execute permission. If the JDK version you have downloaded is only available as a .rpm file | |
# (not as a Linux binary installer .bin file) you need to install the JDK by hand on all cluster nodes. | |
su - -c "cp /vagrant/jdk-6u45-linux-x64-rpm.bin .; chmod a+x ./jdk-6u45-linux-x64-rpm.bin" gpadmin | |
echo "Import PHD & PADS packages into the PCC local yum repository ..." | |
# (Required) For installing PHD | |
su - -c "cp /vagrant/PHD-1.0.1.0-19.tar.gz* .; tar -xzf ./PHD-1.0.1.0-19.tar.gz; icm_client import -p ./PHD-1.0.1.0-19" gpadmin | |
# <<HAWQ>> Import HAWQ packages | |
# If not available locally yet, download the HAWQ from the public distribution URL | |
if [ ! -f /vagrant/PADS-1.1.0-8.tar.gz ]; | |
then | |
cd /vagrant; wget http://bitcast-a.v1.o1.sjc1.bitgravity.com/greenplum/pivotal-sw/PADS-1.1.0-8.tar.gz; cd ~ | |
fi | |
# Import HAWQ packages in the local yum repo | |
su - -c "cp /vagrant/PADS-1.1.0-8.tar.gz* .; tar -xzf ./PADS-1.1.0-8.tar.gz; icm_client import -p ./PADS-1.1.0-8" gpadmin | |
# <</HAWQ>> | |
# (Optional) Import DataLoader and UUS installation packages | |
#su - -c "cp /vagrant/PHDTools-1.0.1-19.tar.gz* .; tar -xzf ./PHDTools-1.0.1-19.tar.gz; icm_client import -p ./PHDTools-1.0.1-19" gpadmin | |
# Create a hostfile (HostFile.txt) that contains the hostnames of all your cluster nodes except the Admin node; separated by newlines | |
# Important: The hostfile should contain all nodes within your cluster EXCEPT the Admin node (e.g. except pcc.localdomain). | |
su - -c "echo $MASTER_AND_SLAVES | tr , '\n' > /home/gpadmin/HostFile.txt" gpadmin | |
echo "Prepare cluster hosts" | |
# Preparing the Cluster Nodes for Pivotal HD | |
# Note: preparehosts expects user inputs like root and gpadmin passwords. The 'expect' tool is used to emulate this user interaction. | |
cat > /home/gpadmin/preparehosts.exp <<EOF | |
#!/usr/bin/expect -f | |
set timeout 100 | |
spawn icm_client preparehosts --hostfile=./HostFile.txt --java=jdk-6u45-linux-x64-rpm.bin --ntp --selinuxoff --iptablesoff | |
expect "Please enter the root user's password. This is required for creating gpadmin users on the cluster nodes:" | |
send -- "$ROOT_PASSWORD\r" | |
expect "Please enter a non-empty password to be used for the gpadmin user:" | |
send -- "$GPADMIN_PASSWORD\r" | |
send -- "\r" | |
expect eof | |
EOF | |
chown gpadmin:gpadmin /home/gpadmin/preparehosts.exp; chmod a+x /home/gpadmin/preparehosts.exp | |
# Prepare all PHD hosts | |
su - -c "expect -f /home/gpadmin/preparehosts.exp" gpadmin | |
# <<HAWQ>> | |
echo "Prepare HAWQ hosts" | |
# Disable vm.overcommit_memory to prevent OOM. Pivotal recommends that you do not set the vm.overcommit_memory | |
# parameter if you run HAWQ on small memory virtual machines. If you set this parameter you may encounter out of memory issues. | |
sed -i 's/vm.overcommit_memory/# vm.overcommit_memory/g' /usr/lib/gphd/gphdmgr/hawq_sys_config/sysctl.conf | |
# Prepare all Hawq hosts. Asumes that HAWQ is deployed on all hosts. | |
su - -c "icm_client prepare-hawq-hosts -f ./HostFile.txt -g /usr/lib/gphd/gphdmgr/hawq_sys_config/" gpadmin | |
# <</HAWQ>> | |
# Verify that all hosts are prepared for installation | |
su - -c "icm_client scanhosts -f ./HostFile.txt" gpadmin | |
echo "********************************************************************************" | |
echo "* Deploy Cluster: $CLUSTER_NAME " | |
echo "********************************************************************************" | |
# As gpadmin deploy the PHD cluster. Pivotal HD manager deploys clusters using input from the cluster | |
# configuration directory. This cluster configuration directory contains files that describes the topology | |
# and configuration for the cluster and the installation procedure. | |
# Fetch the default Cluster Configuration Templates. | |
su - -c "icm_client fetch-template -o ~/ClusterConfigDir" gpadmin | |
# The default clusterConfig.xml is modified to assign cluster hosts to Hadoop service roles. | |
# Following mapping convention is applied: | |
# --------------------------------------------------------------------------------------------------------- | |
# Hosts | Services | |
# --------------------------------------------------------------------------------------------------------- | |
# MASTER_NODE | client, namenode, secondarynameonde, yarn-resourcemanager, mapreduce-historyserver, | |
# | hbase-master,hive-server,hive-metastore,hawq-master,hawq-standbymaste,hawq-segment, | |
# | gpxf-agent | |
# | | |
# MASTER_AND_SLAVES | datanode,yarn-nodemanager,zookeeper-server,hbase-regionserver,hawq-segment,gpxf-agent | |
# --------------------------------------------------------------------------------------------------------- | |
# Apply the mapping convention (above) to the default clusterConfig.xml. | |
sed -i "\ | |
s/<clusterName>.*<\/clusterName>/<clusterName>$CLUSTER_NAME<\/clusterName>/g;\ | |
s/<services>.*<\/services>/<services>$SERVICES<\/services>/g;\ | |
s/<client>.*<\/client>/<client>$MASTER_NODE<\/client>/g;\ | |
s/<namenode>.*<\/namenode>/<namenode>$MASTER_NODE<\/namenode>/g;\ | |
s/<datanode>.*<\/datanode>/<datanode>$MASTER_AND_SLAVES<\/datanode>/g;\ | |
s/<secondarynamenode>.*<\/secondarynamenode>/<secondarynamenode>$MASTER_NODE<\/secondarynamenode>/g;\ | |
s/<yarn-resourcemanager>.*<\/yarn-resourcemanager>/<yarn-resourcemanager>$MASTER_NODE<\/yarn-resourcemanager>/g;\ | |
s/<yarn-nodemanager>.*<\/yarn-nodemanager>/<yarn-nodemanager>$MASTER_AND_SLAVES<\/yarn-nodemanager>/g;\ | |
s/<mapreduce-historyserver>.*<\/mapreduce-historyserver>/<mapreduce-historyserver>$MASTER_NODE<\/mapreduce-historyserver>/g;\ | |
s/<zookeeper-server>.*<\/zookeeper-server>/<zookeeper-server>$MASTER_AND_SLAVES<\/zookeeper-server>/g;\ | |
s/<hbase-master>.*<\/hbase-master>/<hbase-master>$MASTER_NODE<\/hbase-master>/g;\ | |
s/<hbase-regionserver>.*<\/hbase-regionserver>/<hbase-regionserver>$MASTER_AND_SLAVES<\/hbase-regionserver>/g;\ | |
s/<hive-server>.*<\/hive-server>/<hive-server>$MASTER_NODE<\/hive-server>/g;\ | |
s/<hive-metastore>.*<\/hive-metastore>/<hive-metastore>$MASTER_NODE<\/hive-metastore>/g;\ | |
s/<hawq-master>.*<\/hawq-master>/<hawq-master>$MASTER_NODE<\/hawq-master>/g;\ | |
s/<hawq-standbymaster>.*<\/hawq-standbymaster>/<hawq-standbymaster>$MASTER_NODE<\/hawq-standbymaster>/g;\ | |
s/<hawq-segment>.*<\/hawq-segment>/<hawq-segment>$HAWQ_SEGMENT_HOSTS<\/hawq-segment>/g;" /home/gpadmin/ClusterConfigDir/clusterConfig.xml | |
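# For illustration, with the default variables above the substituted elements should read roughly
# as follows (element names taken from the substitutions above, not from the actual template file):
#   <clusterName>PHD_C1</clusterName>
#   <services>hdfs,yarn,pig,zookeeper,hbase,gpxf,hawq</services>
#   <namenode>phd1.localdomain</namenode>
#   <datanode>phd1.localdomain,phd2.localdomain,phd3.localdomain</datanode>
#   <hawq-segment>phd1.localdomain,phd2.localdomain,phd3.localdomain</hawq-segment>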
# Perform the deploy
su - -c "icm_client deploy -c ~/ClusterConfigDir" gpadmin
# <<HAWQ>> post deploy configuration
echo "********************************************************************************"
echo "* HAWQ - post deploy configuration "
echo "********************************************************************************"
su - -c "echo $HAWQ_SEGMENT_HOSTS | tr , '\n' > /home/gpadmin/HAWQ_Segment_Hosts.txt" gpadmin
su - -c "\
scp /home/gpadmin/HAWQ_Segment_Hosts.txt gpadmin@$MASTER_NODE:/home/gpadmin/HAWQ_Segment_Hosts.txt;\
ssh gpadmin@$MASTER_NODE 'source /usr/local/hawq/greenplum_path.sh;\
/usr/local/hawq/bin/gpssh-exkeys -f /home/gpadmin/HAWQ_Segment_Hosts.txt -p $GPADMIN_PASSWORD'" gpadmin
# <</HAWQ>>
echo "********************************************************************************"
echo "* Start Cluster: $CLUSTER_NAME "
echo "********************************************************************************"
su - -c "icm_client start -l $CLUSTER_NAME" gpadmin
# <<HAWQ>>
echo "********************************************************************************"
echo "* Start HAWQ "
echo "********************************************************************************"
su - -c "ssh gpadmin@$MASTER_NODE '/etc/init.d/hawq init'" gpadmin;
# <</HAWQ>>
Vagrantfile:
# -*- mode: ruby -*-
# vi: set ft=ruby :

# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
VAGRANTFILE_API_VERSION = "2"

# Embedded provisioning script common to all cluster hosts and PCC.
$phd_provision_script = <<SCRIPT
#!/bin/bash
# Set the timezone and run NTP (set to Europe/Amsterdam time).
/etc/init.d/ntpd stop; mv /etc/localtime /etc/localtime.bak; ln -s /usr/share/zoneinfo/Europe/Amsterdam /etc/localtime; /etc/init.d/ntpd start
cat > /etc/hosts <<EOF
127.0.0.1 localhost.localdomain localhost
::1 localhost6.localdomain6 localhost6
10.211.55.100 pcc.localdomain pcc
10.211.55.101 phd1.localdomain phd1
10.211.55.102 phd2.localdomain phd2
10.211.55.103 phd3.localdomain phd3
EOF
yum -y install postgresql-devel nc expect
SCRIPT
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|

  config.vm.define :phd1 do |phd1|
    phd1.vm.box = "CentOS-6.2-x86_64"
    phd1.vm.provider :virtualbox do |v|
      v.name = "phd1"
      v.customize ["modifyvm", :id, "--memory", "2048"]
    end
    phd1.vm.provider "vmware_fusion" do |v|
      v.name = "phd1"
      v.vmx["memsize"] = "2048"
    end
    phd1.vm.hostname = "phd1.localdomain"
    phd1.vm.network :private_network, ip: "10.211.55.101"
    phd1.vm.provision :shell, :inline => $phd_provision_script
  end

  config.vm.define :phd2 do |phd2|
    phd2.vm.box = "CentOS-6.2-x86_64"
    phd2.vm.provider :virtualbox do |v|
      v.name = "phd2"
      v.customize ["modifyvm", :id, "--memory", "1024"]
    end
    phd2.vm.provider "vmware_fusion" do |v|
      v.name = "phd2"
      v.vmx["memsize"] = "1024"
    end
    phd2.vm.hostname = "phd2.localdomain"
    phd2.vm.network :private_network, ip: "10.211.55.102"
    phd2.vm.provision :shell, :inline => $phd_provision_script
  end

  config.vm.define :phd3 do |phd3|
    phd3.vm.box = "CentOS-6.2-x86_64"
    phd3.vm.provider :virtualbox do |v|
      v.name = "phd3"
      v.customize ["modifyvm", :id, "--memory", "1024"]
    end
    phd3.vm.provider "vmware_fusion" do |v|
      v.name = "phd3"
      v.vmx["memsize"] = "1024"
    end
    phd3.vm.hostname = "phd3.localdomain"
    phd3.vm.network :private_network, ip: "10.211.55.103"
    phd3.vm.provision :shell, :inline => $phd_provision_script
  end

  config.vm.define :pcc do |pcc|
    pcc.vm.box = "CentOS-6.2-x86_64"
    pcc.vm.provider :virtualbox do |v|
      v.name = "pcc"
      v.customize ["modifyvm", :id, "--memory", "350"]
    end
    pcc.vm.provider "vmware_fusion" do |v|
      v.name = "pcc"
      v.vmx["memsize"] = "350"
    end
    pcc.vm.hostname = "pcc.localdomain"
    pcc.vm.network :private_network, ip: "10.211.55.100"
    pcc.vm.network :forwarded_port, guest: 5000, host: 5000
    pcc.vm.provision :shell, :inline => $phd_provision_script
    pcc.vm.provision :shell, :path => "pcc_provision.sh"
  end
end
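A quick post-provisioning check, run from the host (illustrative; the gpadmin commands simply reuse what pcc_provision.sh itself installed and created):

    vagrant ssh pcc                              # log into the admin (pcc) VM
    sudo su - gpadmin                            # interactive shells may switch users, unlike the provisioning script
    icm_client scanhosts -f ./HostFile.txt       # re-run the host scan from the provisioning script as a sanity check

Note that the Vagrantfile forwards guest port 5000 on the pcc VM to the same port on the host; which PCC service (if any) listens there depends on your PCC build.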