Skip to content

Instantly share code, notes, and snippets.

@tosin2013
Last active September 12, 2024 19:16
Show Gist options
  • Save tosin2013/76e47de3f32de4486ab4699c21b2188e to your computer and use it in GitHub Desktop.
Save tosin2013/76e47de3f32de4486ab4699c21b2188e to your computer and use it in GitHub Desktop.
openshift-ai-workload.sh
#!/bin/bash
# Guard: the first positional argument is mandatory. When it is missing or
# empty, print the usage examples below and stop with status 1.
if [[ -z "${1:-}" ]]; then
  printf '%s\n' \
    "Instance size not provided. Please pass the instance size as an argument." \
    "Example: ./openshift-ai-workload.sh m6i.4xlarge" \
    "Example: ./openshift-ai-workload.sh g4dn.4xlarge" \
    "Example: ./openshift-ai-workload.sh g4ad.4xlarge" \
    "Example: ./openshift-ai-workload.sh p3.2xlarge" \
    "Example with GPU flag: ./openshift-ai-workload.sh --gpu"
  exit 1
fi
#######################################
# Print a reminder of the AWS environment variables that must be exported
# before this script is run.
# Arguments: none
# Outputs:   four lines on stdout (a heading plus one sample export per variable)
#######################################
print_aws_variables() {
  printf '%s\n' \
    'Export the following AWS variables before running this script:' \
    'export aws_access_key_id="YOUR_ACCESS_KEY_ID"' \
    'export aws_secret_access_key="YOUR_SECRET_ACCESS_KEY"' \
    'export aws_region="YOUR_AWS_REGION"'
}
# Verify that every required AWS setting is present in the environment.
# The variables are checked in a fixed order; the first one that is unset or
# empty is reported, the export reminder is printed, and the script exits 1.
# The expansion is quoted so values containing whitespace or glob characters
# are tested as a single word instead of breaking the `[ -z ... ]` test.
for required_var in aws_access_key_id aws_secret_access_key aws_region; do
  # ${!required_var} is bash indirect expansion: the value of the variable
  # whose name is stored in required_var.
  if [ -z "${!required_var}" ]; then
    echo "${required_var} is not set"
    print_aws_variables
    exit 1
  fi
done
# Install a pinned yq release into /usr/bin when `yq -v` does not run.
# The binary is fetched into a temporary file and only copied into place after
# the download succeeds, so a failed or partial transfer cannot leave a broken
# /usr/bin/yq behind. Any failure aborts the script, since yq is needed later
# to edit install-config.yaml.
if ! yq -v &> /dev/null; then
  VERSION=v4.34.1
  BINARY=yq_linux_amd64
  yq_download=$(mktemp) || exit 1
  if ! wget "https://github.com/mikefarah/yq/releases/download/${VERSION}/${BINARY}" -O "$yq_download"; then
    echo "Failed to download yq ${VERSION} (${BINARY})." >&2
    rm -f -- "$yq_download"
    exit 1
  fi
  # `install` copies the file and sets the executable mode in one step.
  if ! sudo install -m 0755 "$yq_download" /usr/bin/yq; then
    echo "Failed to install yq to /usr/bin/yq." >&2
    rm -f -- "$yq_download"
    exit 1
  fi
  rm -f -- "$yq_download"
fi
# Ensure the 'oc' OpenShift command-line tool is available; when it is not,
# fetch the helper installer script into the current directory and run it.
if ! command -v oc &> /dev/null; then
  echo "'oc' (OpenShift command-line tool) is not installed. Installing and configuring..."
  # -f makes curl fail on HTTP errors instead of saving the server's error
  # page under the script's name, which would otherwise be executed below.
  if ! curl -fOL https://raw.githubusercontent.com/tosin2013/openshift-4-deployment-notes/master/pre-steps/configure-openshift-packages.sh; then
    echo "Failed to download configure-openshift-packages.sh" >&2
    exit 1
  fi
  # Make the script executable
  chmod +x configure-openshift-packages.sh
  # Run the 'configure-openshift-packages.sh' script with the -i flag for installation
  ./configure-openshift-packages.sh -i
else
  echo "'oc' (OpenShift command-line tool) is already installed. Skipping installation."
fi
# Check if '~/.ssh/cluster-key' exists, and if not, generate the SSH key.
# The path is derived from $HOME rather than a hard-coded /home/$USER so it
# also resolves correctly for accounts whose home directory lives elsewhere
# (for example root). The same variable is used for the check, the messages
# and the ssh-keygen call so they cannot drift apart.
ssh_key_file="$HOME/.ssh/cluster-key"
if [ ! -f "$ssh_key_file" ]; then
  echo "SSH key '$ssh_key_file' does not exist. Generating..."
  # 4096-bit RSA key with an empty passphrase (-N '').
  ssh-keygen -t rsa -b 4096 -f "$ssh_key_file" -N ''
else
  echo "SSH key '$ssh_key_file' already exists. Skipping key generation."
fi
# Configure the AWS CLI once, using 'configure-aws-cli.sh'.
# The presence of the script in the home directory is treated as the marker
# that this step already ran. The script is therefore downloaded to, and run
# from, the home directory so the check and the download refer to the same
# file regardless of the directory this script is started from.
script_file="configure-aws-cli.sh"
home_dir="$HOME"
if [ -e "$home_dir/$script_file" ]; then
  echo "The script '$script_file' already exists in the home directory."
  echo "Skipping download and execution."
else
  # Download the 'configure-aws-cli.sh' script; -f turns HTTP errors into a
  # non-zero exit instead of saving the error page as the script.
  if ! curl -fL -o "$home_dir/$script_file" \
    "https://raw.githubusercontent.com/tosin2013/openshift-4-deployment-notes/master/aws/$script_file"; then
    echo "Failed to download $script_file" >&2
    rm -f -- "$home_dir/$script_file"
    exit 1
  fi
  # Make the script executable
  chmod +x "$home_dir/$script_file"
  # Run the script. Arguments are quoted so each credential is passed as
  # exactly one word.
  # NOTE(review): credentials passed on the command line are visible to other
  # local users via the process list; confirm whether the helper script can
  # read them from the environment instead.
  "$home_dir/$script_file" -i "$aws_access_key_id" "$aws_secret_access_key" "$aws_region" || exit 1
fi
# Generate install-config.yaml, customize its compute pools with yq, then
# create the cluster.
#
# A single directory is used for every step. Previously the config was created
# and edited in ./cluster while the cluster was created from $HOME/cluster,
# which only lined up when the script was started from $HOME.
cluster_dir="$HOME/cluster"
install_config="$cluster_dir/install-config.yaml"

#######################################
# Emit the yq assignments that define one compute machine pool.
# Arguments:
#   $1 - index into the .compute[] array
#   $2 - yq expression that yields the AWS instance type
#        (a double-quoted literal or strenv(NAME))
#   $3 - replica count
# Outputs: the '|'-joined yq expression on stdout, without a trailing newline
#######################################
compute_pool_expr() {
  local pool=".compute[$1]"
  local type_expr=$2
  local replicas=$3
  # printf reuses the '%s' format for each argument, concatenating them.
  printf '%s' \
    "${pool}.hyperthreading = \"Enabled\" | " \
    "${pool}.name = \"worker\" | " \
    "${pool}.platform.aws.rootVolume.iops = 2000 | " \
    "${pool}.platform.aws.rootVolume.size = 500 | " \
    "${pool}.platform.aws.rootVolume.type = \"io1\" | " \
    "${pool}.platform.aws.type = ${type_expr} | " \
    "${pool}.replicas = ${replicas}"
}

# Without a generated install-config.yaml there is nothing to edit or install.
openshift-install create install-config --dir="$cluster_dir" || exit 1

# Check if the special GPU flag is provided
gpu_flag=false
if [[ "$1" == "--gpu" ]]; then
  gpu_flag=true
fi

if $gpu_flag; then
  echo "Using custom configuration with 'm6i.2xlarge' for compute 0-2 and 'p2.xlarge' for compute 3-4 for GPU workloads."
  # One entry per compute pool, in index order; each pool gets 1 replica.
  # NOTE(review): all five pools are named "worker"; confirm the installer
  # accepts several compute entries sharing one name.
  gpu_pool_types=(m6i.2xlarge m6i.2xlarge m6i.2xlarge p2.xlarge p2.xlarge)
  yq_expr=""
  for pool_idx in "${!gpu_pool_types[@]}"; do
    if [[ -n "$yq_expr" ]]; then
      yq_expr+=" | "
    fi
    yq_expr+="$(compute_pool_expr "$pool_idx" "\"${gpu_pool_types[$pool_idx]}\"" 1)"
  done
  yq -i eval "$yq_expr" "$install_config" || exit 1
else
  echo "Using AWS Instance Size: $1"
  instance_size=$1
  # The instance size is handed to yq through the environment and read with
  # strenv(), so the user-supplied value is always treated as a plain string
  # and never parsed as part of the yq program.
  INSTANCE_SIZE="$instance_size" \
    yq -i eval "$(compute_pool_expr 0 'strenv(INSTANCE_SIZE)' 5)" "$install_config" || exit 1
fi

openshift-install create cluster --dir "$cluster_dir" --log-level debug
# To tear the cluster down again:
# openshift-install destroy cluster --dir="$HOME/cluster" --log-level debug
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment