Last active
January 31, 2025 02:57
-
-
Save 0xBigBoss/896c84ec02e8b27963ea16c5a2ec5692 to your computer and use it in GitHub Desktop.
A script to start Ray nodes within Docker.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Print usage information for this script to stdout.
# Uses $0 so the usage line and examples show the name the script was
# invoked with. Only options that the argument parser accepts are listed.
show_help() {
  echo "Usage: $0 [OPTIONS]"
  echo
  echo "Options:"
  echo " --image IMAGE Docker image to use (required)"
  echo " --address IP Head node IP address (required)"
  echo " --mode MODE Either 'head' or 'worker' (required)"
  echo " --hf-path PATH Path to Hugging Face cache directory (required)"
  echo " --node-name NAME Custom name for the Ray node (default: ray-node)"
  echo " --port PORT Port for Ray head node (default: 6379)"
  echo " --docker-args ARGS Additional Docker arguments (quoted if multiple)"
  echo " --ray-args ARGS Additional Ray arguments (quoted if multiple)"
  echo " --help Show this help message and exit"
  echo
  # The examples previously advertised a --network-interface flag that the
  # parser does not accept; copying them verbatim failed with "Unknown option".
  echo "Example for head node:"
  echo " $0 --image vllm/vllm-openai:latest --address 10.0.0.12 --mode head \\"
  echo " --hf-path ~/.cache/huggingface"
  echo
  echo "Example for worker node:"
  echo " $0 --image vllm/vllm-openai:latest --address 10.0.0.12 --mode worker \\"
  echo " --hf-path ~/.cache/huggingface"
}
# Default values (overridable with --node-name and --port)
NODE_NAME="ray-node"
RAY_PORT="6379"

# Parse command line arguments into the script's global settings.
# Arguments: the script's command line ("$@").
# Globals written: DOCKER_IMAGE, HEAD_NODE_ADDRESS, NODE_MODE, PATH_TO_HF_HOME,
#                  NODE_NAME, RAY_PORT, DOCKER_EXTRA_ARGS, RAY_EXTRA_ARGS.
# Exits 0 after printing help for --help; exits 1 on an unknown option or
# when an option that takes a value is given without one.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --help)
        show_help
        exit 0
        ;;
      --image | --address | --mode | --hf-path | --node-name | --port | --docker-args | --ray-args)
        # Every one of these options consumes a value. Without this check,
        # "shift 2" with a single remaining argument fails without shifting
        # anything and the loop would spin forever.
        if [[ $# -lt 2 ]]; then
          echo "Error: Option $1 requires a value" >&2
          exit 1
        fi
        case "$1" in
          --image) DOCKER_IMAGE="$2" ;;
          --address) HEAD_NODE_ADDRESS="$2" ;;
          --mode) NODE_MODE="$2" ;;
          --hf-path) PATH_TO_HF_HOME="$2" ;;
          --node-name) NODE_NAME="$2" ;;
          --port) RAY_PORT="$2" ;;
          --docker-args) DOCKER_EXTRA_ARGS="$2" ;;
          --ray-args) RAY_EXTRA_ARGS="$2" ;;
        esac
        shift 2
        ;;
      *)
        echo "Unknown option: $1"
        show_help
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# Every required setting must be non-empty; on the first empty one, report
# the problem, print the usage text and stop with status 1.
for required_setting in DOCKER_IMAGE HEAD_NODE_ADDRESS NODE_MODE PATH_TO_HF_HOME; do
  if [[ -z "${!required_setting}" ]]; then
    echo "Error: Missing required arguments"
    show_help
    exit 1
  fi
done

# Only the two known node modes are accepted
case "${NODE_MODE}" in
  head | worker) ;;
  *)
    echo "Error: Mode must be 'head' or 'worker'"
    exit 1
    ;;
esac
# Exit handler: stop the container named by NODE_NAME, then remove it.
# Registered on EXIT below, so it runs whenever this script terminates.
cleanup() {
  local docker_action
  for docker_action in stop rm; do
    docker "${docker_action}" "${NODE_NAME}"
  done
}
trap 'cleanup' EXIT
# Build the "ray start" command line that runs inside the container.
# A head node listens on RAY_PORT; any other mode connects to the head node
# at HEAD_NODE_ADDRESS:RAY_PORT.
case "${NODE_MODE}" in
  head)
    RAY_START_CMD="ray start --block --head --port=${RAY_PORT}"
    ;;
  *)
    RAY_START_CMD="ray start --block --address=${HEAD_NODE_ADDRESS}:${RAY_PORT}"
    ;;
esac
# Append the user's extra Ray arguments only when some were given
RAY_START_CMD="${RAY_START_CMD}${RAY_EXTRA_ARGS:+ ${RAY_EXTRA_ARGS}}"
# Split the user-supplied extra Docker arguments on whitespace into an array.
# The previous unquoted ${DOCKER_EXTRA_ARGS} expansion also applied pathname
# expansion, so a value such as "-e PATTERN=*" could be rewritten by files in
# the current directory. Reading into an array keeps the word splitting but
# leaves glob characters untouched.
extra_docker_args=()
if [[ -n "${DOCKER_EXTRA_ARGS:-}" ]]; then
  # -d '' reads the whole value (including any newlines); read then reports
  # end-of-input with a non-zero status, which is expected here.
  IFS=$' \t\n' read -r -d '' -a extra_docker_args <<< "${DOCKER_EXTRA_ARGS}" || true
fi

# Run the docker command: start the container with bash as entrypoint and
# hand it the assembled Ray start command via -c.
docker run \
  --entrypoint /bin/bash \
  --network host \
  --name "${NODE_NAME}" \
  --ipc=host \
  --gpus all \
  -v "${PATH_TO_HF_HOME}:/root/.cache/huggingface" \
  "${extra_docker_args[@]}" \
  "${DOCKER_IMAGE}" -c "${RAY_START_CMD}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment