Last active
November 5, 2024 21:11
-
-
Save rahulunair/46f7f8d6633f18b3f64ec80e6f5f1e41 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Launches a Text Generation Inference (TGI) server for Habana Gaudi in Docker.
# Edit the settings below to change the model, image version or token limits.

# Hugging Face model id handed to the server via --model-id.
model="NousResearch/Nous-Hermes-Llama2-13b"

# Host directory bind-mounted into the container at /data.
volume="$PWD/data"

# Image tag of ghcr.io/huggingface/tgi-gaudi to pull and run.
tgi_version="2.0.0" # https://github.com/huggingface/tgi-gaudi/releases/tag/v2.0.0

# Values passed to --max-input-tokens and --max-total-tokens respectively.
# NOTE(review): Llama-2 models are normally used with a 4096-token context;
# confirm these larger limits are intended for this model.
max_input_token=16000
max_total_token=32000

# Name given to the container; also used to find and remove a previous one.
container_name="tgi-container"
#######################################
# Stop and remove a previous container named $container_name, if one exists.
#
# Existence is checked with `docker container inspect`, which looks up the
# exact name and also finds containers that are not running. A `docker ps`
# lookup without -a would miss a stopped leftover container, and the later
# `docker run --name` would then fail with a name conflict.
#
# Globals:   container_name (read)
# Arguments: none
# Outputs:   a progress message plus docker's own output on stdout
# Returns:   0 when there was nothing to remove, otherwise the exit status
#            of `docker rm`
#######################################
kill_existing_container() {
  if docker container inspect "$container_name" >/dev/null 2>&1; then
    echo "Stopping and removing existing container: $container_name"
    # Stop first so a running server gets a graceful shutdown before removal.
    docker stop "$container_name"
    docker rm "$container_name"
  fi
}
# Launch flow: refresh the image, clear any previous container, start the
# server detached, and report success only if `docker run` actually succeeded.
image="ghcr.io/huggingface/tgi-gaudi:$tgi_version"

echo "Pulling Docker image: $image"
if ! docker pull "$image"; then
  # Not fatal: a locally cached copy of the image may still be usable, and
  # `docker run` below fails loudly if it is not.
  echo "Warning: could not pull $image; continuing with any local copy." >&2
fi

kill_existing_container

# Options for `docker run` itself.
run_args=(
  -d
  --name "$container_name"
  -p 8080:80           # publish container port 80 on host port 8080
  -v "$volume:/data"   # bind-mount the host data directory at /data
  --runtime=habana
  # Environment variables passed through to the container unchanged.
  -e HABANA_VISIBLE_DEVICES=all
  -e OMPI_MCA_btl_vader_single_copy_mechanism=none
  -e ENABLE_HPU_GRAPH=true
  -e LIMIT_HPU_GRAPH=true
  -e USE_FLASH_ATTENTION=true
  -e FLASH_ATTENTION_RECOMPUTE=true
  --cap-add=sys_nice
  --ipc=host
)

# Arguments that follow the image name and are handed to the server process.
server_args=(
  --model-id "$model"
  --max-input-tokens "$max_input_token"
  --max-total-tokens "$max_total_token"
)

echo "Starting Docker container..."
if ! docker run "${run_args[@]}" "$image" "${server_args[@]}"; then
  echo "Error: failed to start Docker container $container_name." >&2
  exit 1
fi
echo "Docker container $container_name is up and running."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment