Skip to content

Instantly share code, notes, and snippets.

@guicho271828
Last active July 8, 2025 20:55
Show Gist options
  • Save guicho271828/184ca022aa8f4bb7f16e8b9525b46f54 to your computer and use it in GitHub Desktop.
#!/bin/bash
# a.sh — toy long-running task: pause for a while, then announce completion.
# Used by the launcher/kill demos below to exercise waiting and group-kill.
set -euo pipefail

sleep 10
# printf over echo: portable, no surprises with flags/backslashes.
printf '%s\n' 'hi!'
#!/bin/bash
# add.sh — register a LoRA adapter with a running vLLM server at runtime
# via the /v1/load_lora_adapter endpoint (requires
# VLLM_ALLOW_RUNTIME_LORA_UPDATING=True on the server side).
# Env: ADAPTER — adapter path / HF repo id to load as "myadapter".
set -euo pipefail

# Fail fast with a clear message instead of POSTing an empty lora_path.
: "${ADAPTER:?ADAPTER must be set to the adapter path}"

# NOTE(review): $ADAPTER is interpolated straight into JSON; a value
# containing quotes or backslashes would produce invalid JSON. Assumed
# to be a plain path/repo id — confirm if inputs can be arbitrary.
curl -X POST http://localhost:8000/v1/load_lora_adapter \
  -H "Content-Type: application/json" \
  -d "{\"lora_name\": \"myadapter\", \"lora_path\": \"$ADAPTER\"}"
#!/bin/bash
# Thin launcher: run a.sh from the current directory and propagate its
# exit status (set -e makes a failing a.sh fail this script too).
set -euo pipefail
./a.sh
#!/bin/bash
# c.sh — demo: start b.sh in its own process group and kill the whole
# group on exit. Shebang options like "-x" are lost when the script is
# run as "bash c.sh", so enable tracing via set instead.
set -x

# setsid puts b.sh in a new session (and hence a new process group), so
# one signal to the group reaches b.sh AND any children it spawns.
setsid ./b.sh &
pid=$!

# Install the cleanup trap IMMEDIATELY after capturing the PID — the
# original installed it after the sleep, so an early exit during the
# sleep would have leaked the background group. The double quotes make
# $pid expand now, pinning the PID into the trap string.
# "kill -- -PID" (negative PID) signals the whole process group; a plain
# "kill $pid" only signals b.sh itself and misses its children, which is
# why that variant does not work.
trap "kill -- -$pid" EXIT
# does not work:
# trap "kill $pid" EXIT

sleep 3
echo c done!
#!/bin/bash
# Integration driver: for each (MODEL, ADAPTER) pair, start the vLLM
# server, wait until it is ready, register the adapter, run the test
# client, then tear the server down and wait for a clean shutdown.
# Requires: serve.sh, wait.sh, add.sh, test.py in the current directory.

run() {
  # local keeps pid from leaking into the global scope between runs.
  local pid
  echo "testing $MODEL and $ADAPTER"

  # New session/process group so one negative-PID kill stops the whole
  # server tree (vllm spawns workers).
  setsid ./serve.sh &
  pid=$!

  # RETURN trap fires when this function returns (any path): kill the
  # server group, then block until the shutdown message hits the log.
  # Double quotes pin $pid at trap-definition time.
  trap "kill -- -$pid; ./wait.sh 'Application shutdown complete.'" RETURN

  ./wait.sh        # block until "Application startup complete."
  ./add.sh         # register $ADAPTER under the name "myadapter"
  python test.py   # query both the base model and the adapter
}

export MODEL=TinyLlama/TinyLlama-1.1B-Chat-v1.0
export ADAPTER=tmberooney/medllama
run

export MODEL=ibm-granite/granite-3.3-2b-instruct
export ADAPTER=agentlans/granite-3.3-2b-instruct-ethics
run

export MODEL=ibm-granite/granite-3.3-8b-instruct
export ADAPTER=vpakarinen/ibm-granite-8b-lora-uncensored
run
#!/bin/bash
# serve.sh — start a vLLM OpenAI-compatible server for $MODEL with
# runtime LoRA loading enabled; stdout/stderr logs land next to this
# script (wait.sh polls vllm.err for the readiness message).
# Env: MODEL — base model to serve (HF repo id).
set -euo pipefail

: "${MODEL:?MODEL must be set}"

# Resolve this script's directory once (quoted) instead of repeating the
# unquoted $(readlink -ef $(dirname $0)) for every redirection.
dir=$(readlink -ef "$(dirname "$0")")

# Allow POST /v1/load_lora_adapter at runtime (used by add.sh).
export VLLM_ALLOW_RUNTIME_LORA_UPDATING=True

vllm serve "$MODEL" \
  --enable-lora \
  --dtype bfloat16 \
  --max-lora-rank 64 \
  --enable-prefix-caching \
  > "$dir/vllm.log" \
  2> "$dir/vllm.err"

# Alternatives tried — static adapter registration at startup instead of
# the runtime load endpoint:
# --lora-modules "{\"name\": \"myadapter\", \"path\": \"$ADAPTER\"}" \
# --lora-modules "{\"name\": \"myadapter\", \"path\": \"$ADAPTER\", \"base_model_name\": \"$MODEL\"}" \
#!/usr/bin/env python3
# SPDX-License-Identifier: Apache-2.0
# derived from https://docs.vllm.ai/en/v0.8.1/getting_started/examples/openai_completion_client.html
"""Query a local vLLM OpenAI-compatible server twice: once against the
base model named by $MODEL, and once against the LoRA adapter that
add.sh registered at runtime under the name "myadapter"."""
import os

from openai import OpenAI

# Modify OpenAI's API key and API base to use vLLM's API server.
# vLLM ignores the key, but the client library requires a non-empty one.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"

client = OpenAI(
    # defaults to os.environ.get("OPENAI_API_KEY")
    api_key=openai_api_key,
    base_url=openai_api_base,
)

stream = False


def run_completion(model: str) -> None:
    """Request two completions (n=2) from `model` and print the result.

    Honors the module-level `stream` flag: when streaming, prints each
    chunk as it arrives; otherwise prints the whole response object.
    """
    completion = client.completions.create(
        model=model,
        prompt="A robot may not injure a human being",
        echo=False,
        n=2,
        stream=stream,
        logprobs=3)
    print("Completion results:")
    if stream:
        for c in completion:
            print(c)
    else:
        print(completion)


run_completion(os.environ["MODEL"])  # base model served by serve.sh
run_completion("myadapter")          # LoRA adapter loaded via add.sh
#!/bin/bash
# wait.sh — poll this script's vllm.err log once per second until it
# contains the given text.
# Usage: wait.sh [TEXT]   (default: "Application startup complete.")
set -euo pipefail

dir=$(readlink -ef "$(dirname "$0")")
text="${1:-Application startup complete.}"

# -F matches the text literally: the default pattern contains '.', which
# as a regex would match any character. -- guards against patterns that
# start with a dash. A missing vllm.err just makes grep fail, so we keep
# polling — same behavior as before the file exists.
until grep -qF -- "$text" "$dir/vllm.err"; do
  sleep 1
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment