@JenZhao
Last active March 8, 2025 23:31
Benchmarking scripts for vLLM
#!/bin/bash
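# Serving benchmark: runs benchmarks/benchmark_serving.py against a running vLLM
# server for several dataset/backend combinations and writes the results to a markdown table.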
# First, start the vLLM server in another terminal:
# vllm serve Qwen/Qwen2-VL-7B-Instruct --swap-space 16 --disable-log-requests
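# Optionally, wait for the server before benchmarking (a minimal sketch; it assumes
# the default server address localhost:8000 and vLLM's /health endpoint):
# until curl -sf http://localhost:8000/health > /dev/null; do
#   echo "Waiting for the vLLM server to become ready..."
#   sleep 5
# done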
# Define model variables
MODEL_NAME="Qwen/Qwen2-VL-7B-Instruct"
NUM_PROMPTS=10
# Define the output markdown file
OUTPUT_FILE="benchmark_output.md"
echo "Benchmark script is running..."
# Clear the output file at the start
> "$OUTPUT_FILE"
# Write a header and markdown table header to the output file (Backend placed after Dataset)
{
echo "# Benchmark Results"
echo ""
echo "| Dataset | Backend | Successful requests | Benchmark duration (s) | Total input tokens |"
echo "|---------|---------|---------------------|--------------------------|--------------------|"
} >> "$OUTPUT_FILE"
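# Each run below appends one row to this table, e.g. (values are illustrative):
# | sonnet | openai-chat | 10 | 12.34 | 5120 |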
commands=(
"python3 benchmarks/benchmark_serving.py --backend openai-chat --model ${MODEL_NAME} --endpoint /v1/chat/completions --dataset-name sonnet --dataset-path benchmarks/sonnet.txt --num-prompts ${NUM_PROMPTS}"
"python3 benchmarks/benchmark_serving.py --model ${MODEL_NAME} --backend openai-chat --endpoint /v1/chat/completions --dataset-name hf --dataset-path lmarena-ai/vision-arena-bench-v0.1 --hf-split train --num-prompts ${NUM_PROMPTS} --request-rate 1000 --percentile-metrics ttft,tpot,e2el"
"python3 benchmarks/benchmark_serving.py --model ${MODEL_NAME} --backend openai-chat --endpoint /v1/chat/completions --dataset-name hf --dataset-path lmms-lab/LLaVA-OneVision-Data --hf-split train --hf-subset \"chart2text(cauldron)\" --num-prompts ${NUM_PROMPTS} --request-rate 1000 --percentile-metrics ttft,tpot,e2el"
"python3 benchmarks/benchmark_serving.py --backend vllm --model ${MODEL_NAME} --dataset-name sonnet --dataset-path benchmarks/sonnet.txt --num-prompts ${NUM_PROMPTS}"
"python3 benchmarks/benchmark_serving.py --backend vllm --model ${MODEL_NAME} --dataset-name sharegpt --dataset-path /home/jovyan/data/vllm_benchmark_datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts ${NUM_PROMPTS}"
"python3 benchmarks/benchmark_serving.py --backend vllm --model ${MODEL_NAME} --dataset-name random --num-prompts ${NUM_PROMPTS}"
"python3 benchmarks/benchmark_serving.py --backend vllm --model ${MODEL_NAME} --dataset-name burstgpt --dataset-path /home/jovyan/data/vllm_benchmark_datasets/BurstGPT_without_fails_2.csv --num-prompts ${NUM_PROMPTS}"
)
counter=1
total_commands=${#commands[@]}
for cmd in "${commands[@]}"; do
echo "Running command ${counter} of ${total_commands}:"
echo "$cmd"
dataset=$(echo "$cmd" | grep -oP '(?<=--dataset-name )\S+')
dataset_path=$(echo "$cmd" | grep -oP '(?<=--dataset-path )\S+')
backend=$(echo "$cmd" | grep -oP '(?<=--backend )\S+')
# Update dataset name based on dataset_path if needed
if [ "$dataset_path" = "lmarena-ai/vision-arena-bench-v0.1" ]; then
dataset="hf-vision-arena"
fi
# --- Run the command and parse the output ---
# Using tee to print output in real time to the terminal while capturing it.
output=$(eval "$cmd" 2>&1 | tee /dev/tty)
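# The benchmark summary is expected to contain lines with the labels parsed below,
# e.g. (numbers are illustrative):
#   Successful requests:     10
#   Benchmark duration (s):  12.34
#   Total input tokens:      5120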
successful=$(echo "$output" | grep "Successful requests:" | sed 's/.*Successful requests:[[:space:]]*//')
duration=$(echo "$output" | grep "Benchmark duration (s):" | sed 's/.*Benchmark duration (s):[[:space:]]*//')
tokens=$(echo "$output" | grep "Total input tokens:" | sed 's/.*Total input tokens:[[:space:]]*//')
echo "| $dataset | $backend | $successful | $duration | $tokens |" >> "$OUTPUT_FILE"
echo "Finished command ${counter}."
((counter++))
done
echo "Benchmark script completed. Results are saved in $OUTPUT_FILE"
#!/bin/bash
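# Offline throughput benchmark: runs benchmarks/benchmark_throughput.py on the
# random, ShareGPT, sonnet, and BurstGPT datasets and writes the results to a markdown table.
#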
# Usage:
# ./benchmark.sh [--remove-dataset-name]
#
# If --remove-dataset-name is provided, the script will remove the --dataset-name argument
# from each command when executed. For the main branch, there is no --dataset-name option,
# so it must be removed to avoid errors.
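# Examples:
#   ./benchmark.sh                         # keep --dataset-name (for branches that support it)
#   ./benchmark.sh --remove-dataset-name   # strip --dataset-name (e.g. on the main branch)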
# Parse command-line arguments
REMOVE_DATASET_NAME=false
while [[ $# -gt 0 ]]; do
  case "$1" in
    --remove-dataset-name)
      REMOVE_DATASET_NAME=true
      shift
      ;;
    *)
      shift
      ;;
  esac
done
# Define model and num_prompts variables
MODEL="NousResearch/Hermes-3-Llama-3.1-8B"
NUM_PROMPTS=10
# Define the output markdown file
OUTPUT_FILE="throughput_benchmark_output.md"
echo "Throughput benchmark script is running..."
# Clear the output file at the start
> "$OUTPUT_FILE"
# Write a markdown header and table header to the output file
{
echo "# Throughput Benchmark Results"
echo ""
echo "| Dataset | Processed Prompts | Throughput (requests/s) | Total tokens/s | Output tokens/s |"
echo "|---------|-------------------|-------------------------|----------------|-----------------|"
} >> "$OUTPUT_FILE"
commands=(
"VLLM_USE_V1=1 python3 benchmarks/benchmark_throughput.py --model $MODEL --dataset-name random --num-prompts $NUM_PROMPTS"
"VLLM_USE_V1=1 python3 benchmarks/benchmark_throughput.py --model $MODEL --dataset ~/data/vllm_benchmark_datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts $NUM_PROMPTS"
"VLLM_USE_V1=1 python3 benchmarks/benchmark_throughput.py --model $MODEL --dataset-name sonnet --dataset benchmarks/sonnet.txt --num-prompts $NUM_PROMPTS"
"VLLM_USE_V1=1 python3 benchmarks/benchmark_throughput.py --model $MODEL --dataset /home/jovyan/data/vllm_benchmark_datasets/BurstGPT_without_fails_2.csv --dataset-name burstgpt --num-prompts $NUM_PROMPTS"
)
counter=1
total_commands=${#commands[@]}
for cmd in "${commands[@]}"; do
echo "-----------------------------------------"
echo "Running command ${counter} of ${total_commands}:"
echo "$cmd"
# Extract dataset name:
# First try to extract from --dataset-name; if not found, extract from --dataset and use its basename.
dataset=$(echo "$cmd" | grep -oP '(?<=--dataset-name )\S+')
if [ -z "$dataset" ]; then
  dataset=$(echo "$cmd" | grep -oP '(?<=--dataset )\S+')
  if [ -n "$dataset" ]; then
    dataset=$(basename "$dataset")
  fi
fi
# If no dataset information is found, set it to "N/A"
if [ -z "$dataset" ]; then
dataset="N/A"
fi
# If the remove flag is set, strip out the --dataset-name parameter and its argument
if $REMOVE_DATASET_NAME; then
cmd=$(echo "$cmd" | sed -E 's/--dataset-name[[:space:]]+\S+//')
fi
echo "Output:"
# Run the command, printing output live to the terminal while capturing it.
output=$(eval "$cmd" 2>&1 | tee /dev/tty)
# For this benchmark, processed prompts equals NUM_PROMPTS
processed_prompts=$NUM_PROMPTS
# Extract throughput metrics from the line that starts with "Throughput:"
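# Expected format (numbers are illustrative):
#   Throughput: 7.59 requests/s, 3654.39 total tokens/s, 1786.51 output tokens/s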
throughput_line=$(echo "$output" | grep "Throughput:")
requests=$(echo "$throughput_line" | grep -oP '(?<=Throughput: )\d+(\.\d+)?')
total_tokens=$(echo "$throughput_line" | grep -oP '(?<=, )\d+(\.\d+)?(?= total tokens/s)')
output_tokens=$(echo "$throughput_line" | grep -oP '(?<=, )\d+(\.\d+)?(?= output tokens/s)')
# Append a row to the markdown table with the extracted metrics
echo "| $dataset | $processed_prompts | $requests | $total_tokens | $output_tokens |" >> "$OUTPUT_FILE"
echo "Finished command ${counter}."
((counter++))
done
echo "Throughput benchmark script completed. Results are saved in $OUTPUT_FILE"
#!/bin/bash
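# LoRA throughput benchmark: runs benchmarks/benchmark_throughput.py on ShareGPT
# with different LoRA and async-engine settings and writes the results to a markdown table.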
# Define the number of prompts to use
NUM_PROMPTS=10
# Define the output markdown file
OUTPUT_FILE="throughput_benchmark_output.md"
echo "Throughput benchmark script is running..."
# Clear the output file at the start
> "$OUTPUT_FILE"
# Write markdown header and table header to the output file (without the Processed Prompts column)
{
echo "# Throughput Benchmark Results"
echo ""
echo "| Dataset | Num Prompts | Max Loras | Max Lora Rank | Enable Lora | Async Engine | Throughput (requests/s) | Total tokens/s | Output tokens/s |"
echo "|---------|-------------|-----------|---------------|-------------|--------------|-------------------------|----------------|-----------------|"
} >> "$OUTPUT_FILE"
# Maximum allowed length for dataset name
MAX_DATASET_LENGTH=10
# Define the benchmark commands as an array.
# Note: double quotes around each command allow $NUM_PROMPTS to be expanded when the array is defined.
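# The five runs below cover: 1 LoRA (sync engine), 4 LoRAs (sync engine), the async
# engine without LoRA, the async engine with 1 LoRA, and the async engine with 4 LoRAs.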
commands=(
"python3 benchmarks/benchmark_throughput.py --model meta-llama/Llama-2-7b-hf --backend vllm --dataset ~/data/vllm_benchmark_datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts $NUM_PROMPTS --max-loras 1 --max-lora-rank 8 --enable-lora --lora-path \"yard1/llama-2-7b-sql-lora-test\""
"python3 benchmarks/benchmark_throughput.py --model meta-llama/Llama-2-7b-hf --backend vllm --dataset ~/data/vllm_benchmark_datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts $NUM_PROMPTS --max-loras 4 --max-lora-rank 8 --enable-lora --lora-path \"yard1/llama-2-7b-sql-lora-test\""
"python3 benchmarks/benchmark_throughput.py --model meta-llama/Llama-2-7b-hf --backend vllm --dataset ~/data/vllm_benchmark_datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts $NUM_PROMPTS --async-engine"
"python3 benchmarks/benchmark_throughput.py --model meta-llama/Llama-2-7b-hf --backend vllm --dataset ~/data/vllm_benchmark_datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts $NUM_PROMPTS --async-engine --max-loras 1 --max-lora-rank 8 --enable-lora --lora-path \"yard1/llama-2-7b-sql-lora-test\""
"python3 benchmarks/benchmark_throughput.py --model meta-llama/Llama-2-7b-hf --backend vllm --dataset ~/data/vllm_benchmark_datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts $NUM_PROMPTS --async-engine --max-loras 4 --max-lora-rank 8 --enable-lora --lora-path \"yard1/llama-2-7b-sql-lora-test\""
)
counter=1
total_commands=${#commands[@]}
for cmd in "${commands[@]}"; do
echo "Running command ${counter} of ${total_commands}:"
echo "$cmd"
# --- Parameter extraction from the command string ---
# Dataset: extract the file given to --dataset and use its basename
dataset=$(echo "$cmd" | grep -oP '(?<=--dataset )\S+')
if [ -n "$dataset" ]; then
dataset=$(basename "$dataset")
else
dataset="N/A"
fi
# Truncate dataset if it exceeds the maximum length
if [ ${#dataset} -gt $MAX_DATASET_LENGTH ]; then
dataset="${dataset:0:$MAX_DATASET_LENGTH}..."
fi
# Num Prompts: extract from --num-prompts
num_prompts=$(echo "$cmd" | grep -oP '(?<=--num-prompts )\S+')
if [ -z "$num_prompts" ]; then
num_prompts="N/A"
fi
# Max Loras: extract the value from --max-loras, if present
max_loras=$(echo "$cmd" | grep -oP '(?<=--max-loras )\S+')
if [ -z "$max_loras" ]; then
max_loras="N/A"
fi
# Max Lora Rank: extract the value from --max-lora-rank, if present
max_lora_rank=$(echo "$cmd" | grep -oP '(?<=--max-lora-rank )\S+')
if [ -z "$max_lora_rank" ]; then
max_lora_rank="N/A"
fi
# Enable Lora: check for the presence of --enable-lora
if echo "$cmd" | grep -q -- "--enable-lora"; then
enable_lora="Yes"
else
enable_lora="No"
fi
# Async Engine: check for the presence of --async-engine
if echo "$cmd" | grep -q -- "--async-engine"; then
async_engine="Yes"
else
async_engine="No"
fi
# --- Run the command and parse the output ---
# Using tee to print output in real time to the terminal while capturing it.
output=$(eval "$cmd" 2>&1 | tee /dev/tty)
# Extract throughput metrics from the output string
# Expected format: "Throughput: 7.59 requests/s, 3654.39 total tokens/s, 1786.51 output tokens/s"
throughput_line=$(echo "$output" | grep "Throughput:")
requests=$(echo "$throughput_line" | grep -oP '(?<=Throughput: )\d+(\.\d+)?')
total_tokens=$(echo "$throughput_line" | grep -oP '(?<=, )\d+(\.\d+)?(?= total tokens/s)')
output_tokens=$(echo "$throughput_line" | grep -oP '(?<=, )\d+(\.\d+)?(?= output tokens/s)')
# Append a row to the markdown table (without the Processed Prompts column)
echo "| $dataset | $num_prompts | $max_loras | $max_lora_rank | $enable_lora | $async_engine | $requests | $total_tokens | $output_tokens |" >> "$OUTPUT_FILE"
echo "Finished command ${counter}."
((counter++))
done
echo "Throughput benchmark script completed. Results are saved in $OUTPUT_FILE"
#!/bin/bash
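# Parameter-sweep throughput benchmark: runs benchmarks/benchmark_throughput.py on the
# random dataset over combinations of range ratio, prefix length, input length, and
# output length, and writes the results to a markdown table.
#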
# Usage:
# ./benchmark.sh [--remove-dataset-name]
#
# If --remove-dataset-name is provided, the script will remove the --dataset-name argument
# from each command when executed.
# Parse command-line arguments
REMOVE_DATASET_NAME=false
while [[ $# -gt 0 ]]; do
  case "$1" in
    --remove-dataset-name)
      REMOVE_DATASET_NAME=true
      shift
      ;;
    *)
      shift
      ;;
  esac
done
# Define model and num_prompts variables
MODEL="NousResearch/Hermes-3-Llama-3.1-8B"
NUM_PROMPTS=10
# Define arrays of parameter values
range_ratios=(0.5 1.0)
prefix_lengths=(2)
input_lengths=(10 20)
output_lengths=(20 30)
# Define the output markdown file
OUTPUT_FILE="throughput_benchmark_output.md"
echo "Throughput benchmark script is running..."
# Clear the output file at the start
> "$OUTPUT_FILE"
# Write a markdown header and table header (including parameter columns)
{
echo "# Throughput Benchmark Results"
echo ""
echo "| Dataset | Processed Prompts | Throughput (requests/s) | Total tokens/s | Output tokens/s | Range Ratio | Prefix Len | Input Len | Output Len |"
echo "|---------|-------------------|-------------------------|----------------|-----------------|-------------|------------|-----------|------------|"
} >> "$OUTPUT_FILE"
# Generate commands for each combination of parameters
commands=()
for rr in "${range_ratios[@]}"; do
  for pl in "${prefix_lengths[@]}"; do
    for il in "${input_lengths[@]}"; do
      for ol in "${output_lengths[@]}"; do
        cmd="VLLM_USE_V1=1 python3 benchmarks/benchmark_throughput.py --model $MODEL --dataset-name random --num-prompts $NUM_PROMPTS --prefix-len $pl --random-range-ratio $rr --input-len $il --output-len $ol"
        commands+=("$cmd")
      done
    done
  done
done
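# With the arrays above this generates 2 x 1 x 2 x 2 = 8 combinations.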
# Limit the rounds to at most 5
max_rounds=5
total_commands=${#commands[@]}
if [ $total_commands -gt $max_rounds ]; then
total_commands=$max_rounds
fi
counter=1
# Execute each command for the first max_rounds combinations
for cmd in "${commands[@]:0:$total_commands}"; do
echo "-----------------------------------------"
echo "Running command ${counter} of ${total_commands}:"
echo "$cmd"
# Extract dataset name (default to "N/A" if not found)
dataset=$(echo "$cmd" | grep -oP '(?<=--dataset-name )\S+')
if [ -z "$dataset" ]; then
dataset="N/A"
fi
# Remove --dataset-name option if the flag is set
if $REMOVE_DATASET_NAME; then
cmd=$(echo "$cmd" | sed -E 's/--dataset-name[[:space:]]+\S+//')
fi
echo "Output:"
# Run the command, showing output live while capturing it.
output=$(eval "$cmd" 2>&1 | tee /dev/tty)
processed_prompts=$NUM_PROMPTS
# Extract throughput metrics from the line starting with "Throughput:"
throughput_line=$(echo "$output" | grep "Throughput:")
requests=$(echo "$throughput_line" | grep -oP '(?<=Throughput: )\d+(\.\d+)?')
total_tokens=$(echo "$throughput_line" | grep -oP '(?<=, )\d+(\.\d+)?(?= total tokens/s)')
output_tokens=$(echo "$throughput_line" | grep -oP '(?<=, )\d+(\.\d+)?(?= output tokens/s)')
# Extract the parameter values from the command
range_ratio=$(echo "$cmd" | grep -oP '(?<=--random-range-ratio )\S+')
prefix_len=$(echo "$cmd" | grep -oP '(?<=--prefix-len )\S+')
input_len=$(echo "$cmd" | grep -oP '(?<=--input-len )\S+')
output_len=$(echo "$cmd" | grep -oP '(?<=--output-len )\S+')
# Append a row to the markdown table with the metrics and parameter values
echo "| $dataset | $processed_prompts | $requests | $total_tokens | $output_tokens | $range_ratio | $prefix_len | $input_len | $output_len |" >> "$OUTPUT_FILE"
echo "Finished command ${counter}."
((counter++))
done
echo "Throughput benchmark script completed. Results are saved in $OUTPUT_FILE"