Benchmarking scripts for vLLM.
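The first script below benchmarks online serving and assumes a vLLM server is already running. A minimal way to use it might look like the following; the filename "benchmark_serving.sh" is just an assumed name for that script, and the working directory is assumed to be the root of a vLLM checkout so that paths like benchmarks/sonnet.txt resolve:

# Terminal 1: start the OpenAI-compatible server (same command as in the script's header comment)
vllm serve Qwen/Qwen2-VL-7B-Instruct --swap-space 16 --disable-log-requests

# Terminal 2: run the serving benchmark from the root of a vLLM checkout
bash benchmark_serving.sh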
#!/bin/bash
# First, start the vLLM server in another terminal:
# vllm serve Qwen/Qwen2-VL-7B-Instruct --swap-space 16 --disable-log-requests
# Define model variables
MODEL_NAME="Qwen/Qwen2-VL-7B-Instruct"
NUM_PROMPTS=10
# Define the output markdown file
OUTPUT_FILE="benchmark_output.md"
echo "Benchmark script is running..."
# Clear the output file at the start
> "$OUTPUT_FILE"
# Write a header and markdown table header to the output file (Backend placed after Dataset)
{
  echo "# Benchmark Results"
  echo ""
  echo "| Dataset | Backend | Successful requests | Benchmark duration (s) | Total input tokens |"
  echo "|---------|---------|---------------------|--------------------------|--------------------|"
} >> "$OUTPUT_FILE"
commands=(
  "python3 benchmarks/benchmark_serving.py --backend openai-chat --model ${MODEL_NAME} --endpoint /v1/chat/completions --dataset-name sonnet --dataset-path benchmarks/sonnet.txt --num-prompts ${NUM_PROMPTS}"
  "python3 benchmarks/benchmark_serving.py --model ${MODEL_NAME} --backend openai-chat --endpoint /v1/chat/completions --dataset-name hf --dataset-path lmarena-ai/vision-arena-bench-v0.1 --hf-split train --num-prompts ${NUM_PROMPTS} --request-rate 1000 --percentile-metrics ttft,tpot,e2el"
  "python3 benchmarks/benchmark_serving.py --model ${MODEL_NAME} --backend openai-chat --endpoint /v1/chat/completions --dataset-name hf --dataset-path lmms-lab/LLaVA-OneVision-Data --hf-split train --hf-subset \"chart2text(cauldron)\" --num-prompts ${NUM_PROMPTS} --request-rate 1000 --percentile-metrics ttft,tpot,e2el"
  "python3 benchmarks/benchmark_serving.py --backend vllm --model ${MODEL_NAME} --dataset-name sonnet --dataset-path benchmarks/sonnet.txt --num-prompts ${NUM_PROMPTS}"
  "python3 benchmarks/benchmark_serving.py --backend vllm --model ${MODEL_NAME} --dataset-name sharegpt --dataset-path /home/jovyan/data/vllm_benchmark_datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts ${NUM_PROMPTS}"
  "python3 benchmarks/benchmark_serving.py --backend vllm --model ${MODEL_NAME} --dataset-name random --num-prompts ${NUM_PROMPTS}"
  "python3 benchmarks/benchmark_serving.py --backend vllm --model ${MODEL_NAME} --dataset-name burstgpt --dataset-path /home/jovyan/data/vllm_benchmark_datasets/BurstGPT_without_fails_2.csv --num-prompts ${NUM_PROMPTS}"
)
counter=1
total_commands=${#commands[@]}
for cmd in "${commands[@]}"; do
  echo "Running command ${counter} of ${total_commands}..."
  echo "$cmd"
  # Extract dataset, dataset path, and backend from the command string
  dataset=$(echo "$cmd" | grep -oP '(?<=--dataset-name )\S+')
  dataset_path=$(echo "$cmd" | grep -oP '(?<=--dataset-path )\S+')
  backend=$(echo "$cmd" | grep -oP '(?<=--backend )\S+')
  # Update dataset name based on dataset_path if needed
  if [ "$dataset_path" = "lmarena-ai/vision-arena-bench-v0.1" ]; then
    dataset="hf-vision-arena"
  fi
  # --- Run the command and parse the output ---
  # Using tee to print output in real time to the terminal while capturing it.
  output=$(eval "$cmd" 2>&1 | tee /dev/tty)
  successful=$(echo "$output" | grep "Successful requests:" | sed 's/.*Successful requests:[[:space:]]*//')
  duration=$(echo "$output" | grep "Benchmark duration (s):" | sed 's/.*Benchmark duration (s):[[:space:]]*//')
  tokens=$(echo "$output" | grep "Total input tokens:" | sed 's/.*Total input tokens:[[:space:]]*//')
  echo "| $dataset | $backend | $successful | $duration | $tokens |" >> "$OUTPUT_FILE"
  echo "Finished command ${counter}."
  ((counter++))
done
echo "Benchmark script completed. Results are saved in $OUTPUT_FILE"
#!/bin/bash
# Usage:
# ./benchmark.sh [--remove-dataset-name]
#
# If --remove-dataset-name is provided, the script will remove the --dataset-name argument
# from each command when executed. For the main branch, there is no --dataset-name option,
# so it must be removed to avoid errors.
# Parse command-line arguments
REMOVE_DATASET_NAME=false
while [[ $# -gt 0 ]]; do
  case "$1" in
    --remove-dataset-name)
      REMOVE_DATASET_NAME=true
      shift
      ;;
    *)
      shift
      ;;
  esac
done
# Define model and num_prompts variables
MODEL="NousResearch/Hermes-3-Llama-3.1-8B"
NUM_PROMPTS=10
# Define the output markdown file
OUTPUT_FILE="throughput_benchmark_output.md"
echo "Throughput benchmark script is running..."
# Clear the output file at the start
> "$OUTPUT_FILE"
# Write a markdown header and table header to the output file
{
  echo "# Throughput Benchmark Results"
  echo ""
  echo "| Dataset | Processed Prompts | Throughput (requests/s) | Total tokens/s | Output tokens/s |"
  echo "|---------|-------------------|-------------------------|----------------|-----------------|"
} >> "$OUTPUT_FILE"
commands=(
  "VLLM_USE_V1=1 python3 benchmarks/benchmark_throughput.py --model $MODEL --dataset-name random --num-prompts $NUM_PROMPTS"
  "VLLM_USE_V1=1 python3 benchmarks/benchmark_throughput.py --model $MODEL --dataset ~/data/vllm_benchmark_datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts $NUM_PROMPTS"
  "VLLM_USE_V1=1 python3 benchmarks/benchmark_throughput.py --model $MODEL --dataset-name sonnet --dataset benchmarks/sonnet.txt --num-prompts $NUM_PROMPTS"
  "VLLM_USE_V1=1 python3 benchmarks/benchmark_throughput.py --model $MODEL --dataset /home/jovyan/data/vllm_benchmark_datasets/BurstGPT_without_fails_2.csv --dataset-name burstgpt --num-prompts $NUM_PROMPTS"
)
counter=1
total_commands=${#commands[@]}
for cmd in "${commands[@]}"; do
  echo "-----------------------------------------"
  echo "Running command ${counter} of ${total_commands}:"
  echo "$cmd"
  # Extract dataset name:
  # First try to extract from --dataset-name; if not found, extract from --dataset and use its basename.
  dataset=$(echo "$cmd" | grep -oP '(?<=--dataset-name )\S+')
  if [ -z "$dataset" ]; then
    dataset=$(echo "$cmd" | grep -oP '(?<=--dataset )\S+')
    if [ -n "$dataset" ]; then
      dataset=$(basename "$dataset")
    fi
  fi
  # If no dataset information is found, set it to "N/A"
  if [ -z "$dataset" ]; then
    dataset="N/A"
  fi
  # If the remove flag is set, strip out the --dataset-name parameter and its argument
  if $REMOVE_DATASET_NAME; then
    cmd=$(echo "$cmd" | sed -E 's/--dataset-name[[:space:]]+\S+//')
  fi
  echo "Output:"
  # Run the command, printing output live to the terminal while capturing it.
  output=$(eval "$cmd" 2>&1 | tee /dev/tty)
  # For this benchmark, processed prompts equals NUM_PROMPTS
  processed_prompts=$NUM_PROMPTS
  # Extract throughput metrics from the line that starts with "Throughput:"
  throughput_line=$(echo "$output" | grep "Throughput:")
  requests=$(echo "$throughput_line" | grep -oP '(?<=Throughput: )\d+(\.\d+)?')
  total_tokens=$(echo "$throughput_line" | grep -oP '(?<=, )\d+(\.\d+)?(?= total tokens/s)')
  output_tokens=$(echo "$throughput_line" | grep -oP '(?<=, )\d+(\.\d+)?(?= output tokens/s)')
  # Append a row to the markdown table with the extracted metrics
  echo "| $dataset | $processed_prompts | $requests | $total_tokens | $output_tokens |" >> "$OUTPUT_FILE"
  echo "Finished command ${counter}."
  ((counter++))
done
echo "Throughput benchmark script completed. Results are saved in $OUTPUT_FILE"
#!/bin/bash
# Define the number of prompts to use
NUM_PROMPTS=10
# Define the output markdown file
OUTPUT_FILE="throughput_benchmark_output.md"
echo "Throughput benchmark script is running..."
# Clear the output file at the start
> "$OUTPUT_FILE"
# Write markdown header and table header to the output file (without the Processed Prompts column)
{
  echo "# Throughput Benchmark Results"
  echo ""
  echo "| Dataset | Num Prompts | Max Loras | Max Lora Rank | Enable Lora | Async Engine | Throughput (requests/s) | Total tokens/s | Output tokens/s |"
  echo "|---------|-------------|-----------|---------------|-------------|--------------|-------------------------|----------------|-----------------|"
} >> "$OUTPUT_FILE"
# Maximum allowed length for dataset name
MAX_DATASET_LENGTH=10
# Define benchmark commands as an array.
# Note: NUM_PROMPTS is expanded because the command strings use double quotes.
commands=(
  "python3 benchmarks/benchmark_throughput.py --model meta-llama/Llama-2-7b-hf --backend vllm --dataset ~/data/vllm_benchmark_datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts $NUM_PROMPTS --max-loras 1 --max-lora-rank 8 --enable-lora --lora-path \"yard1/llama-2-7b-sql-lora-test\""
  "python3 benchmarks/benchmark_throughput.py --model meta-llama/Llama-2-7b-hf --backend vllm --dataset ~/data/vllm_benchmark_datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts $NUM_PROMPTS --max-loras 4 --max-lora-rank 8 --enable-lora --lora-path \"yard1/llama-2-7b-sql-lora-test\""
  "python3 benchmarks/benchmark_throughput.py --model meta-llama/Llama-2-7b-hf --backend vllm --dataset ~/data/vllm_benchmark_datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts $NUM_PROMPTS --async-engine"
  "python3 benchmarks/benchmark_throughput.py --model meta-llama/Llama-2-7b-hf --backend vllm --dataset ~/data/vllm_benchmark_datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts $NUM_PROMPTS --async-engine --max-loras 1 --max-lora-rank 8 --enable-lora --lora-path \"yard1/llama-2-7b-sql-lora-test\""
  "python3 benchmarks/benchmark_throughput.py --model meta-llama/Llama-2-7b-hf --backend vllm --dataset ~/data/vllm_benchmark_datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts $NUM_PROMPTS --async-engine --max-loras 4 --max-lora-rank 8 --enable-lora --lora-path \"yard1/llama-2-7b-sql-lora-test\""
)
counter=1
total_commands=${#commands[@]}
for cmd in "${commands[@]}"; do
  echo "Running command ${counter} of ${total_commands}..."
  echo "$cmd"
  # --- Parameter extraction from the command string ---
  # Dataset: extract the file given to --dataset and use its basename
  dataset=$(echo "$cmd" | grep -oP '(?<=--dataset )\S+')
  if [ -n "$dataset" ]; then
    dataset=$(basename "$dataset")
  else
    dataset="N/A"
  fi
  # Truncate dataset if it exceeds the maximum length
  if [ ${#dataset} -gt $MAX_DATASET_LENGTH ]; then
    dataset="${dataset:0:$MAX_DATASET_LENGTH}..."
  fi
  # Num Prompts: extract from --num-prompts
  num_prompts=$(echo "$cmd" | grep -oP '(?<=--num-prompts )\S+')
  if [ -z "$num_prompts" ]; then
    num_prompts="N/A"
  fi
  # Max Loras: extract the value from --max-loras, if present
  max_loras=$(echo "$cmd" | grep -oP '(?<=--max-loras )\S+')
  if [ -z "$max_loras" ]; then
    max_loras="N/A"
  fi
  # Max Lora Rank: extract the value from --max-lora-rank, if present
  max_lora_rank=$(echo "$cmd" | grep -oP '(?<=--max-lora-rank )\S+')
  if [ -z "$max_lora_rank" ]; then
    max_lora_rank="N/A"
  fi
  # Enable Lora: check for the presence of --enable-lora
  if echo "$cmd" | grep -q -- "--enable-lora"; then
    enable_lora="Yes"
  else
    enable_lora="No"
  fi
  # Async Engine: check for the presence of --async-engine
  if echo "$cmd" | grep -q -- "--async-engine"; then
    async_engine="Yes"
  else
    async_engine="No"
  fi
  # --- Run the command and parse the output ---
  # Using tee to print output in real time to the terminal while capturing it.
  output=$(eval "$cmd" 2>&1 | tee /dev/tty)
  # Extract throughput metrics from the output string
  # Expected format: "Throughput: 7.59 requests/s, 3654.39 total tokens/s, 1786.51 output tokens/s"
  throughput_line=$(echo "$output" | grep "Throughput:")
  requests=$(echo "$throughput_line" | grep -oP '(?<=Throughput: )\d+(\.\d+)?')
  total_tokens=$(echo "$throughput_line" | grep -oP '(?<=, )\d+(\.\d+)?(?= total tokens/s)')
  output_tokens=$(echo "$throughput_line" | grep -oP '(?<=, )\d+(\.\d+)?(?= output tokens/s)')
  # Append a row to the markdown table (without the Processed Prompts column)
  echo "| $dataset | $num_prompts | $max_loras | $max_lora_rank | $enable_lora | $async_engine | $requests | $total_tokens | $output_tokens |" >> "$OUTPUT_FILE"
  echo "Finished command ${counter}."
  ((counter++))
done
echo "Throughput benchmark script completed. Results are saved in $OUTPUT_FILE"
#!/bin/bash
# Usage:
# ./benchmark.sh [--remove-dataset-name]
#
# If --remove-dataset-name is provided, the script will remove the --dataset-name argument
# from each command when executed.
# Parse command-line arguments
REMOVE_DATASET_NAME=false
while [[ $# -gt 0 ]]; do
  case "$1" in
    --remove-dataset-name)
      REMOVE_DATASET_NAME=true
      shift
      ;;
    *)
      shift
      ;;
  esac
done
# Define model and num_prompts variables
MODEL="NousResearch/Hermes-3-Llama-3.1-8B"
NUM_PROMPTS=10
# Define arrays of parameter values
range_ratios=(0.5 1.0)
prefix_lengths=(2)
input_lengths=(10 20)
output_lengths=(20 30)
# Define the output markdown file
OUTPUT_FILE="throughput_benchmark_output.md"
echo "Throughput benchmark script is running..."
# Clear the output file at the start
> "$OUTPUT_FILE"
# Write a markdown header and table header (including parameter columns)
{
  echo "# Throughput Benchmark Results"
  echo ""
  echo "| Dataset | Processed Prompts | Throughput (requests/s) | Total tokens/s | Output tokens/s | Range Ratio | Prefix Len | Input Len | Output Len |"
  echo "|---------|-------------------|-------------------------|----------------|-----------------|-------------|------------|-----------|------------|"
} >> "$OUTPUT_FILE"
# Generate commands for each combination of parameters
commands=()
for rr in "${range_ratios[@]}"; do
  for pl in "${prefix_lengths[@]}"; do
    for il in "${input_lengths[@]}"; do
      for ol in "${output_lengths[@]}"; do
        cmd="VLLM_USE_V1=1 python3 benchmarks/benchmark_throughput.py --model $MODEL --dataset-name random --num-prompts $NUM_PROMPTS --prefix-len $pl --random-range-ratio $rr --input-len $il --output-len $ol"
        commands+=("$cmd")
      done
    done
  done
done
# Limit the rounds to at most 5
max_rounds=5
total_commands=${#commands[@]}
if [ $total_commands -gt $max_rounds ]; then
  total_commands=$max_rounds
fi
counter=1
# Execute each command for the first max_rounds combinations
for cmd in "${commands[@]:0:$total_commands}"; do
  echo "-----------------------------------------"
  echo "Running command ${counter} of ${total_commands}:"
  echo "$cmd"
  # Extract dataset name (default to "N/A" if not found)
  dataset=$(echo "$cmd" | grep -oP '(?<=--dataset-name )\S+')
  if [ -z "$dataset" ]; then
    dataset="N/A"
  fi
  # Remove --dataset-name option if the flag is set
  if $REMOVE_DATASET_NAME; then
    cmd=$(echo "$cmd" | sed -E 's/--dataset-name[[:space:]]+\S+//')
  fi
  echo "Output:"
  # Run the command, showing output live while capturing it.
  output=$(eval "$cmd" 2>&1 | tee /dev/tty)
  processed_prompts=$NUM_PROMPTS
  # Extract throughput metrics from the line starting with "Throughput:"
  throughput_line=$(echo "$output" | grep "Throughput:")
  requests=$(echo "$throughput_line" | grep -oP '(?<=Throughput: )\d+(\.\d+)?')
  total_tokens=$(echo "$throughput_line" | grep -oP '(?<=, )\d+(\.\d+)?(?= total tokens/s)')
  output_tokens=$(echo "$throughput_line" | grep -oP '(?<=, )\d+(\.\d+)?(?= output tokens/s)')
  # Extract the parameter values from the command
  range_ratio=$(echo "$cmd" | grep -oP '(?<=--random-range-ratio )\S+')
  prefix_len=$(echo "$cmd" | grep -oP '(?<=--prefix-len )\S+')
  input_len=$(echo "$cmd" | grep -oP '(?<=--input-len )\S+')
  output_len=$(echo "$cmd" | grep -oP '(?<=--output-len )\S+')
  # Append a row to the markdown table with the metrics and parameter values
  echo "| $dataset | $processed_prompts | $requests | $total_tokens | $output_tokens | $range_ratio | $prefix_len | $input_len | $output_len |" >> "$OUTPUT_FILE"
  echo "Finished command ${counter}."
  ((counter++))
done
echo "Throughput benchmark script completed. Results are saved in $OUTPUT_FILE"