@JenZhao
Last active March 8, 2025 23:31
Benchmarking scripts for vLLM
#!/bin/bash
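# Serving benchmark: runs benchmarks/benchmark_serving.py against a running vLLM
# server for several dataset/backend combinations and writes the results to a markdown table.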
# First, start the vLLM server in another terminal:
# vllm serve Qwen/Qwen2-VL-7B-Instruct --swap-space 16 --disable-log-requests
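# Optionally, wait for the server before benchmarking (a minimal sketch; it assumes
# the default server address localhost:8000 and vLLM's /health endpoint):
# until curl -sf http://localhost:8000/health > /dev/null; do
#   echo "Waiting for the vLLM server to become ready..."
#   sleep 5
# done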
# Define model variables
MODEL_NAME="Qwen/Qwen2-VL-7B-Instruct"
NUM_PROMPTS=10
# Define the output markdown file
OUTPUT_FILE="benchmark_output.md"
echo "Benchmark script is running..."
# Clear the output file at the start
> "$OUTPUT_FILE"
# Write a header and markdown table header to the output file (Backend placed after Dataset)
{
echo "# Benchmark Results"
echo ""
echo "| Dataset | Backend | Successful requests | Benchmark duration (s) | Total input tokens |"
echo "|---------|---------|---------------------|--------------------------|--------------------|"
} >> "$OUTPUT_FILE"
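# Each run below appends one row to this table, e.g. (values are illustrative):
# | sonnet | openai-chat | 10 | 12.34 | 5120 |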
commands=(
"python3 benchmarks/benchmark_serving.py --backend openai-chat --model ${MODEL_NAME} --endpoint /v1/chat/completions --dataset-name sonnet --dataset-path benchmarks/sonnet.txt --num-prompts ${NUM_PROMPTS}"
"python3 benchmarks/benchmark_serving.py --model ${MODEL_NAME} --backend openai-chat --endpoint /v1/chat/completions --dataset-name hf --dataset-path lmarena-ai/vision-arena-bench-v0.1 --hf-split train --num-prompts ${NUM_PROMPTS} --request-rate 1000 --percentile-metrics ttft,tpot,e2el"
"python3 benchmarks/benchmark_serving.py --model ${MODEL_NAME} --backend openai-chat --endpoint /v1/chat/completions --dataset-name hf --dataset-path lmms-lab/LLaVA-OneVision-Data --hf-split train --hf-subset \"chart2text(cauldron)\" --num-prompts ${NUM_PROMPTS} --request-rate 1000 --percentile-metrics ttft,tpot,e2el"
"python3 benchmarks/benchmark_serving.py --backend vllm --model ${MODEL_NAME} --dataset-name sonnet --dataset-path benchmarks/sonnet.txt --num-prompts ${NUM_PROMPTS}"
"python3 benchmarks/benchmark_serving.py --backend vllm --model ${MODEL_NAME} --dataset-name sharegpt --dataset-path /home/jovyan/data/vllm_benchmark_datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts ${NUM_PROMPTS}"
"python3 benchmarks/benchmark_serving.py --backend vllm --model ${MODEL_NAME} --dataset-name random --num-prompts ${NUM_PROMPTS}"
"python3 benchmarks/benchmark_serving.py --backend vllm --model ${MODEL_NAME} --dataset-name burstgpt --dataset-path /home/jovyan/data/vllm_benchmark_datasets/BurstGPT_without_fails_2.csv --num-prompts ${NUM_PROMPTS}"
)
counter=1
total_commands=${#commands[@]}
for cmd in "${commands[@]}"; do
echo "Running command ${counter} of ${total_commands}:"
echo "$cmd"
dataset=$(echo "$cmd" | grep -oP '(?<=--dataset-name )\S+')
dataset_path=$(echo "$cmd" | grep -oP '(?<=--dataset-path )\S+')
backend=$(echo "$cmd" | grep -oP '(?<=--backend )\S+')
# Update dataset name based on dataset_path if needed
if [ "$dataset_path" = "lmarena-ai/vision-arena-bench-v0.1" ]; then
dataset="hf-vision-arena"
fi
# --- Run the command and parse the output ---
# Using tee to print output in real time to the terminal while capturing it.
output=$(eval "$cmd" 2>&1 | tee /dev/tty)
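# The benchmark summary is expected to contain lines with the labels parsed below,
# e.g. (numbers are illustrative):
#   Successful requests:     10
#   Benchmark duration (s):  12.34
#   Total input tokens:      5120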
successful=$(echo "$output" | grep "Successful requests:" | sed 's/.*Successful requests:[[:space:]]*//')
duration=$(echo "$output" | grep "Benchmark duration (s):" | sed 's/.*Benchmark duration (s):[[:space:]]*//')
tokens=$(echo "$output" | grep "Total input tokens:" | sed 's/.*Total input tokens:[[:space:]]*//')
echo "| $dataset | $backend | $successful | $duration | $tokens |" >> "$OUTPUT_FILE"
echo "Finished command ${counter}."
((counter++))
done
echo "Benchmark script completed. Results are saved in $OUTPUT_FILE"
#!/bin/bash
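# Offline throughput benchmark: runs benchmarks/benchmark_throughput.py on the
# random, ShareGPT, sonnet, and BurstGPT datasets and writes the results to a markdown table.
#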
# Usage:
# ./benchmark.sh [--remove-dataset-name]
#
# If --remove-dataset-name is provided, the script will remove the --dataset-name argument
# from each command when executed. For the main branch, there is no --dataset-name option,
# so it must be removed to avoid errors.
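# Examples:
#   ./benchmark.sh                         # keep --dataset-name (for branches that support it)
#   ./benchmark.sh --remove-dataset-name   # strip --dataset-name (e.g. on the main branch)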
# Parse command-line arguments
REMOVE_DATASET_NAME=false
while [[ $# -gt 0 ]]; do
  case "$1" in
    --remove-dataset-name)
      REMOVE_DATASET_NAME=true
      shift
      ;;
    *)
      shift
      ;;
  esac
done
# Define model and num_prompts variables
MODEL="NousResearch/Hermes-3-Llama-3.1-8B"
NUM_PROMPTS=10
# Define the output markdown file
OUTPUT_FILE="throughput_benchmark_output.md"
echo "Throughput benchmark script is running..."
# Clear the output file at the start
> "$OUTPUT_FILE"
# Write a markdown header and table header to the output file
{
echo "# Throughput Benchmark Results"
echo ""
echo "| Dataset | Processed Prompts | Throughput (requests/s) | Total tokens/s | Output tokens/s |"
echo "|---------|-------------------|-------------------------|----------------|-----------------|"
} >> "$OUTPUT_FILE"
commands=(
"VLLM_USE_V1=1 python3 benchmarks/benchmark_throughput.py --model $MODEL --dataset-name random --num-prompts $NUM_PROMPTS"
"VLLM_USE_V1=1 python3 benchmarks/benchmark_throughput.py --model $MODEL --dataset ~/data/vllm_benchmark_datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts $NUM_PROMPTS"
"VLLM_USE_V1=1 python3 benchmarks/benchmark_throughput.py --model $MODEL --dataset-name sonnet --dataset benchmarks/sonnet.txt --num-prompts $NUM_PROMPTS"
"VLLM_USE_V1=1 python3 benchmarks/benchmark_throughput.py --model $MODEL --dataset /home/jovyan/data/vllm_benchmark_datasets/BurstGPT_without_fails_2.csv --dataset-name burstgpt --num-prompts $NUM_PROMPTS"
)
counter=1
total_commands=${#commands[@]}
for cmd in "${commands[@]}"; do
echo "-----------------------------------------"
echo "Running command ${counter} of ${total_commands}:"
echo "$cmd"
# Extract dataset name:
# First try to extract from --dataset-name; if not found, extract from --dataset and use its basename.
dataset=$(echo "$cmd" | grep -oP '(?<=--dataset-name )\S+')
if [ -z "$dataset" ]; then
  dataset=$(echo "$cmd" | grep -oP '(?<=--dataset )\S+')
  if [ -n "$dataset" ]; then
    dataset=$(basename "$dataset")
  fi
fi
# If no dataset information is found, set it to "N/A"
if [ -z "$dataset" ]; then
dataset="N/A"
fi
# If the remove flag is set, strip out the --dataset-name parameter and its argument
if $REMOVE_DATASET_NAME; then
cmd=$(echo "$cmd" | sed -E 's/--dataset-name[[:space:]]+\S+//')
fi
echo "Output:"
# Run the command, printing output live to the terminal while capturing it.
output=$(eval "$cmd" 2>&1 | tee /dev/tty)
# For this benchmark, processed prompts equals NUM_PROMPTS
processed_prompts=$NUM_PROMPTS
# Extract throughput metrics from the line that starts with "Throughput:"
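# Expected format (numbers are illustrative):
#   Throughput: 7.59 requests/s, 3654.39 total tokens/s, 1786.51 output tokens/s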
throughput_line=$(echo "$output" | grep "Throughput:")
requests=$(echo "$throughput_line" | grep -oP '(?<=Throughput: )\d+(\.\d+)?')
total_tokens=$(echo "$throughput_line" | grep -oP '(?<=, )\d+(\.\d+)?(?= total tokens/s)')
output_tokens=$(echo "$throughput_line" | grep -oP '(?<=, )\d+(\.\d+)?(?= output tokens/s)')
# Append a row to the markdown table with the extracted metrics
echo "| $dataset | $processed_prompts | $requests | $total_tokens | $output_tokens |" >> "$OUTPUT_FILE"
echo "Finished command ${counter}."
((counter++))
done
echo "Throughput benchmark script completed. Results are saved in $OUTPUT_FILE"
#!/bin/bash
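# LoRA throughput benchmark: runs benchmarks/benchmark_throughput.py on ShareGPT
# with different LoRA and async-engine settings and writes the results to a markdown table.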
# Define the number of prompts to use
NUM_PROMPTS=10
# Define the output markdown file
OUTPUT_FILE="throughput_benchmark_output.md"
echo "Throughput benchmark script is running..."
# Clear the output file at the start
> "$OUTPUT_FILE"
# Write markdown header and table header to the output file (without the Processed Prompts column)
{
echo "# Throughput Benchmark Results"
echo ""
echo "| Dataset | Num Prompts | Max Loras | Max Lora Rank | Enable Lora | Async Engine | Throughput (requests/s) | Total tokens/s | Output tokens/s |"
echo "|---------|-------------|-----------|---------------|-------------|--------------|-------------------------|----------------|-----------------|"
} >> "$OUTPUT_FILE"
# Maximum allowed length for dataset name
MAX_DATASET_LENGTH=10
# Define the benchmark commands as an array.
# Note: double quotes around each command allow $NUM_PROMPTS to be expanded when the array is defined.
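# The five runs below cover: 1 LoRA (sync engine), 4 LoRAs (sync engine), the async
# engine without LoRA, the async engine with 1 LoRA, and the async engine with 4 LoRAs.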
commands=(
"python3 benchmarks/benchmark_throughput.py --model meta-llama/Llama-2-7b-hf --backend vllm --dataset ~/data/vllm_benchmark_datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts $NUM_PROMPTS --max-loras 1 --max-lora-rank 8 --enable-lora --lora-path \"yard1/llama-2-7b-sql-lora-test\""
"python3 benchmarks/benchmark_throughput.py --model meta-llama/Llama-2-7b-hf --backend vllm --dataset ~/data/vllm_benchmark_datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts $NUM_PROMPTS --max-loras 4 --max-lora-rank 8 --enable-lora --lora-path \"yard1/llama-2-7b-sql-lora-test\""
"python3 benchmarks/benchmark_throughput.py --model meta-llama/Llama-2-7b-hf --backend vllm --dataset ~/data/vllm_benchmark_datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts $NUM_PROMPTS --async-engine"
"python3 benchmarks/benchmark_throughput.py --model meta-llama/Llama-2-7b-hf --backend vllm --dataset ~/data/vllm_benchmark_datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts $NUM_PROMPTS --async-engine --max-loras 1 --max-lora-rank 8 --enable-lora --lora-path \"yard1/llama-2-7b-sql-lora-test\""
"python3 benchmarks/benchmark_throughput.py --model meta-llama/Llama-2-7b-hf --backend vllm --dataset ~/data/vllm_benchmark_datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts $NUM_PROMPTS --async-engine --max-loras 4 --max-lora-rank 8 --enable-lora --lora-path \"yard1/llama-2-7b-sql-lora-test\""
)
counter=1
total_commands=${#commands[@]}
for cmd in "${commands[@]}"; do
echo "Running command ${counter} of ${total_commands}:"
echo "$cmd"
# --- Parameter extraction from the command string ---
# Dataset: extract the file given to --dataset and use its basename
dataset=$(echo "$cmd" | grep -oP '(?<=--dataset )\S+')
if [ -n "$dataset" ]; then
dataset=$(basename "$dataset")
else
dataset="N/A"
fi
# Truncate dataset if it exceeds the maximum length
if [ ${#dataset} -gt $MAX_DATASET_LENGTH ]; then
dataset="${dataset:0:$MAX_DATASET_LENGTH}..."
fi
# Num Prompts: extract from --num-prompts
num_prompts=$(echo "$cmd" | grep -oP '(?<=--num-prompts )\S+')
if [ -z "$num_prompts" ]; then
num_prompts="N/A"
fi
# Max Loras: extract the value from --max-loras, if present
max_loras=$(echo "$cmd" | grep -oP '(?<=--max-loras )\S+')
if [ -z "$max_loras" ]; then
max_loras="N/A"
fi
# Max Lora Rank: extract the value from --max-lora-rank, if present
max_lora_rank=$(echo "$cmd" | grep -oP '(?<=--max-lora-rank )\S+')
if [ -z "$max_lora_rank" ]; then
max_lora_rank="N/A"
fi
# Enable Lora: check for the presence of --enable-lora
if echo "$cmd" | grep -q -- "--enable-lora"; then
enable_lora="Yes"
else
enable_lora="No"
fi
# Async Engine: check for the presence of --async-engine
if echo "$cmd" | grep -q -- "--async-engine"; then
async_engine="Yes"
else
async_engine="No"
fi
# --- Run the command and parse the output ---
# Using tee to print output in real time to the terminal while capturing it.
output=$(eval "$cmd" 2>&1 | tee /dev/tty)
# Extract throughput metrics from the output string
# Expected format: "Throughput: 7.59 requests/s, 3654.39 total tokens/s, 1786.51 output tokens/s"
throughput_line=$(echo "$output" | grep "Throughput:")
requests=$(echo "$throughput_line" | grep -oP '(?<=Throughput: )\d+(\.\d+)?')
total_tokens=$(echo "$throughput_line" | grep -oP '(?<=, )\d+(\.\d+)?(?= total tokens/s)')
output_tokens=$(echo "$throughput_line" | grep -oP '(?<=, )\d+(\.\d+)?(?= output tokens/s)')
# Append a row to the markdown table (without the Processed Prompts column)
echo "| $dataset | $num_prompts | $max_loras | $max_lora_rank | $enable_lora | $async_engine | $requests | $total_tokens | $output_tokens |" >> "$OUTPUT_FILE"
echo "Finished command ${counter}."
((counter++))
done
echo "Throughput benchmark script completed. Results are saved in $OUTPUT_FILE"
#!/bin/bash
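# Parameter-sweep throughput benchmark: runs benchmarks/benchmark_throughput.py on the
# random dataset over combinations of range ratio, prefix length, input length, and
# output length, and writes the results to a markdown table.
#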
# Usage:
# ./benchmark.sh [--remove-dataset-name]
#
# If --remove-dataset-name is provided, the script will remove the --dataset-name argument
# from each command when executed.
# Parse command-line arguments
REMOVE_DATASET_NAME=false
while [[ $# -gt 0 ]]; do
  case "$1" in
    --remove-dataset-name)
      REMOVE_DATASET_NAME=true
      shift
      ;;
    *)
      shift
      ;;
  esac
done
# Define model and num_prompts variables
MODEL="NousResearch/Hermes-3-Llama-3.1-8B"
NUM_PROMPTS=10
# Define arrays of parameter values
range_ratios=(0.5 1.0)
prefix_lengths=(2)
input_lengths=(10 20)
output_lengths=(20 30)
# Define the output markdown file
OUTPUT_FILE="throughput_benchmark_output.md"
echo "Throughput benchmark script is running..."
# Clear the output file at the start
> "$OUTPUT_FILE"
# Write a markdown header and table header (including parameter columns)
{
echo "# Throughput Benchmark Results"
echo ""
echo "| Dataset | Processed Prompts | Throughput (requests/s) | Total tokens/s | Output tokens/s | Range Ratio | Prefix Len | Input Len | Output Len |"
echo "|---------|-------------------|-------------------------|----------------|-----------------|-------------|------------|-----------|------------|"
} >> "$OUTPUT_FILE"
# Generate commands for each combination of parameters
commands=()
for rr in "${range_ratios[@]}"; do
  for pl in "${prefix_lengths[@]}"; do
    for il in "${input_lengths[@]}"; do
      for ol in "${output_lengths[@]}"; do
        cmd="VLLM_USE_V1=1 python3 benchmarks/benchmark_throughput.py --model $MODEL --dataset-name random --num-prompts $NUM_PROMPTS --prefix-len $pl --random-range-ratio $rr --input-len $il --output-len $ol"
        commands+=("$cmd")
      done
    done
  done
done
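# With the arrays above this generates 2 x 1 x 2 x 2 = 8 combinations.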
# Limit the rounds to at most 5
max_rounds=5
total_commands=${#commands[@]}
if [ $total_commands -gt $max_rounds ]; then
total_commands=$max_rounds
fi
counter=1
# Execute each command for the first max_rounds combinations
for cmd in "${commands[@]:0:$total_commands}"; do
echo "-----------------------------------------"
echo "Running command ${counter} of ${total_commands}:"
echo "$cmd"
# Extract dataset name (default to "N/A" if not found)
dataset=$(echo "$cmd" | grep -oP '(?<=--dataset-name )\S+')
if [ -z "$dataset" ]; then
dataset="N/A"
fi
# Remove --dataset-name option if the flag is set
if $REMOVE_DATASET_NAME; then
cmd=$(echo "$cmd" | sed -E 's/--dataset-name[[:space:]]+\S+//')
fi
echo "Output:"
# Run the command, showing output live while capturing it.
output=$(eval "$cmd" 2>&1 | tee /dev/tty)
processed_prompts=$NUM_PROMPTS
# Extract throughput metrics from the line starting with "Throughput:"
throughput_line=$(echo "$output" | grep "Throughput:")
requests=$(echo "$throughput_line" | grep -oP '(?<=Throughput: )\d+(\.\d+)?')
total_tokens=$(echo "$throughput_line" | grep -oP '(?<=, )\d+(\.\d+)?(?= total tokens/s)')
output_tokens=$(echo "$throughput_line" | grep -oP '(?<=, )\d+(\.\d+)?(?= output tokens/s)')
# Extract the parameter values from the command
range_ratio=$(echo "$cmd" | grep -oP '(?<=--random-range-ratio )\S+')
prefix_len=$(echo "$cmd" | grep -oP '(?<=--prefix-len )\S+')
input_len=$(echo "$cmd" | grep -oP '(?<=--input-len )\S+')
output_len=$(echo "$cmd" | grep -oP '(?<=--output-len )\S+')
# Append a row to the markdown table with the metrics and parameter values
echo "| $dataset | $processed_prompts | $requests | $total_tokens | $output_tokens | $range_ratio | $prefix_len | $input_len | $output_len |" >> "$OUTPUT_FILE"
echo "Finished command ${counter}."
((counter++))
done
echo "Throughput benchmark script completed. Results are saved in $OUTPUT_FILE"