PaulCapestany · April 8, 2025 02:13
diff --git a/test-embed.sh b/test-embed.sh
 #!/bin/bash

 # Example usage: test with 50 requests (first sequentially, then in parallel)
 # ./test-embed.sh 50

 # Configuration. MODEL, HOST and PORT may be overridden through the environment;
 # the request count comes from the first command-line argument.
 MODEL=${MODEL:-"nomic-embed-text"}  # Default model
 NUM_REQUESTS=${1:-20}               # Number of requests
 HOST=${HOST:-"localhost"}           # Ollama host
 PORT=${PORT:-"11434"}               # Ollama port

 # The count drives both request loops, so anything other than a positive
 # integer would leave them with nothing to do and make the summary meaningless.
 if ! [[ "$NUM_REQUESTS" =~ ^[0-9]+$ ]] || [ "$NUM_REQUESTS" -lt 1 ]; then
   echo "ERROR: number of requests must be a positive integer, got '$NUM_REQUESTS'" >&2
   exit 1
 fi

 # Create log directory (every response is redirected into it, so stop early
 # if it cannot be created)
 LOG_DIR="./embed_test_logs"
 if ! mkdir -p "$LOG_DIR"; then
   echo "ERROR: could not create log directory $LOG_DIR" >&2
   exit 1
 fi

 # Create two distinct test texts
 TEXT1="Longer text (that changes due to including appended test number): "
 TEXT2="a" # short text to test if < 4 tokens is still an issue (it appears it might no longer be an issue): https://github.com/ggml-org/llama.cpp/issues/6722#issuecomment-2785033321

 # High-precision time measurement function.
 # Despite its name, this prints the current Unix time in SECONDS with a
 # fractional part (e.g. 1712542380.123456789) on stdout; the conversion to
 # milliseconds happens when two such stamps are subtracted.
 get_time_ms() {
   local now
   now=$(date +%s.%N)
   # %N is not supported by every date implementation; where it is not, the
   # specifier can come back literally (e.g. "1712542380.N") and the later
   # arithmetic cannot parse it. Fall back to whole-second resolution.
   case "$now" in
     '' | *[!0-9.]*) now="$(date +%s).000000000" ;;
   esac
   printf '%s\n' "$now"
 }

 # Calculate duration in milliseconds.
 # Arguments: $1 - start time, $2 - end time (both in seconds, fractions allowed)
 # Outputs:   the elapsed time as a whole number of milliseconds, truncated
 calc_duration_ms() {
   local start=$1
   local end=$2
   # Dividing by 1 at bc's default scale (0) truncates to an integer inside bc.
   # Cutting the printed text at '.' is not equivalent: bc prints values below 1
   # without a leading zero (".500"), so a sub-millisecond span would come back
   # as an empty string and break later numeric comparisons.
   echo "($end - $start) * 1000 / 1" | bc
 }

 # Print the run header: one printf call emits each argument on its own line.
 printf '%s\n' \
   "===== Ollama Embedding Parallelism Test (Multiple Inputs) =====" \
   "Model: $MODEL" \
   "Requests: $NUM_REQUESTS (each with 2 texts)" \
   "Timestamp: $(date)" \
   "============================================================="

 # Run sequential requests as baseline.
 # Each request is sent only after the previous one has finished; responses are
 # written to $LOG_DIR/seq_multi_<n>.json and the wall-clock total ends up in
 # SEQ_TOTAL_MS.
 echo ""
 echo "Running $NUM_REQUESTS sequential embedding requests (multiple inputs)..."
 SEQ_FAILED=0
 SEQ_START=$(get_time_ms)

 for i in $(seq 1 "$NUM_REQUESTS"); do
   echo "Starting sequential request $i..."
   REQ_START=$(get_time_ms)

   # -S makes curl report transport errors even though -s silences progress,
   # and the exit status is checked so an unreachable server is not mistaken
   # for a very fast response.
   if ! curl -sS -X POST "http://$HOST:$PORT/api/embed" \
       -H "Content-Type: application/json" \
       -d "{\"model\": \"$MODEL\", \"input\": [\"$TEXT1 $i\", \"$TEXT2\"]}" \
       > "$LOG_DIR/seq_multi_$i.json"; then
     SEQ_FAILED=$((SEQ_FAILED + 1))
     echo "WARNING: sequential request $i failed (curl exited with an error)" >&2
   fi

   REQ_END=$(get_time_ms)
   DURATION_MS=$(calc_duration_ms "$REQ_START" "$REQ_END")
   echo "Request $i completed in ${DURATION_MS}ms"
 done

 SEQ_END=$(get_time_ms)
 SEQ_TOTAL_MS=$(calc_duration_ms "$SEQ_START" "$SEQ_END")
 echo "Sequential test completed in ${SEQ_TOTAL_MS}ms"
 if [ "$SEQ_FAILED" -gt 0 ]; then
   echo "WARNING: $SEQ_FAILED of $NUM_REQUESTS sequential requests failed; timings are not representative." >&2
 fi

 # Run parallel requests.
 # All requests are launched as background jobs at once; responses are written
 # to $LOG_DIR/par_multi_<n>.json and the wall-clock total ends up in
 # PAR_TOTAL_MS.
 echo ""
 echo "Running $NUM_REQUESTS parallel embedding requests (multiple inputs)..."
 PAR_PIDS=()
 PAR_FAILED=0
 PAR_START=$(get_time_ms)

 # Start all requests in parallel, remembering each job's PID
 for i in $(seq 1 "$NUM_REQUESTS"); do
   echo "Starting parallel request $i..."
   curl -sS -X POST "http://$HOST:$PORT/api/embed" \
     -H "Content-Type: application/json" \
     -d "{\"model\": \"$MODEL\", \"input\": [\"$TEXT1 $i\", \"$TEXT2\"]}" \
     > "$LOG_DIR/par_multi_$i.json" &
   PAR_PIDS+=("$!")
 done

 # Wait for all background processes to complete. Waiting per PID (instead of
 # a bare `wait`, which always reports success) lets failed requests be counted.
 for pid in "${PAR_PIDS[@]}"; do
   if ! wait "$pid"; then
     PAR_FAILED=$((PAR_FAILED + 1))
   fi
 done

 PAR_END=$(get_time_ms)
 PAR_TOTAL_MS=$(calc_duration_ms "$PAR_START" "$PAR_END")
 echo "Parallel test completed in ${PAR_TOTAL_MS}ms"
 if [ "$PAR_FAILED" -gt 0 ]; then
   echo "WARNING: $PAR_FAILED of $NUM_REQUESTS parallel requests failed; timings are not representative." >&2
 fi

 # Calculate speedup (sequential total divided by parallel total).
 # Both totals must be plain non-negative integers and the parallel total must
 # be non-zero; otherwise the ratio is undefined and SPEEDUP is set to "N/A".
 if [[ "$SEQ_TOTAL_MS" =~ ^[0-9]+$ && "$PAR_TOTAL_MS" =~ ^[0-9]+$ ]] \
     && [ "$PAR_TOTAL_MS" -gt 0 ]; then
   SPEEDUP=$(echo "scale=2; $SEQ_TOTAL_MS / $PAR_TOTAL_MS" | bc)
 else
   SPEEDUP="N/A"
 fi

 # Report results
 echo ""
 echo "===== RESULTS SUMMARY ====="
 echo "Sequential execution time: ${SEQ_TOTAL_MS}ms"
 echo "Parallel execution time:   ${PAR_TOTAL_MS}ms"
 if [ "$SPEEDUP" = "N/A" ]; then
   # No multiplier suffix when there is no number to attach it to.
   echo "Speedup:                   N/A"
 else
   echo "Speedup:                   ${SPEEDUP}x"
 fi
 echo "============================="

 # Classify the result. "N/A" is handled first so that a non-numeric value is
 # never handed to bc and then misreported as a parallelism failure.
 if [ "$SPEEDUP" = "N/A" ]; then
   echo "UNKNOWN: Could not compute a speedup (timing data missing or parallel run took 0ms)."
 elif (( $(echo "$SPEEDUP > 1.2" | bc -l) )); then
   echo "SUCCESS: Parallelism is working! Significant speedup detected."
 elif (( $(echo "$SPEEDUP >= 0.9" | bc -l) )); then
   echo "PARTIAL: Limited parallelism detected. Some speedup but not optimal."
 else
   echo "FAILED: No parallelism detected. Parallel execution was slower than sequential."
 fi

 echo ""
 echo "Log files saved to: $LOG_DIR"

 # Print how many embedding vectors the response stored in file $1 contains.
 # The response is expected to carry them as a nested array under "embeddings"
 # (presumably {"embeddings":[[...],[...]]} - confirm against the server's API).
 # A nested array opens with "[[" exactly once no matter how many vectors it
 # holds, so the vectors are counted via the "],[" separators between them.
 # Prints 0 when no nested "embeddings" array is present.
 count_embeddings() {
   local file=$1
   local compact
   local separators
   # Strip all whitespace so pretty-printed and compact JSON look the same.
   compact=$(tr -d '[:space:]' < "$file")
   case "$compact" in
     *'"embeddings":[['*) ;;
     *)
       echo 0
       return 0
       ;;
   esac
   separators=$(printf '%s' "$compact" | grep -o '],\[' | wc -l)
   echo $((separators + 1))
 }

 # Validate results by checking response format
 echo ""
 echo "Validating responses..."
 # Prefer the first parallel response, then the first sequential one.
 SAMPLE_FILE=""
 for candidate in "$LOG_DIR/par_multi_1.json" "$LOG_DIR/seq_multi_1.json"; do
   if [ -f "$candidate" ]; then
     SAMPLE_FILE=$candidate
     break
   fi
 done

 if [ -f "$SAMPLE_FILE" ]; then
   # Check if we have embeddings array in the response
   EMBEDDING_COUNT=$(grep -c "\"embeddings\":" "$SAMPLE_FILE")
   if [ "$EMBEDDING_COUNT" -gt 0 ]; then
     echo "Response format looks good, found embeddings array."

     # Check if we have 2 embeddings per request
     ARRAYS=$(count_embeddings "$SAMPLE_FILE")
     if [ "$ARRAYS" -ge 2 ]; then
       echo "SUCCESS: Found multiple embeddings in the response (one for each input text)."
     else
       echo "WARNING: Expected multiple embeddings arrays but found fewer than expected."
     fi
   else
     echo "ERROR: Response format issue - no embeddings found in response."
   fi
 else
   echo "WARNING: Could not find any response files to validate."
 fi

 # Check if any responses contain errors.
 # report_error_responses DIR scans every DIR/*.json file once and, for each
 # file containing the text "error", prints the file name followed by the
 # matching lines (plus two lines of context). Prints a single all-clear line
 # when nothing matches.
 report_error_responses() {
   local dir=$1
   local file
   local flagged=0
   for file in "$dir"/*.json; do
     # An unmatched glob stays literal; skip it instead of grepping a bogus path.
     [ -f "$file" ] || continue
     if grep -q "error" "$file"; then
       if [ "$flagged" -eq 0 ]; then
         echo "WARNING: Some responses contain errors! Check log files."
       fi
       flagged=$((flagged + 1))
       echo "Error in $file:"
       grep -A 2 "error" "$file"
     fi
   done
   if [ "$flagged" -eq 0 ]; then
     echo "No errors found in responses."
   fi
 }

 report_error_responses "$LOG_DIR"
	#!/bin/bash

	# Example usage: test with 50 requests (first sequentially, then in parallel)
	# ./test-embed.sh 50

	# Configuration. MODEL, HOST and PORT may be overridden through the environment;
	# the request count comes from the first command-line argument.
	MODEL=${MODEL:-"nomic-embed-text"} # Default model
	NUM_REQUESTS=${1:-20} # Number of requests
	HOST=${HOST:-"localhost"} # Ollama host
	PORT=${PORT:-"11434"} # Ollama port

	# The count drives both request loops, so anything other than a positive
	# integer would leave them with nothing to do and make the summary meaningless.
	if ! [[ "$NUM_REQUESTS" =~ ^[0-9]+$ ]] || [ "$NUM_REQUESTS" -lt 1 ]; then
	  echo "ERROR: number of requests must be a positive integer, got '$NUM_REQUESTS'" >&2
	  exit 1
	fi

	# Create log directory (every response is redirected into it, so stop early
	# if it cannot be created)
	LOG_DIR="./embed_test_logs"
	if ! mkdir -p "$LOG_DIR"; then
	  echo "ERROR: could not create log directory $LOG_DIR" >&2
	  exit 1
	fi

	# Create two distinct test texts
	TEXT1="Longer text (that changes due to including appended test number): "
	TEXT2="a" # short text to test if < 4 tokens is still an issue (it appears it might no longer be an issue): https://github.com/ggml-org/llama.cpp/issues/6722#issuecomment-2785033321

	# High-precision time measurement function.
	# Despite its name, this prints the current Unix time in SECONDS with a
	# fractional part (e.g. 1712542380.123456789) on stdout; the conversion to
	# milliseconds happens when two such stamps are subtracted.
	get_time_ms() {
	  local now
	  now=$(date +%s.%N)
	  # %N is not supported by every date implementation; where it is not, the
	  # specifier can come back literally (e.g. "1712542380.N") and the later
	  # arithmetic cannot parse it. Fall back to whole-second resolution.
	  case "$now" in
	    '' | *[!0-9.]*) now="$(date +%s).000000000" ;;
	  esac
	  printf '%s\n' "$now"
	}

	# Calculate duration in milliseconds.
	# Arguments: $1 - start time, $2 - end time (both in seconds, fractions allowed)
	# Outputs:   the elapsed time as a whole number of milliseconds, truncated
	calc_duration_ms() {
	  local start=$1
	  local end=$2
	  # Dividing by 1 at bc's default scale (0) truncates to an integer inside bc.
	  # Cutting the printed text at '.' is not equivalent: bc prints values below 1
	  # without a leading zero (".500"), so a sub-millisecond span would come back
	  # as an empty string and break later numeric comparisons.
	  echo "($end - $start) * 1000 / 1" | bc
	}

	# Print the run header: one printf call emits each argument on its own line.
	printf '%s\n' \
	  "===== Ollama Embedding Parallelism Test (Multiple Inputs) =====" \
	  "Model: $MODEL" \
	  "Requests: $NUM_REQUESTS (each with 2 texts)" \
	  "Timestamp: $(date)" \
	  "============================================================="

	# Run sequential requests as baseline.
	# Each request is sent only after the previous one has finished; responses are
	# written to $LOG_DIR/seq_multi_<n>.json and the wall-clock total ends up in
	# SEQ_TOTAL_MS.
	echo ""
	echo "Running $NUM_REQUESTS sequential embedding requests (multiple inputs)..."
	SEQ_FAILED=0
	SEQ_START=$(get_time_ms)

	for i in $(seq 1 "$NUM_REQUESTS"); do
	  echo "Starting sequential request $i..."
	  REQ_START=$(get_time_ms)

	  # -S makes curl report transport errors even though -s silences progress,
	  # and the exit status is checked so an unreachable server is not mistaken
	  # for a very fast response.
	  if ! curl -sS -X POST "http://$HOST:$PORT/api/embed" \
	      -H "Content-Type: application/json" \
	      -d "{\"model\": \"$MODEL\", \"input\": [\"$TEXT1 $i\", \"$TEXT2\"]}" \
	      > "$LOG_DIR/seq_multi_$i.json"; then
	    SEQ_FAILED=$((SEQ_FAILED + 1))
	    echo "WARNING: sequential request $i failed (curl exited with an error)" >&2
	  fi

	  REQ_END=$(get_time_ms)
	  DURATION_MS=$(calc_duration_ms "$REQ_START" "$REQ_END")
	  echo "Request $i completed in ${DURATION_MS}ms"
	done

	SEQ_END=$(get_time_ms)
	SEQ_TOTAL_MS=$(calc_duration_ms "$SEQ_START" "$SEQ_END")
	echo "Sequential test completed in ${SEQ_TOTAL_MS}ms"
	if [ "$SEQ_FAILED" -gt 0 ]; then
	  echo "WARNING: $SEQ_FAILED of $NUM_REQUESTS sequential requests failed; timings are not representative." >&2
	fi

	# Run parallel requests.
	# All requests are launched as background jobs at once; responses are written
	# to $LOG_DIR/par_multi_<n>.json and the wall-clock total ends up in
	# PAR_TOTAL_MS.
	echo ""
	echo "Running $NUM_REQUESTS parallel embedding requests (multiple inputs)..."
	PAR_PIDS=()
	PAR_FAILED=0
	PAR_START=$(get_time_ms)

	# Start all requests in parallel, remembering each job's PID
	for i in $(seq 1 "$NUM_REQUESTS"); do
	  echo "Starting parallel request $i..."
	  curl -sS -X POST "http://$HOST:$PORT/api/embed" \
	    -H "Content-Type: application/json" \
	    -d "{\"model\": \"$MODEL\", \"input\": [\"$TEXT1 $i\", \"$TEXT2\"]}" \
	    > "$LOG_DIR/par_multi_$i.json" &
	  PAR_PIDS+=("$!")
	done

	# Wait for all background processes to complete. Waiting per PID (instead of
	# a bare `wait`, which always reports success) lets failed requests be counted.
	for pid in "${PAR_PIDS[@]}"; do
	  if ! wait "$pid"; then
	    PAR_FAILED=$((PAR_FAILED + 1))
	  fi
	done

	PAR_END=$(get_time_ms)
	PAR_TOTAL_MS=$(calc_duration_ms "$PAR_START" "$PAR_END")
	echo "Parallel test completed in ${PAR_TOTAL_MS}ms"
	if [ "$PAR_FAILED" -gt 0 ]; then
	  echo "WARNING: $PAR_FAILED of $NUM_REQUESTS parallel requests failed; timings are not representative." >&2
	fi

	# Calculate speedup (sequential total divided by parallel total).
	# Both totals must be plain non-negative integers and the parallel total must
	# be non-zero; otherwise the ratio is undefined and SPEEDUP is set to "N/A".
	if [[ "$SEQ_TOTAL_MS" =~ ^[0-9]+$ && "$PAR_TOTAL_MS" =~ ^[0-9]+$ ]] \
	    && [ "$PAR_TOTAL_MS" -gt 0 ]; then
	  SPEEDUP=$(echo "scale=2; $SEQ_TOTAL_MS / $PAR_TOTAL_MS" | bc)
	else
	  SPEEDUP="N/A"
	fi

	# Report results
	echo ""
	echo "===== RESULTS SUMMARY ====="
	echo "Sequential execution time: ${SEQ_TOTAL_MS}ms"
	echo "Parallel execution time: ${PAR_TOTAL_MS}ms"
	if [ "$SPEEDUP" = "N/A" ]; then
	  # No multiplier suffix when there is no number to attach it to.
	  echo "Speedup: N/A"
	else
	  echo "Speedup: ${SPEEDUP}x"
	fi
	echo "============================="

	# Classify the result. "N/A" is handled first so that a non-numeric value is
	# never handed to bc and then misreported as a parallelism failure.
	if [ "$SPEEDUP" = "N/A" ]; then
	  echo "UNKNOWN: Could not compute a speedup (timing data missing or parallel run took 0ms)."
	elif (( $(echo "$SPEEDUP > 1.2" | bc -l) )); then
	  echo "SUCCESS: Parallelism is working! Significant speedup detected."
	elif (( $(echo "$SPEEDUP >= 0.9" | bc -l) )); then
	  echo "PARTIAL: Limited parallelism detected. Some speedup but not optimal."
	else
	  echo "FAILED: No parallelism detected. Parallel execution was slower than sequential."
	fi

	echo ""
	echo "Log files saved to: $LOG_DIR"

	# Print how many embedding vectors the response stored in file $1 contains.
	# The response is expected to carry them as a nested array under "embeddings"
	# (presumably {"embeddings":[[...],[...]]} - confirm against the server's API).
	# A nested array opens with "[[" exactly once no matter how many vectors it
	# holds, so the vectors are counted via the "],[" separators between them.
	# Prints 0 when no nested "embeddings" array is present.
	count_embeddings() {
	  local file=$1
	  local compact
	  local separators
	  # Strip all whitespace so pretty-printed and compact JSON look the same.
	  compact=$(tr -d '[:space:]' < "$file")
	  case "$compact" in
	    *'"embeddings":[['*) ;;
	    *)
	      echo 0
	      return 0
	      ;;
	  esac
	  separators=$(printf '%s' "$compact" | grep -o '],\[' | wc -l)
	  echo $((separators + 1))
	}

	# Validate results by checking response format
	echo ""
	echo "Validating responses..."
	# Prefer the first parallel response, then the first sequential one.
	SAMPLE_FILE=""
	for candidate in "$LOG_DIR/par_multi_1.json" "$LOG_DIR/seq_multi_1.json"; do
	  if [ -f "$candidate" ]; then
	    SAMPLE_FILE=$candidate
	    break
	  fi
	done

	if [ -f "$SAMPLE_FILE" ]; then
	  # Check if we have embeddings array in the response
	  EMBEDDING_COUNT=$(grep -c "\"embeddings\":" "$SAMPLE_FILE")
	  if [ "$EMBEDDING_COUNT" -gt 0 ]; then
	    echo "Response format looks good, found embeddings array."

	    # Check if we have 2 embeddings per request
	    ARRAYS=$(count_embeddings "$SAMPLE_FILE")
	    if [ "$ARRAYS" -ge 2 ]; then
	      echo "SUCCESS: Found multiple embeddings in the response (one for each input text)."
	    else
	      echo "WARNING: Expected multiple embeddings arrays but found fewer than expected."
	    fi
	  else
	    echo "ERROR: Response format issue - no embeddings found in response."
	  fi
	else
	  echo "WARNING: Could not find any response files to validate."
	fi

	# Check if any responses contain errors.
	# report_error_responses DIR scans every DIR/*.json file once and, for each
	# file containing the text "error", prints the file name followed by the
	# matching lines (plus two lines of context). Prints a single all-clear line
	# when nothing matches.
	report_error_responses() {
	  local dir=$1
	  local file
	  local flagged=0
	  for file in "$dir"/*.json; do
	    # An unmatched glob stays literal; skip it instead of grepping a bogus path.
	    [ -f "$file" ] || continue
	    if grep -q "error" "$file"; then
	      if [ "$flagged" -eq 0 ]; then
	        echo "WARNING: Some responses contain errors! Check log files."
	      fi
	      flagged=$((flagged + 1))
	      echo "Error in $file:"
	      grep -A 2 "error" "$file"
	    fi
	  done
	  if [ "$flagged" -eq 0 ]; then
	    echo "No errors found in responses."
	  fi
	}

	report_error_responses "$LOG_DIR"