@johnfmorton
Created February 1, 2025 16:40
Bash script to warm the cache on a website using its sitemap.xml. I use it to warm my FastCGI cache via a nightly cron job.
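For the nightly run, a minimal crontab sketch might look like the following (the install path /usr/local/bin/warm_cache.sh and the log location are assumptions, not part of the script; adjust them to your setup):

# Hypothetical crontab entry: warm the cache at 2:30 AM with a 1-second delay between requests
30 2 * * * /usr/local/bin/warm_cache.sh "https://example.com/sitemap.xml" 1 >> /var/log/warm_cache.log 2>&1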
#!/bin/bash
# Usage: ./warm_cache.sh [SITEMAP_INDEX_URL] [DELAY_SECONDS]
# Example: ./warm_cache.sh "https://example.com/sitemap.xml" 1
#
# If no sitemap index URL is provided, a default is used.
DEFAULT_SITEMAP_INDEX_URL="https://example.com/sitemap.xml"
SITEMAP_INDEX_URL="${1:-$DEFAULT_SITEMAP_INDEX_URL}"
# Delay (in seconds) between each request. Default is 0 seconds if not provided.
DELAY="${2:-0}"
echo "Fetching sitemap index from: $SITEMAP_INDEX_URL"
index_content=$(curl -s "$SITEMAP_INDEX_URL")
if [ -z "$index_content" ]; then
echo "Failed to retrieve the sitemap index from $SITEMAP_INDEX_URL"
exit 1
fi
echo "Extracting secondary sitemap URLs..."
# Extract all URLs within <loc> tags.
sitemap_urls=$(echo "$index_content" | grep -oP '(?<=<loc>).*?(?=</loc>)')
if [ -z "$sitemap_urls" ]; then
echo "No secondary sitemap URLs found in the sitemap index."
exit 1
fi
# Create a temporary file to hold the list of page URLs.
page_urls_file=$(mktemp)
# Process each secondary sitemap URL: download its content and extract the page URLs.
for sitemap_url in $sitemap_urls; do
    echo "Processing secondary sitemap: $sitemap_url"
    sitemap_content=$(curl -s "$sitemap_url")
    if [ -z "$sitemap_content" ]; then
        echo "Failed to retrieve content from $sitemap_url"
        continue
    fi
    # Append all URLs (from <loc> tags) to the temporary file.
    echo "$sitemap_content" | grep -oP '(?<=<loc>).*?(?=</loc>)' >> "$page_urls_file"
done
# Remove duplicate URLs.
sort -u "$page_urls_file" -o "$page_urls_file"
# Temporary file to store URL and response time data.
results_file=$(mktemp)
echo "Starting cache warm-up..."
# Process each URL sequentially to avoid overloading the server.
while IFS= read -r url; do
    [[ -z "$url" ]] && continue # Skip empty lines
    # Use a custom User-Agent to identify cache warm-up requests.
    response_time=$(curl -A "CacheWarmupBot/1.0" -s -o /dev/null -w "%{time_total}" "$url")
    exit_status=$?
    if [ $exit_status -eq 0 ]; then
        echo "Cache warmed for: $url in ${response_time} seconds"
        # Record the URL and its response time for later reporting.
        echo "$url $response_time" >> "$results_file"
    else
        echo "Failed to warm cache for: $url"
    fi
    # If a delay is specified, sleep for that many seconds.
    if [ "$DELAY" -gt 0 ]; then
        sleep "$DELAY"
    fi
done < "$page_urls_file"
# Generate a report based on response times.
echo "-------------------------------"
echo "Response Time Report:"
echo "-------------------------------"
if [ -s "$results_file" ]; then
echo "Fastest 5 pages:"
sort -k2 -n "$results_file" | head -n 5
echo "-------------------------------"
echo "Slowest 5 pages:"
sort -k2 -n "$results_file" | tail -n 5
else
echo "No successful page requests to report."
fi
echo "-------------------------------"
echo "Cache warm-up complete."
# Clean up temporary files.
rm "$page_urls_file"
rm "$results_file"