Created February 1, 2025 16:40
Bash script to warm a website's cache by requesting every URL listed in its sitemap.xml. I use it to warm my FastCGI cache via a nightly cron job.
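The script assumes a standard sitemaps.org sitemap index, where each <loc> entry points at a secondary sitemap that in turn lists page URLs. A minimal example of the expected input (hostnames and file names are placeholders, not from the gist):

<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  <sitemap>
    <loc>https://example.com/sitemap-posts.xml</loc>
  </sitemap>
  <sitemap>
    <loc>https://example.com/sitemap-pages.xml</loc>
  </sitemap>
</sitemapindex>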
#!/bin/bash
# Usage:   ./warm_cache.sh [SITEMAP_INDEX_URL] [DELAY_SECONDS]
# Example: ./warm_cache.sh "https://example.com/sitemap.xml" 1
#
# If no sitemap index URL is provided, a default is used.
DEFAULT_SITEMAP_INDEX_URL="https://example.com/sitemap.xml"
SITEMAP_INDEX_URL="${1:-$DEFAULT_SITEMAP_INDEX_URL}"

# Delay (in seconds) between each request. Default is 0 seconds if not provided.
DELAY="${2:-0}"

echo "Fetching sitemap index from: $SITEMAP_INDEX_URL"
index_content=$(curl -s "$SITEMAP_INDEX_URL")

if [ -z "$index_content" ]; then
  echo "Failed to retrieve the sitemap index from $SITEMAP_INDEX_URL"
  exit 1
fi

echo "Extracting secondary sitemap URLs..."
# Extract all URLs within <loc> tags.
sitemap_urls=$(echo "$index_content" | grep -oP '(?<=<loc>).*?(?=</loc>)')

if [ -z "$sitemap_urls" ]; then
  echo "No secondary sitemap URLs found in the sitemap index."
  exit 1
fi

# Create a temporary file to hold the list of page URLs.
page_urls_file=$(mktemp)

# Process each secondary sitemap URL: download its content and extract the page URLs.
for sitemap_url in $sitemap_urls; do
  echo "Processing secondary sitemap: $sitemap_url"
  sitemap_content=$(curl -s "$sitemap_url")
  if [ -z "$sitemap_content" ]; then
    echo "Failed to retrieve content from $sitemap_url"
    continue
  fi
  # Append all URLs (from <loc> tags) to the temporary file.
  echo "$sitemap_content" | grep -oP '(?<=<loc>).*?(?=</loc>)' >> "$page_urls_file"
done

# Remove duplicate URLs.
sort -u "$page_urls_file" -o "$page_urls_file"

# Temporary file to store URL and response time data.
results_file=$(mktemp)

echo "Starting cache warm-up..."
# Process each URL sequentially to avoid overloading the server.
while IFS= read -r url; do
  [[ -z "$url" ]] && continue  # Skip empty lines
  # Use a custom User-Agent to identify cache warm-up requests.
  response_time=$(curl -A "CacheWarmupBot/1.0" -s -o /dev/null -w "%{time_total}" "$url")
  exit_status=$?
  if [ $exit_status -eq 0 ]; then
    echo "Cache warmed for: $url in ${response_time} seconds"
    # Record the URL and its response time for later reporting.
    echo "$url $response_time" >> "$results_file"
  else
    echo "Failed to warm cache for: $url"
  fi
  # If a delay is specified, sleep for that many seconds.
  if [ "$DELAY" -gt 0 ]; then
    sleep "$DELAY"
  fi
done < "$page_urls_file"

# Generate a report based on response times.
echo "-------------------------------"
echo "Response Time Report:"
echo "-------------------------------"
if [ -s "$results_file" ]; then
  echo "Fastest 5 pages:"
  sort -k2 -n "$results_file" | head -n 5
  echo "-------------------------------"
  echo "Slowest 5 pages:"
  sort -k2 -n "$results_file" | tail -n 5
else
  echo "No successful page requests to report."
fi
echo "-------------------------------"
echo "Cache warm-up complete."

# Clean up temporary files.
rm "$page_urls_file"
rm "$results_file"