Skip to content

Instantly share code, notes, and snippets.

@nalmeida
Last active March 28, 2025 17:22
Show Gist options
  • Save nalmeida/97691b73c13030254847fbcbcf4da580 to your computer and use it in GitHub Desktop.
Save nalmeida/97691b73c13030254847fbcbcf4da580 to your computer and use it in GitHub Desktop.
############################# Display #############################
# Verbose program output
# Accepts log level: "error", "warn", "info", "debug", "trace"
verbose = "error"
# Don't show interactive progress bar while checking links.
no_progress = false
# Path to summary output file.
# output = "report.md"
############################# Cache ###############################
# Enable link caching. This can be helpful to avoid checking the same links on
# multiple runs.
cache = true
# Discard all cached requests older than this duration.
max_cache_age = "2d"
############################# Runtime #############################
# Number of threads to utilize.
# Defaults to number of cores available to the system if omitted.
threads = 2
# Maximum number of allowed redirects.
max_redirects = 10
# Maximum number of allowed retries before a link is declared dead.
max_retries = 2
# Maximum number of concurrent link checks.
max_concurrency = 14
############################# Requests ############################
# User agent to send with each request.
user_agent = "curl/7.83. 1"
# Website timeout in seconds, measured from connect until the response has finished.
timeout = 20
# Minimum wait time in seconds between retries of failed requests.
retry_wait_time = 2
# Comma-separated list of accepted status codes for valid links.
# Supported values are:
#
# accept = ["200..=204", "429"]
# accept = "200..=204, 429"
# accept = ["200", "429"]
# accept = "200, 429"
accept = ["200", "429"]
# Proceed for server connections considered insecure (invalid TLS).
insecure = false
# Only test links with the given schemes (e.g. https).
# Omit this setting to check links of every supported scheme.
# At the moment, we support http, https, file, and mailto.
scheme = ["https"]
# When links are available using HTTPS, treat HTTP links as errors.
require_https = false
# Request method
method = "get"
# Custom request headers
headers = []
# Remap URI matching pattern to different URI.
# remap = ["https://example.com http://example.invalid"]
# Base URL or website root directory to check relative URLs.
base = "https://wellhub.com"
# HTTP basic auth support. This will be the username and password passed to the
# authorization HTTP header. See
# <https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Authorization>
# basic_auth = ["example.com user:pwd"]
############################# Exclusions ##########################
# Skip missing input files (default is to error if they don't exist).
skip_missing = true
# Check links inside `<code>` and `<pre>` blocks as well as Markdown code
# blocks.
include_verbatim = false
# Ignore case of paths when matching glob patterns.
glob_ignore_case = false
# Exclude URLs and mail addresses from checking (supports regex).
exclude = ['^https://www\.linkedin\.com', '^https://security\.wellhub\.com', '^https://helpcenter\.gympass\.com']
# Exclude these filesystem paths from getting checked.
# exclude_path = ["file/path/to/Ignore", "./other/file/path/to/Ignore"]
# URLs to check (supports regex). Has preference over all excludes.
# include = ['gist\.github\.com.*']
# Exclude all private IPs from checking.
# Equivalent to setting `exclude_private`, `exclude_link_local`, and
# `exclude_loopback` to true.
exclude_all_private = false
# Exclude private IP address ranges from checking.
exclude_private = false
# Exclude link-local IP address range from checking.
exclude_link_local = false
# Exclude loopback IP address range and localhost from checking.
exclude_loopback = false
# Check mail addresses
# include_mail = true
#!/bin/zsh
# Print the usage text (summary, usage line, options, example, dependencies)
# to stdout. Takes no arguments.
show_help() {
  # One argument per output line; empty strings produce the blank separators.
  printf '%s\n' \
    "Sitemap Link Checker - Validate links from XML sitemaps" \
    "" \
    "Usage: ./check404.sh [OPTIONS] <SITEMAP_URL>" \
    "" \
    "Options:" \
    " -h, --help Show this help message and exit" \
    "" \
    "Example:" \
    " ./check404.sh https://example.com/sitemap.xml" \
    "" \
    "Dependencies: curl, lychee"
}
# Script body: validate the command line, confirm that curl and lychee are
# installed, download the sitemap, and pass every <loc> URL to lychee.

# Check for help flags
if [[ "$1" == "-h" || "$1" == "--help" ]]; then
  show_help
  exit 0
fi

# Verify arguments. Diagnostics go to stderr, matching the dependency checks
# further down.
if [[ $# -eq 0 ]]; then
  echo >&2 "Error: Sitemap URL is required."
  show_help >&2
  exit 1
fi

# URL of the sitemap passed as first argument
SITEMAP_URL="$1"

# Check for required dependencies
command -v curl >/dev/null 2>&1 || { echo >&2 "Error: curl is not installed."; exit 1; }
command -v lychee >/dev/null 2>&1 || { echo >&2 "Error: lychee is not installed."; exit 1; }

# Download the sitemap up front so a failed transfer is reported instead of
# silently feeding an empty stream into the rest of the pipeline.
#   -f   treat HTTP 4xx/5xx as a failure rather than returning the error page
#   -sS  no progress meter, but still print the reason when the transfer fails
#   -L   follow redirects
sitemap_xml=$(curl -fsSL "$SITEMAP_URL") || {
  echo >&2 "Error: could not download sitemap: $SITEMAP_URL"
  exit 1
}

# Extract the text of every <loc> element, one URL per line:
#   - strip the opening tag and surrounding whitespace (including a CR)
#   - decode the five predefined XML entities; &amp; is decoded last so that
#     input such as "&amp;lt;" is not decoded twice
#   - drop lines that end up empty
sitemap_urls=$(
  printf '%s\n' "$sitemap_xml" \
    | grep -oE '<loc>[^<]+' \
    | sed -e 's/<loc>//' \
      -e 's/^[[:space:]]*//' \
      -e 's/[[:space:]]*$//' \
      -e 's/&lt;/</g' \
      -e 's/&gt;/>/g' \
      -e 's/&quot;/"/g' \
      -e "s/&apos;/'/g" \
      -e 's/&amp;/\&/g' \
      -e '/^$/d'
)

# Stop here rather than invoking lychee with no inputs.
if [[ -z "$sitemap_urls" ]]; then
  echo >&2 "Error: no <loc> entries found in sitemap: $SITEMAP_URL"
  exit 1
fi

# Pass the URLs to lychee as arguments. They are NUL-delimited so that xargs
# does not interpret quotes or backslashes that may appear inside a URL.
printf '%s\n' "$sitemap_urls" | tr '\n' '\0' | xargs -0 lychee
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment