Last active
March 28, 2025 17:22
-
-
Save nalmeida/97691b73c13030254847fbcbcf4da580 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################# Display ############################# | |
# Verbose program output | |
# Accepts log level: "error", "warn", "info", "debug", "trace" | |
verbose = "error" | |
# Don't show interactive progress bar while checking links. | |
no_progress = false | |
# Path to summary output file. | |
# output = "report.md" | |
############################# Cache ############################### | |
# Enable link caching. This can be helpful to avoid checking the same links on | |
# multiple runs. | |
cache = true | |
# Discard all cached requests older than this duration. | |
max_cache_age = "2d" | |
############################# Runtime ############################# | |
# Number of threads to utilize. | |
# Defaults to number of cores available to the system if omitted. | |
threads = 2 | |
# Maximum number of allowed redirects. | |
max_redirects = 10 | |
# Maximum number of allowed retries before a link is declared dead. | |
max_retries = 2 | |
# Maximum number of concurrent link checks. | |
max_concurrency = 14 | |
############################# Requests ############################ | |
# User agent to send with each request. | |
user_agent = "curl/7.83.1"
# Website timeout from connect to response finished. | |
timeout = 20 | |
# Minimum wait time in seconds between retries of failed requests. | |
retry_wait_time = 2 | |
# Comma-separated list of accepted status codes for valid links. | |
# Supported values are: | |
# | |
# accept = ["200..=204", "429"] | |
# accept = "200..=204, 429" | |
# accept = ["200", "429"] | |
# accept = "200, 429" | |
accept = ["200", "429"] | |
# Proceed for server connections considered insecure (invalid TLS). | |
insecure = false | |
# Only test links with the given schemes (e.g. https). | |
# Omit this option to check links regardless of their scheme.
# At the moment, we support http, https, file, and mailto. | |
scheme = ["https"] | |
# When links are available using HTTPS, treat HTTP links as errors. | |
require_https = false | |
# Request method | |
method = "get" | |
# Custom request headers | |
headers = [] | |
# Remap URI matching pattern to different URI. | |
# remap = ["https://example.com http://example.invalid"] | |
# Base URL or website root directory to check relative URLs. | |
base = "https://wellhub.com" | |
# HTTP basic auth support. This will be the username and password passed to the | |
# authorization HTTP header. See | |
# <https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Authorization> | |
# basic_auth = ["example.com user:pwd"] | |
############################# Exclusions ########################## | |
# Skip missing input files (default is to error if they don't exist). | |
skip_missing = true | |
# Check links inside `<code>` and `<pre>` blocks as well as Markdown code | |
# blocks. | |
include_verbatim = false | |
# Ignore case of paths when matching glob patterns. | |
glob_ignore_case = false | |
# Exclude URLs and mail addresses from checking (supports regex). | |
exclude = ['^https://www\.linkedin\.com', '^https://security\.wellhub\.com', '^https://helpcenter\.gympass\.com'] | |
# Exclude these filesystem paths from getting checked. | |
# exclude_path = ["file/path/to/Ignore", "./other/file/path/to/Ignore"] | |
# URLs to check (supports regex). Takes precedence over all excludes.
# include = ['gist\.github\.com.*'] | |
# Exclude all private IPs from checking. | |
# Equivalent to setting `exclude_private`, `exclude_link_local`, and | |
# `exclude_loopback` to true. | |
exclude_all_private = false | |
# Exclude private IP address ranges from checking. | |
exclude_private = false | |
# Exclude link-local IP address range from checking. | |
exclude_link_local = false | |
# Exclude loopback IP address range and localhost from checking. | |
exclude_loopback = false | |
# Check mail addresses | |
# include_mail = true |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/zsh | |
# show_help: write the usage text for this script to stdout.
# Takes no arguments. Each argument to printf below becomes one output line;
# the empty strings produce the blank separator lines.
show_help() {
  printf '%s\n' \
    'Sitemap Link Checker - Validate links from XML sitemaps' \
    '' \
    'Usage: ./check404.sh [OPTIONS] <SITEMAP_URL>' \
    '' \
    'Options:' \
    ' -h, --help Show this help message and exit' \
    '' \
    'Example:' \
    ' ./check404.sh https://example.com/sitemap.xml' \
    '' \
    'Dependencies: curl, lychee'
}
# Check for help flags: print usage on stdout and exit successfully.
# "${1-}" expands to an empty string when no argument was given.
case "${1-}" in
  -h|--help)
    show_help
    exit 0
    ;;
esac

# Verify arguments. An empty string is treated like a missing argument, and
# the error plus usage go to stderr, matching the dependency errors below.
if [[ $# -eq 0 || -z "$1" ]]; then
  echo >&2 "Error: Sitemap URL is required."
  show_help >&2
  exit 1
fi

# Reject unknown options: anything starting with "-" would otherwise be
# stored as the URL and later be parsed by curl as one of its own options.
if [[ "$1" == -* ]]; then
  echo >&2 "Error: Unknown option: $1"
  show_help >&2
  exit 1
fi

# URL of the sitemap passed as first argument
SITEMAP_URL="$1"

# Check for required dependencies; abort with a message on stderr if either
# executable cannot be found on PATH.
command -v curl >/dev/null 2>&1 || { echo >&2 "Error: curl is not installed."; exit 1; }
command -v lychee >/dev/null 2>&1 || { echo >&2 "Error: lychee is not installed."; exit 1; }
# Download the sitemap before processing it, so a failed request stops the
# script instead of silently feeding an empty list to lychee.
#   -f      treat HTTP error responses (4xx/5xx) as failures
#   -sS     no progress meter, but still print error messages
#   -L      follow redirects (e.g. http -> https)
#   --url   pass the URL explicitly so it can never be parsed as a curl option
if ! sitemap_xml=$(curl -fsSL --url "$SITEMAP_URL"); then
  echo >&2 "Error: failed to download sitemap: $SITEMAP_URL"
  exit 1
fi

# Extract the text of every <loc> element, one URL per line:
#   - strip the opening tag and surrounding whitespace (including CR),
#   - decode the XML entities that the sitemap protocol requires URLs to use
#     (&amp; is decoded last so "&amp;lt;" does not get decoded twice),
#   - drop lines that end up empty.
sitemap_urls=$(
  printf '%s\n' "$sitemap_xml" \
    | grep -oE '<loc>[^<]+' \
    | sed \
        -e 's/<loc>//' \
        -e 's/^[[:space:]]*//' \
        -e 's/[[:space:]]*$//' \
        -e 's/&lt;/</g' \
        -e 's/&gt;/>/g' \
        -e 's/&quot;/"/g' \
        -e "s/&apos;/'/g" \
        -e 's/&amp;/\&/g' \
        -e '/^$/d'
)

# Do not start lychee without inputs: report the empty sitemap instead.
if [[ -z "$sitemap_urls" ]]; then
  echo >&2 "Error: no <loc> entries found in sitemap: $SITEMAP_URL"
  exit 1
fi

# Hand the URLs to lychee NUL-delimited, so xargs passes each one through
# verbatim instead of interpreting quotes or backslashes inside it.
printf '%s\n' "$sitemap_urls" | tr '\n' '\0' | xargs -0 lychee
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment