Python script to check URLs for 404 errors with CSV export
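The script reads its input from a plain-text file (urls.txt below) with one URL per line; blank lines are skipped. An illustrative example of the expected input, with placeholder URLs:

https://example.com/
https://example.com/missing-page
https://example.org/blog/post-1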
import requests
import csv
import time
from collections import Counter

# Settings
input_file = 'urls.txt'
output_file = 'result.csv'
timeout = 10   # Request timeout in seconds
delay = 0.5    # Delay between requests to avoid overloading servers

# Load URLs from file
with open(input_file, 'r', encoding='utf-8') as file:
    urls = [line.strip() for line in file if line.strip()]

print(f'Found {len(urls)} URLs to check.')

# URL checking
results = []
status_list = []

for index, url in enumerate(urls, start=1):
    try:
        response = requests.head(url, timeout=timeout, allow_redirects=True)
        status_code = response.status_code
        print(f'{index}. {url} - {status_code}')
    except requests.exceptions.RequestException as e:
        status_code = 'Error'
        print(f'{index}. {url} - Error: {e}')
    results.append((index, url, status_code))
    status_list.append(status_code)
    time.sleep(delay)

# Save results to CSV (tab-separated)
with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile, delimiter='\t')
    writer.writerow(['No', 'URL', 'Status'])
    writer.writerows(results)

# Calculate statistics
counter = Counter(status_list)

print('\n===== SUMMARY =====')
print(f'Total checked: {len(urls)}')
for status, count in counter.items():
    print(f'Status {status}: {count}')

print(f'Results saved to {output_file}')
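The resulting result.csv is tab-separated with a header row, so each data row has the shape 1 <tab> https://example.com/ <tab> 200 (values here are illustrative).

One caveat: some servers reject HEAD requests with 405 or 501 even when the page itself is fine, which this script would report as a non-200 status. Below is a minimal sketch of a HEAD check with a GET fallback that could replace the bare requests.head call; the helper name check_url and the use of stream=True are assumptions for illustration, not part of the original script.

import requests

def check_url(url, timeout=10):
    # Illustrative helper (assumption, not in the original script):
    # try HEAD first, fall back to GET when the server rejects HEAD.
    try:
        response = requests.head(url, timeout=timeout, allow_redirects=True)
        if response.status_code in (405, 501):
            # stream=True asks requests not to download the body eagerly,
            # so the fallback GET stays cheap.
            response = requests.get(url, timeout=timeout,
                                    allow_redirects=True, stream=True)
            response.close()
        return response.status_code
    except requests.exceptions.RequestException:
        return 'Error'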