Skip to content

Instantly share code, notes, and snippets.

@hartwork
Last active April 30, 2025 14:29
Show Gist options
  • Save hartwork/e4ffe49bdc55124848bba4978dd89606 to your computer and use it in GitHub Desktop.
Save hartwork/e4ffe49bdc55124848bba4978dd89606 to your computer and use it in GitHub Desktop.
Check a URL using Selenium
#! /usr/bin/env python3
# Check a URL using headless Selenium
# Copyright (c) 2025 Sebastian Pipping <[email protected]>
#
# Licensed under GNU Affero General Public License v3.0 or later
# SPDX-License-Identifier: AGPL-3.0-or-later
import argparse
import json
import sys
from enum import Enum
from selenium import webdriver
# Fallback value for the --user-agent command line option.
# Tip: https://www.whatsmybrowser.org/
# (presumably handy for looking up a current real-world value -- confirm)
_DEFAULT_USER_AGENT = (
    "Mozilla/5.0 (X11; Linux x86_64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/134.0.0.0 Safari/537.36"
)
class PageLoadStrategy(Enum):
    """Closed set of page load strategy names accepted by this script.

    The member values are the literal strings offered as choices for the
    ``--page-load-strategy`` option and assigned to the Chrome options'
    ``page_load_strategy`` attribute.
    """

    # Member order is kept as-is; iteration order follows definition order.
    EAGER = "eager"
    NONE = "none"
    NORMAL = "normal"
def run(config) -> int:
    """Load ``config.url`` in headless Chrome and report the responses seen.

    Args:
        config: Parsed command line arguments; the attributes read are
            ``chromedriver``, ``user_agent``, ``page_load_strategy`` and
            ``url``.

    Returns:
        Process exit code: ``0`` when the first recorded response has a 2xx
        status code, ``1`` otherwise (including when no response was
        recorded at all).
    """
    service = webdriver.ChromeService(executable_path=config.chromedriver)
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument(f"--user-agent={config.user_agent}")
    # Ask Chrome for its performance log; the response events parsed below
    # are read from it.
    options.capabilities["goog:loggingPrefs"] = {"performance": "ALL"}
    options.page_load_strategy = config.page_load_strategy

    browser = webdriver.Chrome(options=options, service=service)
    try:
        browser.get(config.url)
        logs = browser.get_log("performance")
    finally:
        # quit() (rather than close()) ends the whole session, and doing it
        # in ``finally`` avoids leaking browser/driver processes when
        # loading the page or fetching the log raises.
        browser.quit()

    # Collect (url, status_code) for every "Network.responseReceived" event,
    # in log order.
    responses = []
    for entry in logs:
        message = json.loads(entry["message"])["message"]
        if message["method"] != "Network.responseReceived":
            continue
        response = message["params"]["response"]
        responses.append((response["url"], int(response["status"])))

    if not responses:
        # Nothing was recorded, so the check counts as failed.
        return 1

    if config.page_load_strategy == PageLoadStrategy.NONE.value:
        # With strategy "none" only the first response is reported.
        url, status_code = responses[0]
        print(f"{status_code} {url}")
    else:
        for position, (url, status_code) in enumerate(responses, start=1):
            print(f"{position}. {status_code} {url}")

    # Only the first recorded response decides success.
    first_status_code = responses[0][1]
    return 0 if 200 <= first_status_code < 300 else 1
if __name__ == "__main__":
    # Command line entry point: parse the arguments, run the check and use
    # its return value as the process exit code.
    parser = argparse.ArgumentParser(
        description="Check a URL using headless Selenium",
    )
    parser.add_argument(
        "--page-load-strategy",
        # An ordered list (not a set) keeps the choices shown in --help,
        # usage and error messages in a stable order across runs; string
        # set iteration order varies with hash randomization.
        choices=[v.value for v in PageLoadStrategy],
        default=PageLoadStrategy.NONE.value,
        help='Selenium page load strategy (default: "%(default)s")',
    )
    parser.add_argument(
        "--chromedriver",
        metavar="PATH",
        default="/usr/bin/chromedriver",
        help='Path to chromedriver executable (default: "%(default)s")',
    )
    parser.add_argument(
        "--user-agent",
        metavar="STRING",
        default=_DEFAULT_USER_AGENT,
        help='User agent header value to use (default: "%(default)s")',
    )
    parser.add_argument("--version", action="version", version="%(prog)s 1.3.1")
    parser.add_argument("url", metavar="URL", help="URL to check")
    config = parser.parse_args()
    sys.exit(run(config))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment