Last active
April 30, 2025 14:29
-
-
Save hartwork/e4ffe49bdc55124848bba4978dd89606 to your computer and use it in GitHub Desktop.
Check a URL using Selenium
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
# Check a URL using headless Selenium | |
# Copyright (c) 2025 Sebastian Pipping <[email protected]> | |
# | |
# Licensed under GNU Affero General Public License v3.0 or later | |
# SPDX-License-Identifier: AGPL-3.0-or-later | |
import argparse | |
import json | |
import sys | |
from enum import Enum | |
from selenium import webdriver | |
# Default value for the --user-agent option; it is handed to Chrome via
# the "--user-agent=" command line argument.
# Tip: https://www.whatsmybrowser.org/
_DEFAULT_USER_AGENT = (
    "Mozilla/5.0 (X11; Linux x86_64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/134.0.0.0 Safari/537.36"
)
class PageLoadStrategy(Enum):
    """Closed set of page load strategy names accepted by this script.

    The member values are the strings offered as choices for the
    ``--page-load-strategy`` command line option and assigned verbatim
    to ``ChromeOptions.page_load_strategy``.
    """

    EAGER = "eager"
    NONE = "none"  # the command line default
    NORMAL = "normal"
def _collect_responses(logs) -> list:
    """Extract ``(url, status_code)`` pairs from Chrome performance log entries.

    Each entry's ``"message"`` field is a JSON document; only entries whose
    inner message has method ``Network.responseReceived`` contribute a pair.
    The original log order is preserved.
    """
    responses = []
    for entry in logs:
        message = json.loads(entry["message"])["message"]
        if message["method"] != "Network.responseReceived":
            continue
        response = message["params"]["response"]
        responses.append((response["url"], int(response["status"])))
    return responses


def run(config) -> int:
    """Load ``config.url`` in headless Chrome and report the HTTP status.

    Args:
        config: Parsed command line arguments; the attributes read are
            ``chromedriver``, ``user_agent``, ``page_load_strategy`` and
            ``url``.

    Returns:
        ``0`` if the first recorded network response has a 2xx status code,
        ``1`` otherwise (including when no response was recorded at all).
    """
    service = webdriver.ChromeService(executable_path=config.chromedriver)

    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument(f"--user-agent={config.user_agent}")
    # The performance log is where the network responses are read from below.
    options.capabilities["goog:loggingPrefs"] = {"performance": "ALL"}
    options.page_load_strategy = config.page_load_strategy

    browser = webdriver.Chrome(options=options, service=service)
    try:
        browser.get(config.url)
        responses = _collect_responses(browser.get_log("performance"))
    finally:
        # Always end the session, even when loading the page or parsing the
        # log raised; quit() (unlike close()) also shuts down the driver
        # instead of only closing the current window.
        browser.quit()

    if not responses:
        return 1

    if config.page_load_strategy == PageLoadStrategy.NONE.value:
        # With strategy "none" only the first response is reported.
        first_url, first_status = responses[0]
        print(f"{first_status} {first_url}")
    else:
        for position, (url, status_code) in enumerate(responses, start=1):
            print(f"{position}. {status_code} {url}")

    # Success is judged by the first recorded response only.
    return 0 if 200 <= responses[0][1] < 300 else 1
# Script entry point: parse the command line and exit with run()'s result.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Check a URL using headless Selenium",
    )
    parser.add_argument(
        "--page-load-strategy",
        # A list in enum definition order (rather than a set) keeps the
        # choices shown in --help and in error messages stable across runs;
        # set iteration order of strings varies with hash randomization.
        choices=[strategy.value for strategy in PageLoadStrategy],
        default=PageLoadStrategy.NONE.value,
        help='Selenium page load strategy (default: "%(default)s")',
    )
    parser.add_argument(
        "--chromedriver",
        metavar="PATH",
        default="/usr/bin/chromedriver",
        help='Path to chromedriver executable (default: "%(default)s")',
    )
    parser.add_argument(
        "--user-agent",
        metavar="STRING",
        default=_DEFAULT_USER_AGENT,
        help='User agent header value to use (default: "%(default)s")',
    )
    parser.add_argument("--version", action="version", version="%(prog)s 1.3.1")
    parser.add_argument("url", metavar="URL", help="URL to check")
    config = parser.parse_args()

    # run() returns 0 on success and 1 otherwise; use it as the exit status.
    sys.exit(run(config))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment