Last active
April 25, 2025 01:43
-
-
Save LeMoussel/bfed69044cb65947a4e63c76f11e6e1f to your computer and use it in GitHub Desktop.
Python Selenium: Get response attributes via the ChromeDriver performance logging capability
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Implementation of the Selenium Chrome WebDriver with HTTP Response data | |
included via the ChromeDriver performance logging capability | |
""" | |
import json | |
from requests.structures import CaseInsensitiveDict | |
# https://github.com/SeleniumHQ/selenium | |
from selenium import webdriver | |
from selenium.webdriver.chrome.service import Service | |
# https://github.com/SergeyPirogov/webdriver_manager | |
from webdriver_manager.chrome import ChromeDriverManager | |
class ChromeWebDriverPerfomance:
    """Selenium Chrome WebDriver wrapper that exposes HTTP response data.

    Chrome is started with the ``performance`` log enabled so that the
    ``Network.responseReceived`` events emitted while loading a page can be
    read back and turned into response dictionaries (status, headers, ...).

    Attributes:
        options: The ``ChromeOptions`` used to start the browser.
        driver: The underlying ``webdriver.Chrome`` instance.
        responses: Every response dict collected by :meth:`get`, accumulated
            across calls.
    """

    def __init__(self, headless=False):
        """Start a Chrome session with performance logging turned on.

        Args:
            headless: Accepted for interface compatibility.
                NOTE(review): this flag is not consulted; ``--headless=new``
                is always passed, so the browser always runs headless.
                Honouring it would change the default behaviour for existing
                callers, so it is left as-is here.
        """
        self.options = webdriver.ChromeOptions()
        for argument in (
            "--no-sandbox",
            "--disable-dev-shm-usage",
            "--headless=new",
            "disable-infobars",
            "--disable-extensions",
        ):
            self.options.add_argument(argument)

        # https://developer.chrome.com/docs/chromedriver/logging/performance-log
        self.options.set_capability("goog:loggingPrefs", {"performance": "ALL"})

        self.driver = webdriver.Chrome(
            service=Service(ChromeDriverManager().install()), options=self.options
        )

        # List to store each response
        self.responses = []

    def get(self, url):
        """Load ``url`` and return the response dict of the document request.

        Every ``Network.responseReceived`` entry found in the performance log
        is appended to ``self.responses``.

        Args:
            url: Address to navigate to.

        Returns:
            The ``response`` dict of the last logged event whose type is
            ``"Document"``, or ``None`` when no such event was logged.
        """
        self.driver.get(url)

        document_response = None
        for entry in self.driver.get_log("performance"):
            # Each log entry wraps the DevTools event as a JSON string.
            message = json.loads(entry["message"])["message"]
            if message["method"] != "Network.responseReceived":
                continue
            params = message["params"]
            self.responses.append(params["response"])
            if params["type"] == "Document":
                document_response = params["response"]
        return document_response

    def close(self):
        """Shut down the browser session.

        ``quit()`` is used rather than ``close()``: ``close()`` only closes
        the current window, while ``quit()`` ends the whole session and stops
        the chromedriver process so nothing is left running.
        """
        self.driver.quit()
if __name__ == "__main__":
    # Demo: load one page and print the status code and headers of its
    # document response.
    start_url = "https://zonetuto.fr"

    cwd_perf = ChromeWebDriverPerfomance()
    try:
        response = cwd_perf.get(start_url)
    finally:
        # Release the browser even if loading the page raised.
        cwd_perf.close()

    # The ChromeWebDriver response attribute(s) contain a dict with
    # information about the response:
    # {
    #     "connectionId": [Integer],
    #     "connectionReused": [Boolean],
    #     "encodedDataLength": [Integer],
    #     "fromDiskCache": [Boolean],
    #     "fromServiceWorker": [Boolean],
    #     "headers": [dict],               # HTTP Headers as a dict
    #     "headersText": [String],         # HTTP Headers as text
    #     "mimeType": [String],
    #     "protocol": [String],
    #     "remoteIPAddress": [String],
    #     "remotePort": [Integer],
    #     "requestHeaders": [dict],
    #     "requestHeadersText": [String],
    #     "securityDetails": [dict],       # TLS/SSL related information
    #     "securityState": [String],
    #     "status": [Integer],             # HTTP Status Code of the Response
    #     "statusText": [String],
    #     "timing": [dict],
    #     "url": [String]
    # }

    # get() returns None when no "Document" response was logged; subscripting
    # None below would raise a confusing TypeError, so stop with a clear
    # message instead.
    if response is None:
        raise SystemExit(f"No document response captured for {start_url}")

    headers = CaseInsensitiveDict(response["headers"])
    status_code = response["status"]
    print(f"HTTP Status code: {status_code}")
    print(f"Headers: {headers}")
BTW, this is how I implemented your suggestion:
def get(self, url):
    """Load ``url`` and return its document response, tolerating failures.

    Every ``Network.responseReceived`` entry found in the performance log is
    appended to ``self.responses``.

    ``TimeoutException`` and ``WebDriverException`` must be imported from
    ``selenium.common.exceptions`` in the module that defines this method.

    Args:
        url: Address to navigate to.

    Returns:
        The ``response`` dict of the last logged ``"Document"`` event, or
        ``None`` when no such event was logged or when an error was caught
        (the error is printed, not re-raised).
    """
    try:
        self.driver.get(url)

        # Parse the Chrome Performance logs
        response = None
        for log_entry in self.driver.get_log("performance"):
            log_message = json.loads(log_entry["message"])["message"]
            # Filter out HTTP responses
            if log_message["method"] == "Network.responseReceived":
                self.responses.append(log_message["params"]["response"])
                if log_message["params"]["type"] == "Document":
                    response = log_message["params"]["response"]
        # Return only after the whole log has been scanned, so the loop is
        # not cut short on its first entry.
        return response
    except TimeoutException as e:
        print(f"Timeout accessing URL {url}: {e}")
        # Handle timeout (return default value, retry, etc.)
    except WebDriverException as e:
        print(f"WebDriver error accessing URL {url}: {e}")
        # Handle other WebDriver errors
    except Exception as e:
        # Deliberate best-effort catch-all kept from the original snippet.
        print(f"Unexpected error: {e}")
    # Reached only after a handled error: make the "no response" result
    # explicit instead of falling off the end of the function.
    return None
Sorry for the stupid formatting error. I fixed the problem by moving the parsing logic and the return statement out one indent level. Forgive me.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thank you for your instructive reply. This corrected the timeout issue I was having 👍🏻 but now I am coming up against DNS resolution errors.
This code for checking several websites:
fails when it comes across "https://www.goxxgle.com" with
I thought this would have been caught by the 'unexpected error' line in the error-checking code?
Also, I think I should put the
outside the for loop