Last active
July 14, 2019 22:57
-
-
Save osoleve/d73370bc3a28039cd3f1514b359453f7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 12 20:34:48 2019

Code adapted from http://stackoverflow.com/q/55250990
"""
import asyncio
import csv
from asyncio import TimeoutError as asioTimeoutError
from timeit import default_timer as time
from typing import List, Tuple

import aiohttp
import nest_asyncio
from aiohttp import (
    ClientConnectorError,
    ClientOSError,
    ClientResponseError,
    InvalidURL,
    ServerDisconnectedError,
    ServerTimeoutError,
)
from aiohttp.client_exceptions import TooManyRedirects
# Headers sent with every HEAD request; supplies a desktop-browser
# user-agent string.
HEADERS = {
    'user-agent': ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/45.0.2454.101 Safari/537.36'),
}

# Maximum number of requests allowed in flight at the same time.
SEMAPHORE_COUNTER = 1000
# Shared semaphore that http_request() holds for the duration of a request.
SEM = asyncio.Semaphore(SEMAPHORE_COUNTER)

# Default timeout, in seconds, used by poll_urls().
TIMEOUT = 5
async def http_request(session: aiohttp.ClientSession,
                       url: str
                       ) -> Tuple[str, int, str, float]:
    """
    Send one HEAD request and report how it went.

    Redirects are not followed and the request is made with ``ssl=False``
    (no TLS certificate verification), so the status reported is that of
    the first response received.

    :param session: open aiohttp session used to issue the request
    :param url: address to poll
    :return: a tuple containing:
        -The URL
        -HTTP Request Status Code, or -1 when the request failed
        -HTTP Request Response String or Error
        -Seconds to complete the request (time spent waiting for the
         semaphore is not counted)
    """
    async with SEM:
        # Taken before the try block so the error path always has a start
        # time to measure from.
        start = time()
        try:
            async with session.head(url,
                                    allow_redirects=False,
                                    ssl=False,
                                    headers=HEADERS) as resp:
                status = resp.status
                reason = resp.reason
            return (url, status, reason, time() - start)
        except (asioTimeoutError, aiohttp.ClientError) as err:
            # aiohttp.ClientError is the base class of all aiohttp client
            # exceptions; catching it keeps one unexpected client failure
            # from escaping and aborting the whole gathered batch.
            # asyncio.TimeoutError has an empty message, so fall back to the
            # exception's class name to keep the error column informative.
            detail = str(err) or type(err).__name__
            return (url, -1, detail, time() - start)
async def http_requests(loop: asyncio.events.AbstractEventLoop,
                        urls: List[str],
                        timeout: int,
                        ) -> List[Tuple[str, int, str, float]]:
    """
    Poll every URL concurrently over one shared aiohttp session.

    :param loop: event loop handed to the aiohttp session
    :param urls: addresses to poll
    :param timeout: seconds allowed for obtaining a connection and for each
        wait on data from the server
    :return: one http_request() result tuple per URL, in the order of urls
    """
    conn = aiohttp.TCPConnector(limit=1100, ttl_dns_cache=250)
    # ClientTimeout fields that are not given stay None, meaning no limit.
    # With only ``connect`` set, a server that accepts the connection and
    # then never answers would stall the whole batch, so socket reads are
    # bounded by the same value.
    client_timeout = aiohttp.ClientTimeout(connect=timeout,
                                           sock_read=timeout)
    async with aiohttp.ClientSession(loop=loop,
                                     timeout=client_timeout,
                                     connector=conn
                                     ) as session:
        responses = await asyncio.gather(
            *(http_request(session, url) for url in urls))
    return responses
def poll_urls(urls: List[str],
              timeout: int = TIMEOUT
              ) -> List[Tuple[str, int, str, float]]:
    """
    Poll all urls on the current event loop and print timing statistics.

    :param urls: addresses to poll
    :param timeout: in seconds
    :return: one (url, status code, response or error, seconds) tuple per
        url; an empty list when urls is empty
    """
    if not urls:
        # The per-URL averages below divide by the number of URLs.
        return []

    print(f"Starting loop, {len(urls)} urls...\n\t", end='')
    loop_start = time()
    loop = asyncio.get_event_loop()
    responses = loop.run_until_complete(http_requests(loop, urls, timeout))
    # Time the whole set took
    delta_t = time() - loop_start

    # Calculate the effective time spent per URL
    effective_mean = delta_t / len(urls)

    # Calculate average of actual execution time of each URL
    durations = [response[3] for response in responses]
    total_duration = sum(durations)
    true_avg = total_duration / len(durations)
    # Summed request time minus the wall-clock time actually spent.
    lift = total_duration - delta_t

    print(f"Took {delta_t:.2f} seconds to poll {len(urls)} websites:\n\t"
          f"Effective Mean: {effective_mean:.5f} seconds per url\n\t"
          f"Actual Mean: {true_avg:.5f} seconds per url\n\t"
          f"Lift per URL: {true_avg-effective_mean:.2f} seconds\n\t"
          f"Est. time saved: {lift:.2f} seconds\n")

    return responses
def main():
    """
    Poll the URLs listed in tablinktest.csv and save the results.

    Every cell of every row in the input file is treated as a URL. The
    results are written to '<number of results> test.csv': one header row
    followed by one row per polled URL.
    """
    # This next line is a patch to fix async in Spyder
    nest_asyncio.apply()

    urls = []
    # newline='' leaves newline handling to the csv module, as its
    # documentation requires for files passed to csv.reader.
    with open('tablinktest.csv', newline='') as infile:
        for row in csv.reader(infile):
            urls.extend(row)

    results = poll_urls(urls)

    with open(f'{len(results)} test.csv', 'w') as outfile:
        writer = csv.writer(outfile, lineterminator='\n')
        writer.writerow(['URL',
                         'StatusCode',
                         'Response',
                         'Response Time'])
        writer.writerows(results)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment