Created
July 29, 2020 04:24
-
-
Save reservoirinvest/8a9175194269aa0362adf329417f28dd to your computer and use it in GitHub Desktop.
asyncio
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Using tasks created with asyncio.create_task for better control of errors
# Ref: [EdgeDB](https://youtu.be/-CzqsgaXUM8?t=2279)
import asyncio | |
import time | |
from typing import Callable, Coroutine | |
import httpx | |
# Let us start by writing an async function that reports progress.
# Root URL handed to the crawler by the asyncio.run(...) call at the bottom
# of this file.
addr = "https://langa.pl/crawl"
async def progress(
    url: str,
    algo: Callable[..., Coroutine],
) -> None:
    """Run ``algo(url)`` as a task and report progress until all work is done.

    The root task is added to the module-level ``todo`` set. The loop then
    waits on that set in half-second slices and prints the number and names
    of the tasks still outstanding after each slice. Tasks added to ``todo``
    by other coroutines while this function is waiting are picked up on the
    next iteration, because the set is re-read every time.

    Args:
        url: Passed to ``algo`` and used as the name of the root task.
        algo: Callable returning a coroutine when called with ``url``.
    """
    # Keep a handle on the task (rather than the bare URL) so that its
    # outcome can be inspected once it finishes.
    root = asyncio.create_task(
        algo(url),
        name=url,
    )
    todo.add(root)
    # A monotonic clock is unaffected by system clock adjustments, so the
    # elapsed time stays meaningful.
    start = time.monotonic()
    while todo:
        # Unlike wait_for, asyncio.wait does not raise on timeout; it returns
        # the tasks that finished and the ones that are still pending.
        done, _pending = await asyncio.wait(todo, timeout=0.5)
        # Finished tasks no longer need tracking.
        todo.difference_update(done)
        # Retrieve each finished task's outcome; otherwise a failure would
        # only surface later as "Task exception was never retrieved".
        for task in done:
            if task.cancelled():
                # Task.exception() raises CancelledError for cancelled tasks,
                # so this case has to be checked first.
                print(f"{task.get_name()} was cancelled")
            elif (exc := task.exception()) is not None:
                print(f"{task.get_name()} failed: {exc!r}")
        # Report what is still in flight (tail-truncated to 75 characters).
        urls = (t.get_name() for t in todo)
        print(f"{len(todo)}: " + " ".join(sorted(urls))[-75:])
    end = time.monotonic()
    print(f"Took {int(end - start)} seconds")
async def crawl3(
    prefix: str, url: str = "",
) -> None:
    """Fetch one page and schedule a crawl task for every matching line.

    The page at ``url`` (or at ``prefix`` when ``url`` is empty) is
    downloaded. Each line of the response text that starts with ``prefix``
    becomes a new ``crawl3`` task named after that line, and the task is
    registered in the module-level ``todo`` set.

    Args:
        prefix: String that a line must start with to be followed; also the
            page fetched when ``url`` is empty.
        url: Page to fetch; defaults to ``prefix`` when empty.
    """
    target = url or prefix
    # The client is closed as soon as the request has completed, whether or
    # not it succeeded.
    async with httpx.AsyncClient() as session:
        response = await session.get(target)
    for candidate in response.text.splitlines():
        if not candidate.startswith(prefix):
            continue
        child = asyncio.create_task(
            crawl3(prefix, candidate),
            name=candidate,
        )
        todo.add(child)
# Shared registry of in-flight tasks: crawl3 adds tasks to it and progress
# waits on it and removes the ones that have finished.
todo = set()

# Start the crawl at the root address and block until progress() returns.
asyncio.run(progress(addr, crawl3))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment