Created
May 25, 2025 22:08
-
-
Save RichardDally/ce930a3467bfbe96291e4f2a0f3c550e to your computer and use it in GitHub Desktop.
Multiple files with async - without authentication
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# /// script | |
# dependencies = [ | |
# "aiohttp>=3.9.0", # For asynchronous HTTP requests | |
# "aiofiles>=23.0.0", # For asynchronous file operations | |
# "uvloop>=0.19.0; sys_platform != 'win32'" # For a faster asyncio event loop on POSIX | |
# ] | |
# /// | |
# | |
# Script: async_downloader_with_uvloop.py | |
# Description: Downloads multiple files asynchronously using aiohttp, | |
# with uvloop for a potentially faster asyncio event loop. | |
# Python Version: >= 3.11 | |
# Author: AI Assistant | |
# Date: May 25, 2025 | |
# | |
# How to run this script with 'uv': | |
# --------------------------------- | |
# 1. Ensure 'uv' is installed (https://github.com/astral-sh/uv). | |
# | |
# 2. Create a virtual environment and activate it: | |
# uv venv .venv | |
# source .venv/bin/activate # On Linux/macOS | |
# # For Windows CMD: .venv\Scripts\activate.bat | |
# # For Windows PowerShell: .venv\Scripts\Activate.ps1 | |
# | |
# 3. Install the dependencies (listed in the '/// script' block above) into the environment: | |
#    uv pip install "aiohttp>=3.9.0" "aiofiles>=23.0.0" "uvloop>=0.19.0; sys_platform != 'win32'"
#    (Quote each requirement: an unquoted '>' is treated as output redirection by the shell.)
# | |
# 4. Run the script: | |
# uv run async_downloader_with_uvloop.py | |
# | |
# (Replace 'async_downloader_with_uvloop.py' with your actual script filename if different) | |
# --------------------------------- | |
import asyncio | |
import sys # For platform check | |
import os | |
import time | |
from pathlib import Path | |
# Select the event loop implementation up front, before any loop is created.
# uvloop is only attempted off Windows; every failure path leaves the default
# asyncio loop in place and records that in UVLOOP_SUCCESS.
IS_WINDOWS = sys.platform == "win32"
UVLOOP_SUCCESS = False
if IS_WINDOWS:
    print("INFO: On Windows, uvloop is not typically used; using default asyncio event loop.")
else:
    try:
        import uvloop
        uvloop.install()
        UVLOOP_SUCCESS = True
        print("INFO: uvloop is installed and will be used as the asyncio event loop.")
    except ImportError:
        # uvloop is optional: point the user at the install command and carry on.
        print("INFO: uvloop not found. To potentially improve performance on POSIX systems,")
        print(" install it using: uv pip install \"uvloop>=0.19.0\"")
    except Exception as e:
        print(f"WARNING: Could not install or use uvloop: {e}. Using default asyncio event loop.")
# These must be importable. Their installation is covered in the comments above.
# Unlike uvloop, aiohttp and aiofiles are mandatory: if either import fails, the
# name of the missing module is printed and the process exits with status 1.
try:
    import aiohttp
    import aiofiles
except ImportError as e:
    print(f"ERROR: Missing critical dependency: {e.name}. Please install dependencies as per the instructions at the top of the script.")
    sys.exit(1)
# --- Configuration ---
# CPython source tarballs to fetch from python.org, one per listed release.
PYTHON_SOURCE_URLS = [
    f"https://www.python.org/ftp/python/{release}/Python-{release}.tgz"
    for release in ("3.12.4", "3.11.9", "3.10.14", "3.9.19")
]
# Target directory for the downloads (relative path, created by main()).
DOWNLOAD_DIR = Path("python_sources_uvloop_dl")
# --- End Configuration ---
async def download_file_async(session: aiohttp.ClientSession, url: str, download_folder: Path) -> Path | None:
    """
    Asynchronously download a single file using aiohttp and save it with aiofiles.

    The response body is streamed to disk in 32 KB chunks, so the whole file is
    never held in memory. Failures are reported on stdout instead of being
    raised, and a file that this call started writing is removed again.

    Args:
        session: Open aiohttp client session used to issue the GET request.
        url: Address of the file. The last segment of the URL path becomes the
            local file name; a query string or fragment is ignored.
        download_folder: Directory that receives the file.

    Returns:
        The path of the saved file, or ``None`` if the download failed.
    """
    from urllib.parse import urlsplit  # local import keeps this block self-contained

    # Name the file after the URL *path* so "?query" / "#fragment" never end up
    # in the file name; fall back to the raw URL's last segment if the path is empty.
    filename = Path(urlsplit(url).path).name or Path(url).name
    filepath = download_folder / filename
    # Becomes True only once this call has opened (and thereby truncated) filepath.
    # Clean-up is limited to that case so a failure before any byte is written
    # (HTTP error, timeout, refused connection) cannot delete an older good copy.
    file_created = False
    try:
        print(f"[{filename}] Attempting download from {url}...")
        timeout = aiohttp.ClientTimeout(connect=15, total=300)  # Connect timeout 15s, total 5 mins
        async with session.get(url, timeout=timeout, allow_redirects=True) as response:
            response.raise_for_status()
            content_length = response.headers.get('Content-Length')
            if content_length:
                print(f"[{filename}] Starting stream... (Size: {int(content_length) / (1024*1024):.2f} MB if known)")
            else:
                print(f"[{filename}] Starting stream...")
            downloaded_bytes = 0
            async with aiofiles.open(filepath, 'wb') as f:
                file_created = True
                async for chunk in response.content.iter_chunked(32 * 1024):  # 32KB chunks
                    await f.write(chunk)
                    downloaded_bytes += len(chunk)
        # The byte counter equals the on-disk size once the file is closed, so no
        # extra (blocking) stat() call is needed.
        file_size_mb = downloaded_bytes / (1024 * 1024)
        print(f"[{filename}] SUCCESS. Saved to {filepath} ({file_size_mb:.2f} MB)")
        return filepath
    except aiohttp.ClientResponseError as e:
        print(f"[{filename}] FAILED. HTTP Error (Status {e.status}): {e.message}")
    except aiohttp.ClientConnectionError as e:  # More specific than ClientError for connection issues
        print(f"[{filename}] FAILED. Connection Error: {e}")
    except asyncio.TimeoutError:
        print(f"[{filename}] FAILED. Timeout after specified duration.")
    except Exception as e:
        print(f"[{filename}] FAILED. An unexpected error occurred: {type(e).__name__} - {e}")

    # Only reached on failure. The unlink runs in a worker thread via the standard
    # library, so it does not depend on the aiofiles.os submodule, which this
    # file never imports explicitly.
    if file_created:
        try:
            await asyncio.to_thread(filepath.unlink)
        except FileNotFoundError:
            pass  # nothing left on disk to clean up
        except OSError as e_clean:
            print(f"[{filename}] Error cleaning up file {filepath}: {e_clean}")
        else:
            print(f"[{filename}] Cleaned up partially downloaded file: {filepath}")
    return None
async def main():
    """
    Run every configured download concurrently and print a summary report.

    Creates DOWNLOAD_DIR if needed, starts one task per entry of
    PYTHON_SOURCE_URLS on a shared aiohttp session, waits for all of them, then
    prints the event loop class, per-file sizes, the failure count (if any) and
    the elapsed time.
    """
    DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
    print(f"INFO: Files will be downloaded to: {DOWNLOAD_DIR.resolve()}")
    print("INFO: Running with uvloop." if UVLOOP_SUCCESS else "INFO: Running with default asyncio event loop.")

    started_at = time.perf_counter()
    # The session uses aiohttp's default connector; a TCPConnector with
    # limit_per_host could be supplied here to cap connections per host.
    async with aiohttp.ClientSession() as session:
        results = await asyncio.gather(
            *(
                asyncio.create_task(download_file_async(session, url, DOWNLOAD_DIR))
                for url in PYTHON_SOURCE_URLS
            )
        )
    finished_at = time.perf_counter()

    # download_file_async returns None for a failed download.
    saved_paths = [outcome for outcome in results if outcome is not None]
    attempted = len(PYTHON_SOURCE_URLS)

    print("\n--- Download Summary ---")
    try:
        loop_type = type(asyncio.get_running_loop())
        print(f"Event loop used: {loop_type.__module__}.{loop_type.__name__}")
    except RuntimeError:  # Should not happen if main is run with asyncio.run
        print("Event loop used: (Could not determine - no running loop after main completion)")
    print(f"Total files attempted: {attempted}")
    print(f"Successfully downloaded: {len(saved_paths)}")

    total_size_mb = 0
    if saved_paths:
        for saved in map(Path, saved_paths):
            megabytes = saved.stat().st_size / (1024 * 1024)  # stat is synchronous
            total_size_mb += megabytes
            print(f" - {saved.name} ({megabytes:.2f} MB)")
        print(f"Total downloaded size: {total_size_mb:.2f} MB")

    failed_count = attempted - len(saved_paths)
    if failed_count > 0:
        print(f"Failed downloads: {failed_count}")
    print(f"All downloads attempted in {finished_at - started_at:.2f} seconds.")
# Script entry point: only when executed directly (not on import), start an
# event loop and run main() to completion.
if __name__ == "__main__":
    asyncio.run(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment