Skip to content

Instantly share code, notes, and snippets.

@RichardDally
Created May 25, 2025 22:08
Show Gist options
  • Save RichardDally/ce930a3467bfbe96291e4f2a0f3c550e to your computer and use it in GitHub Desktop.
Save RichardDally/ce930a3467bfbe96291e4f2a0f3c550e to your computer and use it in GitHub Desktop.
Multiple files with async - without authentication
#!/usr/bin/env python3
# /// script
# dependencies = [
# "aiohttp>=3.9.0", # For asynchronous HTTP requests
# "aiofiles>=23.0.0", # For asynchronous file operations
# "uvloop>=0.19.0; sys_platform != 'win32'" # For a faster asyncio event loop on POSIX
# ]
# ///
#
# Script: async_downloader_with_uvloop.py
# Description: Downloads multiple files asynchronously using aiohttp,
# with uvloop for a potentially faster asyncio event loop.
# Python Version: >= 3.11
# Author: AI Assistant
# Date: May 25, 2025
#
# How to run this script with 'uv':
# ---------------------------------
# 1. Ensure 'uv' is installed (https://github.com/astral-sh/uv).
#
# 2. Create a virtual environment and activate it:
# uv venv .venv
# source .venv/bin/activate # On Linux/macOS
# # For Windows CMD: .venv\Scripts\activate.bat
# # For Windows PowerShell: .venv\Scripts\Activate.ps1
#
# 3. Install the dependencies (listed in the '/// script' block above) into the environment:
# uv pip install "aiohttp>=3.9.0" "aiofiles>=23.0.0" "uvloop>=0.19.0; sys_platform != 'win32'"
#
# 4. Run the script:
# uv run async_downloader_with_uvloop.py
#
# (Replace 'async_downloader_with_uvloop.py' with your actual script filename if different)
# ---------------------------------
import asyncio
import sys # For platform check
import os
import time
from pathlib import Path
# Event-loop selection. This runs at import time, before any loop is created:
# on non-Windows platforms uvloop is imported and installed; every other
# outcome only prints a notice and leaves UVLOOP_SUCCESS as False.
IS_WINDOWS = sys.platform == "win32"
UVLOOP_SUCCESS = False
try:
    if IS_WINDOWS:
        print("INFO: On Windows, uvloop is not typically used; using default asyncio event loop.")
    else:
        import uvloop
        uvloop.install()
        UVLOOP_SUCCESS = True
        print("INFO: uvloop is installed and will be used as the asyncio event loop.")
except ImportError:
    # The import is only attempted off Windows, so the hint is only printed there.
    if not IS_WINDOWS:
        print("INFO: uvloop not found. To potentially improve performance on POSIX systems,")
        print(" install it using: uv pip install \"uvloop>=0.19.0\"")
except Exception as exc:
    # Best effort: any other failure while enabling uvloop is reported and ignored.
    print(f"WARNING: Could not install or use uvloop: {exc}. Using default asyncio event loop.")
# Hard requirements: without aiohttp and aiofiles nothing below can work, so a
# failed import prints the missing module's name and ends the process with
# exit status 1.
try:
    import aiohttp
    import aiofiles
except ImportError as missing:
    failure_notice = f"ERROR: Missing critical dependency: {missing.name}. Please install dependencies as per the instructions at the top of the script."
    print(failure_notice)
    sys.exit(1)
# --- Configuration ---
# CPython releases whose source tarballs are fetched; each version appears
# twice in its python.org URL (directory name and archive name).
_PYTHON_VERSIONS = ("3.12.4", "3.11.9", "3.10.14", "3.9.19")
PYTHON_SOURCE_URLS = [
    f"https://www.python.org/ftp/python/{version}/Python-{version}.tgz"
    for version in _PYTHON_VERSIONS
]
# Target directory for the downloads, relative to the current working directory.
DOWNLOAD_DIR = Path("python_sources_uvloop_dl")
# --- End Configuration ---
async def download_file_async(session: aiohttp.ClientSession, url: str, download_folder: Path) -> Path | None:
    """
    Asynchronously download a single file with aiohttp and save it with aiofiles.

    Args:
        session: aiohttp client session used to issue the GET request.
        url: Address of the file. Its final component (as parsed by pathlib)
            is used as the local file name.
        download_folder: Directory the file is written into. It is not
            created here.

    Returns:
        The path of the saved file on success. On any failure the error is
        printed, a partially written file is removed if one exists, and
        None is returned.
    """
    # `import aiofiles` on its own does not load the `aiofiles.os` submodule, so
    # the cleanup below would raise AttributeError (outside the try block, hence
    # propagating to the caller). Import the submodule explicitly.
    from aiofiles import os as aiofiles_os

    filename = Path(url).name
    filepath = download_folder / filename
    try:
        print(f"[{filename}] Attempting download from {url}...")
        timeout = aiohttp.ClientTimeout(connect=15, total=300)  # Connect timeout 15s, total 5 mins
        async with session.get(url, timeout=timeout, allow_redirects=True) as response:
            response.raise_for_status()
            async with aiofiles.open(filepath, 'wb') as f:
                content_length = response.headers.get('Content-Length')
                if content_length:
                    print(f"[{filename}] Starting stream... (Size: {int(content_length) / (1024*1024):.2f} MB if known)")
                else:
                    print(f"[{filename}] Starting stream...")
                async for chunk in response.content.iter_chunked(32 * 1024):  # 32KB chunks
                    if not chunk:
                        break
                    await f.write(chunk)
            # The file is closed at this point, so the size on disk is final.
            file_size_mb = filepath.stat().st_size / (1024 * 1024)
            print(f"[{filename}] SUCCESS. Saved to {filepath} ({file_size_mb:.2f} MB)")
            return filepath
    except aiohttp.ClientResponseError as e:
        print(f"[{filename}] FAILED. HTTP Error (Status {e.status}): {e.message}")
    except aiohttp.ClientConnectionError as e:  # More specific than ClientError for connection issues
        print(f"[{filename}] FAILED. Connection Error: {e}")
    except asyncio.TimeoutError:
        print(f"[{filename}] FAILED. Timeout after specified duration.")
    except Exception as e:
        print(f"[{filename}] FAILED. An unexpected error occurred: {type(e).__name__} - {e}")

    # Only reached after a failure (success returns above): drop whatever was
    # written so an incomplete archive is not left behind.
    if await aiofiles_os.path.exists(filepath):
        try:
            await aiofiles_os.remove(filepath)
            print(f"[{filename}] Cleaned up partially downloaded file: {filepath}")
        except Exception as e_clean:
            print(f"[{filename}] Error cleaning up file {filepath}: {e_clean}")
    return None
async def main():
    """
    Coordinate the downloads and print a summary.

    Creates DOWNLOAD_DIR, starts one download task per entry of
    PYTHON_SOURCE_URLS on a single shared aiohttp session, waits for all of
    them, then reports which files were saved, their sizes, the number of
    failures and the elapsed time.
    """
    DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
    print(f"INFO: Files will be downloaded to: {DOWNLOAD_DIR.resolve()}")
    print(
        "INFO: Running with uvloop."
        if UVLOOP_SUCCESS
        else "INFO: Running with default asyncio event loop."
    )

    started_at = time.perf_counter()
    # A custom connector (e.g. aiohttp.TCPConnector(limit_per_host=10)) could be
    # passed to the session to cap simultaneous connections per host.
    async with aiohttp.ClientSession() as session:
        pending = []
        for url in PYTHON_SOURCE_URLS:
            pending.append(asyncio.create_task(download_file_async(session, url, DOWNLOAD_DIR)))
        outcomes = await asyncio.gather(*pending)
    finished_at = time.perf_counter()

    # download_file_async returns None for a failed download.
    saved_paths = [outcome for outcome in outcomes if outcome is not None]
    attempted = len(PYTHON_SOURCE_URLS)

    print("\n--- Download Summary ---")
    try:
        running_loop = asyncio.get_running_loop()
        loop_type = running_loop.__class__
        print(f"Event loop used: {loop_type.__module__}.{loop_type.__name__}")
    except RuntimeError:  # Should not happen if main is run with asyncio.run
        print("Event loop used: (Could not determine - no running loop after main completion)")
    print(f"Total files attempted: {attempted}")
    print(f"Successfully downloaded: {len(saved_paths)}")

    total_size_mb = 0
    if saved_paths:
        for saved in saved_paths:
            saved = Path(saved)
            size_mb = saved.stat().st_size / (1024 * 1024)  # stat is synchronous
            total_size_mb += size_mb
            print(f" - {saved.name} ({size_mb:.2f} MB)")
        print(f"Total downloaded size: {total_size_mb:.2f} MB")

    failed_count = attempted - len(saved_paths)
    if failed_count > 0:
        print(f"Failed downloads: {failed_count}")
    print(f"All downloads attempted in {finished_at - started_at:.2f} seconds.")
# Script entry point: start the event loop and run the coordinator only when
# this file is executed directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment