Skip to content

Instantly share code, notes, and snippets.

@RichardDally
Created May 25, 2025 22:09
Show Gist options
  • Save RichardDally/a0dbc272b2aa46d618db02b456bef360 to your computer and use it in GitHub Desktop.
Save RichardDally/a0dbc272b2aa46d618db02b456bef360 to your computer and use it in GitHub Desktop.
Download multiple files with async and authentication
#!/usr/bin/env python3
# /// script
# dependencies = [
# "aiohttp>=3.9.0", # For asynchronous HTTP requests
# "aiofiles>=23.0.0", # For asynchronous file operations
# "uvloop>=0.19.0; sys_platform != 'win32'" # For a faster asyncio event loop on POSIX
# ]
# ///
#
# Script: async_downloader_netrc.py
# Description: Downloads multiple files asynchronously using aiohttp,
# with uvloop for a potentially faster asyncio event loop,
# and includes .netrc support for authentication.
# Python Version: >= 3.11
# Author: AI Assistant
# Date: May 26, 2025
#
# How to run this script with 'uv':
# ---------------------------------
# 1. Ensure 'uv' is installed (https://github.com/astral-sh/uv).
#
# 2. Create a virtual environment and activate it:
# uv venv .venv
# source .venv/bin/activate # On Linux/macOS
# # For Windows CMD: .venv\Scripts\activate.bat
# # For Windows PowerShell: .venv\Scripts\Activate.ps1
#
# 3. Install the dependencies (listed in the '/// script' block above) into the environment:
# uv pip install "aiohttp>=3.9.0" "aiofiles>=23.0.0" "uvloop>=0.19.0; sys_platform != 'win32'"
# (Quote each requirement: an unquoted ">=" is treated as output redirection by the shell.)
#
# 4. Run the script:
# uv run async_downloader_netrc.py
#
# (Replace 'async_downloader_netrc.py' with your actual script filename if different)
#
# .netrc for Authentication:
# If any URLs require authentication, this script will attempt to find credentials
# in the .netrc file in your home directory (Python's netrc module looks for ".netrc",
# not "_netrc", on every platform, including Windows).
# Ensure your .netrc file has appropriate permissions (e.g., 600 on POSIX).
# Example .netrc entry:
# machine my.securehost.com login myusername password mysecretpassword
# ---------------------------------
import asyncio
import sys # For platform check
import os
import time
from pathlib import Path
import netrc # For .netrc file parsing
from urllib.parse import urlparse # To get hostname from URL
# Event-loop selection happens at import time, before any loop is created:
# uvloop is tried on non-Windows platforms only, and every failure mode falls
# back to the stock asyncio loop.
IS_WINDOWS = sys.platform == "win32"
UVLOOP_SUCCESS = False  # Flipped to True only after uvloop.install() succeeds.
if IS_WINDOWS:
    print("INFO: On Windows, uvloop is not typically used; using default asyncio event loop.")
else:
    try:
        import uvloop
        uvloop.install()
    except ImportError:
        # uvloop is optional: tell the user how to get it and carry on.
        print("INFO: uvloop not found. To potentially improve performance on POSIX systems,")
        print(" install it using: uv pip install \"uvloop>=0.19.0\"")
    except Exception as e:
        # Installed but unusable; report why and keep the default loop.
        print(f"WARNING: Could not install or use uvloop: {e}. Using default asyncio event loop.")
    else:
        UVLOOP_SUCCESS = True
        print("INFO: uvloop is installed and will be used as the asyncio event loop.")
# These must be importable. Their installation is covered in the comments above.
# If any of them is missing, the script prints a hint and exits with status 1.
try:
    import aiohttp
    import aiofiles
    # "import aiofiles" alone does not load the "aiofiles.os" submodule, so any
    # aiofiles.os.* call would fail with AttributeError without this explicit import.
    import aiofiles.os
except ImportError as e:
    print(f"ERROR: Missing critical dependency: {e.name}. Please install dependencies as per the instructions at the top of the script.")
    sys.exit(1)
# --- Configuration ---
# Note: These public Python URLs do not require authentication.
# The .netrc functionality is for URLs that *do* require HTTP Basic auth
# (credentials from .netrc are sent as aiohttp.BasicAuth).
# Replace with your actual URLs, some of which might require authentication.
TARGET_URLS = [
    "https://www.python.org/ftp/python/3.12.4/Python-3.12.4.tgz",
    "https://www.python.org/ftp/python/3.11.9/Python-3.11.9.tgz",
    # Example of a URL that might require auth (replace with a real one for testing .netrc):
    # "https://your-protected-server.com/somefile.zip",
]
# Destination directory for downloaded files (a relative path, so it is
# resolved against the current working directory).
DOWNLOAD_DIR = Path("python_sources_netrc_dl")
# --- End Configuration ---
def _filename_from_url(url: str) -> str:
    """Return the local filename to use for *url*.

    Only the URL's path component is considered, so query strings and
    fragments (``file.zip?token=abc``) never leak into the filename. When the
    path has no final component, the hostname (or ``"download"``) is used.
    """
    try:
        parsed = urlparse(url)
        return Path(parsed.path).name or parsed.hostname or "download"
    except ValueError:
        # urlparse rejects some malformed URLs; fall back to the raw string.
        return Path(url).name or "download"


def _netrc_auth_for(url: str, filename: str) -> aiohttp.BasicAuth | None:
    """Look up HTTP Basic credentials for *url*'s host in the user's .netrc.

    Returns ``None`` when the URL has no hostname, no .netrc file exists, the
    host has no entry, or the entry lacks a login or password. The lookup is
    best-effort: problems are reported on stdout and never raised.
    *filename* is used only to tag log lines.
    """
    try:
        hostname = urlparse(url).hostname
        if not hostname:
            return None
        # netrc.netrc() with no argument reads ".netrc" from the user's home
        # directory (as determined by os.path.expanduser) on every platform.
        auth_info = netrc.netrc().authenticators(hostname)
    except FileNotFoundError:
        # No .netrc at all is the common case; stay quiet.
        return None
    except netrc.NetrcParseError as e:
        print(f"WARNING:[{filename}] Could not parse .netrc file: {e}")
        return None
    except Exception as e_netrc_setup:
        # Deliberately broad: a broken .netrc setup must not abort the download.
        print(f"WARNING:[{filename}] Error during .netrc setup for {url}: {type(e_netrc_setup).__name__} - {e_netrc_setup}")
        return None

    if not auth_info:
        return None
    login, _account, password = auth_info  # auth_info is (login, account, password)
    if login and password:
        print(f"INFO:[{filename}] Using .netrc credentials for host {hostname}")
        return aiohttp.BasicAuth(login=login, password=password)
    print(f"DEBUG:[{filename}] Credentials for {hostname} in .netrc missing login or password.")
    return None


async def _remove_partial_file(filepath: Path, filename: str) -> None:
    """Delete an incomplete download at *filepath*, logging the outcome."""
    try:
        # unlink is a blocking syscall; run it off the event loop thread.
        await asyncio.to_thread(filepath.unlink)
    except FileNotFoundError:
        return  # Nothing was left behind.
    except OSError as e_clean:
        print(f"WARNING:[{filename}] Error cleaning up file {filepath}: {e_clean}")
    else:
        print(f"INFO:[{filename}] Cleaned up partially downloaded file: {filepath}")


async def download_file_async(session: aiohttp.ClientSession, url: str, download_folder: Path) -> Path | None:
    """
    Asynchronously downloads a single file, attempting .netrc auth if needed.

    Args:
        session: Open aiohttp client session used for the request.
        url: URL of the file to download.
        download_folder: Existing directory that receives the file.

    Returns:
        The path of the saved file on success, or ``None`` on any failure.
        Failures are reported on stdout rather than raised, and a partially
        written file is removed.
    """
    filename = _filename_from_url(url)
    filepath = download_folder / filename
    auth_from_netrc = _netrc_auth_for(url, filename)

    # Tracks whether this call opened (and therefore truncated) the target
    # file, so that a failure before that point never deletes a file left by
    # an earlier successful run.
    file_opened = False
    try:
        print(f"INFO:[{filename}] Attempting download (Auth: {'Yes' if auth_from_netrc else 'No'})...")
        timeout = aiohttp.ClientTimeout(connect=15, total=300)  # Connect timeout 15s, total 5 mins
        async with session.get(url, auth=auth_from_netrc, timeout=timeout, allow_redirects=True) as response:
            if auth_from_netrc and response.status == 401:
                print(f"ERROR:[{filename}] Authentication failed with .netrc credentials (HTTP 401).")
            response.raise_for_status()  # Raises ClientResponseError for any 4xx/5xx status

            content_length_str = response.headers.get('Content-Length')
            if content_length_str and content_length_str.isdigit():
                total_size_mb = int(content_length_str) / (1024 * 1024)
                print(f"INFO:[{filename}] Starting stream (Size: {total_size_mb:.2f} MB)...")
            else:
                print(f"INFO:[{filename}] Starting stream (Size: unknown)...")

            async with aiofiles.open(filepath, 'wb') as f:
                file_opened = True
                async for chunk in response.content.iter_chunked(32 * 1024):  # 32KB chunks
                    await f.write(chunk)

        # Measure only after the file has been closed, so buffered data is on disk.
        file_size_mb = filepath.stat().st_size / (1024 * 1024)  # stat is synchronous
        print(f"SUCCESS:[{filename}] Saved to {filepath} ({file_size_mb:.2f} MB)")
        return filepath
    except aiohttp.ClientResponseError as e:
        print(f"FAILED:[{filename}] HTTP Error (Status {e.status}): {e.message}")
    except aiohttp.ClientConnectionError as e:
        print(f"FAILED:[{filename}] Connection Error: {e}")
    except asyncio.TimeoutError:
        print(f"FAILED:[{filename}] Timeout after specified duration.")
    except Exception as e:
        print(f"FAILED:[{filename}] An unexpected error: {type(e).__name__} - {e}")

    # Every failure path lands here; drop whatever was written so far.
    if file_opened:
        await _remove_partial_file(filepath, filename)
    return None
async def main():
    """Download every URL in TARGET_URLS concurrently and print a summary.

    Creates DOWNLOAD_DIR if needed, runs one download task per URL over a
    shared aiohttp session, then reports the event loop in use, per-file and
    total sizes, the number of failures and the elapsed wall-clock time.
    """
    DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
    print(f"INFO: Files will be downloaded to: {DOWNLOAD_DIR.resolve()}")
    print(
        f"INFO: Running with uvloop."
        if UVLOOP_SUCCESS
        else f"INFO: Running with default asyncio event loop."
    )

    started = time.perf_counter()
    async with aiohttp.ClientSession() as session:
        pending = []
        for url in TARGET_URLS:
            pending.append(asyncio.create_task(download_file_async(session, url, DOWNLOAD_DIR)))
        results = await asyncio.gather(*pending)
    finished = time.perf_counter()

    # A download that failed yields None; keep only the saved paths.
    saved_paths = list(filter(lambda outcome: outcome is not None, results))

    print(f"\n--- Download Summary ---")
    try:
        loop_type = type(asyncio.get_running_loop())
        print(f"Event loop used: {loop_type.__module__}.{loop_type.__name__}")
    except RuntimeError:
        print("Event loop used: (Could not determine - no running loop after main completion)")

    attempted = len(TARGET_URLS)
    print(f"Total files attempted: {attempted}")
    print(f"Successfully downloaded: {len(saved_paths)}")

    total_size_mb = 0
    if saved_paths:
        for saved in map(Path, saved_paths):
            # Path.stat() is a synchronous call.
            size_mb = saved.stat().st_size / (1024 * 1024)
            total_size_mb += size_mb
            print(f" - {saved.name} ({size_mb:.2f} MB)")
        print(f"Total downloaded size: {total_size_mb:.2f} MB")

    failed_count = attempted - len(saved_paths)
    if failed_count > 0:
        print(f"Failed downloads: {failed_count}")
    print(f"All downloads attempted in {finished - started:.2f} seconds.")
# Script entry point: run the async main() to completion when this file is
# executed directly (not when it is imported as a module).
if __name__ == "__main__":
    asyncio.run(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment