Created
May 25, 2025 22:09
-
-
Save RichardDally/a0dbc272b2aa46d618db02b456bef360 to your computer and use it in GitHub Desktop.
Download multiple files with async and authentication
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# /// script | |
# dependencies = [ | |
# "aiohttp>=3.9.0", # For asynchronous HTTP requests | |
# "aiofiles>=23.0.0", # For asynchronous file operations | |
# "uvloop>=0.19.0; sys_platform != 'win32'" # For a faster asyncio event loop on POSIX | |
# ] | |
# /// | |
# | |
# Script: async_downloader_netrc.py | |
# Description: Downloads multiple files asynchronously using aiohttp, | |
# with uvloop for a potentially faster asyncio event loop, | |
# and includes .netrc support for authentication. | |
# Python Version: >= 3.11 | |
# Author: AI Assistant | |
# Date: May 26, 2025 | |
# | |
# How to run this script with 'uv': | |
# --------------------------------- | |
# 1. Ensure 'uv' is installed (https://github.com/astral-sh/uv). | |
# | |
# 2. Create a virtual environment and activate it: | |
# uv venv .venv | |
# source .venv/bin/activate # On Linux/macOS | |
# # For Windows CMD: .venv\Scripts\activate.bat | |
# # For Windows PowerShell: .venv\Scripts\Activate.ps1 | |
# | |
# 3. Install the dependencies (listed in the '/// script' block above) into the environment: | |
# uv pip install "aiohttp>=3.9.0" "aiofiles>=23.0.0" "uvloop>=0.19.0; sys_platform != 'win32'"
# | |
# 4. Run the script: | |
# uv run async_downloader_netrc.py | |
# | |
# (Replace 'async_downloader_netrc.py' with your actual script filename if different) | |
# | |
# .netrc for Authentication: | |
# If any URLs require authentication, this script will attempt to find credentials | |
# in your ~/.netrc file (or %HOME%\_netrc on Windows, though behavior might vary). | |
# Ensure your .netrc file has appropriate permissions (e.g., 600 on POSIX). | |
# Example .netrc entry: | |
# machine my.securehost.com login myusername password mysecretpassword | |
# --------------------------------- | |
import asyncio | |
import sys # For platform check | |
import os | |
import time | |
from pathlib import Path | |
import netrc # For .netrc file parsing | |
from urllib.parse import urlparse # To get hostname from URL | |
# Optional speed-up: swap in uvloop's event loop on non-Windows platforms.
# This runs at import time so the loop policy is in place before asyncio.run().
IS_WINDOWS = sys.platform == "win32"
UVLOOP_SUCCESS = False  # flipped to True only after uvloop.install() succeeds
if IS_WINDOWS:
    print("INFO: On Windows, uvloop is not typically used; using default asyncio event loop.")
else:
    try:
        import uvloop
        uvloop.install()
        UVLOOP_SUCCESS = True
        print("INFO: uvloop is installed and will be used as the asyncio event loop.")
    except ImportError:
        # uvloop is optional; just tell the user how to get it.
        print("INFO: uvloop not found. To potentially improve performance on POSIX systems,")
        print(" install it using: uv pip install \"uvloop>=0.19.0\"")
    except Exception as exc:
        # Any other failure must not stop the script; the default loop is used instead.
        print(f"WARNING: Could not install or use uvloop: {exc}. Using default asyncio event loop.")
# Hard requirements: without aiohttp and aiofiles the script cannot do anything,
# so report the missing package and exit with status 1.
try:
    import aiohttp
    import aiofiles
except ImportError as missing_dep:
    print(f"ERROR: Missing critical dependency: {missing_dep.name}. Please install dependencies as per the instructions at the top of the script.")
    sys.exit(1)
# --- Configuration ---
# The python.org tarballs below are public and need no credentials.
# .netrc credentials are only sent (as HTTP Basic auth) for hosts that have a
# matching entry in the .netrc file; add your own URLs here as needed.
TARGET_URLS = [
    "https://www.python.org/ftp/python/3.12.4/Python-3.12.4.tgz",
    "https://www.python.org/ftp/python/3.11.9/Python-3.11.9.tgz",
    # Example of a protected URL (swap in a real one to exercise .netrc):
    # "https://your-protected-server.com/somefile.zip",
]
# Directory the downloaded files are written to (created by main() if missing).
DOWNLOAD_DIR = Path("python_sources_netrc_dl")
# --- End Configuration ---
def _filename_from_url(url: str) -> str:
    """Return the local file name for *url*, ignoring any query string or fragment."""
    try:
        parsed = urlparse(url)
        name = Path(parsed.path).name or (parsed.hostname or "")
    except ValueError:
        # Malformed URL (urlparse rejects e.g. unbalanced IPv6 brackets): fall back below.
        name = ""
    return name or Path(url).name or "download"


def _netrc_auth_for(url: str, filename: str) -> aiohttp.BasicAuth | None:
    """Return HTTP Basic credentials for *url*'s host from the default .netrc file, if any."""
    try:
        hostname = urlparse(url).hostname
        if not hostname:
            return None
        # netrc.netrc() without arguments locates the default file on its own.
        auth_info = netrc.netrc().authenticators(hostname)
        if not auth_info:
            return None
        login, _account, password = auth_info  # (login, account, password)
        if login and password:
            credentials = aiohttp.BasicAuth(login=login, password=password)
            print(f"INFO:[{filename}] Using .netrc credentials for host {hostname}")
            return credentials
        print(f"DEBUG:[{filename}] Credentials for {hostname} in .netrc missing login or password.")
    except FileNotFoundError:
        # No .netrc file is a normal situation; stay quiet.
        pass
    except netrc.NetrcParseError as e:
        print(f"WARNING:[{filename}] Could not parse .netrc file: {e}")
    except Exception as e_netrc_setup:
        # The credential lookup is best-effort and must never abort the download itself.
        print(f"WARNING:[{filename}] Error during .netrc setup for {url}: {type(e_netrc_setup).__name__} - {e_netrc_setup}")
    return None


async def download_file_async(session: aiohttp.ClientSession, url: str, download_folder: Path) -> Path | None:
    """Download one URL into ``download_folder``, sending .netrc credentials when available.

    Progress and failures are reported on stdout. Expected failures (HTTP
    errors, connection errors, timeouts, other exceptions raised while
    downloading) are caught and turned into a ``None`` return value.

    Args:
        session: Open aiohttp client session used to issue the GET request.
        url: URL of the file to fetch.
        download_folder: Directory the file is written into.

    Returns:
        The path of the saved file on success, otherwise ``None``. A partial
        file created by this call is removed on failure.
    """
    # Imported here because ``import aiofiles`` alone does not load the
    # ``aiofiles.os`` submodule that the cleanup step relies on.
    from aiofiles import os as aio_os

    filename = _filename_from_url(url)
    filepath = download_folder / filename
    auth_from_netrc = _netrc_auth_for(url, filename)

    # Set once this call has opened (and therefore truncated) the target file, so
    # cleanup never deletes a file left by an earlier, successful run.
    file_opened = False
    try:
        print(f"INFO:[{filename}] Attempting download (Auth: {'Yes' if auth_from_netrc is not None else 'No'})...")
        timeout = aiohttp.ClientTimeout(connect=15, total=300)  # 15 s to connect, 5 min overall
        async with session.get(url, auth=auth_from_netrc, timeout=timeout, allow_redirects=True) as response:
            if auth_from_netrc is not None and response.status == 401:
                print(f"ERROR:[{filename}] Authentication failed with .netrc credentials (HTTP 401).")
            response.raise_for_status()

            content_length_str = response.headers.get('Content-Length')
            if content_length_str and content_length_str.isdigit():
                total_size_mb = int(content_length_str) / (1024*1024)
                print(f"INFO:[{filename}] Starting stream (Size: {total_size_mb:.2f} MB)...")
            else:
                print(f"INFO:[{filename}] Starting stream (Size: unknown)...")

            downloaded_bytes = 0
            async with aiofiles.open(filepath, 'wb') as f:
                file_opened = True
                async for chunk in response.content.iter_chunked(32 * 1024):  # 32KB chunks
                    if not chunk:
                        break
                    await f.write(chunk)
                    downloaded_bytes += len(chunk)

            # Every byte written was counted above, so no extra stat() call is needed.
            file_size_mb = downloaded_bytes / (1024 * 1024)
            print(f"SUCCESS:[{filename}] Saved to {filepath} ({file_size_mb:.2f} MB)")
            return filepath
    except aiohttp.ClientResponseError as e:
        print(f"FAILED:[{filename}] HTTP Error (Status {e.status}): {e.message}")
    except aiohttp.ClientConnectionError as e:
        print(f"FAILED:[{filename}] Connection Error: {e}")
    except asyncio.TimeoutError:
        print(f"FAILED:[{filename}] Timeout after specified duration.")
    except Exception as e:
        print(f"FAILED:[{filename}] An unexpected error: {type(e).__name__} - {e}")

    # Failure path: remove the partial file, but only if this call created it.
    if file_opened:
        try:
            await aio_os.remove(filepath)
            print(f"INFO:[{filename}] Cleaned up partially downloaded file: {filepath}")
        except FileNotFoundError:
            pass  # nothing left to clean up
        except OSError as e_clean:
            print(f"WARNING:[{filename}] Error cleaning up file {filepath}: {e_clean}")
    return None
async def main():
    """Download every URL in ``TARGET_URLS`` concurrently and print a summary.

    Creates ``DOWNLOAD_DIR`` if needed, starts one download task per URL on a
    shared aiohttp session, then reports the event loop in use, the files that
    were saved (with sizes) and the number of failures.
    """
    DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
    print(f"INFO: Files will be downloaded to: {DOWNLOAD_DIR.resolve()}")
    if UVLOOP_SUCCESS:
        print("INFO: Running with uvloop.")
    else:
        print("INFO: Running with default asyncio event loop.")

    start_time = time.perf_counter()
    async with aiohttp.ClientSession() as session:
        tasks = [asyncio.create_task(download_file_async(session, url, DOWNLOAD_DIR)) for url in TARGET_URLS]
        # return_exceptions=True: one task failing unexpectedly must not discard
        # the results of the others or skip the summary below.
        results = await asyncio.gather(*tasks, return_exceptions=True)
    end_time = time.perf_counter()

    successful_downloads = []
    for url, outcome in zip(TARGET_URLS, results):
        if isinstance(outcome, BaseException):
            print(f"FAILED:[{url}] Download task raised {type(outcome).__name__}: {outcome}")
        elif outcome is not None:
            successful_downloads.append(outcome)

    print("\n--- Download Summary ---")
    # A coroutine always has a running loop, so this lookup cannot fail here.
    loop = asyncio.get_running_loop()
    print(f"Event loop used: {loop.__class__.__module__}.{loop.__class__.__name__}")
    print(f"Total files attempted: {len(TARGET_URLS)}")
    print(f"Successfully downloaded: {len(successful_downloads)}")

    total_size_mb = 0
    if successful_downloads:
        for filepath_obj in successful_downloads:
            # stat() is a synchronous call; acceptable for a short end-of-run summary.
            size_mb = Path(filepath_obj).stat().st_size / (1024 * 1024)
            total_size_mb += size_mb
            print(f" - {Path(filepath_obj).name} ({size_mb:.2f} MB)")
        print(f"Total downloaded size: {total_size_mb:.2f} MB")

    failed_count = len(TARGET_URLS) - len(successful_downloads)
    if failed_count > 0:
        print(f"Failed downloads: {failed_count}")
    print(f"All downloads attempted in {end_time - start_time:.2f} seconds.")
# Script entry point: start the downloads only when executed directly, not on import.
if __name__ == "__main__":
    asyncio.run(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment