RichardDally · May 25, 2025 22:09
diff --git a/async_download_with_authent.py b/async_download_with_authent.py
 #!/usr/bin/env python3
 # /// script
 # dependencies = [
 #   "aiohttp>=3.9.0",      # For asynchronous HTTP requests
 #   "aiofiles>=23.0.0",    # For asynchronous file operations
 #   "uvloop>=0.19.0; sys_platform != 'win32'" # For a faster asyncio event loop on POSIX
 # ]
 # ///
 #
 # Script: async_downloader_netrc.py
 # Description: Downloads multiple files asynchronously using aiohttp,
 #              with uvloop for a potentially faster asyncio event loop,
 #              and includes .netrc support for authentication.
 # Python Version: >= 3.11
 # Author: AI Assistant
 # Date: May 26, 2025
 #
 # How to run this script with 'uv':
 # ---------------------------------
 # 1. Ensure 'uv' is installed (https://github.com/astral-sh/uv).
 #
 # 2. Create a virtual environment and activate it:
 #    uv venv .venv
 #    source .venv/bin/activate  # On Linux/macOS
 #    # For Windows CMD: .venv\Scripts\activate.bat
 #    # For Windows PowerShell: .venv\Scripts\Activate.ps1
 #
 # 3. Install the dependencies (listed in the '/// script' block above) into the environment:
 #    uv pip install "aiohttp>=3.9.0" "aiofiles>=23.0.0" "uvloop>=0.19.0; sys_platform != 'win32'"
 #    (Quote each requirement: an unquoted '>=' is treated by the shell as output redirection.)
 #
 # 4. Run the script:
 #    uv run async_downloader_netrc.py
 #
 #    (Replace 'async_downloader_netrc.py' with your actual script filename if different)
 #
 # .netrc for Authentication:
 # If any URLs require authentication, this script will attempt to find credentials
 # in the .netrc file in your home directory (~/.netrc; on Windows this resolves to
 # %USERPROFILE%\.netrc - Python's netrc module does not look for a _netrc file).
 # Ensure your .netrc file has appropriate permissions (e.g., 600 on POSIX).
 # Example .netrc entry:
 #   machine my.securehost.com login myusername password mysecretpassword
 # ---------------------------------

 import asyncio
 import sys # For platform check
 import os
 import time
 from pathlib import Path
 import netrc # For .netrc file parsing
 from urllib.parse import urlparse # To get hostname from URL

 # Event-loop selection. uvloop is swapped in on POSIX before any event loop is
 # created; every failure mode simply leaves the stock asyncio loop in place.
 IS_WINDOWS = sys.platform == "win32"
 UVLOOP_SUCCESS = False
 try:
    if IS_WINDOWS:
        print("INFO: On Windows, uvloop is not typically used; using default asyncio event loop.")
    else:
        import uvloop
        uvloop.install()
        UVLOOP_SUCCESS = True
        print("INFO: uvloop is installed and will be used as the asyncio event loop.")
 except ImportError:
    if not IS_WINDOWS:
        # Two-line hint, emitted as two output lines.
        print(
            "INFO: uvloop not found. To potentially improve performance on POSIX systems,",
            "      install it using: uv pip install \"uvloop>=0.19.0\"",
            sep="\n",
        )
 except Exception as uvloop_error:
    print(f"WARNING: Could not install or use uvloop: {uvloop_error}. Using default asyncio event loop.")

 # Hard requirements: without these two packages the script cannot do anything,
 # so report the missing one and exit with status 1.
 try:
    import aiohttp
    import aiofiles
 except ImportError as missing_dep:
    print(f"ERROR: Missing critical dependency: {missing_dep.name}. Please install dependencies as per the instructions at the top of the script.")
    raise SystemExit(1)


 # --- Configuration ---
 # The sample URLs below are public and need no credentials. The .netrc lookup
 # only matters for hosts that demand HTTP authentication; swap in your own
 # URLs as needed.
 TARGET_URLS = [
    "https://www.python.org/ftp/python/3.12.4/Python-3.12.4.tgz",
    "https://www.python.org/ftp/python/3.11.9/Python-3.11.9.tgz",
    # Placeholder for a protected resource (use a real one to exercise .netrc):
    # "https://your-protected-server.com/somefile.zip",
 ]
 # Destination directory for downloaded files (relative to the working directory).
 DOWNLOAD_DIR = Path("python_sources_netrc_dl")
 # --- End Configuration ---


 def _filename_from_url(url: str) -> str:
    """Return the last path segment of *url*, ignoring any query string or fragment."""
    try:
        url_path = urlparse(url).path
    except ValueError:
        # Malformed URL: fall back to treating the raw string as a path.
        url_path = url
    # Path(url).name is what this script used historically; keep it as a fallback
    # for URLs without a path component, and never return an empty name.
    return Path(url_path).name or Path(url).name or "download"


 def _netrc_auth_for(url: str, label: str) -> aiohttp.BasicAuth | None:
    """Look up HTTP Basic credentials for *url*'s host in the default .netrc file.

    Args:
        url: The URL about to be requested.
        label: Short tag (the target filename) used to prefix log lines.

    Returns:
        An ``aiohttp.BasicAuth`` when the .netrc has both login and password for
        the host; ``None`` in every other case (no file, no entry, parse error).
        Never raises: credential lookup is best-effort.
    """
    try:
        hostname = urlparse(url).hostname
        if not hostname:
            return None
        try:
            # No argument: the netrc module reads .netrc from the home directory.
            auth_info = netrc.netrc().authenticators(hostname)
        except FileNotFoundError:
            # A missing .netrc is the normal case for public URLs; stay quiet.
            return None
        except netrc.NetrcParseError as e:
            print(f"WARNING:[{label}] Could not parse .netrc file: {e}")
            return None
        if not auth_info:
            return None
        login, _account, password = auth_info  # (login, account, password)
        if login and password:
            credentials = aiohttp.BasicAuth(login=login, password=password)
            print(f"INFO:[{label}] Using .netrc credentials for host {hostname}")
            return credentials
        print(f"DEBUG:[{label}] Credentials for {hostname} in .netrc missing login or password.")
    except Exception as e_netrc_setup:
        # Anything unexpected here must not prevent an unauthenticated attempt.
        print(f"WARNING:[{label}] Error during .netrc setup for {url}: {type(e_netrc_setup).__name__} - {e_netrc_setup}")
    return None


 async def download_file_async(session: aiohttp.ClientSession, url: str, download_folder: Path) -> Path | None:
    """Stream *url* into *download_folder*, using .netrc credentials when available.

    Args:
        session: Open aiohttp session used to issue the GET request.
        url: Resource to download; the target filename is its last path segment.
        download_folder: Existing directory that receives the file.

    Returns:
        The path of the saved file on success, or ``None`` if the download failed
        (HTTP error, connection error, timeout or any other exception). A file
        that this call started writing is removed again on failure.
    """
    # `import aiofiles` does not load the `aiofiles.os` submodule, so it has to be
    # imported explicitly; otherwise the cleanup below fails with AttributeError.
    import aiofiles.os as aio_os

    filename = _filename_from_url(url)
    filepath = download_folder / filename
    auth_from_netrc = _netrc_auth_for(url, filename)
    file_opened = False  # True once this call has created/truncated `filepath`.

    try:
        print(f"INFO:[{filename}] Attempting download (Auth: {'Yes' if auth_from_netrc else 'No'})...")
        timeout = aiohttp.ClientTimeout(connect=15, total=300)  # 15s to connect, 5 min overall
        async with session.get(url, auth=auth_from_netrc, timeout=timeout, allow_redirects=True) as response:
            if auth_from_netrc and response.status == 401:
                # Extra hint before raise_for_status() turns the 401 into an exception.
                print(f"ERROR:[{filename}] Authentication failed with .netrc credentials (HTTP 401).")
            response.raise_for_status()

            async with aiofiles.open(filepath, 'wb') as f:
                file_opened = True
                content_length_str = response.headers.get('Content-Length')
                if content_length_str and content_length_str.isdigit():
                    total_size_mb = int(content_length_str) / (1024 * 1024)
                    print(f"INFO:[{filename}] Starting stream (Size: {total_size_mb:.2f} MB)...")
                else:
                    print(f"INFO:[{filename}] Starting stream (Size: unknown)...")

                async for chunk in response.content.iter_chunked(32 * 1024):  # 32KB chunks
                    if not chunk:
                        break
                    await f.write(chunk)

            file_size_mb = filepath.stat().st_size / (1024 * 1024)  # stat is synchronous
            print(f"SUCCESS:[{filename}] Saved to {filepath} ({file_size_mb:.2f} MB)")
            return filepath
    except aiohttp.ClientResponseError as e:
        print(f"FAILED:[{filename}] HTTP Error (Status {e.status}): {e.message}")
    except aiohttp.ClientConnectionError as e:
        print(f"FAILED:[{filename}] Connection Error: {e}")
    except asyncio.TimeoutError:
        print(f"FAILED:[{filename}] Timeout after specified duration.")
    except Exception as e:
        print(f"FAILED:[{filename}] An unexpected error: {type(e).__name__} - {e}")

    # Failure path. Only remove the file if this call opened it for writing, so a
    # complete file from an earlier run is not deleted when the request itself failed.
    if file_opened:
        try:
            if await aio_os.path.exists(filepath):
                await aio_os.remove(filepath)
                print(f"INFO:[{filename}] Cleaned up partially downloaded file: {filepath}")
        except Exception as e_clean:
            print(f"WARNING:[{filename}] Error cleaning up file {filepath}: {e_clean}")
    return None

 async def main():
    """Download every URL in TARGET_URLS concurrently and print a summary.

    Creates DOWNLOAD_DIR if needed, runs one download task per URL inside a
    shared aiohttp session, then reports which files were saved, their sizes,
    the number of failures and the elapsed wall-clock time.
    """
    DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
    print(f"INFO: Files will be downloaded to: {DOWNLOAD_DIR.resolve()}")
    if UVLOOP_SUCCESS:
        print("INFO: Running with uvloop.")
    else:
        print("INFO: Running with default asyncio event loop.")

    start_time = time.perf_counter()
    async with aiohttp.ClientSession() as session:
        tasks = [asyncio.create_task(download_file_async(session, url, DOWNLOAD_DIR)) for url in TARGET_URLS]
        # return_exceptions=True: a task that raises must not abort the others
        # (or close the session underneath them); it is reported as a failure.
        results = await asyncio.gather(*tasks, return_exceptions=True)
    end_time = time.perf_counter()

    successful_downloads = []
    # gather() returns results in task order, so they line up with TARGET_URLS.
    for url, res in zip(TARGET_URLS, results):
        if isinstance(res, BaseException):
            print(f"FAILED:[{url}] Unhandled error: {type(res).__name__} - {res}")
        elif res is not None:
            successful_downloads.append(res)

    print("\n--- Download Summary ---")
    # A loop is always running while this coroutine executes.
    loop = asyncio.get_running_loop()
    print(f"Event loop used: {loop.__class__.__module__}.{loop.__class__.__name__}")

    print(f"Total files attempted: {len(TARGET_URLS)}")
    print(f"Successfully downloaded: {len(successful_downloads)}")

    if successful_downloads:
        total_size_mb = 0
        for saved_path in successful_downloads:
            # .stat() is synchronous; acceptable here because all downloads are done.
            size_mb = saved_path.stat().st_size / (1024 * 1024)
            total_size_mb += size_mb
            print(f" - {saved_path.name} ({size_mb:.2f} MB)")
        print(f"Total downloaded size: {total_size_mb:.2f} MB")

    failed_count = len(TARGET_URLS) - len(successful_downloads)
    if failed_count:
        print(f"Failed downloads: {failed_count}")

    print(f"All downloads attempted in {end_time - start_time:.2f} seconds.")

 # Script entry point: run the downloader only when executed directly.
 if __name__ == "__main__":
    asyncio.run(main())
	#!/usr/bin/env python3
	# /// script
	# dependencies = [
	# "aiohttp>=3.9.0", # For asynchronous HTTP requests
	# "aiofiles>=23.0.0", # For asynchronous file operations
	# "uvloop>=0.19.0; sys_platform != 'win32'" # For a faster asyncio event loop on POSIX
	# ]
	# ///
	#
	# Script: async_downloader_netrc.py
	# Description: Downloads multiple files asynchronously using aiohttp,
	# with uvloop for a potentially faster asyncio event loop,
	# and includes .netrc support for authentication.
	# Python Version: >= 3.11
	# Author: AI Assistant
	# Date: May 26, 2025
	#
	# How to run this script with 'uv':
	# ---------------------------------
	# 1. Ensure 'uv' is installed (https://github.com/astral-sh/uv).
	#
	# 2. Create a virtual environment and activate it:
	# uv venv .venv
	# source .venv/bin/activate # On Linux/macOS
	# # For Windows CMD: .venv\Scripts\activate.bat
	# # For Windows PowerShell: .venv\Scripts\Activate.ps1
	#
	# 3. Install the dependencies (listed in the '/// script' block above) into the environment:
	# uv pip install "aiohttp>=3.9.0" "aiofiles>=23.0.0" "uvloop>=0.19.0; sys_platform != 'win32'"
	# (Quote each requirement: an unquoted '>=' is treated by the shell as output redirection.)
	#
	# 4. Run the script:
	# uv run async_downloader_netrc.py
	#
	# (Replace 'async_downloader_netrc.py' with your actual script filename if different)
	#
	# .netrc for Authentication:
	# If any URLs require authentication, this script will attempt to find credentials
	# in the .netrc file in your home directory (~/.netrc; on Windows this resolves to
	# %USERPROFILE%\.netrc - Python's netrc module does not look for a _netrc file).
	# Ensure your .netrc file has appropriate permissions (e.g., 600 on POSIX).
	# Example .netrc entry:
	# machine my.securehost.com login myusername password mysecretpassword
	# ---------------------------------

	import asyncio
	import sys # For platform check
	import os
	import time
	from pathlib import Path
	import netrc # For .netrc file parsing
	from urllib.parse import urlparse # To get hostname from URL

	# Event-loop selection. uvloop is swapped in on POSIX before any event loop is
	# created; every failure mode simply leaves the stock asyncio loop in place.
	IS_WINDOWS = sys.platform == "win32"
	UVLOOP_SUCCESS = False
	try:
	    if IS_WINDOWS:
	        print("INFO: On Windows, uvloop is not typically used; using default asyncio event loop.")
	    else:
	        import uvloop
	        uvloop.install()
	        UVLOOP_SUCCESS = True
	        print("INFO: uvloop is installed and will be used as the asyncio event loop.")
	except ImportError:
	    if not IS_WINDOWS:
	        # Two-line hint, emitted as two output lines.
	        print(
	            "INFO: uvloop not found. To potentially improve performance on POSIX systems,",
	            " install it using: uv pip install \"uvloop>=0.19.0\"",
	            sep="\n",
	        )
	except Exception as uvloop_error:
	    print(f"WARNING: Could not install or use uvloop: {uvloop_error}. Using default asyncio event loop.")

	# Hard requirements: without these two packages the script cannot do anything,
	# so report the missing one and exit with status 1.
	try:
	    import aiohttp
	    import aiofiles
	except ImportError as missing_dep:
	    print(f"ERROR: Missing critical dependency: {missing_dep.name}. Please install dependencies as per the instructions at the top of the script.")
	    raise SystemExit(1)


	# --- Configuration ---
	# The sample URLs below are public and need no credentials. The .netrc lookup
	# only matters for hosts that demand HTTP authentication; swap in your own
	# URLs as needed.
	TARGET_URLS = [
	    "https://www.python.org/ftp/python/3.12.4/Python-3.12.4.tgz",
	    "https://www.python.org/ftp/python/3.11.9/Python-3.11.9.tgz",
	    # Placeholder for a protected resource (use a real one to exercise .netrc):
	    # "https://your-protected-server.com/somefile.zip",
	]
	# Destination directory for downloaded files (relative to the working directory).
	DOWNLOAD_DIR = Path("python_sources_netrc_dl")
	# --- End Configuration ---


	def _filename_from_url(url: str) -> str:
	    """Return the last path segment of *url*, ignoring any query string or fragment."""
	    try:
	        url_path = urlparse(url).path
	    except ValueError:
	        # Malformed URL: fall back to treating the raw string as a path.
	        url_path = url
	    # Path(url).name is what this script used historically; keep it as a fallback
	    # for URLs without a path component, and never return an empty name.
	    return Path(url_path).name or Path(url).name or "download"


	def _netrc_auth_for(url: str, label: str) -> aiohttp.BasicAuth | None:
	    """Look up HTTP Basic credentials for *url*'s host in the default .netrc file.

	    Args:
	        url: The URL about to be requested.
	        label: Short tag (the target filename) used to prefix log lines.

	    Returns:
	        An ``aiohttp.BasicAuth`` when the .netrc has both login and password for
	        the host; ``None`` in every other case (no file, no entry, parse error).
	        Never raises: credential lookup is best-effort.
	    """
	    try:
	        hostname = urlparse(url).hostname
	        if not hostname:
	            return None
	        try:
	            # No argument: the netrc module reads .netrc from the home directory.
	            auth_info = netrc.netrc().authenticators(hostname)
	        except FileNotFoundError:
	            # A missing .netrc is the normal case for public URLs; stay quiet.
	            return None
	        except netrc.NetrcParseError as e:
	            print(f"WARNING:[{label}] Could not parse .netrc file: {e}")
	            return None
	        if not auth_info:
	            return None
	        login, _account, password = auth_info  # (login, account, password)
	        if login and password:
	            credentials = aiohttp.BasicAuth(login=login, password=password)
	            print(f"INFO:[{label}] Using .netrc credentials for host {hostname}")
	            return credentials
	        print(f"DEBUG:[{label}] Credentials for {hostname} in .netrc missing login or password.")
	    except Exception as e_netrc_setup:
	        # Anything unexpected here must not prevent an unauthenticated attempt.
	        print(f"WARNING:[{label}] Error during .netrc setup for {url}: {type(e_netrc_setup).__name__} - {e_netrc_setup}")
	    return None


	async def download_file_async(session: aiohttp.ClientSession, url: str, download_folder: Path) -> Path | None:
	    """Stream *url* into *download_folder*, using .netrc credentials when available.

	    Args:
	        session: Open aiohttp session used to issue the GET request.
	        url: Resource to download; the target filename is its last path segment.
	        download_folder: Existing directory that receives the file.

	    Returns:
	        The path of the saved file on success, or ``None`` if the download failed
	        (HTTP error, connection error, timeout or any other exception). A file
	        that this call started writing is removed again on failure.
	    """
	    # `import aiofiles` does not load the `aiofiles.os` submodule, so it has to be
	    # imported explicitly; otherwise the cleanup below fails with AttributeError.
	    import aiofiles.os as aio_os

	    filename = _filename_from_url(url)
	    filepath = download_folder / filename
	    auth_from_netrc = _netrc_auth_for(url, filename)
	    file_opened = False  # True once this call has created/truncated `filepath`.

	    try:
	        print(f"INFO:[{filename}] Attempting download (Auth: {'Yes' if auth_from_netrc else 'No'})...")
	        timeout = aiohttp.ClientTimeout(connect=15, total=300)  # 15s to connect, 5 min overall
	        async with session.get(url, auth=auth_from_netrc, timeout=timeout, allow_redirects=True) as response:
	            if auth_from_netrc and response.status == 401:
	                # Extra hint before raise_for_status() turns the 401 into an exception.
	                print(f"ERROR:[{filename}] Authentication failed with .netrc credentials (HTTP 401).")
	            response.raise_for_status()

	            async with aiofiles.open(filepath, 'wb') as f:
	                file_opened = True
	                content_length_str = response.headers.get('Content-Length')
	                if content_length_str and content_length_str.isdigit():
	                    total_size_mb = int(content_length_str) / (1024 * 1024)
	                    print(f"INFO:[{filename}] Starting stream (Size: {total_size_mb:.2f} MB)...")
	                else:
	                    print(f"INFO:[{filename}] Starting stream (Size: unknown)...")

	                async for chunk in response.content.iter_chunked(32 * 1024):  # 32KB chunks
	                    if not chunk:
	                        break
	                    await f.write(chunk)

	            file_size_mb = filepath.stat().st_size / (1024 * 1024)  # stat is synchronous
	            print(f"SUCCESS:[{filename}] Saved to {filepath} ({file_size_mb:.2f} MB)")
	            return filepath
	    except aiohttp.ClientResponseError as e:
	        print(f"FAILED:[{filename}] HTTP Error (Status {e.status}): {e.message}")
	    except aiohttp.ClientConnectionError as e:
	        print(f"FAILED:[{filename}] Connection Error: {e}")
	    except asyncio.TimeoutError:
	        print(f"FAILED:[{filename}] Timeout after specified duration.")
	    except Exception as e:
	        print(f"FAILED:[{filename}] An unexpected error: {type(e).__name__} - {e}")

	    # Failure path. Only remove the file if this call opened it for writing, so a
	    # complete file from an earlier run is not deleted when the request itself failed.
	    if file_opened:
	        try:
	            if await aio_os.path.exists(filepath):
	                await aio_os.remove(filepath)
	                print(f"INFO:[{filename}] Cleaned up partially downloaded file: {filepath}")
	        except Exception as e_clean:
	            print(f"WARNING:[{filename}] Error cleaning up file {filepath}: {e_clean}")
	    return None

	async def main():
	    """Download every URL in TARGET_URLS concurrently and print a summary.

	    Creates DOWNLOAD_DIR if needed, runs one download task per URL inside a
	    shared aiohttp session, then reports which files were saved, their sizes,
	    the number of failures and the elapsed wall-clock time.
	    """
	    DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
	    print(f"INFO: Files will be downloaded to: {DOWNLOAD_DIR.resolve()}")
	    if UVLOOP_SUCCESS:
	        print("INFO: Running with uvloop.")
	    else:
	        print("INFO: Running with default asyncio event loop.")

	    start_time = time.perf_counter()
	    async with aiohttp.ClientSession() as session:
	        tasks = [asyncio.create_task(download_file_async(session, url, DOWNLOAD_DIR)) for url in TARGET_URLS]
	        # return_exceptions=True: a task that raises must not abort the others
	        # (or close the session underneath them); it is reported as a failure.
	        results = await asyncio.gather(*tasks, return_exceptions=True)
	    end_time = time.perf_counter()

	    successful_downloads = []
	    # gather() returns results in task order, so they line up with TARGET_URLS.
	    for url, res in zip(TARGET_URLS, results):
	        if isinstance(res, BaseException):
	            print(f"FAILED:[{url}] Unhandled error: {type(res).__name__} - {res}")
	        elif res is not None:
	            successful_downloads.append(res)

	    print("\n--- Download Summary ---")
	    # A loop is always running while this coroutine executes.
	    loop = asyncio.get_running_loop()
	    print(f"Event loop used: {loop.__class__.__module__}.{loop.__class__.__name__}")

	    print(f"Total files attempted: {len(TARGET_URLS)}")
	    print(f"Successfully downloaded: {len(successful_downloads)}")

	    if successful_downloads:
	        total_size_mb = 0
	        for saved_path in successful_downloads:
	            # .stat() is synchronous; acceptable here because all downloads are done.
	            size_mb = saved_path.stat().st_size / (1024 * 1024)
	            total_size_mb += size_mb
	            print(f" - {saved_path.name} ({size_mb:.2f} MB)")
	        print(f"Total downloaded size: {total_size_mb:.2f} MB")

	    failed_count = len(TARGET_URLS) - len(successful_downloads)
	    if failed_count:
	        print(f"Failed downloads: {failed_count}")

	    print(f"All downloads attempted in {end_time - start_time:.2f} seconds.")

	# Script entry point: run the downloader only when executed directly.
	if __name__ == "__main__":
	    asyncio.run(main())