Myrient Scraper, a simple web scraper to download games (ROMs) from Myrient (https://myrient.erista.me); tweak the variables at the top to change the region and game type.
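The script depends on requests, beautifulsoup4, and colorlog; install them with pip install requests beautifulsoup4 colorlog before running.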
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, unquote
import logging
import colorlog
from concurrent.futures import ThreadPoolExecutor
from time import time
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
# Logging setup
handler = colorlog.StreamHandler()
formatter = colorlog.ColoredFormatter(
    "%(log_color)s%(asctime)s - %(levelname)s - %(message)s",
    log_colors={
        'DEBUG': 'white',
        'INFO': 'cyan',
        'WARNING': 'yellow',
        'ERROR': 'red',
        'CRITICAL': 'bold_red',
    }
)
handler.setFormatter(formatter)
logger = colorlog.getLogger(__name__)
logger.addHandler(handler)
logger.setLevel(logging.INFO)
# Session setup
session = requests.Session()
retries = Retry(
    total=5,
    backoff_factor=1,
    status_forcelist=[500, 502, 503, 504]
)
adapter = HTTPAdapter(max_retries=retries)
session.mount("https://", adapter)
session.mount("http://", adapter)
# Configuration
url = "https://myrient.erista.me/files/No-Intro/Nintendo%20-%20Super%20Nintendo%20Entertainment%20System/"
search_extension = ".zip"
region_check = True
region = "Europe"
world = True
chunk_size = 8192 * 4
download_dir = "out-snes/"
os.makedirs(download_dir, exist_ok=True)
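# Example tweak: to fetch USA-region Game Boy Advance ROMs instead, point
# `url` at the matching No-Intro directory and adjust the region filter.
# The directory name below is an assumption about Myrient's layout; check
# the site's No-Intro listing for the exact path.
#   url = "https://myrient.erista.me/files/No-Intro/Nintendo%20-%20Game%20Boy%20Advance/"
#   region = "USA"
#   download_dir = "out-gba/"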
# Pre-load completed files
completed_files = set(os.listdir(download_dir))
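# Note: any file already present in download_dir counts as completed, so a
# partially written archive from an interrupted run is skipped too; delete
# incomplete files before re-running.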
def fetch_links(target_url):
    """Fetch links from the given URL."""
    try:
        logger.info(f"🌐 Fetching page: {target_url}")
        response = session.get(target_url, timeout=30)  # don't hang forever on a stalled connection
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        return [
            urljoin(target_url, unquote(a_tag["href"]))
            for a_tag in soup.find_all("a", href=True)
            if a_tag["href"].endswith(search_extension)
        ]
    except requests.exceptions.RequestException as e:
        logger.error(f"❌ Error fetching page: {e}")
        return []
def download_file(file_url):  # renamed from `url` to avoid shadowing the module-level `url`
    """Download a single file."""
    local_filename = os.path.join(download_dir, os.path.basename(file_url))
    if os.path.basename(local_filename) in completed_files:
        logger.info(f"✅ Already downloaded: {local_filename}")
        return
    if region_check and (region not in local_filename and not ("(World)" in local_filename and world)):
        logger.info(f"⛔ Skipping file: {local_filename}, not for region '{region}'")
        return
    try:
        logger.info(f"⬇️ Starting download: {file_url}")
        with session.get(file_url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(local_filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size):
                    if chunk:
                        f.write(chunk)
        logger.info(f"✅ Download completed: {local_filename}")
        completed_files.add(os.path.basename(local_filename))
    except requests.exceptions.RequestException as e:
        logger.error(f"❌ Error downloading file {file_url}: {e}")
def main():
    start_time = time()
    try:
        # Fetch all links
        links = fetch_links(url)
        if not links:
            logger.warning("⚠️ No links found!")
            return
        logger.info(f"🔗 Found {len(links)} links to process.")
        with ThreadPoolExecutor(max_workers=8) as executor:
            executor.map(download_file, links)
    except KeyboardInterrupt:
        logger.warning("❌ Download interrupted by user.")
    end_time = time()
    elapsed_time = end_time - start_time
    logger.info(f"⏱️ Finished in {elapsed_time:.2f} seconds.")
if __name__ == "__main__":
    main()
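Because fetch_links and download_file are plain module-level functions, the gist can also be driven from another script. A minimal dry-run sketch, assuming the gist is saved as myrient_scraper.py (a hypothetical filename), that lists matching files without downloading anything:

import os
import myrient_scraper  # hypothetical module name; save the gist under this filename

# fetch_links returns the full download URLs; print just the archive names
for link in myrient_scraper.fetch_links(myrient_scraper.url):
    print(os.path.basename(link))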