Python script that downloads LowRes NX programs (.nx) and their screenshot images from the LowRes NX forum (just change the max topic ID in main()).
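The script needs the requests, beautifulsoup4 and Pillow packages; if any are missing, pip install requests beautifulsoup4 pillow covers all three. Downloads land in a lowresnx_downloads folder next to the script, and files that already exist there are skipped, so an interrupted run can simply be restarted.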
import os
import re
import time
import urllib.parse
from io import BytesIO

import requests
from bs4 import BeautifulSoup

# Pillow is needed to convert non-PNG screenshots; fail early with a clear message
try:
    from PIL import Image
except ImportError:
    raise SystemExit("Pillow is not installed; run: pip install pillow")
def download_and_convert_image(url, filename, folder="lowresnx_downloads"):
    """Download an image and convert it to PNG if it's not already in PNG format."""
    try:
        # Create the download folder if it doesn't exist
        os.makedirs(folder, exist_ok=True)
        filepath = os.path.join(folder, filename)

        # Skip files that were already downloaded
        if os.path.exists(filepath):
            print(f"File already exists: {filepath}")
            return True

        # Download the image (the whole body is needed in memory for Pillow anyway)
        response = requests.get(url)
        response.raise_for_status()

        # If the server already serves a PNG, save the bytes as-is
        content_type = response.headers.get('content-type', '').lower()
        if 'image/png' in content_type or url.lower().endswith('.png'):
            with open(filepath, 'wb') as f:
                f.write(response.content)
        else:
            # Otherwise decode the image with Pillow and re-encode it as PNG
            try:
                image = Image.open(BytesIO(response.content))
                image.save(filepath, 'PNG')
            except Exception as e:
                print(f"Error converting image to PNG: {e}")
                # Fall back to saving the original bytes unchanged
                with open(filepath, 'wb') as f:
                    f.write(response.content)

        print(f"Successfully downloaded and processed: {filepath}")
        return True
    except Exception as e:
        print(f"Error downloading/processing {url} to {filename}: {e}")
        return False
def download_file(url, filename, folder="lowresnx_downloads"):
    """Download a file from the given URL and save it with the given filename."""
    try:
        # Create the download folder if it doesn't exist
        os.makedirs(folder, exist_ok=True)
        filepath = os.path.join(folder, filename)

        # Skip files that were already downloaded
        if os.path.exists(filepath):
            print(f"File already exists: {filepath}")
            return True

        # Stream the download to disk in 8 KB chunks
        response = requests.get(url, stream=True)
        response.raise_for_status()  # Raise an exception for HTTP errors
        with open(filepath, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

        print(f"Successfully downloaded: {filepath}")
        return True
    except Exception as e:
        print(f"Error downloading {url} to {filename}: {e}")
        return False
def scrape_topic_page(topic_id, base_url="https://lowresnx.inutilis.com"):
    """Scrape a single topic page and download any .nx files and .png thumbnails found."""
    url = f"{base_url}/topic.php?id={topic_id}"
    try:
        response = requests.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # The forum wraps program attachments in an attachment-info div
        attachment_info = soup.find('div', class_='attachment-info')
        if attachment_info:
            # Find the link to the .nx program file
            download_link = attachment_info.find('a', href=re.compile(r'\.nx$'))
            if download_link:
                nx_file_url = download_link['href']
                # Prefer the download attribute for the filename, else the URL basename
                nx_filename = download_link.get('download', os.path.basename(nx_file_url))
                # Resolve relative links against the site root
                full_nx_url = urllib.parse.urljoin(f"{base_url}/", nx_file_url)

                # Download the .nx file
                nx_downloaded = download_file(full_nx_url, nx_filename)

                # If the .nx file was downloaded, try to fetch its screenshot too
                if nx_downloaded:
                    png_filename = nx_filename.replace('.nx', '.png')
                    screenshot_img = soup.find('img', class_='screenshot')
                    if screenshot_img and 'src' in screenshot_img.attrs:
                        full_img_url = urllib.parse.urljoin(f"{base_url}/", screenshot_img['src'])
                        # Download the screenshot and convert it to PNG if needed
                        download_and_convert_image(full_img_url, png_filename)
                    else:
                        # Fallback: guess the thumbnail URL from the .nx URL
                        full_png_url = urllib.parse.urljoin(f"{base_url}/", nx_file_url.replace('.nx', '.png'))
                        download_file(full_png_url, png_filename)
                return True

        print(f"No attachment found or download failed for topic ID: {topic_id}")
        return False
    except Exception as e:
        print(f"Error processing topic ID {topic_id}: {e}")
        return False
def main():
    # All downloads land in the folder used by the download helpers above.
    # (Dependency availability is checked at import time, so no runtime
    # pip install is needed here.)
    download_folder = "lowresnx_downloads"
    os.makedirs(download_folder, exist_ok=True)

    # Set the range of topic IDs to scrape (raise end_id as new topics appear)
    start_id = 1
    end_id = 3676
    total_topics = end_id - start_id + 1
    successful_downloads = 0

    print(f"Starting to scrape {total_topics} topic pages...")
    for topic_id in range(start_id, end_id + 1):
        print(f"Processing topic ID: {topic_id} ({topic_id - start_id + 1}/{total_topics})")
        if scrape_topic_page(topic_id):
            successful_downloads += 1
        # Small delay between requests to avoid hammering the server
        time.sleep(1)

    print(f"Scraping completed. Successfully downloaded files from {successful_downloads} out of {total_topics} topics.")


if __name__ == "__main__":
    main()
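To pick up programs posted after this gist was written, raise end_id in main() to the newest topic ID visible on the forum; because files that already exist are skipped, a re-run only fetches the new entries.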