Python script to download LowRes NX ROMs (.nx files) and their thumbnail images from lowresnx.inutilis.com (just change the max topic ID, end_id in main(); a usage note follows the script).
import os
import requests
from bs4 import BeautifulSoup
import time
import re
import urllib.parse
from PIL import Image
from io import BytesIO

def download_and_convert_image(url, filename, folder="downloads"):
    """Download an image and convert it to PNG if it's not already in PNG format."""
    try:
        # Create folder if it doesn't exist
        if not os.path.exists(folder):
            os.makedirs(folder)

        # Construct the full file path
        filepath = os.path.join(folder, filename)

        # Check if file already exists
        if os.path.exists(filepath):
            print(f"File already exists: {filepath}")
            return True

        # Download the image
        response = requests.get(url, stream=True)
        response.raise_for_status()

        # Check if the image is already a PNG
        content_type = response.headers.get('content-type', '').lower()
        if 'image/png' in content_type or url.lower().endswith('.png'):
            # If it's already a PNG, just save it
            with open(filepath, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        else:
            # If it's not a PNG, convert it
            try:
                image = Image.open(BytesIO(response.content))
                # Save as PNG
                image.save(filepath, 'PNG')
            except Exception as e:
                print(f"Error converting image to PNG: {e}")
                # Fallback to saving the original image
                with open(filepath, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)

        print(f"Successfully downloaded and processed: {filepath}")
        return True
    except Exception as e:
        print(f"Error downloading/processing {url} to {filename}: {e}")
        return False

def download_file(url, filename, folder="downloads"):
    """Download a file from the given URL and save it with the given filename."""
    try:
        # Create folder if it doesn't exist
        if not os.path.exists(folder):
            os.makedirs(folder)

        # Construct the full file path
        filepath = os.path.join(folder, filename)

        # Check if file already exists
        if os.path.exists(filepath):
            print(f"File already exists: {filepath}")
            return True

        # Download the file
        response = requests.get(url, stream=True)
        response.raise_for_status()  # Raise an exception for HTTP errors

        # Save the file
        with open(filepath, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

        print(f"Successfully downloaded: {filepath}")
        return True
    except Exception as e:
        print(f"Error downloading {url} to {filename}: {e}")
        return False

def scrape_topic_page(topic_id, base_url="https://lowresnx.inutilis.com", folder="downloads"):
    """Scrape a single topic page and download any .nx files and .png thumbnails found."""
    url = f"{base_url}/topic.php?id={topic_id}"
    try:
        # Send a GET request to the topic page
        response = requests.get(url)
        response.raise_for_status()

        # Parse the HTML content
        soup = BeautifulSoup(response.text, 'html.parser')

        # Look for the attachment-info div
        attachment_info = soup.find('div', class_='attachment-info')
        if attachment_info:
            # Find the download link
            download_link = attachment_info.find('a', href=re.compile(r'\.nx$'))
            if download_link:
                # Extract the .nx file URL and the download attribute
                nx_file_url = download_link['href']
                nx_filename = download_link.get('download', os.path.basename(nx_file_url))

                # Construct the full URL for the .nx file
                full_nx_url = f"{base_url}/{nx_file_url}" if not nx_file_url.startswith('http') else nx_file_url

                # Download the .nx file
                nx_downloaded = download_file(full_nx_url, nx_filename, folder)

                # If the .nx file was downloaded successfully, try to download the associated image
                if nx_downloaded:
                    # Look for the screenshot image
                    screenshot_img = soup.find('img', class_='screenshot')
                    if screenshot_img and 'src' in screenshot_img.attrs:
                        # Get the image URL from the src attribute
                        img_src = screenshot_img['src']

                        # Construct the full image URL
                        full_img_url = f"{base_url}/{img_src}" if not img_src.startswith('http') else img_src

                        # Create PNG filename based on the .nx filename
                        png_filename = nx_filename.replace('.nx', '.png')

                        # Download and potentially convert the image
                        download_and_convert_image(full_img_url, png_filename, folder)
                    else:
                        # Fallback: guess the thumbnail URL by replacing .nx with .png
                        png_file_url = nx_file_url.replace('.nx', '.png')
                        png_filename = nx_filename.replace('.nx', '.png')
                        full_png_url = f"{base_url}/{png_file_url}" if not png_file_url.startswith('http') else png_file_url

                        # Download the .png file
                        download_file(full_png_url, png_filename, folder)

                    return True

        print(f"No attachment found or download failed for topic ID: {topic_id}")
        return False
    except Exception as e:
        print(f"Error processing topic ID {topic_id}: {e}")
        return False

def main():
    # Create a folder for the downloads
    download_folder = "lowresnx_downloads"
    if not os.path.exists(download_folder):
        os.makedirs(download_folder)

    # Check if the Pillow library is installed
    # (note: the top-level "from PIL import Image" will already have failed if it is missing)
    try:
        import PIL
    except ImportError:
        print("PIL (Pillow) library is not installed. Installing it now...")
        # pip.main() was removed in pip 10+, so invoke pip through the current interpreter instead
        import subprocess
        import sys
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'pillow'])
        print("Pillow installed successfully.")

    # Set the range of topic IDs to scrape
    start_id = 1
    end_id = 3676

    total_topics = end_id - start_id + 1
    successful_downloads = 0

    print(f"Starting to scrape {total_topics} topic pages...")

    # Loop through each topic ID
    for topic_id in range(start_id, end_id + 1):
        print(f"Processing topic ID: {topic_id} ({topic_id - start_id + 1}/{total_topics})")

        # Scrape the topic page, saving downloads into the folder created above
        success = scrape_topic_page(topic_id, folder=download_folder)
        if success:
            successful_downloads += 1

        # Add a small delay to avoid hammering the server
        time.sleep(1)

    print(f"Scraping completed. Successfully downloaded files from {successful_downloads} out of {total_topics} topics.")


if __name__ == "__main__":
    main()
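
Usage note: the script depends on the third-party packages requests, beautifulsoup4, and Pillow (all installable from PyPI via pip); the remaining imports are standard-library modules. Run as-is, it walks topic IDs from start_id to end_id and saves the .nx files and .png thumbnails into the lowresnx_downloads folder. For a quick test of a single topic before scraping the whole range, the scraper function can also be called directly; a minimal sketch, assuming the script was saved as lowresnx_downloader.py (that filename, the topic ID, and the folder name are illustrative, not part of the original gist):

    # Import the scraper from the saved script; nothing runs on import thanks to the __main__ guard
    from lowresnx_downloader import scrape_topic_page

    # Fetch one topic page and download its .nx attachment and thumbnail, if present
    scrape_topic_page(42, folder="test_downloads")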