Created
January 19, 2023 21:16
-
-
Save Atari2/a81696f68319e5a8bcede2e21c4396d1 to your computer and use it in GitHub Desktop.
downloadSMWCSection
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import os | |
from bs4 import BeautifulSoup | |
import time | |
import re | |
import logging | |
import shutil | |
from urllib.parse import unquote_plus | |
# Error log for the downloader. The logger is obtained through
# logging.getLogger() rather than by instantiating logging.Logger directly,
# so it is registered with the logging module and can be looked up by name.
logger = logging.getLogger('Errors')
logger.setLevel(logging.ERROR)
# Keep records out of the root logger: errors are meant to go to the file only.
logger.propagate = False

# mode='w' truncates download.log on every run, so the file only ever
# describes the most recent run.
handler = logging.FileHandler(filename='download.log', encoding='utf-8', mode='w')
handler.setFormatter(logging.Formatter('%(asctime)s:%(levelname)s:%(name)s: %(message)s'))
logger.addHandler(handler)
def download():
    """Interactively download every file listed in one SMW Central section.

    Prompts on stdin for the number of listing pages and for the section
    name, then walks listing pages 1 through ``pages`` (inclusive). For each
    page a ``Page<i>`` directory is created in the current working directory
    (an existing one is removed first) and every link whose ``href`` matches
    ``dl.smwcentral.net`` is downloaded into it.

    Failures on individual files are written to the module-level ``logger``
    and do not stop the run. Invalid user input prints a message and returns
    without downloading anything.

    Returns:
        None.
    """
    try:
        pages = int(input('How many pages does the section have?'))
    except ValueError:
        print('Invalid pages number was passed')
        return
    if pages < 1:
        # A section cannot have zero or a negative number of pages.
        print('Invalid pages number was passed')
        return

    # Maps the name the user types to the value of the ``s`` query parameter.
    valid_sections = {
        'graphics': 'smwgraphics',
        'music': 'smwmusic',
        'blocks': 'smwblocks',
        'sprites': 'smwsprites',
        'patches': 'smwpatches',
        'uberasm': 'uberasm',
        'hacks': 'smwhacks'
    }
    choice = input(f'Which section do you want to download the files from? Valid values are '
                   f'{list(valid_sections)}').lower()
    if choice not in valid_sections:
        print('Invalid section was passed')
        return
    section = valid_sections[choice]

    # Compiled once; the same pattern is used for every listing page.
    download_href = re.compile('dl.smwcentral.net')

    with requests.Session() as sess:
        # range() excludes its stop value, so pages + 1 is needed for the
        # last listing page to be visited.
        for i in range(1, pages + 1):
            print(f'Started page {i}')
            pagename = f'Page{i}'
            try:
                os.mkdir(pagename)
            except FileExistsError as e:
                # Leftover directory from a previous run: start from scratch.
                logger.error(f'Ignoring exception: {e} on page {i}')
                shutil.rmtree(pagename)
                os.mkdir(pagename)

            page = sess.get(f'https://www.smwcentral.net/?p=section&s={section}&u=0&g=0&n={i}&o=date&d=desc')
            soup = BeautifulSoup(page.text, 'html.parser')
            for link in soup.find_all('a', href=download_href):
                try:
                    # NOTE(review): prefixing 'https:' assumes the hrefs are
                    # protocol-relative ('//dl.smwcentral.net/...') - confirm.
                    with sess.get('https:' + link['href']) as response:
                        # Do not save an HTTP error page as if it were the
                        # file; the error is logged by the handler below.
                        response.raise_for_status()
                        # The last path component, URL-decoded, is the name.
                        filename = unquote_plus(link['href']).split('/')[-1]
                        with open(os.path.join(pagename, filename), 'wb') as f:
                            f.write(response.content)
                        print(f'\tSaved file {filename}')
                except Exception as e:
                    # Best effort: one bad file must not abort the whole run.
                    logger.error(f'Ignoring exception: {e} on link {link}')
                # Pause between downloads to avoid hammering the server.
                time.sleep(0.5)
            print(f'Finished page {i}')
    print('Eventual download errors will be reported in download.log')
# Run the interactive downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    download()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment