Skip to content

Instantly share code, notes, and snippets.

@Pobega
Last active August 3, 2024 03:56
Show Gist options
  • Save Pobega/2948c97d58c79e741df787081f97126c to your computer and use it in GitHub Desktop.
Save Pobega/2948c97d58c79e741df787081f97126c to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import argparse
from pathlib import Path
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup, SoupStrainer
_SITE_ROOT = 'https://tic80.com'
_REQUEST_TIMEOUT = 30  # seconds; requests waits indefinitely when no timeout is given
_MAX_ATTEMPTS = 2  # one initial try plus one retry, as the original script did


def _fetch(url):
    """Download ``url`` and return the response, or ``None`` if it cannot be fetched.

    The request is tried up to ``_MAX_ATTEMPTS`` times. Transport errors and
    HTTP error statuses both count as failures, so an error page is never
    mistaken for real content by the callers.
    """
    for attempt in range(1, _MAX_ATTEMPTS + 1):
        try:
            response = requests.get(url, timeout=_REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as err:
            print(f' Request failed ({attempt}/{_MAX_ATTEMPTS}) for {url}: {err}')
        else:
            return response
    return None


def _scrape_game_urls(page_url):
    """Return the absolute game-page URLs linked from one listing page.

    An empty list is returned when the listing page cannot be fetched.
    """
    response = _fetch(page_url)
    if response is None:
        return []
    soup = BeautifulSoup(response.content, 'html5lib')
    game_urls = []
    for thumbnail in soup.find_all('div', attrs={'class': 'thumbnail'}):
        anchor = thumbnail.a
        href = anchor.get('href') if anchor is not None else None
        if href:
            # urljoin resolves relative links and leaves absolute ones intact.
            game_urls.append(urljoin(page_url, href))
    return game_urls


def _scrape_cart(game_url):
    """Return ``{'url': ..., 'cover': ...}`` for one game page, or ``None``.

    ``None`` means the page could not be fetched or contains no link ending
    in ``.tic``. ``'cover'`` is ``None`` when the page has no ``og:image``
    meta tag with a ``content`` attribute.
    """
    response = _fetch(game_url)
    if response is None:
        return None
    soup = BeautifulSoup(response.content, 'html.parser')

    cart_url = None
    for link in soup.find_all('a', href=True):
        if link['href'].endswith('.tic'):
            # Keep overwriting so the last matching link wins, as before.
            cart_url = urljoin(game_url, link['href'])
    if cart_url is None:
        return None

    cover_tag = soup.find('meta', attrs={'property': 'og:image'})
    cover = cover_tag.get('content') if cover_tag is not None else None
    return {'url': cart_url, 'cover': urljoin(game_url, cover) if cover else None}


def _download_to(url, destination):
    """Write the body of ``url`` to ``destination``; return ``True`` on success."""
    response = _fetch(url)
    if response is None:
        return False
    destination.write_bytes(response.content)
    return True


def main():
    """Scrape tic80.com listing pages and download each game's cart and cover.

    Command-line options:
        -p/--pages     number of listing pages to scrape (default 30)
        -c/--category  ``cat`` value used in the listing URL (default 0)

    Carts are written to ``Carts/<name>.tic`` and covers to
    ``Imgs/<name>.gif``, relative to the current working directory; both
    directories are created when missing. Pages, games, or files that cannot
    be fetched are reported on stdout and skipped instead of aborting the run.
    """
    parser = argparse.ArgumentParser(
        prog='TIC Scraper',
        description='Scrape the top games from tic80.com')
    parser.add_argument('-p', '--pages', default=30, type=int, dest='pages', help='The number of pages to scrape.')
    parser.add_argument('-c', '--category', default=0, type=int, dest='category', help='The category in the URL bar at tic80.com (defaults to games)')
    args = parser.parse_args()

    pages = [
        f'{_SITE_ROOT}/play?cat={args.category}&sort=2&page={i}'
        for i in range(args.pages)
    ]

    print("Scraping game list...")
    game_urls = []
    for page in pages:
        print(f' Scraping {page}')
        game_urls.extend(_scrape_game_urls(page))

    print("Scraping game data...")
    scraped_carts = []
    for idx, game_url in enumerate(game_urls, start=1):
        print(f' Scraping {game_url} ({idx}/{len(game_urls)})...')
        cart = _scrape_cart(game_url)
        if cart is None:
            print(f' No cart found at {game_url}, skipping')
            continue
        scraped_carts.append(cart)
    print(scraped_carts)

    if not scraped_carts:
        # Nothing to save, so do not create empty output directories.
        return

    carts_dir = Path('Carts')
    imgs_dir = Path('Imgs')
    carts_dir.mkdir(parents=True, exist_ok=True)
    imgs_dir.mkdir(parents=True, exist_ok=True)

    for idx, cart in enumerate(scraped_carts, start=1):
        # File name is the last URL segment up to its first dot.
        game_name = cart['url'].split('/')[-1].split('.')[0]
        print(f'Downloading {game_name} ({idx}/{len(scraped_carts)})...')
        _download_to(cart['url'], carts_dir / f'{game_name}.tic')
        if cart['cover']:
            _download_to(cart['cover'], imgs_dir / f'{game_name}.gif')
# Run the scraper only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment