Created
October 20, 2019 19:44
-
-
Save loisaidasam/27caab9bbbcd0c9b16de626471c586e4 to your computer and use it in GitHub Desktop.
Grab the latest NY Times Crossword puzzle
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""Grab the latest NY Times Crossword puzzle | |
TODO: Figure out how to get today's puzzle - for some reason this grabs yesterday's | |
""" | |
import os | |
import re | |
import time | |
import requests | |
# Endpoints: the login form target and the crosswords landing page.
URL_LOGIN = 'https://myaccount.nytimes.com/auth/login'
URL_CROSSWORDS = 'https://www.nytimes.com/crosswords'

# Credentials are read from the environment; each is None when unset.
USERNAME = os.getenv('NYTIMES_LOGIN_EMAIL')
PASSWORD = os.getenv('NYTIMES_LOGIN_PASSWORD')

# Sent as the `block_opacity` query parameter of the download URL.
# A str when taken from the environment, the int 40 otherwise.
BLOCK_OPACITY = os.getenv('NYTIMES_BLOCK_OPACITY', 40)
def login(session):
    """Submit the configured credentials to the login endpoint.

    Posts a form carrying USERNAME and PASSWORD to URL_LOGIN through
    ``session``.

    Args:
        session: Object whose ``post(url, data=...)`` returns a response
            exposing ``raise_for_status()``.

    Raises:
        Whatever ``raise_for_status()`` raises for the login response.
    """
    expires_at = int(time.time() + 300)  # 5 minutes from now
    form = {
        'is_continue': False,
        'expires': expires_at,
        'userid': USERNAME,
        'password': PASSWORD,
        'remember': True,
    }
    print("Logging in ...")
    session.post(URL_LOGIN, data=form).raise_for_status()
def get_puzzle_id(session):
    """Look up a puzzle id on the crosswords landing page.

    Fetches URL_CROSSWORDS through ``session`` and searches the response
    text for the first ``/svc/crosswords/v2/puzzle/<digits>.puz`` link.

    Args:
        session: Object whose ``get(url)`` returns a response exposing
            ``raise_for_status()`` and a ``text`` attribute.

    Returns:
        The puzzle id as an int, or None when the page has no such link.

    Raises:
        Whatever ``raise_for_status()`` raises for the page response.
    """
    print("Getting puzzle id ...")
    response = session.get(URL_CROSSWORDS)
    response.raise_for_status()
    # Raw string so `\d` is a regex escape rather than an invalid string
    # escape; the dot before "puz" is escaped to match a literal "." only.
    match = re.search(r'/svc/crosswords/v2/puzzle/(\d+)\.puz', response.text)
    if match is None:
        # Return before the debug print: dereferencing a failed match would
        # raise AttributeError and hide the caller's own "no id" error.
        return None
    print(match.groups())
    return int(match.group(1))
def scrape():
    """Log in, find a puzzle id, and save that puzzle's PDF locally.

    The PDF is written to ``<puzzle_id>.pdf`` in the current working
    directory. BLOCK_OPACITY is sent as the ``block_opacity`` query
    parameter of the download request.

    Raises:
        Exception: If no puzzle id could be determined.
        Whatever ``raise_for_status()`` raises for the download response,
        plus anything raised by ``login`` or ``get_puzzle_id``.
    """
    http = requests.Session()
    login(http)

    puzzle_id = get_puzzle_id(http)
    print("puzzle_id: %s" % puzzle_id)
    if not puzzle_id:
        raise Exception("Unable to get puzzle_id")

    # The same "<id>.pdf" name serves as the remote resource name and the
    # local output file name.
    pdf_name = '%s.pdf' % puzzle_id
    url_args = (pdf_name, BLOCK_OPACITY)
    download_url = 'https://www.nytimes.com/svc/crosswords/v2/puzzle/%s?block_opacity=%s' % url_args

    print("Downloading %s ..." % download_url)
    reply = http.get(download_url)
    reply.raise_for_status()

    with open(pdf_name, 'wb') as out:
        out.write(reply.content)
def main():
    """Script entry point: perform a single scrape."""
    scrape()


# NOTE(review): called unconditionally, so importing this module also runs
# the scrape; consider an `if __name__ == "__main__":` guard.
main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment