loisaidasam · October 20, 2019 19:44
diff --git a/nytimes-xword-scrape.py b/nytimes-xword-scrape.py
 #!/usr/bin/env python

 """Grab the latest NY Times Crossword puzzle

 TODO: Figure out how to get today's puzzle - for some reason this grabs yesterday's
 """

 import os
 import re
 import time

 import requests


 # Endpoints: the account login target that credentials are posted to, and
 # the crosswords landing page that is searched for the puzzle id.
 URL_LOGIN = 'https://myaccount.nytimes.com/auth/login'
 URL_CROSSWORDS = 'https://www.nytimes.com/crosswords'


 # Credentials come from the environment; both are None when unset.
 USERNAME = os.getenv('NYTIMES_LOGIN_EMAIL')
 PASSWORD = os.getenv('NYTIMES_LOGIN_PASSWORD')
 # Sent as the `block_opacity` query parameter of the PDF download. A value
 # read from the environment is a str; the fallback is the int 40.
 BLOCK_OPACITY = os.getenv('NYTIMES_BLOCK_OPACITY', 40)


 def login(session):
    """Authenticate `session` by posting the credentials to URL_LOGIN.

    The form carries USERNAME / PASSWORD plus an `expires` timestamp five
    minutes ahead of the current time. Returns nothing; an HTTP error
    status surfaces through `raise_for_status()` on the response.
    """
    expires_at = int(time.time() + 300)  # 5 minutes from now
    form = {
        'is_continue': False,
        'expires': expires_at,
        'userid': USERNAME,
        'password': PASSWORD,
        'remember': True,
    }
    print("Logging in ...")
    session.post(URL_LOGIN, data=form).raise_for_status()


 def get_puzzle_id(session):
    """Return the id of the puzzle linked from the crosswords page.

    Fetches URL_CROSSWORDS with `session` and searches the response text
    for the first `/svc/crosswords/v2/puzzle/<digits>.puz` link.

    Returns the id as an int, or None when no such link is present so the
    caller can report the failure itself. An HTTP error status surfaces
    through `raise_for_status()` on the response.
    """
    print("Getting puzzle id ...")
    response = session.get(URL_CROSSWORDS)
    response.raise_for_status()
    # Raw string so `\d` is a regex escape rather than an invalid string
    # escape, and `\.` so only a literal ".puz" suffix is accepted.
    matches = re.search(r'/svc/crosswords/v2/puzzle/(\d+)\.puz', response.text)
    if matches is None:
        # Printing `matches.groups()` unconditionally would raise
        # AttributeError here instead of letting None reach the caller.
        return None
    print(matches.groups())
    return int(matches.group(1))


 def scrape():
    """Log in, look up the puzzle id and save that puzzle as a PDF.

    The download is written to `<puzzle_id>.pdf` (a relative path, so it
    lands in the current working directory). Raises Exception when no
    puzzle id could be determined; HTTP error statuses surface through
    `raise_for_status()`.
    """
    http = requests.Session()
    login(http)

    puzzle_id = get_puzzle_id(http)
    print("puzzle_id: %s" % puzzle_id)
    if not puzzle_id:
        raise Exception("Unable to get puzzle_id")

    # The PDF name doubles as the last path segment of the download URL.
    pdf_name = '%s.pdf' % puzzle_id
    url_template = 'https://www.nytimes.com/svc/crosswords/v2/puzzle/%s?block_opacity=%s'
    pdf_url = url_template % (pdf_name, BLOCK_OPACITY)
    print("Downloading %s ..." % pdf_url)

    download = http.get(pdf_url)
    download.raise_for_status()
    with open(pdf_name, 'wb') as out:
        out.write(download.content)


 def main():
    """Script entry point: run `scrape` to download the puzzle PDF."""
    scrape()


 # Only run when executed as a script, so that importing this module (for
 # reuse or testing) does not log in and download as a side effect.
 if __name__ == '__main__':
    main()
	#!/usr/bin/env python

	"""Grab the latest NY Times Crossword puzzle

	TODO: Figure out how to get today's puzzle - for some reason this grabs yesterday's
	"""

	import os
	import re
	import time

	import requests


	# Endpoints: the account login target that credentials are posted to, and
	# the crosswords landing page that is searched for the puzzle id.
	URL_LOGIN = 'https://myaccount.nytimes.com/auth/login'
	URL_CROSSWORDS = 'https://www.nytimes.com/crosswords'


	# Credentials come from the environment; both are None when unset.
	USERNAME = os.getenv('NYTIMES_LOGIN_EMAIL')
	PASSWORD = os.getenv('NYTIMES_LOGIN_PASSWORD')
	# Sent as the `block_opacity` query parameter of the PDF download. A value
	# read from the environment is a str; the fallback is the int 40.
	BLOCK_OPACITY = os.getenv('NYTIMES_BLOCK_OPACITY', 40)


	def login(session):
	    """Authenticate `session` by posting the credentials to URL_LOGIN.

	    The form carries USERNAME / PASSWORD plus an `expires` timestamp five
	    minutes ahead of the current time. Returns nothing; an HTTP error
	    status surfaces through `raise_for_status()` on the response.
	    """
	    expires_at = int(time.time() + 300)  # 5 minutes from now
	    form = {
	        'is_continue': False,
	        'expires': expires_at,
	        'userid': USERNAME,
	        'password': PASSWORD,
	        'remember': True,
	    }
	    print("Logging in ...")
	    session.post(URL_LOGIN, data=form).raise_for_status()


	def get_puzzle_id(session):
	    """Return the id of the puzzle linked from the crosswords page.

	    Fetches URL_CROSSWORDS with `session` and searches the response text
	    for the first `/svc/crosswords/v2/puzzle/<digits>.puz` link.

	    Returns the id as an int, or None when no such link is present so the
	    caller can report the failure itself. An HTTP error status surfaces
	    through `raise_for_status()` on the response.
	    """
	    print("Getting puzzle id ...")
	    response = session.get(URL_CROSSWORDS)
	    response.raise_for_status()
	    # Raw string so `\d` is a regex escape rather than an invalid string
	    # escape, and `\.` so only a literal ".puz" suffix is accepted.
	    matches = re.search(r'/svc/crosswords/v2/puzzle/(\d+)\.puz', response.text)
	    if matches is None:
	        # Printing `matches.groups()` unconditionally would raise
	        # AttributeError here instead of letting None reach the caller.
	        return None
	    print(matches.groups())
	    return int(matches.group(1))


	def scrape():
	    """Log in, look up the puzzle id and save that puzzle as a PDF.

	    The download is written to `<puzzle_id>.pdf` (a relative path, so it
	    lands in the current working directory). Raises Exception when no
	    puzzle id could be determined; HTTP error statuses surface through
	    `raise_for_status()`.
	    """
	    http = requests.Session()
	    login(http)

	    puzzle_id = get_puzzle_id(http)
	    print("puzzle_id: %s" % puzzle_id)
	    if not puzzle_id:
	        raise Exception("Unable to get puzzle_id")

	    # The PDF name doubles as the last path segment of the download URL.
	    pdf_name = '%s.pdf' % puzzle_id
	    url_template = 'https://www.nytimes.com/svc/crosswords/v2/puzzle/%s?block_opacity=%s'
	    pdf_url = url_template % (pdf_name, BLOCK_OPACITY)
	    print("Downloading %s ..." % pdf_url)

	    download = http.get(pdf_url)
	    download.raise_for_status()
	    with open(pdf_name, 'wb') as out:
	        out.write(download.content)


	def main():
	    """Script entry point: run `scrape` to download the puzzle PDF."""
	    scrape()


	# Only run when executed as a script, so that importing this module (for
	# reuse or testing) does not log in and download as a side effect.
	if __name__ == '__main__':
	    main()