Created
February 12, 2018 07:46
-
-
Save scrapehero/edc9d9dffd24402a9c176862d076db18 to your computer and use it in GitHub Desktop.
Python 3 Code for scraping movie details from fandango.com
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from lxml import html, etree | |
import datetime | |
import requests | |
import re | |
import os | |
import sys | |
import unicodecsv as csv | |
import argparse | |
import json | |
# from exceptions import ValueError | |
def parse(location, showdate):
    """Scrape theater and movie listings from fandango.com.

    The location is first resolved through Fandango's autocomplete
    endpoint; the first suggestion is then used to query the
    "theaters with showtimes" endpoint for the requested date.

    Args:
        location: Zip code or "city, state" text to search for. It is
            appended verbatim to the autocomplete URL.
        showdate: Show date string (the caller validates YYYY-MM-DD).

    Returns:
        A list of dicts, one per (theater, movie) pair, with the keys
        Theatre_Name, Theatre_Address, Movie_Name, Show_Date,
        Movie_Rating, Star_Rating, Duration, Genre and
        Location_or_Zipcode. The list is empty when a request fails or
        when no location, theater or movie is found.
    """
    print("Fetching Locations..")
    movie_listings = []
    # Without a timeout a stalled connection would block the script forever.
    request_timeout = 30

    # Cookie for searching theater location
    cookie = {
        'akamai_generated_location': '{"zip":"""","city":"CLIFTON","state":"NJ","county":"PASSAIC","areacode":"""","lat":"40.8800","long":"-74.1446","countrycode":""""}'
    }
    # XHR-style headers; the same set is sent with both requests below.
    location_headers = {
        'referer': 'https://www.fandango.com/',
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36',
        'x-requested-with': 'XMLHttpRequest'
    }
    # Location autocomplete API endpoint
    location_url = 'https://www.fandango.com/napi/home/autocompleteDesktopSearch/' + location
    # Query parameters for the theater listing request
    data = {
        'zipCode': '',
        'city': '',
        'state': '',
        'date': str(showdate),
        'page': 1,
        'favTheaterOnly': False,
        'limit': 30,
        'offset': 0,
        'isdesktop': True
    }

    # Retrieving available locations. A non-JSON body (e.g. an HTML block
    # page) makes .json() raise a ValueError subclass, so catch that too.
    try:
        location_response = requests.get(
            location_url,
            cookies=cookie,
            headers=location_headers,
            timeout=request_timeout,
        ).json()
    except (requests.exceptions.RequestException, ValueError) as error:
        print("Failed to get location details: %s" % error)
        return movie_listings

    if not isinstance(location_response, dict):
        print("No location found")
        return movie_listings
    results_by_type = location_response.get('resultsByType') or {}
    locations = (results_by_type.get('locations') or {}).get('items')
    if not locations:
        print("No location found")
        return movie_listings

    # Selecting first location from available locations
    searched_location = locations[0]
    location_name = searched_location.get('name') or ''
    state = searched_location.get('state')
    # City is required to list theaters when the input was a place name;
    # it is taken from the part of the location name before the comma.
    city = location_name.split(',')[0].strip() if ',' in location_name else None
    if city and state:
        data['city'] = city
        data['state'] = state
    else:
        # city/state are not needed when the input was a zip code;
        # fall back to the raw input if the suggestion carries no name.
        data['zipCode'] = location_name or location

    movie_url = 'https://www.fandango.com/napi/theaterswithshowtimes'
    # Fetching movie details for the searched location
    print("Fetching movie details")
    try:
        movie_response = requests.get(
            movie_url,
            params=data,
            headers=location_headers,
            timeout=request_timeout,
        ).json()
    except (requests.exceptions.RequestException, ValueError):
        print("Failed to get movie details")
        return movie_listings

    all_theaters = movie_response.get('theaters') if isinstance(movie_response, dict) else None
    if not all_theaters:
        print("No theaters found")
        return movie_listings

    # Iterating through each theater
    for theater in all_theaters:
        theater_name = theater.get('name')
        address_parts = (
            theater.get('address1'),
            theater.get('city'),
            theater.get('state'),
            theater.get('zip'),
        )
        # Skip missing parts instead of failing on None + str.
        theater_address = ' '.join(str(part) for part in address_parts if part)

        all_movies = theater.get('movies')
        if not all_movies:
            print("No movies in %s"%(theater_name))
            continue

        # Iterating through each movie in a theater
        for movie in all_movies:
            # cleaning data
            movie_name = (movie.get('title') or '').strip()
            duration = str(movie.get('runtime'))
            # Genres are space-joined, re-split on whitespace and then
            # comma-joined, so every word becomes its own entry.
            genre = ','.join(' '.join(movie.get('genres') or []).split()).strip()
            movie_rating = movie.get('rating')
            # The rating is nested several levels deep; any level may be absent.
            total_rating = (movie.get('stars') or {}).get('totalRating') or {}
            points = (total_rating.get('stars') or {}).get('points')
            star_rating = '' if points is None else str(points).strip()

            movie_listings.append({
                "Theatre_Name": theater_name,
                "Theatre_Address": theater_address,
                "Movie_Name": movie_name,
                "Show_Date": showdate,
                "Movie_Rating": movie_rating,
                "Star_Rating": star_rating,
                "Duration": duration,
                "Genre": genre,
                "Location_or_Zipcode": location
            })
    return movie_listings
if __name__ == "__main__":
    # Example: python fandango.py 20001 2017-12-31
    cli = argparse.ArgumentParser()
    cli.add_argument('location', help='theater location (zipcode or city+state)', type=str)
    cli.add_argument('showdate', help='movie show time', type=str)
    args = cli.parse_args()
    location = args.location
    showdate = args.showdate

    # Parse the date once; the parsed value doubles as the validity check.
    try:
        searchdate = datetime.datetime.strptime(showdate, '%Y-%m-%d').date()
    except ValueError:
        validdate = False
        print("Invalid showdate, showdate should be YYYY-MM-DD format")
    else:
        validdate = True

    if validdate:
        datenow = datetime.date.today()
        if searchdate < datenow:
            print("Entered date is already passed")
        else:
            scraped_data = parse(location, showdate)
            if not scraped_data:
                print("Your search for %s, in %s does not match any movies" % (location, showdate))
            else:
                print("Writing data to output file")
                output_name = '%s-%s-movie-results.csv' % (location, showdate)
                # Column order of the CSV output.
                fieldnames = [
                    'Theatre_Name', 'Theatre_Address', 'Movie_Name',
                    'Show_Date', 'Location_or_Zipcode', 'Duration',
                    'Genre', 'Movie_Rating', 'Star_Rating',
                ]
                # The file is opened in binary mode for the unicodecsv writer.
                with open(output_name, 'wb') as csvfile:
                    writer = csv.DictWriter(csvfile, fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
                    writer.writeheader()
                    for row in scraped_data:
                        writer.writerow(row)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I have exactly the same error. Any help would be greatly appreciated.
Traceback:
Traceback (most recent call last):
File "./fandago.py", line 149, in <module>
scraped_data = parse(location, showdate)
File "./fandago.py", line 43, in parse
location_response = requests.get(location_url, cookies=cookie, headers=location_headers).json()
File "/Users/user/.virtualenvs/movies/lib/python3.7/site-packages/requests/models.py", line 897, in json
return complexjson.loads(self.text, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/__init__.py", line 348, in loads
return _default_decoder.decode(s)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)