Last active
September 23, 2023 04:15
-
-
Save Underdoge/20a849cc4c866d1b1aea325cdf823bfd to your computer and use it in GitHub Desktop.
Write IMDb ratings into m3u movie list
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import getopt
import re
import sys
from urllib.parse import quote_plus

import pandas as pd
import requests
import unidecode as ud
from bs4 import BeautifulSoup
def mainSearch(titleURL, headers):
    """Run IMDb's ``/find`` title search and collect the result links.

    Args:
        titleURL: Search text, already encoded for use in a query string.
        headers: HTTP headers forwarded to ``requests.get``.

    Returns:
        A ``(results, href, url)`` tuple. ``results`` is a DataFrame with a
        ``Name`` column (normalised, lower-cased link text) and an ``Href``
        column; ``href`` is the list of link targets in the same row order;
        ``url`` is the search URL that was requested.
    """
    url = ('https://www.imdb.com/find?q=' + titleURL
           + "&s=tt&ttype=ft&ref_=fn_ft")
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, "html.parser")

    # Select the anchors once and derive both columns from the same elements.
    # Using two different selectors for names and hrefs can yield lists of
    # different length, which misaligns rows or makes pd.DataFrame raise.
    anchors = soup.select('div.ipc-metadata-list-summary-item__tc a')

    # "&" becomes "and"; the remaining listed characters are dropped outright.
    dropped = str.maketrans({"&": "and", "?": None, "!": None,
                             "-": None, "'": None, ":": None})

    def normalise(text):
        # Transliterate to ASCII, blank out any other symbol, then collapse
        # the whitespace runs that removing punctuation leaves behind.
        ascii_text = ud.unidecode(text.translate(dropped))
        spaced = re.sub(r'[^a-zA-Z0-9\s]+', ' ', ascii_text)
        return re.sub(r'\s+', ' ', spaced).lower()

    titles = [normalise(a.text) for a in anchors]
    href = [a.attrs.get('href') for a in anchors]
    return pd.DataFrame({'Name': titles, 'Href': href}), href, url
def altSearch(titleURL, headers):
    """Run IMDb's ``/search/title`` query and collect the result links.

    Args:
        titleURL: Search text, already encoded for use in a query string.
        headers: HTTP headers forwarded to ``requests.get``.

    Returns:
        A ``(results, href, url)`` tuple. ``results`` is a DataFrame with a
        ``Name`` column (normalised, lower-cased link text) and an ``Href``
        column; ``href`` is the list of link targets in the same row order;
        ``url`` is the search URL that was requested.
    """
    url = 'https://www.imdb.com/search/title/?title=' + titleURL
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, "html.parser")

    # Both columns come from the same anchors, so select them only once.
    anchors = soup.select('h3.lister-item-header a')

    # "&" becomes "and"; the remaining listed characters are dropped outright.
    dropped = str.maketrans({"&": "and", "?": None, "!": None,
                             "-": None, "'": None, ":": None})

    def normalise(text):
        # Transliterate to ASCII, blank out any other symbol, then collapse
        # the whitespace runs that removing punctuation leaves behind.
        ascii_text = ud.unidecode(text.translate(dropped))
        spaced = re.sub(r'[^a-zA-Z0-9\s]+', ' ', ascii_text)
        return re.sub(r'\s+', ' ', spaced).lower()

    titles = [normalise(a.text) for a in anchors]
    href = [a.attrs.get('href') for a in anchors]
    return pd.DataFrame({'Name': titles, 'Href': href}), href, url
def getIMDbRating(movietitle):
    """Return the IMDb rating text for the title named *movietitle*.

    The title is looked up with ``mainSearch`` and, when that yields no exact
    name match, with ``altSearch``. The first result whose normalised name
    equals ``movietitle.lower()`` is opened and its rating element is read.

    Args:
        movietitle: Movie title as free text.

    Returns:
        The rating as the string shown on the page, or ``"N/A"`` when no
        matching title, no title link, or no numeric rating is found.

    Reads the module-level ``debug`` flag to decide whether to print
    diagnostic output.
    """
    headers = {
        'Accept-Language': 'en-US,en;q=0.5',
        'User-Agent': (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 "
            "Safari/537.36 Edg/107.0.1418.52"),
    }
    # quote_plus still turns spaces into "+", but also escapes "&", "#", "+"
    # and similar characters that would otherwise corrupt the query string.
    titleURL = quote_plus(movietitle)
    # NOTE(review): search results are punctuation-stripped by the search
    # helpers, while the requested title is only lower-cased here.
    wanted = movietitle.lower()

    def exactMatches(frame):
        # Row positions whose normalised name equals the requested title.
        return [pos for pos, name in enumerate(frame['Name'])
                if name == wanted]

    results, href, url = mainSearch(titleURL, headers)
    usedAltSearch = False
    if len(results) == 0:
        results, href, url = altSearch(titleURL, headers)
        usedAltSearch = True
        if len(results) == 0:
            if debug:
                print(f"Search URL: {url}")
                print(f"Results (a): {results}")
            return "N/A"

    matches = exactMatches(results)
    if not matches and not usedAltSearch:
        # Only fall back when the alternative search has not been tried yet;
        # repeating the identical request cannot produce a different answer.
        results, href, url = altSearch(titleURL, headers)
        matches = exactMatches(results)
    if not matches:
        if debug:
            print(f"Search URL: {url}")
            print(f"Results (b): {results}")
        return "N/A"

    link = href[matches[0]]
    # An anchor without an href yields None; treat it like a non-title link.
    if not link or not link.startswith("/title/"):
        if debug:
            print(f"Search URL: {url}")
            print(f"Results (c): {results}")
        return "N/A"

    url = 'https://www.imdb.com' + link
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, "html.parser")
    rating = soup.select(
        "div[data-testid=hero-rating-bar__aggregate-rating__score] span")
    if len(rating) == 0:
        if debug:
            print(f"Result URL: {url}")
            print(f"Results (d): {results}")
        # Report the title; the selection itself is empty at this point.
        print("* The following movie was found but has no rating: "
              f"{movietitle}")
        return "N/A"

    score = rating[0].text
    try:
        float(score)  # validation only: the original text is returned
    except ValueError:
        if debug:
            print(f"URL: {url}")
            print(f"Rating not a float: {rating}")
        return "N/A"
    return score
# ---------------------------------------------------------------------------
# Command-line driver: parse the options, then either annotate an m3u movie
# list with IMDb ratings (-f) or look up a single title (-t).
# ---------------------------------------------------------------------------
append = False          # -a: put the rating after the entry, not before
debug = False           # -d: read as a global by getIMDbRating()
found = []              # rows of allMovies matching the latest movie entry
count = 0               # number of distinct titles looked up so far
number = "all"          # -n: how many titles to look up
output = ""             # -o: output path ("" = derive from the input path)
helpShown = False       # set once -h has printed the usage text
skipDuplicates = False  # -s: drop entries whose title was already seen
skip = 2                # line-counter threshold used by the -n early stop
written = 0             # running line counter, reset once it exceeds skip
m3uMovieList = ""       # -f: input m3u path
singleTitle = ""        # -t: single title to look up
options = "adf:hn:o:st:"
argumentList = sys.argv[1:]
long_options = ["Append", "Debug", "File=", "Help",
                "Number=", "Output=", "Skip", "Title="]

try:
    # Only the parsing call can raise getopt.error.
    arguments, values = getopt.getopt(argumentList, options, long_options)
except getopt.error as err:
    print(str(err))
else:
    for currentArgument, currentValue in arguments:
        if currentArgument in ("-h", "--Help"):
            print("\n USAGE: ")
            print("\n python3 imdb_ratings.py <Options>")
            print("\n OPTIONS:")
            print("\n -h This help message")
            print(" -f (--File) Input m3u movie list (Required)")
            print(" -o (--Output) Output m3u movie list (Optional)")
            print(" -s (--Skip) Skip duplicate movies (Optional)")
            print(" -d (--Debug) Enable debugging mode (Optional)")
            print(" -n (--Number) Number of movies to look up "
                  "(Optional - Default: all)")
            print(" -a (--Append) Append rating to movie title "
                  "(Optional - Default: rating will precede the title)")
            print(" -t (--Title) Look up a single movie title by name")
            print("\n EXAMPLES: ")
            print("\n The following command will read the first 100 "
                  "movies of the \"movielist.m3u\" file and write them "
                  "to\n the \"newmovielist.m3u\" file appending the rating "
                  "to the movie title, skipping duplicate movie titles,\n "
                  "and will output debugging information:")
            # -a takes no value: a stray word after it would make getopt
            # stop parsing and silently drop the options that follow.
            print("\n python3 imdb_ratings.py -f movielist.m3u "
                  "-o newmovielist.m3u -n 100 -a -s -d")
            print("\n The following command will look up the rating for a "
                  "movie named \"Everything Everywhere All At Once\":")
            print("\n python3 imdb_ratings.py -t \"Everything Everywhere "
                  "All At Once\"\n")
            helpShown = True
        elif currentArgument in ("-f", "--File"):
            print("m3u List:", currentValue)
            m3uMovieList = currentValue
        elif currentArgument in ("-n", "--Number"):
            print("Number of movies:", currentValue)
            number = currentValue
        elif currentArgument in ("-a", "--Append"):
            # Must match the "Append" entry in long_options; getopt reports
            # the canonical long name even for abbreviated input.
            print("Append rating enabled.")
            append = True
        elif currentArgument in ("-o", "--Output"):
            print(f"Output file: {currentValue}")
            output = currentValue
        elif currentArgument in ("-s", "--Skip"):
            print("Skip duplicates enabled.")
            skipDuplicates = True
        elif currentArgument in ("-d", "--Debug"):
            print("Debugging mode enabled.")
            debug = True
        elif currentArgument in ("-t", "--Title"):
            print("Look up single movie title: ", currentValue)
            singleTitle = currentValue

    if not helpShown:
        if number != "all":
            movieNumber = int(number) - 1
            if int(number) == 1:
                skip = 5
        allMovies = pd.DataFrame({'Name': [], 'Rating': []})
        if m3uMovieList != "":
            if output != "":
                outputPath = output
            else:
                # Drops the last four characters of the input path
                # (assumes a ".m3u"-length extension).
                outputPath = m3uMovieList[:-4] + "_new.m3u"
            # Context managers close both files, even on early exit.
            with open(outputPath, "w") as newMovieList, \
                    open(m3uMovieList) as movieList:
                for line in movieList:
                    # NOTE(review): this counter gates the -n early stop so
                    # it is only evaluated when the counter equals `skip`;
                    # it presumably relies on the list's line layout -
                    # confirm against a real input file.
                    written += 1
                    if written <= skip:
                        if (number != "all" and count > movieNumber
                                and written >= skip):
                            break
                    else:
                        written = 0

                    if line.startswith("#EXTINF") and "Movie VOD" in line:
                        # Title sits between the last ": " and the final six
                        # characters of the line; rindex raises ValueError
                        # when ": " is absent.
                        title = line[line.rindex(": ") + 2:-6]
                        found = allMovies[
                            allMovies.apply(
                                lambda row: row.Name.lower() == title.lower(),
                                axis=1)]
                        if len(found) == 0:
                            rating = getIMDbRating(title)
                            print(f"{count+1}) Title: \"{title}\", "
                                  f"Rating: {rating}")
                            allMovies.loc[len(allMovies.index)] = [title,
                                                                   rating]
                            count += 1
                        else:
                            # Reuse the rating stored for the earlier entry.
                            rating = found['Rating'].to_string(index=False)
                            if skipDuplicates:
                                print(f"* Skipping duplicate: \"{title}\", "
                                      f"Rating: {rating}")
                            else:
                                print(f"* Duplicate: \"{title}\", "
                                      f"Rating: {rating}")

                        if append:
                            # Insert before the trailing newline character.
                            newline = (line[:-1] + f" ({rating})"
                                       + line[-1:])
                        else:
                            # Insert just before the last "HD : " marker.
                            cut = line.rindex("HD : ")
                            newline = line[:cut] + f"{rating} " + line[cut:]
                        if not skipDuplicates or len(found) == 0:
                            newMovieList.write(newline)
                    else:
                        if line.startswith("#EXTINF"):
                            channel = line[line.rindex(",") + 1:].replace(
                                "\n", "")
                            print(f"Adding channel: \"{channel}\"")
                        # `found` still holds the lookup from the most recent
                        # movie entry, so lines following a skipped duplicate
                        # are dropped along with it.
                        if not skipDuplicates or len(found) == 0:
                            newMovieList.write(line)
        elif singleTitle != "":
            rating = getIMDbRating(singleTitle)
            print(f"Rating: {rating}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment