Last active
April 4, 2021 22:27
-
-
Save gptshubham595/afad76990dee3349b5f7a6940cdbe73a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from sklearn.feature_extraction.text import CountVectorizer | |
from sklearn.metrics.pairwise import cosine_similarity as cos_sim | |
def get_title_from_index(index):
    """Return the title of the movie stored under row label ``index``.

    The label is matched against the index of the module-level ``df`` and
    the first matching value of its "title" column is returned.

    Raises:
        IndexError: if no row of ``df`` carries that label.
    """
    titles = df.loc[df.index == index, "title"].values
    if len(titles) == 0:
        # Same exception type a bare ``.values[0]`` would raise on an empty
        # selection, but the message names the label that was not found.
        raise IndexError("no movie with index {!r} in the dataset".format(index))
    return titles[0]
def get_index_from_title(title):
    """Return the "index" column value of the first movie named ``title``.

    The title is compared for exact equality against the "title" column of
    the module-level ``df``.

    Raises:
        IndexError: if no row of ``df`` has that title.
    """
    indices = df.loc[df["title"] == title, "index"].values
    if len(indices) == 0:
        # Same exception type a bare ``.values[0]`` would raise on an empty
        # selection, but the message names the title that was not found.
        raise IndexError("no movie titled {!r} in the dataset".format(title))
    return indices[0]
## Load the movie metadata and show a preview plus the available columns
df = pd.read_csv('movie_dataset.csv')
print(df.head())
print(df.columns)

## Columns used to describe each movie
features = ['keywords', 'cast', 'genres', 'director']

## Replace missing values in those columns with empty strings
df[features] = df[features].fillna('')
def combine_features(row, columns=('keywords', 'cast', 'genres', 'director')):
    """Join the selected fields of ``row`` into one space-separated string.

    Args:
        row: mapping-like object (for example a DataFrame row) that supports
            ``row[name]`` for every name in ``columns``; the values must be
            strings.
        columns: field names to concatenate, in order. The default is the
            four feature columns this script works with.

    Returns:
        The field values separated by single spaces.
    """
    return " ".join(row[column] for column in columns)
## Build one text document per movie from the selected feature columns
df["combined"] = df.apply(combine_features, axis=1)
print(df["combined"].head())

## Count matrix from this new combined column
cv = CountVectorizer()
count = cv.fit_transform(df["combined"])

## Compute the cosine similarity between every pair of rows of the count matrix
similarity_score = cos_sim(count)
print(similarity_score)

movie_user_likes = "Avatar"

## Get index of this movie from its title
index = get_index_from_title(movie_user_likes)

## Similarity of every movie to the liked one, and the movie positions
## ordered from most to least similar (ascending sort, then reversed)
movies_to_recommend_scores = list(similarity_score[index])
sorted_keys = sorted(
    range(len(movies_to_recommend_scores)),
    key=movies_to_recommend_scores.__getitem__,
)
sorted_keys.reverse()

## Collect the titles of the 50 most similar movies. The liked movie is
## dropped by its index rather than by assuming it sorted first: another
## movie with identical features ties with it and may be ranked ahead.
movies_to_recommend_list = [i for i in sorted_keys if i != index][:50]
movies_to_recommend = [get_title_from_index(i) for i in movies_to_recommend_list]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment