Created
April 24, 2023 12:49
-
-
Save tomatosoupcan/399adc52e516746a1d254c536b3f00b0 to your computer and use it in GitHub Desktop.
Scrape Data from Music League for Analysis
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import spotipy | |
import pandas as pd | |
from spotipy.oauth2 import SpotifyClientCredentials | |
# Music League API settings. `user` and the session cookie are placeholders
# that must be replaced with real values before the script is run.
user = 'ml_userid'
cookie = {'session':'session cookie from browser'}
base_url = 'https://app.musicleague.com/api/v1/'
# Endpoint listing the leagues of `user`; requested first by the scrape below.
url = ''.join([base_url, 'users/', user, '/leagues'])
def get_member(members, id):
    """Return the name of the first member whose user id equals *id*.

    Args:
        members: iterable of mappings, each read as ``entry['user']['id']``
            and ``entry['user']['name']``.
        id: user id to look for.

    Returns:
        The matching member's name, or the string
        ``'Error collecting name'`` when no entry matches.
    """
    matching_names = (
        entry['user']['name']
        for entry in members
        if entry['user']['id'] == id
    )
    # next() with a default stops at the first hit and supplies the fallback.
    return next(matching_names, 'Error collecting name')
def spoticheck(uri):
    """Look up a track on Spotify and return its name, artist and genres.

    Relies on the module-level ``sp`` Spotify client and makes two calls:
    ``sp.track`` for the track and ``sp.artist`` for the artist's genres.

    Args:
        uri: track identifier passed straight to ``sp.track``.

    Returns:
        A ``(name, artist_name, genres)`` tuple, where ``genres`` is the
        artist's genre list joined with commas (``''`` when the list is
        empty).
    """
    track = sp.track(uri)
    # NOTE(review): this reads the first artist of the track's *album*, not of
    # the track itself; the two may differ (e.g. compilations) - confirm intent.
    album_artist = track['album']['artists'][0]
    artist = sp.artist(album_artist['id'])
    t_genres = ','.join(artist['genres'])
    return track['name'], album_artist['name'], t_genres
# Spotify API credentials (placeholders - replace with a real client id/secret).
s_client_id = 'spotify api client id'
s_client_secret = 'spotify api client secret'
# Build the credentials manager first, then hand it to the client.
s_auth_manager = SpotifyClientCredentials(
    client_id=s_client_id,
    client_secret=s_client_secret,
)
sp = spotipy.Spotify(auth_manager=s_auth_manager)

# Empty result table; the scrape below adds one row per vote.
df_columns = [
    'League',
    'Round',
    'Song',
    'Artist',
    'Genres',
    'Submitter',
    'Voter',
    'Points',
]
df = pd.DataFrame(columns=df_columns)
# Fetch every league of the configured user.
req_leagues = requests.get(url, cookies=cookie)

# Rows are collected in a plain list and turned into a DataFrame once at the
# end; inserting into a DataFrame row by row gets slower as the frame grows.
rows = []

for league in req_leagues.json():
    l_name, l_id = league['name'], league['id']

    # Members of this league (used to turn user ids into names).
    url = base_url + 'leagues/' + l_id + '/members'
    req_members = requests.get(url, cookies=cookie)

    # Rounds of this league.
    url = base_url + 'leagues/' + l_id + '/rounds'
    req_rounds = requests.get(url, cookies=cookie)

    for league_round in req_rounds.json():
        try:
            r_name, r_id = league_round['name'], league_round['id']
            # Parse the member list once per round instead of once per vote.
            # It stays inside the try so a bad members response still only
            # skips the round rather than aborting the whole run.
            members = req_members.json()

            url = base_url + 'leagues/' + l_id + '/rounds/' + r_id + '/results'
            req_subs = requests.get(url, cookies=cookie)

            for result in req_subs.json()['standings']:
                submission = result['submission']
                s_name, s_artist, s_genres = spoticheck(submission['spotifyUri'])
                s_submitter = get_member(members, submission['submitterId'])

                # One output row per vote cast on this submission.
                for vote in result['votes']:
                    v_name = get_member(members, vote['voterId'])
                    rows.append([l_name, r_name, s_name, s_artist, s_genres,
                                 s_submitter, v_name, vote['weight']])
        except Exception as exc:
            # Best-effort scrape: a round that is still running (or any other
            # per-round failure) is reported and skipped. Unlike a bare
            # ``except:``, this lets Ctrl-C / SystemExit stop the script.
            print('Skipping round in league ' + str(l_name) + ': ' + repr(exc))
            continue

# `df` is still the empty, column-only frame at this point, so rebuilding it
# from the collected rows yields the same table. dtype=object matches the
# dtype of the original empty frame so values are written to CSV unchanged.
df = pd.DataFrame(rows, columns=list(df.columns), dtype=object)
df.to_csv('output.csv', index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment