Last active
September 14, 2020 17:19
-
-
Save jjanusch/0c659aa57bc31c208eb8b1e502929fd5 to your computer and use it in GitHub Desktop.
A script to scrape song data from Google Music. Inspired by and based on this script from jmiserez: https://gist.github.com/jmiserez/c9a9a0f41e867e5ebb75
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
To run:
1. Go to a playlist in Google Music, such as https://play.google.com/music/listen#/all
2. Open the Developer Tools and paste the script below into the console
3. Run "GoogleMusicExporter.scrape()"
4. Once the script completes, songs can be viewed by:
   1. Running "GoogleMusicExporter.songs", which will show the JavaScript object
   2. Running "GoogleMusicExporter.export('csv')", which will export all of the songs as CSV to your clipboard
   3. Running "GoogleMusicExporter.export('json')", which will export all of the songs as JSON to your clipboard
*/
/**
 * Console tool that scrapes the song table of a Google Music playlist page.
 *
 * The page only keeps a window of rows in the DOM, so `scrape()` repeatedly
 * reads the rows that are currently rendered and scrolls the last one into
 * view until the list stops changing. Results are exposed through `songs`
 * and can be copied to the clipboard with `export('csv' | 'json')`.
 *
 * Declared with `var` on purpose: the script is meant to be pasted into the
 * DevTools console, possibly more than once, and `var` tolerates redeclaration.
 */
var GoogleMusicExporter = {
    /** Songs collected by the most recent scrape. */
    _songs: [],

    /** Handle of the running polling timer, or null when no scrape is active. */
    _interval: null,

    /** Delay between two polls of the song table, in milliseconds. */
    intervalTime: 5,

    /** How long the table may stay unchanged before the scrape gives up, in milliseconds. */
    timeout: 3000,

    /**
     * Number of unchanged polls still tolerated before the scrape stops.
     * An object literal cannot read its own properties through `this`, so the
     * value is spelled out here (timeout / intervalTime) and recomputed by
     * scrape() from the current settings.
     */
    retries: 600,

    /** @returns {Array<Object>} the songs collected so far. */
    get songs() {
        return this._songs;
    },

    /**
     * Starts scraping the playlist that is currently open.
     *
     * Progress and completion are reported through console.log; the result
     * is available afterwards via `songs` or `export()`. Does nothing (apart
     * from logging) when the expected page containers are not found.
     */
    scrape: function () {
        const scrollDiv = document.querySelector('div#music-content');
        const mainContainer = document.querySelector('div#mainContainer');
        if (!scrollDiv || !mainContainer) {
            console.log('Could not find the Google Music song list on this page.');
            return;
        }

        // Overwriting the handle of a scrape that is still running would leave
        // its timer firing forever, so stop it before starting over.
        if (this._interval !== null) {
            clearInterval(this._interval);
            this._interval = null;
        }

        // `undefined` can never equal a data-id (getAttribute yields string or null),
        // so the very first poll is always treated as a new page of rows.
        let lastId;
        const foundIds = new Set();

        // scroll to the top of the window to ensure all songs are captured
        mainContainer.scrollTop = 0;
        // clear out the songs array to start fresh
        this._songs = [];
        this.retries = this._maxRetries();

        console.log('Beginning scrape. Please wait until it says it has finished.');
        console.log('Note: To end early, please run GoogleMusicExporter.stopScrape()');

        this._interval = setInterval(() => {
            const songRows = Array.from(document.querySelectorAll('table.song-table tbody tr.song-row'));
            if (!songRows.length) {
                return;
            }

            const activeId = songRows[0].getAttribute('data-id');

            // Same first row as on the previous poll: the page has not rendered new rows yet.
            if (activeId === lastId) {
                this.retries--;
                // scrollTop may be fractional, so allow one pixel of slack instead of exact equality.
                const atBottom = scrollDiv.scrollTop >= scrollDiv.scrollHeight - scrollDiv.offsetHeight - 1;
                if (atBottom || this.retries <= 0) {
                    this.stopScrape();
                    console.log('Successfully scraped ' + this._songs.length + ' songs');
                    console.log('Check out the songs by running GoogleMusicExporter.songs or running GoogleMusicExporter.export()');
                }
                return;
            }

            this.retries = this._maxRetries();
            lastId = activeId;

            const columns = this._findColumns(songRows[0]);
            songRows.forEach((row) => {
                const song = this._readSong(row, columns);
                // Consecutive windows of rows overlap; keep each song only once.
                if (!foundIds.has(song.id)) {
                    foundIds.add(song.id);
                    this._songs.push(song);
                }
            });

            // Scrolling the last rendered row to the top makes the page render the next window.
            songRows[songRows.length - 1].scrollIntoView(true);
        }, this.intervalTime);
    },

    /**
     * Copies the scraped songs to the clipboard using the DevTools `copy()` utility.
     *
     * @param {string} type - 'json' for a JSON array, 'csv' for RFC 4180 style CSV
     *     with a header row taken from the keys of the first song.
     */
    export: function (type) {
        if (!this._songs.length) {
            console.log('No songs found yet. Run scrape() first!');
            return;
        }

        switch (type) {
            case 'json':
                copy(JSON.stringify(this._songs));
                console.log('Exported as JSON and copied to your clipboard');
                break;
            case 'csv': {
                const cols = Object.keys(this._songs[0]);
                const lines = this._songs.map((song) => cols.map((col) => this._csvField(song[col])).join(','));
                copy([cols.join(',')].concat(lines).join('\n'));
                console.log('Exported as CSV and copied to your clipboard');
                break;
            }
            default:
                console.log('Unrecognized export type. Valid types are "json" and "csv"');
        }
    },

    /** Stops a running scrape; logs whether there was one to stop. */
    stopScrape: function () {
        if (this._interval !== null) {
            clearInterval(this._interval);
            // Forget the handle so a later call correctly reports that nothing is running.
            this._interval = null;
            console.log('Scraper Stopped');
        } else {
            console.log('Scraper not running');
        }
    },

    /**
     * @param {?string} str - text to clean up.
     * @returns {string} `str` without leading/trailing whitespace ('' for null/undefined).
     */
    _trim: function (str) {
        return str == null ? '' : String(str).trim();
    },

    /** @returns {number} how many unchanged polls fit into `timeout` at the current `intervalTime`. */
    _maxRetries: function () {
        return this.timeout / this.intervalTime;
    },

    /**
     * Maps each known song field to the position of its cell within a row,
     * based on the cells' `data-col` attribute. Fields whose column is not
     * present stay null.
     */
    _findColumns: function (row) {
        const fieldByDataCol = new Map([
            ['index', 'index'],
            ['title', 'title'],
            ['duration', 'duration'],
            ['artist', 'artist'],
            ['album', 'album'],
            ['play-count', 'playCount'],
            ['rating', 'rating']
        ]);
        const columns = {
            index: null,
            title: null,
            duration: null,
            artist: null,
            album: null,
            playCount: null,
            rating: null
        };

        Array.from(row.childNodes).forEach((cell, position) => {
            // childNodes may contain text nodes, which have no attributes.
            if (typeof cell.getAttribute !== 'function') {
                return;
            }
            const field = fieldByDataCol.get(cell.getAttribute('data-col'));
            if (field !== undefined) {
                columns[field] = position;
            }
        });

        return columns;
    },

    /** Builds a song record from one table row, using the positions found by _findColumns. */
    _readSong: function (row, columns) {
        const cellOf = (field) => (columns[field] === null ? null : row.childNodes[columns[field]] || null);
        const textOf = (field) => {
            const cell = cellOf(field);
            return cell ? this._trim(cell.textContent) : null;
        };
        const toInt = (value, fallback) => {
            const parsed = Number.parseInt(value, 10);
            return Number.isNaN(parsed) ? fallback : parsed;
        };
        const ratingCell = cellOf('rating');

        return {
            id: row.getAttribute('data-id'),
            index: toInt(textOf('index'), null),
            title: textOf('title'),
            duration: textOf('duration') || '0:00',
            artist: textOf('artist'),
            album: textOf('album'),
            playCount: toInt(textOf('playCount'), 0),
            rating: toInt(ratingCell ? ratingCell.getAttribute('data-rating') : null, 0)
        };
    },

    /**
     * Formats one value as a CSV field: null/undefined become empty, and a
     * value containing a comma, quote or line break is wrapped in double
     * quotes with embedded quotes doubled.
     */
    _csvField: function (value) {
        if (value === null || value === undefined) {
            return '';
        }
        const text = String(value);
        return /[",\r\n]/.test(text) ? '"' + text.replace(/"/g, '""') + '"' : text;
    }
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment