Skip to content

Instantly share code, notes, and snippets.

@jjanusch
Last active September 14, 2020 17:19
Show Gist options
  • Save jjanusch/0c659aa57bc31c208eb8b1e502929fd5 to your computer and use it in GitHub Desktop.
Save jjanusch/0c659aa57bc31c208eb8b1e502929fd5 to your computer and use it in GitHub Desktop.
A script to scrape song data from Google Music. Inspired by and based on this script from jmiserez: https://gist.github.com/jmiserez/c9a9a0f41e867e5ebb75
/*
To run:
1. Go to a playlist in Google Music, such as https://play.google.com/music/listen#/all
2. Open the Developer Tools and paste the script below into the console
3. Run "GoogleMusicExporter.scrape()"
4. Once the script completes, the songs can be viewed by:
   a. Running "GoogleMusicExporter.songs", which shows the JavaScript array of scraped songs
   b. Running "GoogleMusicExporter.export('csv')", which copies all of the songs to your clipboard as CSV
   c. Running "GoogleMusicExporter.export('json')", which copies all of the songs to your clipboard as JSON
*/
/**
 * Scrapes the song rows of a Google Music song table and exports them.
 *
 * The song table only keeps part of the list in the DOM, so `scrape()` polls
 * the visible rows, records them, and scrolls the last row into view until no
 * new rows appear.
 *
 * Declared with `var` so that pasting the script a second time simply
 * replaces the previous definition.
 */
var GoogleMusicExporter = {
  _songs: [],
  _interval: null, // id returned by setInterval while a scrape is running, else null
  intervalTime: 5, // ms between polls of the song table
  timeout: 3000, // ms without a new page of rows before the scrape gives up
  // Number of polls left before giving up. An object literal cannot read its
  // own properties while it is being built, so the real value is assigned
  // right after the literal and again at the start of every scrape().
  retries: 0,

  // Maps each cell's `data-col` attribute value to the song property it fills.
  _columns: {
    index: 'index',
    title: 'title',
    duration: 'duration',
    artist: 'artist',
    album: 'album',
    'play-count': 'playCount',
    rating: 'rating',
  },

  /** The songs collected by the most recent scrape. */
  get songs() {
    return this._songs;
  },

  /**
   * Starts scraping. Progress is reported with console.log; results end up in
   * `songs`. Any scrape that is still running is cancelled first.
   */
  scrape: function () {
    const scrollDiv = document.querySelector('div#music-content');
    const mainContainer = document.querySelector('div#mainContainer');
    const seenIds = new Set();
    // Starts undefined, which never equals a `data-id` value (string or null),
    // so the first poll is always treated as a new page.
    let lastId;

    // Without this, a second scrape() would leave the first timer running
    // with no handle left to clear it.
    if (this._interval !== null) {
      clearInterval(this._interval);
      this._interval = null;
    }

    // scroll to the top of the window to ensure all songs are captured
    if (mainContainer) {
      mainContainer.scrollTop = 0;
    }
    // clear out the songs array to start fresh
    this._songs = [];
    this.retries = this._maxRetries();

    console.log('Beginning scrape. Please wait until it says it has finished.');
    console.log('Note: To end early, please run GoogleMusicExporter.stopScrape()');

    this._interval = setInterval(() => {
      const songRows = Array.from(document.querySelectorAll('table.song-table tbody tr.song-row'));
      if (!songRows.length) {
        return;
      }

      const activeId = songRows[0].getAttribute('data-id');
      // if ids are the same, page has not been changed yet
      if (activeId === lastId) {
        this.retries--;
        if (this._isAtBottom(scrollDiv) || this.retries <= 0) {
          this.stopScrape();
          console.log('Successfully scraped ' + this._songs.length + ' songs');
          console.log("Check out the songs by running GoogleMusicExporter.songs or running GoogleMusicExporter.export('json') or GoogleMusicExporter.export('csv')");
        }
        return;
      }

      this.retries = this._maxRetries();
      lastId = activeId;

      const cols = this._findColumns(songRows[0]);
      songRows.forEach((row) => {
        const song = this._readSong(row, cols);
        // Consecutive pages overlap, so the same row can be seen twice.
        if (!seenIds.has(song.id)) {
          seenIds.add(song.id);
          this._songs.push(song);
        }
      });

      // Scrolling the last row to the top makes the table render the next page.
      songRows[songRows.length - 1].scrollIntoView(true);
    }, this.intervalTime);
  },

  /**
   * Copies the scraped songs to the clipboard using the console's `copy()`
   * helper.
   *
   * @param {string} type - Either "json" or "csv".
   */
  export: function (type) {
    if (!this._songs.length) {
      console.log('No songs found yet. Run scrape() first!');
      return; // nothing to export; leave the clipboard untouched
    }

    switch (type) {
      case 'json':
        copy(JSON.stringify(this._songs));
        console.log('Exported as JSON and copied to your clipboard');
        break;
      case 'csv': {
        const cols = Object.keys(this._songs[0]);
        const rows = this._songs.map((song) => cols.map((col) => this._csvField(song[col])).join(','));
        copy(cols.join(',') + '\n' + rows.join('\n'));
        console.log('Exported as CSV and copied to your clipboard');
        break;
      }
      default:
        console.log('Unrecognized export type. Valid types are "json" and "csv"');
    }
  },

  /** Stops a running scrape, keeping whatever has been collected so far. */
  stopScrape: function () {
    if (this._interval !== null) {
      clearInterval(this._interval);
      // Forget the id so a later call correctly reports "not running".
      this._interval = null;
      console.log('Scraper Stopped');
    } else {
      console.log('Scraper not running');
    }
  },

  /** Removes leading and trailing whitespace (including line breaks). */
  _trim: function (str) {
    return str.trim();
  },

  /** Number of unchanged polls that add up to `timeout` ms (at least 1). */
  _maxRetries: function () {
    return Math.max(1, Math.ceil(this.timeout / this.intervalTime));
  },

  /**
   * True when `el` is scrolled to its end. The 1px tolerance is there because
   * `scrollTop` can be fractional. A missing element counts as "not at the
   * bottom", which leaves the retry countdown as the only stop condition.
   */
  _isAtBottom: function (el) {
    return Boolean(el) && el.scrollTop >= el.scrollHeight - el.offsetHeight - 1;
  },

  /**
   * Finds which child-node position of a row holds each known column.
   *
   * @param {Element} row - A song row whose cells carry `data-col`.
   * @returns {Object} Song property name -> child index, or null if absent.
   */
  _findColumns: function (row) {
    const found = {
      index: null,
      title: null,
      duration: null,
      artist: null,
      album: null,
      playCount: null,
      rating: null,
    };
    Array.from(row.childNodes).forEach((cell, i) => {
      // childNodes may contain text/comment nodes, which have no attributes.
      if (typeof cell.getAttribute !== 'function') {
        return;
      }
      const name = cell.getAttribute('data-col');
      if (Object.prototype.hasOwnProperty.call(this._columns, name)) {
        found[this._columns[name]] = i;
      }
    });
    return found;
  },

  /**
   * Builds a song record from one row.
   *
   * @param {Element} row - The song row.
   * @param {Object} cols - Column positions as returned by _findColumns().
   * @returns {Object} Song with id, index, title, duration, artist, album,
   *   playCount and rating. Missing text columns are null, a missing duration
   *   is "0:00", and a missing playCount or rating is 0.
   */
  _readSong: function (row, cols) {
    const cellFor = (key) => (cols[key] === null ? null : row.childNodes[cols[key]] || null);
    const textFor = (key, fallback) => {
      const cell = cellFor(key);
      return cell ? this._trim(cell.textContent || '') : fallback;
    };
    // The rating is stored in an attribute of its own cell, so it must be
    // looked up through the rating column, not the play-count column.
    const ratingCell = cellFor('rating');

    return {
      id: row.getAttribute('data-id'),
      index: this._toInt(textFor('index', null), null),
      title: textFor('title', null),
      duration: textFor('duration', '0:00') || '0:00',
      artist: textFor('artist', null),
      album: textFor('album', null),
      playCount: this._toInt(textFor('playCount', null), 0),
      rating: this._toInt(ratingCell ? ratingCell.getAttribute('data-rating') : null, 0),
    };
  },

  /** Parses a base-10 integer, returning `fallback` when there is none. */
  _toInt: function (value, fallback) {
    const parsed = parseInt(value, 10);
    return Number.isNaN(parsed) ? fallback : parsed;
  },

  /**
   * Formats one value as a CSV field: null/undefined become empty, line
   * breaks are removed so each song stays on one line, and fields containing
   * a comma or double quote are quoted with inner quotes doubled.
   */
  _csvField: function (value) {
    if (value === null || value === undefined) {
      return '';
    }
    const text = String(value).replace(/\r\n|\r|\n/g, '');
    return /[",]/.test(text) ? '"' + text.replace(/"/g, '""') + '"' : text;
  },
};
GoogleMusicExporter.retries = GoogleMusicExporter._maxRetries();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment