bonelifer · April 1, 2025 23:18
diff --git a/ytmusic.py b/ytmusic.py
 #!/usr/bin/env python3
 """
 Script to Fetch MusicBrainz IDs (MBIDs) for Artists from a CSV File

 Purpose:
 This script reads a CSV file containing artist names, extracts unique artist names
 from the columns 'Artist Name 1' to 'Artist Name 6', and queries the MusicBrainz API 
 for the MusicBrainz ID (MBID) of each artist. It then saves the results to a JSON file.

 Steps:
 1. Load a CSV file containing song details, which includes up to six columns for artist names.
 2. Extract artist names from these columns, handling cases where multiple artists are listed in a single cell.
 3. Query the MusicBrainz API to retrieve the MBID for each unique artist.
 4. Each MBID that is found is stored as a {"MusicBrainzId": "<mbid>"} object; artists without a match are only reported on the console.
 5. The script respects the MusicBrainz API rate limit by introducing a delay between requests.
 6. The results are saved in a formatted JSON file named 'artists_mbids.json'.
 """

 import os  # Import os to handle file path checks
 import pandas as pd  # Import pandas to read CSV and manipulate data
 import requests  # Import requests for making HTTP requests to the MusicBrainz API
 import time  # Import time for adding delays to prevent hitting rate limits
 import json  # Import json for formatting the output as JSON

 # Identification sent to MusicBrainz through the User-Agent header
 email = "[email protected]"  # Replace with your actual email address
 app_name = "GoogleTakeoutMusicToMBid/1.0"  # Name and version describing this script

 # Location of the exported library CSV; adjust to wherever your file lives
 file_path = r"./musiclibrarysongs.csv"

 # Only attempt to parse the CSV when it is actually present on disk
 if os.path.isfile(file_path):
    try:
        # Rows pandas cannot parse are dropped (on_bad_lines='skip') instead of aborting the load
        df = pd.read_csv(file_path, on_bad_lines='skip')
        print("File successfully loaded.")
    except Exception as load_error:
        # Report any failure raised while reading; if read_csv itself failed, 'df' is never assigned
        print(f"An error occurred while trying to load the file: {load_error}")
        print("Please check the file format or permissions and try again.")
 else:
    # Missing file: explain the problem and how to fix it
    print(f"Error: The file at '{file_path}' could not be found.")
    print("Please ensure the file exists at the specified location and try again.")

 def collect_artist_names(frame, column_count=6):
    """
    Gather the unique artist names found in the 'Artist Name N' columns of a DataFrame.

    Parameters:
        frame (pandas.DataFrame): Table whose columns may include 'Artist Name 1'
            through 'Artist Name <column_count>'. Columns that are absent are skipped.
        column_count (int): Highest artist column number to inspect (default 6).

    Returns:
        set: Stripped, non-empty artist names. A comma-separated cell contributes
        one entry per name.
    """
    names = set()
    for i in range(1, column_count + 1):
        artist_column = f'Artist Name {i}'
        if artist_column not in frame.columns:
            continue
        # NaN cells are dropped so only real values are split
        for cell in frame[artist_column].dropna():
            # pandas parses an all-numeric column (e.g. the band "311") as numbers,
            # so the value is converted to text rather than assumed to be a str.
            if isinstance(cell, float) and cell.is_integer():
                cell = int(cell)  # Avoid turning 311.0 into the name "311.0"
            for piece in str(cell).split(','):
                name = piece.strip()
                if name:  # Ignore blanks left by stray or trailing commas
                    names.add(name)
    return names

 # If the file is successfully loaded, we proceed to process the data
 if 'df' in locals():
    # Clean up column names by stripping any leading or trailing spaces
    # so the 'Artist Name N' lookups match the headers exactly.
    df.columns = df.columns.str.strip()

    # Unique artist names from 'Artist Name 1' to 'Artist Name 6' (a set removes duplicates)
    artist_names = collect_artist_names(df)

    # Initialize a list to store the MusicBrainz IDs (MBIDs)
    mbids = []

    # Define the MusicBrainz API endpoint that will be used for searching artists by name
    api_endpoint = "https://musicbrainz.org/ws/2/artist/"

    # Set up headers for the API request, including a custom User-Agent (required by MusicBrainz)
    # The User-Agent helps MusicBrainz identify who is making the requests.
    headers = {
        'User-Agent': f'{app_name} ({email})'  # Replace with your own details
    }

    def get_artist_mbid(artist_name, timeout=10):
        """
        Queries the MusicBrainz API to retrieve the MusicBrainz ID (MBID) for a given artist.

        Parameters:
            artist_name (str): The name of the artist to search for.
            timeout (float): Seconds to wait for the server before giving up (default 10).

        Returns:
            str: The MBID of the first artist in the search results, or None if the name
            is blank, nothing matched, or the request failed (failures are printed).
        """
        # A blank name cannot match anything, so skip the network round trip entirely
        if not artist_name or not str(artist_name).strip():
            return None

        # Define the query parameters for the API request
        query_params = {
            'query': artist_name,  # The search query string to find the artist by name
            'fmt': 'json'  # Specify that the response should be in JSON format
        }

        try:
            # requests waits indefinitely unless a timeout is given, so always pass one
            response = requests.get(
                api_endpoint, params=query_params, headers=headers, timeout=timeout
            )
            response.raise_for_status()  # Raise an error if the HTTP request failed

            # The first entry of 'artists' is taken as the match
            artists = response.json().get('artists')
            if artists:
                return artists[0]['id']
        except requests.exceptions.RequestException as e:
            # Network problems, timeouts and HTTP error statuses end up here
            print(f"Error while fetching data for artist '{artist_name}': {e}")
        except Exception as e:
            # Anything else (e.g. an unexpected response shape) is reported, not raised
            print(f"Unexpected error while processing artist '{artist_name}': {e}")

        # Return None if no artist was found or an error occurred
        return None

    # Look up every artist in sorted order so the output file is the same from run to run
    # (iterating the set directly yields an arbitrary order).
    seen_mbids = set()
    for artist in sorted(artist_names):
        # Call the function to retrieve the MBID for the current artist
        mbid = get_artist_mbid(artist)

        if not mbid:
            # If no MBID is found, log this information for the artist
            print(f"MBID not found for artist: {artist}")
        elif mbid not in seen_mbids:
            # Different spellings can resolve to the same artist; record each MBID once
            seen_mbids.add(mbid)
            mbids.append({"MusicBrainzId": mbid})

        # To avoid hitting rate limits on the MusicBrainz API, wait 1 second between requests
        time.sleep(1)

    # Convert the list of MBIDs into a JSON array, pretty-printed with 4 spaces of indentation.
    # Serializing before the file is opened keeps a serialization error from truncating it.
    json_output = json.dumps(mbids, indent=4)

    # Define the name of the output file where the JSON data will be saved
    output_file = "artists_mbids.json"

    # Write the JSON text using UTF-8 encoding
    try:
        with open(output_file, 'w', encoding='utf-8') as json_file:
            json_file.write(json_output)
        print(f"JSON array of MusicBrainz IDs has been saved to '{output_file}'.")
    except Exception as e:
        # Handle errors that occur during file writing
        print(f"An error occurred while writing the output file: {e}")
	#!/usr/bin/env python3
	"""
	Script to Fetch MusicBrainz IDs (MBIDs) for Artists from a CSV File

	Purpose:
	This script reads a CSV file containing artist names, extracts unique artist names
	from the columns 'Artist Name 1' to 'Artist Name 6', and queries the MusicBrainz API
	for the MusicBrainz ID (MBID) of each artist. It then saves the results to a JSON file.

	Steps:
	1. Load a CSV file containing song details, which includes up to six columns for artist names.
	2. Extract artist names from these columns, handling cases where multiple artists are listed in a single cell.
	3. Query the MusicBrainz API to retrieve the MBID for each unique artist.
	4. Each MBID that is found is stored as a {"MusicBrainzId": "<mbid>"} object; artists without a match are only reported on the console.
	5. The script respects the MusicBrainz API rate limit by introducing a delay between requests.
	6. The results are saved in a formatted JSON file named 'artists_mbids.json'.
	"""

	import os # Import os to handle file path checks
	import pandas as pd # Import pandas to read CSV and manipulate data
	import requests # Import requests for making HTTP requests to the MusicBrainz API
	import time # Import time for adding delays to prevent hitting rate limits
	import json # Import json for formatting the output as JSON

	# Define the email and app name for the User-Agent header
	email = "[email protected]"  # Replace with your actual email address
	app_name = "GoogleTakeoutMusicToMBid/1.0"  # A unique name that describes the purpose of this script

	# File path for the CSV file
	file_path = r"./musiclibrarysongs.csv"  # Change this path to where your CSV file is located


	def collect_artist_names(frame, column_count=6):
	    """
	    Gather the unique artist names found in the 'Artist Name N' columns of a DataFrame.

	    Parameters:
	        frame (pandas.DataFrame): Table whose columns may include 'Artist Name 1'
	            through 'Artist Name <column_count>'. Columns that are absent are skipped.
	        column_count (int): Highest artist column number to inspect (default 6).

	    Returns:
	        set: Stripped, non-empty artist names. A comma-separated cell contributes
	        one entry per name.
	    """
	    names = set()
	    for i in range(1, column_count + 1):
	        artist_column = f'Artist Name {i}'
	        if artist_column not in frame.columns:
	            continue
	        # NaN cells are dropped so only real values are split
	        for cell in frame[artist_column].dropna():
	            # pandas parses an all-numeric column (e.g. the band "311") as numbers,
	            # so the value is converted to text rather than assumed to be a str.
	            if isinstance(cell, float) and cell.is_integer():
	                cell = int(cell)  # Avoid turning 311.0 into the name "311.0"
	            for piece in str(cell).split(','):
	                name = piece.strip()
	                if name:  # Ignore blanks left by stray or trailing commas
	                    names.add(name)
	    return names


	# Check if the file exists before attempting to read it
	if not os.path.isfile(file_path):
	    # If the file doesn't exist, print an error message and suggest actions
	    print(f"Error: The file at '{file_path}' could not be found.")
	    print("Please ensure the file exists at the specified location and try again.")
	else:
	    try:
	        # Attempt to read the CSV file into a DataFrame, skipping problematic lines
	        df = pd.read_csv(file_path, on_bad_lines='skip')
	        print("File successfully loaded.")
	    except Exception as e:
	        # Report any error raised while reading the file
	        print(f"An error occurred while trying to load the file: {e}")
	        print("Please check the file format or permissions and try again.")

	# If the file is successfully loaded, we proceed to process the data
	if 'df' in locals():
	    # Clean up column names by stripping any leading or trailing spaces
	    # so the 'Artist Name N' lookups match the headers exactly.
	    df.columns = df.columns.str.strip()

	    # Unique artist names from 'Artist Name 1' to 'Artist Name 6' (a set removes duplicates)
	    artist_names = collect_artist_names(df)

	    # Initialize a list to store the MusicBrainz IDs (MBIDs)
	    mbids = []

	    # Define the MusicBrainz API endpoint that will be used for searching artists by name
	    api_endpoint = "https://musicbrainz.org/ws/2/artist/"

	    # Set up headers for the API request, including a custom User-Agent (required by MusicBrainz)
	    headers = {
	        'User-Agent': f'{app_name} ({email})'  # Replace with your own details
	    }

	    def get_artist_mbid(artist_name, timeout=10):
	        """
	        Queries the MusicBrainz API to retrieve the MusicBrainz ID (MBID) for a given artist.

	        Parameters:
	            artist_name (str): The name of the artist to search for.
	            timeout (float): Seconds to wait for the server before giving up (default 10).

	        Returns:
	            str: The MBID of the first artist in the search results, or None if the name
	            is blank, nothing matched, or the request failed (failures are printed).
	        """
	        # A blank name cannot match anything, so skip the network round trip entirely
	        if not artist_name or not str(artist_name).strip():
	            return None

	        # Define the query parameters for the API request
	        query_params = {
	            'query': artist_name,  # The search query string to find the artist by name
	            'fmt': 'json'  # Specify that the response should be in JSON format
	        }

	        try:
	            # requests waits indefinitely unless a timeout is given, so always pass one
	            response = requests.get(
	                api_endpoint, params=query_params, headers=headers, timeout=timeout
	            )
	            response.raise_for_status()  # Raise an error if the HTTP request failed

	            # The first entry of 'artists' is taken as the match
	            artists = response.json().get('artists')
	            if artists:
	                return artists[0]['id']
	        except requests.exceptions.RequestException as e:
	            # Network problems, timeouts and HTTP error statuses end up here
	            print(f"Error while fetching data for artist '{artist_name}': {e}")
	        except Exception as e:
	            # Anything else (e.g. an unexpected response shape) is reported, not raised
	            print(f"Unexpected error while processing artist '{artist_name}': {e}")

	        # Return None if no artist was found or an error occurred
	        return None

	    # Look up every artist in sorted order so the output file is the same from run to run
	    # (iterating the set directly yields an arbitrary order).
	    seen_mbids = set()
	    for artist in sorted(artist_names):
	        mbid = get_artist_mbid(artist)

	        if not mbid:
	            # If no MBID is found, log this information for the artist
	            print(f"MBID not found for artist: {artist}")
	        elif mbid not in seen_mbids:
	            # Different spellings can resolve to the same artist; record each MBID once
	            seen_mbids.add(mbid)
	            mbids.append({"MusicBrainzId": mbid})

	        # To avoid hitting rate limits on the MusicBrainz API, wait 1 second between requests
	        time.sleep(1)

	    # Convert the list of MBIDs into a JSON array, pretty-printed with 4 spaces of indentation
	    json_output = json.dumps(mbids, indent=4)

	    # Define the name of the output file where the JSON data will be saved
	    output_file = "artists_mbids.json"

	    # Write the JSON text using UTF-8 encoding
	    try:
	        with open(output_file, 'w', encoding='utf-8') as json_file:
	            json_file.write(json_output)
	        print(f"JSON array of MusicBrainz IDs has been saved to '{output_file}'.")
	    except Exception as e:
	        # Handle errors that occur during file writing
	        print(f"An error occurred while writing the output file: {e}")