raldone01 · August 1, 2025 16:10
diff --git a/webricks_wanted_list_converter.py b/webricks_wanted_list_converter.py
 #!/usr/bin/env python

 # This script parses a saved webricks.com wanted-list HTML file and converts the
 # part data into Rebrickable's CSV part-list format.

 import sys
 import csv
 from bs4 import BeautifulSoup


 def parse_html_to_records(html_content):
     """
     Parse a webricks wanted-list page into rebrickable part records.

     Every ``<tr class="item-info">`` row is treated as one part. Within a
     row the ``Part ID`` and ``Color ID`` values are read from the
     ``<dt class="label">`` / ``<dd class="values">`` pairs and the quantity
     from the ``value`` attribute of the ``input-text qty`` input. Rows that
     lack any of these are skipped with a warning instead of aborting the
     whole conversion.

     Args:
       html_content: A string containing the HTML source.

     Returns:
       A list of dictionaries with the keys 'Part', 'Color', 'Quantity' and
       'Is Spare'. 'Color' is the translated rebrickable color id,
       'Quantity' is the quantity text with any ' piece' suffix removed and
       'Is Spare' is always False.

     Raises:
       ValueError: If a row uses a color that has no entry in the
         translation map.
     """
     soup = BeautifulSoup(html_content, 'html.parser')
     records = []

     # Each wanted-list entry is rendered as one <tr class="item-info"> row.
     all_details = soup.find_all('tr', class_='item-info')

     # https://web.archive.org/web/20250713215351/https://rebrickable.com/colors/
     # Webrick labels colors as '<number>-<name>'; these labels must be
     # translated to rebrickable color ids.
     webrick_to_rebrickable_colors = {
         '26-Black': 0,
         '194-Light Bluish Gray': 71,
         '1-White': 15,
         '23-Blue': 1,
         '191-Bright Light Orange': 191,
         '199-Dark Bluish Gray': 72,
         '138-Dark Tan': 28,
         '28-Green': 2,
         '297-Pearl Gold': 297,
         '21-Red': 4,
         '5-Tan': 19,
         '24-Yellow': 14,
     }

     # Part ids that are replaced by a different id in the output.
     webrick_partid_fixer = {
         '2412': '2412b',
         '3062': '3062b',
         '3839': '3839b',
         '4265c': '4265a',
         '4287': '4287c',
         '44237': '2456',
         '50746': '54200',
         '6143': '3941',
         '6590': '3713',
         '92903': '6005',
         '93888': '3007',
         '3040': '3040b',
         '3049': '3049d',
         '3069': '3069b',
         '3070': '3070b',
         '3794': '3794b',
         '4032': '4032b',
         '44301': '44301b',
         '44567': '44567b',
         '4697': '4697b',
         '48729': '48729b',
         '60470': '60470b',
     }

     # Only these labels feed the output; other labels in the row are ignored.
     wanted_labels = ('Part ID', 'Color ID')

     for details in all_details:
         # Extract values by matching labels
         data = {}
         for dt in details.find_all('dt', class_='label'):
             label = dt.get_text(strip=True)
             if label not in wanted_labels:
                 continue
             dd = dt.find_next_sibling('dd', class_='values')
             # A label without a value cell counts as missing rather than
             # crashing on None.
             if dd is not None:
                 data[label] = dd.get_text(strip=True)

         part_id = data.get('Part ID')
         color_id = data.get('Color ID')

         # The quantity input (or its value attribute) may be absent.
         qty_input = details.find('input', class_='input-text qty')
         quantity = qty_input.get('value') if qty_input is not None else None

         # Clean up suffixes before validating, so a value that is empty
         # after cleanup is reported here instead of producing a bad record.
         if quantity is not None:
             quantity = quantity.replace(' piece', '').strip()

         if not part_id or not color_id or not quantity:
             print("Warning: Missing part information in one of the records.")
             continue

         # Translate color ID to rebrickable format; unknown colors are fatal
         # because silently guessing would produce a wrong parts list.
         try:
             color_id = webrick_to_rebrickable_colors[color_id]
         except KeyError:
             raise ValueError(
                 f"Error: Color ID '{color_id}' not found in the translation map."
             ) from None

         # Fix part ID if necessary
         fixed_part_id = webrick_partid_fixer.get(part_id)
         if fixed_part_id is not None:
             print(f"Fixing part ID '{part_id}' to '{fixed_part_id}'")
             part_id = fixed_part_id

         records.append({
             'Part': part_id,
             'Color': color_id,
             'Quantity': quantity,
             'Is Spare': False,
         })

     return records


 def write_records_to_csv(records, output_filename):
     """
     Save part records as a CSV file in the rebrickable column layout.

     Nothing is written when ``records`` is empty; a notice is printed
     instead. After a successful write the record count and the sum of all
     quantities are printed. If the file cannot be written the process exits
     with status 1.

     Args:
       records: A list of part dictionaries.
       output_filename: The name of the CSV file to create.
     """
     if not records:
         print("No valid records were found to write to CSV.")
         return

     # Column names and order follow the rebrickable format.
     column_names = ['Part', 'Color', 'Quantity', 'Is Spare']

     try:
         with open(output_filename, 'w', newline='', encoding='utf-8') as out_file:
             csv_out = csv.DictWriter(out_file, fieldnames=column_names)
             csv_out.writeheader()
             for row in records:
                 csv_out.writerow(row)

         record_count = len(records)
         print(
             f"Successfully created {output_filename} with {record_count} records.")

         # Tally the individual pieces across all rows.
         piece_total = 0
         for row in records:
             piece_total += int(row['Quantity'])
         print(f"Total parts: {piece_total}")
     except IOError:
         print(f"Error: Could not write to file {output_filename}.")
         sys.exit(1)


 def main():
     """
     Command-line entry point: convert one wanted-list HTML file to CSV.

     Expects exactly one argument, the path of the saved HTML page, which
     must end in '.html' (case-insensitive). The CSV is written next to it
     with that extension replaced by '_rebrickable.csv'. Exits with status 1
     on usage errors or when the input file cannot be read.
     """
     if len(sys.argv) != 2:
         print("Usage: python script.py <input_html_file>")
         sys.exit(1)

     input_filename = sys.argv[1]
     print(f"Processing input file: {input_filename}")

     html_suffix = '.html'
     if not input_filename.lower().endswith(html_suffix):
         print("Error: Input file must have a .html extension.")
         sys.exit(1)

     # Swap only the trailing extension. str.replace() is case-sensitive and
     # replaces every occurrence, so 'LIST.HTML' would map to itself (and the
     # input file would be overwritten with CSV) and a '.html' inside a
     # directory name would be rewritten as well.
     output_filename = input_filename[:-len(html_suffix)] + '_rebrickable.csv'

     # Read the contents of the input HTML file
     try:
         with open(input_filename, 'r', encoding='utf-8') as f:
             html_content = f.read()
     except FileNotFoundError:
         print(f"Error: Input file not found at '{input_filename}'")
         sys.exit(1)
     except OSError as err:
         # e.g. permission denied or the path is a directory
         print(f"Error: Could not read input file '{input_filename}': {err}")
         sys.exit(1)

     # Parse the HTML to extract part data
     records = parse_html_to_records(html_content)

     # Write the extracted records into the output CSV file
     write_records_to_csv(records, output_filename)


 # Run the conversion only when this file is executed as a script, so that
 # importing the module does not trigger it.
 if __name__ == "__main__":
    main()
	#!/usr/bin/env python

	# This script parses a saved webricks.com wanted-list HTML file and converts the
	# part data into Rebrickable's CSV part-list format.

	import sys
	import csv
	from bs4 import BeautifulSoup


	def parse_html_to_records(html_content):
	    """
	    Parse a webricks wanted-list page into rebrickable part records.

	    Every ``<tr class="item-info">`` row is treated as one part. Within a
	    row the ``Part ID`` and ``Color ID`` values are read from the
	    ``<dt class="label">`` / ``<dd class="values">`` pairs and the quantity
	    from the ``value`` attribute of the ``input-text qty`` input. Rows that
	    lack any of these are skipped with a warning instead of aborting the
	    whole conversion.

	    Args:
	      html_content: A string containing the HTML source.

	    Returns:
	      A list of dictionaries with the keys 'Part', 'Color', 'Quantity' and
	      'Is Spare'. 'Color' is the translated rebrickable color id,
	      'Quantity' is the quantity text with any ' piece' suffix removed and
	      'Is Spare' is always False.

	    Raises:
	      ValueError: If a row uses a color that has no entry in the
	        translation map.
	    """
	    soup = BeautifulSoup(html_content, 'html.parser')
	    records = []

	    # Each wanted-list entry is rendered as one <tr class="item-info"> row.
	    all_details = soup.find_all('tr', class_='item-info')

	    # https://web.archive.org/web/20250713215351/https://rebrickable.com/colors/
	    # Webrick labels colors as '<number>-<name>'; these labels must be
	    # translated to rebrickable color ids.
	    webrick_to_rebrickable_colors = {
	        '26-Black': 0,
	        '194-Light Bluish Gray': 71,
	        '1-White': 15,
	        '23-Blue': 1,
	        '191-Bright Light Orange': 191,
	        '199-Dark Bluish Gray': 72,
	        '138-Dark Tan': 28,
	        '28-Green': 2,
	        '297-Pearl Gold': 297,
	        '21-Red': 4,
	        '5-Tan': 19,
	        '24-Yellow': 14,
	    }

	    # Part ids that are replaced by a different id in the output.
	    webrick_partid_fixer = {
	        '2412': '2412b',
	        '3062': '3062b',
	        '3839': '3839b',
	        '4265c': '4265a',
	        '4287': '4287c',
	        '44237': '2456',
	        '50746': '54200',
	        '6143': '3941',
	        '6590': '3713',
	        '92903': '6005',
	        '93888': '3007',
	        '3040': '3040b',
	        '3049': '3049d',
	        '3069': '3069b',
	        '3070': '3070b',
	        '3794': '3794b',
	        '4032': '4032b',
	        '44301': '44301b',
	        '44567': '44567b',
	        '4697': '4697b',
	        '48729': '48729b',
	        '60470': '60470b',
	    }

	    # Only these labels feed the output; other labels in the row are ignored.
	    wanted_labels = ('Part ID', 'Color ID')

	    for details in all_details:
	        # Extract values by matching labels
	        data = {}
	        for dt in details.find_all('dt', class_='label'):
	            label = dt.get_text(strip=True)
	            if label not in wanted_labels:
	                continue
	            dd = dt.find_next_sibling('dd', class_='values')
	            # A label without a value cell counts as missing rather than
	            # crashing on None.
	            if dd is not None:
	                data[label] = dd.get_text(strip=True)

	        part_id = data.get('Part ID')
	        color_id = data.get('Color ID')

	        # The quantity input (or its value attribute) may be absent.
	        qty_input = details.find('input', class_='input-text qty')
	        quantity = qty_input.get('value') if qty_input is not None else None

	        # Clean up suffixes before validating, so a value that is empty
	        # after cleanup is reported here instead of producing a bad record.
	        if quantity is not None:
	            quantity = quantity.replace(' piece', '').strip()

	        if not part_id or not color_id or not quantity:
	            print("Warning: Missing part information in one of the records.")
	            continue

	        # Translate color ID to rebrickable format; unknown colors are fatal
	        # because silently guessing would produce a wrong parts list.
	        try:
	            color_id = webrick_to_rebrickable_colors[color_id]
	        except KeyError:
	            raise ValueError(
	                f"Error: Color ID '{color_id}' not found in the translation map."
	            ) from None

	        # Fix part ID if necessary
	        fixed_part_id = webrick_partid_fixer.get(part_id)
	        if fixed_part_id is not None:
	            print(f"Fixing part ID '{part_id}' to '{fixed_part_id}'")
	            part_id = fixed_part_id

	        records.append({
	            'Part': part_id,
	            'Color': color_id,
	            'Quantity': quantity,
	            'Is Spare': False,
	        })

	    return records


	def write_records_to_csv(records, output_filename):
	    """
	    Save part records as a CSV file in the rebrickable column layout.

	    Nothing is written when ``records`` is empty; a notice is printed
	    instead. After a successful write the record count and the sum of all
	    quantities are printed. If the file cannot be written the process exits
	    with status 1.

	    Args:
	      records: A list of part dictionaries.
	      output_filename: The name of the CSV file to create.
	    """
	    if not records:
	        print("No valid records were found to write to CSV.")
	        return

	    # The CSV headers are defined by the rebrickable format.
	    headers = ['Part', 'Color', 'Quantity', 'Is Spare']

	    try:
	        with open(output_filename, 'w', newline='', encoding='utf-8') as csvfile:
	            writer = csv.DictWriter(csvfile, fieldnames=headers)
	            writer.writeheader()
	            writer.writerows(records)
	        print(
	            f"Successfully created {output_filename} with {len(records)} records.")

	        # compute the total number of parts
	        total_parts = sum(int(record['Quantity']) for record in records)
	        print(f"Total parts: {total_parts}")
	    except IOError:
	        print(f"Error: Could not write to file {output_filename}.")
	        sys.exit(1)


	def main():
	    """
	    Command-line entry point: convert one wanted-list HTML file to CSV.

	    Expects exactly one argument, the path of the saved HTML page, which
	    must end in '.html' (case-insensitive). The CSV is written next to it
	    with that extension replaced by '_rebrickable.csv'. Exits with status 1
	    on usage errors or when the input file cannot be read.
	    """
	    if len(sys.argv) != 2:
	        print("Usage: python script.py <input_html_file>")
	        sys.exit(1)

	    input_filename = sys.argv[1]
	    print(f"Processing input file: {input_filename}")

	    html_suffix = '.html'
	    if not input_filename.lower().endswith(html_suffix):
	        print("Error: Input file must have a .html extension.")
	        sys.exit(1)

	    # Swap only the trailing extension. str.replace() is case-sensitive and
	    # replaces every occurrence, so 'LIST.HTML' would map to itself (and the
	    # input file would be overwritten with CSV) and a '.html' inside a
	    # directory name would be rewritten as well.
	    output_filename = input_filename[:-len(html_suffix)] + '_rebrickable.csv'

	    # Read the contents of the input HTML file
	    try:
	        with open(input_filename, 'r', encoding='utf-8') as f:
	            html_content = f.read()
	    except FileNotFoundError:
	        print(f"Error: Input file not found at '{input_filename}'")
	        sys.exit(1)
	    except OSError as err:
	        # e.g. permission denied or the path is a directory
	        print(f"Error: Could not read input file '{input_filename}': {err}")
	        sys.exit(1)

	    # Parse the HTML to extract part data
	    records = parse_html_to_records(html_content)

	    # Write the extracted records into the output CSV file
	    write_records_to_csv(records, output_filename)


	# Run the conversion only when this file is executed as a script, so that
	# importing the module does not trigger it.
	if __name__ == "__main__":
	    main()
No results found