Skip to content

Instantly share code, notes, and snippets.

@raldone01
Last active August 1, 2025 16:10
Show Gist options
  • Save raldone01/e273f810cd78e8c4a34aec461c83597c to your computer and use it in GitHub Desktop.
Save raldone01/e273f810cd78e8c4a34aec461c83597c to your computer and use it in GitHub Desktop.
webricks.com wanted list html to rebrickable.csv
#!/usr/bin/env python
# This script parses a saved webricks.com wanted-list HTML page and converts the
# part data into a CSV file in the Rebrickable part-list import format.
import sys
import csv
from bs4 import BeautifulSoup
def parse_html_to_records(html_content):
    """
    Parses the given HTML content to extract part information.

    Every ``<tr class="item-info">`` row is treated as one part. Inside a row
    the ``<dt class="label">`` / ``<dd class="values">`` pairs labelled
    'Part ID' and 'Color ID' are read, and the quantity is taken from the
    ``value`` attribute of the row's ``<input class="input-text qty">``.
    Rows where any of these is missing or empty are reported with a warning
    and skipped.

    Args:
        html_content: A string containing the HTML source.
    Returns:
        A list of dictionaries, where each dictionary represents a part with
        the keys 'Part', 'Color' (translated color id), 'Quantity' (a string)
        and 'Is Spare' (always False).
    Raises:
        ValueError: If a row uses a color that has no entry in the color
            translation map below.
    """
    # https://web.archive.org/web/20250713215351/https://rebrickable.com/colors/
    # Colors are in a weird webrick lego hybrid we must translate them to
    # rebrickable ids.
    webrick_to_rebrickable_colors = {
        '26-Black': 0,
        '194-Light Bluish Gray': 71,
        '1-White': 15,
        '23-Blue': 1,
        '191-Bright Light Orange': 191,
        '199-Dark Bluish Gray': 72,
        '138-Dark Tan': 28,
        '28-Green': 2,
        '297-Pearl Gold': 297,
        '21-Red': 4,
        '5-Tan': 19,
        '24-Yellow': 14,
    }
    # Part ids that are replaced before export (webricks id -> replacement id).
    # NOTE(review): presumably these are ids rebrickable does not accept as-is
    # (missing variant suffix / alternate mold number) -- confirm when adding
    # new entries.
    webrick_partid_fixer = {
        '2412': '2412b',
        '3062': '3062b',
        '3839': '3839b',
        '4265c': '4265a',
        '4287': '4287c',
        '44237': '2456',
        '50746': '54200',
        '6143': '3941',
        '6590': '3713',
        '92903': '6005',
        '93888': '3007',
        '3040': '3040b',
        '3049': '3049d',
        '3069': '3069b',
        '3070': '3070b',
        '3794': '3794b',
        '4032': '4032b',
        '44301': '44301b',
        '44567': '44567b',
        '4697': '4697b',
        '48729': '48729b',
        '60470': '60470b',
    }
    wanted_labels = ('Part ID', 'Color ID')

    soup = BeautifulSoup(html_content, 'html.parser')
    records = []

    # One <tr class="item-info"> row per part in the wanted list.
    for details in soup.find_all('tr', class_='item-info'):
        # Extract values by matching labels.
        data = {}
        for dt in details.find_all('dt', class_='label'):
            label = dt.get_text(strip=True)
            if label not in wanted_labels:
                continue
            dd = dt.find_next_sibling('dd', class_='values')
            # A label without its value cell is treated as missing data
            # instead of crashing on None.
            if dd is not None:
                data[label] = dd.get_text(strip=True)

        part_id = data.get('Part ID')
        color_id = data.get('Color ID')

        # The quantity lives in the value attribute of the qty input; both the
        # input and the attribute may be absent in a malformed row.
        qty_input = details.find('input', class_='input-text qty')
        raw_quantity = qty_input.get('value') if qty_input is not None else None
        # Clean up suffixes before validating so an empty result is caught.
        quantity = (raw_quantity or '').replace(' piece', '').strip()

        if not part_id or not color_id or not quantity:
            print("Warning: Missing part information in one of the records.")
            continue

        # Translate color ID to rebrickable format; unknown colors are fatal so
        # that no part is silently exported with a wrong color.
        if color_id not in webrick_to_rebrickable_colors:
            raise ValueError(
                f"Error: Color ID '{color_id}' not found in the translation map.")
        color_id = webrick_to_rebrickable_colors[color_id]

        # Fix part ID if necessary.
        fixed_part_id = webrick_partid_fixer.get(part_id)
        if fixed_part_id is not None:
            print(
                f"Fixing part ID '{part_id}' to '{fixed_part_id}'")
            part_id = fixed_part_id

        records.append({
            'Part': part_id,
            'Color': color_id,
            'Quantity': quantity,
            'Is Spare': False,
        })
    return records
def write_records_to_csv(records, output_filename):
    """
    Writes a list of part records to a CSV file.

    Nothing is written (and no file is created) when ``records`` is empty.
    On success a summary with the record count and the summed quantities is
    printed. If the file cannot be written, an error is printed and the
    process exits with status 1.

    Args:
        records: A list of part dictionaries with the keys 'Part', 'Color',
            'Quantity' and 'Is Spare'. 'Quantity' must be convertible with
            int() for the total to be computed.
        output_filename: The name of the CSV file to create.
    """
    if not records:
        print("No valid records were found to write to CSV.")
        return

    # The CSV headers are defined by the rebrickable format.
    headers = ['Part', 'Color', 'Quantity', 'Is Spare']

    # Keep only the file I/O inside the try block so that the except clause
    # cannot be triggered by (or hide) anything else.
    try:
        with open(output_filename, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=headers)
            writer.writeheader()
            writer.writerows(records)
    except OSError:
        print(f"Error: Could not write to file {output_filename}.")
        sys.exit(1)
    else:
        # Reported only after the file has been flushed and closed.
        print(
            f"Successfully created {output_filename} with {len(records)} records.")
        # compute the total number of parts
        total_parts = sum(int(record['Quantity']) for record in records)
        print(f"Total parts: {total_parts}")
def main():
    """
    Command-line entry point: converts one wanted-list HTML file to CSV.

    Expects exactly one command-line argument: the path of an input file
    whose name ends in '.html' (checked case-insensitively). The output path
    is the input path with that trailing extension replaced by
    '_rebrickable.csv'. Exits with status 1 on a usage error, a wrong
    extension, or an input file that cannot be read.
    """
    if len(sys.argv) != 2:
        print("Usage: python script.py <input_html_file>")
        sys.exit(1)

    input_filename = sys.argv[1]
    print(f"Processing input file: {input_filename}")

    html_suffix = '.html'
    if not input_filename.lower().endswith(html_suffix):
        print("Error: Input file must have a .html extension.")
        sys.exit(1)

    # Strip the extension by length instead of str.replace(): replace() is
    # case-sensitive (so 'LIST.HTML' would map onto itself and the input file
    # would be overwritten) and would also rewrite '.html' elsewhere in the
    # path.
    output_filename = input_filename[:-len(html_suffix)] + '_rebrickable.csv'

    # Read the contents of the input HTML file
    try:
        with open(input_filename, 'r', encoding='utf-8') as f:
            html_content = f.read()
    except FileNotFoundError:
        print(f"Error: Input file not found at '{input_filename}'")
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as exc:
        # E.g. permission denied, path is a directory, or not valid UTF-8.
        print(f"Error: Could not read input file '{input_filename}': {exc}")
        sys.exit(1)

    # Parse the HTML to extract part data
    records = parse_html_to_records(html_content)

    # Write the extracted records into the output CSV file
    write_records_to_csv(records, output_filename)
# Run the conversion only when this file is executed as a script, so that
# importing it as a module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment