Skip to content

Instantly share code, notes, and snippets.

@raldone01
Created July 29, 2025 15:28
Show Gist options
  • Save raldone01/abf43e561a993bbbe8615d38217197f1 to your computer and use it in GitHub Desktop.
Save raldone01/abf43e561a993bbbe8615d38217197f1 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# Filename: mygobricks_differ.py
# Description: This script filters a Rebrickable CSV file so that it only contains the parts that are listed in a Mygobricks missing-parts CSV file.
# It reads both CSV files, compares the parts by part number and color, and writes the parts that are still missing to a new CSV file.
import argparse
import sys
from pathlib import Path
import pandas as pd
# Rebrickable CSV file columns:
# "Part", "Color", "Quantity", "Is Spare"
# 87994, 0, 2, False
# 30374, 0, 1, False
# ...
def parse_rebrickable_csv(file_path):
    """
    Read the Rebrickable CSV and return a cleaned DataFrame.

    Args:
        file_path: Path to a CSV file with the columns "Part", "Color",
            "Quantity" and "Is Spare".

    Returns:
        A DataFrame with exactly those four columns: "Part" as
        whitespace-stripped text (blank cells stay missing), "Color" and
        "Quantity" as numbers (unparseable values become NaN) and "Is Spare"
        as booleans (blank or unrecognised cells become False).

    Exits the process with status 1 if the file cannot be read or parsed.
    """
    # Spellings accepted as a true "Is Spare" flag (compared case-insensitively).
    true_tokens = ["true", "1", "yes", "y", "t"]
    try:
        df = pd.read_csv(
            file_path,
            usecols=["Part", "Color", "Quantity", "Is Spare"],
            # Read both columns as text. Part IDs are identifiers: letting
            # pandas infer a numeric dtype turns "87994" into "87994.0" as
            # soon as one cell is blank. Parsing the flag ourselves avoids
            # astype(bool), which maps NaN and any non-empty text to True.
            dtype={"Part": str, "Is Spare": str},
        )
        df["Part"] = df["Part"].str.strip()
        df["Color"] = pd.to_numeric(df["Color"], errors="coerce")
        df["Quantity"] = pd.to_numeric(df["Quantity"], errors="coerce")
        spare_text = df["Is Spare"].fillna("").str.strip().str.lower()
        df["Is Spare"] = spare_text.isin(true_tokens)
        return df
    except Exception as e:
        # CLI boundary: report the problem on stderr and stop with an error code.
        print(f"Error reading Rebrickable CSV file: {e}", file=sys.stderr)
        sys.exit(1)
# Mygobricks missing parts CSV file columns:
# "LegoID", "Part", "ColorID", "Quantity", "Reason"
# "73507", "Technic Beam 1 x 11 Thick with Alternating Holes", "0", "4", "undefined"
# "87087", "Brick Special 1 x 1 with Stud on 1 Side", "72", "2", "undefined"
# "undefined", "65473", "0", "8", "undefined"
# "undefined", "38799", "0", "1", "undefined"
# ...
def parse_mygobricks_missing_csv(file_path):
    """
    Read the Mygobricks missing CSV and return a cleaned DataFrame of still-missing parts.

    The export sometimes carries the part number in "Part" and sometimes in
    "LegoID" (with a descriptive name in "Part"), and uses the literal text
    "undefined" for absent values. The returned "Part" column holds the part
    number from whichever of the two columns provides one.

    Args:
        file_path: Path to a CSV file with the columns "LegoID", "Part",
            "ColorID", "Quantity" and "Reason".

    Returns:
        A DataFrame with the columns "Part", "ColorID", "Quantity" and
        "Reason". "Part" is missing (NaN) for rows where neither source
        column contains a usable value.

    Exits the process with status 1 if the file cannot be read or parsed.
    """
    valid_lego_part_number_regex = r"^[a-z0-9]+$"
    # Cell contents that mean "no value"; they must not count as part numbers.
    placeholders = ["", "undefined"]
    try:
        df = pd.read_csv(
            file_path,
            usecols=["LegoID", "Part", "ColorID", "Quantity", "Reason"],
            # Keep identifiers as text so blank cells stay missing instead of
            # becoming the string "nan" (which would match the regex below)
            # and numeric-looking IDs are never rendered as "65473.0".
            dtype={"LegoID": str, "Part": str},
        )
        for column in ("LegoID", "Part"):
            text = df[column].str.strip()
            df[column] = text.mask(text.isin(placeholders))
        df["ColorID"] = pd.to_numeric(df["ColorID"], errors="coerce")
        df["Quantity"] = pd.to_numeric(df["Quantity"], errors="coerce")
        df["Reason"] = df["Reason"].astype(str)

        # A "Part" cell that is not a bare part number (e.g. a descriptive
        # name, or a missing value) is replaced by the row's LegoID.
        is_part_number = df["Part"].str.match(
            valid_lego_part_number_regex, na=False
        ).astype(bool)
        print(
            f"Replacing {int((~is_part_number).sum())} invalid part entries "
            "in missing CSV with their LegoID"
        )
        df["Part"] = df["Part"].where(is_part_number).fillna(df["LegoID"])
        df = df.drop(columns=["LegoID"])
        return df
    except Exception as e:
        # CLI boundary: report the problem on stderr and stop with an error code.
        print(f"Error reading Mygobricks missing CSV file: {e}", file=sys.stderr)
        sys.exit(1)
def _select_still_missing(rebrickable_df, missing_df):
    """Return the rows of rebrickable_df whose (Part, Color) pair is listed in missing_df."""
    # Build the lookup once; rows without a part or color can never match.
    missing_keys = missing_df[["Part", "ColorID"]].dropna()
    wanted = set(zip(missing_keys["Part"], missing_keys["ColorID"]))

    has_key = rebrickable_df[["Part", "Color"]].notna().all(axis=1)
    is_missing = [
        complete and (part, color) in wanted
        for complete, part, color in zip(
            has_key, rebrickable_df["Part"], rebrickable_df["Color"]
        )
    ]
    # A boolean mask keeps the original columns and dtypes (also when nothing
    # matches) and yields each Rebrickable row at most once, even if the
    # missing list repeats a part/color combination.
    mask = pd.Series(is_missing, index=rebrickable_df.index, dtype=bool)
    return rebrickable_df.loc[mask]


def main():
    """
    Loads the CSV files, filters parts that are still missing, and writes them into a new CSV.

    Command-line arguments:
        rebrickable_path: Path to the Rebrickable CSV file.
        missing_path: Path to the Mygobricks 'missing' CSV file.

    The result is written next to the Rebrickable file as
    "<original stem>_still_missing.csv".
    """
    parser = argparse.ArgumentParser(
        description="Filters the Rebrickable dataset to only include parts still missing from Mygobricks."
    )
    parser.add_argument(
        "rebrickable_path",
        type=Path,
        help="Path to the Rebrickable CSV file (e.g., 'set_parts.csv')."
    )
    parser.add_argument(
        "missing_path",
        type=Path,
        help="Path to the Mygobricks 'missing' CSV file."
    )
    args = parser.parse_args()

    rebrickable_df = parse_rebrickable_csv(args.rebrickable_path)
    missing_df = parse_mygobricks_missing_csv(args.missing_path)
    print(f"Total parts in Rebrickable CSV: {len(rebrickable_df)}")
    print(f"Total parts in Missing CSV: {len(missing_df)}")

    # Keep every Rebrickable row whose part/color combination is still missing
    output_df = _select_still_missing(rebrickable_df, missing_df)
    print(f"Parts still missing after comparison: {len(output_df)}")

    # Write the result next to the original file with a new suffix
    output_path = args.rebrickable_path.with_name(
        f"{args.rebrickable_path.stem}_still_missing.csv"
    )
    output_df.to_csv(output_path, index=False)
    print(f"Result saved to: {output_path}")


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment