raldone01 · July 29, 2025 15:28
diff --git a/mygobricks_differ.py b/mygobricks_differ.py
 #!/usr/bin/env python
 # Filename: mygobricks_differ.py
 # Description: This script filters a Rebrickable CSV file to only include parts that are still missing from a Mygobricks missing parts CSV file.
 # It reads both CSV files, compares the parts, and outputs a new CSV file with the remaining missing parts.

 import argparse
 import sys
 from pathlib import Path
 import pandas as pd


 # Rebrickable CSV file columns:
 # "Part", "Color", "Quantity", "Is Spare"
 # 87994, 0, 2, False
 # 30374, 0, 1, False
 # ...


 def parse_rebrickable_csv(file_path):
    """
    Read the Rebrickable CSV and return a cleaned DataFrame.

    Args:
        file_path: Path (or path string) of the Rebrickable parts CSV. It must
            contain the columns "Part", "Color", "Quantity" and "Is Spare".

    Returns:
        DataFrame with "Part" as stripped text, "Color" and "Quantity" as
        numbers (unparseable values become NaN) and "Is Spare" as booleans.

    Raises:
        SystemExit: With exit code 1 if the file cannot be read or parsed.
    """
    # Spellings accepted as "true" in the "Is Spare" column; anything else,
    # including an empty cell, is treated as "not a spare".
    true_tokens = {"true", "t", "yes", "y", "1"}
    try:
        df = pd.read_csv(
            file_path,
            usecols=["Part", "Color", "Quantity", "Is Spare"],
            # Read identifiers and flags as raw text: numeric inference would
            # strip leading zeros from part IDs (or render them as "123.0"
            # when a cell is blank), and the flag must be parsed explicitly.
            dtype={"Part": str, "Is Spare": str},
        )
        df["Part"] = df["Part"].str.strip()
        df["Color"] = pd.to_numeric(df["Color"], errors='coerce')
        df["Quantity"] = pd.to_numeric(df["Quantity"], errors='coerce')
        # astype(bool) would map every non-empty string (even "False " or
        # "f") to True, so compare against the known true spellings instead.
        df["Is Spare"] = (
            df["Is Spare"].str.strip().str.lower().isin(true_tokens)
        )
        return df
    except Exception as e:
        print(f"Error reading Rebrickable CSV file: {e}")
        sys.exit(1)


 # Mygobricks missing parts CSV file columns:
 # "LegoID", "Part", "ColorID", "Quantity", "Reason"
 # "73507", "Technic Beam 1 x 11 Thick with Alternating Holes", "0", "4", "undefined"
 # "87087", "Brick Special 1 x 1 with Stud on 1 Side", "72", "2", "undefined"
 # "undefined", "65473", "0", "8", "undefined"
 # "undefined", "38799", "0", "1", "undefined"
 # ...


 def parse_mygobricks_missing_csv(file_path):
    """
    Read the Mygobricks "missing" CSV and return a cleaned DataFrame.

    In this export the part number may sit in either "Part" or "LegoID": when
    "Part" holds a description (or nothing usable), "LegoID" is used instead.

    Args:
        file_path: Path (or path string) of the Mygobricks missing-parts CSV.
            It must contain the columns "LegoID", "Part", "ColorID",
            "Quantity" and "Reason".

    Returns:
        DataFrame with the columns "Part", "ColorID", "Quantity" and "Reason".
        "Part" is the resolved part number, or missing (NaN) when neither
        column provided one. "ColorID" and "Quantity" are numeric, with
        unparseable values turned into NaN.

    Raises:
        SystemExit: With exit code 1 if the file cannot be read or parsed.
    """
    valid_lego_part_number_regex = r"^[a-z0-9]+$"
    try:
        df = pd.read_csv(
            file_path,
            usecols=["LegoID", "Part", "ColorID", "Quantity", "Reason"],
            # Keep identifiers as text so IDs are not turned into numbers
            # (e.g. "73507.0") when the column also contains blanks.
            dtype={"LegoID": str, "Part": str},
        )
        # Treat blanks and the exporter's "undefined" placeholder as missing.
        # Without this, "undefined" (and "nan" from str(NaN)) would pass the
        # part-number regex below and block the LegoID fallback.
        for column in ("LegoID", "Part"):
            cleaned = df[column].str.strip()
            df[column] = cleaned.mask(cleaned.isin(["", "undefined"]))

        df["ColorID"] = pd.to_numeric(df["ColorID"], errors='coerce')
        df["Quantity"] = pd.to_numeric(df["Quantity"], errors='coerce')
        df["Reason"] = df["Reason"].astype(str)

        # Remove invalid part numbers that don't match the expected format
        # (typically a part description stored in the "Part" column).
        looks_valid = df["Part"].str.match(
            valid_lego_part_number_regex, na=False).astype(bool)
        invalid = df["Part"].notna() & ~looks_valid
        print(f"Dropping {invalid.sum()} invalid part entries from missing CSV")
        df["Part"] = df["Part"].mask(invalid)

        # If Part is missing, use LegoID as fallback
        df["Part"] = df["Part"].combine_first(df["LegoID"])
        df = df.drop(columns=["LegoID"])

        return df
    except Exception as e:
        print(f"Error reading Mygobricks missing CSV file: {e}")
        sys.exit(1)


 def main():
    """
    Load both CSV files, keep the Rebrickable rows that are still missing,
    and write them to a new CSV next to the Rebrickable file.

    A Rebrickable row is kept when its ("Part", "Color") pair appears as
    ("Part", "ColorID") in the Mygobricks missing list. Each matching row is
    written once, even if the missing list repeats the same pair, and the
    output keeps the Rebrickable columns (including the header when nothing
    matches).

    Command-line arguments:
        rebrickable_path: Path to the Rebrickable CSV file.
        missing_path: Path to the Mygobricks "missing" CSV file.
    """
    parser = argparse.ArgumentParser(
        description="Filters the Rebrickable dataset to only include parts still missing from Mygobricks."
    )
    parser.add_argument(
        "rebrickable_path",
        type=Path,
        help="Path to the Rebrickable CSV file (e.g., 'set_parts.csv')."
    )
    parser.add_argument(
        "missing_path",
        type=Path,
        help="Path to the Mygobricks 'missing' CSV file."
    )
    args = parser.parse_args()

    rebrickable_df = parse_rebrickable_csv(args.rebrickable_path)
    missing_df = parse_mygobricks_missing_csv(args.missing_path)

    print(f"Total parts in Rebrickable CSV: {len(rebrickable_df)}")
    print(f"Total parts in Missing CSV: {len(missing_df)}")

    # Unique (part, color) pairs that are still missing. Rows with an unknown
    # part or color are dropped because they cannot identify a part; a set
    # also gives constant-time lookups instead of a nested row-by-row scan.
    missing_keys = set(
        missing_df[["Part", "ColorID"]]
        .dropna()
        .itertuples(index=False, name=None)
    )

    # Boolean mask aligned with rebrickable_df; filtering with a mask keeps
    # the original columns and dtypes even when no row matches.
    still_missing = pd.Series(
        [
            key in missing_keys
            for key in zip(rebrickable_df["Part"], rebrickable_df["Color"])
        ],
        index=rebrickable_df.index,
        dtype=bool,
    )
    output_df = rebrickable_df[still_missing]

    print(f"Parts still missing after comparison: {len(output_df)}")

    # Write the result next to the original file with a new suffix
    output_path = args.rebrickable_path.with_name(
        f"{args.rebrickable_path.stem}_still_missing.csv"
    )
    output_df.to_csv(output_path, index=False)
    print(f"Result saved to: {output_path}")


 # Run the command-line entry point only when this file is executed as a
 # script, not when it is imported as a module.
 if __name__ == "__main__":
    main()
	#!/usr/bin/env python
	# Filename: mygobricks_differ.py
	# Description: This script filters a Rebrickable CSV file to only include parts that are still missing from a Mygobricks missing parts CSV file.
	# It reads both CSV files, compares the parts, and outputs a new CSV file with the remaining missing parts.

	import argparse
	import sys
	from pathlib import Path
	import pandas as pd


	# Rebrickable CSV file columns:
	# "Part", "Color", "Quantity", "Is Spare"
	# 87994, 0, 2, False
	# 30374, 0, 1, False
	# ...


	def parse_rebrickable_csv(file_path):
	    """
	    Read the Rebrickable CSV and return a cleaned DataFrame.

	    Args:
	        file_path: Path (or path string) of the Rebrickable parts CSV. It
	            must contain the columns "Part", "Color", "Quantity" and
	            "Is Spare".

	    Returns:
	        DataFrame with "Part" as stripped text, "Color" and "Quantity" as
	        numbers (unparseable values become NaN) and "Is Spare" as booleans.

	    Raises:
	        SystemExit: With exit code 1 if the file cannot be read or parsed.
	    """
	    # Spellings accepted as "true" in the "Is Spare" column; anything else,
	    # including an empty cell, is treated as "not a spare".
	    true_tokens = {"true", "t", "yes", "y", "1"}
	    try:
	        df = pd.read_csv(
	            file_path,
	            usecols=["Part", "Color", "Quantity", "Is Spare"],
	            # Read identifiers and flags as raw text: numeric inference
	            # would strip leading zeros from part IDs (or render them as
	            # "123.0" when a cell is blank), and the flag must be parsed
	            # explicitly.
	            dtype={"Part": str, "Is Spare": str},
	        )
	        df["Part"] = df["Part"].str.strip()
	        df["Color"] = pd.to_numeric(df["Color"], errors='coerce')
	        df["Quantity"] = pd.to_numeric(df["Quantity"], errors='coerce')
	        # astype(bool) would map every non-empty string (even "False " or
	        # "f") to True, so compare against the known true spellings.
	        df["Is Spare"] = (
	            df["Is Spare"].str.strip().str.lower().isin(true_tokens)
	        )
	        return df
	    except Exception as e:
	        print(f"Error reading Rebrickable CSV file: {e}")
	        sys.exit(1)


	# Mygobricks missing parts CSV file columns:
	# "LegoID", "Part", "ColorID", "Quantity", "Reason"
	# "73507", "Technic Beam 1 x 11 Thick with Alternating Holes", "0", "4", "undefined"
	# "87087", "Brick Special 1 x 1 with Stud on 1 Side", "72", "2", "undefined"
	# "undefined", "65473", "0", "8", "undefined"
	# "undefined", "38799", "0", "1", "undefined"
	# ...


	def parse_mygobricks_missing_csv(file_path):
	    """
	    Read the Mygobricks "missing" CSV and return a cleaned DataFrame.

	    In this export the part number may sit in either "Part" or "LegoID":
	    when "Part" holds a description (or nothing usable), "LegoID" is used
	    instead.

	    Args:
	        file_path: Path (or path string) of the Mygobricks missing-parts
	            CSV. It must contain the columns "LegoID", "Part", "ColorID",
	            "Quantity" and "Reason".

	    Returns:
	        DataFrame with the columns "Part", "ColorID", "Quantity" and
	        "Reason". "Part" is the resolved part number, or missing (NaN)
	        when neither column provided one. "ColorID" and "Quantity" are
	        numeric, with unparseable values turned into NaN.

	    Raises:
	        SystemExit: With exit code 1 if the file cannot be read or parsed.
	    """
	    valid_lego_part_number_regex = r"^[a-z0-9]+$"
	    try:
	        df = pd.read_csv(
	            file_path,
	            usecols=["LegoID", "Part", "ColorID", "Quantity", "Reason"],
	            # Keep identifiers as text so IDs are not turned into numbers
	            # (e.g. "73507.0") when the column also contains blanks.
	            dtype={"LegoID": str, "Part": str},
	        )
	        # Treat blanks and the exporter's "undefined" placeholder as
	        # missing. Without this, "undefined" (and "nan" from str(NaN))
	        # would pass the part-number regex below and block the LegoID
	        # fallback.
	        for column in ("LegoID", "Part"):
	            cleaned = df[column].str.strip()
	            df[column] = cleaned.mask(cleaned.isin(["", "undefined"]))

	        df["ColorID"] = pd.to_numeric(df["ColorID"], errors='coerce')
	        df["Quantity"] = pd.to_numeric(df["Quantity"], errors='coerce')
	        df["Reason"] = df["Reason"].astype(str)

	        # Remove invalid part numbers that don't match the expected format
	        # (typically a part description stored in the "Part" column).
	        looks_valid = df["Part"].str.match(
	            valid_lego_part_number_regex, na=False).astype(bool)
	        invalid = df["Part"].notna() & ~looks_valid
	        print(f"Dropping {invalid.sum()} invalid part entries from missing CSV")
	        df["Part"] = df["Part"].mask(invalid)

	        # If Part is missing, use LegoID as fallback
	        df["Part"] = df["Part"].combine_first(df["LegoID"])
	        df = df.drop(columns=["LegoID"])

	        return df
	    except Exception as e:
	        print(f"Error reading Mygobricks missing CSV file: {e}")
	        sys.exit(1)


	def main():
	    """
	    Load both CSV files, keep the Rebrickable rows that are still missing,
	    and write them to a new CSV next to the Rebrickable file.

	    A Rebrickable row is kept when its ("Part", "Color") pair appears as
	    ("Part", "ColorID") in the Mygobricks missing list. Each matching row
	    is written once, even if the missing list repeats the same pair, and
	    the output keeps the Rebrickable columns (including the header when
	    nothing matches).

	    Command-line arguments:
	        rebrickable_path: Path to the Rebrickable CSV file.
	        missing_path: Path to the Mygobricks "missing" CSV file.
	    """
	    parser = argparse.ArgumentParser(
	        description="Filters the Rebrickable dataset to only include parts still missing from Mygobricks."
	    )
	    parser.add_argument(
	        "rebrickable_path",
	        type=Path,
	        help="Path to the Rebrickable CSV file (e.g., 'set_parts.csv')."
	    )
	    parser.add_argument(
	        "missing_path",
	        type=Path,
	        help="Path to the Mygobricks 'missing' CSV file."
	    )
	    args = parser.parse_args()

	    rebrickable_df = parse_rebrickable_csv(args.rebrickable_path)
	    missing_df = parse_mygobricks_missing_csv(args.missing_path)

	    print(f"Total parts in Rebrickable CSV: {len(rebrickable_df)}")
	    print(f"Total parts in Missing CSV: {len(missing_df)}")

	    # Unique (part, color) pairs that are still missing. Rows with an
	    # unknown part or color are dropped because they cannot identify a
	    # part; a set also gives constant-time lookups instead of a nested
	    # row-by-row scan.
	    missing_keys = set(
	        missing_df[["Part", "ColorID"]]
	        .dropna()
	        .itertuples(index=False, name=None)
	    )

	    # Boolean mask aligned with rebrickable_df; filtering with a mask keeps
	    # the original columns and dtypes even when no row matches.
	    still_missing = pd.Series(
	        [
	            key in missing_keys
	            for key in zip(rebrickable_df["Part"], rebrickable_df["Color"])
	        ],
	        index=rebrickable_df.index,
	        dtype=bool,
	    )
	    output_df = rebrickable_df[still_missing]

	    print(f"Parts still missing after comparison: {len(output_df)}")

	    # Write the result next to the original file with a new suffix
	    output_path = args.rebrickable_path.with_name(
	        f"{args.rebrickable_path.stem}_still_missing.csv"
	    )
	    output_df.to_csv(output_path, index=False)
	    print(f"Result saved to: {output_path}")


	# Run the command-line entry point only when this file is executed as a
	# script, not when it is imported as a module.
	if __name__ == "__main__":
	    main()
No results found