Skip to content

Instantly share code, notes, and snippets.

@emoose
Last active April 10, 2025 16:00
Show Gist options
  • Save emoose/904c1660dd966dbcdf42b17f73da5ff8 to your computer and use it in GitHub Desktop.
Save emoose/904c1660dd966dbcdf42b17f73da5ff8 to your computer and use it in GitHub Desktop.
sh1 map compare
# Save script as asm/maps/compare.py
# Then run
# python compare.py map0_s01 map0_s02 0
# This compares the line counts of the .s files in each map's nonmatchings folder.
# For each pair of files with the same line count, it strips comments and data/func references, then compares the contents using Levenshtein distance.
# The last parameter is the maximum distance to show: use 0 to get exact matches, or raise it to find near matches.
# (This only works with asm files in the nonmatchings folder, so it can't compare map0_s00, because some of its functions have already been matched.)
import os
import re
import Levenshtein
def count_lines_in_file(file_path):
    """Return the number of lines in the text file at *file_path*.

    Args:
        file_path: Path of the file to read.

    Returns:
        The line count (a final line without a trailing newline still
        counts), or -1 if the file could not be opened or read. An error
        message is printed in the failure case.
    """
    try:
        # Decode explicitly instead of relying on the platform default, and
        # replace undecodable bytes rather than failing: only the number of
        # lines matters here, so a stray byte must not make a readable file
        # look unreadable.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
            return sum(1 for _ in file)
    except (OSError, ValueError) as e:
        # OSError: missing file, permissions, is-a-directory, ...
        # ValueError: malformed path (e.g. embedded NUL byte).
        print(f"Error reading {file_path}: {e}")
        return -1
def read_and_clean_file(file_path):
    """Read *file_path* and strip the parts that differ only by address.

    The following are removed from the file's text:
      * ``/* ... */`` block comments (including multi-line ones),
      * ``func_XXXXXXXX`` references (8 hex digits),
      * ``D_XXXXXXXX`` references (8 hex digits).

    Args:
        file_path: Path of the file to read.

    Returns:
        The cleaned text, or None if the file could not be opened or read.
        An error message is printed in the failure case.
    """
    try:
        # Explicit encoding keeps results identical across platforms;
        # undecodable bytes are replaced so one stray byte does not make the
        # whole file unusable.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
            content = file.read()
    except (OSError, ValueError) as e:
        # OSError: missing file, permissions, is-a-directory, ...
        # ValueError: malformed path (e.g. embedded NUL byte).
        print(f"Error reading {file_path}: {e}")
        return None

    # Remove /* block comments */; DOTALL lets '.' span newlines so that
    # multi-line comments are removed as well.
    content = re.sub(r'/\*.*?\*/', '', content, flags=re.DOTALL)
    # Remove func_[8 hex characters] and D_[8 hex characters]. The two passes
    # are kept separate and in this order so the output is unchanged.
    content = re.sub(r'func_[a-fA-F0-9]{8}', '', content)
    content = re.sub(r'D_[a-fA-F0-9]{8}', '', content)
    return content
def find_matching_line_counts(map1, map2, maxdistance):
    """Print pairs of .s files from two maps whose cleaned contents are close.

    Files are looked up in ``<map>/nonmatchings/<map>`` for each map. Only
    files with the same line count are paired; each pair is then compared by
    Levenshtein distance after cleaning with ``read_and_clean_file``, and the
    pair is printed when the distance is at most *maxdistance*.

    Args:
        map1: Name of the first map folder.
        map2: Name of the second map folder.
        maxdistance: Largest distance that is still reported (0 means only
            pairs whose cleaned contents are identical).

    Returns:
        None. Results and diagnostics are printed to stdout.
    """
    path1 = map1 + "/nonmatchings/" + map1
    path2 = map2 + "/nonmatchings/" + map2
    if not os.path.isdir(path1) or not os.path.isdir(path2):
        print("One or both paths are invalid directories.")
        return

    def count_asm_lines(folder):
        # Map each .s file name in `folder` to its line count (-1 = unreadable).
        return {
            name: count_lines_in_file(os.path.join(folder, name))
            for name in os.listdir(folder) if name.endswith('.s')
        }

    line_counts1 = count_asm_lines(path1)
    line_counts2 = count_asm_lines(path2)

    # Group the second folder's files by line count so each file from the
    # first folder finds its candidates with one lookup instead of a full
    # scan. Unreadable files (-1) are left out, so they never match.
    files2_by_count = {}
    for file2, count2 in line_counts2.items():
        if count2 != -1:
            files2_by_count.setdefault(count2, []).append(file2)

    matched_files = [
        (file1, file2)
        for file1, count1 in line_counts1.items()
        for file2 in files2_by_count.get(count1, ())
    ]

    if not matched_files:
        print("\nNo matching files found.")
        return

    # A file can appear in many pairs; read and clean each one only once.
    cleaned_by_path = {}

    def cleaned_content(path):
        if path not in cleaned_by_path:
            cleaned_by_path[path] = read_and_clean_file(path)
        return cleaned_by_path[path]

    print("Distances:")
    for file1, file2 in matched_files:
        content1 = cleaned_content(os.path.join(path1, file1))
        content2 = cleaned_content(os.path.join(path2, file2))
        if content1 is None or content2 is None:
            continue
        # The Levenshtein distance is never smaller than the difference in
        # length, so pairs that already exceed the limit can skip the
        # expensive comparison without changing what gets printed.
        if abs(len(content1) - len(content2)) > maxdistance:
            continue
        distance = Levenshtein.distance(content1, content2)
        if distance <= maxdistance:
            print(f" {map1}/{file1} and {map2}/{file2}: {distance}")
if __name__ == "__main__":
    import argparse

    # Command-line entry point: two map folder names plus the largest
    # distance to report.
    cli = argparse.ArgumentParser(
        description="Find matching line counts in .s files across two folders and compare contents."
    )
    positional_arguments = (
        ("map1", str, "First map folder"),
        ("map2", str, "Second map folder"),
        ("distance", int, "Max distance"),
    )
    for arg_name, arg_type, arg_help in positional_arguments:
        cli.add_argument(arg_name, type=arg_type, help=arg_help)

    options = cli.parse_args()
    find_matching_line_counts(options.map1, options.map2, options.distance)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment