Skip to content

Instantly share code, notes, and snippets.

@emoose
Last active April 18, 2025 21:33
Show Gist options
  • Save emoose/b0bd23e050013f6dbd45c222c67f6d8f to your computer and use it in GitHub Desktop.
Save emoose/b0bd23e050013f6dbd45c222c67f6d8f to your computer and use it in GitHub Desktop.
compare.py with symbols/includes
# Save script as asm/maps/compare.py
# Then run
# python compare.py map0_s01 map0_s02
# This compares matching funcs in the first map's matchings folder against nonmatching funcs in the second map
# For each pair of files with the same line count, it removes comments & data/func refs and uses Levenshtein distance to compare them
# Funcs with a Levenshtein distance of 0 will then be printed, along with lines to use in symbols.txt & #include lines to add to the .c file
import os
import re
import Levenshtein
def count_lines_in_file(file_path):
    """Return the number of lines in the text file at ``file_path``.

    If the file cannot be opened or read, the error is printed and ``-1``
    is returned instead of raising, so callers can treat ``-1`` as
    "unreadable".
    """
    try:
        with open(file_path, 'r') as handle:
            total = 0
            # Iterate lazily so the whole file is never held in memory.
            for _line in handle:
                total += 1
    except Exception as err:
        print(f"Error reading {file_path}: {err}")
        return -1
    return total
def read_and_clean_file(file_path):
    """Read ``file_path`` and strip comments and address-bearing symbol names.

    The following are removed (replaced with an empty string), in order:
    ``/* ... */`` block comments (which may span lines), ``func_XXXXXXXX``
    names, ``sharedFunc_XXXXXXXX_N_YYY`` names and ``D_XXXXXXXX`` names,
    where ``XXXXXXXX`` is eight hex digits.

    Returns the cleaned text, or ``None`` (after printing the error) if the
    file could not be read.
    """
    # (pattern, flags) pairs, applied in this order.
    substitutions = (
        # /* block comments */ -- DOTALL lets them span several lines
        (r'/\*.*?\*/', re.DOTALL),
        # func_[8 hex characters]
        (r'func_[a-fA-F0-9]{8}', 0),
        # sharedFunc_[8 hex characters]_[digit]_[3 alphanumerics]
        (r'sharedFunc_[a-fA-F0-9]{8}_[0-9]{1}_[a-zA-Z0-9]{3}', 0),
        # D_[8 hex characters]
        (r'D_[a-fA-F0-9]{8}', 0),
    )
    try:
        with open(file_path, 'r') as handle:
            text = handle.read()
        for pattern, flags in substitutions:
            text = re.sub(pattern, '', text, flags=flags)
        return text
    except Exception as err:
        print(f"Error reading {file_path}: {err}")
        return None
def find_matching_line_counts(map1, map2, maxdistance):
    """Compare ``.s`` files of ``map1`` (matchings) with ``map2`` (nonmatchings).

    Files are read from ``<map1>/matchings/<map1>`` and
    ``<map2>/nonmatchings/<map2>``. Every pair of files with the same line
    count is cleaned with ``read_and_clean_file`` and compared by
    Levenshtein distance. Pairs whose distance is ``<= maxdistance`` are
    printed. For such pairs where the first file's name contains
    ``sharedFunc``, a symbols.txt line and an ``#include`` line are
    collected and printed at the end.

    Args:
        map1: Name of the first map folder (source of the matchings files).
        map2: Name of the second map folder (source of the nonmatchings files).
        maxdistance: Largest Levenshtein distance that is still reported.

    Returns:
        None. All results are written to stdout.
    """
    path1 = map1 + "/matchings/" + map1
    path2 = map2 + "/nonmatchings/" + map2
    if not os.path.isdir(path1) or not os.path.isdir(path2):
        print("One or both paths are invalid directories.")
        return

    # Count lines in .s files from path1
    line_counts1 = {
        f: count_lines_in_file(os.path.join(path1, f))
        for f in os.listdir(path1) if f.endswith('.s')
    }

    # Bucket the .s files from path2 by line count so that candidates for a
    # given file can be looked up directly instead of scanning every pair.
    # Unreadable files (count -1) are never candidates.
    files2_by_count = {}
    for f in os.listdir(path2):
        if not f.endswith('.s'):
            continue
        count2 = count_lines_in_file(os.path.join(path2, f))
        if count2 != -1:
            files2_by_count.setdefault(count2, []).append(f)

    # Find matching line counts
    matched_files = []
    for file1, count1 in line_counts1.items():
        if count1 == -1:
            continue
        for file2 in files2_by_count.get(count1, ()):
            matched_files.append((file1, file2))

    if not matched_files:
        print("\nNo matching files found.")
        return

    symbol_lines = []
    include_lines = []
    print("Distances:")
    for file1, file2 in matched_files:
        # Read and clean the contents
        content1 = read_and_clean_file(os.path.join(path1, file1))
        content2 = read_and_clean_file(os.path.join(path2, file2))
        if content1 is None or content2 is None:
            continue

        # The distance is 0 exactly when the strings are equal, so the
        # Levenshtein computation is only needed for differing contents,
        # and only when a non-zero distance could still be accepted.
        if content1 == content2:
            distance = 0
        elif maxdistance < 1:
            continue
        else:
            distance = Levenshtein.distance(content1, content2)
        if distance > maxdistance:
            continue

        print(f"  {map1}/{file1} and {map2}/{file2}: {distance}")
        if "sharedFunc" not in file1:
            continue

        # The address is the second "_"-separated field of the nonmatching
        # file name, with any extension removed.
        pieces = file2.split("_")
        if len(pieces) < 2:
            print(f"  (no address in file name {file2}, skipping symbol)")
            continue
        funcName = file1.split(".")[0]
        addr = pieces[1].split(".")[0]
        symbol_lines.append(f"{funcName} = 0x{addr}; // type:func\n")
        include_lines.append(f"#include \"maps/shared/{funcName}.h\" // 0x{addr}\n")

    print("\nSymbols.txt\n")
    print("".join(symbol_lines))
    print("\n.c Includes\n")
    print("".join(include_lines))
if __name__ == "__main__":
    # Command-line entry point: compare.py MAP1 MAP2 [--distance N]
    import argparse

    parser = argparse.ArgumentParser(description="Find matching line counts in .s files across two folders and compare contents.")
    parser.add_argument("map1", type=str, help="First map folder")
    parser.add_argument("map2", type=str, help="Second map folder")
    # Optional, so the two-argument invocation keeps reporting only
    # distance-0 pairs as before.
    parser.add_argument("--distance", type=int, default=0, help="Max Levenshtein distance to report (default: 0)")
    args = parser.parse_args()

    find_matching_line_counts(args.map1, args.map2, args.distance)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment