Last active
April 18, 2025 21:33
-
-
Save emoose/b0bd23e050013f6dbd45c222c67f6d8f to your computer and use it in GitHub Desktop.
compare.py with symbols/includes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Save this script as asm/maps/compare.py
# Then run:
#   python compare.py map0_s01 map0_s02
# This compares the matched funcs in the first map's matchings folder against the nonmatchings funcs in the second map.
# For each pair of .s files with the same line count, it removes comments & data/func refs and uses Levenshtein distance to compare them.
# Funcs with a Levenshtein distance of 0 are then printed, along with lines to use in symbols.txt & #include lines to add to the .c file.
import os | |
import re | |
import Levenshtein | |
def count_lines_in_file(file_path):
    """Return the number of lines in ``file_path``, or -1 if it cannot be read.

    The file is decoded as UTF-8 with the ``surrogateescape`` error handler,
    so the result does not depend on the platform's default encoding and a
    stray non-UTF-8 byte can never make the count fail.

    Args:
        file_path: Path of the file whose lines are counted.

    Returns:
        The line count, or -1 (after printing an error message) when the
        file cannot be opened or read.
    """
    try:
        with open(file_path, 'r', encoding='utf-8', errors='surrogateescape') as file:
            # Stream the file line by line instead of loading it whole.
            return sum(1 for _ in file)
    except OSError as e:
        # -1 is the sentinel callers use to exclude unreadable files.
        print(f"Error reading {file_path}: {e}")
        return -1
def read_and_clean_file(file_path):
    """Read ``file_path`` and strip the parts that differ between maps.

    Removes ``/* ... */`` block comments and the address-bearing symbol
    names (``func_XXXXXXXX``, ``sharedFunc_XXXXXXXX_N_xxx`` and
    ``D_XXXXXXXX``) so that two otherwise identical functions compare equal.

    The file is decoded as UTF-8 with ``surrogateescape``: the result does
    not depend on the platform's default encoding, undecodable bytes never
    abort the read, and distinct bytes stay distinct in the returned text.

    Args:
        file_path: Path of the assembly file to read.

    Returns:
        The cleaned file content, or None (after printing an error message)
        when the file cannot be opened or read.
    """
    try:
        with open(file_path, 'r', encoding='utf-8', errors='surrogateescape') as file:
            content = file.read()
    except OSError as e:
        print(f"Error reading {file_path}: {e}")
        return None

    # Remove /* block comments */ (DOTALL so multi-line comments are covered).
    content = re.sub(r'/\*.*?\*/', '', content, flags=re.DOTALL)
    # Remove func_[8 hex chars], sharedFunc_[8 hex chars]_[digit]_[3 chars]
    # and D_[8 hex chars] references.
    content = re.sub(r'func_[a-fA-F0-9]{8}', '', content)
    content = re.sub(r'sharedFunc_[a-fA-F0-9]{8}_[0-9]{1}_[a-zA-Z0-9]{3}', '', content)
    content = re.sub(r'D_[a-fA-F0-9]{8}', '', content)
    return content
def _address_from_filename(filename):
    """Return the hex address that follows the first ``_`` in ``filename``, or None."""
    pieces = filename.split("_")
    if len(pieces) < 2:
        return None
    addr = pieces[1].split(".")[0]
    # Only accept something that can actually be written as 0x<addr>.
    if not re.fullmatch(r"[0-9A-Fa-f]+", addr):
        return None
    return addr


def _line_counts_for_asm_files(folder):
    """Map every ``.s`` file name in ``folder`` to its line count (-1 if unreadable)."""
    return {
        name: count_lines_in_file(os.path.join(folder, name))
        for name in os.listdir(folder)
        if name.endswith('.s')
    }


def find_matching_line_counts(map1, map2, maxdistance):
    """Compare matched funcs of ``map1`` against nonmatching funcs of ``map2``.

    Every ``.s`` file in ``<map1>/matchings/<map1>`` is paired with every
    ``.s`` file in ``<map2>/nonmatchings/<map2>`` that has the same line
    count. Each pair is cleaned with ``read_and_clean_file`` and compared
    with Levenshtein distance; pairs whose distance is at most
    ``maxdistance`` are printed. For pairs whose ``map1`` file name contains
    ``sharedFunc``, a symbols.txt line and a ``#include`` line are collected
    and printed at the end.

    Args:
        map1: Name of the first map folder (source of matched funcs).
        map2: Name of the second map folder (source of nonmatching funcs).
        maxdistance: Largest Levenshtein distance that is still reported.

    Returns:
        None. All results are printed to stdout.
    """
    path1 = f"{map1}/matchings/{map1}"
    path2 = f"{map2}/nonmatchings/{map2}"

    if not os.path.isdir(path1) or not os.path.isdir(path2):
        print("One or both paths are invalid directories.")
        return

    line_counts1 = _line_counts_for_asm_files(path1)
    line_counts2 = _line_counts_for_asm_files(path2)

    # Group map2 files by line count so each map1 file only visits its
    # candidates instead of scanning every map2 file.
    files2_by_count = {}
    for file2, count2 in line_counts2.items():
        files2_by_count.setdefault(count2, []).append(file2)

    matched_files = []
    for file1, count1 in line_counts1.items():
        if count1 == -1:
            # Unreadable file: never pair it with anything.
            continue
        for file2 in files2_by_count.get(count1, ()):
            matched_files.append((file1, file2, count1))

    if not matched_files:
        print("\nNo matching files found.")
        return

    symbol_lines = []
    include_lines = []

    print("Distances:")
    for file1, file2, _count in matched_files:
        content1 = read_and_clean_file(os.path.join(path1, file1))
        content2 = read_and_clean_file(os.path.join(path2, file2))
        if content1 is None or content2 is None:
            continue

        distance = Levenshtein.distance(content1, content2)
        if distance > maxdistance:
            continue

        print(f" {map1}/{file1} and {map2}/{file2}: {distance}")
        if "sharedFunc" not in file1:
            continue

        addr = _address_from_filename(file2)
        if addr is None:
            # Without a usable address the symbol/include lines would be
            # invalid, so report it rather than crash or emit garbage.
            print(f" Skipping symbol for {file2}: no hex address in file name")
            continue

        func_name = file1.split(".")[0]
        symbol_lines.append(f"{func_name} = 0x{addr}; // type:func\n")
        include_lines.append(f"#include \"maps/shared/{func_name}.h\" // 0x{addr}\n")

    print("\nSymbols.txt\n")
    print("".join(symbol_lines))
    print("\n.c Includes\n")
    print("".join(include_lines))
def build_arg_parser():
    """Build the command-line parser for this script.

    Positional arguments ``map1`` and ``map2`` name the two map folders.
    The optional ``--distance`` flag sets the largest Levenshtein distance
    that is still reported; it defaults to 0, so running the script with
    only the two map names reports exact matches only.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    # Imported here so the module has no argparse dependency unless it is
    # actually used as a command-line tool.
    import argparse

    parser = argparse.ArgumentParser(description="Find matching line counts in .s files across two folders and compare contents.")
    parser.add_argument("map1", type=str, help="First map folder")
    parser.add_argument("map2", type=str, help="Second map folder")
    parser.add_argument("--distance", type=int, default=0, help="Max distance")
    return parser


if __name__ == "__main__":
    args = build_arg_parser().parse_args()
    find_matching_line_counts(args.map1, args.map2, args.distance)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment