Last active
April 10, 2025 16:00
-
-
Save emoose/904c1660dd966dbcdf42b17f73da5ff8 to your computer and use it in GitHub Desktop.
sh1 map compare
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Save this script as asm/maps/compare.py, then run:
#   python compare.py map0_s01 map0_s02 0
# This compares the line counts of the .s files in each map's nonmatchings folder.
# For each pair whose line counts match, it removes comments and data/func refs, then uses Levenshtein distance to compare them.
# The last param is the max distance to show: use 0 to get exact matches, or raise it to find near matches if wanted.
# (Since this only works with asm files in the nonmatchings folder, it can't compare map0_s00, because we've already matched some of its functions.)
import os
import re
from collections import defaultdict

import Levenshtein
def count_lines_in_file(file_path):
    """Return the number of lines in the text file at *file_path*.

    If the file cannot be opened or read, the error is printed and -1 is
    returned instead of raising, so callers can treat -1 as "unreadable".
    """
    try:
        with open(file_path, 'r') as handle:
            total = 0
            for _line in handle:
                total += 1
    except Exception as err:
        print(f"Error reading {file_path}: {err}")
        return -1
    return total
def read_and_clean_file(file_path):
    """Read *file_path* and strip the parts that are not compared.

    Removed, in order: ``/* ... */`` block comments (which may span lines),
    ``func_`` followed by 8 hex digits, and ``D_`` followed by 8 hex digits.

    Returns the cleaned text, or None (after printing the error) if the file
    could not be read.
    """
    # (pattern, flags) pairs, applied one after another to the file text.
    removals = (
        (r'/\*.*?\*/', re.DOTALL),       # block comments, possibly multi-line
        (r'func_[a-fA-F0-9]{8}', 0),     # function references
        (r'D_[a-fA-F0-9]{8}', 0),        # data references
    )
    try:
        with open(file_path, 'r') as handle:
            text = handle.read()
            for pattern, flags in removals:
                text = re.sub(pattern, '', text, flags=flags)
            return text
    except Exception as err:
        print(f"Error reading {file_path}: {err}")
        return None
def find_matching_line_counts(map1, map2, maxdistance):
    """Print pairs of similar .s files from two maps' nonmatchings folders.

    Files are first paired by equal line count, which is a cheap pre-filter.
    Each pair is then cleaned with read_and_clean_file and compared using
    Levenshtein distance; pairs whose distance is at most *maxdistance* are
    printed.

    Args:
        map1: First map folder; files are read from
            ``<map1>/nonmatchings/<map1>``.
        map2: Second map folder, laid out the same way.
        maxdistance: Largest edit distance to report (0 shows only files
            that are identical after cleaning).

    Returns:
        None. Results and read errors are printed to stdout.
    """
    path1 = f"{map1}/nonmatchings/{map1}"
    path2 = f"{map2}/nonmatchings/{map2}"

    if not (os.path.isdir(path1) and os.path.isdir(path2)):
        print("One or both paths are invalid directories.")
        return

    # os.listdir returns names in arbitrary order; sort them so the report
    # is the same from run to run.
    names1 = sorted(f for f in os.listdir(path1) if f.endswith('.s'))
    names2 = sorted(f for f in os.listdir(path2) if f.endswith('.s'))

    # Count lines in .s files from path1 (-1 marks an unreadable file).
    counts1 = [
        (name, count_lines_in_file(os.path.join(path1, name)))
        for name in names1
    ]

    # Group path2's readable files by line count, so each path1 file only
    # meets its real candidates instead of being checked against every file.
    names2_by_count = defaultdict(list)
    for name in names2:
        count = count_lines_in_file(os.path.join(path2, name))
        if count != -1:
            names2_by_count[count].append(name)

    matched_files = [
        (name1, name2)
        for name1, count in counts1
        if count != -1
        for name2 in names2_by_count.get(count, ())
    ]

    if not matched_files:
        print("\nNo matching files found.")
        return

    print("Distances:")

    # A file can appear in many pairs; read and clean each one only once.
    cleaned_by_path = {}

    def cleaned_text(path):
        if path not in cleaned_by_path:
            cleaned_by_path[path] = read_and_clean_file(path)
        return cleaned_by_path[path]

    for file1, file2 in matched_files:
        content1 = cleaned_text(os.path.join(path1, file1))
        content2 = cleaned_text(os.path.join(path2, file2))
        if content1 is None or content2 is None:
            continue

        # The edit distance is never smaller than the difference in length,
        # so such a pair cannot qualify; skip the expensive comparison.
        if abs(len(content1) - len(content2)) > maxdistance:
            continue

        distance = Levenshtein.distance(content1, content2)
        if distance <= maxdistance:
            print(f" {map1}/{file1} and {map2}/{file2}: {distance}")
if __name__ == "__main__":
    import argparse

    # Command line: two map folder names followed by the max distance to show.
    cli = argparse.ArgumentParser(description="Find matching line counts in .s files across two folders and compare contents.")
    for arg_name, arg_type, arg_help in (
        ("map1", str, "First map folder"),
        ("map2", str, "Second map folder"),
        ("distance", int, "Max distance"),
    ):
        cli.add_argument(arg_name, type=arg_type, help=arg_help)

    options = cli.parse_args()
    find_matching_line_counts(options.map1, options.map2, options.distance)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment