Last active
April 8, 2025 10:03
-
-
Save flodolo/267168c0debe7cf867447c30ebddbc05 to your computer and use it in GitHub Desktop.
Find unreferenced messages in Bedrock/Springfield
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse | |
import os | |
import signal | |
import subprocess | |
import sys | |
from compare_locales import parser | |
# Result accumulators shared across the module. They are kept at module level
# so that print_output() can report partial results when the run is
# interrupted with CTRL+C.
processed_files = []
unreferenced_entities = []
def signal_handler(sig, frame):
    """Handle SIGINT: print the results gathered so far, then exit with 0.

    Both arguments are supplied by the ``signal`` module and are not used.
    """
    print_output()
    raise SystemExit(0)


# Capture CTRL+C
signal.signal(signal.SIGINT, signal_handler)
def print_output():
    """Print the run summary to stdout.

    Reads the module-level ``processed_files`` and ``unreferenced_entities``
    lists. Each non-empty list is printed as a header followed by one bullet
    per item; an empty list produces no output at all.
    """
    if processed_files:
        # The leading newline ends the line of progress dots written by main().
        print("\nProcessed files:")
        for file_path in processed_files:
            print(f" - {file_path}")

    if unreferenced_entities:
        print("Unreferenced entities:")
        for entity in unreferenced_entities:
            print(f" - {entity}")
def grep_message(entity, repository_path, file_path):
    """Return True if ``entity`` is referenced somewhere in the repository.

    Standard messages are searched for in .html, .py, .js and .json files
    under ``repository_path``; the /l10n folder is meant to be ignored.

    Terms (IDs starting with "-") are only searched for in the reference
    .ftl files under ``l10n/en``, and a match in the file where the term is
    defined does not count as a reference.

    Args:
        entity: Message or term ID, searched for as a fixed string.
        repository_path: Path to the repository clone.
        file_path: Path of the .ftl file defining ``entity``, relative to
            ``<repository_path>/l10n/en``.

    Returns:
        True if a reference was found, False otherwise (including when grep
        itself reports an error).
    """
    is_term = entity.startswith("-")
    if is_term:
        reference_path = os.path.join(repository_path, "l10n", "en")
        defining_file = os.path.normpath(os.path.join(reference_path, file_path))
        # grep matches --exclude against file base names while recursing, so
        # a relative path such as "firefox/brands.ftl" never excludes the
        # defining file and the term's own definition always matches. List
        # the matching files (-l) instead and drop the defining file below.
        grep_cmd = [
            "grep",
            "-R",
            "-F",
            "-l",
            "--include=*.ftl",
            "--",  # otherwise, grep gets confused by the hyphen in "-someterm"
            entity,
            reference_path,
        ]
    else:
        grep_cmd = [
            "grep",
            "-R",
            "-F",
            "-q",
            "--include=*.html",
            "--include=*.py",
            "--include=*.js",
            "--include=*.json",  # used by newsletter stuff
            # NOTE(review): GNU grep matches --exclude-dir against directory
            # base names while recursing, so this full path may only take
            # effect on other grep implementations - confirm.
            f"--exclude-dir={os.path.join(repository_path, 'l10n')}",
            "--",
            entity,
            repository_path,
        ]

    result = subprocess.run(grep_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if not is_term:
        return result.returncode == 0

    # One matching file per line; a reference is any match outside the file
    # that defines the term.
    matching_files = os.fsdecode(result.stdout).splitlines()
    return any(os.path.normpath(path) != defining_file for path in matching_files)
def extract_string_list(project_path):
    """Collect the entities defined in a project's reference .ftl files.

    Walks ``<project_path>/l10n/en`` (without following symlinked folders),
    parses every .ftl file in sorted order and returns a dict that maps each
    file path, relative to that folder, to the list of ``str(entity)`` values
    of its non-Junk entities (presumably the message and term IDs - confirm
    against compare-locales). Files without any such entity get no key.

    Exceptions raised while parsing a file are printed and do not stop the
    run.
    """
    reference_root = os.path.join(project_path, "l10n", "en")

    ftl_files = sorted(
        os.path.join(folder, name)
        for folder, _, names in os.walk(reference_root, followlinks=False)
        for name in names
        if name.endswith(".ftl")
    )

    string_list = {}
    for ftl_file in ftl_files:
        relative_name = os.path.relpath(ftl_file, reference_root)
        ftl_parser = parser.getParser(".ftl")
        ftl_parser.readFile(ftl_file)
        try:
            for entity in ftl_parser.parse():
                # Junk entries are ignored
                if not isinstance(entity, parser.Junk):
                    string_list.setdefault(relative_name, []).append(str(entity))
        except Exception as e:
            print(f"Error parsing file: {ftl_file}")
            print(e)

    return string_list
def main():
    """Command-line entry point: list entities that look unreferenced.

    Takes the repository clone from the required ``--path`` argument, checks
    every entity returned by extract_string_list() with grep_message(), and
    records the ones without a match in ``unreferenced_entities``. One dot is
    written to stdout per checked entity as a progress indicator, and the
    summary is printed once all files have been processed.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--path", dest="repo_path", help="Path to repository clone", required=True
    )
    # Path to project clone
    repo_path = cli.parse_args().repo_path

    string_list = extract_string_list(repo_path)

    # Excluded files and IDs
    excluded_files = []
    excluded_ids = []

    for file_path, entities in string_list.items():
        if file_path in excluded_files:
            continue

        for entity in entities:
            # Only check entities that are not in the exclusion list
            if entity not in excluded_ids:
                print(".", end="", flush=True)
                if not grep_message(entity, repo_path, file_path):
                    unreferenced_entities.append(f"{file_path}:{entity}")

        processed_files.append(file_path)

    # Print output if execution wasn't interrupted
    print_output()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment