Skip to content

Instantly share code, notes, and snippets.

@flodolo
Last active April 8, 2025 10:03
Show Gist options
  • Save flodolo/267168c0debe7cf867447c30ebddbc05 to your computer and use it in GitHub Desktop.
Save flodolo/267168c0debe7cf867447c30ebddbc05 to your computer and use it in GitHub Desktop.
Find unreferenced messages in Bedrock/Springfield
#!/usr/bin/env python3
import argparse
import os
import signal
import subprocess
import sys
from compare_locales import parser
# Results accumulated while messages are checked. They live at module level
# so print_output() can report partial progress from the interrupt handler.
processed_files = []
unreferenced_entities = []


# Capture CTRL+C
def signal_handler(sig, frame):
    """Print whatever has been collected so far, then exit with status 0."""
    print_output()
    raise SystemExit(0)


signal.signal(signal.SIGINT, signal_handler)
def print_output(processed=None, unreferenced=None):
    """
    Print the processed files and the unreferenced entities found so far.

    Both arguments are optional: when omitted, the module-level
    `processed_files` and `unreferenced_entities` lists are used, so existing
    callers can keep calling `print_output()` without arguments. A section
    with no items is skipped entirely.
    """
    if processed is None:
        processed = processed_files
    if unreferenced is None:
        unreferenced = unreferenced_entities

    # Progress dots are written without a trailing newline, so whichever
    # header is printed first has to start on a fresh line.
    separator = "\n"
    if processed:
        print(f"{separator}Processed files:")
        separator = ""
        for f in processed:
            print(f" - {f}")
    if unreferenced:
        print(f"{separator}Unreferenced entities:")
        for e in unreferenced:
            print(f" - {e}")
def grep_message(entity, repository_path, file_path):
    """
    Return True when `grep` finds `entity` as a fixed string in the repository.

    Terms (IDs starting with "-") are only searched in the reference .ftl
    files under l10n/en, excluding the file where they are defined. Any other
    message is searched in .html, .py, .js and .json files across the
    repository, ignoring the /l10n folder.
    """
    command = ["grep", "-R", "-F", "-q"]

    is_term = entity.startswith("-")
    if not is_term:
        l10n_dir = os.path.join(repository_path, "l10n")
        command += [
            "--include=*.html",
            "--include=*.py",
            "--include=*.js",
            "--include=*.json",  # used by newsletter stuff
            f"--exclude-dir={l10n_dir}",
            entity,
            repository_path,
        ]
    else:
        reference_dir = os.path.join(repository_path, "l10n", "en")
        command += [
            "--include=*.ftl",
            f"--exclude={file_path}",
            "--",  # otherwise, grep gets confused by the hyphen in "-someterm"
            entity,
            reference_dir,
        ]

    # With -q grep prints nothing; only its exit status is used (0 = match).
    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    return completed.returncode == 0
def extract_string_list(project_path):
    """
    Collect the entities defined in each reference .ftl file.

    Walks `<project_path>/l10n/en` (without following symlinks) and returns a
    dict mapping each .ftl file's path, relative to that folder, to the list
    of its non-Junk entities converted with `str()`. Files are visited in
    sorted order; a file that yields no entities gets no key. Exceptions
    raised while parsing a file are printed and the file is skipped.
    """
    ref_path = os.path.join(project_path, "l10n", "en")
    ftl_files = sorted(
        os.path.join(folder, name)
        for folder, _subfolders, names in os.walk(ref_path, followlinks=False)
        for name in names
        if name.endswith(".ftl")
    )

    string_list = {}
    for ftl_file in ftl_files:
        rel_path = os.path.relpath(ftl_file, ref_path)
        file_parser = parser.getParser(".ftl")
        file_parser.readFile(ftl_file)
        try:
            for entity in file_parser.parse():
                # Ignore Junk
                if isinstance(entity, parser.Junk):
                    continue
                # Appending one entity at a time keeps the entities already
                # collected if parsing fails part-way through the file.
                string_list.setdefault(rel_path, []).append(str(entity))
        except Exception as e:
            print(f"Error parsing file: {ftl_file}")
            print(e)

    return string_list
def main():
    """
    Report messages from the reference .ftl files that nothing references.

    Reads the repository location from the required `--path` argument, checks
    every entity returned by `extract_string_list()` with `grep_message()`,
    and prints the results with `print_output()`. Exits with a usage error
    when the repository has no `l10n/en` folder, instead of silently
    reporting nothing.
    """
    # Named `arg_parser` so it does not shadow the module-level `parser`
    # imported from compare_locales.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--path", dest="repo_path", help="Path to repository clone", required=True
    )
    args = arg_parser.parse_args()

    # Reference files are read from <repo>/l10n/en; without that folder there
    # is nothing to check, which most likely means --path is wrong.
    reference_path = os.path.join(args.repo_path, "l10n", "en")
    if not os.path.isdir(reference_path):
        arg_parser.error(f"reference folder not found: {reference_path}")

    # Path to project clone
    string_list = extract_string_list(args.repo_path)

    # Excluded files and IDs
    excluded_files = []
    excluded_ids = []

    for file_path, entities in string_list.items():
        if file_path in excluded_files:
            continue

        for entity in entities:
            # Skip if the entity is in the exclusion list
            if entity in excluded_ids:
                continue

            # One progress dot per checked entity, flushed immediately
            sys.stdout.write(".")
            sys.stdout.flush()
            if not grep_message(entity, args.repo_path, file_path):
                unreferenced_entities.append(f"{file_path}:{entity}")

        # Only recorded once every entity in the file has been checked
        processed_files.append(file_path)

    # Print output if execution wasn't interrupted
    print_output()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment