Swift Clean Localization Keys
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
r"""
iOS/macOS Localization Manager and Analysis Tool

This tool analyzes, cleans, and manages localization files (Localizable.strings)
in iOS and macOS projects. It performs the following operations:

1. LOCALIZATION KEY DETECTION:
   - Intelligently detects localization keys used in Swift, Objective-C, Kotlin,
     JavaScript, and SQL files
   - Supports various localization methods (NSLocalizedString, String(localized:),
     LocalizedStringKey, etc.)
   - Finds keys defined in variables and referenced indirectly in code
   - Detects keys used in common UI component initializers and structures

2. MISSING KEYS ANALYSIS:
   - Identifies keys used in code but missing from localization files
   - Generates detailed reports showing which keys are missing from each language
   - Can filter results by key prefixes (e.g., only show missing achievement keys)
   - Creates template files for translators with placeholder values

3. CLEANING OPERATIONS:
   - Automatically detects duplicate keys and removes them while keeping the last definition
   - Cleans comment lines (starting with //) and inline comments (after key-value definitions)
   - Detects and removes keys that are not used anywhere in the project (with confirmation)
   - Cleans unnecessary whitespace and empty lines (with the --clean-whitespace option)
   - Sorts keys alphabetically (with the --sort-keys option)

4. DETAILED REPORTING:
   - Lists all used and unused keys
   - Reports the number of cleaned duplicate keys and comment lines, and which files they were in
   - Shows which files use the keys and how many times they are used
   - Saves all analysis results to a detailed log file

Usage:
    python3 clean_localizations.py [project_directory] [options]

Basic Options:
    --confirm-all               Automatically confirm all deletion operations
    --log-file FILE_NAME        Specify the log file name (default: localization_log.txt)
    --exclude PATTERN           Patterns for directories/files to exclude (can be used multiple times)
    --verbose, -v               Show detailed output
    --clean-whitespace          Clean unnecessary whitespace and empty lines
    --sort-keys                 Sort keys alphabetically
    --no-prompt                 Run without interactive prompts

Missing Keys Detection:
    --auto-detect-missing-keys  Run in missing keys detection mode
    --filter-prefix PREFIX      Filter keys by prefix(es) (can be used multiple times:
                                --filter-prefix ach_ badge_)
    --min-missing-count NUM     Only include languages with at least NUM missing keys
    --output-file FILE          Output file for the missing keys report (default: missing_keys.txt)

Template Generation:
    --create-template           Create a template Localizable.strings file with missing keys
    --template-file FILE        Output file for the template (default: template_localizable.strings)
    --reference-language LANG   Language to use for reference values (default: en)

Examples:
    python3 clean_localizations.py                              # Run in the current directory
    python3 clean_localizations.py /path/to/project             # Run in a specific directory
    python3 clean_localizations.py --confirm-all                # Automatically confirm all deletions
    python3 clean_localizations.py --exclude "^\.git$"          # Exclude the .git directory
    python3 clean_localizations.py --clean-whitespace           # Clean unnecessary whitespace
    python3 clean_localizations.py --sort-keys                  # Sort keys alphabetically
    python3 clean_localizations.py --auto-detect-missing-keys   # Only detect missing keys
    python3 clean_localizations.py --filter-prefix ach_ badge_  # Only show keys with these prefixes
    python3 clean_localizations.py --create-template --filter-prefix auth_  # Create a template for auth keys
"""

import os
import re
import argparse
from collections import Counter, defaultdict


def parse_args():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser(description='Detect unused localization keys in iOS projects.')
    parser.add_argument('project_path', nargs='?', default='.', help='Project directory (default: current directory)')
    parser.add_argument('--confirm-all', action='store_true', help='Automatically confirm all deletion operations')
    parser.add_argument('--log-file', default='localization_log.txt', help='Log file (default: localization_log.txt)')
    parser.add_argument('--exclude', nargs='+', default=[], help='Patterns for directories/files to exclude')
    parser.add_argument('--verbose', '-v', action='store_true', help='Show detailed output')
    parser.add_argument('--clean-whitespace', action='store_true', help='Clean unnecessary whitespace and empty lines')
    parser.add_argument('--sort-keys', action='store_true', help='Sort keys alphabetically')
    parser.add_argument('--auto-detect-missing-keys', action='store_true', help='Automatically detect missing keys without other operations')
    parser.add_argument('--no-prompt', action='store_true', help='Run without interactive prompts')
    parser.add_argument('--filter-prefix', nargs='+', default=[],
                        help='Filter missing keys to only include those with specific prefixes (e.g., "ach_" "button_")')
    parser.add_argument('--min-missing-count', type=int, default=0,
                        help='Only report languages with at least this many missing keys')
    parser.add_argument('--output-file', default='missing_keys.txt',
                        help='Output file for missing keys (default: missing_keys.txt)')
    parser.add_argument('--create-template', action='store_true',
                        help='Create a template Localizable.strings file with missing keys for translation')
    parser.add_argument('--template-file', default='template_localizable.strings',
                        help='Output file for the template (default: template_localizable.strings)')
    parser.add_argument('--reference-language', default='en',
                        help='Language to use for reference values in the template (default: en)')
    return parser.parse_args()

def find_strings_files(project_path, exclude_patterns):
    """Find all Localizable.strings files."""
    strings_files = []
    for root, dirs, files in os.walk(project_path):
        # Filter out excluded directories
        dirs[:] = [d for d in dirs if not any(re.match(pattern, d) for pattern in exclude_patterns)]
        for filename in files:
            if filename == "Localizable.strings":
                strings_files.append(os.path.join(root, filename))
    return strings_files

def parse_strings_file(file_path):
    """Parse a Localizable.strings file and return key-value pairs."""
    keys = {}
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except UnicodeDecodeError:
        # Try UTF-16
        try:
            with open(file_path, 'r', encoding='utf-16') as f:
                content = f.read()
        except Exception as e:
            print(f"Error: Could not read file {file_path}: {e}")
            return keys
    # Find all lines matching the "key" = "value"; pattern
    pattern = r'"([^"\\]*(?:\\.[^"\\]*)*)"\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"\s*;'
    matches = re.findall(pattern, content)
    for key, value in matches:
        keys[key] = value
    return keys

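# Illustrative example (hypothetical file contents, not from any real project):
# given a Localizable.strings file containing the single line
#     "settings_title" = "Settings";
# the pattern above matches it, and parse_strings_file returns
# {"settings_title": "Settings"}.
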
def extract_keys_from_files(project_path, exclude_patterns):
    """Find all string keys directly defined in code and also extract from various structure patterns."""
    used_keys = Counter()
    file_occurrences = defaultdict(list)
    # File extensions to scan
    swift_extensions = ['.swift']
    sql_extensions = ['.sql']
    objc_extensions = ['.m', '.h']
    kotlin_extensions = ['.kt', '.kts']
    java_extensions = ['.java']
    js_extensions = ['.js', '.jsx', '.ts', '.tsx']
    # Track potential key variables {variable_name: key_string}
    variables_by_file = {}
    # Collect possible localization key patterns from existing code
    key_pattern_analysis = analyze_key_patterns(project_path, exclude_patterns)
    # First pass - collect all variable declarations
    for root, dirs, files in os.walk(project_path):
        # Filter out excluded directories
        dirs[:] = [d for d in dirs if not any(re.match(pattern, d) for pattern in exclude_patterns)]
        for filename in files:
            file_path = os.path.join(root, filename)
            try:
                # For code files
                if any(filename.endswith(ext) for ext in
                       swift_extensions + sql_extensions + objc_extensions +
                       kotlin_extensions + java_extensions + js_extensions):
                    with open(file_path, 'r', encoding='utf-8') as f:
                        content = f.read()
                    # Find all variable declarations that seem to be localization keys
                    file_variables = {}
                    collect_variable_declarations(content, file_variables)
                    if file_variables:
                        variables_by_file[file_path] = file_variables
            except Exception as e:
                print(f"Error reading file (pass 1): {file_path} - {str(e)}")
    # Second pass - process direct usages, variable references, and contextual analysis
    for root, dirs, files in os.walk(project_path):
        # Filter out excluded directories
        dirs[:] = [d for d in dirs if not any(re.match(pattern, d) for pattern in exclude_patterns)]
        for filename in files:
            file_path = os.path.join(root, filename)
            try:
                # Determine the language based on the file extension
                if any(filename.endswith(ext) for ext in swift_extensions + objc_extensions):
                    language = 'swift'
                elif any(filename.endswith(ext) for ext in kotlin_extensions + java_extensions):
                    language = 'kotlin/android'
                elif any(filename.endswith(ext) for ext in js_extensions):
                    language = 'javascript/react'
                elif any(filename.endswith(ext) for ext in sql_extensions):
                    language = 'sql'
                else:
                    # Skip unsupported file types
                    continue
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                # Known localization function direct usage patterns
                process_localization_function_usage(content, file_path, used_keys, file_occurrences, language)
                # Process context-aware string assignments.
                # This includes categoryKey and similar assignments that are likely localization keys.
                process_contextual_string_assignments(content, file_path, used_keys, file_occurrences, key_pattern_analysis)
                # Process variable references that might be localization keys
                process_variable_references(content, file_path, used_keys, file_occurrences, variables_by_file)
                # Analyze string literals in UI components to find localized strings
                process_ui_component_strings(content, file_path, used_keys, file_occurrences, key_pattern_analysis)
            except Exception as e:
                print(f"Error reading file (pass 2): {file_path} - {str(e)}")
    # Final pass - collect any remaining potential keys from contextual analysis
    for file_path, vars_in_file in variables_by_file.items():
        for variable_name, key in vars_in_file.items():
            if key not in used_keys and is_valid_localization_key(key, key_pattern_analysis):
                # The key is defined but its usage was not detected; add it anyway since it follows key patterns
                used_keys[key] += 1
                file_occurrences[key].append((file_path, f"potential_key:{variable_name}"))
    return used_keys, file_occurrences

def analyze_key_patterns(project_path, exclude_patterns):
    """
    Dynamically analyze the codebase to identify localization key patterns.
    Returns a pattern analysis object with detected patterns and conventions.
    """
    print("Analyzing codebase for localization key patterns...")
    # Stores our analysis results
    key_patterns = {
        'prefixes': set(),
        'suffixes': set(),
        'assignments': defaultdict(int),  # Variable names used for key assignments
        'formats': defaultdict(int),      # Key format statistics (snake_case, etc.)
        'contexts': defaultdict(set),     # Contexts in which keys appear
        'known_keys': set(),              # Keys found in localization files
    }
    # First pass - collect keys from localization files
    for root, dirs, files in os.walk(project_path):
        dirs[:] = [d for d in dirs if not any(re.match(pattern, d) for pattern in exclude_patterns)]
        for filename in files:
            if filename == "Localizable.strings":
                file_path = os.path.join(root, filename)
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        content = f.read()
                except UnicodeDecodeError:
                    try:
                        with open(file_path, 'r', encoding='utf-16') as f:
                            content = f.read()
                    except Exception:
                        continue
                # Extract keys
                key_pattern = r'"([^"\\]*(?:\\.[^"\\]*)*)"\s*=\s*"[^"\\]*(?:\\.[^"\\]*)*"\s*;'
                keys = re.findall(key_pattern, content)
                # Add to known keys
                key_patterns['known_keys'].update(keys)
                # Analyze prefix and suffix patterns
                for key in keys:
                    # Extract the prefix (everything before the first underscore/dot)
                    prefix_match = re.match(r'^([a-z0-9]+)[_\.]', key)
                    if prefix_match:
                        key_patterns['prefixes'].add(prefix_match.group(1))
                    # Extract the suffix (everything after the last underscore/dot)
                    suffix_match = re.search(r'[_\.]([a-z0-9]+)$', key)
                    if suffix_match:
                        key_patterns['suffixes'].add(suffix_match.group(1))
                    # Record the format
                    if '_' in key:
                        key_patterns['formats']['snake_case'] += 1
                    elif '.' in key:
                        key_patterns['formats']['dot_notation'] += 1
                    elif re.search(r'[a-z][A-Z]', key):
                        key_patterns['formats']['camelCase'] += 1
    # Second pass - analyze code for key assignment patterns
    for root, dirs, files in os.walk(project_path):
        dirs[:] = [d for d in dirs if not any(re.match(pattern, d) for pattern in exclude_patterns)]
        for filename in files:
            if filename.endswith('.swift'):
                file_path = os.path.join(root, filename)
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        content = f.read()
                except Exception:
                    continue
                # Look for variable assignments to strings
                assignment_patterns = [
                    # Common key assignment patterns
                    r'(\w+)Key\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
                    r'let\s+(\w+)Key\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
                    r'var\s+(\w+)Key\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
                    # Other assignment forms
                    r'(\w+)(?:Title|Label)\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
                    r'unit(?:Key)?:\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
                    r'titleKey:\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
                ]
                for pattern in assignment_patterns:
                    for match in re.finditer(pattern, content):
                        groups = match.groups()
                        if len(groups) >= 2:
                            # Get the variable name (first group)
                            var_type = groups[0]
                            # Get the key (last group)
                            key = groups[-1]
                            # Update the context analysis
                            start_pos = max(0, match.start() - 50)
                            end_pos = min(len(content), match.end() + 50)
                            context_text = content[start_pos:end_pos]
                            # Analyze the context - record it if it seems to be localization
                            if any(indicator in context_text for indicator in ['localized', 'LocalizedStringKey', 'String(localized:']):
                                key_patterns['contexts'][var_type].add('localization')
                            # Check for specific UI contexts
                            if 'TextField' in context_text or 'Text(' in context_text:
                                key_patterns['contexts'][var_type].add('ui_text')
                            elif 'Button' in context_text:
                                key_patterns['contexts'][var_type].add('button')
                            # Count assignments by variable type
                            key_patterns['assignments'][var_type] += 1
    # Print some stats about what we found
    print(f"Found {len(key_patterns['known_keys'])} known localization keys in .strings files")
    if key_patterns['prefixes']:
        print(f"Detected {len(key_patterns['prefixes'])} common key prefixes, including: {', '.join(list(key_patterns['prefixes'])[:10])}...")
    if key_patterns['suffixes']:
        print(f"Detected {len(key_patterns['suffixes'])} common key suffixes, including: {', '.join(list(key_patterns['suffixes'])[:10])}...")
    if key_patterns['assignments']:
        print(f"Most common variable contexts for key assignments: {', '.join(sorted(key_patterns['assignments'].keys(), key=lambda k: key_patterns['assignments'][k], reverse=True)[:5])}")
    return key_patterns

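# Illustrative example (hypothetical key, not from any real project): for a
# .strings entry "ach_first_win_title" = "First Win!"; the prefix regex above
# records the prefix "ach", the suffix regex records the suffix "title", and
# the key is counted under the 'snake_case' format.
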
def detect_localization_context(content):
    """
    Analyze file content to determine if it's likely to contain localization logic.
    Returns a boolean indicating if a localization context is detected.
    """
    # Look for common localization imports and frameworks
    localization_indicators = [
        'LocalizedStringKey', 'NSLocalizedString', 'String(localized:',
        'Localizable.strings', '.localized', 'formatMessage',
        'i18n', 'translate(', 't("', 'gettext', 'getString(R.string.',
        'useTranslation', 'LocalizationProvider'
    ]
    for indicator in localization_indicators:
        if indicator in content:
            return True
    return False

def collect_variable_declarations(content, file_variables):
    """
    Extract variable declarations that might be localization keys.
    Stores results in the file_variables dict.
    """
    # Standard variable declarations across languages
    patterns = [
        # Swift/Obj-C
        r'(?:let|var)\s+(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        r'static\s+(?:let|var)\s+(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        # JavaScript/TypeScript
        r'(?:const|let|var)\s+(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*=\s*[\'"]([^\'"\\\n]*(?:\\.[^\'"\\\n]*)*)[\'"]',
        # Java/Kotlin
        r'(?:final|private|public|protected|static)?\s*(?:String|val|var)\s+(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
    ]
    for pattern in patterns:
        for match in re.finditer(pattern, content):
            if len(match.groups()) >= 2:
                var_name, value = match.groups()[:2]
                if is_potential_localization_key(value):
                    file_variables[var_name] = value

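# Illustrative example (hypothetical Swift source, not from any real project):
# a declaration such as
#     let welcomeTitleKey = "home_welcome_title"
# matches the Swift pattern above and, because the value looks key-like,
# is recorded as file_variables["welcomeTitleKey"] = "home_welcome_title".
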
def is_potential_localization_key(text):
    """
    Determine if a string is likely to be a localization key based on its format.
    More sophisticated than the basic is_valid_localization_key function.
    """
    if not text or not isinstance(text, str):
        return False
    # Empty string check
    if not text.strip():
        return False
    # Purely numeric keys are invalid
    if text.isdigit() or text.replace('.', '', 1).isdigit():
        return False
    # Check for specific patterns of localization keys.
    # Common traits include:
    # 1. snake_case with dots or underscores (module.key_name)
    # 2. Short identifiers, not natural text (keys are typically concise)
    # 3. Absence of spaces (keys rarely have spaces)
    # 4. Presence of common prefixes/suffixes like title, label, etc.
    # Basic format checks
    if ' ' in text:  # Spaces usually indicate natural text, not keys
        return False
    if len(text) > 100:  # Keys are typically shorter than long text
        return False
    # Pattern checks
    if re.match(r'^[a-z][a-z0-9]*[_\.][a-z0-9_\.]+$', text):  # module.key or module_key pattern
        return True
    if re.match(r'^[a-z][a-z0-9_\.]+$', text) and ('_' in text or '.' in text):  # snake_case or dot.notation
        return True
    # Check for common key components
    key_components = ['title', 'label', 'message', 'error', 'success', 'button',
                      'status', 'header', 'footer', 'desc', 'tooltip', 'placeholder',
                      'name', 'text']
    for component in key_components:
        if f"_{component}" in text or f".{component}" in text or text.endswith(f"_{component}") or text.endswith(f".{component}"):
            return True
    return False

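# Illustrative examples of the heuristic above (hypothetical strings):
#     is_potential_localization_key("settings.title")  -> True  (dot notation)
#     is_potential_localization_key("save_button")     -> True  (snake_case)
#     is_potential_localization_key("Hello, world!")   -> False (contains a space)
#     is_potential_localization_key("42")              -> False (purely numeric)
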
def process_localization_function_usage(content, file_path, used_keys, file_occurrences, language):
    """
    Process direct usage of localization functions in the code.
    """
    # Map of language to localization function patterns
    localization_patterns = {
        'swift': [
            # NSLocalizedString and other Swift patterns
            (r'NSLocalizedString\(\s*"([^"\\]*(?:\\.[^"\\]*)*)"', "NSLocalizedString"),
            (r'String\(localized:\s*"([^"\\]*(?:\\.[^"\\]*)*)"', "String(localized:)"),
            (r'\.localized\(\s*(?:[^,]*,\s*)?[^"]*"([^"\\]*(?:\\.[^"\\]*)*)"', ".localized()"),
            (r'Text\(LocalizedStringKey\("([^"\\]*(?:\\.[^"\\]*)*)"\)\)', "Text(LocalizedStringKey)"),
            (r'LocalizedStringKey\(\s*"([^"\\]*(?:\\.[^"\\]*)*)"', "LocalizedStringKey"),
        ],
        'kotlin/android': [
            # Android getString patterns
            (r'getString\(R\.string\.([a-zA-Z0-9_]+)', "getString"),
            (r'\.getString\(R\.string\.([a-zA-Z0-9_]+)', "context.getString"),
            (r'stringResource\(R\.string\.([a-zA-Z0-9_]+)', "stringResource"),
        ],
        'javascript/react': [
            # React i18n patterns
            (r'(?:i18n|t|translate)\(\s*[\'"]([^\'"\\\n]*(?:\\.[^\'"\\\n]*)*)[\'"]', "i18n/translate"),
            (r'formatMessage\(\s*{\s*id:\s*[\'"]([^\'"\\\n]*(?:\\.[^\'"\\\n]*)*)[\'"]', "formatMessage"),
        ],
    }
    # Process the patterns for the specific language
    for pattern, pattern_name in localization_patterns.get(language, []):
        matches = re.findall(pattern, content)
        for key in matches:
            used_keys[key] += 1
            file_occurrences[key].append((file_path, pattern_name))

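# Illustrative example (hypothetical Swift call, not from any real project):
#     NSLocalizedString("login_button_title", comment: "")
# matches the first Swift pattern, so used_keys["login_button_title"] is
# incremented and the occurrence is recorded as (file_path, "NSLocalizedString").
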
def process_contextual_string_assignments(content, file_path, used_keys, file_occurrences, key_pattern_analysis):
    """
    Process string assignments that are likely to be localization keys based on context.
    Uses dynamic pattern analysis rather than hard-coded patterns.
    """
    # Dynamic pattern for finding any assignment with string literals.
    # This will catch category, title, label, unit assignments and more.
    assignment_pattern = r'(\w+)(?:\s*(?:=|:)|\w+Key:)\s*"([^"\\]*(?:\\.[^"\\]*)*)"'
    # Set of variable name fragments that suggest localization key assignments
    potential_key_vars = set()
    # Build potential variable names from the pattern analysis
    if key_pattern_analysis and 'assignments' in key_pattern_analysis:
        for var_type in key_pattern_analysis['assignments']:
            if len(var_type) > 3:  # Avoid very short names that might cause false positives
                potential_key_vars.add(var_type.lower())
    # Add common key variable indicators if we didn't find enough from the analysis
    if len(potential_key_vars) < 5:
        potential_key_vars.update(['key', 'title', 'label', 'unit', 'category', 'field', 'text', 'message'])
    # Find all assignments in the content
    for match in re.finditer(assignment_pattern, content):
        var_name, value = match.groups()
        # Skip if the value is empty
        if not value.strip():
            continue
        # Get the surrounding context to check for localization indicators
        start_pos = max(0, match.start() - 100)
        end_pos = min(len(content), match.end() + 100)
        context = content[start_pos:end_pos]
        # Analyze the variable name - does it suggest a key assignment?
        var_lower = var_name.lower()
        # Look for common key-related variable names or parameter names
        is_potential_key_var = (
            any(key_var in var_lower for key_var in potential_key_vars) or
            'key' in var_lower or
            ('title' in context.lower() and 'key' in context.lower()) or
            ('unit' in context.lower() and 'key' in context.lower()) or
            'localized' in context.lower()
        )
        # Check for specific indicators in the context
        has_localization_context = any(indicator in context for indicator in [
            'localized', 'LocalizedStringKey', 'String(localized:', 'NSLocalizedString',
            'Text(', 'Label(', 'TextField(', 'Button(', 'NavigationTitle', 'titleKey:', 'unitKey:'
        ])
        # If it looks like a key, has a key-like format, or appears in a localization context
        if (is_potential_key_var or
                has_localization_context or
                (value in key_pattern_analysis.get('known_keys', set())) or
                is_key_like_format(value)):
            # Check the value format - does it follow key patterns we've seen?
            if is_valid_localization_key(value, key_pattern_analysis):
                used_keys[value] += 1
                file_occurrences[value].append((file_path, f"context_assigned:{var_name}"))
        # Special case for Swift named arguments like titleKey: "value", unitKey: "value"
        named_param_match = re.search(r'(\w+)Key:\s*"([^"\\]*(?:\\.[^"\\]*)*)"', context)
        if named_param_match:
            param_name, param_value = named_param_match.groups()
            if is_valid_localization_key(param_value, key_pattern_analysis):
                used_keys[param_value] += 1
                file_occurrences[param_value].append((file_path, f"named_param:{param_name}Key"))

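# Illustrative example (hypothetical SwiftUI call site; StatCard is an invented
# view name, not from any real project):
#     StatCard(titleKey: "stats_daily_title", value: total)
# The assignment regex captures ("titleKey", "stats_daily_title"); the variable
# name contains "key", the value passes is_valid_localization_key, and the
# usage is recorded as context_assigned:titleKey.
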
def is_key_like_format(value):
    """Check if a string has a format typical of localization keys."""
    # Most keys have underscores or dots and no spaces
    if ' ' in value:
        return False
    if '_' in value or '.' in value:
        return True
    # Keys typically use lowercase letters, often with specific prefixes
    if re.match(r'^[a-z][a-zA-Z0-9_\.]+$', value):
        return True
    return False

def process_variable_references(content, file_path, used_keys, file_occurrences, variables_by_file):
    """
    Process references to variables that might contain localization keys.
    """
    # Patterns for variable usage in localization functions
    var_usage_patterns = [
        # Swift patterns
        r'LocalizedStringKey\(\s*(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*\)',
        r'NSLocalizedString\(\s*(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*,',
        r'String\(localized:\s*(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*\)',
        # JavaScript/React patterns
        r'(?:i18n|t|translate)\(\s*(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*[,)]',
        # Android patterns
        r'getString\(\s*(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*\)',
    ]
    file_variables = variables_by_file.get(file_path, {})
    for pattern in var_usage_patterns:
        for match in re.finditer(pattern, content):
            var_name = match.group(1)
            # Check if the variable is defined in the current file
            if var_name in file_variables:
                key = file_variables[var_name]
                used_keys[key] += 1
                file_occurrences[key].append((file_path, f"variable:{var_name}"))
            else:
                # Try to find the variable in other files
                for other_file, vars_in_file in variables_by_file.items():
                    if var_name in vars_in_file:
                        key = vars_in_file[var_name]
                        used_keys[key] += 1
                        file_occurrences[key].append((file_path, f"external_variable:{var_name}"))
                        break

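# Illustrative example, continuing the hypothetical declaration shown after
# collect_variable_declarations: a later call such as
#     Text(LocalizedStringKey(welcomeTitleKey))
# matches the first Swift pattern, and the variable name resolves through
# file_variables back to the key "home_welcome_title".
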
def process_ui_component_strings(content, file_path, used_keys, file_occurrences, key_pattern_analysis):
    """
    Process string literals used in UI components that might be localized.
    Also looks for SwiftUI view parameters that are likely to be localization keys.
    """
    # Patterns for UI components with string literals that are often localized
    ui_patterns = [
        # SwiftUI patterns
        r'Text\(\s*"([^"\\]*(?:\\.[^"\\]*)*)"(?:\s*,|\s*\))(?!.*attributedString)',
        r'Button\([^)]*\)\s*{\s*Text\(\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        r'NavigationLink\([^)]*\)\s*{\s*Text\(\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        r'Label\(\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        # UIKit patterns
        r'\.title\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        r'\.text\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        r'\.placeholder\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        r'\.buttonTitle\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        # React/JSX patterns
        r'<Text[^>]*>\s*[\'"]([^\'"\\\n]*(?:\\.[^\'"\\\n]*)*)[\'"]',
        r'<Button[^>]*>\s*[\'"]([^\'"\\\n]*(?:\\.[^\'"\\\n]*)*)[\'"]',
        r'<Label[^>]*>\s*[\'"]([^\'"\\\n]*(?:\\.[^\'"\\\n]*)*)[\'"]',
    ]
    # Special patterns for SwiftUI named key parameters
    named_key_params = [
        r'titleKey:\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        r'labelKey:\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        r'unitKey:\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        r'messageKey:\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
    ]
    # Process the standard UI patterns
    for pattern in ui_patterns:
        for match in re.finditer(pattern, content):
            text = match.group(1)
            # Skip if it is likely translated text rather than a key
            if not is_potential_localization_key(text) and is_translatable_text(text):
                continue
            # If it follows key patterns, it might be a direct key reference
            if is_potential_localization_key(text):
                used_keys[text] += 1
                file_occurrences[text].append((file_path, "ui_component"))
    # Process named key parameters (titleKey:, unitKey:, etc.)
    for pattern in named_key_params:
        for match in re.finditer(pattern, content):
            key = match.group(1)
            param_type = pattern.split(':')[0]  # Extract the parameter name (titleKey, unitKey, etc.)
            if is_valid_localization_key(key, key_pattern_analysis):
                used_keys[key] += 1
                file_occurrences[key].append((file_path, f"named_param:{param_type}"))

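# Illustrative example (hypothetical SwiftUI views, not from any real project):
#     Text("profile_name_label")  -> counted as a key (key-like format)
#     Text("Hello, world!")       -> skipped (reads as translatable text)
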
def is_translatable_text(text):
    """
    Determine if a string is likely to be translatable human-readable text rather than a key.
    """
    # Human-readable text likely contains:
    # - Spaces
    # - Multiple words
    # - Punctuation like periods, commas, question marks
    # - A leading capital letter (in many languages)
    # - Natural language sentence structure
    # Check for spaces (most UI text has spaces between words)
    if ' ' in text:
        return True
    # Check for common punctuation used in sentences
    if any(punct in text for punct in '.,:;!?'):
        return True
    # Check for the capitalization pattern typical of sentences
    if text and text[0].isupper() and not text.isupper():
        # The first letter is uppercase but the text is not all uppercase
        return True
    # Check the word count - more than one word suggests natural text
    word_count = len(text.split())
    if word_count > 1:
        return True
    return False


def find_localization_keys_in_code(project_path, exclude_patterns):
    """Find localization keys used in code files."""
    return extract_keys_from_files(project_path, exclude_patterns)

def write_log(log_file, used_keys, unused_keys_by_file, all_keys_by_file, file_occurrences,
              comments_removed_by_file=None, duplicates_removed_by_file=None,
              whitespace_removed_by_file=None, sorted_keys_by_file=None):
    """Write usage statistics to the log file."""
    with open(log_file, 'w', encoding='utf-8') as f:
        f.write("=== LOCALIZATION ANALYSIS REPORT ===\n\n")
        # Add cleanup information
        if duplicates_removed_by_file:
            f.write("--- DUPLICATE KEYS ---\n")
            total_duplicates = sum(count for count, _ in duplicates_removed_by_file.values())
            f.write(f"Total {total_duplicates} duplicate keys cleaned.\n\n")
            for file_path, (dup_count, unique_count) in duplicates_removed_by_file.items():
                if dup_count > 0:
                    lang = get_language_from_path(file_path)
                    f.write(f"{file_path} ({lang}): {dup_count} duplicate keys removed, {unique_count} unique keys remain\n")
            f.write("\n")
        if comments_removed_by_file:
            f.write("--- COMMENT LINES ---\n")
            total_comments = sum(comments_removed_by_file.values())
            f.write(f"Total {total_comments} comment lines cleaned.\n\n")
            for file_path, comment_count in comments_removed_by_file.items():
                if comment_count > 0:
                    lang = get_language_from_path(file_path)
                    f.write(f"{file_path} ({lang}): {comment_count} comment lines removed\n")
            f.write("\n")
        if whitespace_removed_by_file:
            f.write("--- UNNECESSARY WHITESPACE AND EMPTY LINES ---\n")
            total_whitespace = sum(whitespace_removed_by_file.values())
            f.write(f"Total {total_whitespace} unnecessary whitespace and empty lines cleaned.\n\n")
            for file_path, whitespace_count in whitespace_removed_by_file.items():
                if whitespace_count > 0:
                    lang = get_language_from_path(file_path)
                    f.write(f"{file_path} ({lang}): {whitespace_count} unnecessary whitespace/lines cleaned\n")
            f.write("\n")
        if sorted_keys_by_file:
            f.write("--- SORTED KEYS ---\n")
            total_sorted = sum(sorted_keys_by_file.values())
            f.write(f"Total {total_sorted} keys sorted alphabetically.\n\n")
            for file_path, sorted_count in sorted_keys_by_file.items():
                if sorted_count > 0:
                    lang = get_language_from_path(file_path)
                    f.write(f"{file_path} ({lang}): {sorted_count} keys sorted alphabetically\n")
            f.write("\n")
        f.write("--- USED KEYS ---\n")
        f.write(f"Total {len(used_keys)} keys are used.\n\n")
        # Top 10 most used keys
        f.write("Top 10 most used keys:\n")
        for key, count in used_keys.most_common(10):
            f.write(f"  {key}: {count} times\n")
        f.write("\n")
        # All used keys and where they are used
        f.write("All used keys and where they are used (alphabetically):\n")
        for key in sorted(used_keys.keys()):
            f.write(f"  {key}: {used_keys[key]} times\n")
            # Show up to 5 files for each key
            for i, (file_path, pattern_name) in enumerate(file_occurrences[key][:5]):
                short_path = os.path.relpath(file_path)
                f.write(f"    - {short_path} ({pattern_name})\n")
            if len(file_occurrences[key]) > 5:
                f.write(f"    ... and {len(file_occurrences[key]) - 5} more files\n")
        f.write("\n")
        # Unused keys by file
        f.write("--- UNUSED KEYS BY FILE ---\n")
        total_unused = 0
        for file_path, keys in unused_keys_by_file.items():
            if keys:
                total_unused += len(keys)
                percent = (len(keys) / len(all_keys_by_file[file_path])) * 100 if all_keys_by_file[file_path] else 0
                f.write(f"\n{file_path} ({len(keys)}/{len(all_keys_by_file[file_path])}, {percent:.1f}%):\n")
                for key in sorted(keys):
                    f.write(f"  {key}\n")
        f.write(f"\nTotal {total_unused} unused keys detected.\n")

def clean_strings_file(file_path, keys_to_remove):
    """Remove the specified keys from the strings file."""
    encoding = 'utf-8'
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except UnicodeDecodeError:
        # Try UTF-16
        try:
            with open(file_path, 'r', encoding='utf-16') as f:
                content = f.read()
            encoding = 'utf-16'
        except Exception as e:
            print(f"Error: Could not read file {file_path}: {e}")
            return 0
    lines = content.split('\n')
    filtered_lines = []
    removed_count = 0
    for line in lines:
        # Check the line against every key scheduled for removal
        should_keep = True
        for key in keys_to_remove:
            pattern = r'"' + re.escape(key) + r'"\s*=\s*"[^"\\]*(?:\\.[^"\\]*)*"\s*;'
            if re.search(pattern, line):
                should_keep = False
                removed_count += 1
                break
        if should_keep:
            filtered_lines.append(line)
    # Write the file back with the same encoding
    with open(file_path, 'w', encoding=encoding) as f:
        f.write('\n'.join(filtered_lines))
    return removed_count


def get_language_from_path(file_path):
    """Extract the language code from a file path."""
    match = re.search(r'/([^/]+)\.lproj/Localizable\.strings', file_path)
    if match:
        return match.group(1)
    return os.path.basename(os.path.dirname(os.path.dirname(file_path)))

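# Illustrative example: for a standard Xcode layout such as
#     MyApp/Resources/en.lproj/Localizable.strings
# the regex above returns "en"; for paths without an .lproj component, the
# grandparent directory name is used as a fallback.
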
def clean_comments_from_file(file_path):
    """Clean comment lines from a Localizable.strings file."""
    encoding = 'utf-8'
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except UnicodeDecodeError:
        # Try UTF-16
        try:
            with open(file_path, 'r', encoding='utf-16') as f:
                content = f.read()
            encoding = 'utf-16'
        except Exception as e:
            print(f"Error: Could not read file {file_path}: {e}")
            return 0
    lines = content.split('\n')
    cleaned_lines = []
    removed_count = 0
    for line in lines:
        # Is it a full comment line?
        if line.strip().startswith('//'):
            removed_count += 1
            continue
        # Is there an inline comment?
        comment_pos = line.find('//')
        if comment_pos > 0:
            # Check whether the key-value pair has already ended:
            #   "key" = "value"; // comment
            if '";' in line[:comment_pos]:
                # Remove the comment part, keep the key-value pair
                line = line[:comment_pos].rstrip()
            # Otherwise, keep the comment (the // characters are probably inside the key or value)
        cleaned_lines.append(line)
    # Write the file back with the same encoding
    with open(file_path, 'w', encoding=encoding) as f:
        f.write('\n'.join(cleaned_lines))
    return removed_count

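# Illustrative examples (hypothetical .strings lines):
#     "app_name" = "MyApp"; // shown in the nav bar
#         -> the trailing comment is stripped, the pair is kept
#     "docs_url" = "https://example.com";
#         -> kept intact: the // appears before the closing "; so it is
#            treated as part of the value, not a comment
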
def remove_duplicate_keys(file_path):
    """Clean duplicate keys from a Localizable.strings file.
    Keeps the last occurrence and removes the earlier ones."""
    encoding = 'utf-8'
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except UnicodeDecodeError:
        # Try UTF-16
        try:
            with open(file_path, 'r', encoding='utf-16') as f:
                content = f.read()
            encoding = 'utf-16'
        except Exception as e:
            print(f"Error: Could not read file {file_path}: {e}")
            return 0, 0
    lines = content.split('\n')
    seen_keys = {}  # key -> (last seen line index, line content)
    duplicate_indexes = set()  # Line indexes to delete
    key_pattern = re.compile(r'^\s*"([^"\\]*(?:\\.[^"\\]*)*)"\s*=\s*".*";')
    # Find duplicate keys
    for i, line in enumerate(lines):
        # Process only lines containing keys
        match = key_pattern.match(line)
        if match:
            key = match.group(1)
            if key in seen_keys:
                # This key was already seen; mark the earlier occurrence for removal
                duplicate_indexes.add(seen_keys[key][0])
            # Update the last seen index of the key
            seen_keys[key] = (i, line)
    # Delete the duplicate lines
    cleaned_lines = [line for i, line in enumerate(lines) if i not in duplicate_indexes]
    # Write the file back with the same encoding
    with open(file_path, 'w', encoding=encoding) as f:
        f.write('\n'.join(cleaned_lines))
    return len(duplicate_indexes), len(seen_keys)

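# Illustrative example (hypothetical duplicate entries):
#     "score_label" = "Score";   <- removed (earlier occurrence)
#     "score_label" = "Points";  <- kept (the last definition wins)
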
def clean_whitespace_from_file(file_path):
    """Clean unnecessary whitespace and empty lines from a Localizable.strings file."""
    encoding = 'utf-8'
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except UnicodeDecodeError:
        # Try UTF-16
        try:
            with open(file_path, 'r', encoding='utf-16') as f:
                content = f.read()
            encoding = 'utf-16'
        except Exception as e:
            print(f"Error: Could not read file {file_path}: {e}")
            return 0
    lines = content.split('\n')
    cleaned_lines = []
    removed_count = 0
    for line in lines:
        # Strip whitespace at the beginning and end of the line
        original_line = line
        line = line.strip()
        # Skip completely empty lines
        if not line:
            removed_count += 1
            continue
        # Normalize lines matching the "key" = "value"; pattern
        key_value_pattern = r'"([^"\\]*(?:\\.[^"\\]*)*)"\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"\s*;'
        match = re.match(key_value_pattern, line)
        if match:
            key, value = match.groups()
            # Recreate the key-value pair in canonical form
            formatted_line = f'"{key}" = "{value}";'
            cleaned_lines.append(formatted_line)
            # Count it if the result differs from the original line
            if formatted_line != original_line.strip():
                removed_count += 1
        else:
            # If it is not a key-value pair, add the stripped version
            if line != original_line:
                removed_count += 1
            cleaned_lines.append(line)
    # Write the file back with the same encoding
    with open(file_path, 'w', encoding=encoding) as f:
        f.write('\n'.join(cleaned_lines))
    return removed_count

def sort_keys_in_file(file_path):
    """Sort keys alphabetically in a Localizable.strings file.
    Note: the file is rewritten from the parsed key-value pairs only,
    so comments and blank lines do not survive sorting."""
    encoding = 'utf-8'
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except UnicodeDecodeError:
        # Try UTF-16
        try:
            with open(file_path, 'r', encoding='utf-16') as f:
                content = f.read()
            encoding = 'utf-16'
        except Exception as e:
            print(f"Error: Could not read file {file_path}: {e}")
            return 0
    # Find all lines matching the "key" = "value"; pattern
    pattern = r'"([^"\\]*(?:\\.[^"\\]*)*)"\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"\s*;'
    matches = re.findall(pattern, content)
    if not matches:
        print(f"Error: No keys found in file {file_path}.")
        return 0
    # Collect keys and values in a dictionary (later duplicates overwrite earlier ones)
    key_values = {key: value for key, value in matches}
    # Create sorted lines using the sorted keys
    sorted_lines = []
    for key in sorted(key_values.keys()):
        sorted_lines.append(f'"{key}" = "{key_values[key]}";')
    # Write the file back with the same encoding
    with open(file_path, 'w', encoding=encoding) as f:
        f.write('\n'.join(sorted_lines))
    return len(matches)

def find_missing_keys(used_keys, all_keys_by_file, filter_prefixes=None, min_missing_count=0):
    """Find keys that are used in the project but missing from language files.

    Args:
        used_keys: Dictionary of keys used in the code
        all_keys_by_file: Dictionary mapping file paths to their keys
        filter_prefixes: Optional list of prefixes to filter keys by
        min_missing_count: Minimum number of missing keys for a file to be included

    Returns:
        Dictionary mapping file paths to their missing keys
    """
    missing_keys_by_file = {}
    for file_path, keys in all_keys_by_file.items():
        # Find keys that are used in the project but not in this language file
        missing_keys = [key for key in used_keys if key not in keys]
        # Apply prefix filtering if specified
        if filter_prefixes:
            missing_keys = [key for key in missing_keys if any(key.startswith(prefix) for prefix in filter_prefixes)]
        # Only include files with enough missing keys
        if missing_keys and len(missing_keys) >= min_missing_count:
            missing_keys_by_file[file_path] = missing_keys
    return missing_keys_by_file

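# Illustrative example (hypothetical data): if "profile_title" is used in code
# but the de.lproj file only defines "profile_name", the de file's entry in
# the returned dict would include "profile_title" in its list of missing keys.
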
def write_missing_keys_to_file(missing_keys_by_file, output_file="missing_keys.txt"):
    """
    Write missing keys to a detailed report file.
    This function organizes missing keys by language and provides both a summary
    and a comprehensive listing, formatted for readability.
    """
    if not missing_keys_by_file:
        return False
    # Group missing keys by language for better organization
    keys_by_language = {}
    all_missing_keys = set()
    for file_path, keys in missing_keys_by_file.items():
        lang = get_language_from_path(file_path)
        if lang not in keys_by_language:
            keys_by_language[lang] = {"files": {}, "missing_key_count": 0, "total_keys": set()}
        keys_by_language[lang]["files"][file_path] = keys
        keys_by_language[lang]["missing_key_count"] += len(keys)
        keys_by_language[lang]["total_keys"].update(keys)
        all_missing_keys.update(keys)
    # Sort languages by missing key count (descending)
    sorted_languages = sorted(keys_by_language.keys(),
                              key=lambda lang: keys_by_language[lang]["missing_key_count"],
                              reverse=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("=== MISSING LOCALIZATION KEYS REPORT ===\n\n")
        # Overall summary
        total_missing = len(all_missing_keys)
        total_file_keys = sum(len(keys) for keys in missing_keys_by_file.values())
        f.write("SUMMARY:\n")
        f.write(f"- Total unique missing keys: {total_missing}\n")
        f.write(f"- Total missing entries across all files: {total_file_keys}\n")
        f.write(f"- Languages with missing keys: {len(keys_by_language)}\n\n")
        # Language summary table
        f.write("LANGUAGE SUMMARY:\n")
        f.write("╔════════════════╦═══════════════════════════╦════════════════╗\n")
        f.write("║ Language       ║ Missing Keys              ║ Files Affected ║\n")
        f.write("╠════════════════╬═══════════════════════════╬════════════════╣\n")
        for lang in sorted_languages:
            lang_data = keys_by_language[lang]
            file_count = len(lang_data["files"])
            missing_count = lang_data["missing_key_count"]
            unique_count = len(lang_data["total_keys"])
            # Format the summary line with padding for alignment
            missing_summary = f"{missing_count} ({unique_count} unique)"
            f.write(f"║ {lang:<14} ║ {missing_summary:<25} ║ {file_count:>14} ║\n")
        f.write("╚════════════════╩═══════════════════════════╩════════════════╝\n\n")
        # Common missing keys across multiple languages
        common_keys = {}
        for lang, lang_data in keys_by_language.items():
            for key in lang_data["total_keys"]:
                if key not in common_keys:
                    common_keys[key] = set()
                common_keys[key].add(lang)
        # Sort by the number of languages affected (descending)
        most_common_keys = sorted(common_keys.items(), key=lambda x: len(x[1]), reverse=True)
        if most_common_keys:
            f.write("KEYS MISSING IN MULTIPLE LANGUAGES:\n")
            f.write("╔══════════════════════════════════════════════════╦═════════════════════════╗\n")
            f.write(f"║ {'Key':<48} ║ {'Missing in Languages':<23} ║\n")
            f.write("╠══════════════════════════════════════════════════╬═════════════════════════╣\n")
            # Show only keys missing in more than one language
            multi_lang_keys = [(key, langs) for key, langs in most_common_keys if len(langs) > 1]
            for key, langs in multi_lang_keys[:20]:  # Limit to 20 for readability
                # Truncate long keys
                display_key = key[:45] + "..." if len(key) > 48 else key
                # Format the languages as a comma-separated list
                langs_str = ", ".join(sorted(langs))
                if len(langs_str) > 23:
                    langs_str = langs_str[:20] + "..."
                f.write(f"║ {display_key:<48} ║ {langs_str:<23} ║\n")
            if len(multi_lang_keys) > 20:
                more_line = f"... and {len(multi_lang_keys) - 20} more keys"
                f.write(f"║ {more_line:<48} ║ {'':<23} ║\n")
            f.write("╚══════════════════════════════════════════════════╩═════════════════════════╝\n\n")
        # Detailed listing by language
        f.write("DETAILED MISSING KEYS BY LANGUAGE:\n\n")
        for lang in sorted_languages:
            lang_data = keys_by_language[lang]
            file_count = len(lang_data["files"])
            missing_count = lang_data["missing_key_count"]
            unique_count = len(lang_data["total_keys"])
            f.write(f"LANGUAGE: {lang}\n")
            f.write(f"  - Missing keys: {missing_count} ({unique_count} unique)\n")
            f.write(f"  - Files affected: {file_count}\n\n")
            # Sort the files to ensure consistent output
            sorted_files = sorted(lang_data["files"].keys())
            for file_path in sorted_files:
                keys = lang_data["files"][file_path]
                rel_path = os.path.relpath(file_path)
                f.write(f"  File: {rel_path}\n")
                f.write(f"  Missing keys: {len(keys)}\n")
                # Sort the keys alphabetically for consistent output
                for key in sorted(keys):
                    f.write(f'    "{key}"\n')
                f.write("\n")
            f.write("-" * 80 + "\n\n")
    return True

def create_localization_template(missing_keys, all_keys_by_file, reference_language='en', output_file='template_localizable.strings'):
    """
    Create a template Localizable.strings file with missing keys.

    Args:
        missing_keys: Set of missing keys to include in the template
        all_keys_by_file: Dictionary of all keys by file, used to look up reference values
        reference_language: Language code to use for reference values
        output_file: Output file path for the template

    Returns:
        True if successful, False otherwise
    """
    if not missing_keys:
        print("No missing keys to create a template for.")
        return False
    # Find the reference language file (match the .lproj directory, not just any
    # path that happens to contain the language code)
    reference_file = None
    reference_values = {}
    for file_path in all_keys_by_file.keys():
        if f"{reference_language}.lproj" in file_path:
            reference_file = file_path
            reference_values = all_keys_by_file[file_path]
            break
    # If the reference language wasn't found, use the first file as the reference
    if reference_file is None and all_keys_by_file:
        reference_file = list(all_keys_by_file.keys())[0]
        reference_values = all_keys_by_file[reference_file]
    sorted_keys = sorted(missing_keys)
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write('// Template Localizable.strings file with missing keys\n')
        f.write('// Generated by clean_localizations.py\n')
        f.write('// Reference language: ' + (reference_language if reference_file else 'none') + '\n\n')
        for key in sorted_keys:
            # Try to get the value from the reference language
            if key in reference_values:
                value = reference_values[key]
                f.write(f'"{key}" = "{value}";\n')
            else:
                # If not found, use a placeholder with the key name
                f.write(f'"{key}" = "TRANSLATE: {key}";\n')
    print(f"Created template file with {len(sorted_keys)} missing keys: {output_file}")
    if not reference_file:
        print(f"Warning: Reference language '{reference_language}' not found, using placeholders for all values.")
    return True

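# Illustrative template output (hypothetical keys; the first value is copied
# from the reference language, the second falls back to the placeholder form):
#     "ach_first_win_title" = "First Win!";
#     "ach_new_badge_title" = "TRANSLATE: ach_new_badge_title";
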
def is_valid_localization_key(key, key_pattern_analysis=None):
    """
    Dynamically determine if a string is likely to be a valid localization key,
    based on pattern analysis from the project.
    """
    if not key or not isinstance(key, str):
        return False
    # Empty string check
    if not key.strip():
        return False
    # Purely numeric keys are invalid
    if key.isdigit() or key.replace('.', '', 1).isdigit():
        return False
    # If we have pattern analysis results from the project
    if key_pattern_analysis:
        # Direct match with known keys
        if key in key_pattern_analysis.get('known_keys', set()):
            return True
        # Check if the key follows detected prefix patterns
        prefixes = key_pattern_analysis.get('prefixes', set())
        for prefix in prefixes:
            if key.startswith(prefix + '_') or key.startswith(prefix + '.'):
                return True
        # Check if the key has detected suffix patterns
        suffixes = key_pattern_analysis.get('suffixes', set())
        for suffix in suffixes:
            if key.endswith('_' + suffix) or key.endswith('.' + suffix):
                return True
    # Basic format validation for localization keys
    if '_' in key or '.' in key:
        # It has the structure of a localization key (underscore/dot separated)
        # and contains no spaces or unusual characters
        if re.match(r'^[a-z][a-zA-Z0-9_\.]+$', key) and ' ' not in key:
            return True
    return False

def main():
    args = parse_args()
    project_path = os.path.abspath(args.project_path)
    # Default exclude patterns (raw strings so the regexes are not mangled)
    exclude_patterns = args.exclude or [r'^\.git$', r'^\.build$', r'^Pods$', r'^Carthage$', r'^\.DS_Store$']
    print(f"Project directory: {project_path}")
    print(f"Excluded patterns: {exclude_patterns}")
    # Auto-detect missing keys mode
    if args.auto_detect_missing_keys:
        print("Running in auto-detect missing keys mode...")
        # Display the filter settings, if any
        if args.filter_prefix:
            print(f"Filtering keys by prefixes: {', '.join(args.filter_prefix)}")
        if args.min_missing_count > 0:
            print(f"Only reporting languages with at least {args.min_missing_count} missing keys")
        # Find the used keys
        print("Searching for localization keys in code files...")
        used_keys, file_occurrences = find_localization_keys_in_code(project_path, exclude_patterns)
        print(f"Total {len(used_keys)} keys are used in code.")
        # Find the strings files
        print("Searching for Localizable.strings files...")
        strings_files = find_strings_files(project_path, exclude_patterns)
        if not strings_files:
            print("No Localizable.strings files found. Terminating.")
            return
        print(f"Found {len(strings_files)} localization files.")
        # Parse the keys from each file
        all_keys_by_file = {}
        languages_found = set()
        for file_path in strings_files:
            lang = get_language_from_path(file_path)
            languages_found.add(lang)
            print(f"Processing: {file_path} ({lang})")
            keys = parse_strings_file(file_path)
            all_keys_by_file[file_path] = keys
            # Calculate the share of used keys that this file covers
            if used_keys:
                found_keys = len([k for k in keys if k in used_keys])
                percent_found = (found_keys / len(used_keys)) * 100
            else:
                found_keys = 0
                percent_found = 0
            print(f"  Total keys: {len(keys)}")
            print(f"  Contains {found_keys}/{len(used_keys)} used keys ({percent_found:.1f}%)")
        print(f"\nFound {len(languages_found)} unique languages: {', '.join(sorted(languages_found))}")
        # Find keys missing from language files, applying the filters
        missing_keys_by_file = find_missing_keys(used_keys, all_keys_by_file, args.filter_prefix, args.min_missing_count)
        if missing_keys_by_file:
            print("\nDetected keys used in code but missing from language files:")
            total_missing = sum(len(keys) for keys in missing_keys_by_file.values())
            unique_missing_keys = set()
            for keys in missing_keys_by_file.values():
                unique_missing_keys.update(keys)
            print(f"Total missing entries: {total_missing} across {len(missing_keys_by_file)} files")
            print(f"Unique missing keys: {len(unique_missing_keys)}")
            # Display a preview of missing keys for each language
            for file_path, keys in missing_keys_by_file.items():
                lang = get_language_from_path(file_path)
                print(f"  {lang}: {len(keys)} missing keys")
                # Show a few examples of missing keys
                for key in sorted(keys)[:5]:
                    print(f"    \"{key}\"")
                if len(keys) > 5:
                    print(f"    ... and {len(keys) - 5} more keys")
            # Write the missing keys to the specified output file
            if write_missing_keys_to_file(missing_keys_by_file, output_file=args.output_file):
                print(f"\nDetailed missing keys report written to {args.output_file}")
            # Create a template file if requested
            if args.create_template:
                # Collect all unique missing keys
                all_missing_keys = set()
                for keys in missing_keys_by_file.values():
                    all_missing_keys.update(keys)
                create_localization_template(
                    all_missing_keys,
                    all_keys_by_file,
                    reference_language=args.reference_language,
                    output_file=args.template_file
                )
        else:
            if args.filter_prefix:
                print(f"\nNo missing keys found matching the specified prefixes: {', '.join(args.filter_prefix)}")
            else:
                print("\nNo missing keys found. All localization files are complete.")
        # Optional: identify keys that might be unused in code
        if not args.no_prompt and input("\nWould you like to check for potentially unused keys in localization files? (y/n): ").strip().lower() == 'y':
            print("\nChecking for potentially unused keys...")
            unused_keys_by_file = {}
            total_unused = 0
            for file_path, keys in all_keys_by_file.items():
                # Find keys in the strings file that aren't found in code
                unused_keys = [key for key in keys if key not in used_keys]
                if unused_keys:
                    unused_keys_by_file[file_path] = unused_keys
                    total_unused += len(unused_keys)
            if unused_keys_by_file:
                print(f"Found {total_unused} potentially unused keys across all language files.")
                for file_path, keys in unused_keys_by_file.items():
                    lang = get_language_from_path(file_path)
                    percent = (len(keys) / len(all_keys_by_file[file_path])) * 100
                    print(f"  {lang}: {len(keys)}/{len(all_keys_by_file[file_path])} keys ({percent:.1f}%) may be unused")
                print("\nNote: these keys might be used dynamically or loaded at runtime.")
                print("Review manually before removing any keys.")
                if args.log_file:
                    write_log(args.log_file, used_keys, unused_keys_by_file, all_keys_by_file, file_occurrences)
                    print(f"Analysis report written to {args.log_file}")
            else:
                print("No potentially unused keys found. All keys in localization files appear to be used in code.")
        return
    # Comment removal preference
    if args.no_prompt:
        remove_comments = False
        clean_whitespace = args.clean_whitespace
        sort_keys = args.sort_keys
    else:
        remove_comments = input("Do you want to remove comment lines? (y/n): ").strip().lower() == 'y'
        # Unnecessary whitespace cleaning preference
        if args.clean_whitespace:
            clean_whitespace = True
        else:
            clean_whitespace = input("Do you want to clean unnecessary whitespace and empty lines? (y/n): ").strip().lower() == 'y'
        # Alphabetical key sorting preference
        if args.sort_keys:
            sort_keys = True
        else:
            sort_keys = input("Do you want to sort keys alphabetically? (y/n): ").strip().lower() == 'y'
    # Duplicate key removal preference
    auto_deduplicate = True  # Automatically deduplicate without asking
    # Find the used keys
    print("Searching for localization keys in code files...")
    used_keys, file_occurrences = find_localization_keys_in_code(project_path, exclude_patterns)
    print(f"Total {len(used_keys)} keys are used.")
    # Find the strings files
    print("Searching for Localizable.strings files...")
    strings_files = find_strings_files(project_path, exclude_patterns)
    print(f"Found {len(strings_files)} strings files.")
    if not strings_files:
        print("No Localizable.strings files found. Terminating.")
        return
    # Collect information about the deletions
    duplicates_removed_by_file = {}
    comments_removed_by_file = {}
    whitespace_removed_by_file = {}
    sorted_keys_by_file = {}
    # Clean duplicate keys
    if auto_deduplicate:
        print("\nCleaning duplicate keys...")
        total_duplicates_removed = 0
        for file_path in strings_files:
            lang = get_language_from_path(file_path)
            duplicates_removed, unique_keys = remove_duplicate_keys(file_path)
            duplicates_removed_by_file[file_path] = (duplicates_removed, unique_keys)
            total_duplicates_removed += duplicates_removed
            if duplicates_removed > 0:
                print(f"  {file_path} ({lang}): {duplicates_removed} duplicate keys removed, {unique_keys} unique keys remain")
            else:
                print(f"  {file_path} ({lang}): No duplicate keys found")
        print(f"Total {total_duplicates_removed} duplicate keys removed.\n")
    # Clean comment lines first (if requested)
    if remove_comments:
        print("\nCleaning comment lines...")
        total_comments_removed = 0
        for file_path in strings_files:
            lang = get_language_from_path(file_path)
            comments_removed = clean_comments_from_file(file_path)
            comments_removed_by_file[file_path] = comments_removed
            total_comments_removed += comments_removed
            print(f"  {file_path} ({lang}): {comments_removed} comment lines removed")
        print(f"Total {total_comments_removed} comment lines removed.\n")
    # Clean unnecessary whitespace (if requested)
    if clean_whitespace:
        print("\nCleaning unnecessary whitespace and empty lines...")
        total_whitespace_removed = 0
        for file_path in strings_files:
            lang = get_language_from_path(file_path)
            whitespace_removed = clean_whitespace_from_file(file_path)
            whitespace_removed_by_file[file_path] = whitespace_removed
            total_whitespace_removed += whitespace_removed
            print(f"  {file_path} ({lang}): {whitespace_removed} unnecessary whitespace/lines cleaned")
        print(f"Total {total_whitespace_removed} unnecessary whitespace/lines cleaned.\n")
    # Sort keys alphabetically (if requested)
    if sort_keys:
        print("\nSorting keys alphabetically...")
        total_keys_sorted = 0
        for file_path in strings_files:
            lang = get_language_from_path(file_path)
            keys_sorted = sort_keys_in_file(file_path)
            sorted_keys_by_file[file_path] = keys_sorted
            total_keys_sorted += keys_sorted
            print(f"  {file_path} ({lang}): {keys_sorted} keys sorted alphabetically")
        print(f"Total {total_keys_sorted} keys sorted alphabetically.\n")
    # Parse the keys from each file
    all_keys_by_file = {}
    unused_keys_by_file = {}
    for file_path in strings_files:
        lang = get_language_from_path(file_path)
        print(f"Processing: {file_path} ({lang})")
        keys = parse_strings_file(file_path)
        all_keys_by_file[file_path] = keys
        # Find the unused keys
        unused_keys = [key for key in keys if key not in used_keys]
        unused_keys_by_file[file_path] = unused_keys
        print(f"  Total keys: {len(keys)}")
        print(f"  Unused keys: {len(unused_keys)}")
    # Find keys missing from language files
    missing_keys_by_file = find_missing_keys(used_keys, all_keys_by_file, args.filter_prefix, args.min_missing_count)
    if missing_keys_by_file:
        print("\nDetected keys used in the project but missing from language files:")
        total_missing = sum(len(keys) for keys in missing_keys_by_file.values())
        print(f"Total missing keys: {total_missing}")
        for file_path, keys in missing_keys_by_file.items():
            lang = get_language_from_path(file_path)
            print(f"  {file_path} ({lang}): {len(keys)} missing keys")
        # Write the missing keys to the specified output file
        if write_missing_keys_to_file(missing_keys_by_file, output_file=args.output_file):
            print(f"Missing keys written to {args.output_file}")
    # If there are no unused keys
    if all(not keys for keys in unused_keys_by_file.values()):
        print("\nNo unused keys found in any language file. Terminating.")
        write_log(args.log_file, used_keys, unused_keys_by_file, all_keys_by_file, file_occurrences,
                  comments_removed_by_file if remove_comments else None,
                  duplicates_removed_by_file if auto_deduplicate else None,
                  whitespace_removed_by_file if clean_whitespace else None,
                  sorted_keys_by_file if sort_keys else None)
        print(f"\nAnalysis report written: {args.log_file}")
        return
    # Show the unused keys and ask for deletion confirmation
    for file_path, unused_keys in unused_keys_by_file.items():
        if not unused_keys:
            continue
        lang = get_language_from_path(file_path)
        print(f"\nThe {lang} language has {len(unused_keys)} unused keys:")
        # Show all unused keys
        for i, key in enumerate(sorted(unused_keys), 1):
            print(f"  {i}. \"{key}\" = \"{all_keys_by_file[file_path][key]}\"")
        if args.confirm_all:
            confirm = 'y'
        else:
            confirm = input(f"\nDelete {len(unused_keys)} unused keys in the {lang} language? (y = delete all / n = skip / s = select keys to keep): ")
        if confirm.lower() == 'y':
            print(f"Deleting unused keys: {file_path}")
            removed = clean_strings_file(file_path, unused_keys)
            print(f"Completed. {removed} keys deleted.")
        elif confirm.lower() == 's':
            keys_to_keep = input("Enter the keys you want to keep, separated by commas (e.g. key1,key2): ").split(',')
            keys_to_keep = [k.strip() for k in keys_to_keep]
            keys_to_remove = [key for key in unused_keys if key not in keys_to_keep]
            print(f"Deleting {len(keys_to_remove)} keys, keeping {len(keys_to_keep)} keys.")
            removed = clean_strings_file(file_path, keys_to_remove)
            print(f"Completed. {removed} keys deleted.")
        else:
            print("Deletion cancelled.")
    # Write the log file
    write_log(args.log_file, used_keys, unused_keys_by_file, all_keys_by_file, file_occurrences,
              comments_removed_by_file if remove_comments else None,
              duplicates_removed_by_file if auto_deduplicate else None,
              whitespace_removed_by_file if clean_whitespace else None,
              sorted_keys_by_file if sort_keys else None)
    print(f"\nAnalysis report written: {args.log_file}")


if __name__ == '__main__':
    main()