Swift Clean Localization Keys
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
r"""
iOS/macOS Localization Manager and Analysis Tool
This tool analyzes, cleans, and manages localization files (Localizable.strings) in iOS or macOS projects.
It performs the following operations:
1. LOCALIZATION KEY DETECTION:
- Intelligently detects localization keys used in Swift, Objective-C, Kotlin, Java, JavaScript/TypeScript, and SQL files
- Supports various localization methods (NSLocalizedString, String(localized:), LocalizedStringKey, etc.)
- Finds keys defined in variables and referenced indirectly in code
- Detects keys used in common UI component initializers and structures
2. MISSING KEYS ANALYSIS:
- Identifies keys used in code but missing from localization files
- Generates detailed reports showing which keys are missing from each language
- Can filter results by key prefixes (e.g., only show missing achievement keys)
- Creates template files for translators with placeholder values
3. CLEANING OPERATIONS:
- Automatically detects duplicate keys and removes them while keeping the last definition
- Cleans comment lines (starting with //) and inline comments (after key-value definitions)
- Detects and removes keys that are not used anywhere in the project (with confirmation)
- Cleans unnecessary whitespace and empty lines (with --clean-whitespace option)
- Sorts keys alphabetically (with --sort-keys option)
4. DETAILED REPORTING:
- Lists all used and unused keys
- Reports how many duplicate keys and comment lines were removed, and in which files
- Shows which files use the keys and how many times they are used
- Saves all analysis results to a detailed log file
Usage:
python3 clean_localizations.py [project_directory] [options]
Basic Options:
--confirm-all Automatically confirm all deletion operations
--log-file FILE_NAME Specify the log file name (default: localization_log.txt)
--exclude PATTERN [PATTERN ...]   Patterns for directories/files to exclude (accepts several patterns after one flag)
--verbose, -v Show detailed output
--clean-whitespace Clean unnecessary whitespace and empty lines
--sort-keys Sort keys alphabetically
--no-prompt Run without interactive prompts
Missing Keys Detection:
--auto-detect-missing-keys Run in missing keys detection mode
--filter-prefix PREFIX [PREFIX ...]   Filter keys by prefix(es) (accepts several values: --filter-prefix ach_ badge_)
--min-missing-count NUM Only include languages with at least NUM missing keys
--output-file FILE Output file for missing keys report (default: missing_keys.txt)
Template Generation:
--create-template Create a template Localizable.strings file with missing keys
--template-file FILE Output file for the template (default: template_localizable.strings)
--reference-language LANG Language to use for reference values (default: en)
Examples:
python3 clean_localizations.py # Run in current directory
python3 clean_localizations.py /path/to/project # Run in a specific directory
python3 clean_localizations.py --confirm-all # Automatically confirm all deletions
python3 clean_localizations.py --exclude "^\.git$" # Exclude .git directory
python3 clean_localizations.py --clean-whitespace # Clean unnecessary whitespace
python3 clean_localizations.py --sort-keys # Sort keys alphabetically
python3 clean_localizations.py --auto-detect-missing-keys # Only detect missing keys
python3 clean_localizations.py --filter-prefix ach_ badge_ # Only show keys with these prefixes
python3 clean_localizations.py --create-template --filter-prefix auth_ # Create a template for auth keys
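
Illustrative Localizable.strings input (hypothetical keys, not from a real project):
    "welcome_title" = "Welcome";
    "auth_login_button" = "Log in";   // inline comments like this are cleaned
    "auth_login_button" = "Sign in";  // duplicate: only the last definition is kept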
"""
import os
import re
import argparse
from collections import Counter, defaultdict
def parse_args():
"""Parse command line arguments."""
parser = argparse.ArgumentParser(description='Detect unused localization keys in iOS projects.')
parser.add_argument('project_path', nargs='?', default='.', help='Project directory (default: current directory)')
parser.add_argument('--confirm-all', action='store_true', help='Automatically confirm all deletion operations')
parser.add_argument('--log-file', default='localization_log.txt', help='Log file (default: localization_log.txt)')
parser.add_argument('--exclude', nargs='+', default=[], help='Patterns for directories/files to exclude')
parser.add_argument('--verbose', '-v', action='store_true', help='Show detailed output')
parser.add_argument('--clean-whitespace', action='store_true', help='Clean unnecessary whitespace and empty lines')
parser.add_argument('--sort-keys', action='store_true', help='Sort keys alphabetically')
parser.add_argument('--auto-detect-missing-keys', action='store_true', help='Automatically detect missing keys without other operations')
parser.add_argument('--no-prompt', action='store_true', help='Run without interactive prompts')
parser.add_argument('--filter-prefix', nargs='+', default=[],
help='Filter missing keys to only include those with specific prefixes (e.g., "ach_" "button_")')
parser.add_argument('--min-missing-count', type=int, default=0,
help='Only report languages with at least this many missing keys')
parser.add_argument('--output-file', default='missing_keys.txt',
help='Output file for missing keys (default: missing_keys.txt)')
parser.add_argument('--create-template', action='store_true',
help='Create a template Localizable.strings file with missing keys for translation')
parser.add_argument('--template-file', default='template_localizable.strings',
help='Output file for the template (default: template_localizable.strings)')
parser.add_argument('--reference-language', default='en',
help='Language to use for reference values in the template (default: en)')
return parser.parse_args()
def find_strings_files(project_path, exclude_patterns):
"""Find all Localizable.strings files."""
strings_files = []
for root, dirs, files in os.walk(project_path):
# Filter out excluded directories
dirs[:] = [d for d in dirs if not any(re.match(pattern, d) for pattern in exclude_patterns)]
for filename in files:
if filename == "Localizable.strings":
strings_files.append(os.path.join(root, filename))
return strings_files
def parse_strings_file(file_path):
"""Parse a Localizable.strings file and return key-value pairs."""
keys = {}
try:
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
except UnicodeDecodeError:
# Try UTF-16
try:
with open(file_path, 'r', encoding='utf-16') as f:
content = f.read()
except Exception as e:
print(f"Error: Could not read file {file_path}: {e}")
return keys
# Find all lines matching the "key" = "value"; pattern
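    # The [^"\\]*(?:\\.[^"\\]*)* core allows escaped characters (e.g. \" or \n) inside keys and values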
pattern = r'"([^"\\]*(?:\\.[^"\\]*)*)"\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"\s*;'
matches = re.findall(pattern, content)
for key, value in matches:
keys[key] = value
return keys
def extract_keys_from_files(project_path, exclude_patterns):
"""Find all string keys directly defined in code and also extract from various structure patterns."""
used_keys = Counter()
file_occurrences = defaultdict(list)
# File extensions to scan
swift_extensions = ['.swift']
sql_extensions = ['.sql']
objc_extensions = ['.m', '.h']
kotlin_extensions = ['.kt', '.kts']
java_extensions = ['.java']
js_extensions = ['.js', '.jsx', '.ts', '.tsx']
# Track potential key variables {variable_name: key_string}
variables_by_file = {}
# Collect possible localization key patterns from existing code
key_pattern_analysis = analyze_key_patterns(project_path, exclude_patterns)
# First pass - collect all variable declarations
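    # Two passes are needed because a key variable declared in one file may be referenced from another file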
for root, dirs, files in os.walk(project_path):
# Filter out excluded directories
dirs[:] = [d for d in dirs if not any(re.match(pattern, d) for pattern in exclude_patterns)]
for filename in files:
file_path = os.path.join(root, filename)
file_extension = os.path.splitext(filename)[1].lower()
try:
# For code files
if any(filename.endswith(ext) for ext in
swift_extensions + sql_extensions + objc_extensions +
kotlin_extensions + java_extensions + js_extensions):
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
# Extract all strings in the file for potential localization keys
all_strings = re.findall(r'"([^"\\]*(?:\\.[^"\\]*)*)"', content)
# Analyze the content to detect localization context
has_localization_context = detect_localization_context(content)
# Find all variable declarations that seem to be localization keys
file_variables = {}
collect_variable_declarations(content, file_variables)
if file_variables:
variables_by_file[file_path] = file_variables
except Exception as e:
print(f"Error reading file (pass 1): {file_path} - {str(e)}")
# Second pass - process direct usages, variable references, and contextual analysis
for root, dirs, files in os.walk(project_path):
# Filter out excluded directories
dirs[:] = [d for d in dirs if not any(re.match(pattern, d) for pattern in exclude_patterns)]
for filename in files:
file_path = os.path.join(root, filename)
file_extension = os.path.splitext(filename)[1].lower()
try:
# Determine language based on file extension
if any(filename.endswith(ext) for ext in swift_extensions + objc_extensions):
language = 'swift'
elif any(filename.endswith(ext) for ext in kotlin_extensions + java_extensions):
language = 'kotlin/android'
elif any(filename.endswith(ext) for ext in js_extensions):
language = 'javascript/react'
elif any(filename.endswith(ext) for ext in sql_extensions):
language = 'sql'
else:
# Skip unsupported file types
continue
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
file_variables = variables_by_file.get(file_path, {})
# Known localization function direct usage patterns
process_localization_function_usage(content, file_path, used_keys, file_occurrences, language)
# Process context-aware string assignments
# This includes categoryKey and similar assignments that are likely localization keys
process_contextual_string_assignments(content, file_path, used_keys, file_occurrences, key_pattern_analysis)
# Process variable references that might be localization keys
process_variable_references(content, file_path, used_keys, file_occurrences, variables_by_file)
# Analyze string literals in UI components to find localized strings
process_ui_component_strings(content, file_path, used_keys, file_occurrences, key_pattern_analysis)
except Exception as e:
print(f"Error reading file (pass 2): {file_path} - {str(e)}")
# Final pass - collect any remaining potential keys from contextual analysis
for file_path, vars_in_file in variables_by_file.items():
for variable_name, key in vars_in_file.items():
if key not in used_keys and is_valid_localization_key(key, key_pattern_analysis):
# If the key is defined but not detected in usage, add it anyway if it follows patterns
used_keys[key] += 1
file_occurrences[key].append((file_path, f"potential_key:{variable_name}"))
return used_keys, file_occurrences
def analyze_key_patterns(project_path, exclude_patterns):
"""
Dynamically analyze the codebase to identify localization key patterns.
Returns a pattern analysis object with detected patterns and conventions.
"""
print("Analyzing codebase for localization key patterns...")
# Stores our analysis results
key_patterns = {
'prefixes': set(),
'suffixes': set(),
'assignments': defaultdict(int), # Variable names used for key assignments
'formats': defaultdict(int), # Key format statistics (snake_case, etc.)
'contexts': defaultdict(set), # Context in which keys appear
'known_keys': set(), # Keys found in localization files
}
# First pass - collect keys from localization files
for root, dirs, files in os.walk(project_path):
dirs[:] = [d for d in dirs if not any(re.match(pattern, d) for pattern in exclude_patterns)]
for filename in files:
if filename == "Localizable.strings":
file_path = os.path.join(root, filename)
try:
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
except UnicodeDecodeError:
try:
with open(file_path, 'r', encoding='utf-16') as f:
content = f.read()
except Exception:
continue
# Extract keys
key_pattern = r'"([^"\\]*(?:\\.[^"\\]*)*)"\s*=\s*"[^"\\]*(?:\\.[^"\\]*)*"\s*;'
keys = re.findall(key_pattern, content)
# Add to known keys
key_patterns['known_keys'].update(keys)
# Analyze prefix and suffix patterns
for key in keys:
# Extract prefix (everything before first underscore/dot)
prefix_match = re.match(r'^([a-z0-9]+)[_\.]', key)
if prefix_match:
key_patterns['prefixes'].add(prefix_match.group(1))
# Extract suffix (everything after last underscore/dot)
suffix_match = re.search(r'[_\.]([a-z0-9]+)$', key)
if suffix_match:
key_patterns['suffixes'].add(suffix_match.group(1))
# Record format
if '_' in key:
key_patterns['formats']['snake_case'] += 1
elif '.' in key:
key_patterns['formats']['dot_notation'] += 1
elif re.search(r'[a-z][A-Z]', key):
key_patterns['formats']['camelCase'] += 1
# Second pass - analyze code for key assignment patterns
for root, dirs, files in os.walk(project_path):
dirs[:] = [d for d in dirs if not any(re.match(pattern, d) for pattern in exclude_patterns)]
for filename in files:
if filename.endswith('.swift'):
file_path = os.path.join(root, filename)
try:
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
except Exception:
continue
# Look for variable assignments to strings
assignment_patterns = [
# Common key assignment patterns
r'(\w+)Key\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
r'let\s+(\w+)Key\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
r'var\s+(\w+)Key\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
# Other assignment forms
r'(\w+)(?:Title|Label)\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
r'unit(?:Key)?:\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
r'titleKey:\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
]
for pattern in assignment_patterns:
for match in re.finditer(pattern, content):
                        groups = match.groups()
                        # Single-group patterns (e.g. unitKey:, titleKey:) capture only
                        # the key; pad them so the two-group check below does not skip them
                        if len(groups) == 1:
                            groups = ('param',) + groups
                        if len(groups) >= 2:
# Get the variable name (first group)
var_type = groups[0]
# Get the key (last group)
key = groups[-1]
# Update context analysis
start_pos = max(0, match.start() - 50)
end_pos = min(len(content), match.end() + 50)
context_text = content[start_pos:end_pos]
# Analyze context - add to contexts if seems to be localization
if any(indicator in context_text for indicator in ['localized', 'LocalizedStringKey', 'String(localized:']):
key_patterns['contexts'][var_type].add('localization')
# Check for specific UI contexts
if 'TextField' in context_text or 'Text(' in context_text:
key_patterns['contexts'][var_type].add('ui_text')
elif 'Button' in context_text:
key_patterns['contexts'][var_type].add('button')
# Count assignments by variable type
key_patterns['assignments'][var_type] += 1
# Print some stats about what we found
print(f"Found {len(key_patterns['known_keys'])} known localization keys in .strings files")
if key_patterns['prefixes']:
print(f"Detected {len(key_patterns['prefixes'])} common key prefixes, including: {', '.join(list(key_patterns['prefixes'])[:10])}...")
if key_patterns['suffixes']:
print(f"Detected {len(key_patterns['suffixes'])} common key suffixes, including: {', '.join(list(key_patterns['suffixes'])[:10])}...")
if key_patterns['assignments']:
print(f"Most common variable contexts for key assignments: {', '.join(sorted(key_patterns['assignments'].keys(), key=lambda k: key_patterns['assignments'][k], reverse=True)[:5])}")
return key_patterns
def detect_localization_context(content):
"""
Analyze file content to determine if it's likely to contain localization logic.
Returns a boolean indicating if localization context is detected.
"""
# Look for common localization imports and frameworks
localization_indicators = [
'LocalizedStringKey', 'NSLocalizedString', 'String(localized:',
'Localizable.strings', '.localized', 'formatMessage',
'i18n', 'translate(', 't("', 'gettext', 'getString(R.string.',
'useTranslation', 'LocalizationProvider'
]
for indicator in localization_indicators:
if indicator in content:
return True
return False
def collect_variable_declarations(content, file_variables):
"""
Extract variable declarations that might be localization keys.
Stores results in the file_variables dict.
"""
# Standard variable declarations across languages
patterns = [
# Swift/Obj-C
r'(?:let|var)\s+(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
r'static\s+(?:let|var)\s+(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
# JavaScript/TypeScript
r'(?:const|let|var)\s+(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*=\s*[\'"]([^\'"\\\n]*(?:\\.[^\'"\\\n]*)*)[\'"]',
# Java/Kotlin
r'(?:final|private|public|protected|static)?\s*(?:String|val|var)\s+(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
]
for pattern in patterns:
for match in re.finditer(pattern, content):
if len(match.groups()) >= 2:
var_name, value = match.groups()[:2]
if is_potential_localization_key(value):
file_variables[var_name] = value
def is_potential_localization_key(text):
"""
Determine if a string is likely to be a localization key based on its format.
More sophisticated than the basic is_valid_localization_key function.
"""
if not text or not isinstance(text, str):
return False
# Empty string check
if not text.strip():
return False
# Purely numeric keys are invalid
if text.isdigit() or text.replace('.', '', 1).isdigit():
return False
# Check for specific patterns of localization keys
# Common patterns include:
# 1. snake_case with dots or underscores (module.key_name)
# 2. Short identifiers, not natural text (keys are typically concise)
# 3. Absence of spaces (keys rarely have spaces)
# 4. Presence of common prefixes/suffixes like title, label, etc.
# Basic format checks
if ' ' in text: # Spaces usually indicate natural text, not keys
return False
if len(text) > 100: # Keys are typically shorter than long text
return False
# Pattern checks
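    # e.g. "profile.title" and "auth_login_button" pass; natural text like "Welcome back" does not (hypothetical examples)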
if re.match(r'^[a-z][a-z0-9]*[_\.][a-z0-9_\.]+$', text): # module.key or module_key pattern
return True
if re.match(r'^[a-z][a-z0-9_\.]+$', text) and ('_' in text or '.' in text): # snake_case or dot.notation
return True
# Check for common key components
key_components = ['title', 'label', 'message', 'error', 'success', 'button',
'status', 'header', 'footer', 'desc', 'tooltip', 'placeholder',
'name', 'text']
for component in key_components:
if f"_{component}" in text or f".{component}" in text or text.endswith(f"_{component}") or text.endswith(f".{component}"):
return True
return False
def process_localization_function_usage(content, file_path, used_keys, file_occurrences, language):
"""
Process direct usage of localization functions in the code.
"""
# Map of language to localization function patterns
localization_patterns = {
'swift': [
# NSLocalizedString and other Swift patterns
(r'NSLocalizedString\(\s*"([^"\\]*(?:\\.[^"\\]*)*)"', "NSLocalizedString"),
(r'String\(localized:\s*"([^"\\]*(?:\\.[^"\\]*)*)"', "String(localized:)"),
(r'\.localized\(\s*(?:[^,]*,\s*)?[^"]*"([^"\\]*(?:\\.[^"\\]*)*)"', ".localized()"),
(r'Text\(LocalizedStringKey\("([^"\\]*(?:\\.[^"\\]*)*)"\)\)', "Text(LocalizedStringKey)"),
(r'LocalizedStringKey\(\s*"([^"\\]*(?:\\.[^"\\]*)*)"', "LocalizedStringKey"),
],
'kotlin/android': [
# Android getString patterns
(r'getString\(R\.string\.([a-zA-Z0-9_]+)', "getString"),
(r'\.getString\(R\.string\.([a-zA-Z0-9_]+)', "context.getString"),
(r'stringResource\(R\.string\.([a-zA-Z0-9_]+)', "stringResource"),
],
'javascript/react': [
# React i18n patterns
(r'(?:i18n|t|translate)\(\s*[\'"]([^\'"\\\n]*(?:\\.[^\'"\\\n]*)*)[\'"]', "i18n/translate"),
(r'formatMessage\(\s*{\s*id:\s*[\'"]([^\'"\\\n]*(?:\\.[^\'"\\\n]*)*)[\'"]', "formatMessage"),
],
}
# Process patterns for the specific language
for pattern, pattern_name in localization_patterns.get(language, []):
matches = re.findall(pattern, content)
for key in matches:
used_keys[key] += 1
file_occurrences[key].append((file_path, pattern_name))
def process_contextual_string_assignments(content, file_path, used_keys, file_occurrences, key_pattern_analysis):
"""
Process string assignments that are likely to be localization keys based on context.
Uses dynamic pattern analysis rather than hard-coded patterns.
"""
# Dynamic pattern for finding any assignment with string literals
# This will catch category, title, label, unit assignments and more
assignment_pattern = r'(\w+)(?:\s*(?:=|:)|\w+Key:)\s*"([^"\\]*(?:\\.[^"\\]*)*)"'
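    # e.g. matches categoryKey = "stats_category" or title: "profile_title" (hypothetical snippets)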
# Set of variable name fragments that suggest localization key assignments
potential_key_vars = set()
# Build potential variable names from pattern analysis
if key_pattern_analysis and 'assignments' in key_pattern_analysis:
for var_type in key_pattern_analysis['assignments']:
if len(var_type) > 3: # Avoid very short names that might cause false positives
potential_key_vars.add(var_type.lower())
# Add common key variable indicators if we didn't find enough from analysis
if len(potential_key_vars) < 5:
potential_key_vars.update(['key', 'title', 'label', 'unit', 'category', 'field', 'text', 'message'])
# Find all assignments in the content
for match in re.finditer(assignment_pattern, content):
var_name, value = match.groups()
# Skip if value is empty
if not value.strip():
continue
# Get surrounding context to check for localization indicators
start_pos = max(0, match.start() - 100)
end_pos = min(len(content), match.end() + 100)
context = content[start_pos:end_pos]
# Analyze the variable name - does it suggest a key assignment?
var_lower = var_name.lower()
# Look for common key-related variable names or parameter names
        is_potential_key_var = (
            any(key_var in var_lower for key_var in potential_key_vars) or
            'key' in var_lower or
            ('title' in context.lower() and 'key' in context.lower()) or
            ('unit' in context.lower() and 'key' in context.lower()) or
            'localized' in context.lower()
        )
# Check for specific indicators in the context
has_localization_context = any(indicator in context for indicator in [
'localized', 'LocalizedStringKey', 'String(localized:', 'NSLocalizedString',
'Text(', 'Label(', 'TextField(', 'Button(', 'NavigationTitle', 'titleKey:', 'unitKey:'
])
# If it looks like a key, has a key-like format, or appears in a localization context
if (is_potential_key_var or
has_localization_context or
(value in key_pattern_analysis.get('known_keys', set())) or
is_key_like_format(value)):
# Check the value format - does it follow key patterns we've seen?
if is_valid_localization_key(value, key_pattern_analysis):
used_keys[value] += 1
file_occurrences[value].append((file_path, f"context_assigned:{var_name}"))
# Special case for Swift parameters with named arguments like titleKey: "value", unitKey: "value"
named_param_match = re.search(r'(\w+)Key:\s*"([^"\\]*(?:\\.[^"\\]*)*)"', context)
if named_param_match:
param_name, param_value = named_param_match.groups()
if is_valid_localization_key(param_value, key_pattern_analysis):
used_keys[param_value] += 1
file_occurrences[param_value].append((file_path, f"named_param:{param_name}Key"))
def is_key_like_format(value):
"""Check if a string has a format typical of localization keys."""
# Most keys have underscores or dots and no spaces
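    # e.g. "settings.privacy_title" is key-like; "Privacy Settings" is not (hypothetical examples)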
if ' ' in value:
return False
if '_' in value or '.' in value:
return True
# Keys typically have lowercase letters, often with specific prefixes
if re.match(r'^[a-z][a-zA-Z0-9_\.]+$', value):
return True
return False
def process_variable_references(content, file_path, used_keys, file_occurrences, variables_by_file):
"""
Process references to variables that might contain localization keys.
"""
# Patterns for variable usage in localization functions
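    # e.g. given `let titleKey = "home_title"`, a later `LocalizedStringKey(titleKey)` resolves to "home_title" (hypothetical)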
var_usage_patterns = [
# Swift patterns
r'LocalizedStringKey\(\s*(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*\)',
r'NSLocalizedString\(\s*(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*,',
r'String\(localized:\s*(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*\)',
# JavaScript/React patterns
r'(?:i18n|t|translate)\(\s*(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*[,)]',
# Android patterns
r'getString\(\s*(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*\)',
]
file_variables = variables_by_file.get(file_path, {})
for pattern in var_usage_patterns:
for match in re.finditer(pattern, content):
var_name = match.group(1)
# Check if variable is in current file
if var_name in file_variables:
key = file_variables[var_name]
used_keys[key] += 1
file_occurrences[key].append((file_path, f"variable:{var_name}"))
else:
# Try to find the variable in other files
for other_file, vars_in_file in variables_by_file.items():
if var_name in vars_in_file:
key = vars_in_file[var_name]
used_keys[key] += 1
file_occurrences[key].append((file_path, f"external_variable:{var_name}"))
break
def process_ui_component_strings(content, file_path, used_keys, file_occurrences, key_pattern_analysis):
"""
Process string literals used in UI components that might be localized.
Also looks for SwiftUI view parameters that are likely to be localization keys.
"""
# Patterns for UI components with string literals that are often localized
ui_patterns = [
# SwiftUI patterns
r'Text\(\s*"([^"\\]*(?:\\.[^"\\]*)*)"(?:\s*,|\s*\))(?!.*attributedString)',
r'Button\([^)]*\)\s*{\s*Text\(\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
r'NavigationLink\([^)]*\)\s*{\s*Text\(\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
r'Label\(\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
# UIKit patterns
r'\.title\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
r'\.text\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
r'\.placeholder\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
r'\.buttonTitle\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
# React/JSX patterns
r'<Text[^>]*>\s*[\'"]([^\'"\\\n]*(?:\\.[^\'"\\\n]*)*)[\'"]',
r'<Button[^>]*>\s*[\'"]([^\'"\\\n]*(?:\\.[^\'"\\\n]*)*)[\'"]',
r'<Label[^>]*>\s*[\'"]([^\'"\\\n]*(?:\\.[^\'"\\\n]*)*)[\'"]',
]
# Special patterns for SwiftUI named key parameters
named_key_params = [
r'titleKey:\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
r'labelKey:\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
r'unitKey:\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
r'messageKey:\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
]
# Process standard UI patterns
for pattern in ui_patterns:
for match in re.finditer(pattern, content):
text = match.group(1)
# Skip if likely translated text rather than a key
if not is_potential_localization_key(text) and is_translatable_text(text):
continue
# If it follows key patterns, it might be a direct key reference
if is_potential_localization_key(text):
used_keys[text] += 1
file_occurrences[text].append((file_path, "ui_component"))
# Process named key parameters (titleKey:, unitKey:, etc.)
for pattern in named_key_params:
for match in re.finditer(pattern, content):
key = match.group(1)
param_type = pattern.split(':')[0] # Extract param name (titleKey, unitKey, etc.)
if is_valid_localization_key(key, key_pattern_analysis):
used_keys[key] += 1
file_occurrences[key].append((file_path, f"named_param:{param_type}"))
def is_translatable_text(text):
"""
Determine if a string is likely to be translatable human-readable text rather than a key.
"""
# Human readable text likely contains:
# - Spaces
# - Multiple words
# - Punctuation like periods, commas, question marks
# - Starts with capital letter (in many languages)
# - Natural language sentence structure
# Check for spaces (most UI text has spaces between words)
if ' ' in text:
return True
# Check for common punctuation used in sentences
if any(punct in text for punct in '.,:;!?'):
return True
# Check for capitalization pattern typical in sentences
if text and text[0].isupper() and not text.isupper():
# First letter is uppercase but not all text is uppercase
return True
# Check word count - more than 1-2 words suggests natural text
word_count = len(text.split())
if word_count > 1:
return True
return False
def find_localization_keys_in_code(project_path, exclude_patterns):
"""Find localization keys used in code files."""
return extract_keys_from_files(project_path, exclude_patterns)
def write_log(log_file, used_keys, unused_keys_by_file, all_keys_by_file, file_occurrences,
comments_removed_by_file=None, duplicates_removed_by_file=None, whitespace_removed_by_file=None, sorted_keys_by_file=None):
"""Write usage statistics to the log file."""
with open(log_file, 'w', encoding='utf-8') as f:
f.write("=== LOCALIZATION ANALYSIS REPORT ===\n\n")
# Add cleanup information
if duplicates_removed_by_file:
f.write("--- DUPLICATE KEYS ---\n")
total_duplicates = sum(count for count, _ in duplicates_removed_by_file.values())
f.write(f"Total {total_duplicates} duplicate keys cleaned.\n\n")
for file_path, (dup_count, unique_count) in duplicates_removed_by_file.items():
if dup_count > 0:
lang = get_language_from_path(file_path)
f.write(f"{file_path} ({lang}): {dup_count} duplicate keys removed, {unique_count} unique keys remain\n")
f.write("\n")
if comments_removed_by_file:
f.write("--- COMMENT LINES ---\n")
total_comments = sum(comments_removed_by_file.values())
f.write(f"Total {total_comments} comment lines cleaned.\n\n")
for file_path, comment_count in comments_removed_by_file.items():
if comment_count > 0:
lang = get_language_from_path(file_path)
f.write(f"{file_path} ({lang}): {comment_count} comment lines removed\n")
f.write("\n")
if whitespace_removed_by_file:
f.write("--- UNNECESSARY WHITESPACE AND EMPTY LINES ---\n")
total_whitespace = sum(whitespace_removed_by_file.values())
f.write(f"Total {total_whitespace} unnecessary whitespace and empty lines cleaned.\n\n")
for file_path, whitespace_count in whitespace_removed_by_file.items():
if whitespace_count > 0:
lang = get_language_from_path(file_path)
f.write(f"{file_path} ({lang}): {whitespace_count} unnecessary whitespace/lines cleaned\n")
f.write("\n")
if sorted_keys_by_file:
f.write("--- SORTED KEYS ---\n")
total_sorted = sum(sorted_keys_by_file.values())
f.write(f"Total {total_sorted} keys sorted alphabetically.\n\n")
for file_path, sorted_count in sorted_keys_by_file.items():
if sorted_count > 0:
lang = get_language_from_path(file_path)
f.write(f"{file_path} ({lang}): {sorted_count} keys sorted alphabetically\n")
f.write("\n")
f.write("--- USED KEYS ---\n")
f.write(f"Total {len(used_keys)} keys are used.\n\n")
# Top 10 most used keys
f.write("Top 10 most used keys:\n")
for key, count in used_keys.most_common(10):
f.write(f" {key}: {count} times\n")
f.write("\n")
# All used keys and where they are used
f.write("All used keys and where they are used (alphabetically):\n")
for key in sorted(used_keys.keys()):
f.write(f" {key}: {used_keys[key]} times\n")
# Show up to 5 files for each key
for i, (file_path, pattern_name) in enumerate(file_occurrences[key][:5]):
short_path = os.path.relpath(file_path)
f.write(f" - {short_path} ({pattern_name})\n")
if len(file_occurrences[key]) > 5:
f.write(f" ... and {len(file_occurrences[key]) - 5} more files\n")
f.write("\n")
# Unused keys by file
f.write("--- UNUSED KEYS BY FILE ---\n")
total_unused = 0
for file_path, keys in unused_keys_by_file.items():
if keys:
total_unused += len(keys)
percent = (len(keys) / len(all_keys_by_file[file_path])) * 100 if all_keys_by_file[file_path] else 0
f.write(f"\n{file_path} ({len(keys)}/{len(all_keys_by_file[file_path])}, %{percent:.1f}):\n")
for key in sorted(keys):
f.write(f" {key}\n")
f.write(f"\nTotal {total_unused} unused keys detected.\n")
def clean_strings_file(file_path, keys_to_remove):
"""Remove specified keys from the strings file."""
encoding = 'utf-8'
try:
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
except UnicodeDecodeError:
# Try UTF-16
try:
with open(file_path, 'r', encoding='utf-16') as f:
content = f.read()
encoding = 'utf-16'
except Exception as e:
print(f"Error: Could not read file {file_path}: {e}")
return
lines = content.split('\n')
filtered_lines = []
removed_count = 0
for line in lines:
# Check the line
should_keep = True
for key in keys_to_remove:
pattern = r'"' + re.escape(key) + r'"\s*=\s*"[^"\\]*(?:\\.[^"\\]*)*"\s*;'
if re.search(pattern, line):
should_keep = False
removed_count += 1
break
if should_keep:
filtered_lines.append(line)
# Write the file with the same encoding
with open(file_path, 'w', encoding=encoding) as f:
f.write('\n'.join(filtered_lines))
return removed_count
def get_language_from_path(file_path):
"""Extract language code from file path."""
match = re.search(r'/([^/]+)\.lproj/Localizable\.strings', file_path)
if match:
return match.group(1)
return os.path.basename(os.path.dirname(os.path.dirname(file_path)))
def clean_comments_from_file(file_path):
"""Clean comment lines from Localizable.strings file."""
encoding = 'utf-8'
try:
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
except UnicodeDecodeError:
# Try UTF-16
try:
with open(file_path, 'r', encoding='utf-16') as f:
content = f.read()
encoding = 'utf-16'
except Exception as e:
print(f"Error: Could not read file {file_path}: {e}")
return 0
lines = content.split('\n')
cleaned_lines = []
removed_count = 0
for line in lines:
# Is it a full comment line?
if line.strip().startswith('//'):
removed_count += 1
continue
# Is there an inline comment?
comment_pos = line.find('//')
if comment_pos > 0:
# Check if the key-value pair has ended
# "key" = "value"; // comment
if '";' in line[:comment_pos]:
# Remove the comment part, keep the key-value
line = line[:comment_pos].rstrip()
# Otherwise, don't remove the comment (probably // characters inside key-value)
cleaned_lines.append(line)
# Write the file with the same encoding
with open(file_path, 'w', encoding=encoding) as f:
f.write('\n'.join(cleaned_lines))
return removed_count
def remove_duplicate_keys(file_path):
"""Clean duplicate keys from Localizable.strings file.
Keeps the last occurrence, removes previous occurrences."""
encoding = 'utf-8'
try:
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
except UnicodeDecodeError:
# Try UTF-16
try:
with open(file_path, 'r', encoding='utf-16') as f:
content = f.read()
encoding = 'utf-16'
except Exception as e:
print(f"Error: Could not read file {file_path}: {e}")
return 0, 0
lines = content.split('\n')
seen_keys = {} # key -> last seen line number and content
    duplicate_indexes = set()  # Line indexes to delete (set for O(1) membership checks)
key_pattern = re.compile(r'^\s*"([^"\\]*(?:\\.[^"\\]*)*)"\s*=\s*".*";')
# Find duplicate keys
for i, line in enumerate(lines):
# Process only lines containing keys
match = key_pattern.match(line)
if match:
key = match.group(1)
if key in seen_keys:
# This key was already seen, record previous occurrence index
                duplicate_indexes.add(seen_keys[key][0])
# Update last seen index of the key
seen_keys[key] = (i, line)
# Delete duplicate lines
cleaned_lines = [line for i, line in enumerate(lines) if i not in duplicate_indexes]
# Write the file with the same encoding
with open(file_path, 'w', encoding=encoding) as f:
f.write('\n'.join(cleaned_lines))
return len(duplicate_indexes), len(seen_keys)
def clean_whitespace_from_file(file_path):
"""Clean unnecessary whitespace and empty lines from Localizable.strings file."""
encoding = 'utf-8'
try:
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
except UnicodeDecodeError:
# Try UTF-16
try:
with open(file_path, 'r', encoding='utf-16') as f:
content = f.read()
encoding = 'utf-16'
except Exception as e:
print(f"Error: Could not read file {file_path}: {e}")
return 0
lines = content.split('\n')
cleaned_lines = []
removed_count = 0
for line in lines:
# Clean whitespace at the beginning and end of the line
original_line = line
line = line.strip()
# Skip completely empty lines
if not line:
removed_count += 1
continue
# Format lines with "key" = "value"; pattern
key_value_pattern = r'"([^"\\]*(?:\\.[^"\\]*)*)"\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"\s*;'
match = re.match(key_value_pattern, line)
if match:
key, value = match.groups()
# Recreate the key-value pair with a formatted format
formatted_line = f'"{key}" = "{value}";'
cleaned_lines.append(formatted_line)
# Increment counter if different from original line
if formatted_line != original_line.strip():
removed_count += 1
else:
# If not a key-value pair, add the cleaned version
if line != original_line:
removed_count += 1
cleaned_lines.append(line)
# Write the file with the same encoding
with open(file_path, 'w', encoding=encoding) as f:
f.write('\n'.join(cleaned_lines))
return removed_count
def sort_keys_in_file(file_path):
"""Sort keys alphabetically in a Localizable.strings file."""
encoding = 'utf-8'
try:
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
except UnicodeDecodeError:
# Try UTF-16
try:
with open(file_path, 'r', encoding='utf-16') as f:
content = f.read()
encoding = 'utf-16'
except Exception as e:
print(f"Error: Could not read file {file_path}: {e}")
return 0
# Find all lines matching the "key" = "value"; pattern
pattern = r'"([^"\\]*(?:\\.[^"\\]*)*)"\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"\s*;'
matches = re.findall(pattern, content)
if not matches:
print(f"Error: No keys found in file {file_path}.")
return 0
# Collect keys and values in a dictionary
key_values = {key: value for key, value in matches}
# Create sorted lines using sorted keys
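    # Note: the file is rebuilt from key-value pairs alone, so any comments or blank lines still present are dropped here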
sorted_lines = []
for key in sorted(key_values.keys()):
sorted_lines.append(f'"{key}" = "{key_values[key]}";')
# Write the file with the same encoding
with open(file_path, 'w', encoding=encoding) as f:
f.write('\n'.join(sorted_lines))
return len(matches)
def find_missing_keys(used_keys, all_keys_by_file, filter_prefixes=None, min_missing_count=0):
"""Find keys that are used in the project but missing from language files.
Args:
used_keys: Dictionary of keys used in the code
all_keys_by_file: Dictionary mapping file paths to their keys
filter_prefixes: Optional list of prefixes to filter keys by
min_missing_count: Minimum number of missing keys for a file to be included
Returns:
Dictionary mapping file paths to their missing keys
"""
missing_keys_by_file = {}
for file_path, keys in all_keys_by_file.items():
# Find keys that are used in the project but not in this language file
missing_keys = [key for key in used_keys if key not in keys]
# Apply prefix filtering if specified
if filter_prefixes:
missing_keys = [key for key in missing_keys if any(key.startswith(prefix) for prefix in filter_prefixes)]
# Only include files with enough missing keys
if missing_keys and len(missing_keys) >= min_missing_count:
missing_keys_by_file[file_path] = missing_keys
return missing_keys_by_file
def write_missing_keys_to_file(missing_keys_by_file, output_file="missing_keys.txt"):
"""
Write missing keys to a detailed report file.
This function organizes missing keys by language and provides both a summary
and comprehensive listing with formatting for better readability.
"""
if not missing_keys_by_file:
return False
# Group missing keys by language for better organization
keys_by_language = {}
all_missing_keys = set()
for file_path, keys in missing_keys_by_file.items():
lang = get_language_from_path(file_path)
if lang not in keys_by_language:
keys_by_language[lang] = {"files": {}, "missing_key_count": 0, "total_keys": set()}
keys_by_language[lang]["files"][file_path] = keys
keys_by_language[lang]["missing_key_count"] += len(keys)
keys_by_language[lang]["total_keys"].update(keys)
all_missing_keys.update(keys)
# Sort languages by missing key count (descending)
sorted_languages = sorted(keys_by_language.keys(),
key=lambda lang: keys_by_language[lang]["missing_key_count"],
reverse=True)
with open(output_file, 'w', encoding='utf-8') as f:
f.write("=== MISSING LOCALIZATION KEYS REPORT ===\n\n")
# Total summary
total_missing = len(all_missing_keys)
total_file_keys = sum(len(keys) for keys in missing_keys_by_file.values())
f.write(f"SUMMARY:\n")
f.write(f"- Total unique missing keys: {total_missing}\n")
f.write(f"- Total missing entries across all files: {total_file_keys}\n")
f.write(f"- Languages with missing keys: {len(keys_by_language)}\n\n")
# Language summary table
f.write("LANGUAGE SUMMARY:\n")
f.write("╔════════════════╦═══════════════════════════╦═══════════════╗\n")
f.write("║ Language ║ Missing Keys ║ Files Affected ║\n")
f.write("╠════════════════╬═══════════════════════════╬═══════════════╣\n")
for lang in sorted_languages:
lang_data = keys_by_language[lang]
file_count = len(lang_data["files"])
missing_count = lang_data["missing_key_count"]
unique_count = len(lang_data["total_keys"])
            # Pad each cell so the row lines up with the table borders
            summary = f"{missing_count} ({unique_count} unique)"
            f.write(f"║ {lang:<14} ║ {summary:<25} ║ {file_count:>13} ║\n")
f.write("╚════════════════╩═══════════════════════════╩═══════════════╝\n\n")
# Common missing keys across multiple languages
common_keys = {}
for lang, lang_data in keys_by_language.items():
for key in lang_data["total_keys"]:
if key not in common_keys:
common_keys[key] = set()
common_keys[key].add(lang)
# Sort by number of languages affected (descending)
most_common_keys = sorted(common_keys.items(), key=lambda x: len(x[1]), reverse=True)
if most_common_keys:
f.write("KEYS MISSING IN MULTIPLE LANGUAGES:\n")
f.write("╔══════════════════════════════════════════════════════╦═══════════════════════════╗\n")
f.write("║ Key ║ Missing in Languages ║\n")
f.write("╠══════════════════════════════════════════════════════╬═══════════════════════════╣\n")
# Show only keys missing in multiple languages (>1)
multi_lang_keys = [(key, langs) for key, langs in most_common_keys if len(langs) > 1]
for key, langs in multi_lang_keys[:20]: # Limit to 20 for readability
# Truncate long keys
display_key = key[:48] + "..." if len(key) > 48 else key
display_key = display_key.ljust(48)
# Format languages as comma-separated list
langs_str = ", ".join(sorted(langs))
if len(langs_str) > 25:
langs_str = langs_str[:22] + "..."
f.write(f"║ {display_key}{langs_str:<23}\n")
if len(multi_lang_keys) > 20:
f.write(f"║ ... and {len(multi_lang_keys) - 20} more keys ║ ║\n")
f.write("╚══════════════════════════════════════════════════════╩═══════════════════════════╝\n\n")
# Detailed listing by language
f.write("DETAILED MISSING KEYS BY LANGUAGE:\n\n")
for lang in sorted_languages:
lang_data = keys_by_language[lang]
file_count = len(lang_data["files"])
missing_count = lang_data["missing_key_count"]
unique_count = len(lang_data["total_keys"])
f.write(f"LANGUAGE: {lang}\n")
f.write(f" - Missing keys: {missing_count} ({unique_count} unique)\n")
f.write(f" - Files affected: {file_count}\n\n")
# Sort files to ensure consistent output
sorted_files = sorted(lang_data["files"].keys())
for file_path in sorted_files:
keys = lang_data["files"][file_path]
rel_path = os.path.relpath(file_path)
f.write(f" File: {rel_path}\n")
f.write(f" Missing keys: {len(keys)}\n")
# Sort keys alphabetically for consistent output
for key in sorted(keys):
f.write(f' "{key}"\n')
f.write("\n")
f.write("-" * 80 + "\n\n")
return True
def create_localization_template(missing_keys, all_keys_by_file, reference_language='en', output_file='template_localizable.strings'):
"""
Create a template Localizable.strings file with missing keys.
Args:
missing_keys: Set of missing keys to include in the template
all_keys_by_file: Dictionary of all keys by file, used to find values for reference
reference_language: Language code to use for reference values
output_file: Output file path for the template
Returns:
True if successful, False otherwise
"""
if not missing_keys:
print("No missing keys to create template for.")
return False
# Find the reference language file
reference_file = None
reference_values = {}
    for file_path in all_keys_by_file.keys():
        # Compare the language component of the path (e.g. en.lproj) exactly,
        # so a short code like "en" cannot match unrelated path fragments
        if get_language_from_path(file_path) == reference_language:
            reference_file = file_path
            reference_values = all_keys_by_file[file_path]
            break
# If reference language wasn't found, use the first file as reference
if reference_file is None and all_keys_by_file:
reference_file = list(all_keys_by_file.keys())[0]
reference_values = all_keys_by_file[reference_file]
with open(output_file, 'w', encoding='utf-8') as f:
f.write('// Template Localizable.strings file with missing keys\n')
f.write('// Generated by clean_localizations.py\n')
f.write('// Reference language: ' + (reference_language if reference_file else 'none') + '\n\n')
sorted_keys = sorted(missing_keys)
for key in sorted_keys:
# Try to get the value from the reference language
if key in reference_values:
value = reference_values[key]
f.write(f'"{key}" = "{value}";\n')
else:
# If not found, use a placeholder with the key name
f.write(f'"{key}" = "TRANSLATE: {key}";\n')
print(f"Created template file with {len(sorted_keys)} missing keys: {output_file}")
if not reference_file:
print(f"Warning: Reference language '{reference_language}' not found, using placeholders for all values.")
return True
def is_valid_localization_key(key, key_pattern_analysis=None):
"""
Dynamically determine if a string is likely to be a valid localization key
based on pattern analysis from the project.
"""
if not key or not isinstance(key, str):
return False
# Empty string check
    if not key.strip():
return False
# Purely numeric keys are invalid
if key.isdigit() or key.replace('.', '', 1).isdigit():
return False
# If we have pattern analysis results from the project
if key_pattern_analysis:
# Direct match with known keys
if key in key_pattern_analysis.get('known_keys', set()):
return True
# Check if key follows detected prefix patterns
prefixes = key_pattern_analysis.get('prefixes', set())
for prefix in prefixes:
if key.startswith(prefix + '_') or key.startswith(prefix + '.'):
return True
# Check if key has detected suffix patterns
suffixes = key_pattern_analysis.get('suffixes', set())
for suffix in suffixes:
if key.endswith('_' + suffix) or key.endswith('.' + suffix):
return True
# Basic format validation for localization keys
if '_' in key or '.' in key:
# If it has a structure like a localization key (underscore/dot separated)
# And doesn't contain spaces or weird characters
if re.match(r'^[a-z][a-zA-Z0-9_\.]+$', key) and ' ' not in key:
return True
return False
def main():
args = parse_args()
project_path = os.path.abspath(args.project_path)
# Use r-string to fix regex patterns
exclude_patterns = args.exclude or [r'^\.git$', r'^\.build$', r'^Pods$', r'^Carthage$', r'^\.DS_Store$']
print(f"Project directory: {project_path}")
print(f"Excluded patterns: {exclude_patterns}")
# Auto-detect missing keys mode
if args.auto_detect_missing_keys:
print("Running in auto-detect missing keys mode...")
# Display filter settings if any
if args.filter_prefix:
print(f"Filtering keys by prefixes: {', '.join(args.filter_prefix)}")
if args.min_missing_count > 0:
print(f"Only reporting languages with at least {args.min_missing_count} missing keys")
# Find used keys
print("Searching for localization keys in code files...")
used_keys, file_occurrences = find_localization_keys_in_code(project_path, exclude_patterns)
print(f"Total {len(used_keys)} keys are used in code.")
# Find strings files
print("Searching for Localizable.strings files...")
strings_files = find_strings_files(project_path, exclude_patterns)
if not strings_files:
print("No Localizable.strings files found. Terminating.")
return
print(f"Found {len(strings_files)} localization files.")
# Parse keys from each file
all_keys_by_file = {}
languages_found = set()
for file_path in strings_files:
lang = get_language_from_path(file_path)
languages_found.add(lang)
print(f"Processing: {file_path} ({lang})")
keys = parse_strings_file(file_path)
all_keys_by_file[file_path] = keys
# Calculate percentage of total used keys found in this file
if used_keys:
found_keys = len([k for k in keys if k in used_keys])
percent_found = (found_keys / len(used_keys)) * 100
else:
found_keys = 0
percent_found = 0
print(f" Total keys: {len(keys)}")
print(f" Contains {found_keys}/{len(used_keys)} used keys ({percent_found:.1f}%)")
print(f"\nFound {len(languages_found)} unique languages: {', '.join(sorted(languages_found))}")
# Find keys missing from language files, applying filters
missing_keys_by_file = find_missing_keys(used_keys, all_keys_by_file, args.filter_prefix, args.min_missing_count)
if missing_keys_by_file:
print("\nDetected keys used in code but missing from language files:")
total_missing = sum(len(keys) for keys in missing_keys_by_file.values())
unique_missing_keys = set()
for keys in missing_keys_by_file.values():
unique_missing_keys.update(keys)
print(f"Total missing entries: {total_missing} across {len(missing_keys_by_file)} files")
print(f"Unique missing keys: {len(unique_missing_keys)}")
# Display a preview of missing keys for each language
for file_path, keys in missing_keys_by_file.items():
lang = get_language_from_path(file_path)
print(f" {lang}: {len(keys)} missing keys")
# Show a few examples of missing keys
for key in sorted(keys)[:5]:
print(f" \"{key}\"")
if len(keys) > 5:
print(f" ... and {len(keys) - 5} more keys")
# Write missing keys to file using the specified output file name
if write_missing_keys_to_file(missing_keys_by_file, output_file=args.output_file):
print(f"\nDetailed missing keys report written to {args.output_file}")
# Create a template file if requested
if args.create_template:
# Collect all unique missing keys
all_missing_keys = set()
for keys in missing_keys_by_file.values():
all_missing_keys.update(keys)
create_localization_template(
all_missing_keys,
all_keys_by_file,
reference_language=args.reference_language,
output_file=args.template_file
)
else:
if args.filter_prefix:
print(f"\nNo missing keys found matching the specified prefixes: {', '.join(args.filter_prefix)}")
else:
print("\nNo missing keys found. All localization files are complete.")
# Optional: identify keys that might be unused in code
if not args.no_prompt and input("\nWould you like to check for potentially unused keys in localization files? (y/n): ").strip().lower() == 'y':
print("\nChecking for potentially unused keys...")
unused_keys_by_file = {}
total_unused = 0
for file_path, keys in all_keys_by_file.items():
# Find keys in the strings file that aren't found in code
unused_keys = [key for key in keys if key not in used_keys]
if unused_keys:
unused_keys_by_file[file_path] = unused_keys
total_unused += len(unused_keys)
if unused_keys_by_file:
print(f"Found {total_unused} potentially unused keys across all language files.")
for file_path, keys in unused_keys_by_file.items():
lang = get_language_from_path(file_path)
percent = (len(keys) / len(all_keys_by_file[file_path])) * 100
print(f" {lang}: {len(keys)}/{len(all_keys_by_file[file_path])} keys ({percent:.1f}%) may be unused")
print("\nNote: These keys might be used dynamically or loaded at runtime.")
print("Review manually before removing any keys.")
if args.log_file:
write_log(args.log_file, used_keys, unused_keys_by_file, all_keys_by_file, file_occurrences)
print(f"Analysis report written to {args.log_file}")
else:
print("No potentially unused keys found. All keys in localization files appear to be used in code.")
return
# Comment removal preference
if args.no_prompt:
remove_comments = False
clean_whitespace = args.clean_whitespace
sort_keys = args.sort_keys
else:
remove_comments = input("Do you want to remove comment lines? (y/n): ").strip().lower() == 'y'
# Unnecessary whitespace cleaning preference
if args.clean_whitespace:
clean_whitespace = True
else:
clean_whitespace = input("Do you want to clean unnecessary whitespace and empty lines? (y/n): ").strip().lower() == 'y'
# Alphabetical key sorting preference
if args.sort_keys:
sort_keys = True
else:
sort_keys = input("Do you want to sort keys alphabetically? (y/n): ").strip().lower() == 'y'
# Duplicate key removal preference
auto_deduplicate = True # Automatically deduplicate without asking
# Find used keys
print("Searching for localization keys in code files...")
used_keys, file_occurrences = find_localization_keys_in_code(project_path, exclude_patterns)
print(f"Total {len(used_keys)} keys are used.")
# Find strings files
print("Searching for Localizable.strings files...")
strings_files = find_strings_files(project_path, exclude_patterns)
print(f"Found {len(strings_files)} strings files.")
if not strings_files:
print("No Localizable.strings files found. Terminating.")
return
# Collect information about deletions
duplicates_removed_by_file = {}
comments_removed_by_file = {}
whitespace_removed_by_file = {}
sorted_keys_by_file = {}
# Clean duplicate keys
if auto_deduplicate:
print("\nCleaning duplicate keys...")
total_duplicates_removed = 0
for file_path in strings_files:
lang = get_language_from_path(file_path)
duplicates_removed, unique_keys = remove_duplicate_keys(file_path)
duplicates_removed_by_file[file_path] = (duplicates_removed, unique_keys)
total_duplicates_removed += duplicates_removed
if duplicates_removed > 0:
print(f" {file_path} ({lang}): {duplicates_removed} duplicate keys removed, {unique_keys} unique keys remain")
else:
print(f" {file_path} ({lang}): No duplicate keys found")
print(f"Total {total_duplicates_removed} duplicate keys removed.\n")
# First clean comment lines (if requested)
if remove_comments:
print("\nCleaning comment lines...")
total_comments_removed = 0
for file_path in strings_files:
lang = get_language_from_path(file_path)
comments_removed = clean_comments_from_file(file_path)
comments_removed_by_file[file_path] = comments_removed
total_comments_removed += comments_removed
print(f" {file_path} ({lang}): {comments_removed} comment lines removed")
print(f"Total {total_comments_removed} comment lines removed.\n")
# Clean unnecessary whitespace (if requested)
if clean_whitespace:
print("\nCleaning unnecessary whitespace and empty lines...")
total_whitespace_removed = 0
for file_path in strings_files:
lang = get_language_from_path(file_path)
whitespace_removed = clean_whitespace_from_file(file_path)
whitespace_removed_by_file[file_path] = whitespace_removed
total_whitespace_removed += whitespace_removed
print(f" {file_path} ({lang}): {whitespace_removed} unnecessary whitespace/lines cleaned")
print(f"Total {total_whitespace_removed} unnecessary whitespace/lines cleaned.\n")
# Sort keys alphabetically (if requested)
if sort_keys:
print("\nSorting keys alphabetically...")
total_keys_sorted = 0
for file_path in strings_files:
lang = get_language_from_path(file_path)
keys_sorted = sort_keys_in_file(file_path)
sorted_keys_by_file[file_path] = keys_sorted
total_keys_sorted += keys_sorted
print(f" {file_path} ({lang}): {keys_sorted} keys sorted alphabetically")
print(f"Total {total_keys_sorted} keys sorted alphabetically.\n")
# Parse keys from each file
all_keys_by_file = {}
unused_keys_by_file = {}
for file_path in strings_files:
lang = get_language_from_path(file_path)
print(f"Processing: {file_path} ({lang})")
keys = parse_strings_file(file_path)
all_keys_by_file[file_path] = keys
# Find unused keys
unused_keys = [key for key in keys if key not in used_keys]
unused_keys_by_file[file_path] = unused_keys
print(f" Total keys: {len(keys)}")
print(f" Unused keys: {len(unused_keys)}")
# Find keys missing from language files
missing_keys_by_file = find_missing_keys(used_keys, all_keys_by_file, args.filter_prefix, args.min_missing_count)
if missing_keys_by_file:
print("\nDetected keys used in project but missing from language files:")
total_missing = sum(len(keys) for keys in missing_keys_by_file.values())
print(f"Total missing keys: {total_missing}")
for file_path, keys in missing_keys_by_file.items():
lang = get_language_from_path(file_path)
print(f" {file_path} ({lang}): {len(keys)} missing keys")
# Write missing keys to file using the specified output file name
if write_missing_keys_to_file(missing_keys_by_file, output_file=args.output_file):
print(f"Missing keys written to {args.output_file}")
# If there are no unused keys
if all(not keys for keys in unused_keys_by_file.values()):
print("\nNo unused keys found in any language file. Terminating.")
write_log(args.log_file, used_keys, unused_keys_by_file, all_keys_by_file, file_occurrences,
comments_removed_by_file if remove_comments else None,
duplicates_removed_by_file if auto_deduplicate else None,
whitespace_removed_by_file if clean_whitespace else None,
sorted_keys_by_file if sort_keys else None)
print(f"\nAnalysis report written: {args.log_file}")
return
# Show unused keys and ask for deletion confirmation
for file_path, unused_keys in unused_keys_by_file.items():
if not unused_keys:
continue
lang = get_language_from_path(file_path)
print(f"\n{lang} language has {len(unused_keys)} unused keys:")
# Show all unused keys
for i, key in enumerate(sorted(unused_keys), 1):
print(f" {i}. \"{key}\" = \"{all_keys_by_file[file_path][key]}\"")
if args.confirm_all:
confirm = 'y'
else:
            confirm = input(f"\nDelete {len(unused_keys)} unused keys from the {lang} localization? (y = yes / n = no / s = select keys to keep): ")
if confirm.lower() == 'y':
print(f"Deleting unused keys: {file_path}")
removed = clean_strings_file(file_path, unused_keys)
print(f"Completed. {removed} keys deleted.")
elif confirm.lower() == 's':
keys_to_keep = input("Enter the keys you want to keep, separated by commas (e.g. key1,key2): ").split(',')
keys_to_keep = [k.strip() for k in keys_to_keep]
keys_to_remove = [key for key in unused_keys if key not in keys_to_keep]
print(f"Deleting {len(keys_to_remove)} keys, keeping {len(keys_to_keep)} keys.")
removed = clean_strings_file(file_path, keys_to_remove)
print(f"Completed. {removed} keys deleted.")
else:
print("Deletion cancelled.")
# Write log file
write_log(args.log_file, used_keys, unused_keys_by_file, all_keys_by_file, file_occurrences,
comments_removed_by_file if remove_comments else None,
duplicates_removed_by_file if auto_deduplicate else None,
whitespace_removed_by_file if clean_whitespace else None,
sorted_keys_by_file if sort_keys else None)
print(f"\nAnalysis report written: {args.log_file}")
if __name__ == '__main__':
main()