Swift Clean Localization Keys
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
r"""
iOS/macOS Localization Manager and Analysis Tool

This tool analyzes, cleans, and manages localization files (Localizable.strings)
in iOS and macOS projects. It performs the following operations:

1. LOCALIZATION KEY DETECTION:
   - Intelligently detects localization keys used in Swift, Objective-C, Kotlin,
     JavaScript, and SQL files
   - Supports various localization methods (NSLocalizedString, String(localized:),
     LocalizedStringKey, etc.)
   - Finds keys defined in variables and referenced indirectly in code
   - Detects keys used in common UI component initializers and structures

2. MISSING KEYS ANALYSIS:
   - Identifies keys used in code but missing from localization files
   - Generates detailed reports showing which keys are missing from each language
   - Can filter results by key prefixes (e.g., only show missing achievement keys)
   - Creates template files for translators with placeholder values

3. CLEANING OPERATIONS:
   - Automatically detects duplicate keys and removes them while keeping the last definition
   - Cleans comment lines (starting with //) and inline comments (after key-value definitions)
   - Detects and removes keys that are not used anywhere in the project (with confirmation)
   - Cleans unnecessary whitespace and empty lines (with the --clean-whitespace option)
   - Sorts keys alphabetically (with the --sort-keys option)

4. DETAILED REPORTING:
   - Lists all used and unused keys
   - Reports the number of cleaned duplicate keys and comment lines, and which files they were in
   - Shows which files use the keys and how many times they are used
   - Saves all analysis results to a detailed log file

Usage:
    python3 clean_localizations.py [project_directory] [options]

Basic Options:
    --confirm-all               Automatically confirm all deletion operations
    --log-file FILE_NAME        Specify the log file name (default: localization_log.txt)
    --exclude PATTERN           Patterns for directories/files to exclude (can be used multiple times)
    --verbose, -v               Show detailed output
    --clean-whitespace          Clean unnecessary whitespace and empty lines
    --sort-keys                 Sort keys alphabetically
    --no-prompt                 Run without interactive prompts

Missing Keys Detection:
    --auto-detect-missing-keys  Run in missing keys detection mode
    --filter-prefix PREFIX      Filter keys by prefix(es) (can be used multiple times:
                                --filter-prefix ach_ badge_)
    --min-missing-count NUM     Only include languages with at least NUM missing keys
    --output-file FILE          Output file for the missing keys report (default: missing_keys.txt)

Template Generation:
    --create-template           Create a template Localizable.strings file with missing keys
    --template-file FILE        Output file for the template (default: template_localizable.strings)
    --reference-language LANG   Language to use for reference values (default: en)

Examples:
    python3 clean_localizations.py                              # Run in the current directory
    python3 clean_localizations.py /path/to/project             # Run in a specific directory
    python3 clean_localizations.py --confirm-all                # Automatically confirm all deletions
    python3 clean_localizations.py --exclude "^\.git$"          # Exclude the .git directory
    python3 clean_localizations.py --clean-whitespace           # Clean unnecessary whitespace
    python3 clean_localizations.py --sort-keys                  # Sort keys alphabetically
    python3 clean_localizations.py --auto-detect-missing-keys   # Only detect missing keys
    python3 clean_localizations.py --filter-prefix ach_ badge_  # Only show keys with these prefixes
    python3 clean_localizations.py --create-template --filter-prefix auth_  # Create a template for auth keys
"""

import os
import re
import argparse
from collections import Counter, defaultdict


def parse_args():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser(description='Detect unused localization keys in iOS projects.')
    parser.add_argument('project_path', nargs='?', default='.', help='Project directory (default: current directory)')
    parser.add_argument('--confirm-all', action='store_true', help='Automatically confirm all deletion operations')
    parser.add_argument('--log-file', default='localization_log.txt', help='Log file (default: localization_log.txt)')
    parser.add_argument('--exclude', nargs='+', default=[], help='Patterns for directories/files to exclude')
    parser.add_argument('--verbose', '-v', action='store_true', help='Show detailed output')
    parser.add_argument('--clean-whitespace', action='store_true', help='Clean unnecessary whitespace and empty lines')
    parser.add_argument('--sort-keys', action='store_true', help='Sort keys alphabetically')
    parser.add_argument('--auto-detect-missing-keys', action='store_true', help='Automatically detect missing keys without other operations')
    parser.add_argument('--no-prompt', action='store_true', help='Run without interactive prompts')
    parser.add_argument('--filter-prefix', nargs='+', default=[],
                        help='Filter missing keys to only include those with specific prefixes (e.g., "ach_" "button_")')
    parser.add_argument('--min-missing-count', type=int, default=0,
                        help='Only report languages with at least this many missing keys')
    parser.add_argument('--output-file', default='missing_keys.txt',
                        help='Output file for missing keys (default: missing_keys.txt)')
    parser.add_argument('--create-template', action='store_true',
                        help='Create a template Localizable.strings file with missing keys for translation')
    parser.add_argument('--template-file', default='template_localizable.strings',
                        help='Output file for the template (default: template_localizable.strings)')
    parser.add_argument('--reference-language', default='en',
                        help='Language to use for reference values in the template (default: en)')
    return parser.parse_args()

def find_strings_files(project_path, exclude_patterns):
    """Find all Localizable.strings files."""
    strings_files = []
    for root, dirs, files in os.walk(project_path):
        # Filter out excluded directories
        dirs[:] = [d for d in dirs if not any(re.match(pattern, d) for pattern in exclude_patterns)]
        for filename in files:
            if filename == "Localizable.strings":
                strings_files.append(os.path.join(root, filename))
    return strings_files

def parse_strings_file(file_path):
    """Parse a Localizable.strings file and return key-value pairs."""
    keys = {}
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except UnicodeDecodeError:
        # Try UTF-16
        try:
            with open(file_path, 'r', encoding='utf-16') as f:
                content = f.read()
        except Exception as e:
            print(f"Error: Could not read file {file_path}: {e}")
            return keys
    # Find all lines matching the "key" = "value"; pattern
    pattern = r'"([^"\\]*(?:\\.[^"\\]*)*)"\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"\s*;'
    matches = re.findall(pattern, content)
    for key, value in matches:
        keys[key] = value
    return keys

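# Illustrative example (hypothetical file contents, not from any real project):
# given a Localizable.strings file containing the single line
#     "settings_title" = "Settings";
# the pattern above matches it, and parse_strings_file returns
# {"settings_title": "Settings"}.
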
def extract_keys_from_files(project_path, exclude_patterns):
    """Find all string keys directly defined in code and also extract from various structure patterns."""
    used_keys = Counter()
    file_occurrences = defaultdict(list)
    # File extensions to scan
    swift_extensions = ['.swift']
    sql_extensions = ['.sql']
    objc_extensions = ['.m', '.h']
    kotlin_extensions = ['.kt', '.kts']
    java_extensions = ['.java']
    js_extensions = ['.js', '.jsx', '.ts', '.tsx']
    # Track potential key variables {variable_name: key_string}
    variables_by_file = {}
    # Collect possible localization key patterns from existing code
    key_pattern_analysis = analyze_key_patterns(project_path, exclude_patterns)
    # First pass - collect all variable declarations
    for root, dirs, files in os.walk(project_path):
        # Filter out excluded directories
        dirs[:] = [d for d in dirs if not any(re.match(pattern, d) for pattern in exclude_patterns)]
        for filename in files:
            file_path = os.path.join(root, filename)
            try:
                # For code files
                if any(filename.endswith(ext) for ext in
                       swift_extensions + sql_extensions + objc_extensions +
                       kotlin_extensions + java_extensions + js_extensions):
                    with open(file_path, 'r', encoding='utf-8') as f:
                        content = f.read()
                    # Find all variable declarations that seem to be localization keys
                    file_variables = {}
                    collect_variable_declarations(content, file_variables)
                    if file_variables:
                        variables_by_file[file_path] = file_variables
            except Exception as e:
                print(f"Error reading file (pass 1): {file_path} - {str(e)}")
    # Second pass - process direct usages, variable references, and contextual analysis
    for root, dirs, files in os.walk(project_path):
        # Filter out excluded directories
        dirs[:] = [d for d in dirs if not any(re.match(pattern, d) for pattern in exclude_patterns)]
        for filename in files:
            file_path = os.path.join(root, filename)
            try:
                # Determine the language based on the file extension
                if any(filename.endswith(ext) for ext in swift_extensions + objc_extensions):
                    language = 'swift'
                elif any(filename.endswith(ext) for ext in kotlin_extensions + java_extensions):
                    language = 'kotlin/android'
                elif any(filename.endswith(ext) for ext in js_extensions):
                    language = 'javascript/react'
                elif any(filename.endswith(ext) for ext in sql_extensions):
                    language = 'sql'
                else:
                    # Skip unsupported file types
                    continue
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                # Known localization function direct usage patterns
                process_localization_function_usage(content, file_path, used_keys, file_occurrences, language)
                # Process context-aware string assignments.
                # This includes categoryKey and similar assignments that are likely localization keys.
                process_contextual_string_assignments(content, file_path, used_keys, file_occurrences, key_pattern_analysis)
                # Process variable references that might be localization keys
                process_variable_references(content, file_path, used_keys, file_occurrences, variables_by_file)
                # Analyze string literals in UI components to find localized strings
                process_ui_component_strings(content, file_path, used_keys, file_occurrences, key_pattern_analysis)
            except Exception as e:
                print(f"Error reading file (pass 2): {file_path} - {str(e)}")
    # Final pass - collect any remaining potential keys from contextual analysis
    for file_path, vars_in_file in variables_by_file.items():
        for variable_name, key in vars_in_file.items():
            if key not in used_keys and is_valid_localization_key(key, key_pattern_analysis):
                # The key is defined but its usage was not detected; add it anyway since it follows key patterns
                used_keys[key] += 1
                file_occurrences[key].append((file_path, f"potential_key:{variable_name}"))
    return used_keys, file_occurrences

def analyze_key_patterns(project_path, exclude_patterns):
    """
    Dynamically analyze the codebase to identify localization key patterns.
    Returns a pattern analysis object with detected patterns and conventions.
    """
    print("Analyzing codebase for localization key patterns...")
    # Stores our analysis results
    key_patterns = {
        'prefixes': set(),
        'suffixes': set(),
        'assignments': defaultdict(int),  # Variable names used for key assignments
        'formats': defaultdict(int),      # Key format statistics (snake_case, etc.)
        'contexts': defaultdict(set),     # Contexts in which keys appear
        'known_keys': set(),              # Keys found in localization files
    }
    # First pass - collect keys from localization files
    for root, dirs, files in os.walk(project_path):
        dirs[:] = [d for d in dirs if not any(re.match(pattern, d) for pattern in exclude_patterns)]
        for filename in files:
            if filename == "Localizable.strings":
                file_path = os.path.join(root, filename)
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        content = f.read()
                except UnicodeDecodeError:
                    try:
                        with open(file_path, 'r', encoding='utf-16') as f:
                            content = f.read()
                    except Exception:
                        continue
                # Extract keys
                key_pattern = r'"([^"\\]*(?:\\.[^"\\]*)*)"\s*=\s*"[^"\\]*(?:\\.[^"\\]*)*"\s*;'
                keys = re.findall(key_pattern, content)
                # Add to known keys
                key_patterns['known_keys'].update(keys)
                # Analyze prefix and suffix patterns
                for key in keys:
                    # Extract the prefix (everything before the first underscore/dot)
                    prefix_match = re.match(r'^([a-z0-9]+)[_\.]', key)
                    if prefix_match:
                        key_patterns['prefixes'].add(prefix_match.group(1))
                    # Extract the suffix (everything after the last underscore/dot)
                    suffix_match = re.search(r'[_\.]([a-z0-9]+)$', key)
                    if suffix_match:
                        key_patterns['suffixes'].add(suffix_match.group(1))
                    # Record the format
                    if '_' in key:
                        key_patterns['formats']['snake_case'] += 1
                    elif '.' in key:
                        key_patterns['formats']['dot_notation'] += 1
                    elif re.search(r'[a-z][A-Z]', key):
                        key_patterns['formats']['camelCase'] += 1
    # Second pass - analyze code for key assignment patterns
    for root, dirs, files in os.walk(project_path):
        dirs[:] = [d for d in dirs if not any(re.match(pattern, d) for pattern in exclude_patterns)]
        for filename in files:
            if filename.endswith('.swift'):
                file_path = os.path.join(root, filename)
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        content = f.read()
                except Exception:
                    continue
                # Look for variable assignments to strings
                assignment_patterns = [
                    # Common key assignment patterns
                    r'(\w+)Key\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
                    r'let\s+(\w+)Key\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
                    r'var\s+(\w+)Key\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
                    # Other assignment forms
                    r'(\w+)(?:Title|Label)\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
                    r'unit(?:Key)?:\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
                    r'titleKey:\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
                ]
                for pattern in assignment_patterns:
                    for match in re.finditer(pattern, content):
                        groups = match.groups()
                        if len(groups) >= 2:
                            # Get the variable name (first group)
                            var_type = groups[0]
                            # Get the key (last group)
                            key = groups[-1]
                            # Update the context analysis
                            start_pos = max(0, match.start() - 50)
                            end_pos = min(len(content), match.end() + 50)
                            context_text = content[start_pos:end_pos]
                            # Analyze the context - record it if it seems to be localization
                            if any(indicator in context_text for indicator in ['localized', 'LocalizedStringKey', 'String(localized:']):
                                key_patterns['contexts'][var_type].add('localization')
                            # Check for specific UI contexts
                            if 'TextField' in context_text or 'Text(' in context_text:
                                key_patterns['contexts'][var_type].add('ui_text')
                            elif 'Button' in context_text:
                                key_patterns['contexts'][var_type].add('button')
                            # Count assignments by variable type
                            key_patterns['assignments'][var_type] += 1
    # Print some stats about what we found
    print(f"Found {len(key_patterns['known_keys'])} known localization keys in .strings files")
    if key_patterns['prefixes']:
        print(f"Detected {len(key_patterns['prefixes'])} common key prefixes, including: {', '.join(list(key_patterns['prefixes'])[:10])}...")
    if key_patterns['suffixes']:
        print(f"Detected {len(key_patterns['suffixes'])} common key suffixes, including: {', '.join(list(key_patterns['suffixes'])[:10])}...")
    if key_patterns['assignments']:
        print(f"Most common variable contexts for key assignments: {', '.join(sorted(key_patterns['assignments'].keys(), key=lambda k: key_patterns['assignments'][k], reverse=True)[:5])}")
    return key_patterns

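# Illustrative example (hypothetical key, not from any real project): for a
# .strings entry "ach_first_win_title" = "First Win!"; the prefix regex above
# records the prefix "ach", the suffix regex records the suffix "title", and
# the key is counted under the 'snake_case' format.
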
def detect_localization_context(content):
    """
    Analyze file content to determine if it's likely to contain localization logic.
    Returns a boolean indicating if a localization context is detected.
    """
    # Look for common localization imports and frameworks
    localization_indicators = [
        'LocalizedStringKey', 'NSLocalizedString', 'String(localized:',
        'Localizable.strings', '.localized', 'formatMessage',
        'i18n', 'translate(', 't("', 'gettext', 'getString(R.string.',
        'useTranslation', 'LocalizationProvider'
    ]
    for indicator in localization_indicators:
        if indicator in content:
            return True
    return False

def collect_variable_declarations(content, file_variables):
    """
    Extract variable declarations that might be localization keys.
    Stores results in the file_variables dict.
    """
    # Standard variable declarations across languages
    patterns = [
        # Swift/Obj-C
        r'(?:let|var)\s+(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        r'static\s+(?:let|var)\s+(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        # JavaScript/TypeScript
        r'(?:const|let|var)\s+(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*=\s*[\'"]([^\'"\\\n]*(?:\\.[^\'"\\\n]*)*)[\'"]',
        # Java/Kotlin
        r'(?:final|private|public|protected|static)?\s*(?:String|val|var)\s+(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
    ]
    for pattern in patterns:
        for match in re.finditer(pattern, content):
            if len(match.groups()) >= 2:
                var_name, value = match.groups()[:2]
                if is_potential_localization_key(value):
                    file_variables[var_name] = value

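# Illustrative example (hypothetical Swift source, not from any real project):
# a declaration such as
#     let welcomeTitleKey = "home_welcome_title"
# matches the Swift pattern above and, because the value looks key-like,
# is recorded as file_variables["welcomeTitleKey"] = "home_welcome_title".
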
def is_potential_localization_key(text):
    """
    Determine if a string is likely to be a localization key based on its format.
    More sophisticated than the basic is_valid_localization_key function.
    """
    if not text or not isinstance(text, str):
        return False
    # Empty string check
    if not text.strip():
        return False
    # Purely numeric keys are invalid
    if text.isdigit() or text.replace('.', '', 1).isdigit():
        return False
    # Check for specific patterns of localization keys.
    # Common traits include:
    # 1. snake_case with dots or underscores (module.key_name)
    # 2. Short identifiers, not natural text (keys are typically concise)
    # 3. Absence of spaces (keys rarely have spaces)
    # 4. Presence of common prefixes/suffixes like title, label, etc.
    # Basic format checks
    if ' ' in text:  # Spaces usually indicate natural text, not keys
        return False
    if len(text) > 100:  # Keys are typically shorter than long text
        return False
    # Pattern checks
    if re.match(r'^[a-z][a-z0-9]*[_\.][a-z0-9_\.]+$', text):  # module.key or module_key pattern
        return True
    if re.match(r'^[a-z][a-z0-9_\.]+$', text) and ('_' in text or '.' in text):  # snake_case or dot.notation
        return True
    # Check for common key components
    key_components = ['title', 'label', 'message', 'error', 'success', 'button',
                      'status', 'header', 'footer', 'desc', 'tooltip', 'placeholder',
                      'name', 'text']
    for component in key_components:
        if f"_{component}" in text or f".{component}" in text or text.endswith(f"_{component}") or text.endswith(f".{component}"):
            return True
    return False

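# Illustrative examples of the heuristic above (hypothetical strings):
#     is_potential_localization_key("settings.title")  -> True  (dot notation)
#     is_potential_localization_key("save_button")     -> True  (snake_case)
#     is_potential_localization_key("Hello, world!")   -> False (contains a space)
#     is_potential_localization_key("42")              -> False (purely numeric)
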
def process_localization_function_usage(content, file_path, used_keys, file_occurrences, language):
    """
    Process direct usage of localization functions in the code.
    """
    # Map of language to localization function patterns
    localization_patterns = {
        'swift': [
            # NSLocalizedString and other Swift patterns
            (r'NSLocalizedString\(\s*"([^"\\]*(?:\\.[^"\\]*)*)"', "NSLocalizedString"),
            (r'String\(localized:\s*"([^"\\]*(?:\\.[^"\\]*)*)"', "String(localized:)"),
            (r'\.localized\(\s*(?:[^,]*,\s*)?[^"]*"([^"\\]*(?:\\.[^"\\]*)*)"', ".localized()"),
            (r'Text\(LocalizedStringKey\("([^"\\]*(?:\\.[^"\\]*)*)"\)\)', "Text(LocalizedStringKey)"),
            (r'LocalizedStringKey\(\s*"([^"\\]*(?:\\.[^"\\]*)*)"', "LocalizedStringKey"),
        ],
        'kotlin/android': [
            # Android getString patterns
            (r'getString\(R\.string\.([a-zA-Z0-9_]+)', "getString"),
            (r'\.getString\(R\.string\.([a-zA-Z0-9_]+)', "context.getString"),
            (r'stringResource\(R\.string\.([a-zA-Z0-9_]+)', "stringResource"),
        ],
        'javascript/react': [
            # React i18n patterns
            (r'(?:i18n|t|translate)\(\s*[\'"]([^\'"\\\n]*(?:\\.[^\'"\\\n]*)*)[\'"]', "i18n/translate"),
            (r'formatMessage\(\s*{\s*id:\s*[\'"]([^\'"\\\n]*(?:\\.[^\'"\\\n]*)*)[\'"]', "formatMessage"),
        ],
    }
    # Process the patterns for the specific language
    for pattern, pattern_name in localization_patterns.get(language, []):
        matches = re.findall(pattern, content)
        for key in matches:
            used_keys[key] += 1
            file_occurrences[key].append((file_path, pattern_name))

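# Illustrative example (hypothetical Swift call, not from any real project):
#     NSLocalizedString("login_button_title", comment: "")
# matches the first Swift pattern, so used_keys["login_button_title"] is
# incremented and the occurrence is recorded as (file_path, "NSLocalizedString").
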
def process_contextual_string_assignments(content, file_path, used_keys, file_occurrences, key_pattern_analysis):
    """
    Process string assignments that are likely to be localization keys based on context.
    Uses dynamic pattern analysis rather than hard-coded patterns.
    """
    # Dynamic pattern for finding any assignment with string literals.
    # This will catch category, title, label, unit assignments and more.
    assignment_pattern = r'(\w+)(?:\s*(?:=|:)|\w+Key:)\s*"([^"\\]*(?:\\.[^"\\]*)*)"'
    # Set of variable name fragments that suggest localization key assignments
    potential_key_vars = set()
    # Build potential variable names from the pattern analysis
    if key_pattern_analysis and 'assignments' in key_pattern_analysis:
        for var_type in key_pattern_analysis['assignments']:
            if len(var_type) > 3:  # Avoid very short names that might cause false positives
                potential_key_vars.add(var_type.lower())
    # Add common key variable indicators if we didn't find enough from the analysis
    if len(potential_key_vars) < 5:
        potential_key_vars.update(['key', 'title', 'label', 'unit', 'category', 'field', 'text', 'message'])
    # Find all assignments in the content
    for match in re.finditer(assignment_pattern, content):
        var_name, value = match.groups()
        # Skip if the value is empty
        if not value.strip():
            continue
        # Get the surrounding context to check for localization indicators
        start_pos = max(0, match.start() - 100)
        end_pos = min(len(content), match.end() + 100)
        context = content[start_pos:end_pos]
        # Analyze the variable name - does it suggest a key assignment?
        var_lower = var_name.lower()
        # Look for common key-related variable names or parameter names
        is_potential_key_var = (
            any(key_var in var_lower for key_var in potential_key_vars) or
            'key' in var_lower or
            ('title' in context.lower() and 'key' in context.lower()) or
            ('unit' in context.lower() and 'key' in context.lower()) or
            'localized' in context.lower()
        )
        # Check for specific indicators in the context
        has_localization_context = any(indicator in context for indicator in [
            'localized', 'LocalizedStringKey', 'String(localized:', 'NSLocalizedString',
            'Text(', 'Label(', 'TextField(', 'Button(', 'NavigationTitle', 'titleKey:', 'unitKey:'
        ])
        # If it looks like a key, has a key-like format, or appears in a localization context
        if (is_potential_key_var or
                has_localization_context or
                (value in key_pattern_analysis.get('known_keys', set())) or
                is_key_like_format(value)):
            # Check the value format - does it follow key patterns we've seen?
            if is_valid_localization_key(value, key_pattern_analysis):
                used_keys[value] += 1
                file_occurrences[value].append((file_path, f"context_assigned:{var_name}"))
        # Special case for Swift named arguments like titleKey: "value", unitKey: "value"
        named_param_match = re.search(r'(\w+)Key:\s*"([^"\\]*(?:\\.[^"\\]*)*)"', context)
        if named_param_match:
            param_name, param_value = named_param_match.groups()
            if is_valid_localization_key(param_value, key_pattern_analysis):
                used_keys[param_value] += 1
                file_occurrences[param_value].append((file_path, f"named_param:{param_name}Key"))

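# Illustrative example (hypothetical SwiftUI call site; StatCard is an invented
# view name, not from any real project):
#     StatCard(titleKey: "stats_daily_title", value: total)
# The assignment regex captures ("titleKey", "stats_daily_title"); the variable
# name contains "key", the value passes is_valid_localization_key, and the
# usage is recorded as context_assigned:titleKey.
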
def is_key_like_format(value):
    """Check if a string has a format typical of localization keys."""
    # Most keys have underscores or dots and no spaces
    if ' ' in value:
        return False
    if '_' in value or '.' in value:
        return True
    # Keys typically use lowercase letters, often with specific prefixes
    if re.match(r'^[a-z][a-zA-Z0-9_\.]+$', value):
        return True
    return False

def process_variable_references(content, file_path, used_keys, file_occurrences, variables_by_file):
    """
    Process references to variables that might contain localization keys.
    """
    # Patterns for variable usage in localization functions
    var_usage_patterns = [
        # Swift patterns
        r'LocalizedStringKey\(\s*(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*\)',
        r'NSLocalizedString\(\s*(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*,',
        r'String\(localized:\s*(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*\)',
        # JavaScript/React patterns
        r'(?:i18n|t|translate)\(\s*(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*[,)]',
        # Android patterns
        r'getString\(\s*(\w+(?:Key|Title|Message|Text|Label|Description|Desc|String|Note)?)\s*\)',
    ]
    file_variables = variables_by_file.get(file_path, {})
    for pattern in var_usage_patterns:
        for match in re.finditer(pattern, content):
            var_name = match.group(1)
            # Check if the variable is defined in the current file
            if var_name in file_variables:
                key = file_variables[var_name]
                used_keys[key] += 1
                file_occurrences[key].append((file_path, f"variable:{var_name}"))
            else:
                # Try to find the variable in other files
                for other_file, vars_in_file in variables_by_file.items():
                    if var_name in vars_in_file:
                        key = vars_in_file[var_name]
                        used_keys[key] += 1
                        file_occurrences[key].append((file_path, f"external_variable:{var_name}"))
                        break

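# Illustrative example, continuing the hypothetical declaration shown after
# collect_variable_declarations: a later call such as
#     Text(LocalizedStringKey(welcomeTitleKey))
# matches the first Swift pattern, and the variable name resolves through
# file_variables back to the key "home_welcome_title".
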
def process_ui_component_strings(content, file_path, used_keys, file_occurrences, key_pattern_analysis):
    """
    Process string literals used in UI components that might be localized.
    Also looks for SwiftUI view parameters that are likely to be localization keys.
    """
    # Patterns for UI components with string literals that are often localized
    ui_patterns = [
        # SwiftUI patterns
        r'Text\(\s*"([^"\\]*(?:\\.[^"\\]*)*)"(?:\s*,|\s*\))(?!.*attributedString)',
        r'Button\([^)]*\)\s*{\s*Text\(\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        r'NavigationLink\([^)]*\)\s*{\s*Text\(\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        r'Label\(\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        # UIKit patterns
        r'\.title\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        r'\.text\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        r'\.placeholder\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        r'\.buttonTitle\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        # React/JSX patterns
        r'<Text[^>]*>\s*[\'"]([^\'"\\\n]*(?:\\.[^\'"\\\n]*)*)[\'"]',
        r'<Button[^>]*>\s*[\'"]([^\'"\\\n]*(?:\\.[^\'"\\\n]*)*)[\'"]',
        r'<Label[^>]*>\s*[\'"]([^\'"\\\n]*(?:\\.[^\'"\\\n]*)*)[\'"]',
    ]
    # Special patterns for SwiftUI named key parameters
    named_key_params = [
        r'titleKey:\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        r'labelKey:\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        r'unitKey:\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
        r'messageKey:\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
    ]
    # Process the standard UI patterns
    for pattern in ui_patterns:
        for match in re.finditer(pattern, content):
            text = match.group(1)
            # Skip if it is likely translated text rather than a key
            if not is_potential_localization_key(text) and is_translatable_text(text):
                continue
            # If it follows key patterns, it might be a direct key reference
            if is_potential_localization_key(text):
                used_keys[text] += 1
                file_occurrences[text].append((file_path, "ui_component"))
    # Process named key parameters (titleKey:, unitKey:, etc.)
    for pattern in named_key_params:
        for match in re.finditer(pattern, content):
            key = match.group(1)
            param_type = pattern.split(':')[0]  # Extract the parameter name (titleKey, unitKey, etc.)
            if is_valid_localization_key(key, key_pattern_analysis):
                used_keys[key] += 1
                file_occurrences[key].append((file_path, f"named_param:{param_type}"))

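# Illustrative example (hypothetical SwiftUI views, not from any real project):
#     Text("profile_name_label")  -> counted as a key (key-like format)
#     Text("Hello, world!")       -> skipped (reads as translatable text)
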
def is_translatable_text(text):
    """
    Determine if a string is likely to be translatable human-readable text rather than a key.
    """
    # Human-readable text likely contains:
    # - Spaces
    # - Multiple words
    # - Punctuation like periods, commas, question marks
    # - A leading capital letter (in many languages)
    # - Natural language sentence structure
    # Check for spaces (most UI text has spaces between words)
    if ' ' in text:
        return True
    # Check for common punctuation used in sentences
    if any(punct in text for punct in '.,:;!?'):
        return True
    # Check for the capitalization pattern typical of sentences
    if text and text[0].isupper() and not text.isupper():
        # The first letter is uppercase but the text is not all uppercase
        return True
    # Check the word count - more than one word suggests natural text
    word_count = len(text.split())
    if word_count > 1:
        return True
    return False


def find_localization_keys_in_code(project_path, exclude_patterns):
    """Find localization keys used in code files."""
    return extract_keys_from_files(project_path, exclude_patterns)

def write_log(log_file, used_keys, unused_keys_by_file, all_keys_by_file, file_occurrences,
              comments_removed_by_file=None, duplicates_removed_by_file=None,
              whitespace_removed_by_file=None, sorted_keys_by_file=None):
    """Write usage statistics to the log file."""
    with open(log_file, 'w', encoding='utf-8') as f:
        f.write("=== LOCALIZATION ANALYSIS REPORT ===\n\n")
        # Add cleanup information
        if duplicates_removed_by_file:
            f.write("--- DUPLICATE KEYS ---\n")
            total_duplicates = sum(count for count, _ in duplicates_removed_by_file.values())
            f.write(f"Total {total_duplicates} duplicate keys cleaned.\n\n")
            for file_path, (dup_count, unique_count) in duplicates_removed_by_file.items():
                if dup_count > 0:
                    lang = get_language_from_path(file_path)
                    f.write(f"{file_path} ({lang}): {dup_count} duplicate keys removed, {unique_count} unique keys remain\n")
            f.write("\n")
        if comments_removed_by_file:
            f.write("--- COMMENT LINES ---\n")
            total_comments = sum(comments_removed_by_file.values())
            f.write(f"Total {total_comments} comment lines cleaned.\n\n")
            for file_path, comment_count in comments_removed_by_file.items():
                if comment_count > 0:
                    lang = get_language_from_path(file_path)
                    f.write(f"{file_path} ({lang}): {comment_count} comment lines removed\n")
            f.write("\n")
        if whitespace_removed_by_file:
            f.write("--- UNNECESSARY WHITESPACE AND EMPTY LINES ---\n")
            total_whitespace = sum(whitespace_removed_by_file.values())
            f.write(f"Total {total_whitespace} unnecessary whitespace and empty lines cleaned.\n\n")
            for file_path, whitespace_count in whitespace_removed_by_file.items():
                if whitespace_count > 0:
                    lang = get_language_from_path(file_path)
                    f.write(f"{file_path} ({lang}): {whitespace_count} unnecessary whitespace/lines cleaned\n")
            f.write("\n")
        if sorted_keys_by_file:
            f.write("--- SORTED KEYS ---\n")
            total_sorted = sum(sorted_keys_by_file.values())
            f.write(f"Total {total_sorted} keys sorted alphabetically.\n\n")
            for file_path, sorted_count in sorted_keys_by_file.items():
                if sorted_count > 0:
                    lang = get_language_from_path(file_path)
                    f.write(f"{file_path} ({lang}): {sorted_count} keys sorted alphabetically\n")
            f.write("\n")
        f.write("--- USED KEYS ---\n")
        f.write(f"Total {len(used_keys)} keys are used.\n\n")
        # Top 10 most used keys
        f.write("Top 10 most used keys:\n")
        for key, count in used_keys.most_common(10):
            f.write(f"  {key}: {count} times\n")
        f.write("\n")
        # All used keys and where they are used
        f.write("All used keys and where they are used (alphabetically):\n")
        for key in sorted(used_keys.keys()):
            f.write(f"  {key}: {used_keys[key]} times\n")
            # Show up to 5 files for each key
            for i, (file_path, pattern_name) in enumerate(file_occurrences[key][:5]):
                short_path = os.path.relpath(file_path)
                f.write(f"    - {short_path} ({pattern_name})\n")
            if len(file_occurrences[key]) > 5:
                f.write(f"    ... and {len(file_occurrences[key]) - 5} more files\n")
        f.write("\n")
        # Unused keys by file
        f.write("--- UNUSED KEYS BY FILE ---\n")
        total_unused = 0
        for file_path, keys in unused_keys_by_file.items():
            if keys:
                total_unused += len(keys)
                percent = (len(keys) / len(all_keys_by_file[file_path])) * 100 if all_keys_by_file[file_path] else 0
                f.write(f"\n{file_path} ({len(keys)}/{len(all_keys_by_file[file_path])}, {percent:.1f}%):\n")
                for key in sorted(keys):
                    f.write(f"  {key}\n")
        f.write(f"\nTotal {total_unused} unused keys detected.\n")

def clean_strings_file(file_path, keys_to_remove):
    """Remove the specified keys from the strings file."""
    encoding = 'utf-8'
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except UnicodeDecodeError:
        # Try UTF-16
        try:
            with open(file_path, 'r', encoding='utf-16') as f:
                content = f.read()
            encoding = 'utf-16'
        except Exception as e:
            print(f"Error: Could not read file {file_path}: {e}")
            return 0
    lines = content.split('\n')
    filtered_lines = []
    removed_count = 0
    for line in lines:
        # Check the line against every key scheduled for removal
        should_keep = True
        for key in keys_to_remove:
            pattern = r'"' + re.escape(key) + r'"\s*=\s*"[^"\\]*(?:\\.[^"\\]*)*"\s*;'
            if re.search(pattern, line):
                should_keep = False
                removed_count += 1
                break
        if should_keep:
            filtered_lines.append(line)
    # Write the file back with the same encoding
    with open(file_path, 'w', encoding=encoding) as f:
        f.write('\n'.join(filtered_lines))
    return removed_count


def get_language_from_path(file_path):
    """Extract the language code from a file path."""
    match = re.search(r'/([^/]+)\.lproj/Localizable\.strings', file_path)
    if match:
        return match.group(1)
    return os.path.basename(os.path.dirname(os.path.dirname(file_path)))

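# Illustrative example: for a standard Xcode layout such as
#     MyApp/Resources/en.lproj/Localizable.strings
# the regex above returns "en"; for paths without an .lproj component, the
# grandparent directory name is used as a fallback.
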
def clean_comments_from_file(file_path):
    """Clean comment lines from a Localizable.strings file."""
    encoding = 'utf-8'
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except UnicodeDecodeError:
        # Try UTF-16
        try:
            with open(file_path, 'r', encoding='utf-16') as f:
                content = f.read()
            encoding = 'utf-16'
        except Exception as e:
            print(f"Error: Could not read file {file_path}: {e}")
            return 0
    lines = content.split('\n')
    cleaned_lines = []
    removed_count = 0
    for line in lines:
        # Is it a full comment line?
        if line.strip().startswith('//'):
            removed_count += 1
            continue
        # Is there an inline comment?
        comment_pos = line.find('//')
        if comment_pos > 0:
            # Check whether the key-value pair has already ended:
            #   "key" = "value"; // comment
            if '";' in line[:comment_pos]:
                # Remove the comment part, keep the key-value pair
                line = line[:comment_pos].rstrip()
            # Otherwise, keep the comment (the // characters are probably inside the key or value)
        cleaned_lines.append(line)
    # Write the file back with the same encoding
    with open(file_path, 'w', encoding=encoding) as f:
        f.write('\n'.join(cleaned_lines))
    return removed_count

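# Illustrative examples (hypothetical .strings lines):
#     "app_name" = "MyApp"; // shown in the nav bar
#         -> the trailing comment is stripped, the pair is kept
#     "docs_url" = "https://example.com";
#         -> kept intact: the // appears before the closing "; so it is
#            treated as part of the value, not a comment
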
def remove_duplicate_keys(file_path):
    """Clean duplicate keys from a Localizable.strings file.
    Keeps the last occurrence and removes the earlier ones."""
    encoding = 'utf-8'
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except UnicodeDecodeError:
        # Try UTF-16
        try:
            with open(file_path, 'r', encoding='utf-16') as f:
                content = f.read()
            encoding = 'utf-16'
        except Exception as e:
            print(f"Error: Could not read file {file_path}: {e}")
            return 0, 0
    lines = content.split('\n')
    seen_keys = {}  # key -> (last seen line index, line content)
    duplicate_indexes = set()  # Line indexes to delete
    key_pattern = re.compile(r'^\s*"([^"\\]*(?:\\.[^"\\]*)*)"\s*=\s*".*";')
    # Find duplicate keys
    for i, line in enumerate(lines):
        # Process only lines containing keys
        match = key_pattern.match(line)
        if match:
            key = match.group(1)
            if key in seen_keys:
                # This key was already seen; mark the earlier occurrence for removal
                duplicate_indexes.add(seen_keys[key][0])
            # Update the last seen index of the key
            seen_keys[key] = (i, line)
    # Delete the duplicate lines
    cleaned_lines = [line for i, line in enumerate(lines) if i not in duplicate_indexes]
    # Write the file back with the same encoding
    with open(file_path, 'w', encoding=encoding) as f:
        f.write('\n'.join(cleaned_lines))
    return len(duplicate_indexes), len(seen_keys)

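# Illustrative example (hypothetical duplicate entries):
#     "score_label" = "Score";   <- removed (earlier occurrence)
#     "score_label" = "Points";  <- kept (the last definition wins)
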
def clean_whitespace_from_file(file_path):
    """Clean unnecessary whitespace and empty lines from a Localizable.strings file."""
    encoding = 'utf-8'
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except UnicodeDecodeError:
        # Try UTF-16
        try:
            with open(file_path, 'r', encoding='utf-16') as f:
                content = f.read()
            encoding = 'utf-16'
        except Exception as e:
            print(f"Error: Could not read file {file_path}: {e}")
            return 0
    lines = content.split('\n')
    cleaned_lines = []
    removed_count = 0
    for line in lines:
        # Strip whitespace at the beginning and end of the line
        original_line = line
        line = line.strip()
        # Skip completely empty lines
        if not line:
            removed_count += 1
            continue
        # Normalize lines matching the "key" = "value"; pattern
        key_value_pattern = r'"([^"\\]*(?:\\.[^"\\]*)*)"\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"\s*;'
        match = re.match(key_value_pattern, line)
        if match:
            key, value = match.groups()
            # Recreate the key-value pair in canonical form
            formatted_line = f'"{key}" = "{value}";'
            cleaned_lines.append(formatted_line)
            # Count it if the result differs from the original line
            if formatted_line != original_line.strip():
                removed_count += 1
        else:
            # If it is not a key-value pair, add the stripped version
            if line != original_line:
                removed_count += 1
            cleaned_lines.append(line)
    # Write the file back with the same encoding
    with open(file_path, 'w', encoding=encoding) as f:
        f.write('\n'.join(cleaned_lines))
    return removed_count

def sort_keys_in_file(file_path):
    """Sort keys alphabetically in a Localizable.strings file.
    Note: the file is rewritten from the parsed key-value pairs only,
    so comments and blank lines do not survive sorting."""
    encoding = 'utf-8'
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except UnicodeDecodeError:
        # Try UTF-16
        try:
            with open(file_path, 'r', encoding='utf-16') as f:
                content = f.read()
            encoding = 'utf-16'
        except Exception as e:
            print(f"Error: Could not read file {file_path}: {e}")
            return 0
    # Find all lines matching the "key" = "value"; pattern
    pattern = r'"([^"\\]*(?:\\.[^"\\]*)*)"\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"\s*;'
    matches = re.findall(pattern, content)
    if not matches:
        print(f"Error: No keys found in file {file_path}.")
        return 0
    # Collect keys and values in a dictionary (later duplicates overwrite earlier ones)
    key_values = {key: value for key, value in matches}
    # Create sorted lines using the sorted keys
    sorted_lines = []
    for key in sorted(key_values.keys()):
        sorted_lines.append(f'"{key}" = "{key_values[key]}";')
    # Write the file back with the same encoding
    with open(file_path, 'w', encoding=encoding) as f:
        f.write('\n'.join(sorted_lines))
    return len(matches)

def find_missing_keys(used_keys, all_keys_by_file, filter_prefixes=None, min_missing_count=0):
    """Find keys that are used in the project but missing from language files.

    Args:
        used_keys: Dictionary of keys used in the code
        all_keys_by_file: Dictionary mapping file paths to their keys
        filter_prefixes: Optional list of prefixes to filter keys by
        min_missing_count: Minimum number of missing keys for a file to be included

    Returns:
        Dictionary mapping file paths to their missing keys
    """
    missing_keys_by_file = {}
    for file_path, keys in all_keys_by_file.items():
        # Find keys that are used in the project but not in this language file
        missing_keys = [key for key in used_keys if key not in keys]
        # Apply prefix filtering if specified
        if filter_prefixes:
            missing_keys = [key for key in missing_keys if any(key.startswith(prefix) for prefix in filter_prefixes)]
        # Only include files with enough missing keys
        if missing_keys and len(missing_keys) >= min_missing_count:
            missing_keys_by_file[file_path] = missing_keys
    return missing_keys_by_file

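# Illustrative example (hypothetical data): if "profile_title" is used in code
# but the de.lproj file only defines "profile_name", the de file's entry in
# the returned dict would include "profile_title" in its list of missing keys.
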
def write_missing_keys_to_file(missing_keys_by_file, output_file="missing_keys.txt"):
    """
    Write missing keys to a detailed report file.
    This function organizes missing keys by language and provides both a summary
    and a comprehensive listing, formatted for readability.
    """
    if not missing_keys_by_file:
        return False
    # Group missing keys by language for better organization
    keys_by_language = {}
    all_missing_keys = set()
    for file_path, keys in missing_keys_by_file.items():
        lang = get_language_from_path(file_path)
        if lang not in keys_by_language:
            keys_by_language[lang] = {"files": {}, "missing_key_count": 0, "total_keys": set()}
        keys_by_language[lang]["files"][file_path] = keys
        keys_by_language[lang]["missing_key_count"] += len(keys)
        keys_by_language[lang]["total_keys"].update(keys)
        all_missing_keys.update(keys)
    # Sort languages by missing key count (descending)
    sorted_languages = sorted(keys_by_language.keys(),
                              key=lambda lang: keys_by_language[lang]["missing_key_count"],
                              reverse=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("=== MISSING LOCALIZATION KEYS REPORT ===\n\n")
        # Overall summary
        total_missing = len(all_missing_keys)
        total_file_keys = sum(len(keys) for keys in missing_keys_by_file.values())
        f.write("SUMMARY:\n")
        f.write(f"- Total unique missing keys: {total_missing}\n")
        f.write(f"- Total missing entries across all files: {total_file_keys}\n")
        f.write(f"- Languages with missing keys: {len(keys_by_language)}\n\n")
        # Language summary table
        f.write("LANGUAGE SUMMARY:\n")
        f.write("╔════════════════╦═══════════════════════════╦════════════════╗\n")
        f.write("║ Language       ║ Missing Keys              ║ Files Affected ║\n")
        f.write("╠════════════════╬═══════════════════════════╬════════════════╣\n")
        for lang in sorted_languages:
            lang_data = keys_by_language[lang]
            file_count = len(lang_data["files"])
            missing_count = lang_data["missing_key_count"]
            unique_count = len(lang_data["total_keys"])
            # Format the summary line with padding for alignment
            missing_summary = f"{missing_count} ({unique_count} unique)"
            f.write(f"║ {lang:<14} ║ {missing_summary:<25} ║ {file_count:>14} ║\n")
        f.write("╚════════════════╩═══════════════════════════╩════════════════╝\n\n")
        # Common missing keys across multiple languages
        common_keys = {}
        for lang, lang_data in keys_by_language.items():
            for key in lang_data["total_keys"]:
                if key not in common_keys:
                    common_keys[key] = set()
                common_keys[key].add(lang)
        # Sort by the number of languages affected (descending)
        most_common_keys = sorted(common_keys.items(), key=lambda x: len(x[1]), reverse=True)
        if most_common_keys:
            f.write("KEYS MISSING IN MULTIPLE LANGUAGES:\n")
            f.write("╔══════════════════════════════════════════════════╦═════════════════════════╗\n")
            f.write(f"║ {'Key':<48} ║ {'Missing in Languages':<23} ║\n")
            f.write("╠══════════════════════════════════════════════════╬═════════════════════════╣\n")
            # Show only keys missing in more than one language
            multi_lang_keys = [(key, langs) for key, langs in most_common_keys if len(langs) > 1]
            for key, langs in multi_lang_keys[:20]:  # Limit to 20 for readability
                # Truncate long keys
                display_key = key[:45] + "..." if len(key) > 48 else key
                # Format the languages as a comma-separated list
                langs_str = ", ".join(sorted(langs))
                if len(langs_str) > 23:
                    langs_str = langs_str[:20] + "..."
                f.write(f"║ {display_key:<48} ║ {langs_str:<23} ║\n")
            if len(multi_lang_keys) > 20:
                more_line = f"... and {len(multi_lang_keys) - 20} more keys"
                f.write(f"║ {more_line:<48} ║ {'':<23} ║\n")
            f.write("╚══════════════════════════════════════════════════╩═════════════════════════╝\n\n")
        # Detailed listing by language
        f.write("DETAILED MISSING KEYS BY LANGUAGE:\n\n")
        for lang in sorted_languages:
            lang_data = keys_by_language[lang]
            file_count = len(lang_data["files"])
            missing_count = lang_data["missing_key_count"]
            unique_count = len(lang_data["total_keys"])
            f.write(f"LANGUAGE: {lang}\n")
            f.write(f"  - Missing keys: {missing_count} ({unique_count} unique)\n")
            f.write(f"  - Files affected: {file_count}\n\n")
            # Sort the files to ensure consistent output
            sorted_files = sorted(lang_data["files"].keys())
            for file_path in sorted_files:
                keys = lang_data["files"][file_path]
                rel_path = os.path.relpath(file_path)
                f.write(f"  File: {rel_path}\n")
                f.write(f"  Missing keys: {len(keys)}\n")
                # Sort the keys alphabetically for consistent output
                for key in sorted(keys):
                    f.write(f'    "{key}"\n')
                f.write("\n")
            f.write("-" * 80 + "\n\n")
    return True

def create_localization_template(missing_keys, all_keys_by_file, reference_language='en', output_file='template_localizable.strings'):
    """
    Create a template Localizable.strings file with missing keys.

    Args:
        missing_keys: Set of missing keys to include in the template
        all_keys_by_file: Dictionary of all keys by file, used to look up reference values
        reference_language: Language code to use for reference values
        output_file: Output file path for the template

    Returns:
        True if successful, False otherwise
    """
    if not missing_keys:
        print("No missing keys to create a template for.")
        return False
    # Find the reference language file (match the .lproj directory, not just any
    # path that happens to contain the language code)
    reference_file = None
    reference_values = {}
    for file_path in all_keys_by_file.keys():
        if f"{reference_language}.lproj" in file_path:
            reference_file = file_path
            reference_values = all_keys_by_file[file_path]
            break
    # If the reference language wasn't found, use the first file as the reference
    if reference_file is None and all_keys_by_file:
        reference_file = list(all_keys_by_file.keys())[0]
        reference_values = all_keys_by_file[reference_file]
    sorted_keys = sorted(missing_keys)
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write('// Template Localizable.strings file with missing keys\n')
        f.write('// Generated by clean_localizations.py\n')
        f.write('// Reference language: ' + (reference_language if reference_file else 'none') + '\n\n')
        for key in sorted_keys:
            # Try to get the value from the reference language
            if key in reference_values:
                value = reference_values[key]
                f.write(f'"{key}" = "{value}";\n')
            else:
                # If not found, use a placeholder with the key name
                f.write(f'"{key}" = "TRANSLATE: {key}";\n')
    print(f"Created template file with {len(sorted_keys)} missing keys: {output_file}")
    if not reference_file:
        print(f"Warning: Reference language '{reference_language}' not found, using placeholders for all values.")
    return True

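# Illustrative template output (hypothetical keys; the first value is copied
# from the reference language, the second falls back to the placeholder form):
#     "ach_first_win_title" = "First Win!";
#     "ach_new_badge_title" = "TRANSLATE: ach_new_badge_title";
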
def is_valid_localization_key(key, key_pattern_analysis=None):
    """
    Dynamically determine if a string is likely to be a valid localization key,
    based on pattern analysis from the project.
    """
    if not key or not isinstance(key, str):
        return False
    # Empty string check
    if not key.strip():
        return False
    # Purely numeric keys are invalid
    if key.isdigit() or key.replace('.', '', 1).isdigit():
        return False
    # If we have pattern analysis results from the project
    if key_pattern_analysis:
        # Direct match with known keys
        if key in key_pattern_analysis.get('known_keys', set()):
            return True
        # Check if the key follows detected prefix patterns
        prefixes = key_pattern_analysis.get('prefixes', set())
        for prefix in prefixes:
            if key.startswith(prefix + '_') or key.startswith(prefix + '.'):
                return True
        # Check if the key has detected suffix patterns
        suffixes = key_pattern_analysis.get('suffixes', set())
        for suffix in suffixes:
            if key.endswith('_' + suffix) or key.endswith('.' + suffix):
                return True
    # Basic format validation for localization keys
    if '_' in key or '.' in key:
        # It has the structure of a localization key (underscore/dot separated)
        # and contains no spaces or unusual characters
        if re.match(r'^[a-z][a-zA-Z0-9_\.]+$', key) and ' ' not in key:
            return True
    return False

def main():
    args = parse_args()
    project_path = os.path.abspath(args.project_path)
    # Default exclude patterns (raw strings so the regexes are not mangled)
    exclude_patterns = args.exclude or [r'^\.git$', r'^\.build$', r'^Pods$', r'^Carthage$', r'^\.DS_Store$']
    print(f"Project directory: {project_path}")
    print(f"Excluded patterns: {exclude_patterns}")
    # Auto-detect missing keys mode
    if args.auto_detect_missing_keys:
        print("Running in auto-detect missing keys mode...")
        # Display the filter settings, if any
        if args.filter_prefix:
            print(f"Filtering keys by prefixes: {', '.join(args.filter_prefix)}")
        if args.min_missing_count > 0:
            print(f"Only reporting languages with at least {args.min_missing_count} missing keys")
        # Find the used keys
        print("Searching for localization keys in code files...")
        used_keys, file_occurrences = find_localization_keys_in_code(project_path, exclude_patterns)
        print(f"Total {len(used_keys)} keys are used in code.")
        # Find the strings files
        print("Searching for Localizable.strings files...")
        strings_files = find_strings_files(project_path, exclude_patterns)
        if not strings_files:
            print("No Localizable.strings files found. Terminating.")
            return
        print(f"Found {len(strings_files)} localization files.")
        # Parse the keys from each file
        all_keys_by_file = {}
        languages_found = set()
        for file_path in strings_files:
            lang = get_language_from_path(file_path)
            languages_found.add(lang)
            print(f"Processing: {file_path} ({lang})")
            keys = parse_strings_file(file_path)
            all_keys_by_file[file_path] = keys
            # Calculate the share of used keys that this file covers
            if used_keys:
                found_keys = len([k for k in keys if k in used_keys])
                percent_found = (found_keys / len(used_keys)) * 100
            else:
                found_keys = 0
                percent_found = 0
            print(f"  Total keys: {len(keys)}")
            print(f"  Contains {found_keys}/{len(used_keys)} used keys ({percent_found:.1f}%)")
        print(f"\nFound {len(languages_found)} unique languages: {', '.join(sorted(languages_found))}")
        # Find keys missing from language files, applying the filters
        missing_keys_by_file = find_missing_keys(used_keys, all_keys_by_file, args.filter_prefix, args.min_missing_count)
        if missing_keys_by_file:
            print("\nDetected keys used in code but missing from language files:")
            total_missing = sum(len(keys) for keys in missing_keys_by_file.values())
            unique_missing_keys = set()
            for keys in missing_keys_by_file.values():
                unique_missing_keys.update(keys)
            print(f"Total missing entries: {total_missing} across {len(missing_keys_by_file)} files")
            print(f"Unique missing keys: {len(unique_missing_keys)}")
            # Display a preview of missing keys for each language
            for file_path, keys in missing_keys_by_file.items():
                lang = get_language_from_path(file_path)
                print(f"  {lang}: {len(keys)} missing keys")
                # Show a few examples of missing keys
                for key in sorted(keys)[:5]:
                    print(f"    \"{key}\"")
                if len(keys) > 5:
                    print(f"    ... and {len(keys) - 5} more keys")
            # Write the missing keys to the specified output file
            if write_missing_keys_to_file(missing_keys_by_file, output_file=args.output_file):
                print(f"\nDetailed missing keys report written to {args.output_file}")
            # Create a template file if requested
            if args.create_template:
                # Collect all unique missing keys
                all_missing_keys = set()
                for keys in missing_keys_by_file.values():
                    all_missing_keys.update(keys)
                create_localization_template(
                    all_missing_keys,
                    all_keys_by_file,
                    reference_language=args.reference_language,
                    output_file=args.template_file
                )
        else:
            if args.filter_prefix:
                print(f"\nNo missing keys found matching the specified prefixes: {', '.join(args.filter_prefix)}")
            else:
                print("\nNo missing keys found. All localization files are complete.")
        # Optional: identify keys that might be unused in code
        if not args.no_prompt and input("\nWould you like to check for potentially unused keys in localization files? (y/n): ").strip().lower() == 'y':
            print("\nChecking for potentially unused keys...")
            unused_keys_by_file = {}
            total_unused = 0
            for file_path, keys in all_keys_by_file.items():
                # Find keys in the strings file that aren't found in code
                unused_keys = [key for key in keys if key not in used_keys]
                if unused_keys:
                    unused_keys_by_file[file_path] = unused_keys
                    total_unused += len(unused_keys)
            if unused_keys_by_file:
                print(f"Found {total_unused} potentially unused keys across all language files.")
                for file_path, keys in unused_keys_by_file.items():
                    lang = get_language_from_path(file_path)
                    percent = (len(keys) / len(all_keys_by_file[file_path])) * 100
                    print(f"  {lang}: {len(keys)}/{len(all_keys_by_file[file_path])} keys ({percent:.1f}%) may be unused")
                print("\nNote: these keys might be used dynamically or loaded at runtime.")
                print("Review manually before removing any keys.")
                if args.log_file:
                    write_log(args.log_file, used_keys, unused_keys_by_file, all_keys_by_file, file_occurrences)
                    print(f"Analysis report written to {args.log_file}")
            else:
                print("No potentially unused keys found. All keys in localization files appear to be used in code.")
        return
    # Comment removal preference
    if args.no_prompt:
        remove_comments = False
        clean_whitespace = args.clean_whitespace
        sort_keys = args.sort_keys
    else:
        remove_comments = input("Do you want to remove comment lines? (y/n): ").strip().lower() == 'y'
        # Unnecessary whitespace cleaning preference
        if args.clean_whitespace:
            clean_whitespace = True
        else:
            clean_whitespace = input("Do you want to clean unnecessary whitespace and empty lines? (y/n): ").strip().lower() == 'y'
        # Alphabetical key sorting preference
        if args.sort_keys:
            sort_keys = True
        else:
            sort_keys = input("Do you want to sort keys alphabetically? (y/n): ").strip().lower() == 'y'
    # Duplicate key removal preference
    auto_deduplicate = True  # Automatically deduplicate without asking
    # Find the used keys
    print("Searching for localization keys in code files...")
    used_keys, file_occurrences = find_localization_keys_in_code(project_path, exclude_patterns)
    print(f"Total {len(used_keys)} keys are used.")
    # Find the strings files
    print("Searching for Localizable.strings files...")
    strings_files = find_strings_files(project_path, exclude_patterns)
    print(f"Found {len(strings_files)} strings files.")
    if not strings_files:
        print("No Localizable.strings files found. Terminating.")
        return
    # Collect information about the deletions
    duplicates_removed_by_file = {}
    comments_removed_by_file = {}
    whitespace_removed_by_file = {}
    sorted_keys_by_file = {}
    # Clean duplicate keys
    if auto_deduplicate:
        print("\nCleaning duplicate keys...")
        total_duplicates_removed = 0
        for file_path in strings_files:
            lang = get_language_from_path(file_path)
            duplicates_removed, unique_keys = remove_duplicate_keys(file_path)
            duplicates_removed_by_file[file_path] = (duplicates_removed, unique_keys)
            total_duplicates_removed += duplicates_removed
            if duplicates_removed > 0:
                print(f"  {file_path} ({lang}): {duplicates_removed} duplicate keys removed, {unique_keys} unique keys remain")
            else:
                print(f"  {file_path} ({lang}): No duplicate keys found")
        print(f"Total {total_duplicates_removed} duplicate keys removed.\n")
    # Clean comment lines first (if requested)
    if remove_comments:
        print("\nCleaning comment lines...")
        total_comments_removed = 0
        for file_path in strings_files:
            lang = get_language_from_path(file_path)
            comments_removed = clean_comments_from_file(file_path)
            comments_removed_by_file[file_path] = comments_removed
            total_comments_removed += comments_removed
            print(f"  {file_path} ({lang}): {comments_removed} comment lines removed")
        print(f"Total {total_comments_removed} comment lines removed.\n")
    # Clean unnecessary whitespace (if requested)
    if clean_whitespace:
        print("\nCleaning unnecessary whitespace and empty lines...")
        total_whitespace_removed = 0
        for file_path in strings_files:
            lang = get_language_from_path(file_path)
            whitespace_removed = clean_whitespace_from_file(file_path)
            whitespace_removed_by_file[file_path] = whitespace_removed
            total_whitespace_removed += whitespace_removed
            print(f"  {file_path} ({lang}): {whitespace_removed} unnecessary whitespace/lines cleaned")
        print(f"Total {total_whitespace_removed} unnecessary whitespace/lines cleaned.\n")
    # Sort keys alphabetically (if requested)
    if sort_keys:
        print("\nSorting keys alphabetically...")
        total_keys_sorted = 0
        for file_path in strings_files:
            lang = get_language_from_path(file_path)
            keys_sorted = sort_keys_in_file(file_path)
            sorted_keys_by_file[file_path] = keys_sorted
            total_keys_sorted += keys_sorted
            print(f"  {file_path} ({lang}): {keys_sorted} keys sorted alphabetically")
        print(f"Total {total_keys_sorted} keys sorted alphabetically.\n")
    # Parse the keys from each file
    all_keys_by_file = {}
    unused_keys_by_file = {}
    for file_path in strings_files:
        lang = get_language_from_path(file_path)
        print(f"Processing: {file_path} ({lang})")
        keys = parse_strings_file(file_path)
        all_keys_by_file[file_path] = keys
        # Find the unused keys
        unused_keys = [key for key in keys if key not in used_keys]
        unused_keys_by_file[file_path] = unused_keys
        print(f"  Total keys: {len(keys)}")
        print(f"  Unused keys: {len(unused_keys)}")
    # Find keys missing from language files
    missing_keys_by_file = find_missing_keys(used_keys, all_keys_by_file, args.filter_prefix, args.min_missing_count)
    if missing_keys_by_file:
        print("\nDetected keys used in the project but missing from language files:")
        total_missing = sum(len(keys) for keys in missing_keys_by_file.values())
        print(f"Total missing keys: {total_missing}")
        for file_path, keys in missing_keys_by_file.items():
            lang = get_language_from_path(file_path)
            print(f"  {file_path} ({lang}): {len(keys)} missing keys")
        # Write the missing keys to the specified output file
        if write_missing_keys_to_file(missing_keys_by_file, output_file=args.output_file):
            print(f"Missing keys written to {args.output_file}")
    # If there are no unused keys
    if all(not keys for keys in unused_keys_by_file.values()):
        print("\nNo unused keys found in any language file. Terminating.")
        write_log(args.log_file, used_keys, unused_keys_by_file, all_keys_by_file, file_occurrences,
                  comments_removed_by_file if remove_comments else None,
                  duplicates_removed_by_file if auto_deduplicate else None,
                  whitespace_removed_by_file if clean_whitespace else None,
                  sorted_keys_by_file if sort_keys else None)
        print(f"\nAnalysis report written: {args.log_file}")
        return
    # Show the unused keys and ask for deletion confirmation
    for file_path, unused_keys in unused_keys_by_file.items():
        if not unused_keys:
            continue
        lang = get_language_from_path(file_path)
        print(f"\nThe {lang} language has {len(unused_keys)} unused keys:")
        # Show all unused keys
        for i, key in enumerate(sorted(unused_keys), 1):
            print(f"  {i}. \"{key}\" = \"{all_keys_by_file[file_path][key]}\"")
        if args.confirm_all:
            confirm = 'y'
        else:
            confirm = input(f"\nDelete {len(unused_keys)} unused keys in the {lang} language? (y = delete all / n = skip / s = select keys to keep): ")
        if confirm.lower() == 'y':
            print(f"Deleting unused keys: {file_path}")
            removed = clean_strings_file(file_path, unused_keys)
            print(f"Completed. {removed} keys deleted.")
        elif confirm.lower() == 's':
            keys_to_keep = input("Enter the keys you want to keep, separated by commas (e.g. key1,key2): ").split(',')
            keys_to_keep = [k.strip() for k in keys_to_keep]
            keys_to_remove = [key for key in unused_keys if key not in keys_to_keep]
            print(f"Deleting {len(keys_to_remove)} keys, keeping {len(keys_to_keep)} keys.")
            removed = clean_strings_file(file_path, keys_to_remove)
            print(f"Completed. {removed} keys deleted.")
        else:
            print("Deletion cancelled.")
    # Write the log file
    write_log(args.log_file, used_keys, unused_keys_by_file, all_keys_by_file, file_occurrences,
              comments_removed_by_file if remove_comments else None,
              duplicates_removed_by_file if auto_deduplicate else None,
              whitespace_removed_by_file if clean_whitespace else None,
              sorted_keys_by_file if sort_keys else None)
    print(f"\nAnalysis report written: {args.log_file}")


if __name__ == '__main__':
    main()