Created
March 3, 2026 11:51
-
-
Save me-suzy/0d9454405daeee981e105d6f2e79c462 to your computer and use it in GitHub Desktop.
diacritice 3534.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
# Root folder that is walked recursively; only *.html files below it are touched.
BASE_DIR = r'e:\Carte\BB\17 - Site Leadership\Principal\ro'

# One row per output character:
#   (replacement character, code points whose numeric entities map to it,
#    named entities that map to it)
# Cedilla code points / names (ş ţ) are listed under the comma-below letters
# (ș ț) because Romanian text should end up with the comma-below forms.
_ENTITY_SOURCES = (
    # ă / Ă
    ('ă', (0x0103,), ('abreve',)),
    ('Ă', (0x0102,), ('Abreve',)),
    # â / Â
    ('â', (0x00E2,), ('acirc',)),
    ('Â', (0x00C2,), ('Acirc',)),
    # î / Î
    ('î', (0x00EE,), ('icirc',)),
    ('Î', (0x00CE,), ('Icirc',)),
    # ș lower / upper (comma-below + cedilla)
    ('ș', (0x0219, 0x015F), ('scedil',)),
    ('Ș', (0x0218, 0x015E), ('Scedil',)),
    # ț lower / upper (comma-below + cedilla)
    ('ț', (0x021B, 0x0163), ('tcedil',)),
    ('Ț', (0x021A, 0x0162), ('Tcedil',)),
    # common text entities
    ('"', (), ('quot',)),
    ("'", (0x0027,), ('apos',)),
)


def _numeric_entities(code_point):
    """Return the decimal and hexadecimal entity spellings of *code_point*."""
    return (
        f'&#{code_point};',
        f'&#x{code_point:x};',
        f'&#x{code_point:X};',
        f'&#x{code_point:04x};',
        f'&#x{code_point:04X};',
    )


def _build_replacements():
    """Build the ``entity text -> character`` table from ``_ENTITY_SOURCES``."""
    table = {}
    for target, code_points, names in _ENTITY_SOURCES:
        for code_point in code_points:
            for entity in _numeric_entities(code_point):
                table[entity] = target
        for entity_name in names:
            table[f'&{entity_name};'] = target
    return table


# Maps HTML entity text (e.g. '&#259;', '&#x103;', '&abreve;') to the literal
# character. Keys are generated from code points rather than typed by hand so
# that each key is real entity text: hand-typed entity literals are easily
# decoded into the character itself, which turns an entry into a no-op such as
# 'ă' -> 'ă' (or, for the apostrophe entity, into a broken string literal).
REPLACEMENTS = _build_replacements()
# Extra corrections for "odd" look-alike characters that appear as raw text.
# Code points are spelled out as escapes so visually identical glyphs cannot
# be confused; the glyph is shown in the trailing comment.
RAW_CHAR_REPLACEMENTS = dict((
    # cedilla -> comma-below
    ('\u015f', '\u0219'),  # ş -> ș
    ('\u015e', '\u0218'),  # Ş -> Ș
    ('\u0163', '\u021b'),  # ţ -> ț
    ('\u0162', '\u021a'),  # Ţ -> Ț
    # dot-below letters -> Romanian comma-below diacritics
    ('\u1e6d', '\u021b'),  # ṭ U+1E6D LATIN SMALL LETTER T WITH DOT BELOW
    ('\u1e6c', '\u021a'),  # Ṭ U+1E6C LATIN CAPITAL LETTER T WITH DOT BELOW
    ('\u1e63', '\u0219'),  # ṣ U+1E63 LATIN SMALL LETTER S WITH DOT BELOW
    ('\u1e62', '\u0218'),  # Ṣ U+1E62 LATIN CAPITAL LETTER S WITH DOT BELOW
))
def normalize_content(text: str) -> str:
    """Return *text* after applying both module-level replacement tables.

    The tables are applied in a fixed order, each as plain substring
    replacement in the table's own iteration order:

    1. ``REPLACEMENTS`` (numeric / named entities -> UTF-8 characters)
    2. ``RAW_CHAR_REPLACEMENTS`` (wrong cedilla / dot-below letters ->
       correct Romanian diacritics)
    """
    for table in (REPLACEMENTS, RAW_CHAR_REPLACEMENTS):
        for old, new in table.items():
            text = text.replace(old, new)
    return text
def main():
    """Normalize every ``.html`` file under ``BASE_DIR`` in place.

    Each file is read as UTF-8, passed through ``normalize_content`` and
    written back only when the text actually changed. Progress and a final
    summary are printed to stdout.

    Files that are not valid UTF-8 are reported and left untouched.
    """
    print(f"Încep înlocuirea în: {BASE_DIR}\n")
    changed_files = 0
    for root, _dirs, files in os.walk(BASE_DIR):
        for name in files:
            if not name.lower().endswith('.html'):
                continue
            full_path = os.path.join(root, name)
            try:
                # newline='' turns off newline translation, so the file's
                # existing line endings (LF, CRLF or mixed) are read verbatim.
                with open(full_path, 'r', encoding='utf-8', newline='') as f:
                    content = f.read()
            except UnicodeDecodeError:
                # Best effort: skip files in another encoding, but say so
                # instead of dropping them silently.
                print(f"⚠️ Sărit (nu este UTF-8): {full_path}")
                continue
            new_content = normalize_content(content)
            if new_content == content:
                continue
            # newline='' again: write the line endings back exactly as read,
            # so the only bytes that change are the replaced characters.
            with open(full_path, 'w', encoding='utf-8', newline='') as f:
                f.write(new_content)
            changed_files += 1
            print(f"✅ Actualizat: {full_path}")
    if changed_files == 0:
        print("Nu a fost nevoie de nicio modificare.")
    else:
        print(f"\nGata. Au fost actualizate {changed_files} fișiere.")


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment