Skip to content

Instantly share code, notes, and snippets.

@me-suzy
Created March 3, 2026 11:51
Show Gist options
  • Select an option

  • Save me-suzy/0d9454405daeee981e105d6f2e79c462 to your computer and use it in GitHub Desktop.

Select an option

Save me-suzy/0d9454405daeee981e105d6f2e79c462 to your computer and use it in GitHub Desktop.
diacritice 3534.py
import os
# Root folder handed to os.walk() in main(); every .html file found beneath
# it is read and, if needed, rewritten in place.
BASE_DIR = r'e:\Carte\BB\17 - Site Leadership\Principal\ro'
# HTML character references for Romanian diacritics, mapped to the literal
# character that should appear in the page text.
#
# NOTE(review): in the reviewed copy of this script the entity keys had been
# decoded into the characters themselves (leaving duplicate identity pairs
# and an unterminated ''' literal), so the spellings are rebuilt here from
# the code points instead of being copied -- confirm against the real pages.
def _entity_spellings(code_point, name=None):
    """Return the character-reference spellings handled for one code point.

    Covers the decimal form, the hexadecimal form in either letter case,
    with and without zero padding to four digits, and the named form when
    *name* is given.
    """
    spellings = [
        f'&#{code_point};',
        f'&#x{code_point:X};',
        f'&#x{code_point:x};',
        f'&#x{code_point:04X};',
        f'&#x{code_point:04x};',
    ]
    if name is not None:
        spellings.append(f'&{name};')
    # dict.fromkeys drops repeated spellings (e.g. a hex form without
    # letters reads the same in both cases) while keeping a stable order.
    return list(dict.fromkeys(spellings))


# (code point named by the reference, entity name or None, replacement)
_ENTITY_TABLE = (
    # a with breve
    (0x0103, 'abreve', '\u0103'),  # ă
    (0x0102, 'Abreve', '\u0102'),  # Ă
    # a with circumflex
    (0x00E2, 'acirc', '\u00e2'),   # â
    (0x00C2, 'Acirc', '\u00c2'),   # Â
    # i with circumflex
    (0x00EE, 'icirc', '\u00ee'),   # î
    (0x00CE, 'Icirc', '\u00ce'),   # Î
    # s: comma-below references (numeric spellings only) and cedilla
    # references, both resolved to the comma-below letter
    (0x0219, None, '\u0219'),      # ș
    (0x015F, 'scedil', '\u0219'),  # ş -> ș
    (0x0218, None, '\u0218'),      # Ș
    (0x015E, 'Scedil', '\u0218'),  # Ş -> Ș
    # t: same treatment as s
    (0x021B, None, '\u021b'),      # ț
    (0x0163, 'tcedil', '\u021b'),  # ţ -> ț
    (0x021A, None, '\u021a'),      # Ț
    (0x0162, 'Tcedil', '\u021a'),  # Ţ -> Ț
)

REPLACEMENTS = {
    spelling: replacement
    for code_point, name, replacement in _ENTITY_TABLE
    for spelling in _entity_spellings(code_point, name)
}

# Common text entities.
# NOTE(review): these are replaced wherever they occur, including inside
# attribute values, where a literal quote can end the attribute early --
# kept because the original table listed them; confirm this is intended.
REPLACEMENTS['&quot;'] = '"'
REPLACEMENTS['&#39;'] = "'"
# Additional corrections for "odd" literal characters: each wrong look-alike
# (first item) is rewritten to the correct Romanian letter (second item).
RAW_CHAR_REPLACEMENTS = dict([
    # cedilla -> comma below
    ('ş', 'ș'),
    ('Ş', 'Ș'),
    ('ţ', 'ț'),
    ('Ţ', 'Ț'),
    # dot below -> Romanian comma-below letters
    ('ṭ', 'ț'),  # U+1E6D LATIN SMALL LETTER T WITH DOT BELOW
    ('Ṭ', 'Ț'),  # U+1E6C LATIN CAPITAL LETTER T WITH DOT BELOW
    ('ṣ', 'ș'),  # U+1E63 LATIN SMALL LETTER S WITH DOT BELOW
    ('Ṣ', 'Ș'),  # U+1E62 LATIN CAPITAL LETTER S WITH DOT BELOW
])
def normalize_content(
    text: str,
    *,
    entities: "dict[str, str] | None" = None,
    raw_chars: "dict[str, str] | None" = None,
) -> str:
    """Return *text* with Romanian diacritics written as the intended letters.

    Two substitution passes run in order:

    1. every key of the entity table is replaced by its value;
    2. every key of the raw-character table (wrong look-alike letters) is
       replaced by its value.

    Args:
        text: The document text to normalize.
        entities: Mapping used for pass 1. Defaults to the module-level
            ``REPLACEMENTS`` table.
        raw_chars: Mapping used for pass 2. Defaults to the module-level
            ``RAW_CHAR_REPLACEMENTS`` table.

    Returns:
        The normalized text; equal to *text* when no key occurs in it.
    """
    # Resolve the defaults at call time so the module tables are only
    # needed when the caller does not supply its own.
    if entities is None:
        entities = REPLACEMENTS
    if raw_chars is None:
        raw_chars = RAW_CHAR_REPLACEMENTS

    # Entities first: whatever they decode to is then visible to the
    # raw-character pass.
    for table in (entities, raw_chars):
        for src, dst in table.items():
            text = text.replace(src, dst)
    return text
def main():
    """Normalize every ``.html`` file under ``BASE_DIR`` in place.

    Walks ``BASE_DIR`` recursively, reads each file whose name ends in
    ``.html`` (case-insensitive) as UTF-8, passes the text through
    ``normalize_content`` and writes the file back only when the text
    changed. Files that are not valid UTF-8 are left untouched and
    reported. Progress and a final summary are printed to stdout.
    """
    print(f"Încep înlocuirea în: {BASE_DIR}\n")
    changed_files = 0
    skipped_files = 0
    for root, _dirs, files in os.walk(BASE_DIR):
        for name in files:
            if not name.lower().endswith('.html'):
                continue
            full_path = os.path.join(root, name)
            try:
                # newline='' disables newline translation, so the file's own
                # line endings survive the read/write round trip.
                with open(full_path, 'r', encoding='utf-8', newline='') as f:
                    content = f.read()
            except UnicodeDecodeError:
                # Not valid UTF-8: leave the file alone, but report it so the
                # skip is visible instead of silent.
                skipped_files += 1
                print(f"⚠️ Omis (nu este UTF-8 valid): {full_path}")
                continue
            new_content = normalize_content(content)
            if new_content != content:
                with open(full_path, 'w', encoding='utf-8', newline='') as f:
                    f.write(new_content)
                changed_files += 1
                print(f"✅ Actualizat: {full_path}")
    if changed_files == 0:
        print("Nu a fost nevoie de nicio modificare.")
    else:
        print(f"\nGata. Au fost actualizate {changed_files} fișiere.")
    if skipped_files:
        print(f"Fișiere omise (nu sunt UTF-8 valid): {skipped_files}")


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment