Last active
June 24, 2026 17:03
-
-
Save t5k6/a6cfb2bb360b78c8f0e724fa2b5d2d1d to your computer and use it in GitHub Desktop.
Transliterates Turkish names/words to Bulgarian Cyrillic
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
# Lowercase Turkish letter -> lowercase Bulgarian Cyrillic. Built once at
# import time instead of on every conversion.
_TR_TO_BG = {
    'a': 'а', 'b': 'б', 'c': 'дж', 'ç': 'ч', 'd': 'д', 'e': 'е', 'f': 'ф',
    'g': 'г', 'ğ': 'г', 'h': 'х', 'ı': 'ъ', 'i': 'и', 'j': 'ж', 'k': 'к',
    'l': 'л', 'm': 'м', 'n': 'н', 'o': 'о', 'ö': 'йо', 'p': 'п', 'r': 'р',
    's': 'с', 'ş': 'ш', 't': 'т', 'u': 'у', 'ü': 'ю', 'v': 'в', 'y': 'й',
    'z': 'з',
}

# "y" + vowel pairs: (form at the start of a word, form elsewhere).
_Y_DIGRAPHS = {
    'ya': ('я', 'йа'),
    'yu': ('ю', 'йу'),
}


def _tr_lower(ch):
    """Lowercase one character using Turkish rules for dotted/dotless I.

    Python's ``str.lower`` is locale-independent: it turns 'I' into 'i'
    (Turkish wants 'ı') and 'İ' into 'i' + U+0307 (two code points), so
    neither would hit the right entry of the mapping table.
    """
    if ch == 'I':
        return 'ı'
    if ch == 'İ':
        return 'i'
    return ch.lower()


def _in_upper_run(text, idx):
    """Return True if the capital at ``text[idx]`` sits in an all-caps run.

    The following letter decides; if there is none (end of text or a
    non-letter follows), the preceding character decides instead.
    """
    nxt = text[idx + 1] if idx + 1 < len(text) else ""
    if nxt.isalpha():
        return nxt.isupper()
    prev = text[idx - 1] if idx > 0 else ""
    return prev.isupper()


def _convert_tr_to_bg(text, silent_g):
    """Transliterate a single string; see ``transliterate_tr_to_bg``."""
    if not text:
        return ""

    pieces = []
    i, n = 0, len(text)
    while i < n:
        ch = text[i]

        # 1. "ya"/"yu" digraphs take precedence over single letters.
        if i + 1 < n:
            pair = text[i:i + 2]
            forms = _Y_DIGRAPHS.get(pair.lower())
            if forms is not None:
                # Word start means start of the text OR right after a
                # non-letter (space, hyphen, ...), not just index 0.
                at_word_start = i == 0 or not text[i - 1].isalpha()
                cyr = forms[0] if at_word_start else forms[1]
                if pair.isupper():
                    cyr = cyr.upper()
                elif pair[0].isupper():
                    cyr = cyr.capitalize()
                pieces.append(cyr)
                i += 2
                continue

        # 2. Single characters.
        low = _tr_lower(ch)
        if low == 'ğ' and silent_g:
            i += 1
            continue

        cyr = _TR_TO_BG.get(low)
        if cyr is None:
            pieces.append(ch)  # Keep punctuation, digits, unmapped letters.
        elif not ch.isupper():
            pieces.append(cyr)
        elif len(cyr) == 1 or _in_upper_run(text, i):
            pieces.append(cyr.upper())
        else:
            # One capital letter producing several Cyrillic letters inside
            # a normally-cased word: "Cem" -> "Джем", not "ДЖем".
            pieces.append(cyr.capitalize())
        i += 1

    out = "".join(pieces)

    # Exception: final -ЪТ -> -ИТ (case-aware), applied to the end of the
    # whole string.
    if out.upper().endswith("ЪТ"):
        if out[-2:].isupper():
            suffix = "ИТ"
        elif out[-2].isupper():
            suffix = "Ит"
        else:
            suffix = "ит"
        out = out[:-2] + suffix
    return out


def transliterate_tr_to_bg(names, silent_g=False):
    """Transliterate Turkish names/words to Bulgarian Cyrillic.

    Mapping conventions:
      - Turkish-specific letters: ı→ъ, I→Ъ, i→и, İ→И, ş→ш, ç→ч, ğ→г
        (optionally silent). Dotted/dotless I is lowercased with Turkish
        rules, so 'I' and 'İ' are never confused.
      - Digraphs: ya/yu → я/ю at the start of a word, йа/йу elsewhere.
      - Multi-letter results: c→дж, ö→йо. A lone capital yields a
        capitalised result ("Cem" → "Джем"); inside an all-caps run it is
        fully upper-cased ("CEVİZ" → "ДЖЕВИЗ").
      - A string ending in -ът is rewritten to end in -ит.
      - Characters without a mapping are copied through unchanged.

    Args:
        names: A string, or a list, tuple, or set of values to
            transliterate. Collection items are passed through ``str()``.
        silent_g: If True, ğ/Ğ is treated as silent (omitted).
            If False (default), ğ/Ğ maps to г/Г.

    Returns:
        The transliterated string, or a list of strings if the input is a
        collection.

    Raises:
        TypeError: If ``names`` is neither a string nor a list/tuple/set.

    Note:
        This is a character-by-character transliteration and does not
        handle palatalization of k, g, l before front vowels.
    """
    if isinstance(names, str):
        return _convert_tr_to_bg(names, silent_g)
    if isinstance(names, (list, tuple, set)):
        return [_convert_tr_to_bg(str(item), silent_g) for item in names]
    raise TypeError("Input must be a string or iterable of strings.")
# ==========================================
# TEST BENCH
# ==========================================
# Sample inputs exercising case preservation (mixed case, all caps,
# multi-word names, Turkish-specific letters).
test_cases = [
    "Özdemir", "İLYAS", "Feyzullah Yardımcı", "Yaşar Dinç",
    "CEVİZ", "Kaya", "Ünal", "Cem", "Işık", "Dağ",
]

# One sample name per letter of the Turkish alphabet covered by the mapping.
test_batch = [
    "Adem", "Bıçak", "Cem", "Çelik", "Demir", "Enver", "Fatih",
    "Güneş", "Dağ", "Hasan", "Işık", "İsmail", "Jale", "Kaya",
    "Leyla", "Mehmet", "Nevzat", "Osman", "Özdemir", "Polat",
    "Ramazan", "Selim", "Şahin", "Tarık", "Uğur", "Ünal", "Veli",
    "Yaşar", "Ziya",
]


def _run_test_bench(names_path="names.txt"):
    """Print the demo transliterations, then transliterate a names file.

    Args:
        names_path: UTF-8 text file with one name per line; each line is
            stripped, transliterated and printed.

    Raises:
        OSError: If ``names_path`` cannot be opened (e.g. it does not exist).
    """
    print("=== CASE PRESERVATION TEST ===")
    for name in test_cases:
        print(f"{name:<20} -> {transliterate_tr_to_bg(name)}")

    print("\n=== BATCH TEST (Your Examples) ===")
    results = transliterate_tr_to_bg(test_batch)
    for original, cyrillic in zip(test_batch, results):
        print(f"{original:<10} -> {cyrillic}")

    with open(names_path, "r", encoding="utf-8") as file:
        for line in file:
            print(transliterate_tr_to_bg(line.strip()))


# Run the demo only when executed as a script, so importing this module to
# use the transliteration function neither prints nor touches the filesystem.
if __name__ == "__main__":
    _run_test_bench()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment