Skip to content

Instantly share code, notes, and snippets.

@t5k6
Last active June 24, 2026 17:03
Show Gist options
  • Select an option

  • Save t5k6/a6cfb2bb360b78c8f0e724fa2b5d2d1d to your computer and use it in GitHub Desktop.

Select an option

Save t5k6/a6cfb2bb360b78c8f0e724fa2b5d2d1d to your computer and use it in GitHub Desktop.
Transliterates Turkish names/words to Bulgarian Cyrillic
import re
# Lowercase Turkish letter -> lowercase Bulgarian Cyrillic equivalent.
# 'ğ' is handled separately because it depends on the ``silent_g`` option.
_TR_TO_BG = {
    'a': 'а', 'b': 'б', 'c': 'дж', 'ç': 'ч', 'd': 'д', 'e': 'е', 'f': 'ф',
    'g': 'г', 'h': 'х', 'ı': 'ъ', 'i': 'и', 'j': 'ж', 'k': 'к', 'l': 'л',
    'm': 'м', 'n': 'н', 'o': 'о', 'p': 'п', 'r': 'р', 's': 'с', 'ş': 'ш',
    't': 'т', 'u': 'у', 'v': 'в', 'y': 'й', 'z': 'з', 'ö': 'йо', 'ü': 'ю',
}

# Digraph -> (spelling at the start of a word, spelling inside a word).
_TR_DIGRAPHS = {'ya': ('я', 'йа'), 'yu': ('ю', 'йу')}


def _tr_lower(text):
    """Lowercase ``text`` using Turkish casing rules (İ -> i, I -> ı)."""
    # Plain str.lower() turns 'İ' into 'i' + U+0307 and 'I' into dotted 'i',
    # so both capitals must be mapped by hand before lowercasing the rest.
    return text.replace('İ', 'i').replace('I', 'ı').lower()


def _in_caps_run(text, start, end):
    """Return True if the letter adjacent to ``text[start:end]`` is uppercase."""
    # Prefer the following letter ("Cem" vs "CEM"); at the end of a word fall
    # back to the preceding character ("HAC").
    if end < len(text) and text[end].isalpha():
        return text[end].isupper()
    return start > 0 and text[start - 1].isupper()


def _match_case(original, cyrillic_lower, caps_run):
    """Return ``cyrillic_lower`` re-cased to follow the case of ``original``."""
    if original.isupper():
        # A lone capital that expands to several letters ('C' -> 'дж') is
        # fully uppercased only inside an all-caps run; otherwise "Cem" would
        # come out as "ДЖем" instead of "Джем".
        if len(original) > 1 or len(cyrillic_lower) == 1 or caps_run:
            return cyrillic_lower.upper()
        return cyrillic_lower.capitalize()
    if original[0].isupper():
        return cyrillic_lower.capitalize()
    return cyrillic_lower


def _convert_name(text, silent_g):
    """Transliterate one Turkish string to Bulgarian Cyrillic."""
    if not text:
        return ""

    pieces = []
    i, n = 0, len(text)
    while i < n:
        # 1. Two-letter digraphs take priority over single letters.
        pair = text[i:i + 2]
        forms = _TR_DIGRAPHS.get(_tr_lower(pair))
        if forms is not None:
            # "Start of word" means start of the string or after any
            # non-letter (space, hyphen, ...), not only index 0.
            word_start = i == 0 or not text[i - 1].isalpha()
            cyr = forms[0] if word_start else forms[1]
            pieces.append(_match_case(pair, cyr, False))
            i += 2
            continue

        # 2. Single letters.
        ch = text[i]
        low = _tr_lower(ch)
        if low == 'ğ':
            if not silent_g:
                pieces.append(_match_case(ch, 'г', False))
        elif low in _TR_TO_BG:
            caps_run = _in_caps_run(text, i, i + 1)
            pieces.append(_match_case(ch, _TR_TO_BG[low], caps_run))
        else:
            pieces.append(ch)  # Keep punctuation, digits, unmapped letters.
        i += 1

    out = "".join(pieces)

    # Exception: a final -ЪТ is written -ИТ (case-aware). Only the end of the
    # whole string is checked, not the end of every word.
    if out.upper().endswith("ЪТ"):
        tail = out[-2:]
        if tail.isupper():
            suffix = "ИТ"
        elif tail[0].isupper():
            suffix = "Ит"
        else:
            suffix = "ит"
        out = out[:-2] + suffix
    return out


def transliterate_tr_to_bg(names, silent_g=False):
    """Transliterate Turkish names/words to Bulgarian Cyrillic.

    Mapping rules:
      - Turkish-specific letters: ı→ъ, İ→И, I→Ъ, ş→ш, ç→ч, ğ→г (optional).
      - Digraphs: ya→я and yu→ю at the start of a word, йа / йу elsewhere.
      - Multi-character mappings: ö→йо, c→дж (ü maps to the single letter ю).
      - A result ending in -ът is rewritten to end in -ит.
      - Characters without a mapping (punctuation, digits, q/w/x, ...) are
        copied through unchanged.

    Letter case follows the input. A single capital that expands to two
    Cyrillic letters becomes title case ("Cem" → "Джем") unless it sits in an
    all-caps run ("CEM" → "ДЖЕМ"). Casing uses Turkish rules, so dotted İ and
    dotless I are distinguished.

    Args:
        names: A string, or a list, tuple, or set of values to transliterate.
            Items of a collection are converted with ``str()`` first.
        silent_g: If True, ğ/Ğ is treated as silent (omitted).
            If False (default), ğ/Ğ maps to г/Г.

    Returns:
        The transliterated string for a string input, otherwise a list of
        transliterated strings (for a set, in the set's iteration order).

    Raises:
        TypeError: If ``names`` is not a str, list, tuple, or set.

    Note:
        This is a character-by-character transliteration and does not handle
        palatalization of k, g, l before front vowels.
    """
    if isinstance(names, str):
        return _convert_name(names, silent_g)
    if isinstance(names, (list, tuple, set)):
        return [_convert_name(str(item), silent_g) for item in names]
    raise TypeError("Input must be a string or iterable of strings.")
# ==========================================
# TEST BENCH
# ==========================================
# Sample inputs covering mixed case, all caps, multi-word names and the
# Turkish-specific letters.
test_cases = [
    "Özdemir", "İLYAS", "Feyzullah Yardımcı", "Yaşar Dinç",
    "CEVİZ", "Kaya", "Ünal", "Cem", "Işık", "Dağ"
]

# One name per letter of the Turkish alphabet, transliterated as a batch.
test_batch = [
    "Adem", "Bıçak", "Cem", "Çelik", "Demir", "Enver", "Fatih",
    "Güneş", "Dağ", "Hasan", "Işık", "İsmail", "Jale", "Kaya",
    "Leyla", "Mehmet", "Nevzat", "Osman", "Özdemir", "Polat",
    "Ramazan", "Selim", "Şahin", "Tarık", "Uğur", "Ünal", "Veli",
    "Yaşar", "Ziya"
]


def _run_test_bench():
    """Print the demo transliterations, then those of names.txt if present."""
    print("=== CASE PRESERVATION TEST ===")
    for name in test_cases:
        print(f"{name:<20} -> {transliterate_tr_to_bg(name)}")

    print("\n=== BATCH TEST (Your Examples) ===")
    results = transliterate_tr_to_bg(test_batch)
    for original, cyrillic in zip(test_batch, results):
        print(f"{original:<10} -> {cyrillic}")

    # names.txt is an optional input file with one name per line; a missing
    # file should not abort the demo with a traceback.
    try:
        with open("names.txt", "r", encoding="utf-8") as file:
            for line in file:
                print(transliterate_tr_to_bg(line.strip()))
    except FileNotFoundError:
        print("names.txt not found; skipping file transliteration.")


# Run the demo only when executed as a script, so that importing this module
# for transliterate_tr_to_bg() neither prints nor touches the filesystem.
if __name__ == "__main__":
    _run_test_bench()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment