|
# Consonants (vyanjana) mapped to their Latin spelling WITHOUT the inherent
# "a"; the caller appends the vowel. Several letters intentionally share a
# Latin spelling (e.g. all nasals other than "म" become "n").
VYANJANA_MAP = {
    # ka-varga
    "क": "k", "ख": "kh",
    "ग": "g", "घ": "gh",
    "ङ": "n",

    # cha-varga
    "च": "ch", "छ": "chh",
    "ज": "j", "झ": "jh",
    "ञ": "n",

    # Ta-varga (spelled the same as ta-varga in this scheme)
    "ट": "t", "ठ": "th",
    "ड": "d", "ढ": "dh",
    "ण": "n",

    # ta-varga
    "त": "t", "थ": "th",
    "द": "d", "ध": "dh",
    "न": "n",

    # pa-varga
    "प": "p", "फ": "ph",
    "ब": "b", "भ": "bh",
    "म": "m",

    # Remaining consonants
    "य": "y",
    "र": "r",
    "ल": "l",
    "व": "v",
    "श": "sh",
    "ष": "sh",
    "स": "s",
    "ह": "h",
    "ळ": "l",
}

# Oshthya (pa-varga) consonants: an anusvara placed before one of these is
# written "m"; before any other consonant it is written "n".
OSHTHYA_VANJAN = {"प", "फ", "ब", "भ", "म"}

# Independent vowels (swara), used when a vowel starts a syllable on its own.
SVAR_MAP = {
    "अ": "a", "आ": "aa",
    "इ": "i", "ई": "ee",
    "उ": "u", "ऊ": "oo",
    "ए": "e", "ऐ": "ai",
    "ओ": "o", "औ": "au",
    "ऋ": "ri",
}

# Dependent vowel signs (matra) that replace a consonant's inherent "a".
# The visarga is listed here too, so a consonant directly followed by it is
# rendered as consonant + "ah".
MATRA_MAP = {
    "ा": "aa",
    "ि": "i", "ी": "ee",
    "ु": "u", "ू": "oo",
    "े": "e", "ै": "ai",
    "ो": "o", "ौ": "au",
    "ः": "ah", "ृ": "ri",
}

# Signs that modify a syllable rather than spell a sound of their own.
ANUSVARA = "ं"  # nasalises the syllable
HALANTA = "्"  # suppresses the inherent "a" of the preceding consonant
VISARGA = "ः"  # trailing "h"
|
|
|
def _get_nasalizer_for_vyanjana(vyanyana: str) -> str:
    """Return the Latin nasal an anusvara takes before the given consonant.

    Args:
        vyanyana: The consonant that follows the anusvara.

    Returns:
        "m" when the consonant is an oshthya vyanjana (a member of
        ``OSHTHYA_VANJAN``), otherwise "n".
    """
    return "m" if vyanyana in OSHTHYA_VANJAN else "n"
|
|
|
def _get_next(chars: list[str]) -> str:
    """Consume the next syllable from ``chars`` and return its Latin transliteration.

    A syllable is either an independent vowel (swara) or a consonant
    (vyanjana) with its vowel: an explicit matra, the inherent "a", or -- when
    a halanta follows -- no vowel at all, in which case the rest of the
    consonant cluster is consumed recursively. The vowel may be followed by an
    anusvara (rendered as the nasal matching the next consonant) or a visarga
    (rendered as "h"); this also applies after an explicit matra.

    Args:
        chars: Remaining characters of the text; must be non-empty. The list
            is mutated: every character that belongs to the returned syllable
            is removed from its front.

    Returns:
        The Latin transliteration of the consumed syllable.

    Raises:
        ValueError: If the first character is neither a vyanjana nor a swara,
            or if an anusvara is not followed by a vyanjana.
    """
    first = chars.pop(0)

    if first in VYANJANA_MAP:
        latin = VYANJANA_MAP[first]
        following = chars[0] if chars else None

        if following == HALANTA:
            # The halanta strips the inherent "a".
            chars.pop(0)
            if chars:
                # Start of a consonant cluster: the next syllable supplies
                # the remaining consonant(s) and the vowel.
                return latin + _get_next(chars)
            # The word ends on the halanta: bare consonant.
            return latin

        if following in MATRA_MAP and following != VISARGA:
            # Explicit vowel sign replaces the inherent "a".
            latin += MATRA_MAP[chars.pop(0)]
        else:
            # No vowel sign: keep the inherent "a". A visarga directly after
            # the consonant is left for the modifier handling below, which
            # yields the same consonant + "ah" spelling.
            latin += "a"
    elif first in SVAR_MAP:
        latin = SVAR_MAP[first]
    else:
        raise ValueError(f"Invalid devanagari text, expected start of a syllable (a vyanjana or swara, got {first})")

    # Optional modifier that closes the syllable.
    modifier = chars[0] if chars else None

    if modifier == ANUSVARA:
        chars.pop(0)
        # The nasal depends on the consonant that follows; that consonant is
        # only peeked at here and is consumed by the next call.
        if chars and chars[0] in VYANJANA_MAP:
            return latin + _get_nasalizer_for_vyanjana(chars[0])
        raise ValueError("Expected a vyanjana after anusvara")

    if modifier == VISARGA:
        chars.pop(0)
        return latin + "h"

    return latin
|
|
|
def transliterate(devanagari_text: str) -> str:
    """Transform Devanagari text into its transliterated equivalent in the Latin alphabet.

    The text is processed word by word: whitespace characters are copied to
    the output unchanged and each run of non-whitespace characters is
    transliterated syllable by syllable.

    Args:
        devanagari_text: Text to transliterate. May be empty.

    Returns:
        The Latin transliteration, with the original whitespace preserved.

    Raises:
        ValueError: Propagated from ``_get_next`` when a word contains a
            character sequence it cannot interpret.
    """
    pieces: list[str] = []
    pending: list[str] = []  # characters of the word currently being collected

    def flush_word() -> None:
        # _get_next removes the characters it consumes from the front of
        # ``pending``, so this loop ends once the whole word is transliterated.
        while pending:
            pieces.append(_get_next(pending))

    for char in devanagari_text:
        if char.isspace():
            flush_word()
            pieces.append(char)
        else:
            pending.append(char)
    flush_word()

    return "".join(pieces)
|
|
|
|
|
def test_transliterate():
    """Check ``transliterate`` against known Devanagari -> Latin pairs.

    Each result is passed through ``str.title()`` before comparison, so the
    expected values are written capitalised.
    """
    cases = [
        # Plain syllables, matras and consonant clusters
        ("अभिजीत", "Abhijeeta"),
        ("संस्कृत", "Sanskrita"),
        ("राणा", "Raanaa"),
        ("प्रकृति", "Prakriti"),
        ("फुल", "Phula"),
        # Anusvara: "m" before an oshthya consonant, "n" otherwise
        ("अंबर", "Ambara"),
        ("अंक", "Anka"),
        ("आंसू", "Aansoo"),
        # Visarga
        ("हिमः", "Himah"),
        # Word-final halanta and a mid-word cluster
        ("इंजिन्", "Injin"),
        ("मोटर्", "Motar"),
        ("लक्षमन", "Lakshamana"),
    ]

    for devanagari, expected in cases:
        actual = transliterate(devanagari).title()
        assert actual == expected, (
            f"{devanagari!r}: expected {expected!r}, got {actual!r}"
        )
|
|
|
if __name__ == "__main__":
    # Run the self-test when the module is executed as a script.
    test_transliterate()