Skip to content

Instantly share code, notes, and snippets.

@algonacci
Created May 2, 2024 04:51
Show Gist options
  • Save algonacci/22bfa346ffcfb0a3d251ec963fb21c25 to your computer and use it in GitHub Desktop.
Save algonacci/22bfa346ffcfb0a3d251ec963fb21c25 to your computer and use it in GitHub Desktop.
Transliterate from Arabic to Indonesian
# Latin rendering of each Arabic base letter. Diacritic marks are not listed
# here; this table covers letters only.
MAPPING = {
    # Alif and its hamza / madda forms
    "ا": "a",
    "أ": "a",
    "إ": "i",
    "آ": "a",
    # Consonants, in the order the original table lists them. Several
    # distinct Arabic letters deliberately share one Latin spelling
    # (e.g. two letters map to "s", two to "d", two to "t", two to "z").
    "ب": "b",
    "ت": "t",
    "ث": "th",
    "ج": "j",
    "ح": "h",
    "خ": "kh",
    "د": "d",
    "ذ": "dh",
    "ر": "r",
    "ز": "z",
    "س": "s",
    "ش": "sh",
    "ص": "s",
    "ض": "d",
    "ط": "t",
    "ظ": "z",
    "ع": "'a",  # the apostrophe and the trailing vowel are part of the value
    "غ": "gh",
    "ف": "f",
    "ق": "q",
    "ك": "k",
    "ل": "l",
    "م": "m",
    "ن": "n",
    "ه": "h",
    "و": "w",
    "ي": "y",
    # Hamza carriers, alif maqsura and ta marbuta
    "ئ": "y",
    "ى": "a",
    "ة": "h",
    "ؤ": "w",
}
# Latin rendering of each Arabic diacritic mark, keyed by code point.
VOWELS = {
    # Short vowels
    "\u064E": "a",   # fatha
    "\u0650": "i",   # kasra
    "\u064F": "u",   # damma
    # Tanwin (short vowel followed by "n")
    "\u064B": "an",  # fathatan
    "\u064D": "in",  # kasratan
    "\u064C": "un",  # dammatan
    # Marks that produce no Latin text of their own
    "\u0652": "",    # sukun
    "\u0651": "",    # shadda
}
# Characters treated as word boundaries; each is rendered as a plain space.
WORD_SEPARATORS = {
    "\u0020",  # space
    "\u00A0",  # no-break space
}
def arabic_to_latin(text: str) -> str:
    """Transliterate Arabic text into lowercase Latin characters.

    The input is processed one code point at a time using the module-level
    tables ``WORD_SEPARATORS``, ``MAPPING`` and ``VOWELS``:

    * A word separator is emitted as a single plain space.
    * A letter found in ``MAPPING`` is emitted as its Latin value, except
      that any alif form (U+0627, U+0623, U+0625, U+0622) is emitted as
      ``'a'`` when the output so far is non-empty and does not end in one
      of ``a``, ``i``, ``o``, ``u``.
    * A mark found in ``VOWELS`` is emitted only when the output so far is
      non-empty and does not end in one of ``a``, ``i``, ``o``, ``u``; this
      avoids stacking a vowel mark on top of a vowel already written.
    * A shadda (U+0651) additionally repeats the Latin value of the
      character immediately before it in ``text`` when that character is
      in ``MAPPING``. If a vowel mark sits between the letter and the
      shadda, nothing is repeated.
    * Any other character is dropped.

    Args:
        text: The Arabic string to transliterate.

    Returns:
        The transliterated string; empty when ``text`` is empty.
    """
    alif_forms = '\u0627\u0623\u0625\u0622'
    shadda = '\u0651'

    # Only non-empty fragments are stored, so the last character written is
    # always pieces[-1][-1]; the fragments are joined once at the end.
    pieces = []

    for i, char in enumerate(text):
        if char in WORD_SEPARATORS:
            pieces.append(' ')
            continue

        last = pieces[-1][-1] if pieces else ''
        after_non_vowel = bool(last) and last not in 'aiou'

        if char in MAPPING:
            latin = MAPPING[char]
            if after_non_vowel and char in alif_forms:
                latin = 'a'
            if latin:
                pieces.append(latin)
        elif char in VOWELS:
            if after_non_vowel and VOWELS[char]:
                pieces.append(VOWELS[char])

        # Shadda doubling looks at the raw previous input character, not at
        # what was written for it.
        if char == shadda and i > 0 and text[i - 1] in MAPPING:
            doubled = MAPPING[text[i - 1]]
            if doubled:
                pieces.append(doubled)

    return ''.join(pieces)
# Example usage: transliterate a fully vocalised greeting and print it.
# These statements sit at module level, so they run whenever the file is
# executed or imported.
arabic_text = "اَلسَّلَامُ عَلَيْكُمْ"
latin_text = arabic_to_latin(arabic_text)
print("Latin:", latin_text)
# Expected output:
# Latin: alsalaamu 'alaykum
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment