algonacci · May 2, 2024 04:51
diff --git a/arab_to_latin.py b/arab_to_latin.py
 # Arabic letters -> Latin spelling.  The scheme is lossy: several distinct
 # letters share one spelling (for example both 'س' and 'ص' become 's').
 MAPPING = {
     'ا': 'a', 'أ': 'a', 'إ': 'i', 'آ': 'a', 'ب': 'b', 'ت': 't', 'ث': 'th', 'ج': 'j', 'ح': 'h',
     'خ': 'kh', 'د': 'd', 'ذ': 'dh', 'ر': 'r', 'ز': 'z', 'س': 's', 'ش': 'sh', 'ص': 's',
     'ض': 'd', 'ط': 't', 'ظ': 'z', 'ع': "'a", 'غ': 'gh', 'ف': 'f', 'ق': 'q', 'ك': 'k',
     'ل': 'l', 'م': 'm', 'ن': 'n', 'ه': 'h', 'و': 'w', 'ي': 'y', 'ئ': 'y', 'ى': 'a',
     'ة': 'h', 'ؤ': 'w',
 }

 # Diacritic marks -> Latin spelling.  Sukun and shadda add no text of their
 # own; shadda is handled separately because it doubles the preceding letter.
 VOWELS = {
     '\u064E': 'a',  # Fatha
     '\u0650': 'i',  # Kasra
     '\u064F': 'u',  # Damma
     '\u064B': 'an',  # Fathatan
     '\u064D': 'in',  # Kasratan
     '\u064C': 'un',  # Dammatan
     '\u0652': '',  # Sukun
     '\u0651': '',  # Shadda
 }

 WORD_SEPARATORS = {'\u0020', '\u00A0'}  # Space and No-Break Space

 _SHADDA = '\u0651'
 _ALIF_FORMS = frozenset('\u0627\u0623\u0625\u0622')  # ا أ إ آ
 _LATIN_VOWELS = frozenset('aiou')


 def arabic_to_latin(text: str) -> str:
     """Transliterate Arabic text into a simple Latin spelling.

     Letters are looked up in ``MAPPING`` and diacritic marks in ``VOWELS``.
     Every character in ``WORD_SEPARATORS`` becomes a single ASCII space.
     Characters found in none of the tables (digits, punctuation, newlines,
     Latin text, ...) are dropped from the output.

     Rules applied on top of the plain table lookup:

     * An alif form written directly after a consonant is always spelled
       ``a``; at the start of the text, at the start of a word, or after a
       vowel it keeps its ``MAPPING`` spelling (so ``إ`` stays ``i``).
     * A diacritic is written only when the output does not already end in
       one of the Latin vowels ``a``, ``i``, ``o``, ``u``.
     * A shadda doubles the most recent letter of the current word, whether
       the shadda is stored before or after that letter's vowel mark.

     Args:
         text: The string to transliterate; may be empty.

     Returns:
         The transliterated string (empty for empty input).
     """
     pieces = []
     tail = ''  # last Latin character emitted so far ('' before any output)
     consonant_at = None  # index in ``pieces`` of the letter a shadda doubles

     for char in text:
         if char in WORD_SEPARATORS:
             pieces.append(' ')
             tail = ' '
             consonant_at = None  # a shadda never reaches across words
         elif char == _SHADDA:
             if consonant_at is not None:
                 # Insert the copy next to the letter itself so that a vowel
                 # mark already written stays after the doubled consonant.
                 # Unicode normalisation stores the vowel mark before the
                 # shadda, so looking only at the previous character would
                 # miss the doubling for normalised text.
                 pieces.insert(consonant_at, pieces[consonant_at])
                 consonant_at = None  # a repeated shadda must not triple it
         elif char in MAPPING:
             latin = MAPPING[char]
             after_consonant = (
                 tail not in ('', ' ') and tail not in _LATIN_VOWELS
             )
             if char in _ALIF_FORMS and after_consonant:
                 latin = 'a'
             pieces.append(latin)
             tail = latin[-1]
             consonant_at = len(pieces) - 1
         elif char in VOWELS:
             mark = VOWELS[char]
             # ``tail`` is empty only before any output; a leading mark has
             # no letter to attach to and is skipped.
             if mark and tail and tail not in _LATIN_VOWELS:
                 pieces.append(mark)
                 tail = mark[-1]
         else:
             # Unknown character: dropped, and it separates any later shadda
             # from the letter before it.
             consonant_at = None

     return ''.join(pieces)


 # Example usage (runs when the module is executed or imported)
 arabic_text = "اَلسَّلَامُ عَلَيْكُمْ"
 latin_text = arabic_to_latin(arabic_text)
 print("Latin:", latin_text)

 # Latin: alssalaamu 'alaykum
	# Arabic letters -> Latin spelling.  The scheme is lossy: several distinct
	# letters share one spelling (for example both 'س' and 'ص' become 's').
	MAPPING = {
	    'ا': 'a', 'أ': 'a', 'إ': 'i', 'آ': 'a', 'ب': 'b', 'ت': 't', 'ث': 'th', 'ج': 'j', 'ح': 'h',
	    'خ': 'kh', 'د': 'd', 'ذ': 'dh', 'ر': 'r', 'ز': 'z', 'س': 's', 'ش': 'sh', 'ص': 's',
	    'ض': 'd', 'ط': 't', 'ظ': 'z', 'ع': "'a", 'غ': 'gh', 'ف': 'f', 'ق': 'q', 'ك': 'k',
	    'ل': 'l', 'م': 'm', 'ن': 'n', 'ه': 'h', 'و': 'w', 'ي': 'y', 'ئ': 'y', 'ى': 'a',
	    'ة': 'h', 'ؤ': 'w',
	}

	# Diacritic marks -> Latin spelling.  Sukun and shadda add no text of their
	# own; shadda is handled separately because it doubles the preceding letter.
	VOWELS = {
	    '\u064E': 'a',  # Fatha
	    '\u0650': 'i',  # Kasra
	    '\u064F': 'u',  # Damma
	    '\u064B': 'an',  # Fathatan
	    '\u064D': 'in',  # Kasratan
	    '\u064C': 'un',  # Dammatan
	    '\u0652': '',  # Sukun
	    '\u0651': '',  # Shadda
	}

	WORD_SEPARATORS = {'\u0020', '\u00A0'}  # Space and No-Break Space

	_SHADDA = '\u0651'
	_ALIF_FORMS = frozenset('\u0627\u0623\u0625\u0622')  # ا أ إ آ
	_LATIN_VOWELS = frozenset('aiou')


	def arabic_to_latin(text: str) -> str:
	    """Transliterate Arabic text into a simple Latin spelling.

	    Letters are looked up in ``MAPPING`` and diacritic marks in ``VOWELS``.
	    Every character in ``WORD_SEPARATORS`` becomes a single ASCII space.
	    Characters found in none of the tables (digits, punctuation, newlines,
	    Latin text, ...) are dropped from the output.

	    Rules applied on top of the plain table lookup:

	    * An alif form written directly after a consonant is always spelled
	      ``a``; at the start of the text, at the start of a word, or after a
	      vowel it keeps its ``MAPPING`` spelling (so ``إ`` stays ``i``).
	    * A diacritic is written only when the output does not already end in
	      one of the Latin vowels ``a``, ``i``, ``o``, ``u``.
	    * A shadda doubles the most recent letter of the current word, whether
	      the shadda is stored before or after that letter's vowel mark.

	    Args:
	        text: The string to transliterate; may be empty.

	    Returns:
	        The transliterated string (empty for empty input).
	    """
	    pieces = []
	    tail = ''  # last Latin character emitted so far ('' before any output)
	    consonant_at = None  # index in ``pieces`` of the letter a shadda doubles

	    for char in text:
	        if char in WORD_SEPARATORS:
	            pieces.append(' ')
	            tail = ' '
	            consonant_at = None  # a shadda never reaches across words
	        elif char == _SHADDA:
	            if consonant_at is not None:
	                # Insert the copy next to the letter itself so that a vowel
	                # mark already written stays after the doubled consonant.
	                # Unicode normalisation stores the vowel mark before the
	                # shadda, so looking only at the previous character would
	                # miss the doubling for normalised text.
	                pieces.insert(consonant_at, pieces[consonant_at])
	                consonant_at = None  # a repeated shadda must not triple it
	        elif char in MAPPING:
	            latin = MAPPING[char]
	            after_consonant = (
	                tail not in ('', ' ') and tail not in _LATIN_VOWELS
	            )
	            if char in _ALIF_FORMS and after_consonant:
	                latin = 'a'
	            pieces.append(latin)
	            tail = latin[-1]
	            consonant_at = len(pieces) - 1
	        elif char in VOWELS:
	            mark = VOWELS[char]
	            # ``tail`` is empty only before any output; a leading mark has
	            # no letter to attach to and is skipped.
	            if mark and tail and tail not in _LATIN_VOWELS:
	                pieces.append(mark)
	                tail = mark[-1]
	        else:
	            # Unknown character: dropped, and it separates any later shadda
	            # from the letter before it.
	            consonant_at = None

	    return ''.join(pieces)


	# Example usage (runs when the module is executed or imported)
	arabic_text = "اَلسَّلَامُ عَلَيْكُمْ"
	latin_text = arabic_to_latin(arabic_text)
	print("Latin:", latin_text)

	# Latin: alssalaamu 'alaykum