Last active
October 19, 2025 12:58
-
-
Save Blayung/3758922f0048a811d537977700cf9432 to your computer and use it in GitHub Desktop.
A Python script that converts normal English text into my own phonetic, Latin-based English writing system. All phonemes are represented by digraphs and the only diacritics are optional and used to inform about stress and places where digraphs are actually pronounced separately. Example text: érlij tuw bed and érlij tuw rajz mejks a maen hélthij,…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/python | |
| from big_phoney import BigPhoney, preprocessors | |
| from os import devnull | |
| from tqdm import tqdm | |
| from contextlib import redirect_stdout | |
| from unicodedata import normalize | |
# Spelling table: phoneme symbol -> letters used for it in the output text.
# Keys are matched against the phonizer's tokens after they have been
# lowercased and had any trailing stress digit removed (the symbols look like
# ARPAbet -- confirm against the big_phoney documentation).
# Values are one, two or three letters long; the two- and three-letter ones
# are what the diaeresis logic later treats as digraphs and trigraphs.
phonemeMap = {
    # Vowels
    "aa": "aa", "ae": "ae", "ah": "a", "ao": "o", "aw": "aw",
    "ay": "aj", "eh": "e", "er": "er", "ey": "ej", "ih": "i",
    "iy": "ij", "ow": "ow", "oy": "oj", "uh": "u", "uw": "uw",
    # Consonants
    "b": "b", "ch": "tsh", "d": "d", "dh": "dh", "f": "f", "g": "g",
    "hh": "h", "jh": "dzh", "k": "k", "l": "l", "m": "m", "n": "n",
    "ng": "ng", "p": "p", "r": "r", "s": "s", "sh": "sh", "t": "t",
    "th": "th", "v": "v", "w": "w", "y": "j", "z": "z", "zh": "zh",
}
# Letters that mark an output spelling as a vowel (one syllable each) when
# they appear as its first character.
vowels = "aeiou"

# Characters that are considered part of a word: a-z followed by A-Z.
alphabet = "qwertyuiopasdfghjklzxcvbnm"
alphabet = alphabet + alphabet.upper()

# Characters after which the next word is capitalized.
punctuation = ".?!"

# Feature switches for the two optional combining marks.
enableDiaeresis = True
enableStressMark = True

# Collect every two- and three-letter spelling from the phoneme table; the
# diaeresis logic uses these to spot adjacent letters that would otherwise be
# misread as a single multi-letter phoneme.
digraphs, trigraphs = [], []
for phoneme in phonemeMap.values():
    spellingLength = len(phoneme)
    if spellingLength == 2:
        digraphs.append(phoneme)
    elif spellingLength == 3:
        trigraphs.append(phoneme)
def _transcribeWord(word, capitalize, sink):
    """Return ``word`` respelled in the target writing system.

    Args:
        word: A run of alphabet characters taken from the input text.
        capitalize: When true, the first emitted letter is upper-cased.
        sink: Writable text stream that receives anything printed to stdout
            while the phonizer runs.

    Returns:
        The respelled word, possibly containing the combining diaeresis
        (U+0308) and combining acute accent (U+0301). An empty string is
        returned when the phonizer yields no phonemes.
    """
    # stdout is redirected for the duration of the call -- presumably the
    # library prints progress/log output that would clutter the tqdm bar.
    with redirect_stdout(sink):
        phonizedWord = bigPhoney.phonize(word)

    phonemes = []
    syllableCount = 0
    # split() without an argument never produces empty tokens, so an empty
    # result or doubled separators cannot trigger an IndexError below.
    for phoneme in phonizedWord.lower().split():
        isStressed = False
        if phoneme[-1].isdigit():
            # Only a trailing "1" is treated as stressed; other digits are
            # simply stripped.
            isStressed = phoneme[-1] == "1"
            phoneme = phoneme[:-1]
            if not phoneme:
                continue

        # Symbols missing from the table are passed through unchanged.
        phoneme = phonemeMap.get(phoneme, phoneme)

        if phoneme[0] in vowels:
            syllableCount += 1

        phonemes.append((phoneme, isStressed))

    # The word is assembled in a local string. Looking only at the tail of
    # this word is equivalent to looking at the tail of the whole output,
    # because the character preceding a word is never an alphabet letter and
    # therefore can never be part of a digraph or trigraph.
    spelled = ""
    for phoneme in phonemes:
        if capitalize:
            spelled += phoneme[0][0].upper()
            capitalize = False
        else:
            spelled += phoneme[0][0]

        # The letter just written would be read together with its neighbours
        # as a different multi-letter phoneme: mark it with a diaeresis.
        if enableDiaeresis and (spelled[-2:] in digraphs or spelled[-3:] in trigraphs or (len(phoneme[0]) > 1 and spelled[-2:] + phoneme[0][1] in trigraphs)):
            spelled += "\u0308"

        # Stress is only marked in words with more than one syllable.
        if enableStressMark and syllableCount > 1 and phoneme[1]:
            spelled += "\u0301"

        spelled += phoneme[0][1:]

    return spelled


outputParts = []
word = ""
capitalizeNext = True
bigPhoney = BigPhoney(preprocessors = [preprocessors.ReplaceAbbreviations])

# Read the whole input up front and close the file straight away. UTF-8 is
# requested explicitly so the result does not depend on the platform locale.
with open("input.txt", "r", encoding="utf-8") as inputFile:
    inputText = inputFile.read()

# The handle gets its own name so the imported ``devnull`` path is not shadowed.
with open(devnull, "w") as nullSink:
    for char in tqdm(inputText):
        if char in alphabet:
            word += char
            continue

        if word != "":
            spelledWord = _transcribeWord(word, capitalizeNext, nullSink)
            if spelledWord:
                # The pending capitalization was consumed by this word.
                capitalizeNext = False
                outputParts.append(spelledWord)
            word = ""

        if char in punctuation:
            capitalizeNext = True

        # Non-word characters are copied through verbatim.
        outputParts.append(char)

    # Flush a word that runs up to the very end of the input; without this
    # the last word is lost whenever the file does not end in a non-letter.
    if word != "":
        outputParts.append(_transcribeWord(word, capitalizeNext, nullSink))
        word = ""

outputText = "".join(outputParts)

# NFC folds base letter + combining mark into a precomposed character where
# one exists. The output contains non-ASCII text, so write it as UTF-8.
with open("output.txt", "w", encoding="utf-8") as outputFile:
    outputFile.write(normalize("NFC", outputText))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment