Skip to content

Instantly share code, notes, and snippets.

@Blayung
Last active October 19, 2025 12:58
Show Gist options
  • Select an option

  • Save Blayung/3758922f0048a811d537977700cf9432 to your computer and use it in GitHub Desktop.

Select an option

Save Blayung/3758922f0048a811d537977700cf9432 to your computer and use it in GitHub Desktop.
A Python script that converts normal English text into my own phonetic Latin-based English writing system. All phonemes are represented by digraphs and the only diacritics are optional and used to inform about stress and places where digraphs are actually pronounced separately. Example text: érlij tuw bed and érlij tuw rajz mejks a maen hélthij,…
#!/bin/python
from big_phoney import BigPhoney, preprocessors
from os import devnull
from tqdm import tqdm
from contextlib import redirect_stdout
from unicodedata import normalize
# Spelling table for the writing system.
# Each key is a phoneme symbol as the conversion loop looks it up (lower-cased,
# trailing stress digit already removed); each value is the letter sequence
# written for that phoneme in the output text.
phonemeMap = {
    # --- vowels ---
    "aa": "aa",
    "ae": "ae",
    "ah": "a",
    "ao": "o",
    "aw": "aw",
    "ay": "aj",
    "eh": "e",
    "er": "er",
    "ey": "ej",
    "ih": "i",
    "iy": "ij",
    "ow": "ow",
    "oy": "oj",
    "uh": "u",
    "uw": "uw",
    # --- consonants ---
    "b": "b",
    "ch": "tsh",
    "d": "d",
    "dh": "dh",
    "f": "f",
    "g": "g",
    "hh": "h",
    "jh": "dzh",
    "k": "k",
    "l": "l",
    "m": "m",
    "n": "n",
    "ng": "ng",
    "p": "p",
    "r": "r",
    "s": "s",
    "sh": "sh",
    "t": "t",
    "th": "th",
    "v": "v",
    "w": "w",
    "y": "j",
    "z": "z",
    "zh": "zh",
}
# Letters that mark a spelled phoneme as a vowel (used for syllable counting).
vowels = "aeiou"

# Every character that counts as part of a word: the lower-case letters in
# keyboard order followed by their upper-case counterparts.
alphabet = "qwertyuiopasdfghjklzxcvbnm" + "qwertyuiopasdfghjklzxcvbnm".upper()

# Characters after which the next rendered word is capitalised.
punctuation = ".?!"

# Feature switches for the two optional combining diacritics.
enableDiaeresis = True
enableStressMark = True
# Multi-letter spellings from phonemeMap, grouped by length and kept in table
# order. The renderer consults these lists to decide where separately
# pronounced letters would otherwise be read as one phoneme.
digraphs = [spelling for spelling in phonemeMap.values() if len(spelling) == 2]
trigraphs = [spelling for spelling in phonemeMap.values() if len(spelling) == 3]
def _parsePhonemes(phonizedWord):
    """Turn a phonized word into spelled phonemes.

    phonizedWord is the space-separated phoneme string obtained from
    bigPhoney.phonize. A trailing digit on a token is removed; the digit "1"
    marks the phoneme as stressed. Tokens found in phonemeMap are replaced by
    their spelling, unknown tokens are kept verbatim.

    Returns a tuple (phonemes, syllableCount) where phonemes is a list of
    (spelling, isStressed) pairs and syllableCount is the number of spellings
    that start with a letter from `vowels`.
    """
    phonemes = []
    syllableCount = 0
    for token in phonizedWord.lower().split(" "):
        # Doubled or trailing spaces yield empty tokens; indexing them would
        # raise IndexError, so they are skipped.
        if not token:
            continue
        isStressed = False
        if token[-1].isdigit():
            isStressed = token[-1] == "1"
            token = token[:-1]
            if not token:
                continue
        spelling = phonemeMap.get(token, token)
        if spelling[0] in vowels:
            syllableCount += 1
        phonemes.append((spelling, isStressed))
    return phonemes, syllableCount


def _renderWord(phonemes, syllableCount, capitalize):
    """Spell out one word from its (spelling, isStressed) pairs.

    When capitalize is true the first letter of the word is upper-cased.
    A combining diaeresis (U+0308) is put on the first letter of a phoneme
    when that letter, together with the letters already written, would read
    as a digraph or trigraph. A combining acute accent (U+0301) is put on the
    first letter of a stressed phoneme in words of more than one syllable.
    Returns the rendered word, which is empty when phonemes is empty.
    """
    rendered = ""
    for spelling, isStressed in phonemes:
        rendered += spelling[0].upper() if capitalize else spelling[0]
        capitalize = False
        # Compare case-insensitively: digraphs/trigraphs are lower-case, and a
        # capitalised first letter must not hide an ambiguity such as "Sh".
        tail = rendered[-3:].lower()
        if enableDiaeresis and (
            tail[-2:] in digraphs
            or tail in trigraphs
            or (len(spelling) > 1 and tail[-2:] + spelling[1] in trigraphs)
        ):
            rendered += "\u0308"
        if enableStressMark and syllableCount > 1 and isStressed:
            rendered += "\u0301"
        rendered += spelling[1:]
    return rendered


def _convertWord(word, capitalize, sink):
    """Phonize one input word and return its rendering.

    Standard output is redirected to `sink` while bigPhoney.phonize runs
    (presumably to silence output from the library - confirm).
    """
    with redirect_stdout(sink):
        phonizedWord = bigPhoney.phonize(word)
    phonemes, syllableCount = _parsePhonemes(phonizedWord)
    return _renderWord(phonemes, syllableCount, capitalize)


# Script body: read input.txt, convert every run of letters, copy every other
# character through unchanged, and write the NFC-normalised result to
# output.txt.
outputParts = []
word = ""
capitalizeNext = True
bigPhoney = BigPhoney(preprocessors=[preprocessors.ReplaceAbbreviations])

# Explicit UTF-8: the output contains combining marks that many legacy
# platform encodings cannot represent.
with open("input.txt", "r", encoding="utf-8") as inputFile:
    inputText = inputFile.read()

# A separate name is used for the stream so the imported `devnull` path is
# not rebound.
with open(devnull, "w") as nullStream:
    for char in tqdm(inputText):
        if char in alphabet:
            word += char
            continue
        if word:
            rendered = _convertWord(word, capitalizeNext, nullStream)
            if rendered:
                # Capitalisation is consumed only once something was written.
                capitalizeNext = False
            outputParts.append(rendered)
            word = ""
        if char in punctuation:
            capitalizeNext = True
        outputParts.append(char)
    # Flush a word that runs up to the end of the input; without this the
    # last word is lost when the file does not end with a non-letter.
    if word:
        outputParts.append(_convertWord(word, capitalizeNext, nullStream))
        word = ""

outputText = "".join(outputParts)
with open("output.txt", "w", encoding="utf-8") as outputFile:
    outputFile.write(normalize("NFC", outputText))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment