Blayung · October 19, 2025 12:58
diff --git a/convert.py b/convert.py
 #!/bin/python
 from big_phoney import BigPhoney, preprocessors
 from os import devnull
 from tqdm import tqdm
 from contextlib import redirect_stdout
 from unicodedata import normalize

 # Lookup table used by the conversion loop: each key is a lower-cased phoneme
 # symbol (with any trailing stress digit already removed) and each value is
 # the spelling written to the output for it.  Insertion order is kept because
 # the digraph/trigraph lists are derived from the values in order.
 phonemeMap = dict(
     # Vowels
     aa="aa",
     ae="ae",
     ah="a",
     ao="o",
     aw="aw",
     ay="aj",
     eh="e",
     er="er",
     ey="ej",
     ih="i",
     iy="ij",
     ow="ow",
     oy="oj",
     uh="u",
     uw="uw",
     # Consonants
     b="b",
     ch="tsh",
     d="d",
     dh="dh",
     f="f",
     g="g",
     hh="h",
     jh="dzh",
     k="k",
     l="l",
     m="m",
     n="n",
     ng="ng",
     p="p",
     r="r",
     s="s",
     sh="sh",
     t="t",
     th="th",
     v="v",
     w="w",
     y="j",
     z="z",
     zh="zh",
 )

 # A spelling whose first letter is one of these counts as one syllable.
 vowels = "aeiou"

 # Characters that belong to a word: the 26 ASCII letters, lower case first,
 # then the same letters in upper case.
 alphabet = "".join(
     recase("qwertyuiopasdfghjklzxcvbnm") for recase in (str.lower, str.upper)
 )
 # After one of these characters the next word starts with a capital letter.
 punctuation = ".?!"

 # Switches for the two combining marks the converter can emit:
 # U+0301 (written on stressed phonemes) and U+0308 (diaeresis).
 enableStressMark = True
 enableDiaeresis = True

 # Output spellings from phonemeMap that are two / three letters long, in table
 # order.  The conversion loop checks freshly written letters against these
 # lists to decide where a diaeresis is needed.
 digraphs = [spelling for spelling in phonemeMap.values() if len(spelling) == 2]
 trigraphs = [spelling for spelling in phonemeMap.values() if len(spelling) == 3]


 # Converted text accumulated so far.
 outputText = ""

 # Letters of the word currently being collected from the input.
 word = ""
 # True when the next emitted word must start with a capital letter.
 capitalizeNext = True

 bigPhoney = BigPhoney(preprocessors = [preprocessors.ReplaceAbbreviations])


 def _respellWord(word, context, capitalize, sink):
     """Return the phonetic respelling of ``word``.

     ``context`` is the tail of the text written so far (its last two
     characters are enough); it is only consulted by the diaeresis check and
     is not part of the returned string.  ``capitalize`` asks for the first
     letter to be upper-cased.  ``sink`` is a writable stream that absorbs
     anything printed while ``phonize`` runs.

     Returns ``(respelling, capitalize)``; the flag comes back ``False`` once
     a letter has been capitalised.
     """
     with redirect_stdout(sink):
         phonizedWord = bigPhoney.phonize(word)

     phonemes = []
     syllableCount = 0
     # split() without an argument drops empty tokens, so doubled or stray
     # spaces in the phonized string cannot yield an empty phoneme (indexing
     # an empty string below would raise IndexError).
     for phoneme in phonizedWord.lower().split():
         isStressed = False
         if phoneme[-1].isdigit():
             # A trailing digit is stripped; only "1" marks the phoneme as stressed.
             isStressed = phoneme[-1] == "1"
             phoneme = phoneme[:-1]
             if not phoneme:
                 continue

         # Symbols that are missing from the table pass through unchanged.
         phoneme = phonemeMap.get(phoneme, phoneme)

         if phoneme[0] in vowels:
             syllableCount += 1

         phonemes.append((phoneme, isStressed))

     # Build on top of the context so the slices below see exactly the same
     # trailing characters they would see on the full output text.
     text = context
     for spelling, isStressed in phonemes:
         if capitalize:
             text += spelling[0].upper()
             capitalize = False
         else:
             text += spelling[0]
             tail = text[-2:]
             # Add a diaeresis when the letter just written completes a
             # digraph/trigraph with the preceding letters, or would form a
             # trigraph together with this spelling's second letter.
             if enableDiaeresis and (
                 tail in digraphs
                 or text[-3:] in trigraphs
                 or (len(spelling) > 1 and tail + spelling[1] in trigraphs)
             ):
                 text += "\u0308"

         # The stress mark is only written for words with more than one syllable.
         if enableStressMark and syllableCount > 1 and isStressed:
             text += "\u0301"

         text += spelling[1:]

     return text[len(context):], capitalize


 # Read the input up front so its handle is closed before the conversion starts.
 with open("input.txt", "r", encoding="utf-8") as inputFile:
     inputText = inputFile.read()

 # A separate name keeps the imported ``devnull`` path from being overwritten.
 with open(devnull, "w") as nullStream:
     for char in tqdm(inputText):
         if char in alphabet:
             word += char
             continue

         if word != "":
             respelling, capitalizeNext = _respellWord(word, outputText[-2:], capitalizeNext, nullStream)
             outputText += respelling
             word = ""

         if char in punctuation:
             capitalizeNext = True

         # Non-letter characters are copied through unchanged.
         outputText += char

     # A word that runs up to the very end of the input has no following
     # character to trigger the flush above, so convert it here.
     if word != "":
         respelling, capitalizeNext = _respellWord(word, outputText[-2:], capitalizeNext, nullStream)
         outputText += respelling
         word = ""

 # The text may contain combining marks that the locale's default encoding
 # cannot represent, so write UTF-8 explicitly; the context manager makes sure
 # the file is flushed and closed.
 with open("output.txt", "w", encoding="utf-8") as outputFile:
     outputFile.write(normalize("NFC", outputText))
	#!/bin/python
	from big_phoney import BigPhoney, preprocessors
	from os import devnull
	from tqdm import tqdm
	from contextlib import redirect_stdout
	from unicodedata import normalize

	# Lookup table used by the conversion loop: each key is a lower-cased phoneme
	# symbol (with any trailing stress digit already removed) and each value is
	# the spelling written to the output for it.  Insertion order is kept because
	# the digraph/trigraph lists are derived from the values in order.
	phonemeMap = dict(
	    # Vowels
	    aa="aa",
	    ae="ae",
	    ah="a",
	    ao="o",
	    aw="aw",
	    ay="aj",
	    eh="e",
	    er="er",
	    ey="ej",
	    ih="i",
	    iy="ij",
	    ow="ow",
	    oy="oj",
	    uh="u",
	    uw="uw",
	    # Consonants
	    b="b",
	    ch="tsh",
	    d="d",
	    dh="dh",
	    f="f",
	    g="g",
	    hh="h",
	    jh="dzh",
	    k="k",
	    l="l",
	    m="m",
	    n="n",
	    ng="ng",
	    p="p",
	    r="r",
	    s="s",
	    sh="sh",
	    t="t",
	    th="th",
	    v="v",
	    w="w",
	    y="j",
	    z="z",
	    zh="zh",
	)

	# A spelling whose first letter is one of these counts as one syllable.
	vowels = "aeiou"

	# Characters that belong to a word: the 26 ASCII letters, lower case first,
	# then the same letters in upper case.
	alphabet = "".join(
	    recase("qwertyuiopasdfghjklzxcvbnm") for recase in (str.lower, str.upper)
	)
	# After one of these characters the next word starts with a capital letter.
	punctuation = ".?!"

	# Switches for the two combining marks the converter can emit:
	# U+0301 (written on stressed phonemes) and U+0308 (diaeresis).
	enableStressMark = True
	enableDiaeresis = True

	# Output spellings from phonemeMap that are two / three letters long, in table
	# order.  The conversion loop checks freshly written letters against these
	# lists to decide where a diaeresis is needed.
	digraphs = [spelling for spelling in phonemeMap.values() if len(spelling) == 2]
	trigraphs = [spelling for spelling in phonemeMap.values() if len(spelling) == 3]


	# Converted text accumulated so far.
	outputText = ""

	# Letters of the word currently being collected from the input.
	word = ""
	# True when the next emitted word must start with a capital letter.
	capitalizeNext = True

	bigPhoney = BigPhoney(preprocessors = [preprocessors.ReplaceAbbreviations])


	def _respellWord(word, context, capitalize, sink):
	    """Return the phonetic respelling of ``word``.

	    ``context`` is the tail of the text written so far (its last two
	    characters are enough); it is only consulted by the diaeresis check and
	    is not part of the returned string.  ``capitalize`` asks for the first
	    letter to be upper-cased.  ``sink`` is a writable stream that absorbs
	    anything printed while ``phonize`` runs.

	    Returns ``(respelling, capitalize)``; the flag comes back ``False`` once
	    a letter has been capitalised.
	    """
	    with redirect_stdout(sink):
	        phonizedWord = bigPhoney.phonize(word)

	    phonemes = []
	    syllableCount = 0
	    # split() without an argument drops empty tokens, so doubled or stray
	    # spaces in the phonized string cannot yield an empty phoneme (indexing
	    # an empty string below would raise IndexError).
	    for phoneme in phonizedWord.lower().split():
	        isStressed = False
	        if phoneme[-1].isdigit():
	            # A trailing digit is stripped; only "1" marks the phoneme as stressed.
	            isStressed = phoneme[-1] == "1"
	            phoneme = phoneme[:-1]
	            if not phoneme:
	                continue

	        # Symbols that are missing from the table pass through unchanged.
	        phoneme = phonemeMap.get(phoneme, phoneme)

	        if phoneme[0] in vowels:
	            syllableCount += 1

	        phonemes.append((phoneme, isStressed))

	    # Build on top of the context so the slices below see exactly the same
	    # trailing characters they would see on the full output text.
	    text = context
	    for spelling, isStressed in phonemes:
	        if capitalize:
	            text += spelling[0].upper()
	            capitalize = False
	        else:
	            text += spelling[0]
	            tail = text[-2:]
	            # Add a diaeresis when the letter just written completes a
	            # digraph/trigraph with the preceding letters, or would form a
	            # trigraph together with this spelling's second letter.
	            if enableDiaeresis and (
	                tail in digraphs
	                or text[-3:] in trigraphs
	                or (len(spelling) > 1 and tail + spelling[1] in trigraphs)
	            ):
	                text += "\u0308"

	        # The stress mark is only written for words with more than one syllable.
	        if enableStressMark and syllableCount > 1 and isStressed:
	            text += "\u0301"

	        text += spelling[1:]

	    return text[len(context):], capitalize


	# Read the input up front so its handle is closed before the conversion starts.
	with open("input.txt", "r", encoding="utf-8") as inputFile:
	    inputText = inputFile.read()

	# A separate name keeps the imported ``devnull`` path from being overwritten.
	with open(devnull, "w") as nullStream:
	    for char in tqdm(inputText):
	        if char in alphabet:
	            word += char
	            continue

	        if word != "":
	            respelling, capitalizeNext = _respellWord(word, outputText[-2:], capitalizeNext, nullStream)
	            outputText += respelling
	            word = ""

	        if char in punctuation:
	            capitalizeNext = True

	        # Non-letter characters are copied through unchanged.
	        outputText += char

	    # A word that runs up to the very end of the input has no following
	    # character to trigger the flush above, so convert it here.
	    if word != "":
	        respelling, capitalizeNext = _respellWord(word, outputText[-2:], capitalizeNext, nullStream)
	        outputText += respelling
	        word = ""

	# The text may contain combining marks that the locale's default encoding
	# cannot represent, so write UTF-8 explicitly; the context manager makes sure
	# the file is flushed and closed.
	with open("output.txt", "w", encoding="utf-8") as outputFile:
	    outputFile.write(normalize("NFC", outputText))
No results found