Skip to content

Instantly share code, notes, and snippets.

@oubihis
Created March 14, 2024 10:32
Show Gist options
  • Save oubihis/6f1bc94bb05a0b83928931f08fa6dc43 to your computer and use it in GitHub Desktop.
Save oubihis/6f1bc94bb05a0b83928931f08fa6dc43 to your computer and use it in GitHub Desktop.
Python Script for Character Encoding Repair in SQL Files
import re
class EncodingRepairer:
    """Repair garbled (mis-decoded) character sequences using a lookup table.

    ``REPLACEMENTS`` maps a garbled character sequence to the character it
    should be replaced with. ``repair`` substitutes every occurrence of a key
    in a single regular-expression pass over the text.
    """

    # NOTE(review): as the table is rendered here, many entries map a character
    # to itself, which makes them no-ops. The garbled keys may have been lost
    # when the table was copied -- verify against the original table.
    REPLACEMENTS = {
        "€": "€", "‚": "‚", "„": "„", "…": "…", "ˆ": "ˆ",
        "‹": "‹", "‘": "‘", "’": "’", "“": "“", "â€": "”",
        "•": "•", "–": "–", "—": "—", "Ëœ": "˜", "â„¢": "™",
        "›": "›", "Å“": "œ", "Å’": "Œ", "ž": "ž", "Ÿ": "Ÿ",
        "Å¡": "š", "Ž": "Ž", "¡": "¡", "¢": "¢", "£": "£",
        "¤": "¤", "Â¥": "¥", "¦": "¦", "§": "§", "¨": "¨",
        "©": "©", "ª": "ª", "«": "«", "¬": "¬", "®": "®",
        "¯": "¯", "°": "°", "±": "±", "²": "²", "³": "³",
        "´": "´", "µ": "µ", "¶": "¶", "·": "·", "¸": "¸",
        "¹": "¹", "º": "º", "»": "»", "¼": "¼", "½": "½",
        "¾": "¾", "¿": "¿", "À": "À", "Â": "Â", "Ã": "Ã",
        "Ä": "Ä", "Ã…": "Å", "Æ": "Æ", "Ç": "Ç", "È": "È",
        "É": "É", "Ê": "Ê", "Ë": "Ë", "ÃŒ": "Ì", "ÃŽ": "Î",
        "Ñ": "Ñ", "Ã’": "Ò", "Ó": "Ó", "Ô": "Ô", "Õ": "Õ",
        "Ö": "Ö", "×": "×", "Ø": "Ø", "Ù": "Ù", "Ú": "Ú",
        "Û": "Û", "Ü": "Ü", "Þ": "Þ", "ß": "ß", "á": "á",
        "â": "â", "ã": "ã", "ä": "ä", "Ã¥": "å", "æ": "æ",
        "ç": "ç", "è": "è", "é": "é", "ê": "ê", "ë": "ë",
        "ì": "ì", "í": "í", "î": "î", "ï": "ï", "ð": "ð",
        "ñ": "ñ", "ò": "ò", "ó": "ó", "ô": "ô", "õ": "õ",
        "ö": "ö", "÷": "÷", "ø": "ø", "ù": "ù", "ú": "ú",
        "û": "û", "ü": "ü", "ý": "ý", "þ": "þ", "ÿ": "ÿ",
    }

    def repair(self, value):
        """Return *value* with every ``REPLACEMENTS`` key replaced by its value.

        Args:
            value: Text to repair. A falsy value (``None`` or ``""``) is
                returned unchanged.

        Returns:
            The repaired string, or *value* itself when it is falsy.
        """
        if not value:
            return value
        # Regex alternation is ordered: the first alternative that matches
        # wins. Trying longer keys first prevents a short key (e.g. "Ã") from
        # shadowing a longer key that starts with it (e.g. "Ã¥"). The sort is
        # stable, so keys of equal length keep their table order.
        keys = sorted(self.REPLACEMENTS, key=len, reverse=True)
        pattern = "|".join(map(re.escape, keys))
        return re.sub(pattern, lambda m: self.REPLACEMENTS[m.group(0)], value)
# Paths of the SQL file to repair and of the repaired copy to write.
input_file_path = "/old.sql"
output_file_path = "/new-out.sql"

repairer = EncodingRepairer()

# Open input file.
# The encoding is given explicitly: without it, open() falls back to the
# platform's locale encoding, so the same file could be decoded differently
# (or fail to decode) depending on the machine running the script.
# NOTE(review): assumes the SQL file is stored as UTF-8 -- confirm.
with open(input_file_path, 'r', encoding="utf-8") as input_file:
    input_content = input_file.read()

# Apply character replacements
repaired_content = repairer.repair(input_content)

# Write modified content to output file, using the same explicit encoding so
# every replacement character can be encoded regardless of the locale.
with open(output_file_path, 'w', encoding="utf-8") as output_file:
    output_file.write(repaired_content)

print("File successfully processed.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment