Created
March 14, 2024 10:32
-
-
Save oubihis/6f1bc94bb05a0b83928931f08fa6dc43 to your computer and use it in GitHub Desktop.
Python Script for Character Encoding Repair in SQL Files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
class EncodingRepairer:
    """Replace known mis-encoded character sequences in a string.

    ``REPLACEMENTS`` maps each sequence to search for onto the text that
    should take its place.  ``repair`` applies every mapping in a single
    regular-expression pass over the input.
    """

    # NOTE(review): many entries below map a character to itself, so they
    # are no-ops.  That looks like the mis-encoded keys were normalised away
    # at some point -- verify the table against a known-good copy.
    # NOTE(review): the key for "”" is shown as the two characters "â€";
    # it may originally have carried a trailing invisible character
    # (U+009D) -- confirm before relying on it.
    REPLACEMENTS = {
        "€": "€", "‚": "‚", "„": "„", "…": "…", "ˆ": "ˆ",
        "‹": "‹", "‘": "‘", "’": "’", "“": "“", "â€": "”",
        "•": "•", "–": "–", "—": "—", "Ëœ": "˜", "â„¢": "™",
        "›": "›", "Å“": "œ", "Å’": "Œ", "ž": "ž", "Ÿ": "Ÿ",
        "Å¡": "š", "Ž": "Ž", "¡": "¡", "¢": "¢", "£": "£",
        "¤": "¤", "Â¥": "¥", "¦": "¦", "§": "§", "¨": "¨",
        "©": "©", "ª": "ª", "«": "«", "¬": "¬", "®": "®",
        "¯": "¯", "°": "°", "±": "±", "²": "²", "³": "³",
        "´": "´", "µ": "µ", "¶": "¶", "·": "·", "¸": "¸",
        "¹": "¹", "º": "º", "»": "»", "¼": "¼", "½": "½",
        "¾": "¾", "¿": "¿", "À": "À", "Â": "Â", "Ã": "Ã",
        "Ä": "Ä", "Ã…": "Å", "Æ": "Æ", "Ç": "Ç", "È": "È",
        "É": "É", "Ê": "Ê", "Ë": "Ë", "ÃŒ": "Ì", "ÃŽ": "Î",
        "Ñ": "Ñ", "Ã’": "Ò", "Ó": "Ó", "Ô": "Ô", "Õ": "Õ",
        "Ö": "Ö", "×": "×", "Ø": "Ø", "Ù": "Ù", "Ú": "Ú",
        "Û": "Û", "Ü": "Ü", "Þ": "Þ", "ß": "ß", "á": "á",
        "â": "â", "ã": "ã", "ä": "ä", "Ã¥": "å", "æ": "æ",
        "ç": "ç", "è": "è", "é": "é", "ê": "ê", "ë": "ë",
        # The mis-decoded form of "í" is "Ã" followed by a soft hyphen
        # (U+00AD).  The soft hyphen is invisible, so it is written as an
        # escape; a bare "Ã" key here would duplicate the "Ã" entry above
        # and turn every lone "Ã" into "í".
        "ì": "ì", "Ã\u00ad": "í", "î": "î", "ï": "ï", "ð": "ð",
        "ñ": "ñ", "ò": "ò", "ó": "ó", "ô": "ô", "õ": "õ",
        "ö": "ö", "÷": "÷", "ø": "ø", "ù": "ù", "ú": "ú",
        "û": "û", "ü": "ü", "ý": "ý", "þ": "þ", "ÿ": "ÿ",
    }

    def repair(self, value):
        """Return ``value`` with every ``REPLACEMENTS`` key substituted.

        Args:
            value: Text to repair.  Falsy values (``None``, ``""``) are
                returned unchanged.

        Returns:
            The repaired string, or ``value`` itself when it is falsy or
            the replacement table is empty.
        """
        if not value or not self.REPLACEMENTS:
            return value

        # Regex alternation picks the FIRST alternative that matches, not
        # the longest one.  Trying longer keys first stops a short key such
        # as "Ã" from consuming the prefix of "Ã…" or "Ã¥".  ``sorted`` is
        # stable, so keys of equal length keep their table order.
        keys = sorted(self.REPLACEMENTS, key=len, reverse=True)
        pattern = '|'.join(map(re.escape, keys))
        return re.sub(pattern, lambda m: self.REPLACEMENTS[m.group(0)], value)
# Paths of the SQL dump to read and of the repaired copy to write.
input_file_path = "/old.sql"
output_file_path = "/new-out.sql"

repairer = EncodingRepairer()

# Decode explicitly instead of relying on the platform's default encoding:
# the replacement table consists of non-ASCII text, so a different locale
# encoding would decode the file into other characters and nothing would
# match.  NOTE(review): assumes the dump is stored as UTF-8 -- confirm.
with open(input_file_path, 'r', encoding="utf-8") as input_file:
    input_content = input_file.read()

# Apply character replacements
repaired_content = repairer.repair(input_content)

# Write the repaired text with the same explicit encoding used for reading.
with open(output_file_path, 'w', encoding="utf-8") as output_file:
    output_file.write(repaired_content)

print("File successfully processed.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment