Skip to content

Instantly share code, notes, and snippets.

@AARP41298
Created July 10, 2025 20:02
Show Gist options
  • Save AARP41298/b9cfc5327d22452327c9011dd3773130 to your computer and use it in GitHub Desktop.
Save AARP41298/b9cfc5327d22452327c9011dd3773130 to your computer and use it in GitHub Desktop.
import re
def fix_encoding(input_file, output_file):
    """Repair mis-encoded accented text in a file and save it as UTF-8.

    The input is read as raw bytes and decoded in stages:

    1. If the bytes are not valid UTF-8, they are interpreted as Latin-1.
    2. If they are valid UTF-8 *and* the text looks double-encoded (UTF-8
       bytes that were misread as Latin-1 and encoded to UTF-8 again), the
       double encoding is undone.
    3. Otherwise the text is already good UTF-8 and is kept unchanged.

    Any remaining two-character mojibake sequences for Spanish accented
    letters (for example in files that mix Latin-1 and UTF-8 sections) are
    then replaced by the intended character, and the literal header
    ``('encoding' 'latin1')`` is rewritten to ``('encoding' 'utf8')``.

    Args:
        input_file: Path of the file to read.
        output_file: Path of the UTF-8 file to write (overwritten if present).

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    # Read in binary mode so the decoding below works on the raw bytes.
    with open(input_file, 'rb') as f:
        content = f.read()

    try:
        text = content.decode('utf-8')
    except UnicodeDecodeError:
        # Not valid UTF-8 as a whole: Latin-1 maps every byte, so this
        # decode cannot fail.
        fixed_content = content.decode('latin1')
    else:
        try:
            # Undo double encoding. This fails with UnicodeEncodeError when
            # the text has characters outside Latin-1, and with
            # UnicodeDecodeError when the text was not double-encoded.
            fixed_content = text.encode('latin1').decode('utf-8')
        except (UnicodeEncodeError, UnicodeDecodeError):
            # Already proper UTF-8: keep it instead of corrupting it.
            fixed_content = text

    # Build the mojibake -> character table from the characters themselves,
    # so every key is unique and really is the mis-decoded form. Both the
    # Latin-1 and the Windows-1252 misreadings are covered; bytes that
    # Windows-1252 leaves undefined are skipped because the resulting
    # sequence would be ambiguous.
    replacements = {}
    for char in "áéíóúÁÉÍÓÚñÑüÜ":
        raw = char.encode('utf-8')
        replacements[raw.decode('latin1')] = char
        try:
            replacements[raw.decode('cp1252')] = char
        except UnicodeDecodeError:
            pass

    for broken, correct in replacements.items():
        fixed_content = fixed_content.replace(broken, correct)

    # Replace 'encoding' 'latin1' with 'encoding' 'utf8'.
    fixed_content = re.sub(r"\('encoding' 'latin1'\)", "('encoding' 'utf8')", fixed_content)

    # newline='' writes the line endings exactly as they were read from the
    # binary input; without it "\r\n" would become "\r\r\n" on Windows.
    with open(output_file, 'w', encoding='utf-8', newline='') as f:
        f.write(fixed_content)
    print(f"Archivo corregido guardado en: {output_file}")
# Example usage. Guarded so the conversion only runs when this file is
# executed as a script, not when it is imported as a module.
if __name__ == "__main__":
    fix_encoding('songs.dta', 'songs.dta.fix')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment