Last active
April 23, 2019 00:08
-
-
Save tsudoko/0b5d416a7f4efef74d9a8a69663aca02 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse | |
import ast | |
import sys | |
# Codec names tried, in this order, by encode() and decode().
encodings = [
    "ascii",
    "big5", "big5hkscs",
    "cp037", "cp424", "cp437", "cp500", "cp720", "cp737", "cp775",
    "cp850", "cp852", "cp855", "cp856", "cp857", "cp858",
    "cp860", "cp861", "cp862", "cp863", "cp864", "cp865", "cp866", "cp869",
    "cp874", "cp875",
    "cp932", "cp949", "cp950",
    "cp1006", "cp1026", "cp1140",
    "cp1250", "cp1251", "cp1252", "cp1253", "cp1254",
    "cp1255", "cp1256", "cp1257", "cp1258",
    "euc_jp", "euc_jis_2004", "euc_jisx0213", "euc_kr",
    "gb2312", "gbk", "gb18030", "hz",
    "iso2022_jp", "iso2022_jp_1", "iso2022_jp_2", "iso2022_jp_2004",
    "iso2022_jp_3", "iso2022_jp_ext", "iso2022_kr",
    "latin_1",
    "iso8859_2", "iso8859_3", "iso8859_4", "iso8859_5", "iso8859_6",
    "iso8859_7", "iso8859_8", "iso8859_9", "iso8859_10",
    "iso8859_13", "iso8859_14", "iso8859_15", "iso8859_16",
    "johab",
    "koi8_r", "koi8_u",
    "mac_cyrillic", "mac_greek", "mac_iceland",
    "mac_latin2", "mac_roman", "mac_turkish",
    "ptcp154",
    "shift_jis", "shift_jis_2004", "shift_jisx0213",
    "utf_32", "utf_32_be", "utf_32_le",
    "utf_16", "utf_16_be", "utf_16_le",
    "utf_7", "utf_8", "utf_8_sig",
]
def encode(string):
    """Encode *string* with every codec listed in ``encodings``.

    Returns a dict mapping each codec name (any space in the name shown
    as "␣") to the resulting bytes. Codecs that raise UnicodeEncodeError
    for this string are left out of the result.
    """
    results = {}
    for codec in encodings:
        try:
            encoded = string.encode(codec)
        except UnicodeEncodeError:
            # This codec cannot represent the string; skip it.
            continue
        results[codec.replace(" ", "␣")] = encoded
    return results
def decode(bytestr):
    """Decode *bytestr* with every codec listed in ``encodings``.

    Returns a dict mapping each codec name (any space in the name shown
    as "␣") to the decoded text. Codecs that raise UnicodeDecodeError for
    these bytes are left out of the result.
    """
    results = {}
    for codec in encodings:
        try:
            decoded = bytestr.decode(codec)
        except UnicodeDecodeError:
            # These bytes are not valid in this codec; skip it.
            continue
        results[codec.replace(" ", "␣")] = decoded
    return results
def cleanprint(dic):
    """Print every entry of *dic* on its own line as "<key> <value>"."""
    for pair in dic.items():
        # print() joins its positional arguments with a single space.
        print(*pair)
def main(argv=None):
    """Command-line entry point: run the input through every known codec.

    Options:
      -f TEXT   encode TEXT with every codec.
      -F EXPR   evaluate EXPR as a Python bytes literal and decode it with
                every codec.
      -d        apply the opposite operation to each first-pass result
                (encode then decode, or decode then encode); result keys
                become "<first codec> <second codec>".
      -t TEXT   keep only results equal to TEXT.
      -T EXPR   keep only results equal to the Python literal EXPR.

    *argv* is the argument list to parse; None means ``sys.argv[1:]``.
    Usage errors are reported through ``ArgumentParser.error``, which
    exits the process.
    """
    p = argparse.ArgumentParser()
    p.add_argument("-d", "--double", action="store_true")
    p.add_argument("-f", "--from")
    p.add_argument("-F", "--from-eval")
    p.add_argument("-t", "--to")
    p.add_argument("-T", "--to-eval")
    args = p.parse_args(argv)

    # "from" is a Python keyword, so the attribute needs getattr().
    source = getattr(args, "from")

    # Compare against None rather than truthiness: an explicitly passed
    # empty string is still a provided option.
    if source is None and args.from_eval is None:
        p.error("either -f or -F must be provided")
    if source is not None and args.from_eval is not None:
        p.error("can't use both -f and -F")
    if args.to is not None and args.to_eval is not None:
        p.error("can't use both -t and -T")

    def literal(flag, text):
        # Turn a malformed literal into a usage error instead of a traceback.
        try:
            return ast.literal_eval(text)
        except (ValueError, SyntaxError) as exc:
            p.error("{} is not a valid Python literal: {}".format(flag, exc))

    # Resolve the filter target once, before doing any work; -T may
    # legitimately evaluate to None, so track "filter requested" separately.
    filtering = True
    if args.to is not None:
        wanted = args.to
    elif args.to_eval is not None:
        wanted = literal("-T", args.to_eval)
    else:
        filtering = False
        wanted = None

    if source is not None:
        data = source
        first_pass, second_pass = encode, decode
    else:
        data = literal("-F", args.from_eval)
        if not isinstance(data, bytes):
            p.error("-F must evaluate to a bytes literal")
        first_pass, second_pass = decode, encode

    encs = first_pass(data)
    if args.double:
        chained = {}
        for k1, intermediate in encs.items():
            for k2, v in second_pass(intermediate).items():
                chained[k1 + " " + k2] = v
        encs = chained

    if filtering:
        encs = {k: v for k, v in encs.items() if v == wanted}
    cleanprint(encs)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment