Created
October 6, 2019 19:42
-
-
Save neuschaefer/bd3c34e3440c29611c7956d53b695f6f to your computer and use it in GitHub Desktop.
Unicode codepoint lister
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
import sys

# Default path of the semicolon-separated Unicode data file that Table loads
# when no other file is given.
DATAFILE = '/usr/share/unicode/UnicodeData.txt'
class Entry:
    """One parsed record of a semicolon-separated Unicode data line.

    Fields are taken by position: field 0 is the code point in hexadecimal,
    field 1 is stored as ``name`` and field 10 as ``description``.
    """

    def __init__(self, line):
        """Parse *line* into ``codepoint``, ``name`` and ``description``.

        Args:
            line: A single record, with or without its line terminator.

        Raises:
            ValueError: If the first field is not a hexadecimal number.
        """
        # Strip the terminator so it can never end up inside the last field.
        fields = line.rstrip('\r\n').split(';')
        self.codepoint = int(fields[0], 16)
        # Truncated records yield empty strings instead of an IndexError.
        self.name = fields[1] if len(fields) > 1 else ''
        self.description = fields[10] if len(fields) > 10 else ''

    def get_description(self):
        """Return the best available label for this entry.

        Returns:
            ``description`` when it is non-empty, otherwise ``name`` when it
            is non-empty, otherwise ``None``.
        """
        return self.description or self.name or None
class Table:
    """Lookup table from integer code point to its parsed ``Entry``.

    Only code points that appear as the first field of a line in the data
    file are indexed.
    NOTE(review): UnicodeData.txt describes some large blocks only as
    ``<..., First>`` / ``<..., Last>`` pairs; code points strictly inside
    such a range are not indexed here and resolve to '(unknown)'.
    """

    def __init__(self, f=DATAFILE):
        """Load every record of the data file *f* into ``self.map``.

        Args:
            f: Path of the semicolon-separated data file to read.

        Raises:
            OSError: If the file cannot be opened.
            ValueError: If a non-blank line does not start with a
                hexadecimal code point.
        """
        self.map = {}
        # The context manager closes the file even if parsing fails, and
        # iterating the handle avoids holding all lines in memory at once.
        with open(f, encoding='utf-8') as data:
            for line in data:
                if not line.strip():
                    # Tolerate blank lines (e.g. a trailing empty line).
                    continue
                e = Entry(line)
                self.map[e.codepoint] = e

    def get_description(self, codepoint):
        """Return the description for *codepoint*, or '(unknown)'.

        Args:
            codepoint: Either an integer code point or a one-character
                string, which is converted with ``ord``.

        Returns:
            Whatever the matching entry's ``get_description()`` returns, or
            the string '(unknown)' when the code point is not in the table.
        """
        if isinstance(codepoint, str):
            codepoint = ord(codepoint)
        entry = self.map.get(codepoint)
        if entry is None:
            return '(unknown)'
        return entry.get_description()
# Load the table once, then print one tab-separated row per character of
# every command-line argument: U+XXXX, the character itself, its description.
table = Table()
for arg in sys.argv[1:]:
    for char in arg:
        row = (ord(char), char, table.get_description(char))
        print("U+%04X\t%s\t%s" % row)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment