Skip to content

Instantly share code, notes, and snippets.

@dejurin
Created April 10, 2025 20:15
Show Gist options
  • Save dejurin/9facabd44ec2a0b1eab7ba1c9c8d7cb2 to your computer and use it in GitHub Desktop.
Save dejurin/9facabd44ec2a0b1eab7ba1c9c8d7cb2 to your computer and use it in GitHub Desktop.
List of all (non-control) Unicode characters with their codepoints and Unicode names (as given by the Python module "unicodedata").
#!/usr/bin/python
import unicodedata
import math
import csv
MIN_CHAR = 0x20      # first code point scanned (inclusive)
MAX_CHAR = 0x100000  # end of the scan (exclusive)
PROGRESS = 0x8000    # one progress tick is printed per this many code points


def char_row(code):
    """Return the CSV row for code point *code*, or ``None`` if it has no name.

    The row is ``[character, title-cased Unicode name, codepoint]`` where the
    codepoint is upper-case hexadecimal, zero-padded to at least five digits.
    Code points for which ``unicodedata`` defines no name yield ``None`` so
    the caller can skip them.
    """
    char = chr(code)
    # Passing a default avoids raising (and catching) ValueError for every
    # unnamed code point, which is the common case over the scanned range.
    name = unicodedata.name(char, None)
    if name is None:
        return None
    return [char, name.title(), format(code, "05X")]


def main():
    """Write ``unicode-chars.csv`` and draw a progress bar on stdout.

    A template line of ``*`` is printed first, one per expected tick; a ``#``
    is then printed (and flushed) each time another ``PROGRESS`` code points
    have been reached, so the finished bar lines up with the template.
    """
    ticks = math.ceil((MAX_CHAR - MIN_CHAR) / PROGRESS)
    print("|%s|" % ("*" * ticks))
    print("|", end="")
    with open("unicode-chars.csv", "w", encoding="utf-8", newline='') as file:
        writer = csv.writer(file, quoting=csv.QUOTE_MINIMAL, lineterminator='\n')
        writer.writerow(["Character", "Name", "Codepoint"])  # Header row
        for code in range(MIN_CHAR, MAX_CHAR):
            row = char_row(code)
            if row is not None:
                writer.writerow(row)
            if (code - MIN_CHAR) % PROGRESS == 0:
                print("#", end="", flush=True)
    print("|")


if __name__ == "__main__":
    main()
# Source: https://gist.github.com/TriMill/78ee02de3d5427ca186c2edd5f2714d5
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment