Last active
March 30, 2026 09:47
-
-
Save Oldes/2f31b16f333151744991b22a3e15e792 to your computer and use it in GitHub Desktop.
Get the Unicode ranges of characters that are expected to have zero or wide width
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Rebol [
    title: "Zero & wide width unicode ranges"
]

import thru-cache

;; ---------------------------------------------------------------------------
;; Wide-width ranges
;;
;; Downloads EastAsianWidth.txt and prints every run of consecutive data lines
;; whose width class is "W" as one C-style initializer:  {0xLOW,0xHIGH},
;;
;; Six ranges are printed per row; an extra row break is inserted the first
;; time a range starts above the 16-bit (4 hex digit) code points.
;;
;; Consecutive "W" lines are merged without checking that their code points
;; are adjacent, so code points that the file does not list between two such
;; lines end up inside the printed range.
;;
;; NOTE(review): only the "W" class is treated as wide; "F" (fullwidth)
;; entries are ignored - confirm this is intended.
;; ---------------------------------------------------------------------------
data: read-thru/string https://unicode.org/Public/UNIDATA/EastAsianWidth.txt
hexa-digits: system/catalog/bitsets/hex-digits

prev-wide: none     ;; upper bound of the run that is currently open (none = no open run)
prev-size: n: 0     ;; prev-size: hex-digit count of the last closed run's upper bound
                    ;; n: number of ranges printed since the last forced row break

print as-yellow "// Wide-width ranges //"
parse data [
    any [
        #"#" thru LF                                ;; skip comments
        |
        opt CR LF                                   ;; skip blank lines (they used to stop the loop)
        |
        copy val1 2 6 hexa-digits (val2: val1)      ;; lower range (also the upper one for single code points)
        opt [".." copy val2 2 6 hexa-digits]        ;; upper range
        thru "; " copy type to SP [thru LF | to end]
        (
            wide?: type == "W"
            case [
                all [wide? not prev-wide] [
                    ;; Insert newline if size starts to be over 16bit range
                    if all [prev-size = 4  4 < length? val1] [
                        print LF n: 0
                    ]
                    ;; Start with the lower range value
                    prin ajoin [" {0x" val1 ","]
                    prev-wide: val2
                ]
                wide? [
                    ;; Extend run
                    prev-wide: val2
                ]
                prev-wide [
                    ;; End with the upper range value
                    prin ajoin ["0x" prev-wide "},"]
                    ++ n
                    if zero? n // 6 [prin LF]
                    ;; Remember the size of the range that was just closed
                    ;; (not of the non-wide line that happened to close it).
                    prev-size: length? prev-wide
                    prev-wide: none
                ]
            ]
        )
    ]
    (
        ;; Close a run that is still open when the input ends,
        ;; so the last initializer is never left without its upper bound.
        if prev-wide [
            prin ajoin ["0x" prev-wide "},"]
            prev-wide: none
        ]
        prin LF
    )
]
;; ---------------------------------------------------------------------------
;; Zero-width ranges
;;
;; Downloads UnicodeData.txt and prints every run of consecutive records whose
;; third semicolon-separated field is one of `zero-widths` as one C-style
;; initializer:  {0xLOW,0xHIGH},
;;
;; Six ranges are printed per row; an extra row break is inserted the first
;; time a range starts above the 16-bit (4 hex digit) code points.
;;
;; Consecutive matching records are merged without checking that their code
;; points are adjacent, so code points that have no record between two such
;; lines end up inside the printed range.
;; ---------------------------------------------------------------------------
data: read-thru/string https://unicode.org/Public/UNIDATA/UnicodeData.txt
zero-widths: ["Mn" "Me" "Cf"] ;; Nonspacing Mark, Enclosing Mark, Format

prev-zero: none     ;; last code point of the run that is currently open (none = no open run)
prev-size: n: 0     ;; prev-size: hex-digit count of the last closed run's upper bound
                    ;; n: number of ranges printed since the last forced row break

print as-yellow "// Zero-width ranges //"
parse data [
    any [
        opt CR LF                                   ;; skip blank lines instead of swallowing them into val
        |
        ;; val = 1st field (code point), 2nd field is skipped, type = 3rd field
        copy val to #";" skip thru #";" copy type to #";" [thru LF | to end]
        (
            zero-width?: did find zero-widths type
            case [
                all [zero-width? not prev-zero] [
                    ;; Insert newline if size starts to be over 16bit range
                    if all [prev-size = 4  4 < length? val] [
                        print LF n: 0
                    ]
                    ;; Start with the lower range value
                    prin ajoin [" {0x" val ","]
                    prev-zero: val
                ]
                zero-width? [
                    prev-zero: val ;; extend run
                ]
                prev-zero [
                    ;; End with the upper range value
                    prin ajoin ["0x" prev-zero "},"]
                    ++ n
                    if zero? n // 6 [prin LF]
                    ;; Remember the size of the range that was just closed
                    ;; (not of the record that happened to close it).
                    prev-size: length? prev-zero
                    prev-zero: none
                ]
            ]
        )
    ]
    (
        ;; Close a run that is still open when the input ends,
        ;; so the last initializer is never left without its upper bound.
        if prev-zero [
            prin ajoin ["0x" prev-zero "},"]
            prev-zero: none
        ]
        prin LF
    )
]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment