Last active
April 2, 2025 06:11
-
-
Save FlyTechVideos/a1ffcfbdff88600658b8f3c280eed992 to your computer and use it in GitHub Desktop.
A not entirely inaccurate oracle for whether or not Windows XP will recognize a given string as being Unicode (ref. IsTextUnicode).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
import sys
# Please excuse the awfully formatted code, I did not take the time to make it look nice.
def main():
    """Print a byte-difference report for the string given on the command line.

    The string is read from the single command-line argument and split into
    consecutive character pairs. The first code point of each pair is the
    "lower" value and the second the "higher" value. For each pair the
    absolute difference to the previous pair's lower/higher value is printed
    (the first pair is compared against 0), together with the running totals.

    The final verdict line reports YES when three times the accumulated
    higher difference is still smaller than the accumulated lower difference.
    According to the gist description this approximates one of the checks
    made by Windows XP's IsTextUnicode.

    Returns early, without a report, when the argument count is wrong or the
    string has an odd number of characters.
    """
    print()
    if len(sys.argv) != 2:
        print(f' Usage: {sys.argv[0]} <string>')
        # Without this return a missing argument crashed with IndexError below.
        return

    string_to_check = sys.argv[1]
    if len(string_to_check) % 2 == 1:
        print(f' ERROR: String is of ODD length ({len(string_to_check)}). Strings of ODD length are never recognized as Unicode.')
        return

    print(f' String: {string_to_check}')
    print()

    values = [ord(c) for c in string_to_check]
    last_lower = 0
    last_higher = 0
    lower_difference = 0
    higher_difference = 0
    max_len = 9  # column width reserved for the "[low high]" cell

    print(' _LowD HighD _LowS HighS Char')
    print(' Lucky ASCII:', end='')
    for i in range(0, len(values), 2):
        lower = values[i]
        higher = values[i + 1]

        # "[low high]" cell, left-aligned and padded to max_len characters.
        ascii_val = f'[{lower} {higher}]'
        print(f' {ascii_val:<{max_len}}', end='')

        # Per-pair differences to the previous pair, right-aligned to 3 chars.
        lower_difference_curr = abs(lower - last_lower)
        higher_difference_curr = abs(higher - last_higher)
        print(f' {lower_difference_curr:>3} {higher_difference_curr:>3}', end='')

        # Running totals of the differences, same column layout.
        lower_difference += lower_difference_curr
        higher_difference += higher_difference_curr
        print(f' {lower_difference:>3} {higher_difference:>3}', end='')

        print(f' {string_to_check[i]}{string_to_check[i + 1]}')
        last_lower = lower
        last_higher = higher

    print()
    print(f' Lower Diff: {lower_difference}')
    print(f' Higher Diff: {higher_difference}')
    print(f' 2 * Higher Diff: {2 * higher_difference}')
    print(f' 3 * Higher Diff: {3 * higher_difference}')
    print(f'3 * Higher Diff - Lower: {3 * higher_difference - lower_difference}')
    print()
    print(f' higher < lower: {"YES" if 1 * higher_difference < lower_difference else "NO"}')
    print(f' 2 * higher < lower: {"YES" if 2 * higher_difference < lower_difference else "NO"}')
    print(f' 3 * higher < lower: {"YES" if 3 * higher_difference < lower_difference else "NO"}')
    print()
    print(f' >>>>> CENSORED: {"YES" if 3 * higher_difference < lower_difference else "NO"} <<<<<')
# Run the report only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@Artoria2e5
This script only checks the "char sum imbalance". If this check says NO, that doesn't mean the string is classified as non-unicode. You can try using the other code I posted that simply calls IsTextUnicode on Windows (or, if you're at that point, just try it in an XP VM).