Skip to content

Instantly share code, notes, and snippets.

@ronchaine
Created March 27, 2014 23:06
Show Gist options
  • Save ronchaine/9821117 to your computer and use it in GitHub Desktop.
Save ronchaine/9821117 to your computer and use it in GitHub Desktop.
Reading Unicode (UTF-8 encoded) characters from standard input.
/**
 * Read one UTF-8 encoded code point from standard input.
 *
 * The lead byte determines the sequence length (1 to 4 bytes); the payload
 * bits of the lead byte and of every continuation byte are then assembled
 * most-significant first.
 *
 * @return The decoded code point; 0 when standard input is already at EOF
 *         (or a NUL byte is read); U+FFFD (REPLACEMENT CHARACTER) when the
 *         input is malformed: a stray continuation byte, a lead byte in the
 *         range 0xF8-0xFF, a sequence cut short by EOF, or a sequence whose
 *         next byte is not a continuation byte. In the last case the
 *         offending byte is pushed back so the next call starts decoding
 *         from it.
 *
 * Overlong encodings, surrogate code points and values above U+10FFFF are
 * not rejected.
 */
char32_t getuchar(void)
{
    const char32_t replacement = 0xFFFD;

    int lead = getchar();
    if (lead == EOF)
    {
        return 0;
    }

    /* 0xxxxxxx: plain ASCII, a complete character on its own. */
    if (!(lead & 0x80))
    {
        return (char32_t)lead;
    }

    /*
     * Derive the number of continuation bytes and the payload of the lead
     * byte from its prefix. The payload mask shrinks as the prefix grows
     * (5, 4, then 3 bits); using one fixed mask would leak prefix bits of a
     * 4-byte lead into the result.
     */
    int trailing;
    char32_t code_point;
    if ((lead & 0xE0) == 0xC0)          /* 110xxxxx */
    {
        trailing = 1;
        code_point = (char32_t)(lead & 0x1F);
    }
    else if ((lead & 0xF0) == 0xE0)     /* 1110xxxx */
    {
        trailing = 2;
        code_point = (char32_t)(lead & 0x0F);
    }
    else if ((lead & 0xF8) == 0xF0)     /* 11110xxx */
    {
        trailing = 3;
        code_point = (char32_t)(lead & 0x07);
    }
    else
    {
        /* 10xxxxxx (stray continuation) or 11111xxx (never valid). */
        return replacement;
    }

    for (int i = 0; i < trailing; ++i)
    {
        int next = getchar();
        if (next == EOF)
        {
            /* Sequence truncated by end of input. */
            return replacement;
        }
        if ((next & 0xC0) != 0x80)
        {
            /* Not a continuation byte: leave it for the next call. */
            ungetc(next, stdin);
            return replacement;
        }
        code_point = (code_point << 6) | (char32_t)(next & 0x3F);
    }

    return code_point;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment