Created
December 27, 2018 17:33
-
-
Save KWillets/8a633c0b5774a6d77bb721d11d3778d0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <x86intrin.h> | |
#include <inttypes.h> | |
#include "streamvbyte_shuffle_tables_decode.h" | |
/*
 * Debug helper: print the 16 bytes of x as signed decimal values, preceded
 * by tag right-aligned in a 6-character field.
 *
 * The bytes are read as `signed char` explicitly so that values with the
 * high bit set always print as negative numbers, independent of whether
 * plain `char` is signed on the target/compiler flags in use.
 * tag is only read, so it is const-qualified (callers pass string literals).
 */
void dump(const __m128i x, const char *tag) {
    signed char lanes[16];

    /* Unaligned store: the byte buffer has no 16-byte alignment guarantee. */
    _mm_storeu_si128((__m128i *)lanes, x);

    printf("%6s: ", tag);
    for (int i = 0; i < 16; i++) {
        printf("%2i,", lanes[i]);
    }
    printf("\n");
}
/*
 * Debug helper: print the 16 bytes of x as unsigned hexadecimal values,
 * preceded by tag right-aligned in a 6-character field.
 *
 * tag is only read, so it is const-qualified (callers pass string literals).
 */
void dumpx(const __m128i x, const char *tag) {
    unsigned char bytes[16];

    /* Unaligned store: the byte buffer has no 16-byte alignment guarantee. */
    _mm_storeu_si128((__m128i *)bytes, x);

    printf("%6s: ", tag);
    for (int i = 0; i < 16; i++) {
        printf("%2x,", bytes[i]);
    }
    printf("\n");
}
/*
 * 16-entry pshufb lookup tables, indexed by the upper nibble of a UTF-8 byte:
 *   0x0-0x7  single-byte (ASCII) character
 *   0x8-0xB  continuation byte
 *   0xC-0xD  initial byte of a 2-byte character
 *   0xE      initial byte of a 3-byte character
 *   0xF      initial byte of a 4-byte character
 */

/* mask: the payload bits to keep from a byte of each class. */
#define mask _mm_setr_epi8( \
    (char)0xFF, (char)0xFF, (char)0xFF, (char)0xFF, \
    (char)0xFF, (char)0xFF, (char)0xFF, (char)0xFF, \
    0x3F, 0x3F, 0x3F, 0x3F, \
    0x1F, 0x1F, \
    0x0F, \
    0x07)

/* len: number of continuation bytes that follow; -1 marks a continuation byte. */
#define len _mm_setr_epi8( \
    0, 0, 0, 0, 0, 0, 0, 0, \
    -1, -1, -1, -1, \
    1, 1, \
    2, \
    3)

/* iota1: byte k holds k + 1, i.e. the index of the following byte. */
#define iota1 _mm_setr_epi8( \
    1, 2, 3, 4, 5, 6, 7, 8, \
    9, 10, 11, 12, 13, 14, 15, 16)
// Shared routines used by the decoders below.
/*
 * Pack the four bytes b0..b3 of every 32-bit lane into a single value
 *   b0 + (b1 << 6) + (b2 << 12) + (b3 << 18)
 * using two multiply-add steps.
 */
__m128i compress(__m128i bitfields) {
    /* Per byte pair: weights 1 and 0x40, giving lo + (hi << 6) in 16 bits. */
    const __m128i byte_weights = _mm_set1_epi32(0x40014001);
    /* Per 16-bit pair: weights 1 and 0x1000, giving lo + (hi << 12) in 32 bits. */
    const __m128i word_weights = _mm_set1_epi32(0x10000001);

    __m128i words = _mm_maddubs_epi16(bitfields, byte_weights);
    __m128i dwords = _mm_madd_epi16(words, word_weights);
    return dwords;
}
/* Return, for each of the 16 bytes of utf8, its upper nibble (byte >> 4). */
__m128i make_upper4(__m128i utf8) {
    const __m128i low_nibble = _mm_set1_epi8(0x0F);
    /* The 64-bit shift drags bits in from the neighbouring byte; the AND
       below discards them again. */
    __m128i shifted = _mm_srli_epi64(utf8, 4);
    return _mm_and_si128(shifted, low_nibble);
}
/*
 * Shift the bytes of x towards index 0 by i positions, filling the vacated
 * upper bytes with zero: result[k] = x[k + i] while k + i < 16, else 0.
 *
 * i is clamped to 0..16 so the sliding-window load below can never leave the
 * index table: i <= 0 returns x unchanged and i >= 16 returns all zeros.
 */
__m128i shift(__m128i x, int i) {
    /* Identity indices followed by 16 entries with the high bit set, which
       pshufb turns into zero bytes.  Kept static so the table is not rebuilt
       on the stack on every call. */
    static const signed char window[32] = {
         0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
    };

    if (i < 0) {
        i = 0;
    } else if (i > 16) {
        i = 16;
    }

    const __m128i control = _mm_loadu_si128((const __m128i *)(window + i));
    return _mm_shuffle_epi8(x, control);
}
/* | |
build P which shifts initial bytes left one character under pshufb | |
0-->3-->6-->9 etc. | |
*/ | |
// prepend 3x1 dummy slots (ensures [0:3] are initial bytes) | |
// raise P^3 | |
// broadcast [0:3] to 32 bit lanes | |
// pull lengths | |
// d = lengths signed sub 4,3,2,1 | |
// unsigned sadd Q + d => keeps sign bit for pshufb | |
// input: 16 utf-8 bytes, with an initial byte aligned to 0 | |
// output: 4xutf-32 (todo: iteration) | |
void utf32( __m128i utf8, uint32_t *out) { | |
__m128i upper4 = make_upper4(utf8); | |
__m128i lengths = _mm_alignr_epi8(_mm_shuffle_epi8( len, upper4 ), | |
_mm_set1_epi8(0), | |
16-3); | |
dump(lengths, "length"); | |
__m128i P = _mm_adds_epu8( lengths, iota1 ); | |
__m128i P2 = _mm_shuffle_epi8(P,P); | |
__m128i P3 = _mm_shuffle_epi8(P,P2); | |
dump(P, "P"); | |
dump(P3, "P3"); // [0] is always 3 => 0 after sub, ? | |
__m128i bcast = _mm_shuffle_epi8(P3, _mm_setr_epi8(0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3)); | |
__m128i lens = _mm_shuffle_epi8(lengths, bcast); | |
__m128i cont = _mm_sub_epi8( lens, _mm_setr_epi8(0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3)); | |
__m128i pull = _mm_adds_epu8(cont, bcast); | |
dump(pull, "pull"); | |
// sub 3 from pull and apply to unshifted source vector | |
__m128i pull_from_source = _mm_sub_epi8(pull, _mm_set1_epi8(3)); | |
__m128i masked = _mm_and_si128(_mm_shuffle_epi8(mask,upper4), utf8); | |
__m128i bitfields = _mm_shuffle_epi8(masked, pull_from_source); | |
dumpx(masked, "masked"); | |
dumpx(bitfields, "fields"); | |
__m128i mout = compress(bitfields); | |
dumpx(mout, "final"); | |
_mm_store_si128((__m128i *) out, mout); | |
} | |
void utf32_code(__m128i utf8, uint32_t *out) { | |
__m128i upper4 = make_upper4(utf8); | |
__m128i lengths = _mm_shuffle_epi8(len, upper4 ); | |
__m128i P = _mm_adds_epu8(lengths, iota1); | |
__m128i P2 = _mm_shuffle_epi8(P,P); | |
__m128i P4 = _mm_shuffle_epi8(P2,P2); | |
char jmp[16]; | |
_mm_storeu_si128((__m128i *) jmp, P4); | |
// iterate through jmp | |
int multi = 0; | |
__m128i masked; | |
unsigned char bcode[16]; | |
for(int i = 0; jmp[i] > i; i = jmp[i]) { | |
int blen = jmp[i] - i; | |
printf("i = %d len = %d\n", i, blen); | |
if( blen == 4 ) | |
// ascii-only requires no masking, merging, or validation | |
_mm_store_si128((__m128i *)out, _mm_cvtepu8_epi32(utf8)); | |
else { | |
if( !multi ) { | |
__m128i code12 = _mm_or_si128(lengths,_mm_slli_epi64(_mm_shuffle_epi8(lengths, P), 2)); | |
__m128i code = _mm_or_si128(code12,_mm_slli_epi64(_mm_shuffle_epi8(code12, P2), 4)); | |
_mm_storeu_si128( (__m128i *)bcode, code ); | |
masked = _mm_and_si128(_mm_shuffle_epi8(mask,upper4), utf8); | |
multi = 1; | |
} | |
printf("code: %2x\n", bcode[i]); | |
uint8_t *pshuf = (uint8_t *) &shuffleTable[bcode[i]]; // endianness is wrong | |
__m128i Shuf = *(__m128i *)pshuf; | |
dump(Shuf, "Shuf"); | |
__m128i bitfields = _mm_shuffle_epi8(shift(masked, i), Shuf); | |
// todo: compress bitfields | |
__m128i mout = compress(bitfields); | |
dumpx(mout, "final"); | |
} | |
out += 4; | |
} | |
} | |
/*
 * Demo driver: decodes a mixed ASCII/Hangul-jamo sample with utf32() and
 * prints the four resulting code points, then runs utf32_code() on the same
 * sample and on a 16-byte ASCII string (both print their own trace).
 */
int main(void) {
    // reference: https://unicode-table.com/en/3139/
    // 0x61, 0x3131, 0x62, 0x3134, 0x3137, 0x3139, ...
    /* String literals carry no 16-byte alignment guarantee, so load them
       with the unaligned intrinsic rather than dereferencing as __m128i. */
    __m128i utf8 = _mm_loadu_si128((const __m128i *) "aㄱbㄴㄷㄹㅁㅂㅅㅇㅈㅊㅋㅌㅍㅎ");

    /* 16-byte aligned and zero-initialised: the decoders write whole
       vectors into this buffer. */
    _Alignas(16) uint32_t out[16] = {0};

    utf32(utf8, out);
    for (int i = 0; i < 4; i++) {
        printf("%8" PRIx32 ",", out[i]);
    }
    printf("\n==== DONE =====\n");

    utf32_code(utf8, out);

    __m128i ascii = _mm_loadu_si128((const __m128i *) "abcdefghijklmnop");
    utf32_code(ascii, out);

    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment