Skip to content

Instantly share code, notes, and snippets.

@KWillets
Created December 27, 2018 17:33
Show Gist options
  • Save KWillets/8a633c0b5774a6d77bb721d11d3778d0 to your computer and use it in GitHub Desktop.
Save KWillets/8a633c0b5774a6d77bb721d11d3778d0 to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <x86intrin.h>
#include <inttypes.h>
#include "streamvbyte_shuffle_tables_decode.h"
/*
 * Debug helper: print `tag` right-aligned in six columns, then the 16 bytes
 * of `x` as decimal integers (each byte read as a plain char), one line total.
 */
void dump(const __m128i x, char * tag) {
    char lanes[16];
    _mm_storeu_si128((__m128i *) lanes, x);

    printf("%6s: ", tag);
    int k = 0;
    while (k < 16) {
        printf("%2i,", lanes[k]);
        k++;
    }
    printf("\n");
}
/*
 * Debug helper: print `tag` right-aligned in six columns, then the 16 bytes
 * of `x` as unsigned hexadecimal values, one line total.
 */
void dumpx(const __m128i x, char * tag) {
    unsigned char lanes[16];
    _mm_storeu_si128((__m128i *) lanes, x);

    printf("%6s: ", tag);
    int k = 0;
    while (k < 16) {
        printf("%2x,", lanes[k]);
        k++;
    }
    printf("\n");
}
/*
 * Constant vectors used as pshufb lookup tables. `mask` and `len` are
 * looked up with the upper nibble of a UTF-8 byte:
 *   0x0-0x7  ASCII byte
 *   0x8-0xB  continuation byte
 *   0xC-0xD  lead byte of a 2-byte sequence
 *   0xE      lead byte of a 3-byte sequence
 *   0xF      lead byte of a 4-byte sequence
 */

/* mask: the payload bits kept for each class of byte. */
#define mask (_mm_setr_epi8( \
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, \
    0x3F, 0x3F, 0x3F, 0x3F, \
    0x1F, 0x1F, \
    0x0F, \
    0x07))

/*
 * len: number of continuation bytes that follow a lead byte (0 for ASCII);
 * -1 marks a continuation byte itself.
 */
#define len (_mm_setr_epi8( \
    0, 0, 0, 0, 0, 0, 0, 0, \
    -1, -1, -1, -1, \
    1, 1, \
    2, \
    3))

/* iota1: each byte holds its own index plus one, i.e. 1..16. */
#define iota1 (_mm_setr_epi8( \
    1, 2, 3, 4, 5, 6, 7, 8, \
    9, 10, 11, 12, 13, 14, 15, 16))
// shared routines
/*
 * Merge the four bytes b0..b3 of every 32-bit lane into one integer per lane:
 *     b0 + (b1 << 6) + (b2 << 12) + (b3 << 18)
 * Byte 0 of a lane therefore supplies the lowest bits of the result.
 */
__m128i compress( __m128i bitfields ) {
    /* per byte pair: weights (1, 0x40) give lo + (hi << 6) in each 16-bit word */
    const __m128i pair_weights = _mm_set1_epi32(0x40014001);
    /* per word pair: weights (1, 0x1000) give lo + (hi << 12) in each 32-bit lane */
    const __m128i word_weights = _mm_set1_epi32(0x10000001);

    const __m128i halves = _mm_maddubs_epi16(bitfields, pair_weights);
    return _mm_madd_epi16(halves, word_weights);
}
/* Return a vector whose every byte is the upper nibble (byte >> 4) of the matching byte of `utf8`. */
__m128i make_upper4(__m128i utf8) {
    const __m128i low_nibble = _mm_set1_epi8(0x0F);
    /* the 64-bit shift drags bits across byte boundaries; the mask below drops them */
    const __m128i shifted = _mm_srli_epi64(utf8, 4);
    return _mm_and_si128(shifted, low_nibble);
}
/*
 * Move the bytes of `x` by `i` positions towards index 0, filling with zeros.
 *
 * Byte j of the result is x[j + i] when 0 <= j + i < 16 and 0 otherwise, so:
 *   - i == 0 returns x unchanged,
 *   - 0 < i < 16 drops the first i bytes and zero-fills the tail,
 *   - negative i moves bytes towards higher indices, zero-filling the front,
 *   - |i| >= 16 returns an all-zero vector.
 *
 * The shuffle control is a 16-byte window into a constant table; entries of
 * -1 have the high bit set, which makes pshufb write a zero byte.
 */
__m128i shift( __m128i x, int i ) {
    static const signed char window[48] = {
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
         0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
    };

    /* clamp so the 16-byte load always stays inside the table */
    if (i > 16) {
        i = 16;
    }
    if (i < -16) {
        i = -16;
    }
    return _mm_shuffle_epi8(x, _mm_loadu_si128((const __m128i *)(window + 16 + i)));
}
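/*
Build P, a pshufb control vector in which each initial byte's slot holds the
position of the next character's initial byte. Applying P repeatedly steps
one character at a time, e.g. 0-->3-->6-->9 for three-byte characters.
*/
// Steps:
//  1. prepend 3 one-byte dummy slots (ensures entries [0:3] of P^3 are initial bytes)
//  2. raise P to the third power (P^3)
//  3. broadcast entries [0:3] of P^3 to the four 32-bit lanes
//  4. pull the lengths found at those positions
//  5. d = lengths minus (0,1,2,3) within each lane, as a signed subtract
//  6. unsigned saturating add of the broadcast positions and d => keeps the sign bit for pshufb
// input: 16 utf-8 bytes, with an initial byte aligned to 0
// output: 4 x utf-32 (todo: iteration)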
/*
 * Decode the first four UTF-8 characters held in `utf8` and write them as
 * four 32-bit code points to out[0..3]. Intermediate vectors are printed
 * with dump()/dumpx() for debugging.
 *
 * utf8: 16 input bytes; byte 0 must be the initial byte of a character.
 * out:  receives 4 values; no particular alignment is required.
 */
void utf32( __m128i utf8, uint32_t *out) {
    // class of every input byte, taken from its upper nibble
    __m128i upper4 = make_upper4(utf8);
    // per-byte length codes from the `len` table, moved up by three bytes so
    // that slots 0..2 hold zero-length dummy entries
    __m128i lengths = _mm_alignr_epi8(_mm_shuffle_epi8( len, upper4 ),
                                      _mm_set1_epi8(0),
                                      16-3);
    dump(lengths, "length");
    // P[j] = j + 1 + lengths[j]; the saturating add leaves 0xFF where lengths[j] is -1
    __m128i P = _mm_adds_epu8( lengths, iota1 );
    __m128i P2 = _mm_shuffle_epi8(P,P);   // P applied twice
    __m128i P3 = _mm_shuffle_epi8(P,P2);  // P applied three times
    dump(P, "P");
    dump(P3, "P3"); // P3[0] is always 3: the first real byte sits behind the three dummy slots
    // copy P3[0..3] (start position of each of the four characters) across one 32-bit lane each
    __m128i bcast = _mm_shuffle_epi8(P3, _mm_setr_epi8(0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3));
    // length code of the character that starts at each of those positions
    __m128i lens = _mm_shuffle_epi8(lengths, bcast);
    // byte k of a lane gets (length - k); negative means the character has no such byte
    __m128i cont = _mm_sub_epi8( lens, _mm_setr_epi8(0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3));
    // start + length - k: the character's last byte lands in byte 0 of the lane;
    // negative entries saturate to 0xFF so pshufb will produce zero for them
    __m128i pull = _mm_adds_epu8(cont, bcast);
    dump(pull, "pull");
    // sub 3 from pull and apply to unshifted source vector
    __m128i pull_from_source = _mm_sub_epi8(pull, _mm_set1_epi8(3));
    // strip the UTF-8 marker bits from every byte, keeping only payload bits
    __m128i masked = _mm_and_si128(_mm_shuffle_epi8(mask,upper4), utf8);
    __m128i bitfields = _mm_shuffle_epi8(masked, pull_from_source);
    dumpx(masked, "masked");
    dumpx(bitfields, "fields");
    __m128i mout = compress(bitfields);
    dumpx(mout, "final");
    // unaligned store: a uint32_t pointer only guarantees 4-byte alignment
    _mm_storeu_si128((__m128i *) out, mout);
}
void utf32_code(__m128i utf8, uint32_t *out) {
__m128i upper4 = make_upper4(utf8);
__m128i lengths = _mm_shuffle_epi8(len, upper4 );
__m128i P = _mm_adds_epu8(lengths, iota1);
__m128i P2 = _mm_shuffle_epi8(P,P);
__m128i P4 = _mm_shuffle_epi8(P2,P2);
char jmp[16];
_mm_storeu_si128((__m128i *) jmp, P4);
// iterate through jmp
int multi = 0;
__m128i masked;
unsigned char bcode[16];
for(int i = 0; jmp[i] > i; i = jmp[i]) {
int blen = jmp[i] - i;
printf("i = %d len = %d\n", i, blen);
if( blen == 4 )
// ascii-only requires no masking, merging, or validation
_mm_store_si128((__m128i *)out, _mm_cvtepu8_epi32(utf8));
else {
if( !multi ) {
__m128i code12 = _mm_or_si128(lengths,_mm_slli_epi64(_mm_shuffle_epi8(lengths, P), 2));
__m128i code = _mm_or_si128(code12,_mm_slli_epi64(_mm_shuffle_epi8(code12, P2), 4));
_mm_storeu_si128( (__m128i *)bcode, code );
masked = _mm_and_si128(_mm_shuffle_epi8(mask,upper4), utf8);
multi = 1;
}
printf("code: %2x\n", bcode[i]);
uint8_t *pshuf = (uint8_t *) &shuffleTable[bcode[i]]; // endianness is wrong
__m128i Shuf = *(__m128i *)pshuf;
dump(Shuf, "Shuf");
__m128i bitfields = _mm_shuffle_epi8(shift(masked, i), Shuf);
// todo: compress bitfields
__m128i mout = compress(bitfields);
dumpx(mout, "final");
}
out += 4;
}
}
/*
 * Demo driver: decodes a mixed ASCII/Hangul-jamo sample with utf32() and
 * prints the four resulting code points, then runs utf32_code() on the same
 * sample and on a pure-ASCII sample.
 */
int main(void) {
    // reference: https://unicode-table.com/en/3139/
    // 0x61, 0x3131, 0x62, 0x3134, 0x3137, 0x3139, ...
    // string literals carry no 16-byte alignment guarantee, so load unaligned
    __m128i utf8 = _mm_loadu_si128((const __m128i *) "aㄱbㄴㄷㄹㅁㅂㅅㅇㅈㅊㅋㅌㅍㅎ");
    // the __m128i member forces 16-byte alignment of the output words, so the
    // buffer is safe for both aligned and unaligned vector stores
    union {
        uint32_t words[16];
        __m128i align16;
    } buf;
    uint32_t *out = buf.words;
    utf32(utf8, out);
    for(int i = 0; i < 4; i++) {
        printf("%8" PRIx32 ",", out[i]);
    }
    printf("\n==== DONE =====\n");
    utf32_code(utf8, out);
    __m128i ascii = _mm_loadu_si128((const __m128i *) "abcdefghijklmnop");
    utf32_code(ascii, out);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment