Skip to content

Instantly share code, notes, and snippets.

@KWillets
Created October 8, 2018 16:15
Show Gist options
  • Save KWillets/10a12dcfd9208f5bf48d5c209729ab03 to your computer and use it in GitHub Desktop.
Save KWillets/10a12dcfd9208f5bf48d5c209729ab03 to your computer and use it in GitHub Desktop.
Jaccard index count benchmarks
#include <stdlib.h>
#include <stdio.h>
#include <x86intrin.h>
#include <inttypes.h>
#include "benchmark.h"
// element count per array; keep it a multiple of 8 so every SIMD variant processes whole vectors only:
#define N (2048)
// i5 1.637
// Scalar reference implementation: counts the positions i in [0, n) for
// which a[i] == b[i].
//   a, b: arrays holding at least n elements each
//   n:    number of element pairs to compare
// The tally is kept in a size_t and narrowed to int on return.
static inline int simple( uint32_t *a, uint32_t *b, size_t n ) {
    size_t matches = 0;
    uint32_t *pa = a;
    uint32_t *pb = b;
    for (size_t left = n; left != 0; left--) {
        // a comparison evaluates to 1 on equality, 0 otherwise
        matches += (*pa++ == *pb++);
    }
    return matches;
}
// i5 1.67
// Counts the positions i in [0, n) for which a[i] == b[i], comparing four
// 32-bit lanes at a time with SSE2 and tallying via movemask + popcount.
//   a, b: arrays holding at least n elements each; no alignment is required
//   n:    number of element pairs; any value is accepted (the n % 4 leftover
//         elements are handled by a scalar loop)
// Returns the number of matching positions, narrowed to int.
static inline int mask( uint32_t *a, uint32_t *b, size_t n ) {
    size_t cnt = 0;
    size_t i = 0;
    // i <= n holds throughout, so n - i cannot wrap.
    for (; n - i >= 4; i += 4) {
        // Unaligned loads: a uint32_t pointer only guarantees 4-byte alignment.
        __m128i va = _mm_loadu_si128((const __m128i *)(a + i));
        __m128i vb = _mm_loadu_si128((const __m128i *)(b + i));
        __m128i eq = _mm_cmpeq_epi32(va, vb);
        // Every equal lane contributes 4 set bits to the 16-bit byte mask,
        // so popcount / 4 is the number of equal lanes.
        int m = _mm_movemask_epi8(eq);
        cnt += (size_t)(__builtin_popcount((unsigned)m) >> 2);
    }
    // Scalar tail so nothing past element n - 1 is read or counted.
    for (; i < n; i++) {
        cnt += (a[i] == b[i]);
    }
    return (int)cnt;
}
// skylake: .66
// Counts the positions i in [0, n) for which a[i] == b[i] by accumulating
// the SSE2 comparison masks directly: an equal lane is all-ones (-1), so the
// per-lane sums are the negated match counts.
//   a, b: arrays holding at least n elements each; no alignment is required
//   n:    number of element pairs; any value is accepted (the n % 4 leftover
//         elements are handled by a scalar loop)
// Returns the number of matching positions.
static inline int add( uint32_t *a, uint32_t *b, size_t n ) {
    int32_t cnt[4];
    __m128i acc = _mm_setzero_si128();
    size_t i = 0;
    // i <= n holds throughout, so n - i cannot wrap.
    for (; n - i >= 4; i += 4) {
        // Unaligned loads: a uint32_t pointer only guarantees 4-byte alignment.
        __m128i va = _mm_loadu_si128((const __m128i *)(a + i));
        __m128i vb = _mm_loadu_si128((const __m128i *)(b + i));
        __m128i eq = _mm_cmpeq_epi32(va, vb);
        acc = _mm_add_epi32(acc, eq); // mask == (int) -1
    }
    // cnt is a plain int32_t array, so it needs an unaligned store as well.
    _mm_storeu_si128((__m128i *)cnt, acc);
    int total = -(cnt[0] + cnt[1] + cnt[2] + cnt[3]);
    // Scalar tail so nothing past element n - 1 is read or counted.
    for (; i < n; i++) {
        total += (a[i] == b[i]);
    }
    return total;
}
// skylake: .5
// Counts the positions i in [0, n) for which a[i] == b[i]. Same mask
// accumulation as add(), but with two independent accumulators fed by two
// vectors (8 elements) per iteration.
//   a, b: arrays holding at least n elements each; no alignment is required
//   n:    number of element pairs; any value is accepted (a leftover group
//         of 4 takes one extra vector step, the final n % 4 elements a
//         scalar loop)
// Returns the number of matching positions.
static inline int add2( uint32_t *a, uint32_t *b, size_t n ) {
    int32_t cnt[4];
    __m128i acc = _mm_setzero_si128();
    __m128i acc2 = _mm_setzero_si128();
    size_t i = 0;
    // i <= n holds throughout, so n - i cannot wrap.
    for (; n - i >= 8; i += 8) {
        // Unaligned loads: a uint32_t pointer only guarantees 4-byte alignment.
        __m128i eq = _mm_cmpeq_epi32(
            _mm_loadu_si128((const __m128i *)(a + i)),
            _mm_loadu_si128((const __m128i *)(b + i)));
        __m128i eq2 = _mm_cmpeq_epi32(
            _mm_loadu_si128((const __m128i *)(a + i + 4)),
            _mm_loadu_si128((const __m128i *)(b + i + 4)));
        acc = _mm_add_epi32(acc, eq);    // mask == (int) -1
        acc2 = _mm_add_epi32(acc2, eq2); // mask == (int) -1
    }
    // At most one whole vector can remain after the 8-wide loop.
    if (n - i >= 4) {
        __m128i eq = _mm_cmpeq_epi32(
            _mm_loadu_si128((const __m128i *)(a + i)),
            _mm_loadu_si128((const __m128i *)(b + i)));
        acc = _mm_add_epi32(acc, eq);
        i += 4;
    }
    acc = _mm_add_epi32(acc, acc2);
    // cnt is a plain int32_t array, so it needs an unaligned store as well.
    _mm_storeu_si128((__m128i *)cnt, acc);
    int total = -(cnt[0] + cnt[1] + cnt[2] + cnt[3]);
    // Scalar tail so nothing past element n - 1 is read or counted.
    for (; i < n; i++) {
        total += (a[i] == b[i]);
    }
    return total;
}
// Benchmark driver: fills two N-element arrays, computes the reference match
// count with simple(), prints it, then runs each counting variant through
// BEST_TIME with that count as the expected result.
// Returns EXIT_FAILURE if an allocation fails, 0 otherwise.
int main(void) {
    uint32_t *a = malloc(N * sizeof *a);
    uint32_t *b = malloc(N * sizeof *b);
    if (a == NULL || b == NULL) {
        perror("malloc");
        free(a);
        free(b);
        return EXIT_FAILURE;
    }
    const int repeat = 100;
    // Both arrays receive the same random value at every index, so every
    // position matches and the expected count equals N.
    for(int i=0; i<N; i++) {
        a[i]=b[i]=rand();
    }
    int expected = simple(a,b,N);
    printf("expected = %d\n", expected);
    // NOTE(review): BEST_TIME comes from benchmark.h (not visible here);
    // presumably it times the call over `repeat` runs and checks the result
    // against `expected` -- confirm against that header. The call expressions
    // are kept token-for-token in case the macro stringifies them.
    BEST_TIME(simple(a, b, N),
              expected, , repeat, N, 1);
    BEST_TIME(mask(a, b, N),
              expected, , repeat, N, 1);
    BEST_TIME(add(a,b,N),
              expected, , repeat, N, 1);
    BEST_TIME(add2(a,b,N),
              expected, , repeat, N, 1);
    free(a);
    free(b);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment