Skip to content

Instantly share code, notes, and snippets.

@00xc
Last active May 13, 2024 10:49
Show Gist options
  • Save 00xc/e6b160970e242c7c3f439f1ae98287ad to your computer and use it in GitHub Desktop.
Save 00xc/e6b160970e242c7c3f439f1ae98287ad to your computer and use it in GitHub Desktop.
Finding the lowest set bit in a 512-bit array using SIMD
#![feature(portable_simd)]
#![feature(stdsimd)]
use core::arch::x86_64::*;
/// Returns the index of the lowest set bit of a 512-bit value, or `None` if
/// no bit is set.
///
/// The vector is treated as eight 64-bit lanes, where lane `i` covers bit
/// positions `64 * i ..= 64 * i + 63`, so a returned index is always in
/// `0..512`.
///
/// # Safety
///
/// The intrinsics used here require AVX-512F, and `_mm512_lzcnt_epi64`
/// additionally requires AVX-512CD. The caller must ensure the running CPU
/// supports both.
unsafe fn _mm512_tzcnt_si512(v: __m512i) -> Option<usize> {
    // One mask bit per lane, set when that lane contains at least one set bit.
    let nonzero_lanes = _mm512_cmpneq_epi64_mask(v, _mm512_setzero_si512());

    // `(v - 1) ^ v` turns on the lowest set bit and every bit below it, so
    // `63 - lzcnt(..)` is the position of that lowest set bit inside the lane.
    let v_minus_one = _mm512_sub_epi64(v, _mm512_set1_epi64(1));
    let low_ones = _mm512_xor_epi64(v, v_minus_one);
    let bit_in_lane = _mm512_sub_epi64(
        _mm512_set1_epi64(63),
        _mm512_lzcnt_epi64(low_ones),
    );

    // Bit offset of each lane within the full 512-bit value, lane 0 first.
    let lane_offset = _mm512_setr_epi64(
        64 * 0,
        64 * 1,
        64 * 2,
        64 * 3,
        64 * 4,
        64 * 5,
        64 * 6,
        64 * 7,
    );

    // Non-empty lanes receive their global bit index; empty lanes keep
    // `i64::MAX` so they can never win the minimum below.
    let candidates = _mm512_mask_add_epi64(
        _mm512_set1_epi64(i64::MAX),
        nonzero_lanes,
        lane_offset,
        bit_in_lane,
    );

    let lowest = _mm512_reduce_min_epi64(candidates);
    // Any value of 512 or more can only be the `i64::MAX` filler, i.e. every
    // lane was empty.
    if lowest < 512 {
        Some(lowest as usize)
    } else {
        None
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment