Skip to content

Instantly share code, notes, and snippets.

@pps83
Last active March 31, 2025 16:21
Show Gist options
  • Save pps83/3210a2f980fd02bb2ba2e5a1fc4a2ef0 to your computer and use it in GitHub Desktop.
Save pps83/3210a2f980fd02bb2ba2e5a1fc4a2ef0 to your computer and use it in GitHub Desktop.
__builtin_ctz (ctzl, ctzll) and __builtin_clz (clzl, clzll) for Visual Studio
// Note, bsf/bsr are used by default.
// Enable /arch:AVX2 compilation for better optimizations
//
// Everything below is compiled only by the Microsoft compiler proper;
// clang-cl is excluded by the !defined(__clang__) test.
#if defined(_MSC_VER) && !defined(__clang__)
#include <intrin.h>
#include <limits.h>
// C++20 or newer (detected through either __cplusplus or _MSVC_LANG): the
// functions are declared constexpr, and CONSTEVAL_ expands to a
// std::is_constant_evaluated() test so they can choose a plain-loop
// implementation during constant evaluation instead of calling intrinsics.
#if (defined(__cplusplus) && (__cplusplus >= 202002L)) || \
(defined(_MSVC_LANG) && (_MSVC_LANG >= 202002L))
#include <type_traits>
#define CONSTEVAL_ (std::is_constant_evaluated())
#define CONSTEXPR_ constexpr
#else
// Older C++ or plain C: CONSTEXPR_ expands to nothing and CONSTEVAL_ stays
// undefined, which removes the constant-evaluation branches via #ifdef.
#define CONSTEXPR_
#endif
// MSVC stand-in for the GCC/Clang __builtin_ctz builtin: returns the number of
// trailing zero bits of x, i.e. the index of its lowest set bit.
//
// x is expected to be non-zero. For x == 0 the result depends on the code path
// taken (the bsf fallback returns its output variable without it having been
// written), matching the "undefined for zero" contract of the real builtin.
static CONSTEXPR_ __forceinline int __builtin_ctz(unsigned x)
{
#ifdef CONSTEVAL_
    // Intrinsics are not usable during constant evaluation, so walk the bits.
    if CONSTEVAL_
    {
        // Hold the width as an int so the loop compares int with int; comparing
        // the int index against a size_t expression is a signed/unsigned mismatch.
        const int bits = (int)(sizeof(x) * CHAR_BIT);
        for (int i = 0; i < bits; ++i)
        {
            if ((x >> i) & 1)
                return i;
        }
        return bits; // no bit set: report the full width
    }
#endif
#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC)
    return (int)_CountTrailingZeros(x);
#elif defined(__AVX2__) || defined(__BMI__)
    // Builds that enable AVX2 (or define __BMI__) use tzcnt.
    return (int)_tzcnt_u32(x);
#else
    // Default x86/x64 path: bsf stores the index of the lowest set bit in r.
    unsigned long r;
    _BitScanForward(&r, x);
    return (int)r;
#endif
}
// MSVC stand-in for the GCC/Clang __builtin_ctzll builtin: returns the number
// of trailing zero bits of the 64-bit value x (index of its lowest set bit).
//
// x is expected to be non-zero; as with the real builtin, the result for
// x == 0 is not meaningful on the bsf-based paths.
static CONSTEXPR_ __forceinline int __builtin_ctzll(unsigned long long x)
{
#ifdef CONSTEVAL_
    // Intrinsics are not usable during constant evaluation, so walk the bits.
    if CONSTEVAL_
    {
        // Hold the width as an int so the loop compares int with int; comparing
        // the int index against a size_t expression is a signed/unsigned mismatch.
        const int bits = (int)(sizeof(x) * CHAR_BIT);
        for (int i = 0; i < bits; ++i)
        {
            if ((x >> i) & 1)
                return i;
        }
        return bits; // no bit set: report the full width
    }
#endif
#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC)
    return (int)_CountTrailingZeros64(x);
#elif defined(_WIN64)
#if defined(__AVX2__) || defined(__BMI__)
    // Builds that enable AVX2 (or define __BMI__) use tzcnt.
    return (int)_tzcnt_u64(x);
#else
    // Default x64 path: bsf stores the index of the lowest set bit in r.
    unsigned long r;
    _BitScanForward64(&r, x);
    return (int)r;
#endif
#else
    // 32-bit x86 has no 64-bit scan, so work on the two 32-bit halves. Only the
    // half that actually holds the lowest set bit is scanned; scanning a zero
    // half would consume a result that the bsf path never wrote.
    const unsigned lo = (unsigned)x;
    if (lo)
        return __builtin_ctz(lo);
    return __builtin_ctz((unsigned)(x >> 32)) + 32;
#endif
}
// MSVC stand-in for __builtin_ctzl: trailing-zero count for unsigned long.
// Forwards to the 64-bit or the 32-bit variant according to the size of
// unsigned long on the target.
static CONSTEXPR_ __forceinline int __builtin_ctzl(unsigned long x)
{
    return (sizeof(x) != 8) ? __builtin_ctz((unsigned)x)
                            : __builtin_ctzll(x);
}
// MSVC stand-in for the GCC/Clang __builtin_clz builtin: returns the number of
// leading zero bits of x, counted from the most significant bit.
//
// x is expected to be non-zero. For x == 0 the result depends on the code path
// taken (the bsr fallback returns a value derived from an output variable that
// was never written), matching the "undefined for zero" contract of the real
// builtin.
static CONSTEXPR_ __forceinline int __builtin_clz(unsigned x)
{
#ifdef CONSTEVAL_
    // Intrinsics are not usable during constant evaluation, so walk the bits.
    if CONSTEVAL_
    {
        // Hold the width as an int so the loop compares int with int; comparing
        // the int index against a size_t expression is a signed/unsigned mismatch.
        const int bits = (int)(sizeof(x) * CHAR_BIT);
        for (int i = 0; i < bits; ++i)
        {
            // Non-zero once the shift keeps the top (i + 1) bits and one is set.
            if (x >> (bits - 1 - i))
                return i;
        }
        return bits; // no bit set: report the full width
    }
#endif
#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC)
    return (int)_CountLeadingZeros(x);
#elif defined(__AVX2__) || defined(__LZCNT__)
    // Builds that enable AVX2 (or define __LZCNT__) use lzcnt.
    return (int)_lzcnt_u32(x);
#else
    // Default x86/x64 path: bsr stores the index of the highest set bit in r;
    // for r in 0..31, r ^ 31 equals 31 - r, the leading-zero count.
    unsigned long r;
    _BitScanReverse(&r, x);
    return (int)(r ^ 31);
#endif
}
// MSVC stand-in for the GCC/Clang __builtin_clzll builtin: returns the number
// of leading zero bits of the 64-bit value x, counted from the most
// significant bit.
//
// x is expected to be non-zero; as with the real builtin, the result for
// x == 0 is not meaningful on the bsr-based paths.
static CONSTEXPR_ __forceinline int __builtin_clzll(unsigned long long x)
{
#ifdef CONSTEVAL_
    // Intrinsics are not usable during constant evaluation, so walk the bits.
    if CONSTEVAL_
    {
        // Hold the width as an int so the loop compares int with int; comparing
        // the int index against a size_t expression is a signed/unsigned mismatch.
        const int bits = (int)(sizeof(x) * CHAR_BIT);
        for (int i = 0; i < bits; ++i)
        {
            // Non-zero once the shift keeps the top (i + 1) bits and one is set.
            if (x >> (bits - 1 - i))
                return i;
        }
        return bits; // no bit set: report the full width
    }
#endif
#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC)
    return (int)_CountLeadingZeros64(x);
#elif defined(_WIN64)
#if defined(__AVX2__) || defined(__LZCNT__)
    // Builds that enable AVX2 (or define __LZCNT__) use lzcnt.
    return (int)_lzcnt_u64(x);
#else
    // Default x64 path: bsr stores the index of the highest set bit in r;
    // for r in 0..63, r ^ 63 equals 63 - r, the leading-zero count.
    unsigned long r;
    _BitScanReverse64(&r, x);
    return (int)(r ^ 63);
#endif
#else
    // 32-bit x86 has no 64-bit scan, so work on the two 32-bit halves. Only the
    // half that actually holds the highest set bit is scanned; scanning a zero
    // half would consume a result that the bsr path never wrote.
    const unsigned hi = (unsigned)(x >> 32);
    if (hi)
        return __builtin_clz(hi);
    return __builtin_clz((unsigned)x) + 32;
#endif
}
// MSVC stand-in for __builtin_clzl: leading-zero count for unsigned long.
// Forwards to the 64-bit or the 32-bit variant according to the size of
// unsigned long on the target.
static CONSTEXPR_ __forceinline int __builtin_clzl(unsigned long x)
{
    return (sizeof(x) != 8) ? __builtin_clz((unsigned)x)
                            : __builtin_clzll(x);
}
// CONSTEVAL_ and CONSTEXPR_ are helpers for the definitions above only;
// undefine them so they do not leak into code that follows.
#undef CONSTEVAL_
#undef CONSTEXPR_
#endif // defined(_MSC_VER) && !defined(__clang__)
@pps83
Copy link
Author

pps83 commented May 7, 2024

Thanks very much, Pavel, that's perfect, you're a star! :-)

Note, I updated it to:

  • check for __AVX2__ builds to use lzcnt/tzcnt vs bsf/bsr
  • add code to handle arm/arm64
  • add guard for clang-cl

The code was tested to verify that it's bit-exact with the code that clang/gcc emit for these builtins.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment