Skip to content

Instantly share code, notes, and snippets.

@jweinst1
Created December 9, 2025 08:34
Show Gist options
  • Select an option

  • Save jweinst1/0ba7b13909aa3f5135830d5f1e8aacc2 to your computer and use it in GitHub Desktop.

Select an option

Save jweinst1/0ba7b13909aa3f5135830d5f1e8aacc2 to your computer and use it in GitHub Desktop.
uses optimized popcount of integer series shifted right once to get very fast mean approximation
#include <array>
#include <cstdint>
#include <cstddef>
#include <cmath>
#include <cstdio>
#include <climits>
#include <vector>
#include <random>
#include <cassert>
struct ConstexprBitset64 {
uint64_t block = 0;
constexpr void set(size_t idx) {
block |= uint64_t{1} << idx;
}
constexpr void clear(size_t idx) {
block &= ~(uint64_t{1} << idx);
}
constexpr bool test(size_t idx) const {
return (block >> idx) & 1u;
}
// mean approximate functions
constexpr size_t getMeanApprox64() const {
// Uses 64 bit approx
return __builtin_popcountll(block) >> 1;
}
constexpr size_t count() const {
return __builtin_popcountll(block);
}
};
struct BitSet256 {
ConstexprBitset64 bins[4];
constexpr size_t count() const {
return bins[0].count() +
bins[1].count() +
bins[2].count() +
bins[3].count();
}
constexpr size_t mean() const {
return bins[0].getMeanApprox64() +
bins[1].getMeanApprox64() +
bins[2].getMeanApprox64() +
bins[3].getMeanApprox64();
}
constexpr void set(size_t idx) {
const size_t block = idx >> 6;
const size_t offset = idx & 63;
bins[block].set(offset);
}
};
template<size_t dimCount>
struct MeanNode {
BitSet256 vec[dimCount];
std::array<uint8_t, dimCount> curMean = {};
constexpr void calcMean() {
for (int i = 0; i < dimCount; ++i)
{
curMean[i] = vec[i].mean();
}
}
constexpr size_t distanceFromEuc(const std::array<uint8_t, dimCount>& other) const {
size_t total = 0;
for (int i = 0; i < dimCount; ++i)
{
size_t diff = curMean[i] - other[i];
total += diff * diff;
}
// no sqrt for speed
return total;
}
constexpr void insert(const std::array<uint8_t, dimCount>& other) {
for (int i = 0; i < dimCount; ++i)
{
vec[i].set(other[i]);
}
// todo make optional
calcMean();
}
};
static constexpr size_t testSampleSize = 1024 * 1024;
static constexpr size_t dimensionLength = 96;
using VectorType = std::array<uint8_t, dimensionLength>;
int main(int argc, char const *argv[])
{
std::printf("Using test sample size %zu\n", testSampleSize);
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<uint16_t> distrib(0, 255);
std::vector<VectorType> randData;
size_t curRand = 0;
for (int i = 0; i < testSampleSize; ++i)
{
VectorType v;
for (int j = 0; j < dimensionLength; ++j)
{
v[j] = static_cast<uint8_t>(distrib(gen));
}
randData.push_back(v);
}
static constexpr size_t meanCenterCount = 10;
std::array<MeanNode<dimensionLength>, meanCenterCount> meanCenters;
for (int i = 0; i < meanCenterCount; ++i)
{
meanCenters[i].insert(randData[curRand++]);
}
while (curRand > randData.size()) {
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment