Skip to content

Instantly share code, notes, and snippets.

@Yuu6883
Created October 21, 2025 12:31
Show Gist options
  • Select an option

  • Save Yuu6883/e2415116324114038f528d6d8b9599e2 to your computer and use it in GitHub Desktop.

Select an option

Save Yuu6883/e2415116324114038f528d6d8b9599e2 to your computer and use it in GitHub Desktop.
#include <iostream>
#include <random>
#include <vector>
#include <chrono>
#include <functional>
#include <algorithm>
#include <cassert>
using namespace std;
using namespace chrono;
struct alignas(64) Test {
double chance;
uint64_t id;
uint64_t bytes[6];
};
// using TestVector = AlignedVector<struct Test>;
using TestVector = vector<struct Test>;
constexpr uint32_t ITER = 16;
constexpr size_t TEST_ELEM = 1024 * 1024 * 32;
// do not inline constants
volatile double threshold = 0.25 / ITER;
volatile uint64_t V = 0xDEADBEEF;
void bench(TestVector& test, std::function<void(TestVector&)> func, const char* notes) {
auto start = high_resolution_clock::now();
for (size_t iter = 0; iter < ITER; iter++)
func(test);
auto end = high_resolution_clock::now();
cout << notes << ": "
<< duration_cast<milliseconds>(end - start).count() << " milliseconds, "
<< test.size() << " elements remain." << endl;
}
void bench1(TestVector& test) {
size_t size = test.size();
for (size_t i = 0; i < size;) {
auto& t = test[i];
t.chance -= threshold;
// simulate read modify write
t.bytes[4] += t.bytes[1];
t.bytes[5] += t.bytes[2] + V;
if (t.chance < threshold) {
if (i + 1 < size)
test[i] = std::move(test.back());
test.pop_back();
size--;
}
else i++;
}
}
void bench2(TestVector& test) {
size_t i = 0;
auto newEnd = remove_if(test.begin(), test.end(), [&](Test& t) {
t.chance -= threshold;
// simulate read modify write
t.bytes[4] += t.bytes[1];
t.bytes[5] += t.bytes[2] + V;
return t.chance < threshold;
});
test.erase(newEnd, test.end());
}
void bench3(TestVector& test) {
size_t write_index = 0;
size_t end = test.size();
for (size_t i = 0; i < end; i++) {
auto& t = test[i];
double chance = t.chance - threshold;
bool remove = chance < threshold;
auto& dest = test[write_index];
dest.id = t.id;
dest.chance = chance;
dest.bytes[0] = t.bytes[0];
dest.bytes[1] = t.bytes[1];
dest.bytes[2] = t.bytes[2];
dest.bytes[3] = t.bytes[3];
// modify in stack/registers, then write back to write_index
dest.bytes[4] = t.bytes[4] + t.bytes[1];
dest.bytes[5] = t.bytes[5] + t.bytes[2] + V;
write_index += !remove;
}
test.resize(write_index);
}
int main() {
TestVector t1;
mt19937 rng(0);
uniform_real_distribution<double> roll(0.0, 1.0);
t1.reserve(TEST_ELEM);
t1.resize(TEST_ELEM);
// populate with bogus data
for (size_t i = 0; i < t1.size(); i++) {
t1[i].id = i;
t1[i].chance = roll(rng);
for (size_t j = 0; j < 7; j++)
t1[i].bytes[j] = 7 * i + j;
}
// copy
TestVector t2 = t1;
TestVector t3 = t1;
assert(static_cast<uint64_t>(t1.data()) % 64 == 0);
assert(static_cast<uint64_t>(t2.data()) % 64 == 0);
assert(static_cast<uint64_t>(t3.data()) % 64 == 0);
bench(t1, &bench1, "test1 (swap back)");
bench(t2, &bench2, "test2 (std::remove_if)");
bench(t3, &bench3, "test3 (branchless write)");
std::sort(t1.begin(), t1.end(), [](const Test& a, const Test& b) { return a.id < b.id; });
assert(t1.size() == t2.size() && t1.size() == t3.size());
for (size_t i = 0; i < t1.size(); i++) {
if (t1[i].id != t2[i].id || t1[i].id != t3[i].id) {
cout << "Mismatch at index " << i << ": " << t1[i].id << " vs " << t2[i].id << " vs " << t3[i].id << endl;
return 1;
}
if (t1[i].bytes[0] != t2[i].bytes[0] || t1[i].bytes[0] != t3[i].bytes[0] ||
t1[i].bytes[1] != t2[i].bytes[1] || t1[i].bytes[1] != t3[i].bytes[1] ||
t1[i].bytes[2] != t2[i].bytes[2] || t1[i].bytes[2] != t3[i].bytes[2] ||
t1[i].bytes[3] != t2[i].bytes[3] || t1[i].bytes[3] != t3[i].bytes[3] ||
t1[i].bytes[4] != t2[i].bytes[4] || t1[i].bytes[4] != t3[i].bytes[4] ||
t1[i].bytes[5] != t2[i].bytes[5] || t1[i].bytes[5] != t3[i].bytes[5]) {
cout << "Byte mismatch at index " << i << endl;
return 1;
}
if (t1[i].chance != t2[i].chance || t1[i].chance != t3[i].chance) {
cout << "Chance mismatch at index " << i << endl;
return 1;
}
}
cout << "All tests passed!" << endl;
return 0;
}
@Yuu6883
Copy link
Author

Yuu6883 commented Oct 21, 2025

Output on my PC:

test1 (swap back): 1132 milliseconds, 24641574 elements remain.
test2 (std::remove_if): 1782 milliseconds, 24641574 elements remain.
test3 (branchless write): 1300 milliseconds, 24641574 elements remain.
All tests passed!

You can play around with different TEST_ELEM and ITER for more or less cache-coherent runs.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment