Skip to content

Instantly share code, notes, and snippets.

@sansmoraxz
Last active October 10, 2024 05:17
Show Gist options
  • Save sansmoraxz/d963546d30e03c24b74902a47d4dc1bc to your computer and use it in GitHub Desktop.
Save sansmoraxz/d963546d30e03c24b74902a47d4dc1bc to your computer and use it in GitHub Desktop.
Pure C++ benchmarking of https://arxiv.org/abs/2410.00907
#include <algorithm>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <random>
#include <string>
#include <vector>
using namespace std;
const u_int32_t FLOAT32_EXPONENT_MASK = 0xC0880000; // -0x3F780000
// Baseline C++ implementation from the paper
_Float32 lmul(_Float32 x, _Float32 y)
{
_Float32 s;
u_int32_t *x_bits_ptr = reinterpret_cast<u_int32_t *>(&x);
u_int32_t *y_bits_ptr = reinterpret_cast<u_int32_t *>(&y);
u_int32_t result = *x_bits_ptr + *y_bits_ptr + FLOAT32_EXPONENT_MASK;
s = *reinterpret_cast<_Float32 *>(&result);
return s;
}
// Pure assembly implementation for comparison
_Float32 lmul_asm(_Float32 x, _Float32 y)
{
_Float32 s;
asm(
"movl %1, %%eax\n\t" // Move x bits to eax
"movl %2, %%ebx\n\t" // Move y bits to ebx
"addl %%ebx, %%eax\n\t" // Add y bits to x bits
"addl %3, %%eax\n\t" // Add FLOAT32_EXPONENT_MASK to the result
"movl %%eax, %0" // Move the result to s
: "=m"(s)
: "m"(x), "m"(y), "r"(FLOAT32_EXPONENT_MASK)
: "%eax", "%ebx");
return s;
}
// Function to generate random floating point numbers
float generateRandomFloat(float min, float max)
{
random_device rd;
mt19937 gen(rd());
uniform_real_distribution<> dis(min, max);
return dis(gen);
}
// Benchmark multiplication operator
double benchmarkFunction(int iterations, _Float32 (*f)(_Float32, _Float32), vector<_Float32> x_values, vector<_Float32> y_values)
{
auto start = chrono::high_resolution_clock::now();
for (int i = 0; i < iterations; ++i)
{
volatile _Float32 result = f(x_values[i], y_values[i]); // volatile to prevent optimization
}
auto end = chrono::high_resolution_clock::now();
return chrono::duration_cast<chrono::duration<double>>(end - start).count();
}
int main()
{
const int iterations = 500; // Number of iterations for benchmarking
const int times = 5;
cout << "Iterations per benchmark: " << iterations << endl;
cout << "Initializing random numbers..." << endl;
vector<_Float32> x_values(iterations);
vector<_Float32> y_values(iterations);
for (int i = 0; i < iterations; ++i)
{
x_values[i] = generateRandomFloat(-2.0, 2.0);
y_values[i] = generateRandomFloat(-2.0, 2.0);
}
for (int i = 0; i < times; i++)
{
cout << "Iteration: " << i << " of " << times << endl;
// Benchmark multiplication operator
double timeMultiplicationOperator = benchmarkFunction(iterations, [](_Float32 x, _Float32 y) -> _Float32
{ return x * y; }, x_values, y_values);
cout << "Time taken by multiplication operator: " << timeMultiplicationOperator << " seconds" << endl;
// Benchmark lmul_asm function
double timeLmulFunction = benchmarkFunction(iterations, lmul_asm, x_values, y_values);
cout << "Time taken by lmul_asm function: " << timeLmulFunction << " seconds" << endl;
// Benchmark lmul function
double timeLmulFunction2 = benchmarkFunction(iterations, lmul, x_values, y_values);
cout << "Time taken by lmul function: " << timeLmulFunction2 << " seconds" << endl;
}
// Validate results with random numbers
float x = generateRandomFloat(-1.0, 1.0);
float y = generateRandomFloat(-1.0, 1.0);
cout << left << setw(20) << "x"
<< left << setw(20) << "y"
<< left << setw(20) << "x*y"
<< left << setw(20) << "lmul_asm(x, y)"
<< left << setw(20) << "lmul(x, y)" << endl;
cout << string(100, '-') << endl;
for (int i = 0; i < 10; ++i)
{
float x = generateRandomFloat(-1.0, 1.0);
float y = generateRandomFloat(-1.0, 1.0);
cout << left << setw(20) << x
<< left << setw(20) << y
<< left << setw(20) << x * y
<< left << setw(20) << lmul_asm(x, y)
<< left << setw(20) << lmul(x, y) << endl;
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment