Last active
October 10, 2024 05:17
-
-
Save sansmoraxz/d963546d30e03c24b74902a47d4dc1bc to your computer and use it in GitHub Desktop.
Pure C++ benchmarking of https://arxiv.org/abs/2410.00907
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <random>
#include <string>
#include <vector>

using namespace std;
// Additive correction applied to the sum of the two operands' bit patterns.
// Despite the name it is not a bit mask: 0xC0880000 is -0x3F780000 modulo 2^32,
// i.e. -(127 << 23) + (1 << 19). Subtracting (127 << 23) removes one copy of the
// binary32 exponent bias (the sum of two biased exponents carries it twice), and
// (1 << 19) adds a fixed 2^-4 to the mantissa field.
// NOTE(review): the 2^-4 term is presumably the paper's L-Mul offset — confirm
// against https://arxiv.org/abs/2410.00907.
const std::uint32_t FLOAT32_EXPONENT_MASK = 0xC0880000; // -0x3F780000

// Baseline C++ implementation from the paper.
//
// Approximates x * y with integer additions only: the raw 32-bit patterns of
// x and y are added together with FLOAT32_EXPONENT_MASK, and the (wrapping)
// 32-bit sum is reinterpreted as a float. Adding the two sign bits modulo 2^32
// yields the XOR of the signs, so the sign of the result is correct.
// No special handling exists for zero, subnormals, infinities or NaN.
//
// @param x  first factor
// @param y  second factor
// @return   the value whose bit pattern is bits(x) + bits(y) + FLOAT32_EXPONENT_MASK
_Float32 lmul(_Float32 x, _Float32 y)
{
    static_assert(sizeof(_Float32) == sizeof(std::uint32_t),
                  "lmul requires a 32-bit _Float32");

    // memcpy is the well-defined way to read an object's bits as another type;
    // dereferencing a reinterpret_cast'ed pointer breaks the strict-aliasing rule.
    std::uint32_t x_bits;
    std::uint32_t y_bits;
    std::memcpy(&x_bits, &x, sizeof x_bits);
    std::memcpy(&y_bits, &y, sizeof y_bits);

    // Unsigned arithmetic wraps modulo 2^32, which is exactly what is wanted here.
    const std::uint32_t result_bits = x_bits + y_bits + FLOAT32_EXPONENT_MASK;

    _Float32 s;
    std::memcpy(&s, &result_bits, sizeof s);
    return s;
}
// Pure assembly implementation for comparison | |
_Float32 lmul_asm(_Float32 x, _Float32 y) | |
{ | |
_Float32 s; | |
asm( | |
"movl %1, %%eax\n\t" // Move x bits to eax | |
"movl %2, %%ebx\n\t" // Move y bits to ebx | |
"addl %%ebx, %%eax\n\t" // Add y bits to x bits | |
"addl %3, %%eax\n\t" // Add FLOAT32_EXPONENT_MASK to the result | |
"movl %%eax, %0" // Move the result to s | |
: "=m"(s) | |
: "m"(x), "m"(y), "r"(FLOAT32_EXPONENT_MASK) | |
: "%eax", "%ebx"); | |
return s; | |
} | |
// Returns a pseudo-random float drawn uniformly from [min, max).
//
// The Mersenne Twister engine is seeded from std::random_device once, on the
// first call, and reused afterwards; constructing and seeding a fresh engine
// per call is slow. The shared engine is not synchronized, so this function
// is not safe to call concurrently from several threads.
//
// @param min  lower bound of the range (inclusive)
// @param max  upper bound of the range; must satisfy min <= max
// @return     a value in the requested range
float generateRandomFloat(float min, float max)
{
    static std::mt19937 gen{std::random_device{}()};
    // Draw in float directly: a double sample narrowed to float can round up to max.
    std::uniform_real_distribution<float> dis(min, max);
    return dis(gen);
}
// Times repeated calls of a binary float function over paired inputs.
//
// Calls f(x_values[i], y_values[i]) for i = 0 .. count-1, where count is
// `iterations` clamped to [0, min(x_values.size(), y_values.size())], so a
// request larger than the available data never reads out of bounds.
//
// @param iterations  requested number of calls
// @param f           function under test
// @param x_values    left-hand operands (borrowed, not copied)
// @param y_values    right-hand operands (borrowed, not copied)
// @return            elapsed wall-clock time of the loop, in seconds
double benchmarkFunction(int iterations, _Float32 (*f)(_Float32, _Float32), const std::vector<_Float32> &x_values, const std::vector<_Float32> &y_values)
{
    const std::size_t available = std::min(x_values.size(), y_values.size());
    const std::size_t count =
        iterations > 0 ? std::min(static_cast<std::size_t>(iterations), available) : 0;

    // steady_clock is monotonic, so the interval cannot be skewed by clock adjustments.
    const auto start = std::chrono::steady_clock::now();
    for (std::size_t i = 0; i < count; ++i)
    {
        volatile _Float32 result = f(x_values[i], y_values[i]); // volatile to prevent optimization
    }
    const auto end = std::chrono::steady_clock::now();
    return std::chrono::duration<double>(end - start).count();
}
int main() | |
{ | |
const int iterations = 500; // Number of iterations for benchmarking | |
const int times = 5; | |
cout << "Iterations per benchmark: " << iterations << endl; | |
cout << "Initializing random numbers..." << endl; | |
vector<_Float32> x_values(iterations); | |
vector<_Float32> y_values(iterations); | |
for (int i = 0; i < iterations; ++i) | |
{ | |
x_values[i] = generateRandomFloat(-2.0, 2.0); | |
y_values[i] = generateRandomFloat(-2.0, 2.0); | |
} | |
for (int i = 0; i < times; i++) | |
{ | |
cout << "Iteration: " << i << " of " << times << endl; | |
// Benchmark multiplication operator | |
double timeMultiplicationOperator = benchmarkFunction(iterations, [](_Float32 x, _Float32 y) -> _Float32 | |
{ return x * y; }, x_values, y_values); | |
cout << "Time taken by multiplication operator: " << timeMultiplicationOperator << " seconds" << endl; | |
// Benchmark lmul_asm function | |
double timeLmulFunction = benchmarkFunction(iterations, lmul_asm, x_values, y_values); | |
cout << "Time taken by lmul_asm function: " << timeLmulFunction << " seconds" << endl; | |
// Benchmark lmul function | |
double timeLmulFunction2 = benchmarkFunction(iterations, lmul, x_values, y_values); | |
cout << "Time taken by lmul function: " << timeLmulFunction2 << " seconds" << endl; | |
} | |
// Validate results with random numbers | |
float x = generateRandomFloat(-1.0, 1.0); | |
float y = generateRandomFloat(-1.0, 1.0); | |
cout << left << setw(20) << "x" | |
<< left << setw(20) << "y" | |
<< left << setw(20) << "x*y" | |
<< left << setw(20) << "lmul_asm(x, y)" | |
<< left << setw(20) << "lmul(x, y)" << endl; | |
cout << string(100, '-') << endl; | |
for (int i = 0; i < 10; ++i) | |
{ | |
float x = generateRandomFloat(-1.0, 1.0); | |
float y = generateRandomFloat(-1.0, 1.0); | |
cout << left << setw(20) << x | |
<< left << setw(20) << y | |
<< left << setw(20) << x * y | |
<< left << setw(20) << lmul_asm(x, y) | |
<< left << setw(20) << lmul(x, y) << endl; | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment