Last active
August 5, 2025 18:27
-
-
Save vaguinerg/baafe41426f3ccf2c6350144392e0eac to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import std/[math, monotimes, strformat, times]
const
  SIZE = 50_000_000  ## Default number of elements in every benchmark vector.

proc initSeqs(n: Natural = SIZE): (seq[float32], seq[float32], seq[float32]) =
  ## Builds the three input vectors used by the benchmarks, each of length `n`
  ## (defaults to `SIZE`, so existing `initSeqs()` calls are unchanged).
  ##
  ## Returned in order:
  ## * first:  `i / n`        -- rises from 0 towards 1
  ## * second: `(n - i) / n`  -- falls from 1 towards 0
  ## * third:  `((i * 17) mod 1000) / 1000` -- repeating values in [0, 1)
  ##
  ## With `n == 0` all three sequences are empty.
  var
    rising = newSeq[float32](n)
    falling = newSeq[float32](n)
    pattern = newSeq[float32](n)
  # Convert the length once instead of on every iteration.
  let scale = float32(n)
  for i in 0..<n:
    rising[i] = float32(i) / scale
    falling[i] = float32(n - i) / scale
    pattern[i] = float32((i * 17) mod 1000) / 1000.0'f32
  (rising, falling, pattern)
proc benchmark(name: string, body: proc()) =
  ## Runs `body` exactly once and prints one table row: `name` left-aligned
  ## in a 20-character column, followed by the elapsed time in whole
  ## milliseconds right-aligned in a 6-character column.
  ##
  ## The interval is taken from the monotonic clock rather than the wall
  ## clock, so system time adjustments during a run cannot skew or negate
  ## the reported duration.
  let start = getMonoTime()
  body()
  let duration = getMonoTime() - start
  echo &"{name:<20} | {duration.inMilliseconds:>6} ms"
proc main() =
  ## Builds the input vectors, prints the table header, then times each
  ## kernel in turn; every `benchmark` call emits one row of the table.
  let (a, b, d) = initSeqs()
  var c = newSeq[float32](SIZE)  # output buffer reused by every kernel

  echo "Test | Time (ms)"
  echo "--------------------+-----------"

  # Element-wise addition.
  benchmark("add", proc() =
    for k in 0..<SIZE:
      c[k] = a[k] + b[k]
  )

  # Element-wise multiplication.
  benchmark("mul", proc() =
    for k in 0..<SIZE:
      c[k] = a[k] * b[k]
  )

  # Multiply followed by add.
  benchmark("fma", proc() =
    for k in 0..<SIZE:
      c[k] = a[k] * b[k] + d[k]
  )

  # Cubic polynomial x^3 + 2x^2 + 3x + 4, written out term by term.
  benchmark("polynomial", proc() =
    for k in 0..<SIZE:
      let v = a[k]
      c[k] = v * v * v + 2.0'f32 * v * v + 3.0'f32 * v + 4.0'f32
  )

  # Interpolation between a and b, weighted by d.
  benchmark("linear interp", proc() =
    for k in 0..<SIZE:
      c[k] = a[k] * (1.0'f32 - d[k]) + b[k] * d[k]
  )

  # Reciprocal square root of (a + 1).
  benchmark("sqrt approx", proc() =
    for k in 0..<SIZE:
      c[k] = 1.0'f32 / sqrt(a[k] + 1.0'f32)
  )

  # Scale by a constant and add a constant offset.
  benchmark("scale offset", proc() =
    for k in 0..<SIZE:
      c[k] = a[k] * 2.5'f32 + 0.1'f32
  )

  # Reduction of a + b into a single scalar, stored in c[0].
  benchmark("horizontal sum", proc() =
    var total = 0.0'f32
    for k in 0..<SIZE:
      total += a[k] + b[k]
    c[0] = total
  )

  # Element-wise minimum.
  benchmark("min", proc() =
    for k in 0..<SIZE:
      c[k] = min(a[k], b[k])
  )

  # Branch on d to choose which input feeds the running sum.
  benchmark("conditional", proc() =
    var acc = 0.0'f32
    for k in 0..<SIZE:
      if d[k] > 0.5'f32:
        acc += a[k]
      else:
        acc += b[k]
    c[0] = acc
  )

  # Same selection as above, expressed arithmetically through a 0/1 mask.
  benchmark("blend mask", proc() =
    for k in 0..<SIZE:
      let sel = if d[k] > 0.5'f32: 1.0'f32 else: 0.0'f32
      c[k] = sel * a[k] + (1.0'f32 - sel) * b[k]
  )

  # Running sum; each element depends on the previous output element.
  benchmark("prefix sum", proc() =
    c[0] = a[0]
    for k in 1..<SIZE:
      c[k] = c[k-1] + a[k]
  )

  # Two separate passes over c: an add pass, then a doubling pass.
  benchmark("fused loops", proc() =
    for k in 0..<SIZE:
      c[k] = a[k] + b[k]
    for k in 0..<SIZE:
      c[k] = c[k] * 2.0'f32
  )

  # Multiply-add with two literal constants.
  benchmark("muladd const", proc() =
    for k in 0..<SIZE:
      c[k] = a[k] * 3.14159'f32 + 2.71828'f32
  )

  # Same expression as "linear interp", timed a second time.
  benchmark("linear blend", proc() =
    for k in 0..<SIZE:
      c[k] = a[k] * (1.0'f32 - d[k]) + b[k] * d[k]
  )
# Run the benchmarks only when this file is compiled as the main module.
when isMainModule: main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment