Skip to content

Instantly share code, notes, and snippets.

@vaguinerg
Last active August 5, 2025 18:27
Show Gist options
  • Save vaguinerg/baafe41426f3ccf2c6350144392e0eac to your computer and use it in GitHub Desktop.
Save vaguinerg/baafe41426f3ccf2c6350144392e0eac to your computer and use it in GitHub Desktop.
import std/[math, monotimes, strformat, times]
# Element count of every benchmark sequence; also the trip count of the
# kernel loops that iterate `0..<SIZE`.
const SIZE = 50_000_000
proc initSeqs(): (seq[float32], seq[float32], seq[float32]) =
  ## Builds the three `SIZE`-element float32 input sequences and returns them
  ## as a tuple. For index `i` the elements are, in tuple order:
  ##
  ## 1. `i / SIZE`
  ## 2. `(SIZE - i) / SIZE`
  ## 3. `((i * 17) mod 1000) / 1000`
  let scale = float32(SIZE)
  var
    rising = newSeq[float32](SIZE)
    falling = newSeq[float32](SIZE)
    cyclic = newSeq[float32](SIZE)
  for idx in 0 ..< SIZE:
    rising[idx] = float32(idx) / scale
    falling[idx] = float32(SIZE - idx) / scale
    # Integer arithmetic first, then a single conversion to float32.
    cyclic[idx] = float32((idx * 17) mod 1000) / 1000.0'f32
  result = (rising, falling, cyclic)
proc benchmark(name: string, body: proc()) =
  ## Runs `body` exactly once and echoes one table row: `name` left-aligned
  ## in a 20-character column, then the elapsed time in whole milliseconds
  ## right-aligned in a 6-character column.
  ##
  ## Timing uses the monotonic clock (`getMonoTime`) instead of the wall
  ## clock (`getTime`). Wall-clock time is not guaranteed to be
  ## non-decreasing, so an adjustment while `body` runs would corrupt the
  ## measured duration.
  let start = getMonoTime()
  body()
  let duration = getMonoTime() - start
  echo &"{name:<20} | {duration.inMilliseconds:>6} ms"
proc main() =
  ## Runs each float32 kernel once, in a fixed order, and prints a two-column
  ## table (label, elapsed milliseconds) through `benchmark`.
  ##
  ## The inputs come from `initSeqs()`. Every kernel writes into the same
  ## `SIZE`-element output buffer, so each run overwrites earlier results.
  ## Kernel bodies are kept in their plain index-loop form on purpose: the
  ## shape of each loop is what is being timed.
  let (srcA, srcB, srcD) = initSeqs()
  var dst = newSeq[float32](SIZE)

  echo "Test | Time (ms)"
  echo "--------------------+-----------"

  # Element-wise sum of the two inputs.
  proc addKernel() =
    for k in 0 ..< SIZE:
      dst[k] = srcA[k] + srcB[k]
  benchmark("add", addKernel)

  # Element-wise product of the two inputs.
  proc mulKernel() =
    for k in 0 ..< SIZE:
      dst[k] = srcA[k] * srcB[k]
  benchmark("mul", mulKernel)

  # Multiply followed by add, written as separate operators.
  proc fmaKernel() =
    for k in 0 ..< SIZE:
      dst[k] = srcA[k] * srcB[k] + srcD[k]
  benchmark("fma", fmaKernel)

  # Cubic polynomial x^3 + 2x^2 + 3x + 4, evaluated term by term.
  proc polynomialKernel() =
    for k in 0 ..< SIZE:
      let v = srcA[k]
      dst[k] = v * v * v + 2.0'f32 * v * v + 3.0'f32 * v + 4.0'f32
  benchmark("polynomial", polynomialKernel)

  # Blend of the two inputs, weighted by the third sequence.
  proc lerpKernel() =
    for k in 0 ..< SIZE:
      dst[k] = srcA[k] * (1.0'f32 - srcD[k]) + srcB[k] * srcD[k]
  benchmark("linear interp", lerpKernel)

  # Reciprocal square root of (input + 1), via `sqrt` and a division.
  proc rsqrtKernel() =
    for k in 0 ..< SIZE:
      dst[k] = 1.0'f32 / sqrt(srcA[k] + 1.0'f32)
  benchmark("sqrt approx", rsqrtKernel)

  # Scale by a constant, then add a constant.
  proc scaleOffsetKernel() =
    for k in 0 ..< SIZE:
      dst[k] = srcA[k] * 2.5'f32 + 0.1'f32
  benchmark("scale offset", scaleOffsetKernel)

  # Reduction of both inputs into one float32; stored in slot 0 only.
  proc horizontalSumKernel() =
    var total = 0.0'f32
    for k in 0 ..< SIZE:
      total += srcA[k] + srcB[k]
    dst[0] = total
  benchmark("horizontal sum", horizontalSumKernel)

  # Element-wise minimum of the two inputs.
  proc minKernel() =
    for k in 0 ..< SIZE:
      dst[k] = min(srcA[k], srcB[k])
  benchmark("min", minKernel)

  # Branching reduction: the third sequence picks which input to accumulate.
  proc conditionalKernel() =
    var total = 0.0'f32
    for k in 0 ..< SIZE:
      if srcD[k] > 0.5'f32:
        total += srcA[k]
      else:
        total += srcB[k]
    dst[0] = total
  benchmark("conditional", conditionalKernel)

  # Same selection as above, expressed arithmetically with a 0/1 mask.
  proc blendMaskKernel() =
    for k in 0 ..< SIZE:
      let mask = if srcD[k] > 0.5'f32: 1.0'f32 else: 0.0'f32
      dst[k] = mask * srcA[k] + (1.0'f32 - mask) * srcB[k]
  benchmark("blend mask", blendMaskKernel)

  # Running sum: each output depends on the previous output.
  proc prefixSumKernel() =
    dst[0] = srcA[0]
    for k in 1 ..< SIZE:
      dst[k] = dst[k - 1] + srcA[k]
  benchmark("prefix sum", prefixSumKernel)

  # Two back-to-back passes: store the sum, then double it in place.
  proc twoPassKernel() =
    for k in 0 ..< SIZE:
      dst[k] = srcA[k] + srcB[k]
    for k in 0 ..< SIZE:
      dst[k] = dst[k] * 2.0'f32
  benchmark("fused loops", twoPassKernel)

  # Multiply-add with two literal constants.
  proc mulAddConstKernel() =
    for k in 0 ..< SIZE:
      dst[k] = srcA[k] * 3.14159'f32 + 2.71828'f32
  benchmark("muladd const", mulAddConstKernel)

  # Same expression as the "linear interp" kernel, timed a second time.
  proc linearBlendKernel() =
    for k in 0 ..< SIZE:
      dst[k] = srcA[k] * (1.0'f32 - srcD[k]) + srcB[k] * srcD[k]
  benchmark("linear blend", linearBlendKernel)
# Run the benchmarks only when this file is compiled as the main module,
# not when it is imported by another module.
when isMainModule:
  main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment