Last active
September 16, 2020 19:52
-
-
Save MaxGraey/a826c71909353e3a28a54e8a749c06ac to your computer and use it in GitHub Desktop.
Benchmark different nearest functions for Rust
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[package]
name = "bench"
version = "0.1.0"
authors = ["MaxGraey <[email protected]>"]
[profile.bench]
codegen-units = 1
opt-level = 3
lto = true
debug = false
rpath = false
debug-assertions = false
panic = 'unwind'
[profile.release]
codegen-units = 1
opt-level = 3
lto = true
debug = true
rpath = false
debug-assertions = false
panic = 'unwind'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![feature(stdsimd)] | |
#![feature(test)] | |
extern crate test; | |
#[cfg(target_arch = "x86")] | |
use std::arch::x86::*; | |
#[cfg(target_arch = "x86_64")] | |
use std::arch::x86_64::*; | |
/// 2^52, written as `1.0 / f64::EPSILON`.
///
/// At this magnitude adjacent `f64` values are exactly 1.0 apart, so adding it to
/// a smaller non-negative value and subtracting it again leaves that value rounded
/// to an integer by the floating-point unit's current rounding mode.
const TOINT_64: f64 = 1.0 / f64::EPSILON;
/// Rounds `x` to the nearest integer, ties to even, using only `ceil` and `floor`.
///
/// This is the baseline variant of the benchmark. Zero is returned unchanged so
/// its sign is preserved. For NaN and infinities every comparison below is false,
/// so the `floor` result (NaN or the same infinity) is returned.
pub extern "C" fn f64_nearest_original(x: f64) -> f64 {
    if x == 0.0 {
        // Preserve the sign of zero.
        return x;
    }

    // The answer is either ceil(x) or floor(x): whichever is closer, or the even
    // one when both are equally far away.
    let up = x.ceil();
    let down = x.floor();
    let up_dist = (x - up).abs();
    let down_dist = (x - down).abs();

    // `up` is even exactly when half of it is still an integer.
    let up_is_even = || {
        let half = up / 2.;
        half.floor() == half
    };

    if up_dist < down_dist || (up_dist == down_dist && up_is_even()) {
        up
    } else {
        down
    }
}
pub extern "C" fn f64_nearest_branch(x: f64) -> f64 { | |
if x == 0.0 { | |
x | |
} else { | |
let i = x.to_bits(); | |
let e = i >> 52 & 0x7ff; | |
if e < 0x3ff + 52 { | |
if i >> 63 != 0 { | |
x - TOINT_64 + TOINT_64 | |
} else { | |
x + TOINT_64 - TOINT_64 | |
} | |
} else { | |
x | |
} | |
} | |
} | |
pub extern "C" fn f64_nearest_copysign(x: f64) -> f64 { | |
let i = x.to_bits(); | |
let e = i >> 52 & 0x7ff; | |
if e < 0x3ff + 52 { | |
if i >> 63 != 0 { | |
x - TOINT_64 + TOINT_64 | |
} else { | |
x + TOINT_64 - TOINT_64 | |
}.copysign(x) | |
} else { | |
x | |
} | |
} | |
pub extern "C" fn f64_nearest_abs_copysign(x: f64) -> f64 { | |
let i = x.to_bits(); | |
let e = i >> 52 & 0x7ff; | |
if e < 0x3ff + 52 { | |
(x.abs() + TOINT_64 - TOINT_64).copysign(x) | |
} else { | |
x | |
} | |
} | |
pub extern "C" fn f64_nearest_abs_copysign_without_bits(x: f64) -> f64 { | |
let y = x.abs(); | |
let m = f64::from_bits(0x3ff_u64 + 52 << 52_u64); | |
if y < m { | |
(y + TOINT_64 - TOINT_64).copysign(x) | |
} else { | |
x | |
} | |
} | |
/// Rounds `x` to an integer with the SSE4.1 scalar rounding intrinsic.
///
/// `_MM_FROUND_RINT` selects the rounding direction currently configured in
/// MXCSR, which is round-to-nearest, ties-to-even unless the program changed it.
///
/// The value travels through the low lane of a vector register
/// (`_mm_set_sd` -> `_mm_round_sd` -> `_mm_cvtsd_f64`). The earlier version spilled
/// it to an aligned two-element struct and accessed 16 bytes through a pointer
/// derived from a reference to the first field only; this form needs neither the
/// helper struct nor any raw-pointer access.
///
/// The CPU must support SSE4.1. This is checked in builds with debug assertions
/// enabled only, to keep the measured path free of extra work.
pub extern "C" fn f64_nearest_sse41(x: f64) -> f64 {
    debug_assert!(is_x86_feature_detected!("sse4.1"));
    // SAFETY: the intrinsics below only operate on register values. The single
    // requirement is that the executing CPU implements SSE4.1, which the caller
    // guarantees by building/running on such hardware (see the debug check above).
    unsafe {
        let lane = _mm_set_sd(x);
        _mm_cvtsd_f64(_mm_round_sd(lane, lane, _MM_FROUND_RINT))
    }
}
use test::{Bencher, black_box}; | |
#[bench] | |
fn nearest_original(b: &mut Bencher) { | |
let min = black_box(-10_000); | |
let max = black_box( 10_000); | |
b.iter(|| { | |
let mut sum = black_box(0_f64); | |
for x in min..=max { | |
sum += f64_nearest_original(x as f64 * 0.5); | |
} | |
black_box(sum) | |
}); | |
} | |
#[bench] | |
fn nearest_branch(b: &mut Bencher) { | |
let min = black_box(-10_000); | |
let max = black_box( 10_000); | |
b.iter(|| { | |
let mut sum = black_box(0_f64); | |
for x in min..=max { | |
sum += f64_nearest_branch(x as f64 * 0.5); | |
} | |
black_box(sum) | |
}); | |
} | |
#[bench] | |
fn nearest_copysign(b: &mut Bencher) { | |
let min = black_box(-10_000); | |
let max = black_box( 10_000); | |
b.iter(|| { | |
let mut sum = black_box(0_f64); | |
for x in min..=max { | |
sum += f64_nearest_copysign(x as f64 * 0.5); | |
} | |
black_box(sum) | |
}); | |
} | |
#[bench] | |
fn nearest_abs_copysign(b: &mut Bencher) { | |
let min = black_box(-10_000); | |
let max = black_box( 10_000); | |
b.iter(|| { | |
let mut sum = black_box(0_f64); | |
for x in min..=max { | |
sum += f64_nearest_abs_copysign(x as f64 * 0.5); | |
} | |
black_box(sum) | |
}); | |
} | |
#[bench] | |
fn nearest_abs_copysign_without_bits(b: &mut Bencher) { | |
let min = black_box(-10_000); | |
let max = black_box( 10_000); | |
b.iter(|| { | |
let mut sum = black_box(0_f64); | |
for x in min..=max { | |
sum += f64_nearest_abs_copysign_without_bits(x as f64 * 0.5); | |
} | |
black_box(sum) | |
}); | |
} | |
#[bench] | |
fn nearest_sse41(b: &mut Bencher) { | |
let min = black_box(-10_000); | |
let max = black_box( 10_000); | |
b.iter(|| { | |
let mut sum = black_box(0_f64); | |
for x in min..=max { | |
sum += f64_nearest_sse41(x as f64 * 0.5); | |
} | |
black_box(sum) | |
}); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Launch with:
    cargo bench
or, to compile for the host CPU:
    RUSTFLAGS='-C target-cpu=native' cargo bench