Created
January 13, 2022 21:03
-
-
Save KungFuJesus/429cf1172bfc6d1e92715b97dd228f8f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* benchmark_adler32.cc -- benchmark adler32 variants
 * Copyright (C) 2020 Nathan Moinvaziri
 * For conditions of distribution and use, see copyright notice in zlib.h
 */
#include <stdint.h> | |
#include <stdio.h> | |
#include <stdint.h> | |
#include <assert.h> | |
#include <benchmark/benchmark.h> | |
extern "C" { | |
# include "zbuild.h" | |
# include "zutil.h" | |
# include "cpu_features.h" | |
# include "zutil_p.h" | |
} | |
/* Number of 32-bit values held in the benchmark input buffer. */
#define MAX_RANDOM_INTS (1024 * 1024)
/* Size of that buffer in bytes; also used as the upper bound of the benchmarked input lengths. */
#define MAX_RANDOM_INTS_SIZE (MAX_RANDOM_INTS * sizeof(uint32_t))
/* Defines and registers one adler32 benchmark.
 *   fixture      - fixture class; the generated body calls its Bench(state, fptr)
 *   name         - benchmark name, also stringified into the skip message
 *   fptr         - adler32 implementation handed to Bench()
 *   support_flag - expression; when it evaluates to zero the benchmark is
 *                  reported as skipped and the implementation is never called
 * The benchmark is registered with Range(2048, MAX_RANDOM_INTS_SIZE), i.e. the
 * input length in bytes is taken from state.range(0). */
#define BENCHMARK_ADLER(fixture, name, fptr, support_flag) \
    BENCHMARK_DEFINE_F(fixture, name)(benchmark::State& state) { \
        if (!(support_flag)) { \
            state.SkipWithError("CPU does not support " #name); \
            return; \
        } \
        Bench(state, fptr); \
    } \
    BENCHMARK_REGISTER_F(fixture, name)->Range(2048, MAX_RANDOM_INTS_SIZE);
/* Signature shared by every adler32 implementation under test:
 * takes a running checksum, a byte buffer and its length, returns the updated checksum. */
using adler32_func = uint32_t (*)(uint32_t adler, const unsigned char *buf, size_t len);
class adler32: public benchmark::Fixture { | |
private: | |
uint32_t *random_ints; | |
public: | |
void SetUp(const ::benchmark::State& state) { | |
/* Control the alignment so that we have the best case scenario for loads. With | |
* AVX512, unaligned loads can mean we're crossing a cacheline boundary at every load. | |
* And while this is a realistic scenario, it makes it difficult to compare benchmark | |
* to benchmark because one allocation could have been aligned perfectly for the loads | |
* while the subsequent one happened to not be. This is not to be advantageous to AVX512 | |
* (indeed, all lesser SIMD implementations benefit from this aligned allocation), but to | |
* control the _consistency_ of the results */ | |
random_ints = (uint32_t *)zng_alloc(MAX_RANDOM_INTS_SIZE); | |
assert(random_ints != NULL); | |
for (int32_t i = 0; i < MAX_RANDOM_INTS; i++) { | |
random_ints[i] = rand(); | |
} | |
} | |
void Bench(benchmark::State& state, adler32_func adler32) { | |
uint32_t hash = 0; | |
for (auto _ : state) { | |
hash = adler32(hash, (const unsigned char *)random_ints, state.range(0)); | |
} | |
benchmark::DoNotOptimize(hash); | |
} | |
void TearDown(const ::benchmark::State& state) { | |
zng_free(random_ints); | |
} | |
}; | |
/* Benchmark registrations: the portable C version is always registered; each
 * optimized variant is registered only when its build-time macro is defined, and is
 * gated at run time by the CPU feature flag given as the last argument. */
BENCHMARK_ADLER(adler32, c, adler32_c, 1);

#ifdef ARM_NEON_ADLER32
BENCHMARK_ADLER(adler32, neon, adler32_neon, arm_has_neon);
#endif

#ifdef PPC_VMX_ADLER32
BENCHMARK_ADLER(adler32, vmx, adler32_vmx, power_cpu_has_altivec);
#endif

#ifdef X86_SSE41_ADLER32
BENCHMARK_ADLER(adler32, sse41, adler32_sse41, x86_cpu_has_sse41);
#endif

#ifdef X86_SSSE3_ADLER32
BENCHMARK_ADLER(adler32, ssse3, adler32_ssse3, x86_cpu_has_ssse3);
#endif

#ifdef X86_AVX2_ADLER32
BENCHMARK_ADLER(adler32, avx2, adler32_avx2, x86_cpu_has_avx2);
#endif

#ifdef X86_AVX512_ADLER32
BENCHMARK_ADLER(adler32, avx512, adler32_avx512, x86_cpu_has_avx512);
#endif

#ifdef X86_AVX512VNNI_ADLER32
BENCHMARK_ADLER(adler32, avx512_vnni, adler32_avx512_vnni, x86_cpu_has_avx512vnni);
#endif

#ifdef POWER8_VSX_ADLER32
BENCHMARK_ADLER(adler32, power8, adler32_power8, power_cpu_has_arch_2_07);
#endif
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment