Last active
February 18, 2019 17:45
-
-
Save lighterowl/fb7ea088334091791b2ecf3c0d71e92d to your computer and use it in GitHub Desktop.
A benchmark comparing bytewise and bitwise CRC implementations
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <benchmark/benchmark.h> | |
#include <cstdint> | |
#include <array> | |
#include <random> | |
#include <algorithm> | |
static std::array<std::uint8_t, (1 << 16)> data; | |
static unsigned int CRC16_bitwise(const uint8_t *buf, int len) | |
{ | |
unsigned int crc = 0xFFFF; | |
for (int pos = 0; pos < len; pos++) | |
{ | |
crc ^= (unsigned int)buf[pos]; // XOR byte into least sig. byte of crc | |
for (int i = 8; i != 0; i--) { // Loop over each bit | |
if ((crc & 0x0001) != 0) { // If the LSB is set | |
crc >>= 1; // Shift right and XOR 0xA001 | |
crc ^= 0xA001; | |
} | |
else // Else LSB is not set | |
crc >>= 1; // Just shift right | |
} | |
} | |
return crc; | |
} | |
static uint16_t CRC16_bytewise (const uint8_t *nData, int wLength) | |
{ | |
static const uint16_t wCRCTable[] = { | |
0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, | |
0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, | |
0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, | |
0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, | |
0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40, | |
0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41, | |
0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641, | |
0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040, | |
0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240, | |
0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, | |
0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, | |
0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, | |
0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, | |
0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40, | |
0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640, | |
0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041, | |
0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240, | |
0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441, | |
0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, | |
0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, | |
0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41, | |
0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, | |
0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640, | |
0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041, | |
0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241, | |
0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440, | |
0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40, | |
0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, | |
0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40, | |
0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, | |
0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, | |
0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040 }; | |
uint8_t nTemp; | |
uint16_t wCRCWord = 0xFFFF; | |
while (wLength--) | |
{ | |
nTemp = *nData++ ^ wCRCWord; | |
wCRCWord >>= 8; | |
wCRCWord ^= wCRCTable[(nTemp & 0xFF)]; | |
} | |
return wCRCWord; | |
} // End: CRC16 | |
static void BM_BitwiseCRC(benchmark::State& state) { | |
for (auto _ : state) | |
benchmark::DoNotOptimize(CRC16_bitwise(data.data(), state.range(0))); | |
state.SetBytesProcessed(state.iterations() * state.range(0)); | |
} | |
BENCHMARK(BM_BitwiseCRC)->RangeMultiplier(2)->Range(8, 1<<16); | |
static void BM_BytewiseCRC(benchmark::State& state) { | |
for (auto _ : state) | |
benchmark::DoNotOptimize(CRC16_bytewise(data.data(), state.range(0))); | |
state.SetBytesProcessed(state.iterations() * state.range(0)); | |
} | |
BENCHMARK(BM_BytewiseCRC)->RangeMultiplier(2)->Range(8, 1<<16); | |
int main(int argc, char** argv) { | |
std::random_device r; | |
std::default_random_engine e1(r()); | |
std::uniform_int_distribution<uint8_t> uniform_dist(0, 0xFF); | |
std::generate(std::begin(data), std::end(data), [&]() { | |
return uniform_dist(e1); | |
}); | |
benchmark::Initialize(&argc, argv); | |
benchmark::RunSpecifiedBenchmarks(); | |
} | |
/* Results on a i7-2630QM : | |
------------------------------------------------------------------------------- | |
Benchmark Time CPU Iterations UserCounters... | |
------------------------------------------------------------------------------- | |
BM_BitwiseCRC/8 133 ns 132 ns 5381388 bytes_per_second=57.6174M/s | |
BM_BitwiseCRC/16 268 ns 268 ns 2609578 bytes_per_second=56.89M/s | |
BM_BitwiseCRC/32 538 ns 538 ns 1291759 bytes_per_second=56.7167M/s | |
BM_BitwiseCRC/64 1078 ns 1078 ns 634390 bytes_per_second=56.6419M/s | |
BM_BitwiseCRC/128 2143 ns 2141 ns 327213 bytes_per_second=57.0035M/s | |
BM_BitwiseCRC/256 4280 ns 4276 ns 162734 bytes_per_second=57.1019M/s | |
BM_BitwiseCRC/512 8569 ns 8563 ns 81169 bytes_per_second=57.0248M/s | |
BM_BitwiseCRC/1024 17145 ns 17132 ns 40801 bytes_per_second=57.0007M/s | |
BM_BitwiseCRC/2048 34240 ns 34168 ns 20202 bytes_per_second=57.1624M/s | |
BM_BitwiseCRC/4096 68297 ns 68250 ns 10147 bytes_per_second=57.2348M/s | |
BM_BitwiseCRC/8192 137088 ns 137001 ns 5103 bytes_per_second=57.025M/s | |
BM_BitwiseCRC/16384 275114 ns 274899 ns 2558 bytes_per_second=56.839M/s | |
BM_BitwiseCRC/32768 548467 ns 547900 ns 1254 bytes_per_second=57.0359M/s | |
BM_BitwiseCRC/65536 1098852 ns 1098000 ns 637 bytes_per_second=56.9217M/s | |
BM_BytewiseCRC/8 14.6 ns 14.5 ns 49068842 bytes_per_second=524.685M/s | |
BM_BytewiseCRC/16 35.3 ns 35.2 ns 19844193 bytes_per_second=433.19M/s | |
BM_BytewiseCRC/32 86.3 ns 86.3 ns 8097436 bytes_per_second=353.816M/s | |
BM_BytewiseCRC/64 192 ns 192 ns 3610579 bytes_per_second=317.391M/s | |
BM_BytewiseCRC/128 397 ns 397 ns 1747146 bytes_per_second=307.44M/s | |
BM_BytewiseCRC/256 808 ns 807 ns 859120 bytes_per_second=302.531M/s | |
BM_BytewiseCRC/512 1628 ns 1627 ns 426534 bytes_per_second=300.076M/s | |
BM_BytewiseCRC/1024 3292 ns 3289 ns 214352 bytes_per_second=296.951M/s | |
BM_BytewiseCRC/2048 6548 ns 6544 ns 105093 bytes_per_second=298.481M/s | |
BM_BytewiseCRC/4096 13125 ns 13116 ns 53236 bytes_per_second=297.823M/s | |
BM_BytewiseCRC/8192 26390 ns 26349 ns 26357 bytes_per_second=296.505M/s | |
BM_BytewiseCRC/16384 53102 ns 53041 ns 13195 bytes_per_second=294.583M/s | |
BM_BytewiseCRC/32768 104876 ns 104795 ns 6552 bytes_per_second=298.2M/s | |
BM_BytewiseCRC/65536 213223 ns 213057 ns 3342 bytes_per_second=293.349M/s | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment