Last active
December 13, 2024 10:15
-
-
Save chmduquesne/22fc50d44ec85bc051ee25b3e211ac5f to your computer and use it in GitHub Desktop.
Parametrized rolling checksum in python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Self | |
import random | |
# An implementation of buzhash64 compatible with | |
# https://github.com/chmduquesne/rollinghash/blob/master/buzhash64/buzhash64.go | |
# List of hashes used by the default go implementation (seed=1) | |
go_hashes = [ | |
0x4d65822107fcfd52, 0x78629a0f5f3f164f, 0x55104dc76695721d, 0x380704bb7b4d7c03, # noqa: E501 | |
0x365a858149c6e2d1, 0x57e9d1860d1d68d8, 0x0866cb397916001e, 0x1408d2ac22c4d294, # noqa: E501 | |
0x0c697f48392907a0, 0x268447a4189deb99, 0x41f27cc6f3875d04, 0x68255aaf95e94627, # noqa: E501 | |
0x1b6cffa2ba517936, 0x30b95ff183c471d4, 0x28b621587cb3ad0b, 0x3c04951aa42655d9, # noqa: E501 | |
0x243a768b7c4e0b68, 0x25845c95d4491d1b, 0x56ec3f2525632186, 0x1bf98be2a9d78d73, # noqa: E501 | |
0x1a02070f169c1121, 0x2e3108dabb158644, 0x490bd268b68e6a3f, 0x6e661e92759805f5, # noqa: E501 | |
0x2584c47f2cdf5b8a, 0x2606cd2b57d29245, 0x6054502fc5d6d268, 0x1a714cf86b83d0e2, # noqa: E501 | |
0x6ec34c367674cb74, 0x592e17f7b068d9db, 0x430c8b35bb9457d8, 0x039f6f78a15d523b, # noqa: E501 | |
0x144419db794209ff, 0x4dba7b0f9da1d7eb, 0x7cd4b7a55a25e0cb, 0x0a2b894cf840ec4b, # noqa: E501 | |
0x4c22b02936d4ff9b, 0x079143f7f4a5ee3b, 0x589442fd5ad145f4, 0x26984b92f6740304, # noqa: E501 | |
0x162d968d3f71f8cb, 0x4542c29291018d7c, 0x45a6e3cafccae224, 0x23a62343b186b51f, # noqa: E501 | |
0x3629d9f17d9e8fbc, 0x43ea3b9393f93f33, 0x207403def63a5b6f, 0x241b3ae419476c36, # noqa: E501 | |
0x64f1017fbc897d06, 0x2e4fa459169873f5, 0x70b5a315724c7af1, 0x2607c649581eeb39, # noqa: E501 | |
0x727a71f52257bb7d, 0x0c7964976f269a28, 0x7d0b9ca8be8e9981, 0x09825e117039374b, # noqa: E501 | |
0x1c73fac825416fed, 0x572d92faded7e411, 0x1ee9f7676678e7aa, 0x27dff7ab244fcd36, # noqa: E501 | |
0x7767830356aa6b86, 0x5ef4e81ede4561ad, 0x6688f8bd3e99b0a8, 0x5d78399cbed80a3a, # noqa: E501 | |
0x176a156ae58348b0, 0x36d467a4af63e58d, 0x72d0a1e9406aec9d, 0x57613082c233f007, # noqa: E501 | |
0x7d4d8e9fa5ead0bd, 0x760b0d22050143a6, 0x0ba08e4b738b6829, 0x3f1f46e83699caf3, # noqa: E501 | |
0x76a780ea967cd710, 0x7a3ba6f606f665a6, 0x2c89c16725fd3d7f, 0x586d68260fd6e479, # noqa: E501 | |
0x5aff01c926fbf29b, 0x4829ee0716de4c35, 0x5322787c2bf3394b, 0x46a03cb44af864ba, # noqa: E501 | |
0x60bed31f1cb9e6c6, 0x33afd37941439089, 0x10b92d0169a39144, 0x7e34179dc34f182d, # noqa: E501 | |
0x72bb5389421657ff, 0x293a0c2bf9fc6568, 0x5c4e91e98b02c917, 0x528047936c9c64b7, # noqa: E501 | |
0x0af2560383d17909, 0x55b4a4b2ea3d4ca5, 0x4fb58fbeaf635d47, 0x2f5218587fc78769, # noqa: E501 | |
0x1e503382be14186f, 0x44841df33539b1ea, 0x17f7ae24e9174548, 0x1e925507c051e18a, # noqa: E501 | |
0x5065855807b73658, 0x103970a329ec300c, 0x2402a18da250bf34, 0x3485757ea7ed5d97, # noqa: E501 | |
0x37ab3641fe3dea79, 0x50031d27b8b352f7, 0x466b36dbc9b344e9, 0x4fd269fd8e5f0475, # noqa: E501 | |
0x5d55cb471941e52a, 0x6a4eef7a2694763d, 0x0010d6326b40eabc, 0x5e377ef58485d68b, # noqa: E501 | |
0x3332aafe336eacca, 0x3fba24704399a363, 0x4d4f278a67149b9c, 0x346e5f29ae10a901, # noqa: E501 | |
0x03cc44bf5a5ffefb, 0x003e6306563b26de, 0x005d29286f00f02b, 0x7539a2019f06397d, # noqa: E501 | |
0x4b7fafc3545836c4, 0x479a2bf931d6416b, 0x685f325712f4128d, 0x7062b076752f33ff, # noqa: E501 | |
0x3aae3e3e4a305605, 0x4cd239ea0c8dc214, 0x035ca80d72521a90, 0x6c443faf8eb3e4a1, # noqa: E501 | |
0x1ff5f26283efc6c6, 0x5225fcd6090ec04f, 0x1facfc5dc1540864, 0x163a5aceec2c8aaa, # noqa: E501 | |
0x4bdb185b70ab53ba, 0x683e14a538d3b494, 0x58cfb024878d4063, 0x03e19bf7a317ae3f, # noqa: E501 | |
0x4504d6353cb62f07, 0x7ce2e98ef360412c, 0x601900fb4ffbf3a9, 0x25a1ffb522d554b4, # noqa: E501 | |
0x606796b83f190476, 0x1352ca320796a710, 0x2d89c820f5c353cf, 0x6a7cb5cf04f59bb7, # noqa: E501 | |
0x1dac9b582d230176, 0x505ce263e2d6a9ce, 0x3fcb626c3f1d7427, 0x0b7fbfbcafd915bb, # noqa: E501 | |
0x03398e40b01aa47d, 0x323423cfcde2c269, 0x4b70e7ac7417bf38, 0x76fd839a1e094f9a, # noqa: E501 | |
0x493a23eb55ece0ea, 0x4b56783ccb94539b, 0x34b4a3c813d346b5, 0x46baf44754e0c0c1, # noqa: E501 | |
0x3eecfdbc6db30e37, 0x7a9e3bdcdc02b390, 0x660aedf1a6e222f5, 0x0dbeaa0fe2f8c1fe, # noqa: E501 | |
0x643a7d712e166bdf, 0x32560c7a67588a74, 0x10b166a221898f34, 0x1852fe624c330f1d, # noqa: E501 | |
0x5eb29c7719af53ba, 0x53b7a0ff70658b94, 0x0c97d70a133c9673, 0x429bd23a4efeeadd, # noqa: E501 | |
0x0cc3f10e0f212551, 0x136f9ac7070f0914, 0x09c09a3e6f241c57, 0x2858bd10f13e41b7, # noqa: E501 | |
0x146f70ff3be70cb0, 0x11a39040f4b6f47f, 0x294b4e8e20f31127, 0x450064ce6551cb89, # noqa: E501 | |
0x4911aa87289cbd2c, 0x41a2d5288946f23d, 0x57930cf840a79c3b, 0x5396d24a03c6d982, # noqa: E501 | |
0x4322cee10365790c, 0x53bf1faf0cf52517, 0x5bb1f57b0bb131e8, 0x517d8ebf3da5475c, # noqa: E501 | |
0x01a44786139efcca, 0x03ed64e9bcd44eb4, 0x0c8c4694a54af747, 0x2f3f0d6fb73c32ed, # noqa: E501 | |
0x69c93fb09f6c47ac, 0x2c80d58fe8ba8f22, 0x2c1283b654043a66, 0x20624c583b0a7f20, # noqa: E501 | |
0x1bb55397b4926431, 0x470a4f5ae17c02d5, 0x33770eb58f0d2558, 0x40d4e552014fbff2, # noqa: E501 | |
0x15974b9d7f803594, 0x2a6a467079b76fbe, 0x69f98c4033fe2656, 0x59a30874792c8ee8, # noqa: E501 | |
0x076a20af6b41292d, 0x7fe4754afdff9c32, 0x34ad5ac882093298, 0x0e4b5ac059483870, # noqa: E501 | |
0x63efbff5b2d5a113, 0x0bca82a42dd96e5a, 0x06d8e96f5b8e56a9, 0x5b7b2709ebd9dda9, # noqa: E501 | |
0x2018fa6e04f9ce92, 0x6ca000e8cb440950, 0x7ca82947a67e52b1, 0x1b35327a49f6d261, # noqa: E501 | |
0x02c19e7792417fc3, 0x78fc24541c3b6bd9, 0x0be67230b027b7e0, 0x52aaab031f765a41, # noqa: E501 | |
0x27ebdd8f44c9ab40, 0x396747c045d99121, 0x3e5ddb0efd7a84af, 0x0a8eb1ac99b75788, # noqa: E501 | |
0x55fe7f03e3abff4a, 0x33395eafa88aa67f, 0x733c374d736e41cc, 0x7995c5dc9cbcbe5e, # noqa: E501 | |
0x28dfd8d37b3ccebc, 0x3febdd25e1b7fa93, 0x33415dbd315ae6af, 0x0289172b9cced2e2, # noqa: E501 | |
0x5290a23119ea0f2f, 0x36df4331a9770722, 0x2b77e80684a6bfdc, 0x7197e13488f03f07, # noqa: E501 | |
0x1e3ffa8aa44a03a4, 0x61ebca0827a6b885, 0x04939bb8b580c8ba, 0x5d214064018153da, # noqa: E501 | |
0x501b6a22b648e604, 0x41acd9f551180278, 0x0945fcdd893a310f, 0x5cb389ac728f5f4c, # noqa: E501 | |
0x709ec18437f5198b, 0x7d275a873cc0ea9b, 0x6c7ae37ae39d02db, 0x6a85764813883142, # noqa: E501 | |
0x1fb95e8cca599392, 0x74ea42afc12d154e, 0x099ad1bdc176163d, 0x6ae4ae6d5c92e2b8, # noqa: E501 | |
0x508df0dcf9f95ede, 0x60390908b802bdfc, 0x50e57d0f8a928585, 0x0c68571ddca6e10b, # noqa: E501 | |
0x01e5dcfd887953e8, 0x4abb18c948b9e962, 0x08cd00c4e533e9a3, 0x7fc76fad5e0ce6e5, # noqa: E501 | |
0x53189b251dba77ae, 0x7e23bc6fc8214b8a, 0x6adaea4753b428d7, 0x2a80d0564cf20a65 # noqa: E501 | |
] | |
# Generate the list of hashes | |
def generate_hashes(seed: int) -> list: | |
rand = random.Random(seed) | |
res = [] | |
for _ in range(256): | |
res.append(rand.getrandbits(64)) | |
return res | |
# Implementation of Buzhash64 | |
class Buzhash64(): | |
sum: int = 0 | |
n_rotate: int = 0 | |
n_rotate_complement: int = 0 | |
window: bytearray = bytearray() | |
oldest: int = 0 | |
bytehash: list = 0 | |
def __init__(self, seed=0, use_go_hashes=False): | |
if use_go_hashes: | |
self.bytehash = go_hashes | |
else: | |
self.bytehash = generate_hashes(seed) | |
def digest(self: Self) -> bytes: | |
return self.sum.to_bytes(8) | |
def hexdigest(self: Self) -> bytes: | |
return self.digest().hex() | |
def update(self: Self, data: bytes) -> int: | |
if len(data) == 0: | |
return 0 | |
# rearrange the window so that the oldest byte is at the start | |
if self.oldest != 0: | |
self.window = \ | |
self.window[:self.oldest] + self.window[self.oldest:] | |
self.window += bytearray(data) | |
self.sum = 0 | |
for c in self.window: | |
self.sum = (self.sum << 1 | self.sum >> 63) & 0xFFFFFFFFFFFFFFFF | |
self.sum ^= self.bytehash[c] | |
self.n_rotate = len(self.window) % 64 | |
self.n_rotate_complement = 64 - self.n_rotate | |
return len(data) | |
def roll(self: Self, c: int): | |
hn = self.bytehash[c] | |
h0 = self.bytehash[self.window[self.oldest]] | |
self.window[self.oldest] = c | |
n = len(self.window) | |
self.oldest += 1 | |
if self.oldest >= n: | |
self.oldest = 0 | |
self.sum = ( | |
(self.sum << 1 | self.sum >> 63) ^ | |
(h0 << self.n_rotate | h0 >> self.n_rotate_complement) ^ hn | |
) & 0xFFFFFFFFFFFFFFFF | |
# Small test | |
if __name__ == "__main__": | |
h = Buzhash64(use_go_hashes=True) | |
# h = Buzhash64(use_go_hashes=False) | |
s = b'The quick brown fox jumps over the lazy dog' | |
n = 16 | |
h.update(s[:n]) | |
for i in range(n, len(s)): | |
h.roll(s[i]) | |
print(f'{s[i-n+1:i+1]}: checksum {h.hexdigest()}') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment