Skip to content

Instantly share code, notes, and snippets.

@chmduquesne
Last active December 13, 2024 10:15
Show Gist options
  • Save chmduquesne/22fc50d44ec85bc051ee25b3e211ac5f to your computer and use it in GitHub Desktop.
Save chmduquesne/22fc50d44ec85bc051ee25b3e211ac5f to your computer and use it in GitHub Desktop.
Parametrized rolling checksum in python
from typing import Self
import random
# An implementation of buzhash64 compatible with
# https://github.com/chmduquesne/rollinghash/blob/master/buzhash64/buzhash64.go
# List of hashes used by the default go implementation (seed=1)
go_hashes = [
0x4d65822107fcfd52, 0x78629a0f5f3f164f, 0x55104dc76695721d, 0x380704bb7b4d7c03, # noqa: E501
0x365a858149c6e2d1, 0x57e9d1860d1d68d8, 0x0866cb397916001e, 0x1408d2ac22c4d294, # noqa: E501
0x0c697f48392907a0, 0x268447a4189deb99, 0x41f27cc6f3875d04, 0x68255aaf95e94627, # noqa: E501
0x1b6cffa2ba517936, 0x30b95ff183c471d4, 0x28b621587cb3ad0b, 0x3c04951aa42655d9, # noqa: E501
0x243a768b7c4e0b68, 0x25845c95d4491d1b, 0x56ec3f2525632186, 0x1bf98be2a9d78d73, # noqa: E501
0x1a02070f169c1121, 0x2e3108dabb158644, 0x490bd268b68e6a3f, 0x6e661e92759805f5, # noqa: E501
0x2584c47f2cdf5b8a, 0x2606cd2b57d29245, 0x6054502fc5d6d268, 0x1a714cf86b83d0e2, # noqa: E501
0x6ec34c367674cb74, 0x592e17f7b068d9db, 0x430c8b35bb9457d8, 0x039f6f78a15d523b, # noqa: E501
0x144419db794209ff, 0x4dba7b0f9da1d7eb, 0x7cd4b7a55a25e0cb, 0x0a2b894cf840ec4b, # noqa: E501
0x4c22b02936d4ff9b, 0x079143f7f4a5ee3b, 0x589442fd5ad145f4, 0x26984b92f6740304, # noqa: E501
0x162d968d3f71f8cb, 0x4542c29291018d7c, 0x45a6e3cafccae224, 0x23a62343b186b51f, # noqa: E501
0x3629d9f17d9e8fbc, 0x43ea3b9393f93f33, 0x207403def63a5b6f, 0x241b3ae419476c36, # noqa: E501
0x64f1017fbc897d06, 0x2e4fa459169873f5, 0x70b5a315724c7af1, 0x2607c649581eeb39, # noqa: E501
0x727a71f52257bb7d, 0x0c7964976f269a28, 0x7d0b9ca8be8e9981, 0x09825e117039374b, # noqa: E501
0x1c73fac825416fed, 0x572d92faded7e411, 0x1ee9f7676678e7aa, 0x27dff7ab244fcd36, # noqa: E501
0x7767830356aa6b86, 0x5ef4e81ede4561ad, 0x6688f8bd3e99b0a8, 0x5d78399cbed80a3a, # noqa: E501
0x176a156ae58348b0, 0x36d467a4af63e58d, 0x72d0a1e9406aec9d, 0x57613082c233f007, # noqa: E501
0x7d4d8e9fa5ead0bd, 0x760b0d22050143a6, 0x0ba08e4b738b6829, 0x3f1f46e83699caf3, # noqa: E501
0x76a780ea967cd710, 0x7a3ba6f606f665a6, 0x2c89c16725fd3d7f, 0x586d68260fd6e479, # noqa: E501
0x5aff01c926fbf29b, 0x4829ee0716de4c35, 0x5322787c2bf3394b, 0x46a03cb44af864ba, # noqa: E501
0x60bed31f1cb9e6c6, 0x33afd37941439089, 0x10b92d0169a39144, 0x7e34179dc34f182d, # noqa: E501
0x72bb5389421657ff, 0x293a0c2bf9fc6568, 0x5c4e91e98b02c917, 0x528047936c9c64b7, # noqa: E501
0x0af2560383d17909, 0x55b4a4b2ea3d4ca5, 0x4fb58fbeaf635d47, 0x2f5218587fc78769, # noqa: E501
0x1e503382be14186f, 0x44841df33539b1ea, 0x17f7ae24e9174548, 0x1e925507c051e18a, # noqa: E501
0x5065855807b73658, 0x103970a329ec300c, 0x2402a18da250bf34, 0x3485757ea7ed5d97, # noqa: E501
0x37ab3641fe3dea79, 0x50031d27b8b352f7, 0x466b36dbc9b344e9, 0x4fd269fd8e5f0475, # noqa: E501
0x5d55cb471941e52a, 0x6a4eef7a2694763d, 0x0010d6326b40eabc, 0x5e377ef58485d68b, # noqa: E501
0x3332aafe336eacca, 0x3fba24704399a363, 0x4d4f278a67149b9c, 0x346e5f29ae10a901, # noqa: E501
0x03cc44bf5a5ffefb, 0x003e6306563b26de, 0x005d29286f00f02b, 0x7539a2019f06397d, # noqa: E501
0x4b7fafc3545836c4, 0x479a2bf931d6416b, 0x685f325712f4128d, 0x7062b076752f33ff, # noqa: E501
0x3aae3e3e4a305605, 0x4cd239ea0c8dc214, 0x035ca80d72521a90, 0x6c443faf8eb3e4a1, # noqa: E501
0x1ff5f26283efc6c6, 0x5225fcd6090ec04f, 0x1facfc5dc1540864, 0x163a5aceec2c8aaa, # noqa: E501
0x4bdb185b70ab53ba, 0x683e14a538d3b494, 0x58cfb024878d4063, 0x03e19bf7a317ae3f, # noqa: E501
0x4504d6353cb62f07, 0x7ce2e98ef360412c, 0x601900fb4ffbf3a9, 0x25a1ffb522d554b4, # noqa: E501
0x606796b83f190476, 0x1352ca320796a710, 0x2d89c820f5c353cf, 0x6a7cb5cf04f59bb7, # noqa: E501
0x1dac9b582d230176, 0x505ce263e2d6a9ce, 0x3fcb626c3f1d7427, 0x0b7fbfbcafd915bb, # noqa: E501
0x03398e40b01aa47d, 0x323423cfcde2c269, 0x4b70e7ac7417bf38, 0x76fd839a1e094f9a, # noqa: E501
0x493a23eb55ece0ea, 0x4b56783ccb94539b, 0x34b4a3c813d346b5, 0x46baf44754e0c0c1, # noqa: E501
0x3eecfdbc6db30e37, 0x7a9e3bdcdc02b390, 0x660aedf1a6e222f5, 0x0dbeaa0fe2f8c1fe, # noqa: E501
0x643a7d712e166bdf, 0x32560c7a67588a74, 0x10b166a221898f34, 0x1852fe624c330f1d, # noqa: E501
0x5eb29c7719af53ba, 0x53b7a0ff70658b94, 0x0c97d70a133c9673, 0x429bd23a4efeeadd, # noqa: E501
0x0cc3f10e0f212551, 0x136f9ac7070f0914, 0x09c09a3e6f241c57, 0x2858bd10f13e41b7, # noqa: E501
0x146f70ff3be70cb0, 0x11a39040f4b6f47f, 0x294b4e8e20f31127, 0x450064ce6551cb89, # noqa: E501
0x4911aa87289cbd2c, 0x41a2d5288946f23d, 0x57930cf840a79c3b, 0x5396d24a03c6d982, # noqa: E501
0x4322cee10365790c, 0x53bf1faf0cf52517, 0x5bb1f57b0bb131e8, 0x517d8ebf3da5475c, # noqa: E501
0x01a44786139efcca, 0x03ed64e9bcd44eb4, 0x0c8c4694a54af747, 0x2f3f0d6fb73c32ed, # noqa: E501
0x69c93fb09f6c47ac, 0x2c80d58fe8ba8f22, 0x2c1283b654043a66, 0x20624c583b0a7f20, # noqa: E501
0x1bb55397b4926431, 0x470a4f5ae17c02d5, 0x33770eb58f0d2558, 0x40d4e552014fbff2, # noqa: E501
0x15974b9d7f803594, 0x2a6a467079b76fbe, 0x69f98c4033fe2656, 0x59a30874792c8ee8, # noqa: E501
0x076a20af6b41292d, 0x7fe4754afdff9c32, 0x34ad5ac882093298, 0x0e4b5ac059483870, # noqa: E501
0x63efbff5b2d5a113, 0x0bca82a42dd96e5a, 0x06d8e96f5b8e56a9, 0x5b7b2709ebd9dda9, # noqa: E501
0x2018fa6e04f9ce92, 0x6ca000e8cb440950, 0x7ca82947a67e52b1, 0x1b35327a49f6d261, # noqa: E501
0x02c19e7792417fc3, 0x78fc24541c3b6bd9, 0x0be67230b027b7e0, 0x52aaab031f765a41, # noqa: E501
0x27ebdd8f44c9ab40, 0x396747c045d99121, 0x3e5ddb0efd7a84af, 0x0a8eb1ac99b75788, # noqa: E501
0x55fe7f03e3abff4a, 0x33395eafa88aa67f, 0x733c374d736e41cc, 0x7995c5dc9cbcbe5e, # noqa: E501
0x28dfd8d37b3ccebc, 0x3febdd25e1b7fa93, 0x33415dbd315ae6af, 0x0289172b9cced2e2, # noqa: E501
0x5290a23119ea0f2f, 0x36df4331a9770722, 0x2b77e80684a6bfdc, 0x7197e13488f03f07, # noqa: E501
0x1e3ffa8aa44a03a4, 0x61ebca0827a6b885, 0x04939bb8b580c8ba, 0x5d214064018153da, # noqa: E501
0x501b6a22b648e604, 0x41acd9f551180278, 0x0945fcdd893a310f, 0x5cb389ac728f5f4c, # noqa: E501
0x709ec18437f5198b, 0x7d275a873cc0ea9b, 0x6c7ae37ae39d02db, 0x6a85764813883142, # noqa: E501
0x1fb95e8cca599392, 0x74ea42afc12d154e, 0x099ad1bdc176163d, 0x6ae4ae6d5c92e2b8, # noqa: E501
0x508df0dcf9f95ede, 0x60390908b802bdfc, 0x50e57d0f8a928585, 0x0c68571ddca6e10b, # noqa: E501
0x01e5dcfd887953e8, 0x4abb18c948b9e962, 0x08cd00c4e533e9a3, 0x7fc76fad5e0ce6e5, # noqa: E501
0x53189b251dba77ae, 0x7e23bc6fc8214b8a, 0x6adaea4753b428d7, 0x2a80d0564cf20a65 # noqa: E501
]
# Generate the list of hashes
def generate_hashes(seed: int) -> list:
rand = random.Random(seed)
res = []
for _ in range(256):
res.append(rand.getrandbits(64))
return res
# Implementation of Buzhash64
class Buzhash64():
sum: int = 0
n_rotate: int = 0
n_rotate_complement: int = 0
window: bytearray = bytearray()
oldest: int = 0
bytehash: list = 0
def __init__(self, seed=0, use_go_hashes=False):
if use_go_hashes:
self.bytehash = go_hashes
else:
self.bytehash = generate_hashes(seed)
def digest(self: Self) -> bytes:
return self.sum.to_bytes(8)
def hexdigest(self: Self) -> bytes:
return self.digest().hex()
def update(self: Self, data: bytes) -> int:
if len(data) == 0:
return 0
# rearrange the window so that the oldest byte is at the start
if self.oldest != 0:
self.window = \
self.window[:self.oldest] + self.window[self.oldest:]
self.window += bytearray(data)
self.sum = 0
for c in self.window:
self.sum = (self.sum << 1 | self.sum >> 63) & 0xFFFFFFFFFFFFFFFF
self.sum ^= self.bytehash[c]
self.n_rotate = len(self.window) % 64
self.n_rotate_complement = 64 - self.n_rotate
return len(data)
def roll(self: Self, c: int):
hn = self.bytehash[c]
h0 = self.bytehash[self.window[self.oldest]]
self.window[self.oldest] = c
n = len(self.window)
self.oldest += 1
if self.oldest >= n:
self.oldest = 0
self.sum = (
(self.sum << 1 | self.sum >> 63) ^
(h0 << self.n_rotate | h0 >> self.n_rotate_complement) ^ hn
) & 0xFFFFFFFFFFFFFFFF
# Small test
if __name__ == "__main__":
h = Buzhash64(use_go_hashes=True)
# h = Buzhash64(use_go_hashes=False)
s = b'The quick brown fox jumps over the lazy dog'
n = 16
h.update(s[:n])
for i in range(n, len(s)):
h.roll(s[i])
print(f'{s[i-n+1:i+1]}: checksum {h.hexdigest()}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment