Optimized adler32 hashing algorithm. (#2948)

* Optimized adler32 implementations.

 - Adapted adler32 implementation from Crypto++ public domain library.
 - Added unit tests for adler32 hashing algorithm.

* tabified adler32 implementation to match stdlib.

* Formatting to be consistent. Make unrolling use macro.

---------

Co-authored-by: soerlemans <sebasoerlemans+git@gmail.com>
Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
This commit is contained in:
soerlemans
2026-02-19 17:51:33 +01:00
committed by GitHub
parent 7ae4c5a1ab
commit 152558f5bc
2 changed files with 137 additions and 14 deletions

View File

@@ -4,7 +4,8 @@
module std::hash::adler32;
const uint ADLER_CONST @private = 65521;
const uint ADLER32_CONST @private = 65521;
struct Adler32
{
@@ -19,19 +20,79 @@ fn void Adler32.init(&self)
fn void Adler32.updatec(&self, char c)
{
self.a = (self.a + c) % ADLER_CONST;
self.b = (self.b + self.a) % ADLER_CONST;
self.a = (self.a + c) % ADLER32_CONST;
self.b = (self.b + self.a) % ADLER32_CONST;
}
fn void Adler32.update(&self, char[] data)
{
// Safe chunking size: 32768 bytes (32 KB), which also fits the L1 cache on most systems.
// 0x8000 ~ (2^32 / 65521 / 2).
// The division is done so that we are guaranteed to never overflow.
const uint SAFE_CHUNKING_SIZE = 0x8000;
// Number of bytes processed per unrolled iteration of the main loop.
const uint UNROLL_SIZE = 8;
uint a = self.a;
uint b = self.b;
foreach (char x : data)
char* buf = data;
usz len = data.len;
// Consume leading bytes one at a time until the remaining length is a multiple of UNROLL_SIZE.
if (len % UNROLL_SIZE != 0)
{
a = (a + x) % ADLER_CONST;
b = (b + a) % ADLER_CONST;
do
{
a += *buf;
b += a;
buf++;
len--;
} while (len % UNROLL_SIZE != 0);
if (a >= ADLER32_CONST)
{
a -= ADLER32_CONST;
}
b %= ADLER32_CONST;
}
// Calculate rest of adler32 checksum.
while (len > 0)
{
$for var $i = 0; $i < UNROLL_SIZE; $i++:
a += buf[$i]; b += a;
$endfor
len -= UNROLL_SIZE;
buf += UNROLL_SIZE;
// Even with 8 maximum-value (0xFF) bytes added to a (0xFF * 8 = 2040 in the worst case),
// a can never reach 2 * ADLER32_CONST, so a full modulo is not needed here
// and a single conditional subtraction is more performant.
if (a >= ADLER32_CONST)
{
a -= ADLER32_CONST;
}
// We need to periodically reduce b because it accumulates a (itself a running sum), so it grows rapidly.
// So every 32 KB of input (SAFE_CHUNKING_SIZE) we apply the modulo in order to prevent uint integer overflow.
if (len % SAFE_CHUNKING_SIZE == 0)
{
b %= ADLER32_CONST;
}
}
// No need to explicitly reduce a and b modulo ADLER32_CONST after the loop ends,
// as both are guaranteed to already be below ADLER32_CONST.
// Do assert on debug.
assert(a < ADLER32_CONST);
assert(b < ADLER32_CONST);
*self = { a, b };
}
@@ -42,12 +103,10 @@ fn uint Adler32.final(&self)
fn uint hash(char[] data)
{
uint a = 1;
uint b = 0;
foreach (char x : data)
{
a = (a + x) % ADLER_CONST;
b = (b + a) % ADLER_CONST;
}
return (b << 16) | a;
Adler32 adler;
adler.init();
adler.update(data);
return adler.final();
}