From 152558f5bc50e4871144abdcba4a63cab2aaeea4 Mon Sep 17 00:00:00 2001 From: soerlemans <35037988+soerlemans@users.noreply.github.com> Date: Thu, 19 Feb 2026 17:51:33 +0100 Subject: [PATCH] Optimized adler32 hashing algorithm. (#2948) * Optimized adler32 implementations. - Adapted adler32 implementation from Crypto++ public domain library. - Added unit tests for adler32 hashing algorithm. * tabified adler32 implementation to match stdlib. * Formatting to be consistent. Make unrolling use macro. --------- Co-authored-by: soerlemans Co-authored-by: Christoffer Lerno --- lib/std/hash/adler32.c3 | 87 +++++++++++++++++++++++++++----- test/unit/stdlib/hash/adler32.c3 | 64 +++++++++++++++++++++++ 2 files changed, 137 insertions(+), 14 deletions(-) create mode 100644 test/unit/stdlib/hash/adler32.c3 diff --git a/lib/std/hash/adler32.c3 b/lib/std/hash/adler32.c3 index 23e7af763..5a88c09e6 100644 --- a/lib/std/hash/adler32.c3 +++ b/lib/std/hash/adler32.c3 @@ -4,7 +4,8 @@ module std::hash::adler32; -const uint ADLER_CONST @private = 65521; +const uint ADLER32_CONST @private = 65521; + struct Adler32 { @@ -19,19 +20,79 @@ fn void Adler32.init(&self) fn void Adler32.updatec(&self, char c) { - self.a = (self.a + c) % ADLER_CONST; - self.b = (self.b + self.a) % ADLER_CONST; + self.a = (self.a + c) % ADLER32_CONST; + self.b = (self.b + self.a) % ADLER32_CONST; } fn void Adler32.update(&self, char[] data) { + // Safe chunking constant which is optimized for L1 cache on most systems: 32768 bytes (32 KB). + // 0x8000 ~ (2^32 / 65521 / 2). + // The division is done so that we are guaranteed to never overflow. + const uint SAFE_CHUNKING_SIZE = 0x8000; + + // Number of bytes processed per iteration of the unrolled main loop. + const uint UNROLL_SIZE = 8; + uint a = self.a; uint b = self.b; - foreach (char x : data) + + char* buf = data; + usz len = data.len; + + // Process leading bytes one at a time until the remaining length is a multiple of the unroll size. 
+ if (len % UNROLL_SIZE != 0) { - a = (a + x) % ADLER_CONST; - b = (b + a) % ADLER_CONST; + do + { + a += *buf; + b += a; + + buf++; + len--; + } while (len % UNROLL_SIZE != 0); + + if (a >= ADLER32_CONST) + { + a -= ADLER32_CONST; + } + + b %= ADLER32_CONST; } + + // Calculate rest of adler32 checksum. + while (len > 0) + { + $for var $i = 0; $i < UNROLL_SIZE; $i++: + a += buf[$i]; b += a; + $endfor + + len -= UNROLL_SIZE; + buf += UNROLL_SIZE; + + // Even with 8 max value (0xFF) bytes being added to a (0xFF * 8 = 2040 for worst case). + // There is no chance that a will be > 2 * ADLER32_CONST, so modulo is not needed here. + // So it's more performant to use subtraction. + if (a >= ADLER32_CONST) + { + a -= ADLER32_CONST; + } + + // We need to periodically chunk b because it accumulates a which is a sum, so it grows rapidly. + // So every 32 KB (SAFE_CHUNKING_SIZE) of bytes we modulo in order to prevent uint integer overflow. + if (len % SAFE_CHUNKING_SIZE == 0) + { + b %= ADLER32_CONST; + } + } + + // No need to explicitly modulo after loop end with ADLER32_CONST. + // As a and b are guaranteed to be under ADLER32_CONST. + + // Do assert on debug. 
+ assert(a < ADLER32_CONST); + assert(b < ADLER32_CONST); + *self = { a, b }; } @@ -42,12 +103,10 @@ fn uint Adler32.final(&self) fn uint hash(char[] data) { - uint a = 1; - uint b = 0; - foreach (char x : data) - { - a = (a + x) % ADLER_CONST; - b = (b + a) % ADLER_CONST; - } - return (b << 16) | a; + Adler32 adler; + adler.init(); + + adler.update(data); + + return adler.final(); } \ No newline at end of file diff --git a/test/unit/stdlib/hash/adler32.c3 b/test/unit/stdlib/hash/adler32.c3 new file mode 100644 index 000000000..24dcd620a --- /dev/null +++ b/test/unit/stdlib/hash/adler32.c3 @@ -0,0 +1,64 @@ +module adler32_test @test; + +import std::hash::adler32; +import std::io; + +fn void test_adler32_empty() +{ + const uint EXPECTED = 0x1; + + Adler32 adl; + adl.init(); + adl.update(""); + + uint final = adl.final(); + test::@check(final == EXPECTED, "Actual Adler32: 0x%x == 0x%x", final, EXPECTED); +} + +fn void test_adler32_a() +{ + const uint EXPECTED = 0x00620062; + + Adler32 adl; + adl.init(); + adl.updatec('a'); + + uint final = adl.final(); + test::@check(final == EXPECTED, "Actual Adler32: 0x%x == 0x%x", final, EXPECTED); +} + +fn void test_adler32_abc() +{ + const uint EXPECTED = 0x024d0127; + + Adler32 adl; + adl.init(); + adl.update("abc"); + + uint final = adl.final(); + test::@check(final == EXPECTED, "Actual Adler32: 0x%x == 0x%x", final, EXPECTED); +} + +fn void test_adler32_longer() +{ + const uint EXPECTED = 0x07822df1; + + Adler32 adl; + adl.init(); + adl.update("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopqabcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"); + + uint final = adl.final(); + test::@check(final == EXPECTED, "Actual Adler32: 0x%x == 0x%x", final, EXPECTED); +} + +fn void test_adler32_alphabet() +{ + const uint EXPECTED = 0x90860b20; + + Adler32 adl; + adl.init(); + adl.update("abcdefghijklmnopqrstuvwxyz"); + + uint final = adl.final(); + test::@check(final == EXPECTED, "Actual Adler32: 0x%x == 0x%x", final, 
EXPECTED); +}