From 152558f5bc50e4871144abdcba4a63cab2aaeea4 Mon Sep 17 00:00:00 2001
From: soerlemans <35037988+soerlemans@users.noreply.github.com>
Date: Thu, 19 Feb 2026 17:51:33 +0100
Subject: [PATCH] Optimized adler32 hashing algorithm. (#2948)

* Optimized adler32 implementations.

 - Adapted adler32 implementation from Crypto++ public domain library.
 - Added unit tests for adler32 hashing algorithm.

* tabified adler32 implementation to match stdlib.

* Formatting to be consistent. Make unrolling use macro.

---------

Co-authored-by: soerlemans <sebasoerlemans+git@gmail.com>
Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
---
 lib/std/hash/adler32.c3          | 87 +++++++++++++++++++++++++++-----
 test/unit/stdlib/hash/adler32.c3 | 64 +++++++++++++++++++++++
 2 files changed, 137 insertions(+), 14 deletions(-)
 create mode 100644 test/unit/stdlib/hash/adler32.c3

diff --git a/lib/std/hash/adler32.c3 b/lib/std/hash/adler32.c3
index 23e7af763..5a88c09e6 100644
--- a/lib/std/hash/adler32.c3
+++ b/lib/std/hash/adler32.c3
@@ -4,7 +4,8 @@
 
 module std::hash::adler32;
 
-const uint ADLER_CONST @private = 65521;
+const uint ADLER32_CONST @private = 65521;
+
 
 struct Adler32
 {
@@ -19,19 +20,79 @@ fn void Adler32.init(&self)
 
 fn void Adler32.updatec(&self, char c)
 {
-	self.a = (self.a + c) % ADLER_CONST;
-	self.b = (self.b + self.a) % ADLER_CONST;
+	self.a = (self.a + c) % ADLER32_CONST;
+	self.b = (self.b + self.a) % ADLER32_CONST;
 }
 
 fn void Adler32.update(&self, char[] data)
 {
+	// Safe chunking constant, sized to suit the L1 cache on most systems: 32768 bytes (32 KB).
+	// 0x8000 ~ (2^32 / 65521 / 2).
+	// The division leaves headroom so that b is guaranteed to never overflow between reductions.
+	const uint SAFE_CHUNKING_SIZE = 0x8000;
+
+	// Number of bytes processed per iteration of the unrolled main loop.
+	const uint UNROLL_SIZE = 8;
+
 	uint a = self.a;
 	uint b = self.b;
-	foreach (char x : data)
+
+	char* buf = data;
+	usz len = data.len;
+
+	// Consume leading bytes one at a time until the remaining length is a multiple of UNROLL_SIZE.
+	if (len % UNROLL_SIZE != 0)
 	{
-		a = (a + x) % ADLER_CONST;
-		b = (b + a) % ADLER_CONST;
+		do
+		{
+			a += *buf;
+			b += a;
+
+			buf++;
+			len--;
+		} while (len % UNROLL_SIZE != 0);
+
+		if (a >= ADLER32_CONST)
+		{
+			a -= ADLER32_CONST;
+		}
+
+		b %= ADLER32_CONST;
 	}
+
+	// Calculate rest of adler32 checksum.
+	while (len > 0)
+	{
+		$for var $i = 0; $i < UNROLL_SIZE; $i++:
+			a += buf[$i]; b += a;
+		$endfor
+
+		len -= UNROLL_SIZE;
+		buf += UNROLL_SIZE;
+
+		// Even with 8 max value (0xFF) bytes being added to a (0xFF * 8 = 2040 for worst case),
+		// there is no chance that a will reach 2 * ADLER32_CONST, so modulo is not needed here.
+		// So it's more performant to use a single subtraction.
+		if (a >= ADLER32_CONST)
+		{
+			a -= ADLER32_CONST;
+		}
+
+		// We need to periodically reduce b because it accumulates a, which is itself a running sum, so it grows rapidly.
+		// So at least once every SAFE_CHUNKING_SIZE (32 KB) bytes we modulo in order to prevent uint integer overflow.
+		if (len % SAFE_CHUNKING_SIZE == 0)
+		{
+			b %= ADLER32_CONST;
+		}
+	}
+
+	// No need to explicitly modulo with ADLER32_CONST after the loop ends,
+	// as a and b are guaranteed to be under ADLER32_CONST.
+
+	// Do assert on debug.
+	assert(a < ADLER32_CONST);
+	assert(b < ADLER32_CONST);
+
 	*self = { a, b };
 }
 
@@ -42,12 +103,10 @@ fn uint Adler32.final(&self)
 
 fn uint hash(char[] data)
 {
-	uint a = 1;
-	uint b = 0;
-	foreach (char x : data)
-	{
-		a = (a + x) % ADLER_CONST;
-		b = (b + a) % ADLER_CONST;
-	}
-	return (b << 16) | a;
+	Adler32 adler;
+	adler.init();
+
+	adler.update(data);
+
+	return adler.final();
 }
\ No newline at end of file
diff --git a/test/unit/stdlib/hash/adler32.c3 b/test/unit/stdlib/hash/adler32.c3
new file mode 100644
index 000000000..24dcd620a
--- /dev/null
+++ b/test/unit/stdlib/hash/adler32.c3
@@ -0,0 +1,64 @@
+module adler32_test @test;
+
+import std::hash::adler32;
+import std::io;
+
+fn void test_adler32_empty()
+{
+	const uint EXPECTED = 0x1;
+
+	Adler32 adl;
+	adl.init();
+	adl.update("");
+
+	uint final = adl.final();
+	test::@check(final == EXPECTED, "Actual Adler32: 0x%x == 0x%x", final, EXPECTED);
+}
+
+fn void test_adler32_a()
+{
+	const uint EXPECTED = 0x00620062;
+
+	Adler32 adl;
+	adl.init();
+	adl.updatec('a');
+
+	uint final = adl.final();
+	test::@check(final == EXPECTED, "Actual Adler32: 0x%x == 0x%x", final, EXPECTED);
+}
+
+fn void test_adler32_abc()
+{
+	const uint EXPECTED = 0x024d0127;
+
+	Adler32 adl;
+	adl.init();
+	adl.update("abc");
+
+	uint final = adl.final();
+	test::@check(final == EXPECTED, "Actual Adler32: 0x%x == 0x%x", final, EXPECTED);
+}
+
+fn void test_adler32_longer()
+{
+	const uint EXPECTED = 0x07822df1;
+
+	Adler32 adl;
+	adl.init();
+	adl.update("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopqabcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq");
+
+	uint final = adl.final();
+	test::@check(final == EXPECTED, "Actual Adler32: 0x%x == 0x%x", final, EXPECTED);
+}
+
+fn void test_adler32_alphabet()
+{
+	const uint EXPECTED = 0x90860b20;
+
+	Adler32 adl;
+	adl.init();
+	adl.update("abcdefghijklmnopqrstuvwxyz");
+
+	uint final = adl.final();
+	test::@check(final == EXPECTED, "Actual Adler32: 0x%x == 0x%x", final, EXPECTED);
+}