mirror of
https://github.com/c3lang/c3c.git
synced 2026-02-27 03:51:18 +00:00
Optimized adler32 hashing algorithm. (#2948)
* Optimized adler32 implementations. - Adapted adler32 implementation from Crypto++ public domain library. - Added unit tests for adler32 hashing algorithm. * tabified adler32 implementation to match stdlib. * Formatting to be consistent. Make unrolling use macro. --------- Co-authored-by: soerlemans <sebasoerlemans+git@gmail.com> Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
This commit is contained in:
@@ -4,7 +4,8 @@
|
||||
|
||||
module std::hash::adler32;

// Modulus for the Adler-32 checksum: 65521, the largest prime below 2^16
// (see RFC 1950). All accumulator reductions in this module use it.
const uint ADLER32_CONST @private = 65521;
struct Adler32
|
||||
{
|
||||
@@ -19,19 +20,79 @@ fn void Adler32.init(&self)
|
||||
|
||||
// Fold a single byte into the running checksum.
// a is the byte sum, b the sum of the running a values; both are kept
// reduced mod ADLER32_CONST so they never overflow.
fn void Adler32.updatec(&self, char c)
{
	self.a = (self.a + c) % ADLER32_CONST;
	self.b = (self.b + self.a) % ADLER32_CONST;
}
|
||||
|
||||
// Fold a slice of bytes into the running checksum.
// Optimized: processes data in unrolled 8-byte chunks and defers the
// expensive modulo reductions as long as overflow is provably impossible.
fn void Adler32.update(&self, char[] data)
{
	// Safe chunking constant, sized for the L1 cache on most systems: 32768 (32 KB).
	// 0x8000 ~ (2^32 / 65521 / 2) — the division guarantees that b can never
	// overflow a uint between reductions.
	const uint SAFE_CHUNKING_SIZE = 0x8000;

	// Number of bytes consumed per unrolled iteration of the main loop.
	const uint UNROLL_SIZE = 8;

	uint a = self.a;
	uint b = self.b;

	char* buf = data;
	usz len = data.len;

	// Consume leading bytes until the remaining length is a multiple of
	// UNROLL_SIZE, so the main loop only ever sees full chunks.
	if (len % UNROLL_SIZE != 0)
	{
		do
		{
			a += *buf;
			b += a;

			buf++;
			len--;
		} while (len % UNROLL_SIZE != 0);

		// At most UNROLL_SIZE - 1 bytes were added, so a < 2 * ADLER32_CONST
		// and a single conditional subtraction replaces the modulo.
		if (a >= ADLER32_CONST)
		{
			a -= ADLER32_CONST;
		}

		b %= ADLER32_CONST;
	}

	// Calculate the rest of the adler32 checksum in unrolled chunks.
	while (len > 0)
	{
		$for var $i = 0; $i < UNROLL_SIZE; $i++:
			a += buf[$i]; b += a;
		$endfor

		len -= UNROLL_SIZE;
		buf += UNROLL_SIZE;

		// Even with 8 max-value (0xFF) bytes added to a (0xFF * 8 = 2040 in the
		// worst case), a can never reach 2 * ADLER32_CONST, so no modulo is
		// needed here — a subtraction is cheaper.
		if (a >= ADLER32_CONST)
		{
			a -= ADLER32_CONST;
		}

		// b accumulates a, which is itself a running sum, so it grows rapidly.
		// Reduce it every SAFE_CHUNKING_SIZE bytes to prevent uint overflow.
		// (len hits 0 at the end, so b is always reduced after the final chunk.)
		if (len % SAFE_CHUNKING_SIZE == 0)
		{
			b %= ADLER32_CONST;
		}
	}

	// No explicit modulo needed after the loops: a and b are guaranteed to be
	// below ADLER32_CONST at this point.

	// Do assert on debug.
	assert(a < ADLER32_CONST);
	assert(b < ADLER32_CONST);

	*self = { a, b };
}
|
||||
|
||||
@@ -42,12 +103,10 @@ fn uint Adler32.final(&self)
|
||||
|
||||
// One-shot Adler-32 of a byte slice.
// Delegates to the streaming Adler32 implementation so both paths share the
// same optimized update code.
fn uint hash(char[] data)
{
	Adler32 adler;
	adler.init();

	adler.update(data);

	return adler.final();
}
|
||||
Reference in New Issue
Block a user