Add Murmur3 hash

This commit is contained in:
Christoffer Lerno
2026-02-06 17:53:19 +01:00
parent 0be291e0d7
commit 40aa4d4dcd
4 changed files with 292 additions and 5 deletions

243
lib/std/hash/murmur.c3 Normal file
View File

@@ -0,0 +1,243 @@
module std::hash::murmur3;
<*
 Compute the 32-bit MurmurHash3 of a byte slice.

 Whole 4-byte blocks are read through getblock32, the remaining 1-3 tail
 bytes are folded in separately, and the length is mixed in before the
 final fmix32 step.

 @param [in] data : "The data to hash"
 @param seed : "The seed to use for hashing"
 @require (data.len / 4) <= int.max : "Too much data"
 @return "The 32-bit hash of data for the given seed"
*>
fn uint hash32(char[] data, uint seed)
{
	// Divide before narrowing: casting data.len to int first would truncate
	// lengths of 2 GiB and above even though the contract allows them.
	int nblocks = (int)(data.len / 4);
	uint h1 = seed;
	const uint C1 = 0xcc9e2d51;
	const uint C2 = 0x1b873593;
	// Byte offset of the end of the block area, computed in usz so it cannot wrap.
	char* tail = data.ptr + (usz)nblocks * 4;
	// Blocks are addressed with negative indices relative to the end of the block area.
	uint* blocks = (uint*)tail;
	for (int i = -nblocks; i != 0; i++)
	{
		uint k1 = getblock32(blocks, i);
		k1 *= C1;
		k1 = k1.rotl(15);
		k1 *= C2;
		h1 ^= k1;
		h1 = h1.rotl(13);
		h1 = h1 * 5U + 0xe6546b64;
	}
	// Fold in the trailing 1-3 bytes that do not fill a whole block.
	// k1 relies on C3's default zero-initialisation of locals.
	uint k1;
	switch (data.len & 3)
	{
		case 3: k1 ^= tail[2] << 16; nextcase;
		case 2: k1 ^= tail[1] << 8; nextcase;
		case 1: k1 ^= tail[0]; k1 *= C1; k1 = k1.rotl(15); k1 *= C2; h1 ^= k1;
	}
	h1 ^= (uint)data.len;
	h1 = fmix32(h1);
	return h1;
}
<*
 Compute a 128-bit MurmurHash3 of a byte slice using 64-bit arithmetic.

 The input is consumed in 16-byte blocks (two 64-bit words read through
 getblock64); the remaining 1-15 tail bytes are folded in separately.
 The result carries h1 in the low 64 bits and h2 in the high 64 bits.

 @param [in] data : "The data to hash"
 @param seed : "The seed to use for hashing"
 @require (data.len / 16) <= int.max : "Too much data"
 @return "The 128-bit hash of data for the given seed"
*>
fn uint128 hash128_64(char[] data, uint seed)
{
	ulong len = data.len;
	int nblocks = (int)(len / 16);
	ulong h1 = seed;
	ulong h2 = seed;
	const ulong C1 = 0x87c37b91114253d5UL;
	const ulong C2 = 0x4cf5ad432745937fUL;
	for (int i = 0; i < nblocks; i++)
	{
		// Locate the block through a usz byte offset: the word index i * 2 + 1
		// would wrap in int for block counts the contract still permits.
		// The pointer may be unaligned; getblock64 loads with alignment 1.
		ulong* block = (ulong*)(data.ptr + (usz)i * 16);
		ulong k1 = getblock64(block, 0);
		ulong k2 = getblock64(block, 1);
		k1 *= C1; k1 = k1.rotl(31); k1 *= C2; h1 ^= k1;
		h1 = h1.rotl(27); h1 += h2; h1 = h1 * 5U + 0x52dce729;
		k2 *= C2; k2 = k2.rotl(33); k2 *= C1; h2 ^= k2;
		h2 = h2.rotl(31); h2 += h1; h2 = h2 * 5U + 0x38495ab5;
	}
	// Same reasoning for the tail: nblocks * 16 in int wraps from 2 GiB of input.
	char* tail = data.ptr + (usz)nblocks * 16;
	// k1 and k2 rely on C3's default zero-initialisation of locals.
	ulong k1, k2;
	switch (len & 15)
	{
		case 15: k2 ^= ((ulong)tail[14]) << 48; nextcase;
		case 14: k2 ^= ((ulong)tail[13]) << 40; nextcase;
		case 13: k2 ^= ((ulong)tail[12]) << 32; nextcase;
		case 12: k2 ^= ((ulong)tail[11]) << 24; nextcase;
		case 11: k2 ^= ((ulong)tail[10]) << 16; nextcase;
		case 10: k2 ^= ((ulong)tail[ 9]) << 8; nextcase;
		case 9: k2 ^= ((ulong)tail[ 8]) << 0;
			k2 *= C2; k2 = k2.rotl(33); k2 *= C1; h2 ^= k2;
			nextcase;
		case 8: k1 ^= ((ulong)tail[ 7]) << 56; nextcase;
		case 7: k1 ^= ((ulong)tail[ 6]) << 48; nextcase;
		case 6: k1 ^= ((ulong)tail[ 5]) << 40; nextcase;
		case 5: k1 ^= ((ulong)tail[ 4]) << 32; nextcase;
		case 4: k1 ^= ((ulong)tail[ 3]) << 24; nextcase;
		case 3: k1 ^= ((ulong)tail[ 2]) << 16; nextcase;
		case 2: k1 ^= ((ulong)tail[ 1]) << 8; nextcase;
		case 1: k1 ^= ((ulong)tail[ 0]) << 0;
			k1 *= C1; k1 = k1.rotl(31); k1 *= C2; h1 ^= k1;
	}
	h1 ^= len;
	h2 ^= len;
	h1 += h2;
	h2 += h1;
	h1 = fmix64(h1);
	h2 = fmix64(h2);
	h1 += h2;
	h2 += h1;
	// Parenthesised for readers used to C precedence; in C3 the shift already
	// binds tighter than '+', so the grouping is unchanged.
	return h1 + ((uint128)h2 << 64U);
}
<*
 Compute a 128-bit MurmurHash3 of a byte slice using 32-bit arithmetic.

 The input is consumed in 16-byte blocks (four 32-bit words read through
 getblock32); the remaining 1-15 tail bytes are folded in separately.
 The result carries h1 in the lowest 32 bits, followed by h2, h3 and h4
 in increasingly significant 32-bit lanes.

 @param [in] data : "The data to hash"
 @param seed : "The seed to use for hashing"
 @require data.len <= uint.max : "Too much data"
 @return "The 128-bit hash of data for the given seed"
*>
fn uint128 hash128_32(char[] data, uint seed)
{
	// The contract guarantees the length fits in uint; make the narrowing explicit.
	uint len = (uint)data.len;
	int nblocks = (int)(len / 16);
	uint h1 = seed;
	uint h2 = seed;
	uint h3 = seed;
	uint h4 = seed;
	const uint C1 = 0x239b961b;
	const uint C2 = 0xab0e9789;
	const uint C3 = 0x38b34ae5;
	const uint C4 = 0xa1e38b93;
	// Byte offset of the end of the block area, computed in usz: nblocks * 16
	// in int wraps once the input reaches 2 GiB, which the contract permits.
	char* tail = data.ptr + (usz)nblocks * 16;
	// Blocks are addressed with negative indices relative to the end of the block area.
	uint* blocks = (uint*)tail;
	for (int i = -nblocks; i != 0; i++)
	{
		uint k1 = getblock32(blocks, i * 4 + 0);
		uint k2 = getblock32(blocks, i * 4 + 1);
		uint k3 = getblock32(blocks, i * 4 + 2);
		uint k4 = getblock32(blocks, i * 4 + 3);
		k1 *= C1; k1 = k1.rotl(15); k1 *= C2; h1 ^= k1;
		h1 = h1.rotl(19); h1 += h2; h1 = h1 * 5U + 0x561ccd1b;
		k2 *= C2; k2 = k2.rotl(16); k2 *= C3; h2 ^= k2;
		h2 = h2.rotl(17); h2 += h3; h2 = h2 * 5U + 0x0bcaa747;
		k3 *= C3; k3 = k3.rotl(17); k3 *= C4; h3 ^= k3;
		h3 = h3.rotl(15); h3 += h4; h3 = h3 * 5U + 0x96cd1c35;
		k4 *= C4; k4 = k4.rotl(18); k4 *= C1; h4 ^= k4;
		h4 = h4.rotl(13); h4 += h1; h4 = h4 * 5U + 0x32ac3b17;
	}
	// k1..k4 rely on C3's default zero-initialisation of locals.
	uint k1, k2, k3, k4;
	switch (len & 15)
	{
		case 15: k4 ^= tail[14] << 16; nextcase;
		case 14: k4 ^= tail[13] << 8; nextcase;
		case 13: k4 ^= tail[12] << 0;
			k4 *= C4; k4 = k4.rotl(18); k4 *= C1; h4 ^= k4;
			nextcase;
		case 12: k3 ^= tail[11] << 24; nextcase;
		case 11: k3 ^= tail[10] << 16; nextcase;
		case 10: k3 ^= tail[ 9] << 8; nextcase;
		case 9: k3 ^= tail[ 8] << 0;
			k3 *= C3; k3 = k3.rotl(17); k3 *= C4; h3 ^= k3;
			nextcase;
		case 8: k2 ^= tail[ 7] << 24; nextcase;
		case 7: k2 ^= tail[ 6] << 16; nextcase;
		case 6: k2 ^= tail[ 5] << 8; nextcase;
		case 5: k2 ^= tail[ 4] << 0;
			k2 *= C2; k2 = k2.rotl(16); k2 *= C3; h2 ^= k2;
			nextcase;
		case 4: k1 ^= tail[ 3] << 24; nextcase;
		case 3: k1 ^= tail[ 2] << 16; nextcase;
		case 2: k1 ^= tail[ 1] << 8; nextcase;
		case 1: k1 ^= tail[ 0] << 0;
			k1 *= C1; k1 = k1.rotl(15); k1 *= C2; h1 ^= k1;
	}
	h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
	h1 += h2; h1 += h3; h1 += h4;
	h2 += h1; h3 += h1; h4 += h1;
	h1 = fmix32(h1);
	h2 = fmix32(h2);
	h3 = fmix32(h3);
	h4 = fmix32(h4);
	h1 += h2; h1 += h3; h1 += h4;
	h2 += h1; h3 += h1; h4 += h1;
	// Parenthesised for readers used to C precedence; in C3 the shifts already
	// bind tighter than '+', so the grouping is unchanged.
	return h1 + ((uint128)h2 << 32U) + ((uint128)h3 << 64U) + ((uint128)h4 << 96U);
}
<*
 Read the 32-bit word at index i relative to p.

 The word is loaded through the UIntLE wrapper type with 1 passed as the
 alignment argument of mem::load, so p does not need to be 4-byte aligned.

 @param p : "Base pointer of the word array"
 @param i : "Word index relative to p, may be negative"
*>
macro uint getblock32(uint* p, int i) @local
{
	return mem::load((UIntLE*)p + i, 1).val;
}
<*
 Read the 64-bit word at index i relative to p.

 The word is loaded through the ULongLE wrapper type with 1 passed as the
 alignment argument of mem::load, so p does not need to be 8-byte aligned.

 @param p : "Base pointer of the word array"
 @param i : "Word index relative to p"
*>
macro ulong getblock64(ulong* p, int i) @local
{
	return mem::load((ULongLE*)p + i, 1).val;
}
<*
 Finalisation mix for a 32-bit hash state: alternating xor-shift and
 multiply steps applied to h.

 @param h : "The hash state to mix"
*>
macro uint fmix32(uint h) @local
{
	uint mixed = h ^ (h >> 16);
	mixed *= 0x85ebca6b;
	mixed ^= mixed >> 13;
	mixed *= 0xc2b2ae35;
	return mixed ^ (mixed >> 16);
}
<*
 Finalisation mix for a 64-bit hash state: alternating xor-shift and
 multiply steps applied to k.

 @param k : "The hash state to mix"
*>
macro ulong fmix64(ulong k) @local
{
	ulong mixed = k ^ (k >> 33);
	mixed *= 0xff51afd7ed558ccd;
	mixed ^= mixed >> 33;
	mixed *= 0xc4ceb9fe1a85ec53;
	return mixed ^ (mixed >> 33);
}

View File

@@ -13,6 +13,7 @@
- Remove dependency on temp allocator in String.join.
- Remove dependency on temp allocator in File.open.
- Added PEM encoding/decoding. #2858
- Add Murmur3 hash.
### Fixes
- Add error message if directory with output file name already exists

View File

@@ -0,0 +1,44 @@
module std::hash::murmur3_test @test;
import std::hash::murmur3;
// Checks murmur3::hash32 against fixed expected values for empty input,
// inputs of every tail length (1-3 trailing bytes), multi-byte UTF-8 text
// and a 256-byte buffer.
fn void hash32()
{
	// Empty input with different seeds.
	test::eq(0, murmur3::hash32("", 0));
	test::eq(0x514E28B7, murmur3::hash32("", 1));
	test::eq(0x81F16F39, murmur3::hash32("", 0xffffffff));
	// Exactly one full block.
	test::eq(0x2362F9DE, murmur3::hash32("\0\0\0\0", 0));
	// One block, then every possible tail length.
	test::eq(0x5A97808A, murmur3::hash32("aaaa", 0x9747b28c));
	test::eq(0x283E0130, murmur3::hash32("aaa", 0x9747b28c));
	test::eq(0x5D211726, murmur3::hash32("aa", 0x9747b28c));
	test::eq(0x7FA09EA6, murmur3::hash32("a", 0x9747b28c));
	test::eq(0xF0478627, murmur3::hash32("abcd", 0x9747b28c));
	test::eq(0xC84A62DD, murmur3::hash32("abc", 0x9747b28c));
	test::eq(0x74875592, murmur3::hash32("ab", 0x9747b28c));
	// The single-byte case ("a") is asserted once, in the group above.
	test::eq(0x24884CBA, murmur3::hash32("Hello, world!", 0x9747b28c));
	test::eq(0xD58063C1, murmur3::hash32("ππππππππ", 0x9747b28c));
	// Longer input spanning many blocks. The buffer is not named `test`
	// to keep it visually distinct from the test:: module path.
	char[256] filled = { [0..255] = 'a' };
	test::eq(0x37405BDC, murmur3::hash32(&filled, 0x9747b28c));
}
// Checks murmur3::hash128_64 against fixed expected 128-bit values.
fn void hash128_64()
{
// Empty input with different seeds.
test::eq(0, murmur3::hash128_64("", 0));
test::eq(0x51622daa78f835834610abe56eff5cb5, murmur3::hash128_64("", 1));
test::eq(0x857421121ee6446b6af1df4d9d3bc9ec, murmur3::hash128_64("", 0xffffffff));
// Tail-only input (4 bytes, shorter than one 16-byte block).
test::eq(0x589623161cf526f1cfa0f7ddd84c76bc, murmur3::hash128_64("\0\0\0\0", 0));
// 28 bytes: one full block followed by a 12-byte tail.
test::eq(0xf66e73e07751664edbcf7463becf7e04, murmur3::hash128_64("xxxxxxxxxxxxxxxxxxxxxxxxxxxx", 123));
// Same input under two different seeds.
test::eq(0xf19732fdd373c3f5421c8c738743acad, murmur3::hash128_64("Hello, world!", 123));
test::eq(0x79200aeeb9546c79ca47f42bf86d4004, murmur3::hash128_64("Hello, world!", 321));
}
// Checks murmur3::hash128_32 against fixed expected 128-bit values.
fn void hash128_32()
{
// Empty input with different seeds; for the non-zero seeds the three upper
// 32-bit words of the expected value are identical.
test::eq(0, murmur3::hash128_32("", 0));
test::eq(0x26f3e79926f3e79926f3e799fedc5245, murmur3::hash128_32("", 123));
test::eq(0x989d49f7989d49f7989d49f7051e08a9, murmur3::hash128_32("", 0xFFFFFFFF));
// Tail-only input (4 bytes, shorter than one 16-byte block).
test::eq(0x9e5178409e5178409e517840cc066f1f, murmur3::hash128_32("\0\0\0\0", 0));
// 28 bytes: one full block followed by a 12-byte tail.
test::eq(0x1fec60474cf929d378825a165e40bab2, murmur3::hash128_32("xxxxxxxxxxxxxxxxxxxxxxxxxxxx", 123));
// Same input under two different seeds.
test::eq(0x9e37c886a41621625a1aacd761c9129e, murmur3::hash128_32("Hello, world!", 123));
test::eq(0xa7170f0f045880c5c26c4193d5fbdcb3, murmur3::hash128_32("Hello, world!", 321));
}

View File

@@ -6,8 +6,7 @@ fn void test_sha256_empty()
Sha256 sha;
sha.init();
sha.update("");
test::@check(sha.final() == x"E3B0C442 98FC1C14 9AFBF4C8 996FB924 27AE41E4 649B934C A495991B 7852B855");
test::eq(sha.final(), x"E3B0C442 98FC1C14 9AFBF4C8 996FB924 27AE41E4 649B934C A495991B 7852B855");
}
fn void test_sha256_abc()
@@ -16,7 +15,7 @@ fn void test_sha256_abc()
sha.init();
sha.update("abc");
test::@check(sha.final() == x"BA7816BF 8F01CFEA 414140DE 5DAE2223 B00361A3 96177A9C B410FF61 F20015AD");
test::eq(sha.final(), x"BA7816BF 8F01CFEA 414140DE 5DAE2223 B00361A3 96177A9C B410FF61 F20015AD");
}
fn void test_sha256_longer()
@@ -24,7 +23,7 @@ fn void test_sha256_longer()
Sha256 sha;
sha.init();
sha.update("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopqabcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq");
test::@check(sha.final() == x"59F109D9 533B2B70 E7C3B814 A2BD218F 78EA5D37 14455BC6 7987CF0D 664399CF");
test::eq(sha.final(), x"59F109D9 533B2B70 E7C3B814 A2BD218F 78EA5D37 14455BC6 7987CF0D 664399CF");
}
fn void test_sha256_multi_update_permute()
@@ -37,7 +36,7 @@ fn void test_sha256_multi_update_permute()
usz i = 0;
for (; i < input.len / step; i++) sha.update(input[i * step : step]);
if (i * step < input.len) sha.update(input[i * step..]);
test::@check(sha.final() == x"b527293dfb70dcce37e593f4c43e1b81909615722bad041b90b8df22bebd00a0", "Mismatch for step %d", step);
test::eq(sha.final(), x"b527293dfb70dcce37e593f4c43e1b81909615722bad041b90b8df22bebd00a0");
}
}