mirror of
https://github.com/c3lang/c3c.git
synced 2026-02-27 12:01:16 +00:00
Add Murmur3 hash
This commit is contained in:
243
lib/std/hash/murmur.c3
Normal file
243
lib/std/hash/murmur.c3
Normal file
@@ -0,0 +1,243 @@
|
||||
module std::hash::murmur3;
|
||||
|
||||
<*
|
||||
@param [in] data : "The data to hash"
|
||||
@param seed : "The seed to use for hashing"
|
||||
@require (data.len / 4) <= int.max : "Too much data"
|
||||
*>
|
||||
fn uint hash32(char[] data, uint seed)
|
||||
{
|
||||
int nblocks = (int)data.len / 4;
|
||||
uint h1 = seed;
|
||||
|
||||
const uint C1 = 0xcc9e2d51;
|
||||
const uint C2 = 0x1b873593;
|
||||
|
||||
uint* blocks = (uint *)(data.ptr + nblocks * 4);
|
||||
|
||||
for (int i = -nblocks; i != 0; i++)
|
||||
{
|
||||
uint k1 = getblock32(blocks, i);
|
||||
|
||||
k1 *= C1;
|
||||
k1 = k1.rotl(15);
|
||||
k1 *= C2;
|
||||
|
||||
h1 ^= k1;
|
||||
h1 = h1.rotl(13);
|
||||
h1 = h1 * 5U + 0xe6546b64;
|
||||
}
|
||||
|
||||
char* tail = data.ptr + nblocks * 4;
|
||||
|
||||
uint k1;
|
||||
|
||||
switch (data.len & 3)
|
||||
{
|
||||
case 3: k1 ^= tail[2] << 16; nextcase;
|
||||
case 2: k1 ^= tail[1] << 8; nextcase;
|
||||
case 1: k1 ^= tail[0]; k1 *= C1; k1 = k1.rotl(15); k1 *= C2; h1 ^= k1;
|
||||
}
|
||||
|
||||
h1 ^= (uint)data.len;
|
||||
|
||||
h1 = fmix32(h1);
|
||||
|
||||
return h1;
|
||||
}
|
||||
|
||||
<*
|
||||
@param [in] data : "The data to hash"
|
||||
@param seed : "The seed to use for hashing"
|
||||
@require (data.len / 16) <= int.max : "Too much data"
|
||||
|
||||
*>
|
||||
fn uint128 hash128_64(char[] data, uint seed)
|
||||
{
|
||||
ulong len = data.len;
|
||||
int nblocks = (int)(len / 16);
|
||||
|
||||
ulong h1 = seed;
|
||||
ulong h2 = seed;
|
||||
|
||||
const ulong C1 = 0x87c37b91114253d5UL;
|
||||
const ulong C2 = 0x4cf5ad432745937fUL;
|
||||
|
||||
ulong* blocks = (ulong*)data.ptr; // Unaligned!
|
||||
|
||||
for (int i = 0; i < nblocks; i++)
|
||||
{
|
||||
ulong k1 = getblock64(blocks, i * 2 + 0);
|
||||
ulong k2 = getblock64(blocks, i * 2 + 1);
|
||||
|
||||
k1 *= C1; k1 = k1.rotl(31); k1 *= C2; h1 ^= k1;
|
||||
|
||||
h1 = h1.rotl(27); h1 += h2; h1 = h1 * 5U + 0x52dce729;
|
||||
|
||||
k2 *= C2; k2 = k2.rotl(33); k2 *= C1; h2 ^= k2;
|
||||
|
||||
h2 = h2.rotl(31); h2 += h1; h2 = h2 * 5U + 0x38495ab5;
|
||||
}
|
||||
|
||||
char* tail = data.ptr + nblocks * 16;
|
||||
ulong k1, k2;
|
||||
|
||||
switch (len & 15)
|
||||
{
|
||||
case 15: k2 ^= ((ulong)tail[14]) << 48; nextcase;
|
||||
case 14: k2 ^= ((ulong)tail[13]) << 40; nextcase;
|
||||
case 13: k2 ^= ((ulong)tail[12]) << 32; nextcase;
|
||||
case 12: k2 ^= ((ulong)tail[11]) << 24; nextcase;
|
||||
case 11: k2 ^= ((ulong)tail[10]) << 16; nextcase;
|
||||
case 10: k2 ^= ((ulong)tail[ 9]) << 8; nextcase;
|
||||
case 9: k2 ^= ((ulong)tail[ 8]) << 0;
|
||||
k2 *= C2; k2 = k2.rotl(33); k2 *= C1; h2 ^= k2;
|
||||
nextcase;
|
||||
case 8: k1 ^= ((ulong)tail[ 7]) << 56; nextcase;
|
||||
case 7: k1 ^= ((ulong)tail[ 6]) << 48; nextcase;
|
||||
case 6: k1 ^= ((ulong)tail[ 5]) << 40; nextcase;
|
||||
case 5: k1 ^= ((ulong)tail[ 4]) << 32; nextcase;
|
||||
case 4: k1 ^= ((ulong)tail[ 3]) << 24; nextcase;
|
||||
case 3: k1 ^= ((ulong)tail[ 2]) << 16; nextcase;
|
||||
case 2: k1 ^= ((ulong)tail[ 1]) << 8; nextcase;
|
||||
case 1: k1 ^= ((ulong)tail[ 0]) << 0;
|
||||
k1 *= C1; k1 = k1.rotl(31); k1 *= C2; h1 ^= k1;
|
||||
}
|
||||
|
||||
|
||||
h1 ^= len;
|
||||
h2 ^= len;
|
||||
|
||||
h1 += h2;
|
||||
h2 += h1;
|
||||
|
||||
h1 = fmix64(h1);
|
||||
h2 = fmix64(h2);
|
||||
|
||||
|
||||
h1 += h2;
|
||||
h2 += h1;
|
||||
|
||||
return h1 + (uint128)h2 << 64U;
|
||||
}
|
||||
|
||||
<*
|
||||
@param [in] data : "The data to hash"
|
||||
@param seed : "The seed to use for hashing"
|
||||
@require data.len <= uint.max : "Too much data"
|
||||
*>
|
||||
fn uint128 hash128_32(char[] data, uint seed)
|
||||
{
|
||||
uint len = data.len;
|
||||
int nblocks = (int)(len / 16);
|
||||
|
||||
uint h1 = seed;
|
||||
uint h2 = seed;
|
||||
uint h3 = seed;
|
||||
uint h4 = seed;
|
||||
|
||||
const uint C1 = 0x239b961b;
|
||||
const uint C2 = 0xab0e9789;
|
||||
const uint C3 = 0x38b34ae5;
|
||||
const uint C4 = 0xa1e38b93;
|
||||
|
||||
uint* blocks = (uint *)(data.ptr + nblocks * 16);
|
||||
|
||||
for (int i = -nblocks; i != 0; i++)
|
||||
{
|
||||
uint k1 = getblock32(blocks, i * 4 + 0);
|
||||
uint k2 = getblock32(blocks, i * 4 + 1);
|
||||
uint k3 = getblock32(blocks, i * 4 + 2);
|
||||
uint k4 = getblock32(blocks, i * 4 + 3);
|
||||
|
||||
k1 *= C1; k1 = k1.rotl(15); k1 *= C2; h1 ^= k1;
|
||||
h1 = h1.rotl(19); h1 += h2; h1 = h1 * 5U + 0x561ccd1b;
|
||||
k2 *= C2; k2 = k2.rotl(16); k2 *= C3; h2 ^= k2;
|
||||
h2 = h2.rotl(17); h2 += h3; h2 = h2 * 5U + 0x0bcaa747;
|
||||
k3 *= C3; k3 = k3.rotl(17); k3 *= C4; h3 ^= k3;
|
||||
h3 = h3.rotl(15); h3 += h4; h3 = h3 * 5U + 0x96cd1c35;
|
||||
k4 *= C4; k4 = k4.rotl(18); k4 *= C1; h4 ^= k4;
|
||||
h4 = h4.rotl(13); h4 += h1; h4 = h4 * 5U + 0x32ac3b17;
|
||||
}
|
||||
|
||||
char* tail = data.ptr + nblocks * 16;
|
||||
|
||||
uint k1, k2, k3, k4;
|
||||
|
||||
switch (len & 15)
|
||||
{
|
||||
case 15: k4 ^= tail[14] << 16; nextcase;
|
||||
case 14: k4 ^= tail[13] << 8; nextcase;
|
||||
case 13: k4 ^= tail[12] << 0;
|
||||
k4 *= C4; k4 = k4.rotl(18); k4 *= C1; h4 ^= k4;
|
||||
nextcase;
|
||||
case 12: k3 ^= tail[11] << 24; nextcase;
|
||||
case 11: k3 ^= tail[10] << 16; nextcase;
|
||||
case 10: k3 ^= tail[ 9] << 8; nextcase;
|
||||
case 9: k3 ^= tail[ 8] << 0;
|
||||
k3 *= C3; k3 = k3.rotl(17); k3 *= C4; h3 ^= k3;
|
||||
nextcase;
|
||||
case 8: k2 ^= tail[ 7] << 24; nextcase;
|
||||
case 7: k2 ^= tail[ 6] << 16; nextcase;
|
||||
case 6: k2 ^= tail[ 5] << 8; nextcase;
|
||||
case 5: k2 ^= tail[ 4] << 0;
|
||||
k2 *= C2; k2 = k2.rotl(16); k2 *= C3; h2 ^= k2;
|
||||
nextcase;
|
||||
case 4: k1 ^= tail[ 3] << 24; nextcase;
|
||||
case 3: k1 ^= tail[ 2] << 16; nextcase;
|
||||
case 2: k1 ^= tail[ 1] << 8; nextcase;
|
||||
case 1: k1 ^= tail[ 0] << 0;
|
||||
k1 *= C1; k1 = k1.rotl(15); k1 *= C2; h1 ^= k1;
|
||||
}
|
||||
|
||||
h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
|
||||
|
||||
h1 += h2; h1 += h3; h1 += h4;
|
||||
h2 += h1; h3 += h1; h4 += h1;
|
||||
|
||||
h1 = fmix32(h1);
|
||||
h2 = fmix32(h2);
|
||||
h3 = fmix32(h3);
|
||||
h4 = fmix32(h4);
|
||||
|
||||
h1 += h2; h1 += h3; h1 += h4;
|
||||
h2 += h1; h3 += h1; h4 += h1;
|
||||
|
||||
return h1 + (uint128)h2 << 32U + (uint128)h3 << 64U + (uint128)h4 << 96U;
|
||||
}
|
||||
|
||||
macro uint getblock32(uint* p, int i) @local
|
||||
{
|
||||
UIntLE* p_le = (UIntLE*)p + i;
|
||||
return mem::load(p_le, 1).val;
|
||||
}
|
||||
|
||||
macro ulong getblock64(ulong* p, int i) @local
|
||||
{
|
||||
ULongLE* p_le = (ULongLE*)p + i;
|
||||
return mem::load(p_le, 1).val;
|
||||
}
|
||||
|
||||
macro uint fmix32(uint h) @local
|
||||
{
|
||||
h ^= h >> 16UL;
|
||||
h *= 0x85ebca6b;
|
||||
h ^= h >> 13UL;
|
||||
h *= 0xc2b2ae35;
|
||||
h ^= h >> 16UL;
|
||||
|
||||
return h;
|
||||
}
|
||||
|
||||
|
||||
macro ulong fmix64(ulong k) @local
|
||||
{
|
||||
k ^= k >> 33U;
|
||||
k *= 0xff51afd7ed558ccd;
|
||||
k ^= k >> 33U;
|
||||
k *= 0xc4ceb9fe1a85ec53;
|
||||
k ^= k >> 33U;
|
||||
|
||||
return k;
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
- Remove dependency on temp allocator in String.join.
|
||||
- Remove dependency on temp allocator in File.open.
|
||||
- Added PEM encoding/decoding. #2858
|
||||
- Add Murmur3 hash.
|
||||
|
||||
### Fixes
|
||||
- Add error message if directory with output file name already exists
|
||||
|
||||
44
test/unit/stdlib/hash/murmur.c3
Normal file
44
test/unit/stdlib/hash/murmur.c3
Normal file
@@ -0,0 +1,44 @@
|
||||
module std::hash::murmur3_test @test;
|
||||
import std::hash::murmur3;
|
||||
|
||||
fn void hash32()
|
||||
{
|
||||
test::eq(0, murmur3::hash32("", 0));
|
||||
test::eq(0x514E28B7, murmur3::hash32("", 1));
|
||||
test::eq(0x81F16F39, murmur3::hash32("", 0xffffffff));
|
||||
test::eq(0x2362F9DE, murmur3::hash32("\0\0\0\0", 0));
|
||||
test::eq(0x5A97808A, murmur3::hash32("aaaa", 0x9747b28c));
|
||||
test::eq(0x283E0130, murmur3::hash32("aaa", 0x9747b28c));
|
||||
test::eq(0x5D211726, murmur3::hash32("aa", 0x9747b28c));
|
||||
test::eq(0x7FA09EA6, murmur3::hash32("a", 0x9747b28c));
|
||||
test::eq(0xF0478627, murmur3::hash32("abcd", 0x9747b28c));
|
||||
test::eq(0xC84A62DD, murmur3::hash32("abc", 0x9747b28c));
|
||||
test::eq(0x74875592, murmur3::hash32("ab", 0x9747b28c));
|
||||
test::eq(0x7FA09EA6, murmur3::hash32("a", 0x9747b28c));
|
||||
test::eq(0x24884CBA, murmur3::hash32("Hello, world!", 0x9747b28c));
|
||||
test::eq(0xD58063C1, murmur3::hash32("ππππππππ", 0x9747b28c));
|
||||
char[256] test = { [0..255] = 'a' };
|
||||
test::eq(0x37405BDC, murmur3::hash32(&test, 0x9747b28c));
|
||||
}
|
||||
|
||||
fn void hash128_64()
|
||||
{
|
||||
test::eq(0, murmur3::hash128_64("", 0));
|
||||
test::eq(0x51622daa78f835834610abe56eff5cb5, murmur3::hash128_64("", 1));
|
||||
test::eq(0x857421121ee6446b6af1df4d9d3bc9ec, murmur3::hash128_64("", 0xffffffff));
|
||||
test::eq(0x589623161cf526f1cfa0f7ddd84c76bc, murmur3::hash128_64("\0\0\0\0", 0));
|
||||
test::eq(0xf66e73e07751664edbcf7463becf7e04, murmur3::hash128_64("xxxxxxxxxxxxxxxxxxxxxxxxxxxx", 123));
|
||||
test::eq(0xf19732fdd373c3f5421c8c738743acad, murmur3::hash128_64("Hello, world!", 123));
|
||||
test::eq(0x79200aeeb9546c79ca47f42bf86d4004, murmur3::hash128_64("Hello, world!", 321));
|
||||
}
|
||||
|
||||
fn void hash128_32()
|
||||
{
|
||||
test::eq(0, murmur3::hash128_32("", 0));
|
||||
test::eq(0x26f3e79926f3e79926f3e799fedc5245, murmur3::hash128_32("", 123));
|
||||
test::eq(0x989d49f7989d49f7989d49f7051e08a9, murmur3::hash128_32("", 0xFFFFFFFF));
|
||||
test::eq(0x9e5178409e5178409e517840cc066f1f, murmur3::hash128_32("\0\0\0\0", 0));
|
||||
test::eq(0x1fec60474cf929d378825a165e40bab2, murmur3::hash128_32("xxxxxxxxxxxxxxxxxxxxxxxxxxxx", 123));
|
||||
test::eq(0x9e37c886a41621625a1aacd761c9129e, murmur3::hash128_32("Hello, world!", 123));
|
||||
test::eq(0xa7170f0f045880c5c26c4193d5fbdcb3, murmur3::hash128_32("Hello, world!", 321));
|
||||
}
|
||||
@@ -6,8 +6,7 @@ fn void test_sha256_empty()
|
||||
Sha256 sha;
|
||||
sha.init();
|
||||
sha.update("");
|
||||
|
||||
test::@check(sha.final() == x"E3B0C442 98FC1C14 9AFBF4C8 996FB924 27AE41E4 649B934C A495991B 7852B855");
|
||||
test::eq(sha.final(), x"E3B0C442 98FC1C14 9AFBF4C8 996FB924 27AE41E4 649B934C A495991B 7852B855");
|
||||
}
|
||||
|
||||
fn void test_sha256_abc()
|
||||
@@ -16,7 +15,7 @@ fn void test_sha256_abc()
|
||||
sha.init();
|
||||
sha.update("abc");
|
||||
|
||||
test::@check(sha.final() == x"BA7816BF 8F01CFEA 414140DE 5DAE2223 B00361A3 96177A9C B410FF61 F20015AD");
|
||||
test::eq(sha.final(), x"BA7816BF 8F01CFEA 414140DE 5DAE2223 B00361A3 96177A9C B410FF61 F20015AD");
|
||||
}
|
||||
|
||||
fn void test_sha256_longer()
|
||||
@@ -24,7 +23,7 @@ fn void test_sha256_longer()
|
||||
Sha256 sha;
|
||||
sha.init();
|
||||
sha.update("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopqabcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq");
|
||||
test::@check(sha.final() == x"59F109D9 533B2B70 E7C3B814 A2BD218F 78EA5D37 14455BC6 7987CF0D 664399CF");
|
||||
test::eq(sha.final(), x"59F109D9 533B2B70 E7C3B814 A2BD218F 78EA5D37 14455BC6 7987CF0D 664399CF");
|
||||
}
|
||||
|
||||
fn void test_sha256_multi_update_permute()
|
||||
@@ -37,7 +36,7 @@ fn void test_sha256_multi_update_permute()
|
||||
usz i = 0;
|
||||
for (; i < input.len / step; i++) sha.update(input[i * step : step]);
|
||||
if (i * step < input.len) sha.update(input[i * step..]);
|
||||
test::@check(sha.final() == x"b527293dfb70dcce37e593f4c43e1b81909615722bad041b90b8df22bebd00a0", "Mismatch for step %d", step);
|
||||
test::eq(sha.final(), x"b527293dfb70dcce37e593f4c43e1b81909615722bad041b90b8df22bebd00a0");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user