From ed92476916b872d45920f50236b2bf4952fd934f Mon Sep 17 00:00:00 2001 From: Zack Puhl Date: Sat, 19 Jul 2025 18:06:10 -0400 Subject: [PATCH] Add wyhash2 and metro64/128 modern hashing (#2293) * add wyhash2, metro64, and metro128 hashes; best performing non-crypto hash functions * add superfast 64-bit a5hash; not streamed, no 128-bit impl * add komihash and associated tests/benchmarks --------- Co-authored-by: Christoffer Lerno --- benchmarks/stdlib/hash/non_crypto_shootout.c3 | 94 +++++++ lib/std/hash/a5hash.c3 | 96 +++++++ lib/std/hash/komi.c3 | 156 +++++++++++ lib/std/hash/metro128.c3 | 149 ++++++++++ lib/std/hash/metro64.c3 | 152 ++++++++++ lib/std/hash/wyhash2.c3 | 56 ++++ releasenotes.md | 1 + test/unit/stdlib/hash/a5hash.c3 | 71 +++++ test/unit/stdlib/hash/komi.c3 | 259 ++++++++++++++++++ test/unit/stdlib/hash/metro.c3 | 140 ++++++++++ test/unit/stdlib/hash/wyhash2.c3 | 89 ++++++ 11 files changed, 1263 insertions(+) create mode 100644 benchmarks/stdlib/hash/non_crypto_shootout.c3 create mode 100644 lib/std/hash/a5hash.c3 create mode 100644 lib/std/hash/komi.c3 create mode 100644 lib/std/hash/metro128.c3 create mode 100644 lib/std/hash/metro64.c3 create mode 100644 lib/std/hash/wyhash2.c3 create mode 100644 test/unit/stdlib/hash/a5hash.c3 create mode 100644 test/unit/stdlib/hash/komi.c3 create mode 100644 test/unit/stdlib/hash/metro.c3 create mode 100644 test/unit/stdlib/hash/wyhash2.c3 diff --git a/benchmarks/stdlib/hash/non_crypto_shootout.c3 b/benchmarks/stdlib/hash/non_crypto_shootout.c3 new file mode 100644 index 000000000..50e67311c --- /dev/null +++ b/benchmarks/stdlib/hash/non_crypto_shootout.c3 @@ -0,0 +1,94 @@ +// Copyright (c) 2025 Zack Puhl . All rights reserved. +// Use of this source code is governed by the MIT license +// a copy of which can be found in the LICENSE_STDLIB file. 
+module non_crypto_benchmarks; + + +const usz COMMON_ITERATIONS = 1 << 18; + +const char[] COMMON_1 = { 0xA5 }; +const char[] COMMON_4 = { 0xA5, 0xA5, 0xA5, 0xA5, }; +const char[] COMMON_8 = { [0..7] = 0xA5 }; +const char[] COMMON_16 = { [0..15] = 0xA5 }; +const char[] COMMON_32 = { [0..31] = 0xA5 }; +const char[] COMMON_64 = { [0..63] = 0xA5 }; +const char[] COMMON_128 = { [0..127] = 0xA5 }; +const char[] COMMON_1024 = { [0..1023] = 0xA5 }; + + +fn void initialize_bench() @init +{ + set_benchmark_warmup_iterations(3); + set_benchmark_max_iterations(COMMON_ITERATIONS + 3); +} + + +// ======================================================================================= +module non_crypto_benchmarks @benchmark; + +import std::hash; + + +fn void fnv64a_1() => fnv64a::hash(COMMON_1); +fn void fnv32a_1() => fnv32a::hash(COMMON_1); +fn void wyhash2_1() => wyhash2::hash(COMMON_1); +fn void metro64_1() => metro64::hash(COMMON_1); +fn void metro128_1() => metro128::hash(COMMON_1); +fn void a5hash_1() => a5hash::hash(COMMON_1); +fn void komi_1() => komi::hash(COMMON_1); + +fn void fnv64a_4() => fnv64a::hash(COMMON_4); +fn void fnv32a_4() => fnv32a::hash(COMMON_4); +fn void wyhash2_4() => wyhash2::hash(COMMON_4); +fn void metro64_4() => metro64::hash(COMMON_4); +fn void metro128_4() => metro128::hash(COMMON_4); +fn void a5hash_4() => a5hash::hash(COMMON_4); +fn void komi_4() => komi::hash(COMMON_4); + +fn void fnv64a_8() => fnv64a::hash(COMMON_8); +fn void fnv32a_8() => fnv32a::hash(COMMON_8); +fn void wyhash2_8() => wyhash2::hash(COMMON_8); +fn void metro64_8() => metro64::hash(COMMON_8); +fn void metro128_8() => metro128::hash(COMMON_8); +fn void a5hash_8() => a5hash::hash(COMMON_8); +fn void komi_8() => komi::hash(COMMON_8); + +fn void fnv64a_16() => fnv64a::hash(COMMON_16); +fn void fnv32a_16() => fnv32a::hash(COMMON_16); +fn void wyhash2_16() => wyhash2::hash(COMMON_16); +fn void metro64_16() => metro64::hash(COMMON_16); +fn void metro128_16() => 
metro128::hash(COMMON_16); +fn void a5hash_16() => a5hash::hash(COMMON_16); +fn void komi_16() => komi::hash(COMMON_16); + +fn void fnv64a_32() => fnv64a::hash(COMMON_32); +fn void fnv32a_32() => fnv32a::hash(COMMON_32); +// NOTE: wyhash2 cannot be used on inputs > 16 bytes. +fn void metro64_32() => metro64::hash(COMMON_32); +fn void metro128_32() => metro128::hash(COMMON_32); +fn void a5hash_32() => a5hash::hash(COMMON_32); +fn void komi_32() => komi::hash(COMMON_32); + +fn void fnv64a_64() => fnv64a::hash(COMMON_64); +fn void fnv32a_64() => fnv32a::hash(COMMON_64); +// NOTE: wyhash2 cannot be used on inputs > 16 bytes. +fn void metro64_64() => metro64::hash(COMMON_64); +fn void metro128_64() => metro128::hash(COMMON_64); +fn void a5hash_64() => a5hash::hash(COMMON_64); +fn void komi_64() => komi::hash(COMMON_64); + +fn void fnv64a_128() => fnv64a::hash(COMMON_128); +fn void fnv32a_128() => fnv32a::hash(COMMON_128); +// NOTE: wyhash2 cannot be used on inputs > 16 bytes. +fn void metro64_128() => metro64::hash(COMMON_128); +fn void metro128_128() => metro128::hash(COMMON_128); +fn void a5hash_128() => a5hash::hash(COMMON_128); +fn void komi_128() => komi::hash(COMMON_128); + +fn void fnv64a_1024() => fnv64a::hash(COMMON_1024); +fn void fnv32a_1024() => fnv32a::hash(COMMON_1024); +// NOTE: wyhash2 cannot be used on inputs > 16 bytes. +fn void metro64_1024() => metro64::hash(COMMON_1024); +fn void metro128_1024() => metro128::hash(COMMON_1024); +fn void a5hash_1024() => a5hash::hash(COMMON_1024); +fn void komi_1024() => komi::hash(COMMON_1024); diff --git a/lib/std/hash/a5hash.c3 b/lib/std/hash/a5hash.c3 new file mode 100644 index 000000000..6107962c6 --- /dev/null +++ b/lib/std/hash/a5hash.c3 @@ -0,0 +1,96 @@ +// Copyright (c) 2025 Zack Puhl . All rights reserved. +// Use of this source code is governed by the MIT license +// a copy of which can be found in the LICENSE_STDLIB file. 
+//
+// An implementation of Aleksey Vaneev's a5hash, version 5.16, in C3:
+// https://github.com/avaneev/a5hash
+//
+// The license for a5hash from the above repository at the time of writing is as follows:
+//
+// >> MIT License
+// >>
+// >> Copyright (c) 2025 Aleksey Vaneev
+// >>
+// >> Permission is hereby granted, free of charge, to any person obtaining a copy
+// >> of this software and associated documentation files (the "Software"), to deal
+// >> in the Software without restriction, including without limitation the rights
+// >> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// >> copies of the Software, and to permit persons to whom the Software is
+// >> furnished to do so, subject to the following conditions:
+// >>
+// >> The above copyright notice and this permission notice shall be included in all
+// >> copies or substantial portions of the Software.
+// >>
+// >> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// >> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// >> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// >> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// >> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// >> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// >> SOFTWARE.
+//
+//
+module std::hash::a5hash;
+
+// Full 64x64 -> 128-bit multiply: the low 64 bits of #u * #v are stored in #lo, the high 64 bits in #hi.
+macro @a5mul(#u, #v, #lo, #hi) @local
+{
+	uint128 imd = (uint128)#u * (uint128)#v;
+	#lo = (ulong)imd;
+	#hi = (ulong)(imd >> 64);
+}
+
+// Returns the 64-bit a5hash of `data`; `seed` (default 0) perturbs the initial state. The whole input is consumed in this one call.
+fn ulong hash(char[] data, ulong seed = 0)
+{
+	ulong seed1 = 0x243F_6A88_85A3_08D3 ^ data.len; // the input length is folded into both starting seeds
+	ulong seed2 = 0x4528_21E6_38D0_1377 ^ data.len;
+	ulong val10 = 0xAAAA_AAAA_AAAA_AAAA; // alternating-bit masks: val10 selects the odd bit positions...
+	ulong val01 = 0x5555_5555_5555_5555; // ...and val01 the even ones
+	ulong a, b;
+
+	@a5mul(seed2 ^ (seed & val10), seed1 ^ (seed & val01), seed1, seed2); // split the caller's seed over both masks and mix it in
+
+	val10 ^= seed2;
+
+	if (@likely(data.len > 3))
+	{
+		if (data.len > 16)
+		{
+			val01 ^= seed1;
+
+			for (; data.len > 16; data = data[16..]) // one 16-byte round per iteration; leaves 1..16 bytes in `data`
+			{
+				@a5mul(
+					@unaligned_load(((ulong*)data.ptr)[0], 1) ^ seed1,
+					@unaligned_load(((ulong*)data.ptr)[1], 1) ^ seed2,
+					seed1, seed2
+				);
+
+				seed1 += val01;
+				seed2 += val10;
+			}
+
+			a = @unaligned_load(*(ulong*)(data.ptr + (uptr)data.len - 16), 1); // tail = the 16 bytes ending at the end of the input;
+			b = @unaligned_load(*(ulong*)(data.ptr + (uptr)data.len - 8), 1); // this may re-read bytes the loop already consumed
+		}
+		else
+		{
+			a = ((ulong)@unaligned_load(*(uint*)&data[0], 1) << 32) // 4..16 bytes: four 32-bit reads taken from the head and
+				| @unaligned_load(*(uint*)&data[^4], 1); // the tail of the input, overlapping when the input is short
+
+			b = ((ulong)@unaligned_load(*(uint*)&data[(data.len >> 3) * 4], 1) << 32)
+				| @unaligned_load(*(uint*)(data.ptr + data.len - 4 - (data.len >> 3) * 4), 1);
+		}
+	}
+	else
+	{
+		a = data.len ? (data[0] | (data.len > 1 ? ((ulong)data[1] << 8) : 0) | (data.len > 2 ? ((ulong)data[2] << 16) : 0)) : 0; // 0..3 bytes packed low byte first; empty input gives 0
+		b = 0;
+	}
+
+	@a5mul(a ^ seed1, b ^ seed2, seed1, seed2); // fold the tail words into the seeds...
+	@a5mul(val01 ^ seed1, seed2, a, b); // ...then one more multiply whose two halves are XORed into the result
+
+	return a ^ b;
+}
diff --git a/lib/std/hash/komi.c3 b/lib/std/hash/komi.c3
new file mode 100644
index 000000000..d48ea2b27
--- /dev/null
+++ b/lib/std/hash/komi.c3
@@ -0,0 +1,156 @@
+// Copyright (c) 2025 Zack Puhl . All rights reserved.
+// Use of this source code is governed by the MIT license
+// a copy of which can be found in the LICENSE_STDLIB file.
+// +// An implementation of Aleksey Vaneev's komihash, version 5.27, in C3: +// https://github.com/avaneev/komihash +// +// The license for komihash from the above repository at the time of writing is as follows: +// +// >> MIT License +// >> +// >> Copyright (c) 2021-2025 Aleksey Vaneev +// >> +// >> Permission is hereby granted, free of charge, to any person obtaining a copy +// >> of this software and associated documentation files (the "Software"), to deal +// >> in the Software without restriction, including without limitation the rights +// >> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// >> copies of the Software, and to permit persons to whom the Software is +// >> furnished to do so, subject to the following conditions: +// >> +// >> The above copyright notice and this permission notice shall be included in all +// >> copies or substantial portions of the Software. +// >> +// >> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// >> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// >> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// >> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// >> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// >> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// >> SOFTWARE. 
+//
+//
+module std::hash::komi;
+
+// 64x64 -> 128-bit multiply: the low 64 bits are stored in #lo, the high 64 bits are ADDED to #hi (note the `+=`).
+macro @komimul(#u, #v, #lo, #hi) @local
+{
+	uint128 imd = (uint128)#u * (uint128)#v;
+	#lo = (ulong)imd;
+	#hi += (ulong)(imd >> 64);
+}
+
+// Returns the 64-bit komihash of `data`; `seed` (default 0) is split bit-wise over the two initial seed words.
+fn ulong hash(char[] data, ulong seed = 0)
+{
+	ulong seed1 = 0x243F_6A88_85A3_08D3 ^ (seed & 0x5555_5555_5555_5555);
+	ulong seed5 = 0x4528_21E6_38D0_1377 ^ (seed & 0xAAAA_AAAA_AAAA_AAAA);
+	ulong r1h, r2h;
+
+	// HASHROUND
+	@komimul(seed1, seed5, seed1, seed5);
+	seed1 ^= seed5;
+
+	if (@likely(data.len < 16)) // 0..15 bytes: everything fits in r1h/r2h, no bulk rounds
+	{
+		r1h = seed1;
+		r2h = seed5;
+
+		if (@likely(data.len >= 8))
+		{
+			r1h ^= @unaligned_load(*(ulong*)data.ptr, 1);
+
+			r2h ^= (data.len < 12)
+				? ((data[data.len - 3] | ((ulong)data[data.len - 2] << 8) | ((ulong)data[data.len - 1] << 16) | ((ulong)1 << 24)) >> ((data.len * 8) ^ 88))
+				: (((@unaligned_load(*(uint*)&data[^4], 1) | ((ulong)1 << 32)) >> (128 - data.len * 8)) << 32 | @unaligned_load(*(uint*)&data[8], 1));
+		}
+		else if (data.len != 0) // an empty input leaves r1h/r2h as the bare seeds
+		{
+			r1h ^= (data.len < 4)
+				? (((ulong)1 << (data.len * 8)) ^ data[0] ^ (data.len > 1 ? (ulong)data[1] << 8 : 0) ^ (data.len > 2 ? (ulong)data[2] << 16 : 0))
+				: (((@unaligned_load(*(uint*)&data[^4], 1) | ((ulong)1 << 32)) >> (64 - data.len * 8)) << 32 | @unaligned_load(*(uint*)&data[0], 1));
+		}
+	}
+	else if (data.len < 32) // 16..31 bytes: one 16-byte round, then the tail
+	{
+		// HASH16
+		@komimul(
+			@unaligned_load(*(ulong*)&data[0], 1) ^ seed1,
+			@unaligned_load(*(ulong*)&data[8], 1) ^ seed5,
+			seed1, seed5
+		);
+		seed1 ^= seed5;
+
+		if (data.len < 24)
+		{
+			r1h = (((@unaligned_load(*(ulong*)&data[^8], 1) >> 8) | ((ulong)1 << 56)) >> (((int)(data.len * 8) ^ 184))) ^ seed1;
+			r2h = seed5;
+		}
+		else
+		{
+			r1h = @unaligned_load(*(ulong*)&data[16], 1) ^ seed1;
+			r2h = (((@unaligned_load(*(ulong*)&data[^8], 1) >> 8) | ((ulong)1 << 56)) >> (((int)(data.len * 8) ^ 248))) ^ seed5;
+		}
+	}
+	else // 32 bytes and up
+	{
+		if (data.len >= 64)
+		{
+			ulong[8] seeds = {
+				seed1, 0x1319_8A2E_0370_7344 ^ seed1, 0xA409_3822_299F_31D0 ^ seed1, 0x082E_FA98_EC4E_6C89 ^ seed1,
+				seed5, 0xBE54_66CF_34E9_0C6C ^ seed5, 0xC0AC_29B7_C97C_50DD ^ seed5, 0x3F84_D5B5_B547_0917 ^ seed5,
+			};
+
+			// HASHLOOP64
+			for (; data.len >= 64; data = data[64:^64])
+			{
+				$for var $x = 0; $x < 4; ++$x :
+					@komimul(
+						@unaligned_load(*(ulong*)&data[0 + ($x * 8)], 1) ^ seeds[$x],
+						@unaligned_load(*(ulong*)&data[32 + ($x * 8)], 1) ^ seeds[4 + $x],
+						seeds[$x], seeds[4 + $x]
+					);
+				$endfor
+
+				seeds[3] ^= seeds[6];
+				seeds[0] ^= seeds[7];
+				seeds[2] ^= seeds[5];
+				seeds[1] ^= seeds[4];
+			}
+
+			seed1 = seeds[0] ^ seeds[1] ^ seeds[2] ^ seeds[3]; // collapse the eight lanes back into the two running seeds
+			seed5 = seeds[4] ^ seeds[5] ^ seeds[6] ^ seeds[7];
+		}
+
+		for (; data.len >= 16; data = data[16:^16]) // remaining whole 16-byte blocks; leaves 0..15 bytes
+		{
+			@komimul(
+				@unaligned_load(*(ulong*)&data[0], 1) ^ seed1,
+				@unaligned_load(*(ulong*)&data[8], 1) ^ seed5,
+				seed1, seed5
+			);
+			seed1 ^= seed5;
+		}
+
+		if (data.len < 8)
+		{
+			// NOTE: This is translated from the original code. It loads the 8 bytes that END at the tail of the
+			// input even though fewer than 8 bytes remain in `data`. That is in bounds because this branch is
+			// only reached when the original input was at least 32 bytes long, so those bytes precede `data.ptr`.
+			r1h = (((@unaligned_load(*(ulong*)(data.ptr + data.len - 8), 1) >> 8) | ((ulong)1 << 56)) >> ((data.len * 8) ^ 0x38)) ^ seed1;
+			r2h = seed5;
+		}
+		else
+		{
+			r1h = @unaligned_load(*(ulong*)data.ptr, 1) ^ seed1;
+			r2h = (((@unaligned_load(*(ulong*)&data[^8], 1) >> 8) | ((ulong)1 << 56)) >> ((data.len * 8) ^ 0x78)) ^ seed5;
+		}
+	}
+
+	// HASHFIN
+	@komimul(r1h, r2h, seed1, seed5);
+	seed1 ^= seed5;
+	@komimul(seed1, seed5, seed1, seed5);
+	seed1 ^= seed5;
+	return seed1;
+}
diff --git a/lib/std/hash/metro128.c3 b/lib/std/hash/metro128.c3
new file mode 100644
index 000000000..ea645db32
--- /dev/null
+++ b/lib/std/hash/metro128.c3
@@ -0,0 +1,149 @@
+// Copyright (c) 2025 Zack Puhl . All rights reserved.
+// Use of this source code is governed by the MIT license
+// a copy of which can be found in the LICENSE_STDLIB file.
+//
+// MetroHash64 and MetroHash128 are different enough to warrant their own
+// modules, and there would be no reason to create a generic module just
+// for the two. If you inspect the differences, the only shared portion
+// of the entire process is the `update` method.
+//
+module std::hash::metro128;
+
+// Multiplicative mixing constants shared by every round of the 128-bit variant.
+const ulong[4] K @local = {
+	0xc83a91e1,
+	0x8648dbdb,
+	0x7bdec03b,
+	0x2f5870a5,
+};
+
+// Streaming hasher state. Call `init`, then `update` any number of times, then `final` once.
+struct MetroHash128
+{
+	union
+	{
+		ulong[4] state; // four 64-bit mixing lanes
+		uint128 result; // aliases the first two lanes; this is what `final` returns
+	}
+	union
+	{
+		ulong[4] stomach_64; // the staging buffer viewed as four words
+		char[32] stomach; // bytes waiting for a full 32-byte block
+	}
+	ulong bytes; // total bytes absorbed so far; `bytes % 32` is the fill level of `stomach`
+}
+
+// One-shot convenience wrapper: init + update + final on a fresh hasher.
+fn uint128 hash(char[] data, ulong seed = 0)
+{
+	MetroHash128 m;
+	m.init(seed);
+	m.update(data);
+	return m.final();
+}
+
+// Seeds the four lanes and resets the byte counter, so a previously used hasher can be re-initialised safely.
+fn void MetroHash128.init(&self, ulong seed = 0)
+{
+	// `bytes` drives the partial-block logic in `update`, so it must restart at zero.
+	self.bytes = 0;
+	self.state = {
+		(seed - K[0]) * K[3], (seed + K[1]) * K[2],
+		(seed + K[0]) * K[2], (seed - K[1]) * K[3],
+	};
+}
+
+// Absorbs `data`. Whole 32-byte blocks are mixed immediately; any remainder waits in `stomach` for the next call or `final`.
+fn void MetroHash128.update(&self, char[] data)
+{
+	if (self.bytes % 32) // partial buffer
+	{
+		ulong to_fill = min(data.len, (32 - (self.bytes % 32)));
+
+		self.stomach[(self.bytes % 32):to_fill] = data[:to_fill];
+
+		data = data[to_fill..];
+		self.bytes += to_fill;
+
+		if (self.bytes % 32) return; // still awaiting more input, or final
+
+		self.state[0] += self.stomach_64[0] * K[0]; self.state[0] = self.state[0].rotr(29) + self.state[2];
+		self.state[1] += self.stomach_64[1] * K[1]; self.state[1] = self.state[1].rotr(29) + self.state[3];
+		self.state[2] += self.stomach_64[2] * K[2]; self.state[2] = self.state[2].rotr(29) + self.state[0];
+		self.state[3] += self.stomach_64[3] * K[3]; self.state[3] = self.state[3].rotr(29) + self.state[1];
+	}
+
+	self.bytes += data.len;
+
+	for (; data.len >= 32; data = data[32:^32])
+	{
+		self.state[0] += @unaligned_load(((ulong*)data.ptr)[0], 1) * K[0]; self.state[0] = self.state[0].rotr(29) + self.state[2];
+		self.state[1] += @unaligned_load(((ulong*)data.ptr)[1], 1) * K[1]; self.state[1] = self.state[1].rotr(29) + self.state[3];
+		self.state[2] += @unaligned_load(((ulong*)data.ptr)[2], 1) * K[2]; self.state[2] = self.state[2].rotr(29) + self.state[0];
+		self.state[3] += @unaligned_load(((ulong*)data.ptr)[3], 1) * K[3]; self.state[3] = self.state[3].rotr(29) + self.state[1];
+	}
+
+	// Gobble up the leftover bytes. Nom nom.
+	if (data.len > 0) self.stomach[:data.len] = data[..];
+}
+
+// Mixes the buffered tail into the lanes and returns the 128-bit digest. Mutates `state`, so call it once per `init`.
+fn uint128 MetroHash128.final(&self)
+{
+	if (self.bytes >= 32) // extra lane fold, only when at least one full block was absorbed
+	{
+		self.state[2] ^= (((self.state[0] + self.state[3]) * K[0]) + self.state[1]).rotr(21) * K[1];
+		self.state[3] ^= (((self.state[1] + self.state[2]) * K[1]) + self.state[0]).rotr(21) * K[0];
+		self.state[0] ^= (((self.state[0] + self.state[2]) * K[0]) + self.state[3]).rotr(21) * K[1];
+		self.state[1] ^= (((self.state[1] + self.state[3]) * K[1]) + self.state[2]).rotr(21) * K[0];
+	}
+
+	char[] final_data = self.stomach[:(self.bytes % 32)]; // the 0..31 buffered bytes, consumed below in 16/8/4/2/1-byte steps
+
+	if (final_data.len >= 16)
+	{
+		self.state[0] += ((ulong*)final_data.ptr)[0] * K[2]; self.state[0] = self.state[0].rotr(33) * K[3];
+		self.state[1] += ((ulong*)final_data.ptr)[1] * K[2]; self.state[1] = self.state[1].rotr(33) * K[3];
+		self.state[0] ^= ((self.state[0] * K[2]) + self.state[1]).rotr(45) * K[1];
+		self.state[1] ^= ((self.state[1] * K[3]) + self.state[0]).rotr(45) * K[0];
+
+		final_data = final_data[16:^16];
+	}
+
+	if (final_data.len >= 8)
+	{
+		self.state[0] += @unaligned_load(((ulong*)final_data.ptr)[0], 1) * K[2]; self.state[0] = self.state[0].rotr(33) * K[3];
+		self.state[0] ^= ((self.state[0] * K[2]) + self.state[1]).rotr(27) * K[1];
+
+		final_data = final_data[8:^8];
+	}
+
+	if (final_data.len >= 4)
+	{
+		self.state[1] += @unaligned_load(((uint*)final_data.ptr)[0], 1) * K[2]; self.state[1] = self.state[1].rotr(33) * K[3];
+		self.state[1] ^= ((self.state[1] * K[3]) + self.state[0]).rotr(46) * K[0];
+
+		final_data = final_data[4:^4];
+	}
+
+	if (final_data.len >= 2)
+	{
+		self.state[0] += @unaligned_load(((ushort*)final_data.ptr)[0], 1) * K[2]; self.state[0] = self.state[0].rotr(33) * K[3];
+		self.state[0] ^= ((self.state[0] * K[2]) + self.state[1]).rotr(22) * K[1];
+
+		final_data = final_data[2:^2];
+	}
+
+	if (final_data.len >= 1)
+	{
+		self.state[1] += ((char*)final_data.ptr)[0] * K[2]; self.state[1] = self.state[1].rotr(33) * K[3];
+		self.state[1] ^= ((self.state[1] * K[3]) + self.state[0]).rotr(58) * K[0];
+	}
+
+	self.state[0] += ((self.state[0] * K[0]) + self.state[1]).rotr(13);
+	self.state[1] += ((self.state[1] * K[1]) + self.state[0]).rotr(37);
+	self.state[0] += ((self.state[0] * K[2]) + self.state[1]).rotr(13);
+	self.state[1] += ((self.state[1] * K[3]) + self.state[0]).rotr(37);
+
+	return self.result;
+}
diff --git a/lib/std/hash/metro64.c3 b/lib/std/hash/metro64.c3
new file mode 100644
index 000000000..f75f17426
--- /dev/null
+++ b/lib/std/hash/metro64.c3
@@ -0,0 +1,152 @@
+// Copyright (c) 2025 Zack Puhl . All rights reserved.
+// Use of this source code is governed by the MIT license
+// a copy of which can be found in the LICENSE_STDLIB file.
+//
+// MetroHash64 and MetroHash128 are different enough to warrant their own
+// modules, and there would be no reason to create a generic module just
+// for the two. If you inspect the differences, the only shared portion
+// of the entire process is the `update` method.
+//
+module std::hash::metro64;
+
+// Multiplicative mixing constants shared by every round of the 64-bit variant.
+const ulong[4] K @local = {
+	0xd6d018f5,
+	0xa2aa033b,
+	0x62992fc1,
+	0x30bc5b29,
+};
+
+// Streaming hasher state. Call `init`, then `update` any number of times, then `final` once.
+struct MetroHash64
+{
+	union
+	{
+		ulong[4] state; // four 64-bit mixing lanes
+		ulong result; // aliases state[0]; this is what `final` returns
+	}
+	union
+	{
+		ulong[4] stomach_64; // the staging buffer viewed as four words
+		char[32] stomach; // bytes waiting for a full 32-byte block
+	}
+	ulong bytes; // total bytes absorbed so far; `bytes % 32` is the fill level of `stomach`
+	ulong vseed; // seed-derived value, reused by `final`
+}
+
+// One-shot convenience wrapper: init + update + final on a fresh hasher.
+fn ulong hash(char[] data, ulong seed = 0)
+{
+	MetroHash64 m;
+	m.init(seed);
+	m.update(data);
+	return m.final();
+}
+
+// Seeds the four lanes and resets the byte counter, so a previously used hasher can be re-initialised safely.
+fn void MetroHash64.init(&self, ulong seed = 0)
+{
+	self.vseed = (seed + K[2]) * K[0];
+	self.bytes = 0; // `bytes` drives the partial-block logic in `update`, so it must restart at zero
+	self.state[0] = self.vseed;
+	self.state[1] = self.vseed;
+	self.state[2] = self.vseed;
+	self.state[3] = self.vseed;
+}
+
+// Absorbs `data`. Whole 32-byte blocks are mixed immediately; any remainder waits in `stomach` for the next call or `final`.
+fn void MetroHash64.update(&self, char[] data)
+{
+	if (self.bytes % 32) // partial buffer
+	{
+		ulong to_fill = min(data.len, (32 - (self.bytes % 32)));
+
+		self.stomach[(self.bytes % 32):to_fill] = data[:to_fill];
+
+		data = data[to_fill..];
+		self.bytes += to_fill;
+
+		if (self.bytes % 32) return; // still awaiting more input, or final
+
+		self.state[0] += self.stomach_64[0] * K[0]; self.state[0] = self.state[0].rotr(29) + self.state[2];
+		self.state[1] += self.stomach_64[1] * K[1]; self.state[1] = self.state[1].rotr(29) + self.state[3];
+		self.state[2] += self.stomach_64[2] * K[2]; self.state[2] = self.state[2].rotr(29) + self.state[0];
+		self.state[3] += self.stomach_64[3] * K[3]; self.state[3] = self.state[3].rotr(29) + self.state[1];
+	}
+
+	self.bytes += data.len;
+
+	for (; data.len >= 32; data = data[32:^32])
+	{
+		self.state[0] += @unaligned_load(((ulong*)data.ptr)[0], 1) * K[0]; self.state[0] = self.state[0].rotr(29) + self.state[2];
+		self.state[1] += @unaligned_load(((ulong*)data.ptr)[1], 1) * K[1]; self.state[1] = self.state[1].rotr(29) + self.state[3];
+		self.state[2] += @unaligned_load(((ulong*)data.ptr)[2], 1) * K[2]; self.state[2] = self.state[2].rotr(29) + self.state[0];
+		self.state[3] += @unaligned_load(((ulong*)data.ptr)[3], 1) * K[3]; self.state[3] = self.state[3].rotr(29) + self.state[1];
+	}
+
+	// Gobble up the leftover bytes. Nom nom.
+	if (data.len > 0) self.stomach[:data.len] = data[..];
+}
+
+// Mixes the buffered tail into the lanes and returns the 64-bit digest. Mutates `state`, so call it once per `init`.
+fn ulong MetroHash64.final(&self)
+{
+	if (self.bytes >= 32) // extra lane fold, only when at least one full block was absorbed
+	{
+		self.state[2] ^= (((self.state[0] + self.state[3]) * K[0]) + self.state[1]).rotr(37) * K[1];
+		self.state[3] ^= (((self.state[1] + self.state[2]) * K[1]) + self.state[0]).rotr(37) * K[0];
+		self.state[0] ^= (((self.state[0] + self.state[2]) * K[0]) + self.state[3]).rotr(37) * K[1];
+		self.state[1] ^= (((self.state[1] + self.state[3]) * K[1]) + self.state[2]).rotr(37) * K[0];
+
+		self.state[0] = self.vseed + (self.state[0] ^ self.state[1]);
+	}
+
+	char[] final_data = self.stomach[:(self.bytes % 32)]; // the 0..31 buffered bytes, consumed below in 16/8/4/2/1-byte steps
+
+	if (final_data.len >= 16)
+	{
+		self.state[1] = self.state[0] + @unaligned_load(((ulong*)final_data.ptr)[0], 1) * K[2]; self.state[1] = self.state[1].rotr(29) * K[3];
+		self.state[2] = self.state[0] + @unaligned_load(((ulong*)final_data.ptr)[1], 1) * K[2]; self.state[2] = self.state[2].rotr(29) * K[3];
+		self.state[1] ^= (self.state[1] * K[0]).rotr(21) + self.state[2];
+		self.state[2] ^= (self.state[2] * K[3]).rotr(21) + self.state[1];
+		self.state[0] += self.state[2];
+
+		final_data = final_data[16:^16];
+	}
+
+	if (final_data.len >= 8)
+	{
+		self.state[0] += @unaligned_load(((ulong*)final_data.ptr)[0], 1) * K[3];
+		self.state[0] ^= self.state[0].rotr(55) * K[1];
+
+		final_data = final_data[8:^8];
+	}
+
+	if (final_data.len >= 4)
+	{
+		self.state[0] += @unaligned_load(((uint*)final_data.ptr)[0], 1) * K[3];
+		self.state[0] ^= self.state[0].rotr(26) * K[1];
+
+		final_data = final_data[4:^4];
+	}
+
+	if (final_data.len >= 2)
+	{
+		self.state[0] += @unaligned_load(((ushort*)final_data.ptr)[0], 1) * K[3];
+		self.state[0] ^= self.state[0].rotr(48) * K[1];
+
+		final_data = final_data[2:^2];
+	}
+
+	if (final_data.len >= 1)
+	{
+		self.state[0] += ((char*)final_data.ptr)[0] * K[3];
+		self.state[0] ^= self.state[0].rotr(37) * K[1];
+	}
+
+	self.state[0] ^= self.state[0].rotr(28);
+	self.state[0] *= K[0];
+	self.state[0] ^= self.state[0].rotr(29);
+
+	return self.result;
+}
diff --git a/lib/std/hash/wyhash2.c3 b/lib/std/hash/wyhash2.c3
new file mode 100644
index 000000000..76771b814
--- /dev/null
+++ b/lib/std/hash/wyhash2.c3
@@ -0,0 +1,56 @@
+// Copyright (c) 2025 Zack Puhl . All rights reserved.
+// Use of this source code is governed by the MIT license
+// a copy of which can be found in the LICENSE_STDLIB file.
+//
+// An implementation of Wang Yi's wyhash(2) algorithm in C3:
+// https://github.com/wangyi-fudan/wyhash
+//
+module std::hash::wyhash2;
+
+// Packs the first, middle (`len >> 1`) and last byte of a 1..3 byte input into the low 24 bits of the result.
+fn ulong wyr3(char* in, usz len) @inline
+	=> ((ulong)in[0] << 16) | ((ulong)in[len >> 1] << 8) | (ulong)in[len - 1];
+
+
+// See: https://docs.google.com/spreadsheets/d/1HmqDj-suH4wBFNg7etwE8WVBlfCufvD5-gAnIENs94k/edit?gid=1915335726#gid=1915335726
+// Credit to article:
+// https://medium.com/@tprodanov/benchmarking-non-cryptographic-hash-functions-in-rust-2e6091077d11
+//
+// wyhash2 has a >90% chance of collisions when its input data is above 16 bytes in length.
+// However, it is the fastest performing and most evenly randomized hash for very low-length inputs,
+// making it an ideal candidate for hashing primitive data types quickly and making things like hash
+// tables even faster. Therefore, a 16-byte input limit is imposed on all calls to the hash function.
+// Returns a 64-bit hash of `input` (at most 16 bytes); `seed` (default 0) is XORed into the mix. Inputs of 9..16 bytes are covered by two overlapping 8-byte reads.
+<*
+@require input.len <= 16 : `wyhash2 is not useable for inputs over 16 bytes in length.`
+*>
+fn ulong hash(char[] input, ulong seed = 0)
+{
+	seed ^= 0xa076_1d64_78bd_642f;
+
+	ulong a, b; // both stay at their initial value for an empty input (C3 zero-initializes locals)
+
+	if (@likely(input.len <= 8)) // more likely to encounter 8-byte or lower type here
+	{
+		if (@likely(input.len >= 4))
+		{
+			a = (ulong)@unaligned_load(*(uint*)input.ptr, 1); // first 4 bytes widened to a u64
+			b = (ulong)@unaligned_load(*(uint*)&input[^4], 1); // a walking 4-byte window based on input.len
+		}
+		else if (input.len > 0)
+		{
+			a = wyr3(input, input.len); // 1..3 bytes: first/middle/last byte packed into `a`; `b` is not assigned here
+		}
+	}
+	else
+	{
+		a = @unaligned_load(*(ulong*)input.ptr, 1); // first 8 bytes
+		b = @unaligned_load(*(ulong*)&input[^8], 1); // a walking 8-byte window based on input.len
+	}
+
+	uint128 r = ((uint128)a ^ 0xe703_7ed1_a0b4_28db) * ((uint128)b ^ seed); // 128-bit multiply of the two masked words...
+	ulong pre_res = (ulong)r ^ (ulong)(r >> 64); // ...folded to 64 bits by XORing the low and high halves
+
+	r = ((uint128)0xe703_7ed1_a0b4_28db ^ input.len) * (uint128)pre_res; // second multiply mixes in the input length
+	return (ulong)r ^ (ulong)(r >> 64);
+}
diff --git a/releasenotes.md b/releasenotes.md
index 7c87c4e7f..1f01be5ca 100644
--- a/releasenotes.md
+++ b/releasenotes.md
@@ -26,6 +26,7 @@
 - Check unaligned array access.
 - Add "@structlike" for typedefs.
 - "poison" the current function early when a declaration can't be correctly resolved.
+- Add komihash, a5hash, metrohash64, metrohash128, and wyhash2 variants with tests/benchmark. #2293
 
 ### Fixes
 - mkdir/rmdir would not work properly with substring paths on non-windows platforms.
diff --git a/test/unit/stdlib/hash/a5hash.c3 b/test/unit/stdlib/hash/a5hash.c3
new file mode 100644
index 000000000..f8ff82e3f
--- /dev/null
+++ b/test/unit/stdlib/hash/a5hash.c3
@@ -0,0 +1,71 @@
+// Copyright (c) 2025 Zack Puhl . All rights reserved.
+// Use of this source code is governed by the MIT license
+// a copy of which can be found in the LICENSE_STDLIB file.
+module a5hash_tests @test; + +import std::hash::a5hash; + + +fn void vector_1() +{ + char[] input = "This is a test of a5hash."; + ulong expected = 0xb163640b41959e6b; + + ulong actual = a5hash::hash(input); + + test::@check(actual == expected, "Hash mismatch (%x expected // %x actual).", expected, actual); +} + +fn void vector_offset() +{ + char[] input = "This is a test of a5hash."; + + ulong actual = a5hash::hash(input[1..]); +} + + +fn void vector_2() +{ + char[] input = "7 chars"; + ulong expected = 0xe49a0cc72256bbac; + + ulong actual = a5hash::hash(input); + + test::@check(actual == expected, "Hash mismatch (%x expected // %x actual).", expected, actual); +} + + +const ulong[] EXPECTED_SWEEP = { + 0xfa40305e7f876cde, 0xa462e33cc53262b4, 0x2373712194152d56, 0x948839e266ada547, + 0x6d0c1912787ad5b8, 0x6c234caa741dc983, 0x2d45a051cf4c6588, 0x16c4a4f081d55f34, + 0x2c06450d6f205485, 0x55296f9db1992971, 0x7329cd52328a9082, 0x74785ad80cb7e0cf, + 0x13288aea2281441a, 0x194ae1b6f33f8a83, 0x165e812426f0e087, 0x84981c9506adefb3, + 0x63270fe923b6935f, 0x42279ee502ecac49, 0x21d1c830488bc670, 0x4ea7876e46fdab41, + 0x8af2d66eff7dbae9, 0x8892e79538d4d132, 0x823916d272cfaa91, 0x4187aa86dc29e276, + 0xa2c8198dd1d883b0, 0x1f29c0e4fdcee024, 0xd27d762a99e59b08, 0x361f15e55087a978, + 0x30272a11795ab5d4, 0xffb5f1f42efa5c1a, 0xbc9e503290940862, 0x325c94b294da618f, + 0xa3da4b25911ac41f, 0x48b0e8e5c734e3bc, 0x5e7b0d5f607108b1, 0xaf44b82e7cc700c0, + 0x08184e9ed8940831, 0x16493a88bb9bc76c, 0x6a542a2614969994, 0x7ea3a4295a702672, + 0x4cdcae9d0feaae9a, 0xa51c82eb8201d45f, 0x4e4bce4bb46f20a4, 0xc4a97e28b2fa2993, + 0xe6d48cc40df3905e, 0x684abe59a2db9061, 0x766f289e1ab66393, 0x46f4ab742979a005, + 0xa2d0521bb9eb3653, 0xb41938068a89f9ae, 0x06c063a13b6c380d, 0xf53bf0e413522ab7, + 0x61fa9597bf50dc2e, 0x5911a437240cd52b, 0xc8929ab341f26bb6, 0x46c99c2cfcb00d14, + 0x22f46d19bf96ded8, 0xf63d8cf026448dcf, 0x7e6ab3b486536caa, 0xc2e53529793ce2a8, + 0xcf9f59fb91b7893b, 0xf95d2ae3f31aaf04, 
0x423472f722383ea1, 0xc42aebbb3980132a, + 0x458efaa15efd35cb, 0xcd3e0989dc4e04ed, 0xa1c01cd5305af58a, 0x40bc73f12e21385f, + 0x8464509b2b5438ec, 0x961baaded287ad53, 0x22b0a89537728143, 0x7826002b97c764a1, + 0x25eed2c492550022, 0x833bb150f9e75741, 0xcc30d4982191208d, 0x1eaf0a962f3eedea, + 0xe98219e502cce0d5, 0x2bfe6f0253fc07c1, 0x2f8a14428430d003, 0x30e1aa29ee8b7bea, +}; + +fn void sweep() +{ + char[] input = { [0..EXPECTED_SWEEP.len] = '5' }; + + foreach (i, expected : EXPECTED_SWEEP) + { + ulong actual = a5hash::hash(input[:i], 0x12ca6b4391e055fe); + + test::@check(actual == expected, "Hash mismatch (%x expected // %x actual).", expected, actual); + } +} diff --git a/test/unit/stdlib/hash/komi.c3 b/test/unit/stdlib/hash/komi.c3 new file mode 100644 index 000000000..bbcac3ecd --- /dev/null +++ b/test/unit/stdlib/hash/komi.c3 @@ -0,0 +1,259 @@ +// Copyright (c) 2025 Zack Puhl . All rights reserved. +// Use of this source code is governed by the MIT license +// a copy of which can be found in the LICENSE_STDLIB file. 
+module komi_tests; + + +char[256] bulk; + +fn void setup() @init +{ + for (usz i = 0; i < bulk.len; ++i) bulk[i] = (char)i; +} + + +// ========================================================================== +module komi_tests @test; + +import std::hash::komi; + + +const char[][] INPUTS = { + "This is a 32-byte testing string", + "The cat is out of the bag", + "A 16-byte string", + "The new string", + "7 chars", +}; + +const usz[] BULK_INTERVALS = { 3, 6, 8, 12, 20, 31, 32, 40, 47, 48, 56, 64, 72, 80, 112, 132, 256 }; + + +const ulong[] EXPECTED_UNSEEDED = { + 0x05ad960802903a9d, + 0xd15723521d3c37b1, + 0x467caa28ea3da7a6, + 0xf18e67bc90c43233, + 0x2c514f6e5dcb11cb, +}; + +const ulong[] EXPECTED_BULK_UNSEEDED = { + 0x7a9717e9eea4be8b, + 0xa56469564c2ea0ff, + 0x00b4313a24431306, + 0x64c2ad96013f70fe, + 0x7a3888bc95545364, + 0xc77e02ed4b201b9a, + 0x256d74350303a1ba, + 0x59609c71697bb9df, + 0x36eb9e6a4c2c5e4b, + 0x8dd56c332850baa6, + 0xcbb722192b353999, + 0x90b07e2158f88cc0, + 0x24c9621701603741, + 0x1d4c1d97ca684334, + 0xd1a425d530652287, + 0x72623be342c20ab5, + 0x94c3dbdca59ddf57, +}; + +fn void unseeded_string_vectors() +{ + for (usz i = 0; i < INPUTS.len; ++i) + { + char[] input = INPUTS[i]; + ulong expected = EXPECTED_UNSEEDED[i]; + + ulong actual = komi::hash(input); + + test::@check(actual == expected, "Hash mismatch for unseeded '%s' (%x expected // %x actual).", (ZString)input, expected, actual); + } +} + +fn void unseeded_bulk_vectors() +{ + $assert BULK_INTERVALS.len == EXPECTED_BULK_UNSEEDED.len + : "BULK_INTERVALS length does not match the given BULK test set."; + + for (usz i = 0; i < BULK_INTERVALS.len; ++i) + { + char[] input = bulk[:BULK_INTERVALS[i]]; + ulong expected = EXPECTED_BULK_UNSEEDED[i]; + + ulong actual = komi::hash(input); + + test::@check(actual == expected, "Hash mismatch for unseeded bulk interval %d (idx %d) (%x expected // %x actual).", BULK_INTERVALS[i], i, expected, actual); + } +} + + +const ulong TEST_SEED = 0x0123456789abcdef; 
+// Expected komi::hash results for INPUTS[i] hashed with TEST_SEED (checked by seeded_string_vectors).
+const ulong[] EXPECTED_SEEDED = {
+	0x6ce66a2e8d4979a5,
+	0x5b1da0b43545d196,
+	0x26af914213d0c915,
+	0x62d9ca1b73250cb5,
+	0x90ab7c9f831cd940,
+};
+// Expected komi::hash results for the first BULK_INTERVALS[i] bytes of `bulk` hashed with TEST_SEED.
+const ulong[] EXPECTED_BULK_SEEDED = {
+	0x84ae4eb65b96617e,
+	0xaceebc32a3c0d9e4,
+	0xdaa1a90ecb95f6f8,
+	0xec8eb3ef4af380b4,
+	0x07045bd31abba34c,
+	0xd5f619fb2e62c4ae,
+	0x5a336fd2c4c39abe,
+	0x0e870b4623eea8ec,
+	0xe552edd6bf419d1d,
+	0x37d170ddcb1223e6,
+	0x1cd89e708e5098b6,
+	0x765490569ccd77f2,
+	0x19e9d77b86d01ee8,
+	0x25f83ee520c1d241,
+	0xd6007417091cd4c0,
+	0x3e49c2d3727b9cc9,
+	0xb2b3405ee5d65f4c,
+};
+// Hashes every INPUTS entry with TEST_SEED and checks the result against EXPECTED_SEEDED.
+fn void seeded_string_vectors()
+{
+	for (usz i = 0; i < INPUTS.len; ++i)
+	{
+		char[] input = INPUTS[i];
+		ulong expected = EXPECTED_SEEDED[i];
+
+		ulong actual = komi::hash(input, TEST_SEED);
+
+		test::@check(actual == expected,
+			"Hash mismatch for seed 0x123456789abcdef '%s' (%x expected // %x actual).",
+			(ZString)input, expected, actual);
+	}
+}
+// Hashes a slice starting one byte into the string; the result is never checked, so this only exercises the call.
+fn void seeded_offset()
+{
+	char[] x = "kepkoewkopkfpokfoewkfokweokefkfkkpoewkf";
+	ulong actual = komi::hash(x[1..], TEST_SEED);
+}
+// Hashes the first BULK_INTERVALS[i] bytes of `bulk` with TEST_SEED and checks against EXPECTED_BULK_SEEDED.
+fn void seeded_bulk_vectors()
+{
+	$assert BULK_INTERVALS.len == EXPECTED_BULK_SEEDED.len
+		: "BULK_INTERVALS length does not match the given BULK test set.";
+
+	for (usz i = 0; i < BULK_INTERVALS.len; ++i)
+	{
+		char[] input = bulk[:BULK_INTERVALS[i]];
+		ulong expected = EXPECTED_BULK_SEEDED[i];
+
+		ulong actual = komi::hash(input, TEST_SEED);
+
+		test::@check(actual == expected,
+			"Hash mismatch for seeded bulk interval %d (idx %d) (%x expected // %x actual).",
+			BULK_INTERVALS[i], i, expected, actual);
+	}
+}
+
+// Second seed; the string and bulk checks above are repeated with it below.
+const ulong TEST_SEED_2 = 0x100;
+// Expected komi::hash results for INPUTS[i] hashed with TEST_SEED_2.
+const ulong[] EXPECTED_SEEDED_2 = {
+	0x5f197b30bcec1e45,
+	0xa761280322bb7698,
+	0x11c31ccabaa524f1,
+	0x3a43b7f58281c229,
+	0xcff90b0466b7e3a2,
+};
+// Expected komi::hash results for the first BULK_INTERVALS[i] bytes of `bulk` hashed with TEST_SEED_2.
+const ulong[] EXPECTED_BULK_SEEDED_2 = {
+	0x8ab53f45cc9315e3,
+	0xea606e43d1976ccf,
+	0x889b2f2ceecbec73,
+	0xacbec1886cd23275,
+	0x57c3affd1b71fcdb,
+	0x7ef6ba49a3b068c3,
+	0x49dbca62ed5a1ddf,
+	0x192848484481e8c0,
+	0x420b43a5edba1bd7,
+	0xd6e8400a9de24ce3,
+	0xbea291b225ff384d,
+	0x0ec94062b2f06960,
+	0xfa613272ecd49985,
+	0x76f0bb380bc207be,
+	0x4afb4e08ca77c020,
+	0x410f9c129ad88aea,
+	0x066c7b25f4f569ae,
+};
+// Hashes every INPUTS entry with TEST_SEED_2 and checks the result against EXPECTED_SEEDED_2.
+fn void seeded_string_vectors_2()
+{
+	for (usz i = 0; i < INPUTS.len; ++i)
+	{
+		char[] input = INPUTS[i];
+		ulong expected = EXPECTED_SEEDED_2[i];
+
+		ulong actual = komi::hash(input, TEST_SEED_2);
+
+		test::@check(actual == expected,
+			"Hash mismatch for seed 0x100 '%s' (%x expected // %x actual).",
+			(ZString)input, expected, actual);
+	}
+}
+// Hashes the first BULK_INTERVALS[i] bytes of `bulk` with TEST_SEED_2 and checks against EXPECTED_BULK_SEEDED_2.
+fn void seeded_bulk_vectors_2()
+{
+	$assert BULK_INTERVALS.len == EXPECTED_BULK_SEEDED_2.len
+		: "BULK_INTERVALS length does not match the given BULK test set.";
+
+	for (usz i = 0; i < BULK_INTERVALS.len; ++i)
+	{
+		char[] input = bulk[:BULK_INTERVALS[i]];
+		ulong expected = EXPECTED_BULK_SEEDED_2[i];
+
+		ulong actual = komi::hash(input, TEST_SEED_2);
+
+		test::@check(actual == expected,
+			"Hash mismatch for seeded bulk interval %d (idx %d) (%x expected // %x actual).",
+			BULK_INTERVALS[i], i, expected, actual);
+	}
+}
+
+// Expected komi::hash results (seed 0x0abab1234321) for runs of 'z' of length 0..79; entry i is for length i.
+const ulong[] EXPECTED_SWEEP = {
+	0x5cdcdaf25a774bdf, 0x1e5c3d2098586a17, 0x138664c4f409a6aa, 0x6936cf53c986deb1,
+	0x3417ed7d7a081094, 0xd38acf8461008782, 0xf19e49a8a7c77869, 0xb4c60ae1c52e8a13,
+	0xc6e7ceffa4af2605, 0xc219152657fcc7b9, 0x9562052479b8007c, 0xb4395e5aec193f02,
+	0x80d9987c7e56ca6f, 0x03a29a5f5d9918d3, 0x2d4988241df24218, 0x8d569336b00c6578,
+	0x1975540b1f4ed2bc, 0x7265c30b704afb6e, 0x173f6f524900ec6d, 0xcc86a82757407a99,
+	0xacc0654d841e5e31, 0x67345fcf0f031a01, 0x5914b9ebdc010c00, 0x1c61fe5bda86efb4,
+	0x6e133aa91b2d9218, 0x84357177a1c7df4e, 0x8cdd00e42ceeea38, 0x0fa84a74c35fd8f3,
+	0x9f33f6baf88e1b8b, 0xe3d86438fdc4bbb5, 0x9be24abc570ab17d, 0x2d76ab384b25e64d,
+	0xecdc96ef224dc58f, 0x62940f1aa710e4ee, 0xf45e451deff06f7b, 0x9250f7c75be78b2d,
+	0x600298c67b78935e, 0x35180e348bdd7e95, 0x101cf10a0c10c8cf, 0xea20c0ab77226b52,
+	0x1182f1b40f5e68b8, 0xf12820779d3a6eef, 0x34c21125302e30bd, 0x9a61527aa22e1000,
+	0x24a9c494a2cc820e, 0x56f3fc85980c2630, 0xeb260f799a9dab96, 0xddee1e0ca36342f9,
+	0x2249255270abe787, 0x588d83ead4d6085d, 0x8833170d2fdc30ce, 0x9ac09ffc9290461e,
+	0xabb332ab60963f6f, 0x9a5ae4156a1d7b76, 0x2466ca4ff4acf4a4, 0xde76c11cc56419ef,
+	0x619f641aa58485ea, 0x34d208b1da1e5684, 0xff4ab73c565450e2, 0xb09fdca3ec15641c,
+	0x4eaba3d39397930a, 0x1c5cad34c08a56bd, 0x182989a7334faf06, 0xefcc05ca09cde68e,
+	0xac04e3c6d5439ab5, 0xfeeee75b73596fe3, 0x1d35e0da61e6888a, 0x55c89cc84bbb5faf,
+	0x39981a0668198e8f, 0x17851ab16d9312ee, 0x0caec994194bd050, 0x90f47d9d7bfd3861,
+	0x3c45c729da026626, 0xf0ab0708bac2eefb, 0x6c6c15ce7b59daa9, 0xf47920556419e57b,
+	0x7d997797acc04e3a, 0x329c7665007f9d3f, 0x113774f05438d762, 0x4be67982859ac5f4,
+};
+// Hashes every prefix length of a 'z'-filled buffer that EXPECTED_SWEEP covers and checks each result.
+fn void sweep()
+{
+	char[] input = { [0..EXPECTED_SWEEP.len] = 'z' };
+
+	foreach (i, expected : EXPECTED_SWEEP)
+	{
+		ulong actual = komi::hash(input[:i], 0x0abab1234321);
+		// Report the hashed prefix length: `input` holds only 'z' bytes, so it is not a safe NUL-terminated ZString.
+		test::@check(actual == expected,
+			"Hash mismatch for seed 0x0abab1234321, length %d (%x expected // %x actual).",
+			i, expected, actual);
+	}
+}
diff --git a/test/unit/stdlib/hash/metro.c3 b/test/unit/stdlib/hash/metro.c3
new file mode 100644
index 000000000..be75d0a09
--- /dev/null
+++ b/test/unit/stdlib/hash/metro.c3
@@ -0,0 +1,140 @@
+// Copyright (c) 2025 Zack Puhl . All rights reserved.
+// Use of this source code is governed by the MIT license
+// a copy of which can be found in the LICENSE_STDLIB file.
+module metrohash_tests @test;
+
+import std::hash::metro64;
+import std::hash::metro128;
+
+// 63-byte key shared by the offset, fixed-vector and streaming tests below.
+const char[] TEST_KEY = "012345678901234567890123456789012345678901234567890123456789012";
+
+// Hashes TEST_KEY from byte 1 onward with metro64; the result is discarded, so only the call is exercised.
+fn void metro64_offset()
+{
+	metro64::hash(TEST_KEY[1..]);
+}
+// Same as metro64_offset, for metro128.
+fn void metro128_offset()
+{
+	metro128::hash(TEST_KEY[1..]);
+}
+// One-shot metro64 of TEST_KEY, first without a seed argument and then with seed 1.
+fn void metro64_vectors()
+{
+	ulong want_plain = 0xad4b7006ae3d756b;
+	ulong got_plain = metro64::hash(TEST_KEY);
+	test::@check(got_plain == want_plain, "Hash mismatch (%x expected // %x actual).", want_plain, got_plain);
+
+	ulong want_seeded = 0xdfb8b9f41c480d3b;
+	ulong got_seeded = metro64::hash(TEST_KEY, 1);
+	test::@check(got_seeded == want_seeded, "Hash mismatch (%x expected // %x actual).", want_seeded, got_seeded);
+}
+// Feeds TEST_KEY to MetroHash64 (seed 1) in three uneven pieces and expects the one-shot seed-1 value.
+fn void metro64_streamed()
+{
+	ulong want = 0xdfb8b9f41c480d3b;
+
+	MetroHash64 state;
+	state.init(1);
+	state.update(TEST_KEY[:13]);
+	state.update(TEST_KEY[13:11]);
+	state.update(TEST_KEY[24..]);
+
+	ulong got = state.final();
+
+	test::@check(got == want, "Hash mismatch (%x expected // %x actual).", want, got);
+}
+
+// Expected metro64 hashes (seed 1337) of runs of 'a'; entry n is for a run of length n (0..65).
+const ulong[66] VECTORS_SWEEP_64 = {
+	0xe2f700c7be596c30, 0xd924f06e80703f5f, 0x0e407ae9f3b31eea, 0xb286855b22bb5a7c,
+	0x413147f80d972772, 0xa6defbb4891b57ad, 0x0bf33d8a3a11377b, 0x5ef754dc5e155820,
+	0x57817499be0ee747, 0x61410284964661e8, 0xdf14b67bf1cf84a1, 0xa34f9fc7d88adda1,
+	0xeff25775757576a4, 0x5a3f096738c0f672, 0xcedb9bba97965231, 0xe2234b45b095d9f5,
+	0x19cb856abaffafc5, 0x4c2385e5a329fe50, 0x0c1731f599c24394, 0x207d5d5069420af6,
+	0xa00af52b3ee78ccf, 0x2649bb0315ed3705, 0x1e1e8cb19aebd947, 0x441c7ad9ede94456,
+	0xde8fb76b48fe0795, 0xe28aff110a0485d3, 0x1c4be10ba94dea5e, 0xb345b8382fbcb14c,
+	0xbd2083c97604113a, 0x53725cedc13b1f91, 0x6bde258654aabe35, 0x5571177c4f463a94,
+	0x7893679fa856b4d8, 0xeb700288dd6ed4fb, 0x3f70383fca952a4a, 0x5b7a795ce3f141b0,
+	0xa18b62d7c44d3718, 0x6e9e37eb8ef7bc49, 0x159b948172457d48, 0x113872acbfc4fc7f,
+	0x114e2d0a2bbb1700, 0xfc3a6f8cae61d210, 0x627e43470bc34b5d, 0xfe08fbd0cb9abe73,
+	0x89dd4e70b7c61b60, 0x6bf6d591e9c00425, 0x7bebba4795cbd4a3, 0x02dee5dde8549496,
+	0x71e30b2b3c71393f, 0x2024d0a05633cc87, 0x6884bd684f1cb48b, 0xd8f23c050ee162c2,
+	0xbb4425af0f4fd259, 0x7a63abf543efaf39, 0x6b6b919b7a44fefa, 0xedf8000398fe7486,
+	0x7fa5131c2a164dad, 0x1831d78b576a433d, 0x8914114c29b11246, 0xd6f5b2b3c48239c7,
+	0xc4d9392164f808fb, 0x98454695cda41767, 0x1463110024129443, 0x37e06b51f39b0db1,
+	0xb231266aea3ac1d2, 0x9659fcfdeff62211
+};
+// Hashes each prefix (length 0..65) of an 'a'-filled buffer with seed 1337 and checks it against VECTORS_SWEEP_64.
+fn void metro64_sweep()
+{
+	char[66] fill = { [0..65] = 'a' };
+
+	foreach (n, want : VECTORS_SWEEP_64)
+	{
+		ulong got = metro64::hash(fill[:n], 1337);
+
+		test::@check(got == want,
+			"Hash mismatch (%x expected // %x actual).", want, got);
+	}
+}
+
+// One-shot metro128 of TEST_KEY, first without a seed argument and then with seed 1.
+fn void metro128_vectors()
+{
+	uint128 want_plain = 0x97a27450acb248059b9feda4bfe27cc7;
+	uint128 got_plain = metro128::hash(TEST_KEY);
+	test::@check(got_plain == want_plain, "Hash mismatch (%x expected // %x actual).", want_plain, got_plain);
+
+	uint128 want_seeded = 0xefec147a868dd6bd7f9d1938b8cda345;
+	uint128 got_seeded = metro128::hash(TEST_KEY, 1);
+	test::@check(got_seeded == want_seeded, "Hash mismatch (%x expected // %x actual).", want_seeded, got_seeded);
+}
+// Feeds TEST_KEY to MetroHash128 (seed 1) one byte per update and expects the one-shot seed-1 value.
+fn void metro128_streamed()
+{
+	uint128 want = 0xefec147a868dd6bd7f9d1938b8cda345;
+
+	MetroHash128 state;
+	state.init(1);
+	foreach (b : TEST_KEY) state.update({b}); // one byte per update call
+
+	uint128 got = state.final();
+
+	test::@check(got == want, "Hash mismatch (%x expected // %x actual).", want, got);
+}
+
+// Expected metro128 hashes (seed 1337) of runs of 'a'; entry n is for a run of length n (0..65).
+const uint128[66] VECTORS_SWEEP_128 = {
+	0xed66a903a5af8770c4bfd518077b1d4d, 0x9c04be2535e73e406be42706b98cddd4, 0xc082896b0e4704071863c4d6b79c5fd2, 0x93e5f0fabb995f1c567d1d00031ebdb2,
+	0xf33e194121b0946dadef05404de5cc63, 0xd38db248561bf524962ed9a48a841a45, 0x7e1695c8838701a49091add6ca0b6da2, 0xe9d9b67eae87f20a0d1c7e19b6c7bc8e,
+	0x8b7d6e334c2130f1f8104302054a6adf, 0x363e19909e59b57d6ea1a44071334801, 0xfc07f6db22caf91dfc07eb162e94e5b4, 0x17258d6fe6821c82b721567ad5cc845c,
+	0x5759d0fbfdad344f7bc4b2eec33494f4, 0x8e599e8eea792d89cebdd9c11f888f59, 0xce942bf2e18597e63bf12a03ba95e122, 0xdb0d0635c453b26ba07664c37bc7f241,
+	0xa9951a456d5c08c4c1564a4e111f88cc, 0x2ddea9673d7ab8e428607e268cc4af58, 0x623ce3f6fdd7f9c070dd915d89564be7, 0xa37787b74daafdebfafdf122b1f04b92,
+	0x79c36fdf895491a5d8e2b9d7b27e830e, 0x1ca5545989d706abc51eb30db70733ff, 0xe7f2557aee5921dcad639a73840f1b82, 0x5b66b8cfe8c8381d34c2cb2682f8b3a9,
+	0x555d28dc1f2cedbebea4de1c24664b07, 0xcd95e57621d4b3eba9a8a240a751f2f6, 0xab25e96dc41b344295e8d5a734d236bb, 0x0e835e0ac14d8bc0c6707af9cb04780b,
+	0x28c74c57374a23e54a97831fea86e71f, 0x09c02b2cb852802664531074b43b24e5, 0xb23658cd2892c1b33179800ca748c093, 0x4f0999fd7417928c77f5169eb6605115,
+	0xbc85b4db9fd3096abdcfc238c815e406, 0xf68f40c0ebcf9858a34f846d6442b2c4, 0xd2a4eaa7659c2ca1603d1fae214c5f6a, 0xe382f4280e70fe32c991eacd9a417644,
+	0xae43523189c866e6b759f3da9abebb0e, 0x94a3f58c1f5a71bc9d6488c74154e8d7, 0x7e869c466cf2408a0a4c2758ac1c2a1a, 0x645e5babb2ddd637d1d616db16468c8f,
+	0x668d5187f86a97172fa7dff866ab4307, 0x43761b3e2011d2b1defce3b2abd3220c, 0xcbe5c5febe9e9522f92eab2faff5a4d2, 0x57effbd664e86987a7e41d0139b0c1d4,
+	0x90fc91743fc288d563c6059b099debf2, 0xe7fe3b7f9e2804dc4ca39486d1ff95f8, 0xd419e052bf7a8037581a7176d5e5c40f, 0xbdcf3e2e8e9bec8b5174ee35f5c77a90,
+	0xa73b9edb918e873728cb61dbff14ae18, 0x6630e865ec83027c5e930f4ca1ebe300, 0xd44cc36826feb880572a83a046c159c8, 0x1e477dc003e907a1d424a4f84654ddd2,
+	0xb498e2859fa073c28a988aa0a461f9ae, 0x05666028c9d1a1a7878cbde8a82e84c4, 0xc1dac1ea4f24c32e83522d0f921560f0, 0xcbb2a8a58dc91c1230aec1f3a5c398cb,
+	0x7e76d0952c34286f5ccc2a9a30f65bb3, 0x0091c352079662facb5cd03255a6ecc7, 0xcc9d1fa3518a937b594da868ac1ea634, 0xfec1ae0bb45d5fd9bc0ed7c418c2c633,
+	0x9e9cbd767281cdd3779b2e8506774cd4, 0x42be3cc544dc7ed64da7d695313d7802, 0x7f57bad2d44c1f47722c3029ba9f53f5, 0xbd574d95b4635562acc1d8c5633589dc,
+	0x1761b98ffa140cfdc8e6ac36327b6080, 0xfddd7de5827fc61fc01b594181f887c1
+};
+// Hashes each prefix (length 0..65) of an 'a'-filled buffer with seed 1337 and checks it against VECTORS_SWEEP_128.
+fn void metro128_sweep()
+{
+	char[66] fill = { [0..65] = 'a' };
+
+	foreach (n, want : VECTORS_SWEEP_128)
+	{
+		uint128 got = metro128::hash(fill[:n], 1337);
+
+		test::@check(got == want,
+			"Hash mismatch (%x expected // %x actual).", want, got);
+	}
+}
diff --git a/test/unit/stdlib/hash/wyhash2.c3 b/test/unit/stdlib/hash/wyhash2.c3
new file mode 100644
index 000000000..49477886a
--- /dev/null
+++ b/test/unit/stdlib/hash/wyhash2.c3
@@ -0,0 +1,89 @@
+// Copyright (c) 2025 Zack Puhl . All rights reserved.
+// Use of this source code is governed by the MIT license
+// a copy of which can be found in the LICENSE_STDLIB file.
+module wyhash2_tests @test;
+
+import std::hash::wyhash2;
+
+// Hash of "abc" with no explicit seed argument.
+fn void simple_vector()
+	=> test::@check(wyhash2::hash("abc") == 0xb4808df22d44ffcf);
+// Hashes a slice starting one byte into the array; the result is discarded, so only the call is exercised.
+fn void offset_check()
+{
+	char[*] digits = "0123293829";
+	wyhash2::hash(digits[1..]);
+}
+// Short input hashed with seed 2.
+fn void simple_vector_seeded()
+	=> test::@check(wyhash2::hash("aax", 2) == 0x9c962ca4764da6f4);
+// Same short input, seeded with the value expected by simple_vector_seeded.
+fn void simple_vector_seeded_2()
+	=> test::@check(wyhash2::hash("aax", 0x9c962ca4764da6f4) == 0x49090566becc19bf);
+// Longer input hashed with seed 2.
+fn void longer_vector_seeded()
+	=> test::@check(wyhash2::hash("hi my name is:", 2) == 0x8b18145f8353c46d);
+// Same longer input, seeded with the value expected by longer_vector_seeded.
+fn void longer_vector_seeded_2()
+	=> test::@check(wyhash2::hash("hi my name is:", 0x8b18145f8353c46d) == 0x2b8f7c0e2e562e63);
+
+
+<*
+These constant vectors can be reproduced with the short Rust program shown below.
+
+Caveat: as of writing, the 0-length result of that program (which streams the input)
+differs from the one returned by `wyhash2::wyhash_single`. The empty-input vector here
+therefore uses the function's return value; for all other lengths the two methods agree.
+
+```rust
+use core::hash::Hasher;
+use wyhash2::WyHash;
+
+fn main() {
+    let arr: [u8; 16] = [b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p'];
+    let secret = 0;
+    for i in 0..=16 {
+        let mut hasher = WyHash::with_seed(secret);
+        hasher.write(&arr[..i]);
+        println!("Index {}: 0x{:x}", i, hasher.finish());
+    }
+}
+```
+*>
+const ulong[17] VECTORS = {
+	0x42bc986dc5eec4d3,
+	0x6cf84e5a2465e867,
+	0x172ba773b8ebb6d8,
+	0xb4808df22d44ffcf,
+	0x8cd6fedc542c39e1,
+	0x89f29dfa6e5ab1e5,
+	0x2d62e7827072fb65,
+	0xce8a19cc22fbe893,
+	0x3c36fed2521530c0,
+	0x1958d0433e7579fa,
+	0x787f681f01831617,
+	0x7107735a3edb98ee,
+	0xf4c24a45a41ea322,
+	0x03779e9d9ed9ff12,
+	0xd24ac6ffc05e0cb8,
+	0x0b4153cef1f30b07,
+	0x4ff3b52ca1e858d2,
+};
+// Checks the empty input, then grows the buffer one letter at a time ('a', 'b', ...) up to 16 bytes against VECTORS.
+fn void sweep()
+{
+	char[20] buf;
+
+	ulong got = wyhash2::hash({});
+	test::@check(got == VECTORS[0], "Empty hash failed (%x expected // %x actual).", VECTORS[0], got);
+
+	for (usz i = 0; i < 16; ++i)
+	{
+		buf[i] = 'a' + (char)i;
+
+		got = wyhash2::hash(buf[:i + 1]);
+
+		test::@check(got == VECTORS[i + 1],
+			"Failed on '%s', length %d (%x expected // %x actual).", (ZString)&buf, i + 1, VECTORS[i + 1], got);
+	}
+}