Add wyhash2 and metro64/128 modern hashing (#2293)

* add wyhash2, metro64, and metro128 hashes; best performing non-crypto hash functions
* add superfast 64-bit a5hash; not streamed, no 128-bit impl
* add komihash and associated tests/benchmarks
---------

Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
This commit is contained in:
Zack Puhl
2025-07-19 18:06:10 -04:00
committed by GitHub
parent 1218afd51f
commit ed92476916
11 changed files with 1263 additions and 0 deletions

View File

@@ -0,0 +1,94 @@
// Copyright (c) 2025 Zack Puhl <github@xmit.xyz>. All rights reserved.
// Use of this source code is governed by the MIT license
// a copy of which can be found in the LICENSE_STDLIB file.
module non_crypto_benchmarks;
// Iteration budget shared by every benchmark in this file.
const usz COMMON_ITERATIONS = 1 << 18;

// Benchmark payloads: N bytes of 0xA5, where N is the suffix of the constant's name.
const char[] COMMON_1 = { 0xA5 };
const char[] COMMON_4 = { [0..3] = 0xA5 };
const char[] COMMON_8 = { [0..7] = 0xA5 };
const char[] COMMON_16 = { [0..15] = 0xA5 };
const char[] COMMON_32 = { [0..31] = 0xA5 };
const char[] COMMON_64 = { [0..63] = 0xA5 };
const char[] COMMON_128 = { [0..127] = 0xA5 };
const char[] COMMON_1024 = { [0..1023] = 0xA5 };
// Start-up hook (@init) that configures the benchmark runner for this file:
// 3 warmup iterations and an iteration cap of COMMON_ITERATIONS + 3.
fn void initialize_bench() @init
{
set_benchmark_warmup_iterations(3);
// NOTE(review): the "+ 3" presumably leaves room for the 3 warmup rounds above — confirm against the runner.
set_benchmark_max_iterations(COMMON_ITERATIONS + 3);
}
// =======================================================================================
module non_crypto_benchmarks @benchmark;
import std::hash;
// Each benchmark hashes one fixed COMMON_<N> payload with one hash function.
// Naming scheme: <hash>_<payload size in bytes>.

// ---- 1-byte payload ----
fn void fnv64a_1() => fnv64a::hash(COMMON_1);
fn void fnv32a_1() => fnv32a::hash(COMMON_1);
fn void wyhash2_1() => wyhash2::hash(COMMON_1);
fn void metro64_1() => metro64::hash(COMMON_1);
fn void metro128_1() => metro128::hash(COMMON_1);
fn void a5hash_1() => a5hash::hash(COMMON_1);
fn void komi_1() => komi::hash(COMMON_1);
// ---- 4-byte payload ----
fn void fnv64a_4() => fnv64a::hash(COMMON_4);
fn void fnv32a_4() => fnv32a::hash(COMMON_4);
fn void wyhash2_4() => wyhash2::hash(COMMON_4);
fn void metro64_4() => metro64::hash(COMMON_4);
fn void metro128_4() => metro128::hash(COMMON_4);
fn void a5hash_4() => a5hash::hash(COMMON_4);
fn void komi_4() => komi::hash(COMMON_4);
// ---- 8-byte payload ----
fn void fnv64a_8() => fnv64a::hash(COMMON_8);
fn void fnv32a_8() => fnv32a::hash(COMMON_8);
fn void wyhash2_8() => wyhash2::hash(COMMON_8);
fn void metro64_8() => metro64::hash(COMMON_8);
fn void metro128_8() => metro128::hash(COMMON_8);
fn void a5hash_8() => a5hash::hash(COMMON_8);
fn void komi_8() => komi::hash(COMMON_8);
// ---- 16-byte payload (largest size wyhash2 accepts) ----
fn void fnv64a_16() => fnv64a::hash(COMMON_16);
fn void fnv32a_16() => fnv32a::hash(COMMON_16);
fn void wyhash2_16() => wyhash2::hash(COMMON_16);
fn void metro64_16() => metro64::hash(COMMON_16);
fn void metro128_16() => metro128::hash(COMMON_16);
fn void a5hash_16() => a5hash::hash(COMMON_16);
fn void komi_16() => komi::hash(COMMON_16);
// ---- 32-byte payload ----
fn void fnv64a_32() => fnv64a::hash(COMMON_32);
fn void fnv32a_32() => fnv32a::hash(COMMON_32);
// NOTE: wyhash2 cannot be used on inputs > 16 bytes.
fn void metro64_32() => metro64::hash(COMMON_32);
fn void metro128_32() => metro128::hash(COMMON_32);
fn void a5hash_32() => a5hash::hash(COMMON_32);
fn void komi_32() => komi::hash(COMMON_32);
// ---- 64-byte payload ----
fn void fnv64a_64() => fnv64a::hash(COMMON_64);
fn void fnv32a_64() => fnv32a::hash(COMMON_64);
// NOTE: wyhash2 cannot be used on inputs > 16 bytes.
fn void metro64_64() => metro64::hash(COMMON_64);
fn void metro128_64() => metro128::hash(COMMON_64);
fn void a5hash_64() => a5hash::hash(COMMON_64);
fn void komi_64() => komi::hash(COMMON_64);
// ---- 128-byte payload ----
fn void fnv64a_128() => fnv64a::hash(COMMON_128);
fn void fnv32a_128() => fnv32a::hash(COMMON_128);
// NOTE: wyhash2 cannot be used on inputs > 16 bytes.
fn void metro64_128() => metro64::hash(COMMON_128);
fn void metro128_128() => metro128::hash(COMMON_128);
fn void a5hash_128() => a5hash::hash(COMMON_128);
fn void komi_128() => komi::hash(COMMON_128);
// ---- 1024-byte payload ----
fn void fnv64a_1024() => fnv64a::hash(COMMON_1024);
fn void fnv32a_1024() => fnv32a::hash(COMMON_1024);
// NOTE: wyhash2 cannot be used on inputs > 16 bytes.
fn void metro64_1024() => metro64::hash(COMMON_1024);
fn void metro128_1024() => metro128::hash(COMMON_1024);
fn void a5hash_1024() => a5hash::hash(COMMON_1024);
fn void komi_1024() => komi::hash(COMMON_1024);

96
lib/std/hash/a5hash.c3 Normal file
View File

@@ -0,0 +1,96 @@
// Copyright (c) 2025 Zack Puhl <github@xmit.xyz>. All rights reserved.
// Use of this source code is governed by the MIT license
// a copy of which can be found in the LICENSE_STDLIB file.
//
// An implementation of Aleksey Vaneev's a5hash, version 5.16, in C3:
// https://github.com/avaneev/a5hash
//
// The license for a5hash from the above repository at the time of writing is as follows:
//
// >> MIT License
// >>
// >> Copyright (c) 2025 Aleksey Vaneev
// >>
// >> Permission is hereby granted, free of charge, to any person obtaining a copy
// >> of this software and associated documentation files (the "Software"), to deal
// >> in the Software without restriction, including without limitation the rights
// >> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// >> copies of the Software, and to permit persons to whom the Software is
// >> furnished to do so, subject to the following conditions:
// >>
// >> The above copyright notice and this permission notice shall be included in all
// >> copies or substantial portions of the Software.
// >>
// >> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// >> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// >> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// >> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// >> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// >> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// >> SOFTWARE.
//
//
module std::hash::a5hash;
// Full 64x64 -> 128-bit multiply of #u and #v.
// The low 64 bits of the product are stored in #lo and the high 64 bits in #hi,
// overwriting both. The product is computed before either output is written,
// so the outputs may name the same variables as the inputs.
macro @a5mul(#u, #v, #lo, #hi) @local
{
	uint128 product = (uint128)#u * (uint128)#v;
	#lo = (ulong)product;
	#hi = (ulong)(product >> 64);
}
// One-shot a5hash: returns a 64-bit hash of `data`, optionally perturbed by `seed`.
// Inputs of 0-3 bytes, 4-16 bytes and more than 16 bytes take separate paths below;
// all of them end in the same two finishing multiplies.
fn ulong hash(char[] data, ulong seed = 0)
{
// Both running seeds start from a fixed constant XORed with the input length.
ulong seed1 = 0x243F_6A88_85A3_08D3 ^ data.len;
ulong seed2 = 0x4528_21E6_38D0_1377 ^ data.len;
// Alternating-bit masks: val10 selects the odd bits, val01 the even bits.
ulong val10 = 0xAAAA_AAAA_AAAA_AAAA;
ulong val01 = 0x5555_5555_5555_5555;
ulong a, b;
// Mix the two bit-halves of the user seed into seed1/seed2 with one 128-bit multiply.
@a5mul(seed2 ^ (seed & val10), seed1 ^ (seed & val01), seed1, seed2);
val10 ^= seed2;
if (@likely(data.len > 3))
{
if (data.len > 16)
{
val01 ^= seed1;
// Consume 16 bytes per round while MORE than 16 bytes remain, so 1..16 bytes are left afterwards.
for (; data.len > 16; data = data[16..])
{
@a5mul(
@unaligned_load(((ulong*)data.ptr)[0], 1) ^ seed1,
@unaligned_load(((ulong*)data.ptr)[1], 1) ^ seed2,
seed1, seed2
);
seed1 += val01;
seed2 += val10;
}
// Read the final 16 bytes of the message. With fewer than 16 bytes left this reaches back
// into bytes the loop already consumed, which is in bounds because the input was > 16 bytes.
a = @unaligned_load(*(ulong*)(data.ptr + (uptr)data.len - 16), 1);
b = @unaligned_load(*(ulong*)(data.ptr + (uptr)data.len - 8), 1);
}
else
{
// 4..16 bytes: build a and b from four (possibly overlapping) 32-bit reads.
// (data.len >> 3) * 4 is 0 for lengths 4..7, 4 for lengths 8..15 and 8 for length 16.
a = ((ulong)@unaligned_load(*(uint*)&data[0], 1) << 32)
| @unaligned_load(*(uint*)&data[^4], 1);
b = ((ulong)@unaligned_load(*(uint*)&data[(data.len >> 3) * 4], 1) << 32)
| @unaligned_load(*(uint*)(data.ptr + data.len - 4 - (data.len >> 3) * 4), 1);
}
}
else
{
// 0..3 bytes: pack the available bytes little-endian into a; an empty input gives a == 0.
a = data.len ? (data[0] | (data.len > 1 ? ((ulong)data[1] << 8) : 0) | (data.len > 2 ? ((ulong)data[2] << 16) : 0)) : 0;
b = 0;
}
// Finish: fold the last message words into the seeds, then fold the seeds into the result.
@a5mul(a ^ seed1, b ^ seed2, seed1, seed2);
@a5mul(val01 ^ seed1, seed2, a, b);
return a ^ b;
}

156
lib/std/hash/komi.c3 Normal file
View File

@@ -0,0 +1,156 @@
// Copyright (c) 2025 Zack Puhl <github@xmit.xyz>. All rights reserved.
// Use of this source code is governed by the MIT license
// a copy of which can be found in the LICENSE_STDLIB file.
//
// An implementation of Aleksey Vaneev's komihash, version 5.27, in C3:
// https://github.com/avaneev/komihash
//
// The license for komihash from the above repository at the time of writing is as follows:
//
// >> MIT License
// >>
// >> Copyright (c) 2021-2025 Aleksey Vaneev
// >>
// >> Permission is hereby granted, free of charge, to any person obtaining a copy
// >> of this software and associated documentation files (the "Software"), to deal
// >> in the Software without restriction, including without limitation the rights
// >> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// >> copies of the Software, and to permit persons to whom the Software is
// >> furnished to do so, subject to the following conditions:
// >>
// >> The above copyright notice and this permission notice shall be included in all
// >> copies or substantial portions of the Software.
// >>
// >> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// >> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// >> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// >> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// >> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// >> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// >> SOFTWARE.
//
//
module std::hash::komi;
// Full 64x64 -> 128-bit multiply of #u and #v.
// The low 64 bits of the product REPLACE #lo, while the high 64 bits are ADDED to #hi
// (with wrap-around); #hi is accumulated, not overwritten.
macro @komimul(#u, #v, #lo, #hi) @local
{
	uint128 product = (uint128)#u * (uint128)#v;
	#lo = (ulong)product;
	#hi += (ulong)(product >> 64);
}
// One-shot komihash: returns a 64-bit hash of `data`, optionally perturbed by `seed`.
// Three paths are selected by input length (< 16, 16..31, >= 32 bytes). Each path leaves its
// last two message words in r1h/r2h, which are then consumed by the shared finishing rounds.
fn ulong hash(char[] data, ulong seed = 0)
{
// The user seed is split by alternating-bit masks across the two initial seeds.
ulong seed1 = 0x243F_6A88_85A3_08D3 ^ (seed & 0x5555_5555_5555_5555);
ulong seed5 = 0x4528_21E6_38D0_1377 ^ (seed & 0xAAAA_AAAA_AAAA_AAAA);
ulong r1h, r2h;
// HASHROUND
@komimul(seed1, seed5, seed1, seed5);
seed1 ^= seed5;
if (@likely(data.len < 16))
{
r1h = seed1;
r2h = seed5;
if (@likely(data.len >= 8))
{
// 8..15 bytes: the first 8 bytes go into r1h; the remaining 0..7 bytes go into r2h
// together with a single 1 bit placed directly above the last message byte.
r1h ^= @unaligned_load(*(ulong*)data.ptr, 1);
r2h ^= (data.len < 12)
? ((data[data.len - 3] | ((ulong)data[data.len - 2] << 8) | ((ulong)data[data.len - 1] << 16) | ((ulong)1 << 24)) >> ((data.len * 8) ^ 88))
: (((@unaligned_load(*(uint*)&data[^4], 1) | ((ulong)1 << 32)) >> (128 - data.len * 8)) << 32 | @unaligned_load(*(uint*)&data[8], 1));
}
else if (data.len != 0)
{
// 1..7 bytes: all message bytes plus the marker bit go into r1h.
r1h ^= (data.len < 4)
? (((ulong)1 << (data.len * 8)) ^ data[0] ^ (data.len > 1 ? (ulong)data[1] << 8 : 0) ^ (data.len > 2 ? (ulong)data[2] << 16 : 0))
: (((@unaligned_load(*(uint*)&data[^4], 1) | ((ulong)1 << 32)) >> (64 - data.len * 8)) << 32 | @unaligned_load(*(uint*)&data[0], 1));
}
// An empty input leaves r1h/r2h equal to the seeds.
}
else if (data.len < 32)
{
// HASH16
@komimul(
@unaligned_load(*(ulong*)&data[0], 1) ^ seed1,
@unaligned_load(*(ulong*)&data[8], 1) ^ seed5,
seed1, seed5
);
seed1 ^= seed5;
// The padded tail word is taken from the last 8 bytes of the input: the load is shifted so
// that only the bytes past the last whole word remain, with a 1 bit above them.
if (data.len < 24)
{
r1h = (((@unaligned_load(*(ulong*)&data[^8], 1) >> 8) | ((ulong)1 << 56)) >> (((int)(data.len * 8) ^ 184))) ^ seed1;
r2h = seed5;
}
else
{
r1h = @unaligned_load(*(ulong*)&data[16], 1) ^ seed1;
r2h = (((@unaligned_load(*(ulong*)&data[^8], 1) >> 8) | ((ulong)1 << 56)) >> (((int)(data.len * 8) ^ 248))) ^ seed5;
}
}
else
{
if (data.len >= 64)
{
// Eight lanes: lanes 0..3 derive from seed1 and lanes 4..7 from seed5.
ulong[8] seeds = {
seed1, 0x1319_8A2E_0370_7344 ^ seed1, 0xA409_3822_299F_31D0 ^ seed1, 0x082E_FA98_EC4E_6C89 ^ seed1,
seed5, 0xBE54_66CF_34E9_0C6C ^ seed5, 0xC0AC_29B7_C97C_50DD ^ seed5, 0x3F84_D5B5_B547_0917 ^ seed5,
};
// HASHLOOP64
for (; data.len >= 64; data = data[64:^64])
{
// Lane x multiplies the word at offset 8*x with the word at offset 32 + 8*x.
$for var $x = 0; $x < 4; ++$x :
@komimul(
@unaligned_load(*(ulong*)&data[0 + ($x * 8)], 1) ^ seeds[$x],
@unaligned_load(*(ulong*)&data[32 + ($x * 8)], 1) ^ seeds[4 + $x],
seeds[$x], seeds[4 + $x]
);
$endfor
// Cross-mix the low lanes with the high lanes.
seeds[3] ^= seeds[6];
seeds[0] ^= seeds[7];
seeds[2] ^= seeds[5];
seeds[1] ^= seeds[4];
}
// Collapse the eight lanes back into the two running seeds.
seed1 = seeds[0] ^ seeds[1] ^ seeds[2] ^ seeds[3];
seed5 = seeds[4] ^ seeds[5] ^ seeds[6] ^ seeds[7];
}
// Consume the remaining whole 16-byte blocks, leaving 0..15 bytes.
for (; data.len >= 16; data = data[16:^16])
{
@komimul(
@unaligned_load(*(ulong*)&data[0], 1) ^ seed1,
@unaligned_load(*(ulong*)&data[8], 1) ^ seed5,
seed1, seed5
);
seed1 ^= seed5;
}
if (data.len < 8)
{
// NOTE: This is translated from the original code. It grabs the last ulong off the buffer even though the
// data slice is less than 8 bytes. The read reaches back into bytes that were already consumed, which is
// safe because this branch is only reached when the original data slice length is >= 32.
r1h = (((@unaligned_load(*(ulong*)(data.ptr + data.len - 8), 1) >> 8) | ((ulong)1 << 56)) >> ((data.len * 8) ^ 0x38)) ^ seed1;
r2h = seed5;
}
else
{
r1h = @unaligned_load(*(ulong*)data.ptr, 1) ^ seed1;
r2h = (((@unaligned_load(*(ulong*)&data[^8], 1) >> 8) | ((ulong)1 << 56)) >> ((data.len * 8) ^ 0x78)) ^ seed5;
}
}
// HASHFIN
@komimul(r1h, r2h, seed1, seed5);
seed1 ^= seed5;
@komimul(seed1, seed5, seed1, seed5);
seed1 ^= seed5;
return seed1;
}

149
lib/std/hash/metro128.c3 Normal file
View File

@@ -0,0 +1,149 @@
// Copyright (c) 2025 Zack Puhl <github@xmit.xyz>. All rights reserved.
// Use of this source code is governed by the MIT license
// a copy of which can be found in the LICENSE_STDLIB file.
//
// MetroHash64 and MetroHash128 are different enough to warrant their own
// modules, and there would be no reason to create a generic module just
// for the two. If you inspect the differences, the only shared portion
// of the entire process is the `update` method.
//
module std::hash::metro128;
// The four MetroHash128 multiplier constants, referred to as K[0]..K[3] throughout this module.
const ulong[4] K @local = {
	0xC83A_91E1,
	0x8648_DBDB,
	0x7BDE_C03B,
	0x2F58_70A5,
};
// Streaming state for MetroHash128: set up with init(), fed with update(), finished with final().
struct MetroHash128
{
union
{
// Four 64-bit lanes mixed by update() and final().
ulong[4] state;
// Overlays the first 16 bytes of `state`; final() returns this as the digest.
uint128 result;
}
union
{
// The 32-byte staging buffer viewed as four 64-bit words (used when a full block is mixed)...
ulong[4] stomach_64;
// ...and as raw bytes (used when input that does not yet fill a block is copied in).
char[32] stomach;
}
// Total number of input bytes passed to update() so far; `bytes % 32` is the staging buffer fill level.
ulong bytes;
}
// One-shot MetroHash128: hashes all of `data` with the given `seed` and returns the 128-bit digest.
// Equivalent to running init / update / final on a fresh MetroHash128.
fn uint128 hash(char[] data, ulong seed = 0)
{
	MetroHash128 hasher;
	hasher.init(seed);
	hasher.update(data);
	uint128 digest = hasher.final();
	return digest;
}
// (Re)initialises the hasher for a new message using `seed`.
// The four state lanes are derived from the seed and the K constants. The byte counter is
// reset as well, so a MetroHash128 that has already been used can be initialised again and
// reused; without the reset, update() and final() would continue from the old byte count
// and staging-buffer offset.
fn void MetroHash128.init(&self, ulong seed = 0)
{
	self.state = {
		(seed - K[0]) * K[3],
		(seed + K[1]) * K[2],
		(seed + K[0]) * K[2],
		(seed - K[1]) * K[3],
	};
	// Nothing has been consumed yet; this also marks the staging buffer as empty.
	self.bytes = 0;
}
// Feeds `data` into the hasher. May be called repeatedly; input is mixed into the state in
// 32-byte blocks and any remainder is kept in the staging buffer for the next call or final().
fn void MetroHash128.update(&self, char[] data)
{
	ulong buffered = self.bytes % 32;
	if (buffered != 0)
	{
		// A previous call left a partial block behind: top it up first.
		ulong to_fill = min(data.len, 32 - buffered);
		self.stomach[buffered:to_fill] = data[:to_fill];
		data = data[to_fill..];
		self.bytes += to_fill;
		// Still not a full block: wait for more input (or for final()).
		if (self.bytes % 32 != 0) return;
		// The staging buffer is full: mix it as one block, lane by lane in order.
		$for var $lane = 0; $lane < 4; ++$lane :
			self.state[$lane] += self.stomach_64[$lane] * K[$lane];
			self.state[$lane] = self.state[$lane].rotr(29) + self.state[($lane + 2) % 4];
		$endfor
	}
	self.bytes += data.len;
	// Mix whole 32-byte blocks straight from the caller's buffer.
	while (data.len >= 32)
	{
		$for var $lane = 0; $lane < 4; ++$lane :
			self.state[$lane] += @unaligned_load(((ulong*)data.ptr)[$lane], 1) * K[$lane];
			self.state[$lane] = self.state[$lane].rotr(29) + self.state[($lane + 2) % 4];
		$endfor
		data = data[32:^32];
	}
	// Keep whatever is left (fewer than 32 bytes) at the start of the staging buffer.
	if (data.len > 0) self.stomach[:data.len] = data[..];
}
// Finishes the hash and returns the 128-bit digest.
// The state is modified in place and is not reset here, so the hasher must be
// initialised again before it is used for another message.
fn uint128 MetroHash128.final(&self)
{
// Extra mixing of all four lanes, only needed when at least one 32-byte block has been consumed.
if (self.bytes >= 32)
{
self.state[2] ^= (((self.state[0] + self.state[3]) * K[0]) + self.state[1]).rotr(21) * K[1];
self.state[3] ^= (((self.state[1] + self.state[2]) * K[1]) + self.state[0]).rotr(21) * K[0];
self.state[0] ^= (((self.state[0] + self.state[2]) * K[0]) + self.state[3]).rotr(21) * K[1];
self.state[1] ^= (((self.state[1] + self.state[3]) * K[1]) + self.state[2]).rotr(21) * K[0];
}
// The 0..31 bytes still waiting in the staging buffer. They are consumed below in
// chunks of 16, 8, 4, 2 and 1 bytes, each step dropping what it has used.
char[] final_data = self.stomach[:(self.bytes % 32)];
if (final_data.len >= 16)
{
self.state[0] += ((ulong*)final_data.ptr)[0] * K[2]; self.state[0] = self.state[0].rotr(33) * K[3];
self.state[1] += ((ulong*)final_data.ptr)[1] * K[2]; self.state[1] = self.state[1].rotr(33) * K[3];
self.state[0] ^= ((self.state[0] * K[2]) + self.state[1]).rotr(45) * K[1];
self.state[1] ^= ((self.state[1] * K[3]) + self.state[0]).rotr(45) * K[0];
final_data = final_data[16:^16];
}
if (final_data.len >= 8)
{
self.state[0] += @unaligned_load(((ulong*)final_data.ptr)[0], 1) * K[2]; self.state[0] = self.state[0].rotr(33) * K[3];
self.state[0] ^= ((self.state[0] * K[2]) + self.state[1]).rotr(27) * K[1];
final_data = final_data[8:^8];
}
if (final_data.len >= 4)
{
self.state[1] += @unaligned_load(((uint*)final_data.ptr)[0], 1) * K[2]; self.state[1] = self.state[1].rotr(33) * K[3];
self.state[1] ^= ((self.state[1] * K[3]) + self.state[0]).rotr(46) * K[0];
final_data = final_data[4:^4];
}
if (final_data.len >= 2)
{
self.state[0] += @unaligned_load(((ushort*)final_data.ptr)[0], 1) * K[2]; self.state[0] = self.state[0].rotr(33) * K[3];
self.state[0] ^= ((self.state[0] * K[2]) + self.state[1]).rotr(22) * K[1];
final_data = final_data[2:^2];
}
if (final_data.len >= 1)
{
self.state[1] += ((char*)final_data.ptr)[0] * K[2]; self.state[1] = self.state[1].rotr(33) * K[3];
self.state[1] ^= ((self.state[1] * K[3]) + self.state[0]).rotr(58) * K[0];
}
// Final avalanche over lanes 0 and 1, the two lanes that `result` overlays.
self.state[0] += ((self.state[0] * K[0]) + self.state[1]).rotr(13);
self.state[1] += ((self.state[1] * K[1]) + self.state[0]).rotr(37);
self.state[0] += ((self.state[0] * K[2]) + self.state[1]).rotr(13);
self.state[1] += ((self.state[1] * K[3]) + self.state[0]).rotr(37);
return self.result;
}

152
lib/std/hash/metro64.c3 Normal file
View File

@@ -0,0 +1,152 @@
// Copyright (c) 2025 Zack Puhl <github@xmit.xyz>. All rights reserved.
// Use of this source code is governed by the MIT license
// a copy of which can be found in the LICENSE_STDLIB file.
//
// MetroHash64 and MetroHash128 are different enough to warrant their own
// modules, and there would be no reason to create a generic module just
// for the two. If you inspect the differences, the only shared portion
// of the entire process is the `update` method.
//
module std::hash::metro64;
// The four MetroHash64 multiplier constants, referred to as K[0]..K[3] throughout this module.
const ulong[4] K @local = {
	0xD6D0_18F5,
	0xA2AA_033B,
	0x6299_2FC1,
	0x30BC_5B29,
};
// Streaming state for MetroHash64: set up with init(), fed with update(), finished with final().
struct MetroHash64
{
union
{
// Four 64-bit lanes mixed by update() and final().
ulong[4] state;
// Overlays state[0]; final() returns this as the digest.
ulong result;
}
union
{
// The 32-byte staging buffer viewed as four 64-bit words (used when a full block is mixed)...
ulong[4] stomach_64;
// ...and as raw bytes (used when input that does not yet fill a block is copied in).
char[32] stomach;
}
// Total number of input bytes passed to update() so far; `bytes % 32` is the staging buffer fill level.
ulong bytes;
// Seed-derived value computed by init(); final() adds it back in once at least 32 bytes were hashed.
ulong vseed;
}
// One-shot MetroHash64: hashes all of `data` with the given `seed` and returns the 64-bit digest.
// Equivalent to running init / update / final on a fresh MetroHash64.
fn ulong hash(char[] data, ulong seed = 0)
{
	MetroHash64 hasher;
	hasher.init(seed);
	hasher.update(data);
	ulong digest = hasher.final();
	return digest;
}
// (Re)initialises the hasher for a new message using `seed`.
// All four state lanes start from the same seed-derived value, which is also kept in `vseed`
// for use by final(). The byte counter is reset as well, so a MetroHash64 that has already
// been used can be initialised again and reused; without the reset, update() and final()
// would continue from the old byte count and staging-buffer offset.
fn void MetroHash64.init(&self, ulong seed = 0)
{
	self.vseed = (seed + K[2]) * K[0];
	self.state[0] = self.vseed;
	self.state[1] = self.vseed;
	self.state[2] = self.vseed;
	self.state[3] = self.vseed;
	// Nothing has been consumed yet; this also marks the staging buffer as empty.
	self.bytes = 0;
}
// Feeds `data` into the hasher. May be called repeatedly; input is mixed into the state in
// 32-byte blocks and any remainder is kept in the staging buffer for the next call or final().
fn void MetroHash64.update(&self, char[] data)
{
	ulong buffered = self.bytes % 32;
	if (buffered != 0)
	{
		// A previous call left a partial block behind: top it up first.
		ulong to_fill = min(data.len, 32 - buffered);
		self.stomach[buffered:to_fill] = data[:to_fill];
		data = data[to_fill..];
		self.bytes += to_fill;
		// Still not a full block: wait for more input (or for final()).
		if (self.bytes % 32 != 0) return;
		// The staging buffer is full: mix it as one block, lane by lane in order.
		$for var $lane = 0; $lane < 4; ++$lane :
			self.state[$lane] += self.stomach_64[$lane] * K[$lane];
			self.state[$lane] = self.state[$lane].rotr(29) + self.state[($lane + 2) % 4];
		$endfor
	}
	self.bytes += data.len;
	// Mix whole 32-byte blocks straight from the caller's buffer.
	while (data.len >= 32)
	{
		$for var $lane = 0; $lane < 4; ++$lane :
			self.state[$lane] += @unaligned_load(((ulong*)data.ptr)[$lane], 1) * K[$lane];
			self.state[$lane] = self.state[$lane].rotr(29) + self.state[($lane + 2) % 4];
		$endfor
		data = data[32:^32];
	}
	// Keep whatever is left (fewer than 32 bytes) at the start of the staging buffer.
	if (data.len > 0) self.stomach[:data.len] = data[..];
}
// Finishes the hash and returns the 64-bit digest.
// The state is modified in place and is not reset here, so the hasher must be
// initialised again before it is used for another message.
fn ulong MetroHash64.final(&self)
{
// When at least one 32-byte block has been consumed, fold all four lanes into state[0].
if (self.bytes >= 32)
{
self.state[2] ^= (((self.state[0] + self.state[3]) * K[0]) + self.state[1]).rotr(37) * K[1];
self.state[3] ^= (((self.state[1] + self.state[2]) * K[1]) + self.state[0]).rotr(37) * K[0];
self.state[0] ^= (((self.state[0] + self.state[2]) * K[0]) + self.state[3]).rotr(37) * K[1];
self.state[1] ^= (((self.state[1] + self.state[3]) * K[1]) + self.state[2]).rotr(37) * K[0];
self.state[0] = self.vseed + (self.state[0] ^ self.state[1]);
}
// The 0..31 bytes still waiting in the staging buffer. They are consumed below in
// chunks of 16, 8, 4, 2 and 1 bytes, each step dropping what it has used.
char[] final_data = self.stomach[:(self.bytes % 32)];
if (final_data.len >= 16)
{
// state[1] and state[2] are overwritten here and used as scratch lanes; only state[0] carries on.
self.state[1] = self.state[0] + @unaligned_load(((ulong*)final_data.ptr)[0], 1) * K[2]; self.state[1] = self.state[1].rotr(29) * K[3];
self.state[2] = self.state[0] + @unaligned_load(((ulong*)final_data.ptr)[1], 1) * K[2]; self.state[2] = self.state[2].rotr(29) * K[3];
self.state[1] ^= (self.state[1] * K[0]).rotr(21) + self.state[2];
self.state[2] ^= (self.state[2] * K[3]).rotr(21) + self.state[1];
self.state[0] += self.state[2];
final_data = final_data[16:^16];
}
if (final_data.len >= 8)
{
self.state[0] += @unaligned_load(((ulong*)final_data.ptr)[0], 1) * K[3];
self.state[0] ^= self.state[0].rotr(55) * K[1];
final_data = final_data[8:^8];
}
if (final_data.len >= 4)
{
self.state[0] += @unaligned_load(((uint*)final_data.ptr)[0], 1) * K[3];
self.state[0] ^= self.state[0].rotr(26) * K[1];
final_data = final_data[4:^4];
}
if (final_data.len >= 2)
{
self.state[0] += @unaligned_load(((ushort*)final_data.ptr)[0], 1) * K[3];
self.state[0] ^= self.state[0].rotr(48) * K[1];
final_data = final_data[2:^2];
}
if (final_data.len >= 1)
{
self.state[0] += ((char*)final_data.ptr)[0] * K[3];
self.state[0] ^= self.state[0].rotr(37) * K[1];
}
// Final avalanche of state[0], the lane that `result` overlays.
self.state[0] ^= self.state[0].rotr(28);
self.state[0] *= K[0];
self.state[0] ^= self.state[0].rotr(29);
return self.result;
}

56
lib/std/hash/wyhash2.c3 Normal file
View File

@@ -0,0 +1,56 @@
// Copyright (c) 2025 Zack Puhl <github@xmit.xyz>. All rights reserved.
// Use of this source code is governed by the MIT license
// a copy of which can be found in the LICENSE_STDLIB file.
//
// An implementation of Wang Yi's wyhash(2) algorithm in C3:
// https://github.com/wangyi-fudan/wyhash
//
module std::hash::wyhash2;
// Packs a 1..3 byte input into one value: the first byte in bits 16..23, the byte at
// index len / 2 in bits 8..15 and the last byte in bits 0..7. For inputs shorter than
// three bytes the same byte is picked more than once. `len` must be at least 1.
fn ulong wyr3(char* in, usz len) @inline
{
	ulong first = in[0];
	ulong middle = in[len >> 1];
	ulong last = in[len - 1];
	return (first << 16) | (middle << 8) | last;
}
// See: https://docs.google.com/spreadsheets/d/1HmqDj-suH4wBFNg7etwE8WVBlfCufvD5-gAnIENs94k/edit?gid=1915335726#gid=1915335726
// Credit to article:
// https://medium.com/@tprodanov/benchmarking-non-cryptographic-hash-functions-in-rust-2e6091077d11
//
// wyhash2 has a >90% chance of collisions when its input data is above 16 bytes in length.
// However, it is the fastest performing and most evenly randomized hash for very low-length inputs,
// making it an ideal candidate for hashing primitive data types quickly and making things like hash
// tables even faster. Therefore, a 16-byte input limit is imposed on all calls to the hash function.
//
<*
 @require input.len <= 16 : `wyhash2 is not useable for inputs over 16 bytes in length.`
*>
fn ulong hash(char[] input, ulong seed = 0)
{
	// Multiplier constant used in both mixing steps below.
	ulong secret = 0xe703_7ed1_a0b4_28db;
	usz len = input.len;
	// Two message words; both stay zero for an empty input, and b stays zero for 1..3 bytes.
	ulong a, b;
	seed ^= 0xa076_1d64_78bd_642f;
	if (@likely(len <= 8)) // more likely to encounter 8-byte or lower type here
	{
		if (@likely(len >= 4))
		{
			// First and last four bytes, each widened to 64 bits; they overlap when len < 8.
			a = (ulong)@unaligned_load(*(uint*)input.ptr, 1);
			b = (ulong)@unaligned_load(*(uint*)&input[^4], 1);
		}
		else if (len > 0)
		{
			a = wyr3(input, len);
		}
	}
	else
	{
		// 9..16 bytes: first and last eight bytes; they overlap when len < 16.
		a = @unaligned_load(*(ulong*)input.ptr, 1);
		b = @unaligned_load(*(ulong*)&input[^8], 1);
	}
	// Mix: 128-bit multiply, then fold the two halves together; repeated once with the length.
	uint128 product = ((uint128)a ^ secret) * ((uint128)b ^ seed);
	ulong folded = (ulong)product ^ (ulong)(product >> 64);
	product = ((uint128)secret ^ len) * (uint128)folded;
	return (ulong)product ^ (ulong)(product >> 64);
}

View File

@@ -26,6 +26,7 @@
- Check unaligned array access.
- Add "@structlike" for typedefs.
- "poison" the current function early when a declaration can't be correctly resolved.
- Add komihash, a5hash, metrohash64, metrohash128, and wyhash2 variants with tests/benchmark. #2293
### Fixes
- mkdir/rmdir would not work properly with substring paths on non-windows platforms.

View File

@@ -0,0 +1,71 @@
// Copyright (c) 2025 Zack Puhl <github@xmit.xyz>. All rights reserved.
// Use of this source code is governed by the MIT license
// a copy of which can be found in the LICENSE_STDLIB file.
module a5hash_tests @test;
import std::hash::a5hash;
// Known-answer test: a 25-byte string hashed with the default seed.
fn void vector_1()
{
	char[] message = "This is a test of a5hash.";
	ulong expected = 0xb163640b41959e6b;
	ulong actual = a5hash::hash(message);
	test::@check(actual == expected, "Hash mismatch (%x expected // %x actual).", expected, actual);
}
// Hashes the test string starting at byte 1. The result is not compared against anything;
// presumably the test only has to run to completion with an input pointer that is offset
// from the start of the string (i.e. exercise the unaligned loads) — TODO confirm intent.
fn void vector_offset()
{
	char[] message = "This is a test of a5hash.";
	char[] shifted = message[1..];
	ulong actual = a5hash::hash(shifted);
}
// Known-answer test: a 7-byte string hashed with the default seed.
fn void vector_2()
{
	char[] message = "7 chars";
	ulong expected = 0xe49a0cc72256bbac;
	ulong actual = a5hash::hash(message);
	test::@check(actual == expected, "Hash mismatch (%x expected // %x actual).", expected, actual);
}
// Expected a5hash values for the length sweep in sweep(): entry i is the hash of i bytes of '5'
// with seed 0x12ca6b4391e055fe, so the table covers input lengths 0..79.
const ulong[] EXPECTED_SWEEP = {
0xfa40305e7f876cde, 0xa462e33cc53262b4, 0x2373712194152d56, 0x948839e266ada547,
0x6d0c1912787ad5b8, 0x6c234caa741dc983, 0x2d45a051cf4c6588, 0x16c4a4f081d55f34,
0x2c06450d6f205485, 0x55296f9db1992971, 0x7329cd52328a9082, 0x74785ad80cb7e0cf,
0x13288aea2281441a, 0x194ae1b6f33f8a83, 0x165e812426f0e087, 0x84981c9506adefb3,
0x63270fe923b6935f, 0x42279ee502ecac49, 0x21d1c830488bc670, 0x4ea7876e46fdab41,
0x8af2d66eff7dbae9, 0x8892e79538d4d132, 0x823916d272cfaa91, 0x4187aa86dc29e276,
0xa2c8198dd1d883b0, 0x1f29c0e4fdcee024, 0xd27d762a99e59b08, 0x361f15e55087a978,
0x30272a11795ab5d4, 0xffb5f1f42efa5c1a, 0xbc9e503290940862, 0x325c94b294da618f,
0xa3da4b25911ac41f, 0x48b0e8e5c734e3bc, 0x5e7b0d5f607108b1, 0xaf44b82e7cc700c0,
0x08184e9ed8940831, 0x16493a88bb9bc76c, 0x6a542a2614969994, 0x7ea3a4295a702672,
0x4cdcae9d0feaae9a, 0xa51c82eb8201d45f, 0x4e4bce4bb46f20a4, 0xc4a97e28b2fa2993,
0xe6d48cc40df3905e, 0x684abe59a2db9061, 0x766f289e1ab66393, 0x46f4ab742979a005,
0xa2d0521bb9eb3653, 0xb41938068a89f9ae, 0x06c063a13b6c380d, 0xf53bf0e413522ab7,
0x61fa9597bf50dc2e, 0x5911a437240cd52b, 0xc8929ab341f26bb6, 0x46c99c2cfcb00d14,
0x22f46d19bf96ded8, 0xf63d8cf026448dcf, 0x7e6ab3b486536caa, 0xc2e53529793ce2a8,
0xcf9f59fb91b7893b, 0xf95d2ae3f31aaf04, 0x423472f722383ea1, 0xc42aebbb3980132a,
0x458efaa15efd35cb, 0xcd3e0989dc4e04ed, 0xa1c01cd5305af58a, 0x40bc73f12e21385f,
0x8464509b2b5438ec, 0x961baaded287ad53, 0x22b0a89537728143, 0x7826002b97c764a1,
0x25eed2c492550022, 0x833bb150f9e75741, 0xcc30d4982191208d, 0x1eaf0a962f3eedea,
0xe98219e502cce0d5, 0x2bfe6f0253fc07c1, 0x2f8a14428430d003, 0x30e1aa29ee8b7bea,
};
// Length sweep: hashes the first 0, 1, 2, ... bytes of a buffer filled with '5' using a fixed
// seed and compares each result with the matching EXPECTED_SWEEP entry.
fn void sweep()
{
	char[] input = { [0..EXPECTED_SWEEP.len] = '5' };
	for (usz len = 0; len < EXPECTED_SWEEP.len; ++len)
	{
		ulong expected = EXPECTED_SWEEP[len];
		ulong actual = a5hash::hash(input[:len], 0x12ca6b4391e055fe);
		test::@check(actual == expected, "Hash mismatch (%x expected // %x actual).", expected, actual);
	}
}

View File

@@ -0,0 +1,259 @@
// Copyright (c) 2025 Zack Puhl <github@xmit.xyz>. All rights reserved.
// Use of this source code is governed by the MIT license
// a copy of which can be found in the LICENSE_STDLIB file.
module komi_tests;
// Shared fixture for the bulk tests: after setup() runs, bulk[i] == i for every index 0..255.
char[256] bulk;

// Start-up hook (@init) that fills `bulk` with the byte values 0, 1, 2, ..., 255.
fn void setup() @init
{
	foreach (i, &slot : bulk) *slot = (char)i;
}
// ==========================================================================
module komi_tests @test;
import std::hash::komi;
// String inputs shared by the *_string_vectors tests; every EXPECTED_* string table is indexed in this order.
const char[][] INPUTS = {
"This is a 32-byte testing string",
"The cat is out of the bag",
"A 16-byte string",
"The new string",
"7 chars",
};
// Prefix lengths of `bulk` hashed by the *_bulk_vectors tests; every EXPECTED_BULK_* table is indexed in this order.
const usz[] BULK_INTERVALS = { 3, 6, 8, 12, 20, 31, 32, 40, 47, 48, 56, 64, 72, 80, 112, 132, 256 };
// Expected komihash values for INPUTS with the default seed.
const ulong[] EXPECTED_UNSEEDED = {
0x05ad960802903a9d,
0xd15723521d3c37b1,
0x467caa28ea3da7a6,
0xf18e67bc90c43233,
0x2c514f6e5dcb11cb,
};
// Expected komihash values for each BULK_INTERVALS prefix of `bulk` with the default seed.
const ulong[] EXPECTED_BULK_UNSEEDED = {
0x7a9717e9eea4be8b,
0xa56469564c2ea0ff,
0x00b4313a24431306,
0x64c2ad96013f70fe,
0x7a3888bc95545364,
0xc77e02ed4b201b9a,
0x256d74350303a1ba,
0x59609c71697bb9df,
0x36eb9e6a4c2c5e4b,
0x8dd56c332850baa6,
0xcbb722192b353999,
0x90b07e2158f88cc0,
0x24c9621701603741,
0x1d4c1d97ca684334,
0xd1a425d530652287,
0x72623be342c20ab5,
0x94c3dbdca59ddf57,
};
// Hashes every INPUTS string with the default seed and checks it against EXPECTED_UNSEEDED.
fn void unseeded_string_vectors()
{
	foreach (i, input : INPUTS)
	{
		ulong expected = EXPECTED_UNSEEDED[i];
		ulong actual = komi::hash(input);
		test::@check(actual == expected, "Hash mismatch for unseeded '%s' (%x expected // %x actual).", (ZString)input, expected, actual);
	}
}
// Hashes each BULK_INTERVALS-sized prefix of `bulk` with the default seed and checks it
// against EXPECTED_BULK_UNSEEDED. The table sizes are verified at compile time.
fn void unseeded_bulk_vectors()
{
	$assert BULK_INTERVALS.len == EXPECTED_BULK_UNSEEDED.len
		: "BULK_INTERVALS length does not match the given BULK test set.";
	foreach (i, interval : BULK_INTERVALS)
	{
		ulong expected = EXPECTED_BULK_UNSEEDED[i];
		ulong actual = komi::hash(bulk[:interval]);
		test::@check(actual == expected, "Hash mismatch for unseeded bulk interval %d (idx %d) (%x expected // %x actual).", interval, i, expected, actual);
	}
}
// First non-default seed used by the seeded tests.
const ulong TEST_SEED = 0x0123456789abcdef;
// Expected komihash values for INPUTS hashed with TEST_SEED.
const ulong[] EXPECTED_SEEDED = {
0x6ce66a2e8d4979a5,
0x5b1da0b43545d196,
0x26af914213d0c915,
0x62d9ca1b73250cb5,
0x90ab7c9f831cd940,
};
// Expected komihash values for each BULK_INTERVALS prefix of `bulk` hashed with TEST_SEED.
const ulong[] EXPECTED_BULK_SEEDED = {
0x84ae4eb65b96617e,
0xaceebc32a3c0d9e4,
0xdaa1a90ecb95f6f8,
0xec8eb3ef4af380b4,
0x07045bd31abba34c,
0xd5f619fb2e62c4ae,
0x5a336fd2c4c39abe,
0x0e870b4623eea8ec,
0xe552edd6bf419d1d,
0x37d170ddcb1223e6,
0x1cd89e708e5098b6,
0x765490569ccd77f2,
0x19e9d77b86d01ee8,
0x25f83ee520c1d241,
0xd6007417091cd4c0,
0x3e49c2d3727b9cc9,
0xb2b3405ee5d65f4c,
};
// Hashes every INPUTS string with TEST_SEED and checks it against EXPECTED_SEEDED.
fn void seeded_string_vectors()
{
	foreach (i, input : INPUTS)
	{
		ulong expected = EXPECTED_SEEDED[i];
		ulong actual = komi::hash(input, TEST_SEED);
		test::@check(actual == expected,
			"Hash mismatch for seed 0x123456789abcdef '%s' (%x expected // %x actual).",
			(ZString)input, expected, actual);
	}
}
// Hashes a 39-byte string starting at byte 1 with TEST_SEED. The result is not compared against
// anything; presumably the test only has to run to completion with an input pointer that is
// offset from the start of the string (i.e. exercise the unaligned loads) — TODO confirm intent.
fn void seeded_offset()
{
	char[] text = "kepkoewkopkfpokfoewkfokweokefkfkkpoewkf";
	char[] shifted = text[1..];
	ulong actual = komi::hash(shifted, TEST_SEED);
}
// Hashes each BULK_INTERVALS-sized prefix of `bulk` with TEST_SEED and checks it against
// EXPECTED_BULK_SEEDED. The table sizes are verified at compile time.
fn void seeded_bulk_vectors()
{
	$assert BULK_INTERVALS.len == EXPECTED_BULK_SEEDED.len
		: "BULK_INTERVALS length does not match the given BULK test set.";
	for (usz i = 0; i < BULK_INTERVALS.len; ++i)
	{
		char[] input = bulk[:BULK_INTERVALS[i]];
		ulong expected = EXPECTED_BULK_SEEDED[i];
		ulong actual = komi::hash(input, TEST_SEED);
		// The message names the seed so a failure here cannot be mistaken for the unseeded bulk test.
		test::@check(actual == expected,
			"Hash mismatch for seed 0x123456789abcdef bulk interval %d (idx %d) (%x expected // %x actual).",
			BULK_INTERVALS[i], i, expected, actual);
	}
}
// Second non-default seed used by the *_2 tests.
const ulong TEST_SEED_2 = 0x100;
// Expected komihash values for INPUTS hashed with TEST_SEED_2.
const ulong[] EXPECTED_SEEDED_2 = {
0x5f197b30bcec1e45,
0xa761280322bb7698,
0x11c31ccabaa524f1,
0x3a43b7f58281c229,
0xcff90b0466b7e3a2,
};
// Expected komihash values for each BULK_INTERVALS prefix of `bulk` hashed with TEST_SEED_2.
const ulong[] EXPECTED_BULK_SEEDED_2 = {
0x8ab53f45cc9315e3,
0xea606e43d1976ccf,
0x889b2f2ceecbec73,
0xacbec1886cd23275,
0x57c3affd1b71fcdb,
0x7ef6ba49a3b068c3,
0x49dbca62ed5a1ddf,
0x192848484481e8c0,
0x420b43a5edba1bd7,
0xd6e8400a9de24ce3,
0xbea291b225ff384d,
0x0ec94062b2f06960,
0xfa613272ecd49985,
0x76f0bb380bc207be,
0x4afb4e08ca77c020,
0x410f9c129ad88aea,
0x066c7b25f4f569ae,
};
// Hashes every INPUTS string with TEST_SEED_2 and checks it against EXPECTED_SEEDED_2.
fn void seeded_string_vectors_2()
{
	foreach (i, input : INPUTS)
	{
		ulong expected = EXPECTED_SEEDED_2[i];
		ulong actual = komi::hash(input, TEST_SEED_2);
		test::@check(actual == expected,
			"Hash mismatch for seed 0x100 '%s' (%x expected // %x actual).",
			(ZString)input, expected, actual);
	}
}
// Hashes each BULK_INTERVALS-sized prefix of `bulk` with TEST_SEED_2 and checks it against
// EXPECTED_BULK_SEEDED_2. The table sizes are verified at compile time.
fn void seeded_bulk_vectors_2()
{
	$assert BULK_INTERVALS.len == EXPECTED_BULK_SEEDED_2.len
		: "BULK_INTERVALS length does not match the given BULK test set.";
	for (usz i = 0; i < BULK_INTERVALS.len; ++i)
	{
		char[] input = bulk[:BULK_INTERVALS[i]];
		ulong expected = EXPECTED_BULK_SEEDED_2[i];
		ulong actual = komi::hash(input, TEST_SEED_2);
		// The message names the seed so a failure here cannot be mistaken for the unseeded bulk test.
		test::@check(actual == expected,
			"Hash mismatch for seed 0x100 bulk interval %d (idx %d) (%x expected // %x actual).",
			BULK_INTERVALS[i], i, expected, actual);
	}
}
// Expected komi::hash results for the length sweep in sweep(): entry i is compared
// against the hash of an input made of i 'z' bytes, hashed with seed 0x0abab1234321.
// Entry 0 therefore corresponds to the empty input.
const ulong[] EXPECTED_SWEEP = {
0x5cdcdaf25a774bdf, 0x1e5c3d2098586a17, 0x138664c4f409a6aa, 0x6936cf53c986deb1,
0x3417ed7d7a081094, 0xd38acf8461008782, 0xf19e49a8a7c77869, 0xb4c60ae1c52e8a13,
0xc6e7ceffa4af2605, 0xc219152657fcc7b9, 0x9562052479b8007c, 0xb4395e5aec193f02,
0x80d9987c7e56ca6f, 0x03a29a5f5d9918d3, 0x2d4988241df24218, 0x8d569336b00c6578,
0x1975540b1f4ed2bc, 0x7265c30b704afb6e, 0x173f6f524900ec6d, 0xcc86a82757407a99,
0xacc0654d841e5e31, 0x67345fcf0f031a01, 0x5914b9ebdc010c00, 0x1c61fe5bda86efb4,
0x6e133aa91b2d9218, 0x84357177a1c7df4e, 0x8cdd00e42ceeea38, 0x0fa84a74c35fd8f3,
0x9f33f6baf88e1b8b, 0xe3d86438fdc4bbb5, 0x9be24abc570ab17d, 0x2d76ab384b25e64d,
0xecdc96ef224dc58f, 0x62940f1aa710e4ee, 0xf45e451deff06f7b, 0x9250f7c75be78b2d,
0x600298c67b78935e, 0x35180e348bdd7e95, 0x101cf10a0c10c8cf, 0xea20c0ab77226b52,
0x1182f1b40f5e68b8, 0xf12820779d3a6eef, 0x34c21125302e30bd, 0x9a61527aa22e1000,
0x24a9c494a2cc820e, 0x56f3fc85980c2630, 0xeb260f799a9dab96, 0xddee1e0ca36342f9,
0x2249255270abe787, 0x588d83ead4d6085d, 0x8833170d2fdc30ce, 0x9ac09ffc9290461e,
0xabb332ab60963f6f, 0x9a5ae4156a1d7b76, 0x2466ca4ff4acf4a4, 0xde76c11cc56419ef,
0x619f641aa58485ea, 0x34d208b1da1e5684, 0xff4ab73c565450e2, 0xb09fdca3ec15641c,
0x4eaba3d39397930a, 0x1c5cad34c08a56bd, 0x182989a7334faf06, 0xefcc05ca09cde68e,
0xac04e3c6d5439ab5, 0xfeeee75b73596fe3, 0x1d35e0da61e6888a, 0x55c89cc84bbb5faf,
0x39981a0668198e8f, 0x17851ab16d9312ee, 0x0caec994194bd050, 0x90f47d9d7bfd3861,
0x3c45c729da026626, 0xf0ab0708bac2eefb, 0x6c6c15ce7b59daa9, 0xf47920556419e57b,
0x7d997797acc04e3a, 0x329c7665007f9d3f, 0x113774f05438d762, 0x4be67982859ac5f4,
};
// Length sweep: hashes every prefix of a run of 'z' bytes, from length 0 up to
// EXPECTED_SWEEP.len - 1, with the fixed seed 0x0abab1234321, and compares each
// result against the EXPECTED_SWEEP entry at the same index.
fn void sweep()
{
	char[] input = { [0..EXPECTED_SWEEP.len] = 'z' };
	foreach (i, expected : EXPECTED_SWEEP)
	{
		ulong actual = komi::hash(input[:i], 0x0abab1234321);
		// Report the prefix length and the seed actually used. The buffer itself is not
		// printed: it is filled entirely with 'z' and carries no terminating zero of its
		// own, and every iteration shares it, so it cannot identify the failing case.
		test::@check(actual == expected,
			"Hash mismatch for seed 0x0abab1234321, input length %d (%x expected // %x actual).",
			i, expected, actual);
	}
}

View File

@@ -0,0 +1,140 @@
// Copyright (c) 2025 Zack Puhl <github@xmit.xyz>. All rights reserved.
// Use of this source code is governed by the MIT license
// a copy of which can be found in the LICENSE_STDLIB file.
module metrohash_tests @test;
import std::hash::metro64;
import std::hash::metro128;
// Shared input for the metro64/metro128 offset, vector and streamed tests in this module.
const char[] TEST_KEY = "012345678901234567890123456789012345678901234567890123456789012";
// Hashes TEST_KEY starting from its second byte; the result is discarded, so this
// only exercises metro64::hash on a slice that is offset into its buffer.
fn void metro64_offset() => metro64::hash(TEST_KEY[1..]);
// Hashes TEST_KEY starting from its second byte; the result is discarded, so this
// only exercises metro128::hash on a slice that is offset into its buffer.
fn void metro128_offset() => metro128::hash(TEST_KEY[1..]);
// Checks metro64::hash of TEST_KEY against two fixed values: one for the call
// without a seed argument and one for the call with seed 1.
fn void metro64_vectors()
{
	ulong expected_0 = 0xad4b7006ae3d756b;
	ulong expected_1 = 0xdfb8b9f41c480d3b;

	// No explicit seed.
	ulong actual_0 = metro64::hash(TEST_KEY);
	test::@check(actual_0 == expected_0,
		"Hash mismatch (%x expected // %x actual).",
		expected_0, actual_0);

	// Seed 1.
	ulong actual_1 = metro64::hash(TEST_KEY, 1);
	test::@check(actual_1 == expected_1,
		"Hash mismatch (%x expected // %x actual).",
		expected_1, actual_1);
}
// Feeds TEST_KEY to a MetroHash64 (seed 1) in three consecutive pieces and checks
// the final value against the same constant metro64_vectors uses for seed 1.
fn void metro64_streamed()
{
	ulong expected = 0xdfb8b9f41c480d3b;

	MetroHash64 hasher;
	hasher.init(1);
	hasher.update(TEST_KEY[0..12]);  // first 13 bytes
	hasher.update(TEST_KEY[13..23]); // next 11 bytes
	hasher.update(TEST_KEY[24..]);   // remainder
	ulong actual = hasher.final();

	test::@check(actual == expected,
		"Hash mismatch (%x expected // %x actual).",
		expected, actual);
}
// Expected metro64::hash results for the length sweep in metro64_sweep(): entry x is
// compared against the hash of x 'a' bytes with seed 1337 (entry 0 is the empty input).
const ulong[66] VECTORS_SWEEP_64 = {
0xe2f700c7be596c30, 0xd924f06e80703f5f, 0x0e407ae9f3b31eea, 0xb286855b22bb5a7c,
0x413147f80d972772, 0xa6defbb4891b57ad, 0x0bf33d8a3a11377b, 0x5ef754dc5e155820,
0x57817499be0ee747, 0x61410284964661e8, 0xdf14b67bf1cf84a1, 0xa34f9fc7d88adda1,
0xeff25775757576a4, 0x5a3f096738c0f672, 0xcedb9bba97965231, 0xe2234b45b095d9f5,
0x19cb856abaffafc5, 0x4c2385e5a329fe50, 0x0c1731f599c24394, 0x207d5d5069420af6,
0xa00af52b3ee78ccf, 0x2649bb0315ed3705, 0x1e1e8cb19aebd947, 0x441c7ad9ede94456,
0xde8fb76b48fe0795, 0xe28aff110a0485d3, 0x1c4be10ba94dea5e, 0xb345b8382fbcb14c,
0xbd2083c97604113a, 0x53725cedc13b1f91, 0x6bde258654aabe35, 0x5571177c4f463a94,
0x7893679fa856b4d8, 0xeb700288dd6ed4fb, 0x3f70383fca952a4a, 0x5b7a795ce3f141b0,
0xa18b62d7c44d3718, 0x6e9e37eb8ef7bc49, 0x159b948172457d48, 0x113872acbfc4fc7f,
0x114e2d0a2bbb1700, 0xfc3a6f8cae61d210, 0x627e43470bc34b5d, 0xfe08fbd0cb9abe73,
0x89dd4e70b7c61b60, 0x6bf6d591e9c00425, 0x7bebba4795cbd4a3, 0x02dee5dde8549496,
0x71e30b2b3c71393f, 0x2024d0a05633cc87, 0x6884bd684f1cb48b, 0xd8f23c050ee162c2,
0xbb4425af0f4fd259, 0x7a63abf543efaf39, 0x6b6b919b7a44fefa, 0xedf8000398fe7486,
0x7fa5131c2a164dad, 0x1831d78b576a433d, 0x8914114c29b11246, 0xd6f5b2b3c48239c7,
0xc4d9392164f808fb, 0x98454695cda41767, 0x1463110024129443, 0x37e06b51f39b0db1,
0xb231266aea3ac1d2, 0x9659fcfdeff62211
};
// Length sweep for metro64: hashes each prefix of a 66-byte run of 'a' (lengths 0
// through 65) with seed 1337 and compares it to the VECTORS_SWEEP_64 entry at that length.
fn void metro64_sweep()
{
	char[66] v = { [0..65] = 'a' };
	usz x = 0;
	while (x < v.len)
	{
		char[] prefix = v[:x];
		ulong actual = metro64::hash(prefix, 1337);
		test::@check(actual == VECTORS_SWEEP_64[x],
			"Hash mismatch (%x expected // %x actual).",
			VECTORS_SWEEP_64[x], actual);
		x++;
	}
}
// Checks metro128::hash of TEST_KEY against two fixed 128-bit values: one for the
// call without a seed argument and one for the call with seed 1.
fn void metro128_vectors()
{
	uint128 expected_0 = 0x97a27450acb248059b9feda4bfe27cc7;
	uint128 expected_1 = 0xefec147a868dd6bd7f9d1938b8cda345;

	// No explicit seed.
	uint128 actual_0 = metro128::hash(TEST_KEY);
	test::@check(actual_0 == expected_0,
		"Hash mismatch (%x expected // %x actual).",
		expected_0, actual_0);

	// Seed 1.
	uint128 actual_1 = metro128::hash(TEST_KEY, 1);
	test::@check(actual_1 == expected_1,
		"Hash mismatch (%x expected // %x actual).",
		expected_1, actual_1);
}
// Feeds TEST_KEY to a MetroHash128 (seed 1) one byte per update call and checks the
// final value against the same constant metro128_vectors uses for seed 1.
fn void metro128_streamed()
{
	uint128 expected = 0xefec147a868dd6bd7f9d1938b8cda345;

	MetroHash128 hasher;
	hasher.init(1);
	foreach (ch : TEST_KEY)
	{
		// Each update receives a single-byte input.
		hasher.update({ch});
	}
	uint128 actual = hasher.final();

	test::@check(actual == expected,
		"Hash mismatch (%x expected // %x actual).",
		expected, actual);
}
// Expected metro128::hash results for the length sweep in metro128_sweep(): entry x is
// compared against the hash of x 'a' bytes with seed 1337 (entry 0 is the empty input).
const uint128[66] VECTORS_SWEEP_128 = {
0xed66a903a5af8770c4bfd518077b1d4d, 0x9c04be2535e73e406be42706b98cddd4, 0xc082896b0e4704071863c4d6b79c5fd2, 0x93e5f0fabb995f1c567d1d00031ebdb2,
0xf33e194121b0946dadef05404de5cc63, 0xd38db248561bf524962ed9a48a841a45, 0x7e1695c8838701a49091add6ca0b6da2, 0xe9d9b67eae87f20a0d1c7e19b6c7bc8e,
0x8b7d6e334c2130f1f8104302054a6adf, 0x363e19909e59b57d6ea1a44071334801, 0xfc07f6db22caf91dfc07eb162e94e5b4, 0x17258d6fe6821c82b721567ad5cc845c,
0x5759d0fbfdad344f7bc4b2eec33494f4, 0x8e599e8eea792d89cebdd9c11f888f59, 0xce942bf2e18597e63bf12a03ba95e122, 0xdb0d0635c453b26ba07664c37bc7f241,
0xa9951a456d5c08c4c1564a4e111f88cc, 0x2ddea9673d7ab8e428607e268cc4af58, 0x623ce3f6fdd7f9c070dd915d89564be7, 0xa37787b74daafdebfafdf122b1f04b92,
0x79c36fdf895491a5d8e2b9d7b27e830e, 0x1ca5545989d706abc51eb30db70733ff, 0xe7f2557aee5921dcad639a73840f1b82, 0x5b66b8cfe8c8381d34c2cb2682f8b3a9,
0x555d28dc1f2cedbebea4de1c24664b07, 0xcd95e57621d4b3eba9a8a240a751f2f6, 0xab25e96dc41b344295e8d5a734d236bb, 0x0e835e0ac14d8bc0c6707af9cb04780b,
0x28c74c57374a23e54a97831fea86e71f, 0x09c02b2cb852802664531074b43b24e5, 0xb23658cd2892c1b33179800ca748c093, 0x4f0999fd7417928c77f5169eb6605115,
0xbc85b4db9fd3096abdcfc238c815e406, 0xf68f40c0ebcf9858a34f846d6442b2c4, 0xd2a4eaa7659c2ca1603d1fae214c5f6a, 0xe382f4280e70fe32c991eacd9a417644,
0xae43523189c866e6b759f3da9abebb0e, 0x94a3f58c1f5a71bc9d6488c74154e8d7, 0x7e869c466cf2408a0a4c2758ac1c2a1a, 0x645e5babb2ddd637d1d616db16468c8f,
0x668d5187f86a97172fa7dff866ab4307, 0x43761b3e2011d2b1defce3b2abd3220c, 0xcbe5c5febe9e9522f92eab2faff5a4d2, 0x57effbd664e86987a7e41d0139b0c1d4,
0x90fc91743fc288d563c6059b099debf2, 0xe7fe3b7f9e2804dc4ca39486d1ff95f8, 0xd419e052bf7a8037581a7176d5e5c40f, 0xbdcf3e2e8e9bec8b5174ee35f5c77a90,
0xa73b9edb918e873728cb61dbff14ae18, 0x6630e865ec83027c5e930f4ca1ebe300, 0xd44cc36826feb880572a83a046c159c8, 0x1e477dc003e907a1d424a4f84654ddd2,
0xb498e2859fa073c28a988aa0a461f9ae, 0x05666028c9d1a1a7878cbde8a82e84c4, 0xc1dac1ea4f24c32e83522d0f921560f0, 0xcbb2a8a58dc91c1230aec1f3a5c398cb,
0x7e76d0952c34286f5ccc2a9a30f65bb3, 0x0091c352079662facb5cd03255a6ecc7, 0xcc9d1fa3518a937b594da868ac1ea634, 0xfec1ae0bb45d5fd9bc0ed7c418c2c633,
0x9e9cbd767281cdd3779b2e8506774cd4, 0x42be3cc544dc7ed64da7d695313d7802, 0x7f57bad2d44c1f47722c3029ba9f53f5, 0xbd574d95b4635562acc1d8c5633589dc,
0x1761b98ffa140cfdc8e6ac36327b6080, 0xfddd7de5827fc61fc01b594181f887c1
};
// Length sweep for metro128: hashes each prefix of a 66-byte run of 'a' (lengths 0
// through 65) with seed 1337 and compares it to the VECTORS_SWEEP_128 entry at that length.
fn void metro128_sweep()
{
	char[66] v = { [0..65] = 'a' };
	usz x = 0;
	while (x < v.len)
	{
		char[] prefix = v[:x];
		uint128 actual = metro128::hash(prefix, 1337);
		test::@check(actual == VECTORS_SWEEP_128[x],
			"Hash mismatch (%x expected // %x actual).",
			VECTORS_SWEEP_128[x], actual);
		x++;
	}
}

View File

@@ -0,0 +1,89 @@
// Copyright (c) 2025 Zack Puhl <github@xmit.xyz>. All rights reserved.
// Use of this source code is governed by the MIT license
// a copy of which can be found in the LICENSE_STDLIB file.
module wyhash2_tests @test;
import std::hash::wyhash2;
// Hash of "abc" without a seed argument must equal the fixed value below.
fn void simple_vector()
{
	test::@check(0xb4808df22d44ffcf == wyhash2::hash("abc"));
}
// Hashes a slice that begins one byte into a local array; the result is discarded,
// so this only exercises wyhash2::hash on an input offset into its buffer.
fn void offset_check()
{
	char[*] digits = "0123293829";
	char[] shifted = digits[1..];
	wyhash2::hash(shifted);
}
// Hash of "aax" with seed 2 must equal the fixed value below.
fn void simple_vector_seeded()
{
	test::@check(0x9c962ca4764da6f4 == wyhash2::hash("aax", 2));
}
// Hash of "aax" with a full 64-bit seed (the expected value from simple_vector_seeded)
// must equal the fixed value below.
fn void simple_vector_seeded_2()
{
	test::@check(0x49090566becc19bf == wyhash2::hash("aax", 0x9c962ca4764da6f4));
}
// Hash of a 14-byte string with seed 2 must equal the fixed value below.
fn void longer_vector_seeded()
{
	test::@check(0x8b18145f8353c46d == wyhash2::hash("hi my name is:", 2));
}
// Hash of the same 14-byte string with a full 64-bit seed (the expected value from
// longer_vector_seeded) must equal the fixed value below.
fn void longer_vector_seeded_2()
{
	test::@check(0x2b8f7c0e2e562e63 == wyhash2::hash("hi my name is:", 0x8b18145f8353c46d));
}
<*
 VECTORS[n] is the expected hash of the first n bytes of "abcdefghijklmnop", as checked
 by sweep(); VECTORS[0] is the expected hash of the empty input.

 These constant vectors are easily confirmed with the quick Rust executable shown below.

 However, as of writing, the 0-length return value differs between the method below
 (streaming the input) and calling `wyhash2::wyhash_single`. The 0-length entry therefore
 relies on the function's return value instead of the streamer's, because all other
 function values match the streamer.

 ```rust
 use core::hash::Hasher;
 use wyhash2::WyHash;
 fn main() {
     let arr: [u8; 16] = [b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p'];
     let secret = 0;
     for i in 0..=16 {
         let mut hasher = WyHash::with_seed(secret);
         hasher.write(&arr[..i]);
         println!("Index {}: 0x{:x}", i, hasher.finish());
     }
 }
 ```
*>
const ulong[17] VECTORS = {
0x42bc986dc5eec4d3,
0x6cf84e5a2465e867,
0x172ba773b8ebb6d8,
0xb4808df22d44ffcf,
0x8cd6fedc542c39e1,
0x89f29dfa6e5ab1e5,
0x2d62e7827072fb65,
0xce8a19cc22fbe893,
0x3c36fed2521530c0,
0x1958d0433e7579fa,
0x787f681f01831617,
0x7107735a3edb98ee,
0xf4c24a45a41ea322,
0x03779e9d9ed9ff12,
0xd24ac6ffc05e0cb8,
0x0b4153cef1f30b07,
0x4ff3b52ca1e858d2,
};
// Length sweep for wyhash2: checks the empty input first, then grows a buffer one
// letter at a time ('a', 'b', ...) and checks the hash of each prefix of length 1..16
// against the VECTORS entry at that length.
fn void sweep()
{
	// Zero-initialised and larger than the longest prefix, so the buffer stays
	// zero-terminated when it is printed as a ZString on failure.
	char[20] c;

	ulong actual = wyhash2::hash({});
	test::@check(actual == VECTORS[0],
		"Empty hash failed (%x expected // %x actual).",
		VECTORS[0], actual);

	for (usz x = 1; x < VECTORS.len; x++)
	{
		usz last = x - 1;
		c[last] = 'a' + (char)last;
		actual = wyhash2::hash(c[:x]);
		test::@check(actual == VECTORS[x],
			"Failed on '%s', length %d (%x expected // %x actual).",
			(ZString)&c, x, VECTORS[x], actual);
	}
}