mirror of
https://github.com/c3lang/c3c.git
synced 2026-02-27 03:51:18 +00:00
Add wyhash2 and metro64/128 modern hashing (#2293)
* add wyhash2, metro64, and metro128 hashes; best performing non-crypto hash functions * add superfast 64-bit a5hash; not streamed, no 128-bit impl * add komihash and associated tests/benchmarks --------- Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
This commit is contained in:
96
lib/std/hash/a5hash.c3
Normal file
96
lib/std/hash/a5hash.c3
Normal file
@@ -0,0 +1,96 @@
|
||||
// Copyright (c) 2025 Zack Puhl <github@xmit.xyz>. All rights reserved.
|
||||
// Use of this source code is governed by the MIT license
|
||||
// a copy of which can be found in the LICENSE_STDLIB file.
|
||||
//
|
||||
// An implementation of Aleksey Vaneev's a5hash, version 5.16, in C3:
|
||||
// https://github.com/avaneev/a5hash
|
||||
//
|
||||
// The license for a5hash from the above repository at the time of writing is as follows:
|
||||
//
|
||||
// >> MIT License
|
||||
// >>
|
||||
// >> Copyright (c) 2025 Aleksey Vaneev
|
||||
// >>
|
||||
// >> Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// >> of this software and associated documentation files (the "Software"), to deal
|
||||
// >> in the Software without restriction, including without limitation the rights
|
||||
// >> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// >> copies of the Software, and to permit persons to whom the Software is
|
||||
// >> furnished to do so, subject to the following conditions:
|
||||
// >>
|
||||
// >> The above copyright notice and this permission notice shall be included in all
|
||||
// >> copies or substantial portions of the Software.
|
||||
// >>
|
||||
// >> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// >> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// >> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// >> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// >> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// >> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// >> SOFTWARE.
|
||||
//
|
||||
//
|
||||
module std::hash::a5hash;
|
||||
|
||||
|
||||
// Full 64x64 -> 128-bit multiplication, split into two 64-bit halves.
//
// #u, #v : operand expressions; each is widened to uint128 before multiplying.
// #lo    : lvalue overwritten with the low 64 bits of the product.
// #hi    : lvalue overwritten with the high 64 bits of the product.
//
// The product is computed into a temporary before either output is written,
// so the outputs may also appear inside the operand expressions.
macro @a5mul(#u, #v, #lo, #hi) @local
{
	uint128 product = (uint128)#u * (uint128)#v;
	#lo = (ulong)product;
	#hi = (ulong)(product >> 64);
}
|
||||
|
||||
|
||||
// Compute the 64-bit a5hash of `data`.
//
// data : bytes to hash (may be empty).
// seed : optional seed; its even and odd bits are mixed into the two
//        internal state words separately.
//
// Returns the 64-bit hash value.
fn ulong hash(char[] data, ulong seed = 0)
{
	ulong s1 = 0x243F_6A88_85A3_08D3 ^ data.len;
	ulong s2 = 0x4528_21E6_38D0_1377 ^ data.len;
	ulong mask10 = 0xAAAA_AAAA_AAAA_AAAA;
	ulong mask01 = 0x5555_5555_5555_5555;
	ulong a, b;

	// Fold the user seed into the state, then derive the per-round increment.
	@a5mul(s2 ^ (seed & mask10), s1 ^ (seed & mask01), s1, s2);
	mask10 ^= s2;

	if (@likely(data.len > 3))
	{
		if (data.len > 16)
		{
			mask01 ^= s1;

			// Consume 16 bytes per round while more than 16 bytes remain.
			while (data.len > 16)
			{
				ulong* words = (ulong*)data.ptr;
				@a5mul(
					@unaligned_load(words[0], 1) ^ s1,
					@unaligned_load(words[1], 1) ^ s2,
					s1, s2
				);
				s1 += mask01;
				s2 += mask10;
				data = data[16..];
			}

			// The final 16 bytes of the message; this window may reach back
			// into bytes the loop above has already consumed.
			char* end = data.ptr + (uptr)data.len;
			a = @unaligned_load(*(ulong*)(end - 16), 1);
			b = @unaligned_load(*(ulong*)(end - 8), 1);
		}
		else
		{
			// 4..16 bytes: four (possibly overlapping) 32-bit reads.
			usz off = (data.len >> 3) * 4;
			char* last4 = data.ptr + data.len - 4;

			ulong a_hi = @unaligned_load(*(uint*)&data[0], 1);
			ulong a_lo = @unaligned_load(*(uint*)last4, 1);
			ulong b_hi = @unaligned_load(*(uint*)&data[off], 1);
			ulong b_lo = @unaligned_load(*(uint*)(last4 - off), 1);

			a = (a_hi << 32) | a_lo;
			b = (b_hi << 32) | b_lo;
		}
	}
	else
	{
		// 0..3 bytes: pack whatever is present into the low bytes of `a`.
		a = 0;
		b = 0;
		if (data.len > 0) a = data[0];
		if (data.len > 1) a |= (ulong)data[1] << 8;
		if (data.len > 2) a |= (ulong)data[2] << 16;
	}

	// Finalization: two more multiply rounds, then fold both halves together.
	@a5mul(a ^ s1, b ^ s2, s1, s2);
	@a5mul(mask01 ^ s1, s2, a, b);

	return a ^ b;
}
|
||||
156
lib/std/hash/komi.c3
Normal file
156
lib/std/hash/komi.c3
Normal file
@@ -0,0 +1,156 @@
|
||||
// Copyright (c) 2025 Zack Puhl <github@xmit.xyz>. All rights reserved.
|
||||
// Use of this source code is governed by the MIT license
|
||||
// a copy of which can be found in the LICENSE_STDLIB file.
|
||||
//
|
||||
// An implementation of Aleksey Vaneev's komihash, version 5.27, in C3:
|
||||
// https://github.com/avaneev/komihash
|
||||
//
|
||||
// The license for komihash from the above repository at the time of writing is as follows:
|
||||
//
|
||||
// >> MIT License
|
||||
// >>
|
||||
// >> Copyright (c) 2021-2025 Aleksey Vaneev
|
||||
// >>
|
||||
// >> Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// >> of this software and associated documentation files (the "Software"), to deal
|
||||
// >> in the Software without restriction, including without limitation the rights
|
||||
// >> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// >> copies of the Software, and to permit persons to whom the Software is
|
||||
// >> furnished to do so, subject to the following conditions:
|
||||
// >>
|
||||
// >> The above copyright notice and this permission notice shall be included in all
|
||||
// >> copies or substantial portions of the Software.
|
||||
// >>
|
||||
// >> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// >> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// >> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// >> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// >> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// >> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// >> SOFTWARE.
|
||||
//
|
||||
//
|
||||
module std::hash::komi;
|
||||
|
||||
|
||||
// 64x64 -> 128-bit multiplication used by every komihash round.
//
// #u, #v : operand expressions; each is widened to uint128 before multiplying.
// #lo    : lvalue overwritten with the low 64 bits of the product.
// #hi    : lvalue that has the high 64 bits of the product ADDED to it
//          (accumulated, not overwritten).
//
// The product is computed into a temporary before either output is touched,
// so the outputs may also appear inside the operand expressions.
macro @komimul(#u, #v, #lo, #hi) @local
{
	uint128 product = (uint128)#u * (uint128)#v;
	#lo = (ulong)product;
	#hi += (ulong)(product >> 64);
}
|
||||
|
||||
|
||||
// Compute the 64-bit komihash of `data`.
//
// data : bytes to hash (may be empty).
// seed : optional seed; its odd and even bits are mixed into the two
//        primary state words separately.
//
// Returns the 64-bit hash value.
fn ulong hash(char[] data, ulong seed = 0)
{
	ulong s1 = 0x243F_6A88_85A3_08D3 ^ (seed & 0x5555_5555_5555_5555);
	ulong s5 = 0x4528_21E6_38D0_1377 ^ (seed & 0xAAAA_AAAA_AAAA_AAAA);
	ulong r1h, r2h;

	// HASHROUND
	@komimul(s1, s5, s1, s5);
	s1 ^= s5;

	if (@likely(data.len < 16))
	{
		r1h = s1;
		r2h = s5;

		if (@likely(data.len >= 8))
		{
			r1h ^= @unaligned_load(*(ulong*)data.ptr, 1);

			if (data.len < 12)
			{
				// 8..11 bytes: take the last three bytes plus a marker bit, then
				// shift away the bytes already covered by the 8-byte read above.
				ulong last3 = data[data.len - 3]
					| ((ulong)data[data.len - 2] << 8)
					| ((ulong)data[data.len - 1] << 16)
					| ((ulong)1 << 24);
				r2h ^= last3 >> ((data.len * 8) ^ 88);
			}
			else
			{
				// 12..15 bytes: 32-bit read at offset 8 plus the last 32 bits with a marker bit.
				ulong upper = (@unaligned_load(*(uint*)&data[^4], 1) | ((ulong)1 << 32)) >> (128 - data.len * 8);
				r2h ^= (upper << 32) | @unaligned_load(*(uint*)&data[8], 1);
			}
		}
		else if (data.len != 0)
		{
			if (data.len < 4)
			{
				// 1..3 bytes: assemble byte by byte, with a marker bit above the data.
				ulong packed = ((ulong)1 << (data.len * 8)) ^ data[0];
				if (data.len > 1) packed ^= (ulong)data[1] << 8;
				if (data.len > 2) packed ^= (ulong)data[2] << 16;
				r1h ^= packed;
			}
			else
			{
				// 4..7 bytes: first 32 bits plus the last 32 bits with a marker bit.
				ulong upper = (@unaligned_load(*(uint*)&data[^4], 1) | ((ulong)1 << 32)) >> (64 - data.len * 8);
				r1h ^= (upper << 32) | @unaligned_load(*(uint*)&data[0], 1);
			}
		}
	}
	else if (data.len < 32)
	{
		// HASH16
		@komimul(
			@unaligned_load(*(ulong*)&data[0], 1) ^ s1,
			@unaligned_load(*(ulong*)&data[8], 1) ^ s5,
			s1, s5
		);
		s1 ^= s5;

		// Last 8 bytes of the message with a marker bit placed on top.
		ulong tail = (@unaligned_load(*(ulong*)&data[^8], 1) >> 8) | ((ulong)1 << 56);

		if (data.len < 24)
		{
			r1h = (tail >> (((int)(data.len * 8) ^ 184))) ^ s1;
			r2h = s5;
		}
		else
		{
			r1h = @unaligned_load(*(ulong*)&data[16], 1) ^ s1;
			r2h = (tail >> (((int)(data.len * 8) ^ 248))) ^ s5;
		}
	}
	else
	{
		if (data.len >= 64)
		{
			// Eight parallel lanes: lanes[0..3] derive from s1, lanes[4..7] from s5.
			ulong[8] lanes = {
				s1, 0x1319_8A2E_0370_7344 ^ s1, 0xA409_3822_299F_31D0 ^ s1, 0x082E_FA98_EC4E_6C89 ^ s1,
				s5, 0xBE54_66CF_34E9_0C6C ^ s5, 0xC0AC_29B7_C97C_50DD ^ s5, 0x3F84_D5B5_B547_0917 ^ s5,
			};

			// HASHLOOP64
			while (data.len >= 64)
			{
				$for var $lane = 0; $lane < 4; ++$lane :
					@komimul(
						@unaligned_load(*(ulong*)&data[0 + ($lane * 8)], 1) ^ lanes[$lane],
						@unaligned_load(*(ulong*)&data[32 + ($lane * 8)], 1) ^ lanes[4 + $lane],
						lanes[$lane], lanes[4 + $lane]
					);
				$endfor

				// Cross-mix: each low lane absorbs a different high lane.
				lanes[0] ^= lanes[7];
				lanes[1] ^= lanes[4];
				lanes[2] ^= lanes[5];
				lanes[3] ^= lanes[6];

				data = data[64:^64];
			}

			s1 = lanes[0] ^ lanes[1] ^ lanes[2] ^ lanes[3];
			s5 = lanes[4] ^ lanes[5] ^ lanes[6] ^ lanes[7];
		}

		// Remaining whole 16-byte blocks.
		while (data.len >= 16)
		{
			@komimul(
				@unaligned_load(*(ulong*)&data[0], 1) ^ s1,
				@unaligned_load(*(ulong*)&data[8], 1) ^ s5,
				s1, s5
			);
			s1 ^= s5;
			data = data[16:^16];
		}

		// NOTE: Translated from the original code. The 8 bytes ending at the end of the
		//       buffer are loaded even when fewer than 8 bytes remain in the slice. This is
		//       safe because this branch is only reached when the original data slice was
		//       at least 32 bytes long, so the bytes before `data.ptr` still belong to it.
		ulong tail = (@unaligned_load(*(ulong*)(data.ptr + data.len - 8), 1) >> 8) | ((ulong)1 << 56);

		if (data.len < 8)
		{
			r1h = (tail >> ((data.len * 8) ^ 0x38)) ^ s1;
			r2h = s5;
		}
		else
		{
			r1h = @unaligned_load(*(ulong*)data.ptr, 1) ^ s1;
			r2h = (tail >> ((data.len * 8) ^ 0x78)) ^ s5;
		}
	}

	// HASHFIN
	@komimul(r1h, r2h, s1, s5);
	s1 ^= s5;
	@komimul(s1, s5, s1, s5);
	s1 ^= s5;

	return s1;
}
|
||||
149
lib/std/hash/metro128.c3
Normal file
149
lib/std/hash/metro128.c3
Normal file
@@ -0,0 +1,149 @@
|
||||
// Copyright (c) 2025 Zack Puhl <github@xmit.xyz>. All rights reserved.
|
||||
// Use of this source code is governed by the MIT license
|
||||
// a copy of which can be found in the LICENSE_STDLIB file.
|
||||
//
|
||||
// MetroHash64 and MetroHash128 are different enough to warrant their own
|
||||
// modules, and there would be no reason to create a generic module just
|
||||
// for the two. If you inspect the differences, the only shared portion
|
||||
// of the entire process is the `update` method.
|
||||
//
|
||||
module std::hash::metro128;
|
||||
|
||||
|
||||
// The four multiplier constants used by the MetroHash128 init, update and final steps.
const ulong[4] K @local = { 0xc83a91e1, 0x8648dbdb, 0x7bdec03b, 0x2f5870a5 };
|
||||
|
||||
|
||||
// Streaming state for MetroHash128.
struct MetroHash128
{
	// Four 64-bit working lanes; `result` overlays the start of the same storage
	// and is what `final` returns.
	union
	{
		ulong[4] state;
		uint128 result;
	}
	// 32-byte staging buffer for input that does not yet fill a whole block,
	// viewable either as bytes or as four 64-bit words.
	union
	{
		ulong[4] stomach_64;
		char[32] stomach;
	}
	// Total number of input bytes absorbed so far.
	ulong bytes;
}
|
||||
|
||||
|
||||
// One-shot MetroHash128: initialise a hasher with `seed`, feed it all of
// `data`, and return the finished 128-bit digest.
fn uint128 hash(char[] data, ulong seed = 0)
{
	MetroHash128 hasher;
	hasher.init(seed);
	hasher.update(data);
	return hasher.final();
}
|
||||
|
||||
|
||||
// Prepare the hasher for a new message.
//
// seed : optional seed mixed into all four state lanes.
//
// The running byte count is reset as well, so an instance that has already
// absorbed data (or was not zero-initialised) starts the next message from a
// clean state. Without this, `update` and `final` would treat the stale count
// as previously buffered input.
fn void MetroHash128.init(&self, ulong seed = 0)
{
	self.state = {
		(seed - K[0]) * K[3],
		(seed + K[1]) * K[2],
		(seed + K[0]) * K[2],
		(seed - K[1]) * K[3],
	};
	self.bytes = 0;
}
|
||||
|
||||
|
||||
// Absorb `data` into the running hash.
//
// Input is consumed in 32-byte blocks. Bytes that do not fill a whole block
// are parked in `stomach` until a later `update` completes the block or
// `final` processes them.
fn void MetroHash128.update(&self, char[] data)
{
	ulong buffered = self.bytes % 32;

	if (buffered) // partial buffer
	{
		// Top up the staging buffer with as much of `data` as fits.
		ulong to_fill = min(data.len, (32 - buffered));

		self.stomach[buffered:to_fill] = data[:to_fill];

		data = data[to_fill..];
		self.bytes += to_fill;

		if (self.bytes % 32) return; // still awaiting more input, or final

		// The staging buffer is full: run one round over it. Lanes are updated
		// in order 0..3 because lanes 2 and 3 read the freshly updated lanes 0 and 1.
		$for var $i = 0; $i < 4; ++$i :
			self.state[$i] += self.stomach_64[$i] * K[$i];
			self.state[$i] = self.state[$i].rotr(29) + self.state[($i + 2) % 4];
		$endfor
	}

	self.bytes += data.len;

	// Bulk rounds straight from the caller's buffer, 32 bytes at a time.
	while (data.len >= 32)
	{
		ulong* words = (ulong*)data.ptr;

		$for var $i = 0; $i < 4; ++$i :
			self.state[$i] += @unaligned_load(words[$i], 1) * K[$i];
			self.state[$i] = self.state[$i].rotr(29) + self.state[($i + 2) % 4];
		$endfor

		data = data[32:^32];
	}

	// Gobble up the leftover bytes. Nom nom.
	if (data.len > 0) self.stomach[:data.len] = data[..];
}
|
||||
|
||||
|
||||
// Finish the hash and return the 128-bit digest.
//
// Bytes still parked in `stomach` are folded in using progressively smaller
// reads (16, 8, 4, 2, 1 bytes), followed by a closing mix of lanes 0 and 1.
// The digest is read through `result`, which overlays the state lanes.
fn uint128 MetroHash128.final(&self)
{
	// Merge all four lanes, but only if at least one full 32-byte block was absorbed.
	if (self.bytes >= 32)
	{
		self.state[2] ^= (((self.state[0] + self.state[3]) * K[0]) + self.state[1]).rotr(21) * K[1];
		self.state[3] ^= (((self.state[1] + self.state[2]) * K[1]) + self.state[0]).rotr(21) * K[0];
		self.state[0] ^= (((self.state[0] + self.state[2]) * K[0]) + self.state[3]).rotr(21) * K[1];
		self.state[1] ^= (((self.state[1] + self.state[3]) * K[1]) + self.state[2]).rotr(21) * K[0];
	}

	// Unprocessed bytes left over from the last `update`.
	char[] tail = self.stomach[:(self.bytes % 32)];

	if (tail.len >= 16)
	{
		ulong* words = (ulong*)tail.ptr;

		self.state[0] += words[0] * K[2];
		self.state[0] = self.state[0].rotr(33) * K[3];
		self.state[1] += words[1] * K[2];
		self.state[1] = self.state[1].rotr(33) * K[3];
		self.state[0] ^= ((self.state[0] * K[2]) + self.state[1]).rotr(45) * K[1];
		self.state[1] ^= ((self.state[1] * K[3]) + self.state[0]).rotr(45) * K[0];

		tail = tail[16:^16];
	}

	if (tail.len >= 8)
	{
		self.state[0] += @unaligned_load(((ulong*)tail.ptr)[0], 1) * K[2];
		self.state[0] = self.state[0].rotr(33) * K[3];
		self.state[0] ^= ((self.state[0] * K[2]) + self.state[1]).rotr(27) * K[1];

		tail = tail[8:^8];
	}

	if (tail.len >= 4)
	{
		self.state[1] += @unaligned_load(((uint*)tail.ptr)[0], 1) * K[2];
		self.state[1] = self.state[1].rotr(33) * K[3];
		self.state[1] ^= ((self.state[1] * K[3]) + self.state[0]).rotr(46) * K[0];

		tail = tail[4:^4];
	}

	if (tail.len >= 2)
	{
		self.state[0] += @unaligned_load(((ushort*)tail.ptr)[0], 1) * K[2];
		self.state[0] = self.state[0].rotr(33) * K[3];
		self.state[0] ^= ((self.state[0] * K[2]) + self.state[1]).rotr(22) * K[1];

		tail = tail[2:^2];
	}

	if (tail.len >= 1)
	{
		self.state[1] += ((char*)tail.ptr)[0] * K[2];
		self.state[1] = self.state[1].rotr(33) * K[3];
		self.state[1] ^= ((self.state[1] * K[3]) + self.state[0]).rotr(58) * K[0];
	}

	// Closing mix of lanes 0 and 1.
	self.state[0] += ((self.state[0] * K[0]) + self.state[1]).rotr(13);
	self.state[1] += ((self.state[1] * K[1]) + self.state[0]).rotr(37);
	self.state[0] += ((self.state[0] * K[2]) + self.state[1]).rotr(13);
	self.state[1] += ((self.state[1] * K[3]) + self.state[0]).rotr(37);

	return self.result;
}
|
||||
152
lib/std/hash/metro64.c3
Normal file
152
lib/std/hash/metro64.c3
Normal file
@@ -0,0 +1,152 @@
|
||||
// Copyright (c) 2025 Zack Puhl <github@xmit.xyz>. All rights reserved.
|
||||
// Use of this source code is governed by the MIT license
|
||||
// a copy of which can be found in the LICENSE_STDLIB file.
|
||||
//
|
||||
// MetroHash64 and MetroHash128 are different enough to warrant their own
|
||||
// modules, and there would be no reason to create a generic module just
|
||||
// for the two. If you inspect the differences, the only shared portion
|
||||
// of the entire process is the `update` method.
|
||||
//
|
||||
module std::hash::metro64;
|
||||
|
||||
|
||||
// The four multiplier constants used by the MetroHash64 init, update and final steps.
const ulong[4] K @local = { 0xd6d018f5, 0xa2aa033b, 0x62992fc1, 0x30bc5b29 };
|
||||
|
||||
|
||||
// Streaming state for MetroHash64.
struct MetroHash64
{
	// Four 64-bit working lanes; `result` overlays the first lane and is what
	// `final` returns.
	union
	{
		ulong[4] state;
		ulong result;
	}
	// 32-byte staging buffer for input that does not yet fill a whole block,
	// viewable either as bytes or as four 64-bit words.
	union
	{
		ulong[4] stomach_64;
		char[32] stomach;
	}
	// Total number of input bytes absorbed so far.
	ulong bytes;
	// Seed-derived value computed by `init` and reused by `final`.
	ulong vseed;
}
|
||||
|
||||
|
||||
// One-shot MetroHash64: initialise a hasher with `seed`, feed it all of
// `data`, and return the finished 64-bit digest.
fn ulong hash(char[] data, ulong seed = 0)
{
	MetroHash64 hasher;
	hasher.init(seed);
	hasher.update(data);
	return hasher.final();
}
|
||||
|
||||
|
||||
// Prepare the hasher for a new message.
//
// seed : optional seed; it is folded into `vseed`, which then fills all four
//        state lanes and is kept for use by `final`.
//
// The running byte count is reset as well, so an instance that has already
// absorbed data (or was not zero-initialised) starts the next message from a
// clean state. Without this, `update` and `final` would treat the stale count
// as previously buffered input.
fn void MetroHash64.init(&self, ulong seed = 0)
{
	self.vseed = (seed + K[2]) * K[0];

	self.state[0] = self.vseed;
	self.state[1] = self.vseed;
	self.state[2] = self.vseed;
	self.state[3] = self.vseed;

	self.bytes = 0;
}
|
||||
|
||||
|
||||
// Absorb `data` into the running hash.
//
// Input is consumed in 32-byte blocks. Bytes that do not fill a whole block
// are parked in `stomach` until a later `update` completes the block or
// `final` processes them.
fn void MetroHash64.update(&self, char[] data)
{
	ulong buffered = self.bytes % 32;

	if (buffered) // partial buffer
	{
		// Top up the staging buffer with as much of `data` as fits.
		ulong to_fill = min(data.len, (32 - buffered));

		self.stomach[buffered:to_fill] = data[:to_fill];

		data = data[to_fill..];
		self.bytes += to_fill;

		if (self.bytes % 32) return; // still awaiting more input, or final

		// The staging buffer is full: run one round over it. Lanes are updated
		// in order 0..3 because lanes 2 and 3 read the freshly updated lanes 0 and 1.
		$for var $i = 0; $i < 4; ++$i :
			self.state[$i] += self.stomach_64[$i] * K[$i];
			self.state[$i] = self.state[$i].rotr(29) + self.state[($i + 2) % 4];
		$endfor
	}

	self.bytes += data.len;

	// Bulk rounds straight from the caller's buffer, 32 bytes at a time.
	while (data.len >= 32)
	{
		ulong* words = (ulong*)data.ptr;

		$for var $i = 0; $i < 4; ++$i :
			self.state[$i] += @unaligned_load(words[$i], 1) * K[$i];
			self.state[$i] = self.state[$i].rotr(29) + self.state[($i + 2) % 4];
		$endfor

		data = data[32:^32];
	}

	// Gobble up the leftover bytes. Nom nom.
	if (data.len > 0) self.stomach[:data.len] = data[..];
}
|
||||
|
||||
|
||||
// Finish the hash and return the 64-bit digest.
//
// Bytes still parked in `stomach` are folded in using progressively smaller
// reads (16, 8, 4, 2, 1 bytes), followed by a closing avalanche of lane 0.
// The digest is read through `result`, which overlays lane 0.
fn ulong MetroHash64.final(&self)
{
	// Merge all four lanes into lane 0, but only if at least one full
	// 32-byte block was absorbed.
	if (self.bytes >= 32)
	{
		self.state[2] ^= (((self.state[0] + self.state[3]) * K[0]) + self.state[1]).rotr(37) * K[1];
		self.state[3] ^= (((self.state[1] + self.state[2]) * K[1]) + self.state[0]).rotr(37) * K[0];
		self.state[0] ^= (((self.state[0] + self.state[2]) * K[0]) + self.state[3]).rotr(37) * K[1];
		self.state[1] ^= (((self.state[1] + self.state[3]) * K[1]) + self.state[2]).rotr(37) * K[0];

		self.state[0] = self.vseed + (self.state[0] ^ self.state[1]);
	}

	// Unprocessed bytes left over from the last `update`.
	char[] tail = self.stomach[:(self.bytes % 32)];

	if (tail.len >= 16)
	{
		ulong* words = (ulong*)tail.ptr;

		// Lanes 1 and 2 are used as scratch here and rebuilt from lane 0.
		self.state[1] = self.state[0] + @unaligned_load(words[0], 1) * K[2];
		self.state[1] = self.state[1].rotr(29) * K[3];
		self.state[2] = self.state[0] + @unaligned_load(words[1], 1) * K[2];
		self.state[2] = self.state[2].rotr(29) * K[3];
		self.state[1] ^= (self.state[1] * K[0]).rotr(21) + self.state[2];
		self.state[2] ^= (self.state[2] * K[3]).rotr(21) + self.state[1];
		self.state[0] += self.state[2];

		tail = tail[16:^16];
	}

	if (tail.len >= 8)
	{
		self.state[0] += @unaligned_load(((ulong*)tail.ptr)[0], 1) * K[3];
		self.state[0] ^= self.state[0].rotr(55) * K[1];

		tail = tail[8:^8];
	}

	if (tail.len >= 4)
	{
		self.state[0] += @unaligned_load(((uint*)tail.ptr)[0], 1) * K[3];
		self.state[0] ^= self.state[0].rotr(26) * K[1];

		tail = tail[4:^4];
	}

	if (tail.len >= 2)
	{
		self.state[0] += @unaligned_load(((ushort*)tail.ptr)[0], 1) * K[3];
		self.state[0] ^= self.state[0].rotr(48) * K[1];

		tail = tail[2:^2];
	}

	if (tail.len >= 1)
	{
		self.state[0] += ((char*)tail.ptr)[0] * K[3];
		self.state[0] ^= self.state[0].rotr(37) * K[1];
	}

	// Closing avalanche of lane 0.
	self.state[0] ^= self.state[0].rotr(28);
	self.state[0] *= K[0];
	self.state[0] ^= self.state[0].rotr(29);

	return self.result;
}
|
||||
56
lib/std/hash/wyhash2.c3
Normal file
56
lib/std/hash/wyhash2.c3
Normal file
@@ -0,0 +1,56 @@
|
||||
// Copyright (c) 2025 Zack Puhl <github@xmit.xyz>. All rights reserved.
|
||||
// Use of this source code is governed by the MIT license
|
||||
// a copy of which can be found in the LICENSE_STDLIB file.
|
||||
//
|
||||
// An implementation of Wang Yi's wyhash(2) algorithm in C3:
|
||||
// https://github.com/wangyi-fudan/wyhash
|
||||
//
|
||||
module std::hash::wyhash2;
|
||||
|
||||
|
||||
// Pack a 1..3 byte input into a 24-bit value built from its first, middle
// (index len / 2) and last bytes. For inputs shorter than three bytes the
// same byte is picked more than once.
//
// in  : pointer to the first input byte.
// len : number of readable bytes; must be at least 1, since in[len - 1] is read.
fn ulong wyr3(char* in, usz len) @inline
{
	ulong first = in[0];
	ulong middle = in[len >> 1];
	ulong last = in[len - 1];
	return (first << 16) | (middle << 8) | last;
}
|
||||
|
||||
|
||||
// See: https://docs.google.com/spreadsheets/d/1HmqDj-suH4wBFNg7etwE8WVBlfCufvD5-gAnIENs94k/edit?gid=1915335726#gid=1915335726
|
||||
// Credit to article:
|
||||
// https://medium.com/@tprodanov/benchmarking-non-cryptographic-hash-functions-in-rust-2e6091077d11
|
||||
//
|
||||
// wyhash2 has a >90% chance of collisions when its input data is above 16 bytes in length.
|
||||
// However, it is the fastest performing and most evenly randomized hash for very low-length inputs,
|
||||
// making it an ideal candidate for hashing primitive data types quickly and making things like hash
|
||||
// tables even faster. Therefore, a 16-byte input limit is imposed on all calls to the hash function.
|
||||
//
|
||||
<*
 @require input.len <= 16 : `wyhash2 is not useable for inputs over 16 bytes in length.`
*>
fn ulong hash(char[] input, ulong seed = 0)
{
	// Constant XORed into the first operand and into the final length mix.
	ulong mix = 0xe703_7ed1_a0b4_28db;

	seed ^= 0xa076_1d64_78bd_642f;

	// Two operand words read from the input; both remain 0 for an empty input.
	ulong a, b;

	if (@likely(input.len <= 8)) // more likely to encounter 8-byte or lower type here
	{
		if (@likely(input.len >= 4))
		{
			// First 4 bytes and last 4 bytes (overlapping when len < 8), each widened to 64 bits.
			uint head = @unaligned_load(*(uint*)input.ptr, 1);
			uint tail = @unaligned_load(*(uint*)&input[^4], 1);
			a = (ulong)head;
			b = (ulong)tail;
		}
		else if (input.len > 0)
		{
			// 1..3 bytes: only `a` is populated.
			a = wyr3(input, input.len);
		}
	}
	else
	{
		// 9..16 bytes: first 8 bytes and last 8 bytes (overlapping when len < 16).
		a = @unaligned_load(*(ulong*)input.ptr, 1);
		b = @unaligned_load(*(ulong*)&input[^8], 1);
	}

	// First 128-bit multiply, folded down to 64 bits by XORing both halves.
	uint128 product = ((uint128)a ^ mix) * ((uint128)b ^ seed);
	ulong folded = (ulong)product ^ (ulong)(product >> 64);

	// Second multiply mixes in the input length; fold again for the result.
	product = ((uint128)mix ^ input.len) * (uint128)folded;
	return (ulong)product ^ (ulong)(product >> 64);
}
|
||||
Reference in New Issue
Block a user