Fix $$str_hash to use a5hash like String.hash() (#2403)

* Fix `$$str_hash` to use `a5hash` like `String.hash()`
This commit is contained in:
Zack Puhl
2025-08-14 14:24:01 -04:00
committed by GitHub
parent 85dc9c45ab
commit eb8fb8871f
6 changed files with 94 additions and 3 deletions

View File

@@ -32,6 +32,7 @@
- Fix `native_cpus` functionality for OpenBSD systems. #2387
- Assert triggered when trying to slice a struct.
- Improve codegen for stack allocated large non-zero arrays.
- Implement `a5hash` in the compiler for compile-time `$$str_hash` to match `String.hash()`.
- Functions being tested for overload are now always checked before test.
- Compile time indexing at compile time in a $typeof was not considered compile time.

View File

@@ -313,7 +313,7 @@ bool sema_expr_analyse_str_hash(SemaContext *context, Expr *expr)
{
RETURN_SEMA_ERROR(inner, "You need a compile time constant string to take the hash of it.");
}
uint32_t hash = fnv1a(inner->const_expr.bytes.ptr, inner->const_expr.bytes.len);
uint32_t hash = (uint32_t)a5hash(inner->const_expr.bytes.ptr, inner->const_expr.bytes.len, 0);
expr_rewrite_const_int(expr, type_uint, hash);
return true;
}

View File

@@ -208,6 +208,7 @@ static inline int char_hex_to_nibble(char c);
INLINE char char_nibble_to_hex(int c);
static inline uint32_t fnv1a(const char *key, uint32_t len);
static inline uint64_t a5hash(const char *key, uint32_t len, uint64_t seed);
INLINE uint32_t vec_size(const void *vec);
static inline void vec_resize(void *vec, uint32_t new_size);
@@ -256,6 +257,83 @@ static inline uint32_t fnv1a(const char *key, uint32_t len)
return hash;
}
// Full 64x64 -> 128-bit unsigned multiply, built from 32-bit limbs.
// see: `int64_mult` in bigint.c - there is no need to import all these declarations just for this
//
// Stores the low 64 bits of u * v in *lo and the high 64 bits in *hi.
// u and v are taken by value and the outputs are only written, never read,
// so callers may pass pointers to the variables the arguments were derived from.
static inline void _a5mul(uint64_t u, uint64_t v, uint64_t *lo, uint64_t *hi)
{
	uint64_t ul = u & 0xFFFFFFFF;
	uint64_t vl = v & 0xFFFFFFFF;
	uint64_t t = ul * vl;
	uint64_t w3 = t & 0xFFFFFFFF;
	uint64_t k = t >> 32;
	u >>= 32;
	t = u * vl + k;
	k = t & 0xFFFFFFFF;
	uint64_t w1 = t >> 32;
	v >>= 32;
	t = ul * v + k;
	*hi = (u * v) + w1 + (t >> 32);
	*lo = (t << 32) + w3;
}

// Reads the 4 bytes at p as a little-endian 32-bit value.
// Assembled byte by byte, so p needs no particular alignment and the result
// does not depend on the byte order of the machine running this code.
static inline uint32_t a5hash_load_u32(const unsigned char *p)
{
	return (uint32_t)p[0]
	       | ((uint32_t)p[1] << 8)
	       | ((uint32_t)p[2] << 16)
	       | ((uint32_t)p[3] << 24);
}

// Reads the 8 bytes at p as a little-endian 64-bit value (see a5hash_load_u32).
static inline uint64_t a5hash_load_u64(const unsigned char *p)
{
	return (uint64_t)a5hash_load_u32(p) | ((uint64_t)a5hash_load_u32(p + 4) << 32);
}

// Computes the 64-bit a5hash of the `len` bytes starting at `key`, mixed with `seed`.
//
// key  - input bytes; never dereferenced (and no pointer arithmetic is done on it) when len == 0.
// len  - number of bytes to hash.
// seed - caller-chosen seed.
//
// Input bytes are always read as unsigned values, so bytes >= 0x80 contribute
// 0x80..0xFF regardless of whether plain `char` is signed on the host, and all
// multi-byte reads go through the byte-wise little-endian loaders above.
static inline uint64_t a5hash(const char *key, uint32_t len, uint64_t seed)
{
	const unsigned char *msg = (const unsigned char *)key;
	uint64_t widened_len = (uint64_t)len;
	uint64_t seed1 = 0x243F6A8885A308D3ULL ^ widened_len;
	uint64_t seed2 = 0x452821E638D01377ULL ^ widened_len;
	uint64_t val10 = 0xAAAAAAAAAAAAAAAAULL;
	uint64_t val01 = 0x5555555555555555ULL;
	uint64_t a;
	uint64_t b;

	_a5mul(seed2 ^ (seed & val10), seed1 ^ (seed & val01), &seed1, &seed2);
	val10 ^= seed2;

	if (len > 16)
	{
		uint32_t remaining = len;
		val01 ^= seed1;
		// Consume 16-byte chunks while more than 16 bytes remain.
		for (; remaining > 16; remaining -= 16, msg += 16)
		{
			_a5mul(a5hash_load_u64(msg) ^ seed1, a5hash_load_u64(msg + 8) ^ seed2, &seed1, &seed2);
			seed1 += val01;
			seed2 += val10;
		}
		// The final mix always uses the last 16 bytes of the input, which may
		// overlap bytes already consumed by the loop above.
		const unsigned char *end = msg + remaining;
		a = a5hash_load_u64(end - 16);
		b = a5hash_load_u64(end - 8);
	}
	else if (len > 3)
	{
		// 4..16 bytes: four (possibly overlapping) 32-bit reads anchored at
		// the start and the end of the input.
		const unsigned char *last4 = msg + len - 4;
		uint32_t mid = (len >> 3) * 4;
		a = ((uint64_t)a5hash_load_u32(msg) << 32) | a5hash_load_u32(last4);
		b = ((uint64_t)a5hash_load_u32(msg + mid) << 32) | a5hash_load_u32(last4 - mid);
	}
	else
	{
		// 0..3 bytes: pack whatever bytes exist into the low bits of a.
		a = 0;
		if (len > 0)
		{
			a |= (uint64_t)msg[0];
		}
		if (len > 1)
		{
			a |= (uint64_t)msg[1] << 8;
		}
		if (len > 2)
		{
			a |= (uint64_t)msg[2] << 16;
		}
		b = 0;
	}

	_a5mul(a ^ seed1, b ^ seed2, &seed1, &seed2);
	_a5mul(val01 ^ seed1, seed2, &a, &b);
	return a ^ b;
}
typedef struct
{
uint32_t size;

View File

@@ -57,6 +57,6 @@ entry:
call void @llvm.memcpy.p0.p0.i32(ptr align 16 %e, ptr align 16 @.__const.7, i32 32, i1 false)
store %"char[]" { ptr @.emptystr, i64 0 }, ptr %f, align 8
store %"char[]" { ptr @.str.8, i64 3 }, ptr %g, align 8
store i32 1000299617, ptr %h, align 4
store i32 -1151103613, ptr %h, align 4
ret i32 0
}

View File

@@ -6,6 +6,6 @@ fn int main()
$assert(@str_lower("Hello World") == "hello world");
$assert(@str_find("Hello World", "o") == 4);
$assert(@str_find("Hello World", "w") == -1);
$assert(@str_hash("Hello C3") == 487972447);
$assert(@str_hash("Hello C3") == 2193775821);
return 0;
}

View File

@@ -174,6 +174,18 @@ fn void test_hash_repeat()
assert(int.typeid.hash() == int.typeid.hash());
}
// Cross-checks the compile-time `$$str_hash` builtin against the runtime `String.hash()`.
// Every iteration hashes the same text both ways: `$x` (a run of 'a' characters that grows
// by one per iteration) followed by the loop index. Assuming `%s` prints the index in
// decimal, the inputs have lengths 1..10 and 12..66 and consist of ASCII characters only.
// NOTE(review): the `@pool()` wrapper is presumably there for `string::tformat`'s
// temporary allocation - confirm.
fn void test_builtin_string_hashing() => @pool()
{
var $x = "";
ulong l;
$for var $i = 0; $i < 65; ++$i: // 65 is a good length to reliably test all branches w/o being excessive
// Runtime side: format the string, then hash it with String.hash().
l = string::tformat("%s%s", $x, $i).hash();
// Compile-time side: build the same text with @sprintf and hash it with the builtin.
var $r = $$str_hash(@sprintf("%s%s", $x, $i));
// Both results are narrowed to uint before being compared.
assert((uint)l == (uint)$r, "Builtin $$str_hash mismatch against String.hash()");
// Grow the prefix by one character for the next iteration.
$x = $x +++ "a";
$endfor
}
fn void test_ct_clz()
{
assert(@clz((ulong)0) == ulong.sizeof * 8);