diff --git a/releasenotes.md b/releasenotes.md
index 8a0bcbeed..8c54b9cc2 100644
--- a/releasenotes.md
+++ b/releasenotes.md
@@ -32,6 +32,7 @@
 - Fix `native_cpus` functionality for OpenBSD systems. #2387
 - Assert triggered when trying to slice a struct.
 - Improve codegen for stack allocated large non-zero arrays.
+- Implement `a5hash` in the compiler for compile-time `$$str_hash` to match `String.hash()`.
 - Functions being tested for overload are now always checked before test.
 - Compile time indexing at compile time in a $typeof was no considered compile time.
 
diff --git a/src/compiler/sema_builtins.c b/src/compiler/sema_builtins.c
index 122410162..f64104adc 100644
--- a/src/compiler/sema_builtins.c
+++ b/src/compiler/sema_builtins.c
@@ -313,7 +313,7 @@ bool sema_expr_analyse_str_hash(SemaContext *context, Expr *expr)
 	{
 		RETURN_SEMA_ERROR(inner, "You need a compile time constant string to take the hash of it.");
 	}
-	uint32_t hash = fnv1a(inner->const_expr.bytes.ptr, inner->const_expr.bytes.len);
+	uint32_t hash = (uint32_t)a5hash(inner->const_expr.bytes.ptr, inner->const_expr.bytes.len, 0);
 	expr_rewrite_const_int(expr, type_uint, hash);
 	return true;
 }
diff --git a/src/utils/lib.h b/src/utils/lib.h
index c0ab93aed..7b2e1c0ff 100644
--- a/src/utils/lib.h
+++ b/src/utils/lib.h
@@ -208,6 +208,7 @@ static inline int char_hex_to_nibble(char c);
 INLINE char char_nibble_to_hex(int c);
 
 static inline uint32_t fnv1a(const char *key, uint32_t len);
+static inline uint64_t a5hash(const char *key, uint32_t len, uint64_t seed);
 
 INLINE uint32_t vec_size(const void *vec);
 static inline void vec_resize(void *vec, uint32_t new_size);
@@ -256,6 +257,96 @@ static inline uint32_t fnv1a(const char *key, uint32_t len)
 	return hash;
 }
 
+// 64x64 -> 128 bit unsigned multiply; writes the low and high 64-bit halves of the product.
+// see: `int64_mult` in bigint.c - there is no need to import all these declarations just for this
+static inline void a5hash_mul128(uint64_t u, uint64_t v, uint64_t *lo, uint64_t *hi)
+{
+	uint64_t ul = u & 0xFFFFFFFF;
+	uint64_t vl = v & 0xFFFFFFFF;
+	uint64_t t = ul * vl;
+	uint64_t w3 = t & 0xFFFFFFFF;
+	uint64_t k = t >> 32;
+
+	u >>= 32;
+	t = u * vl + k;
+	k = t & 0xFFFFFFFF;
+	uint64_t w1 = t >> 32;
+
+	v >>= 32;
+	t = ul * v + k;
+
+	*hi = (u * v) + w1 + (t >> 32);
+	*lo = (t << 32) + w3;
+}
+
+// Read 4 bytes as a little-endian value; byte-wise, so no alignment or host byte order assumptions.
+static inline uint64_t a5hash_load32(const unsigned char *p)
+{
+	return (uint64_t)p[0] | ((uint64_t)p[1] << 8) | ((uint64_t)p[2] << 16) | ((uint64_t)p[3] << 24);
+}
+
+// Read 8 bytes as a little-endian value.
+static inline uint64_t a5hash_load64(const unsigned char *p)
+{
+	return a5hash_load32(p) | (a5hash_load32(p + 4) << 32);
+}
+
+// a5hash of the `len` bytes at `key`, mixed with `seed`; `$$str_hash` keeps the low 32 bits.
+// Bytes are read as unsigned and wide loads are assembled little-endian, so the result does
+// not depend on the host's `char` signedness, alignment rules or byte order.
+static inline uint64_t a5hash(const char *key, uint32_t len, uint64_t seed)
+{
+	const unsigned char *scroll = (const unsigned char *)key;
+	const unsigned char *end = scroll + len;
+	uint64_t widened_len = (uint64_t)len;
+	uint64_t seed1 = 0x243F6A8885A308D3 ^ widened_len;
+	uint64_t seed2 = 0x452821E638D01377 ^ widened_len;
+	uint64_t val10 = 0xAAAAAAAAAAAAAAAA;
+	uint64_t val01 = 0x5555555555555555;
+	uint64_t a = 0;
+	uint64_t b = 0;
+
+	a5hash_mul128(seed2 ^ (seed & val10), seed1 ^ (seed & val01), &seed1, &seed2);
+
+	val10 ^= seed2;
+
+	if (len > 16)
+	{
+		val01 ^= seed1;
+
+		for (; end - scroll > 16; scroll += 16)
+		{
+			a5hash_mul128(a5hash_load64(scroll) ^ seed1, a5hash_load64(scroll + 8) ^ seed2, &seed1, &seed2);
+
+			seed1 += val01;
+			seed2 += val10;
+		}
+
+		// The final 16 bytes may overlap bytes already consumed by the loop.
+		a = a5hash_load64(end - 16);
+		b = a5hash_load64(end - 8);
+	}
+	else if (len > 3)
+	{
+		// 4..16 bytes: four 32-bit reads that overlap as needed to cover the input.
+		uint32_t mid = (len >> 3) * 4;
+		a = (a5hash_load32(scroll) << 32) | a5hash_load32(end - 4);
+		b = (a5hash_load32(scroll + mid) << 32) | a5hash_load32(end - 4 - mid);
+	}
+	else if (len > 0)
+	{
+		// 1..3 bytes: pack the available bytes into the low bits of `a`, without sign extension.
+		a = scroll[0];
+		if (len > 1) a |= (uint64_t)scroll[1] << 8;
+		if (len > 2) a |= (uint64_t)scroll[2] << 16;
+	}
+
+	a5hash_mul128(a ^ seed1, b ^ seed2, &seed1, &seed2);
+	a5hash_mul128(val01 ^ seed1, seed2, &a, &b);
+
+	return a ^ b;
+}
+
 typedef struct
 {
 	uint32_t size;
diff --git a/test/test_suite/compile_time/concat_append.c3t b/test/test_suite/compile_time/concat_append.c3t
index 610213b5e..87cc0c9c5 100644
--- a/test/test_suite/compile_time/concat_append.c3t
+++ b/test/test_suite/compile_time/concat_append.c3t
@@ -57,6 +57,6 @@ entry:
   call void @llvm.memcpy.p0.p0.i32(ptr align 16 %e, ptr align 16 @.__const.7, i32 32, i1 false)
   store %"char[]" { ptr @.emptystr, i64 0 }, ptr %f, align 8
   store %"char[]" { ptr @.str.8, i64 3 }, ptr %g, align 8
-  store i32 1000299617, ptr %h, align 4
+  store i32 -1151103613, ptr %h, align 4
   ret i32 0
 }
diff --git a/test/test_suite/compile_time/ct_string_functions.c3t b/test/test_suite/compile_time/ct_string_functions.c3t
index 45f9ebb1e..5d8a898bc 100644
--- a/test/test_suite/compile_time/ct_string_functions.c3t
+++ b/test/test_suite/compile_time/ct_string_functions.c3t
@@ -6,6 +6,6 @@ fn int main()
 	$assert(@str_lower("Hello World") == "hello world");
 	$assert(@str_find("Hello World", "o") == 4);
 	$assert(@str_find("Hello World", "w") == -1);
-	$assert(@str_hash("Hello C3") == 487972447);
+	$assert(@str_hash("Hello C3") == 2193775821);
 	return 0;
 }
\ No newline at end of file
diff --git a/test/unit/stdlib/core/builtintests.c3 b/test/unit/stdlib/core/builtintests.c3
index 49c1692be..a565f2c74 100644
--- a/test/unit/stdlib/core/builtintests.c3
+++ b/test/unit/stdlib/core/builtintests.c3
@@ -174,6 +174,18 @@ fn void test_hash_repeat()
 	assert(int.typeid.hash() == int.typeid.hash());
 }
 
+fn void test_builtin_string_hashing() => @pool()
+{
+	var $x = "";
+	ulong l;
+	$for var $i = 0; $i < 65; ++$i: // 65 is a good length to reliably test all branches w/o being excessive
+		l = string::tformat("%s%s", $x, $i).hash();
+		var $r = $$str_hash(@sprintf("%s%s", $x, $i));
+		assert((uint)l == (uint)$r, "Builtin $$str_hash mismatch against String.hash()");
+		$x = $x +++ "a";
+	$endfor
+}
+
 fn void test_ct_clz()
 {
 	assert(@clz((ulong)0) == ulong.sizeof * 8);