mirror of
https://github.com/c3lang/c3c.git
synced 2026-02-27 03:51:18 +00:00
Fix $$str_hash to use a5hash like String.hash() (#2403)
* Fix `$$str_hash` to use `a5hash` like `String.hash()`
This commit is contained in:
@@ -32,6 +32,7 @@
|
||||
- Fix `native_cpus` functionality for OpenBSD systems. #2387
|
||||
- Assert triggered when trying to slice a struct.
|
||||
- Improve codegen for stack allocated large non-zero arrays.
|
||||
- Implement `a5hash` in the compiler for compile-time `$$str_hash` to match `String.hash()`.
|
||||
- Functions being tested for overload are now always checked before test.
|
||||
- Compile time indexing in a $typeof was not considered compile time.
|
||||
|
||||
|
||||
@@ -313,7 +313,7 @@ bool sema_expr_analyse_str_hash(SemaContext *context, Expr *expr)
|
||||
{
|
||||
RETURN_SEMA_ERROR(inner, "You need a compile time constant string to take the hash of it.");
|
||||
}
|
||||
uint32_t hash = fnv1a(inner->const_expr.bytes.ptr, inner->const_expr.bytes.len);
|
||||
uint32_t hash = (uint32_t)a5hash(inner->const_expr.bytes.ptr, inner->const_expr.bytes.len, 0);
|
||||
expr_rewrite_const_int(expr, type_uint, hash);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -208,6 +208,7 @@ static inline int char_hex_to_nibble(char c);
|
||||
INLINE char char_nibble_to_hex(int c);
|
||||
|
||||
static inline uint32_t fnv1a(const char *key, uint32_t len);
|
||||
static inline uint64_t a5hash(const char *key, uint32_t len, uint64_t seed);
|
||||
|
||||
INLINE uint32_t vec_size(const void *vec);
|
||||
static inline void vec_resize(void *vec, uint32_t new_size);
|
||||
@@ -256,6 +257,83 @@ static inline uint32_t fnv1a(const char *key, uint32_t len)
|
||||
return hash;
|
||||
}
|
||||
|
||||
// see: `int64_mult` in bigint.c - there is no need to import all these declarations just for this
// Multiplies two unsigned 64-bit values and stores the full 128-bit product as
// two 64-bit halves: `*lo` receives bits 0..63, `*hi` receives bits 64..127.
// The product is assembled from 32-bit partial products, so no 128-bit integer
// type is required. `lo` and `hi` must point to distinct objects.
static inline void _a5mul(uint64_t u, uint64_t v, uint64_t *lo, uint64_t *hi)
{
	uint64_t ul = u & 0xFFFFFFFF;
	uint64_t vl = v & 0xFFFFFFFF;
	uint64_t t = ul * vl;          // low(u) * low(v)
	uint64_t w3 = t & 0xFFFFFFFF;  // bits 0..31 of the product
	uint64_t k = t >> 32;          // carry into the middle partial products

	u >>= 32;
	t = u * vl + k;                // high(u) * low(v) + carry
	k = t & 0xFFFFFFFF;
	uint64_t w1 = t >> 32;

	v >>= 32;
	t = ul * v + k;                // low(u) * high(v) + carry

	*hi = (u * v) + w1 + (t >> 32);
	*lo = (t << 32) + w3;
}

// Reads 4 bytes starting at `p` as a little-endian 32-bit value.
// Going byte by byte through `unsigned char` keeps the read valid for any
// alignment and never sign-extends bytes >= 0x80.
static inline uint32_t a5hash_load32(const char *p)
{
	const unsigned char *b = (const unsigned char *)p;
	return (uint32_t)b[0]
	       | ((uint32_t)b[1] << 8)
	       | ((uint32_t)b[2] << 16)
	       | ((uint32_t)b[3] << 24);
}

// Reads 8 bytes starting at `p` as a little-endian 64-bit value.
static inline uint64_t a5hash_load64(const char *p)
{
	return (uint64_t)a5hash_load32(p) | ((uint64_t)a5hash_load32(p + 4) << 32);
}

// Computes the 64-bit a5hash of the `len` bytes at `key`, mixed with `seed`.
//
// key:  bytes to hash; never dereferenced or offset when `len` is 0.
// len:  number of bytes to hash.
// seed: caller-chosen seed value.
// Returns the 64-bit hash.
//
// Every input byte is treated as an unsigned value in the range 0..255.
// Reading through plain `char` would sign-extend bytes >= 0x80 on platforms
// where `char` is signed and change the hash of short non-ASCII strings.
//
// NOTE(review): multi-byte groups are assembled in little-endian order. That is
// identical to the previous native loads on little-endian hosts; on a
// big-endian host the result now differs - confirm against the runtime
// `String.hash()` for such targets.
static inline uint64_t a5hash(const char *key, uint32_t len, uint64_t seed)
{
	uint64_t widened_len = (uint64_t)len;
	uint64_t seed1 = 0x243F6A8885A308D3ULL ^ widened_len;
	uint64_t seed2 = 0x452821E638D01377ULL ^ widened_len;
	uint64_t val10 = 0xAAAAAAAAAAAAAAAAULL;
	uint64_t val01 = 0x5555555555555555ULL;
	uint64_t a = 0;
	uint64_t b = 0;

	_a5mul(seed2 ^ (seed & val10), seed1 ^ (seed & val01), &seed1, &seed2);

	val10 ^= seed2;

	if (len > 3)
	{
		const char *scroll = key;
		const char *end = key + len;

		if (len > 16)
		{
			val01 ^= seed1;

			// Consume 16-byte groups while more than 16 bytes remain.
			for (; end - scroll > 16; scroll += 16)
			{
				_a5mul(a5hash_load64(scroll) ^ seed1, a5hash_load64(scroll + 8) ^ seed2, &seed1, &seed2);

				seed1 += val01;
				seed2 += val10;
			}

			// The final 16 bytes of the input; they may overlap the last group read above.
			a = a5hash_load64(end - 16);
			b = a5hash_load64(end - 8);
		}
		else
		{
			// 4..16 bytes: four (possibly overlapping) 32-bit reads cover the whole input.
			uint32_t mid = (len >> 3) * 4;

			a = ((uint64_t)a5hash_load32(scroll) << 32) | a5hash_load32(end - 4);
			b = ((uint64_t)a5hash_load32(scroll + mid) << 32) | a5hash_load32(end - 4 - mid);
		}
	}
	else if (len > 0)
	{
		// 1..3 bytes: pack the bytes into the low bits of `a`; `b` stays 0.
		const unsigned char *bytes = (const unsigned char *)key;

		a = bytes[0];
		if (len > 1) a |= (uint64_t)bytes[1] << 8;
		if (len > 2) a |= (uint64_t)bytes[2] << 16;
	}

	_a5mul(a ^ seed1, b ^ seed2, &seed1, &seed2);
	_a5mul(val01 ^ seed1, seed2, &a, &b);

	return a ^ b;
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32_t size;
|
||||
|
||||
@@ -57,6 +57,6 @@ entry:
|
||||
call void @llvm.memcpy.p0.p0.i32(ptr align 16 %e, ptr align 16 @.__const.7, i32 32, i1 false)
|
||||
store %"char[]" { ptr @.emptystr, i64 0 }, ptr %f, align 8
|
||||
store %"char[]" { ptr @.str.8, i64 3 }, ptr %g, align 8
|
||||
store i32 1000299617, ptr %h, align 4
|
||||
store i32 -1151103613, ptr %h, align 4
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
@@ -6,6 +6,6 @@ fn int main()
|
||||
$assert(@str_lower("Hello World") == "hello world");
|
||||
$assert(@str_find("Hello World", "o") == 4);
|
||||
$assert(@str_find("Hello World", "w") == -1);
|
||||
$assert(@str_hash("Hello C3") == 487972447);
|
||||
$assert(@str_hash("Hello C3") == 2193775821);
|
||||
return 0;
|
||||
}
|
||||
@@ -174,6 +174,18 @@ fn void test_hash_repeat()
|
||||
assert(int.typeid.hash() == int.typeid.hash());
|
||||
}
|
||||
|
||||
// Checks that the compile-time `$$str_hash` builtin yields the same value as the
// runtime `String.hash()` once both are truncated to `uint`.
// Each of the 65 iterations hashes `$x` followed by the decimal iteration index,
// and `$x` grows by one "a" per iteration, so the hashed string gets longer each time.
fn void test_builtin_string_hashing() => @pool()
|
||||
{
|
||||
var $x = "";
|
||||
ulong l;
|
||||
$for var $i = 0; $i < 65; ++$i: // 65 is a good length to reliably test all branches w/o being excessive
|
||||
// Runtime hash of the formatted string.
l = string::tformat("%s%s", $x, $i).hash();
|
||||
// Compile-time hash of the same text.
var $r = $$str_hash(@sprintf("%s%s", $x, $i));
|
||||
// Both sides are cast to uint before the comparison.
assert((uint)l == (uint)$r, "Builtin $$str_hash mismatch against String.hash()");
|
||||
// Append one more "a" for the next iteration.
$x = $x +++ "a";
|
||||
$endfor
|
||||
}
|
||||
|
||||
fn void test_ct_clz()
|
||||
{
|
||||
assert(@clz((ulong)0) == ulong.sizeof * 8);
|
||||
|
||||
Reference in New Issue
Block a user