From 604661b12c8dbb64b00d47da32b91328f3d66caf Mon Sep 17 00:00:00 2001
From: Zack Puhl
Date: Mon, 4 Aug 2025 05:50:17 -0400
Subject: [PATCH] Increase Primitive Type Hash Speeds (#2329)

* simplify and add much faster hash functions in key locations
* add benchmark runtime @start and @end macros for better control
* update benchmark reporting and hashmap tests

---------

Co-authored-by: Christoffer Lerno
---
 benchmarks/stdlib/collections/hashmap.c3 | 218 +++++++++++++++++++++++
 lib/std/collections/hashmap.c3           |  20 +--
 lib/std/core/builtin.c3                  |  20 ++-
 lib/std/core/runtime_benchmark.c3        |  75 +++++++-
 lib/std/math/random/math.seeder.c3       |   5 +-
 releasenotes.md                          |   1 +
 6 files changed, 314 insertions(+), 25 deletions(-)
 create mode 100644 benchmarks/stdlib/collections/hashmap.c3

diff --git a/benchmarks/stdlib/collections/hashmap.c3 b/benchmarks/stdlib/collections/hashmap.c3
new file mode 100644
index 000000000..e41bac5df
--- /dev/null
+++ b/benchmarks/stdlib/collections/hashmap.c3
@@ -0,0 +1,218 @@
+// Copyright (c) 2025 Zack Puhl . All rights reserved.
+// Use of this source code is governed by the MIT license
+// a copy of which can be found in the LICENSE_STDLIB file.
+//
+// Some benchmark test ideas are sourced from this article on C++ hashmap benchmarking:
+// https://martin.ankerl.com/2022/08/27/hashmap-bench-01/
+//
+module hashmap_benchmarks;
+
+import std::collections::map;
+import std::math::random;
+
+
+const DEFAULT_ITERATIONS = 16384;
+
+Lcg64Random rand; // shared generator, seeded with a fixed value in bench_setup
+
+HashMap { int, int } modifying_numbers_random; // filled with 1_000 random pairs in bench_setup
+
+fn void bench_setup() @init // sets iteration counts, seeds `rand`, fills `modifying_numbers_random`
+{
+	set_benchmark_warmup_iterations(3);
+	set_benchmark_max_iterations(DEFAULT_ITERATIONS);
+
+	// TODO: Cannot take the address of a @benchmark function. If we could, we could pass &insert_erase as a fn ptr and use the $qnameof CT eval internally.
+	set_benchmark_func_iterations($qnameof(insert_erase), 32);
+	set_benchmark_func_iterations($qnameof(random_access), 1024);
+
+	random::seed(&rand, 0x4528_21e6_38d0_1377);
+
+	for (usz i = 0; i < 1_000; ++i) modifying_numbers_random.set(rand.next_int(), rand.next_int());
+}
+
+
+// ==============================================================================================
+module hashmap_benchmarks @benchmark;
+
+import std::collections::map;
+
+import std::math::random;
+import std::encoding::base64;
+
+
+fn void generic_hash_speeds() // calls .hash() on a zero value of each scalar, vector and array type, plus strings and a typeid
+{
+	(char){}.hash();
+	(char[<100>]){}.hash();
+	(char[100]){}.hash();
+	(ichar){}.hash();
+	(ichar[<100>]){}.hash();
+	(ichar[100]){}.hash();
+	(short){}.hash();
+	(short[<100>]){}.hash();
+	(short[100]){}.hash();
+	(ushort){}.hash();
+	(ushort[<100>]){}.hash();
+	(ushort[100]){}.hash();
+	(int){}.hash();
+	(int[<100>]){}.hash();
+	(int[100]){}.hash();
+	(uint){}.hash();
+	(uint[<100>]){}.hash();
+	(uint[100]){}.hash();
+	(long){}.hash();
+	(long[<20>]){}.hash();
+	(long[100]){}.hash();
+	(ulong){}.hash();
+	(ulong[<20>]){}.hash();
+	(ulong[100]){}.hash();
+	(int128){}.hash();
+	(int128[<20>]){}.hash();
+	(int128[100]){}.hash();
+	(uint128){}.hash();
+	(uint128[<20>]){}.hash();
+	(uint128[100]){}.hash();
+	(bool){}.hash();
+	(bool[<100>]){}.hash();
+	(bool[100]){}.hash();
+	String x = "abc";
+	char[] y = "abc";
+	assert(x.hash() == y.hash());
+	String z1 = "This is a much longer string than the above value because longer values lead to longer hashing times.";
+	char[] z2 = "This is a much longer string than the above value because longer values lead to longer hashing times.";
+	assert(z1.hash() == z2.hash());
+	assert(int.typeid.hash());
+}
+
+
+fn void hash_speeds_of_many_random_values() => @pool() // builds the random inputs first; only the hashing loops sit between start/end
+{
+	var $arrsz = 10_000;
+	uint fake_checksum;
+
+	char[] chars = allocator::new_array(tmem, char, $arrsz)[:$arrsz];
+	foreach (&v : chars) *v = (char)random::next(&rand, uint.max);
+
+	ushort[] shorts = allocator::new_array(tmem, ushort, $arrsz)[:$arrsz];
+	foreach (&v : shorts) *v = (ushort)random::next(&rand, uint.max);
+
+	uint[] ints = allocator::new_array(tmem, uint, $arrsz)[:$arrsz];
+	foreach (&v : ints) *v = random::next(&rand, uint.max);
+
+	ulong[] longs = allocator::new_array(tmem, ulong, $arrsz)[:$arrsz];
+	foreach (&v : longs) *v = (ulong)random::next(&rand, uint.max);
+
+	uint128[] vwideints = allocator::new_array(tmem, uint128, $arrsz)[:$arrsz];
+	foreach (&v : vwideints) *v = (uint128)random::next(&rand, uint.max);
+
+	char[48][] zstrs = allocator::new_array(tmem, char[48], $arrsz)[:$arrsz];
+	String[$arrsz] strs;
+	foreach (x, &v : zstrs)
+	{
+		foreach (&c : (*v)[:random::next(&rand, 48)]) *c = (char)random::next(&rand, char.max);
+		strs[x] = ((ZString)&v[0]).str_view();
+	}
+
+	runtime::@start_benchmark();
+	foreach (v : chars) fake_checksum += v.hash();
+	foreach (v : shorts) fake_checksum += v.hash();
+	foreach (v : ints) fake_checksum += v.hash();
+	foreach (v : longs) fake_checksum += v.hash();
+	foreach (v : vwideints) fake_checksum += v.hash();
+	foreach (v : strs) fake_checksum += v.hash();
+	runtime::@end_benchmark();
+}
+
+
+fn void modifying_numbers_init_from_map() => @pool() // initializes a map from `modifying_numbers_random`, then frees it
+{
+	HashMap { int, int } v;
+	v.tinit_from_map(&modifying_numbers_random);
+	v.free();
+}
+
+
+fn void insert_erase() => @pool() // timed region: `iters` insertions followed by `iters` removals
+{
+	uint iters = 1_000_000;
+	HashMap { int, int } v;
+	v.tinit();
+
+	runtime::@start_benchmark();
+	for (int i = 0; i < iters; ++i) v[i] = i;
+	for (int i = 0; i < iters; ++i) v.remove(i);
+
+	runtime::@end_benchmark();
+
+	v.free();
+}
+
+
+fn void random_access() => @pool() // timed region: 1_000_000 lookups with keys derived from i.hash() % bound
+{
+	HashMap { int, int } v;
+	v.tinit();
+
+	uint bound = 10_000;
+	usz pseudo_checksum = 0;
+
+	for (uint i = 0; i < bound; ++i) v[i] = i;
+
+	runtime::@start_benchmark();
+	for (uint i = 0; i < 1_000_000; ++i) pseudo_checksum += (v[i.hash() % bound] ?? 0);
+	runtime::@end_benchmark();
+
+	v.free();
+}
+
+
+fn void random_access_erase() => @pool() // timed region: one overwrite plus one random removal per step, `bound` steps
+{
+	HashMap { int, int } v;
+	v.tinit();
+
+	uint bound = 10_000;
+
+	for (uint i = 0; i < bound; ++i) v[i] = i;
+
+	runtime::@start_benchmark();
+	for (uint i = 0; i < bound; ++i)
+	{
+		v[i.hash() % bound] = i; // supplant an entry
+
+		v.remove(random::next(&rand, bound)); // remove a random entry
+	}
+	runtime::@end_benchmark();
+
+	v.free();
+}
+
+
+fn void random_access_string_keys() => @pool() // timed region: saved.len lookups of randomly chosen base64 string keys
+{
+	HashMap { String, ulong } v;
+	v.tinit();
+
+	usz pseudo_checksum = 0;
+	String[5_000] saved;
+
+	for (usz i = 0; i < saved.len; ++i)
+	{
+		ulong hash = i.hash();
+		String b64key = base64::tencode(@as_char_view(hash));
+
+		v[b64key] = hash;
+
+		saved[i] = b64key; // the loop bound is saved.len, so the index is always in range
+	}
+
+	runtime::@start_benchmark();
+	for (usz i = 0; i < saved.len; ++i)
+	{
+		pseudo_checksum += v[ saved[random::next(&rand, saved.len)] ]!! % 512;
+	}
+	runtime::@end_benchmark();
+
+	v.free();
+}
diff --git a/lib/std/collections/hashmap.c3 b/lib/std/collections/hashmap.c3
index bc02eb046..d6e6cced0 100644
--- a/lib/std/collections/hashmap.c3
+++ b/lib/std/collections/hashmap.c3
@@ -229,15 +229,15 @@ fn bool HashMap.has_key(&map, Key key)
 fn bool HashMap.set(&map, Key key, Value value) @operator([]=)
 {
 	// If the map isn't initialized, use the defaults to initialize it.
-    switch (map.allocator.ptr)
-    {
-        case &dummy:
-            map.init(mem);
-        case null:
-            map.tinit();
-        default:
-            break;
-    }
+	switch (map.allocator.ptr)
+	{
+		case &dummy:
+			map.init(mem);
+		case null:
+			map.tinit();
+		default:
+			break;
+	}
 	uint hash = rehash(key.hash());
 	uint index = index_for(hash, map.table.len);
 	for (Entry *e = map.table[index]; e != null; e = e.next)
@@ -421,7 +421,7 @@ fn usz? 
HashMap.to_format(&self, Formatter* f) @dynamic
 	{
 		if (len > 2) len += f.print(", ")!;
 		len += f.printf("%s: %s", entry.key, entry.value)!;
-    };
+	};
 	return len + f.print(" }");
 }
 
diff --git a/lib/std/core/builtin.c3 b/lib/std/core/builtin.c3
index 92cfc929b..b5d88304a 100644
--- a/lib/std/core/builtin.c3
+++ b/lib/std/core/builtin.c3
@@ -569,8 +569,8 @@ macro uint ichar[<*>].hash(self) => hash_vec(self);
 macro uint bool[<*>].hash(self) => hash_vec(self);
 
 macro uint typeid.hash(typeid t) => @generic_hash(((ulong)(uptr)t));
-macro uint String.hash(String c) => (uint)fnv32a::hash(c);
-macro uint char[].hash(char[] c) => (uint)fnv32a::hash(c);
+macro uint String.hash(String c) => (uint)a5hash::hash(c);
+macro uint char[].hash(char[] c) => (uint)a5hash::hash(c);
 macro uint void*.hash(void* ptr) => @generic_hash(((ulong)(uptr)ptr));
 
 <*
@@ -578,7 +578,13 @@ macro uint void*.hash(void* ptr) => @generic_hash(((ulong)(uptr)ptr));
 *>
 macro uint hash_array(array_ptr) @local
 {
-	return (uint)fnv32a::hash(((char*)array_ptr)[:$sizeof(*array_ptr)]);
+	var $len = $sizeof(*array_ptr); // byte size of the pointed-to array, known at compile time
+
+	$if $len > 16:
+		return (uint)komi::hash(((char*)array_ptr)[:$len]);
+	$else
+		return (uint)wyhash2::hash(((char*)array_ptr)[:$len]);
+	$endif
 }
 
 <*
@@ -586,7 +592,13 @@ macro uint hash_array(array_ptr) @local
 *>
 macro uint hash_vec(vec) @local
 {
-	return (uint)fnv32a::hash(((char*)&&vec)[:$sizeof(vec.len * $typeof(vec).inner.sizeof)]);
+	var $len = $typeof(vec).len * $typeof(vec).inner.sizeof; // element bytes; $sizeof(<product>) would yield the size of the product's type instead
+
+	$if $len > 16:
+		return (uint)komi::hash(((char*)&&vec)[:$len]);
+	$else
+		return (uint)wyhash2::hash(((char*)&&vec)[:$len]);
+	$endif
 }
 
 const MAX_FRAMEADDRESS = 128;
diff --git a/lib/std/core/runtime_benchmark.c3 b/lib/std/core/runtime_benchmark.c3
index 0cb98d1f8..277374600 100644
--- a/lib/std/core/runtime_benchmark.c3
+++ b/lib/std/core/runtime_benchmark.c3
@@ -1,7 +1,9 @@
 module std::core::runtime;
-import libc, std::time, std::io, std::sort;
+import libc, std::time, std::io, std::sort, std::math, std::collections::map;
 
-alias BenchmarkFn = fn void();
+alias BenchmarkFn = fn void ();
+
+HashMap { String, uint } bench_fn_iters @local; // iteration count per benchmark, keyed by benchmark name
 
 struct BenchmarkUnit
 {
@@ -17,6 +19,7 @@ fn BenchmarkUnit[] benchmark_collection_create(Allocator allocator)
 	foreach (i, benchmark : fns)
 	{
 		benchmarks[i] = { names[i], fns[i] };
+		if (!bench_fn_iters.has_key(names[i])) bench_fn_iters[names[i]] = benchmark_max_iterations; // keep any count set earlier
 	}
 	return benchmarks;
 }
@@ -36,6 +39,30 @@ fn void set_benchmark_max_iterations(uint value) @builtin
 {
 	assert(value > 0);
 	benchmark_max_iterations = value;
+	foreach (k : bench_fn_iters.key_iter()) bench_fn_iters[k] = value; // also overwrites every per-function count already stored
+}
+
+fn void set_benchmark_func_iterations(String func, uint value) @builtin // sets the iteration count for the benchmark named `func`
+{
+	assert(value > 0);
+	bench_fn_iters[func] = value;
+}
+
+
+Clock benchmark_clock @local; // written by @start_benchmark
+NanoDuration benchmark_nano_seconds @local; // written by @end_benchmark
+DString benchmark_log @local; // filled by @log_benchmark, printed by default_benchmark_runner
+bool benchmark_warming @local; // true while warmup iterations run
+uint this_iteration @local; // index of the iteration currently being measured
+
+macro @start_benchmark() => benchmark_clock = std::time::clock::now();
+macro @end_benchmark() => benchmark_nano_seconds = benchmark_clock.mark();
+macro @log_benchmark(msg, args...) 
=> @pool()
+{
+	if (benchmark_warming) return; // nothing is logged during warmup iterations
+
+	benchmark_log.appendf("%s [%d]: ", $$FUNC, this_iteration);
+	benchmark_log.appendfn(msg, ...args);
 }
 
 fn bool run_benchmarks(BenchmarkUnit[] benchmarks)
@@ -61,33 +88,58 @@ fn bool run_benchmarks(BenchmarkUnit[] benchmarks)
 	long sys_clock_started;
 	long sys_clock_finished;
 	long sys_clocks;
-	Clock clock;
 
-	foreach(unit : benchmarks)
+	foreach (unit : benchmarks)
 	{
 		defer name.clear();
 		name.appendf("Benchmarking %s ", unit.name);
 		name.append_repeat('.', max_name - unit.name.len + 2);
 		io::printf("%s ", name.str_view());
 
+		benchmark_warming = true;
 		for (uint i = 0; i < benchmark_warmup_iterations; i++)
 		{
 			unit.func() @inline;
 		}
+		benchmark_warming = false;
 
-		clock = std::time::clock::now();
+		NanoDuration running_timer; // sum of the per-iteration durations
 		sys_clock_started = $$sysclock();
 
-		for (uint i = 0; i < benchmark_max_iterations; i++)
+		uint current_benchmark_iterations = bench_fn_iters[unit.name] ?? benchmark_max_iterations;
+		char[] perc_str = { [0..19] = ' ', [20] = 0 };
+		int perc = 0;
+
+		for (this_iteration = 0; this_iteration < current_benchmark_iterations; ++this_iteration)
 		{
+			perc_str[0..(uint)math::floor((this_iteration / (float)current_benchmark_iterations) * 20)] = '#';
+			perc = (uint)math::ceil(100 * (this_iteration / (float)current_benchmark_iterations));
+
+			io::printf("\r%s [%s] %d / %d (%d%%)", name.str_view(), (ZString)perc_str, this_iteration, current_benchmark_iterations, perc);
+			io::stdout().flush()!!;
+
+			benchmark_nano_seconds = {}; // clear the previous result so the fallback below fires on every iteration, not only the first
+			@start_benchmark(); // can be overridden by calls inside the unit's func
+
 			unit.func() @inline;
+
+			if (!benchmark_nano_seconds) @end_benchmark(); // the unit did not stop the clock itself
+			running_timer += benchmark_nano_seconds;
 		}
 
 		sys_clock_finished = $$sysclock();
-		NanoDuration nano_seconds = clock.mark();
 		sys_clocks = sys_clock_finished - sys_clock_started;
 
-		io::printfn("[COMPLETE] %.2f ns, %.2f CPU's clocks", (float)nano_seconds / benchmark_max_iterations, (float)sys_clocks / benchmark_max_iterations);
+		float clock_cycles = (float)sys_clocks / current_benchmark_iterations;
+		float measurement = (float)running_timer / current_benchmark_iterations;
+		String[] units = { "nanoseconds", "microseconds", "milliseconds", "seconds" };
+		float adjusted_measurement = measurement;
+		usz unit_index = 0; // scaled together with the value so the label can never disagree or index out of range
+		for (; adjusted_measurement >= 1_000 && unit_index + 1 < units.len; ++unit_index) adjusted_measurement /= 1_000;
+
+		io::printf("\r%s ", name.str_view());
+		io::printfn("[COMPLETE] %.2f %s, %.2f CPU clocks, %d iterations",
+			adjusted_measurement, units[unit_index], clock_cycles, current_benchmark_iterations);
 	}
 
 	io::printfn("\n%d benchmark%s run.\n", benchmarks.len, benchmarks.len > 1 ? "s" : "");
@@ -96,5 +148,12 @@ fn bool run_benchmarks(BenchmarkUnit[] benchmarks)
 
 fn bool default_benchmark_runner(String[] args) => @pool()
 {
+	benchmark_log.init(mem);
+	defer
+	{
+		if (benchmark_log.len()) io::printfn("\n---------- BENCHMARK LOG ----------\n%s\n", benchmark_log.str_view());
+		benchmark_log.free();
+	}
+
 	return run_benchmarks(benchmark_collection_create(tmem));
 }
diff --git a/lib/std/math/random/math.seeder.c3 b/lib/std/math/random/math.seeder.c3
index 6011b6601..ba9dee0f6 100644
--- a/lib/std/math/random/math.seeder.c3
+++ b/lib/std/math/random/math.seeder.c3
@@ -1,5 +1,5 @@
 module std::math::random;
-import std::hash::fnv32a, std::time;
+import std::hash::a5hash, std::time;
 
 const ODD_PHI64 @local = 0x9e3779b97f4a7c15;
 const MUL_MCG64 @local = 0xf1357aea2e62a9c5;
@@ -69,12 +69,11 @@ fn void seeder(char[] input, char[] out_buffer)
 
 macro uint hash(value) @local
 {
-	return fnv32a::hash(&&bitcast(value, char[$sizeof(value)]));
+	return (uint)a5hash::hash(&&bitcast(value, char[$sizeof(value)]));
 }
 
 fn char[8 * 4] entropy() @if(!env::WASM_NOLIBC)
 {
-
 	void* addr = malloc(1);
 	free(addr);
 	static uint random_int;
diff --git a/releasenotes.md b/releasenotes.md
index 8d6e35260..7f4d0ac3a 100644
--- a/releasenotes.md
+++ b/releasenotes.md
@@ -18,6 +18,7 @@
 - Add OpenBSD to `env::INET_DEVICES` and add required socket constants.
 - Added `FileMmap` to manage memory mapped files.
 - Add `vm::mmap_file` to memory map a file.
+- Updated hash functions in default hash methods.
 
 ## 0.7.4 Change list