From 35c04cdc3657371b8319a48c8041f282b6990bb1 Mon Sep 17 00:00:00 2001 From: Zack Puhl Date: Mon, 25 Aug 2025 08:23:14 -0400 Subject: [PATCH] Add form-feed and vertical tab to `trim` defaults (#2407) * Add form-feed and vertical tab to `trim` defaults * add some initial string-based benchmarking * update to non-const string * do not account for mem times in bench * misc bench fixes to repair reporting times; improve trim tests * ok last one for real..remove (void) casts * finally, swap to more efficient default whitespace order in `trim` --- benchmarks/stdlib/core/string_trim.c3 | 46 ++++++++++++++++++++++++++ lib/std/core/runtime_benchmark.c3 | 47 ++++++++++++++++++--------- lib/std/core/string.c3 | 6 ++-- 3 files changed, 80 insertions(+), 19 deletions(-) create mode 100644 benchmarks/stdlib/core/string_trim.c3 diff --git a/benchmarks/stdlib/core/string_trim.c3 b/benchmarks/stdlib/core/string_trim.c3 new file mode 100644 index 000000000..6578b0f2f --- /dev/null +++ b/benchmarks/stdlib/core/string_trim.c3 @@ -0,0 +1,46 @@ +module string_trim_wars; + +const String WHITESPACE_TARGET = " \n\t\r\f\va \tbcde\v\f\r\t\n "; +const String WHITESPACE_NUMERIC_TARGET = " 25290 0969 99a \tbcde12332 34 43 0000"; + +fn void initialize_bench() @init +{ + set_benchmark_warmup_iterations(64); + set_benchmark_max_iterations(1 << 24); +} + +macro void trim_bench($trim_str, String $target = WHITESPACE_TARGET) => @pool() +{ + String s1; + String s2 = $target.tcopy(); + + runtime::@start_benchmark(); + + $switch: + $case @typeis($trim_str, String): + s1 = s2.trim($trim_str); + $case @typeis($trim_str, AsciiCharset): + s1 = s2.trim_charset($trim_str); + $default: $error "Unable to determine the right String `trim` operation to use."; + $endswitch + + @volatile_load(s1); + + runtime::@end_benchmark(); +} + + +module string_trim_wars @benchmark; + +fn void trim_control() => trim_bench(" "); // only spaces + +fn void trim_whitespace_default() => trim_bench("\t\n\r "); // default set +fn 
void trim_whitespace_default_ordered() => trim_bench(" \n\t\r"); // default \w set, but ordered by expected freq + +fn void trim_whitespace_bad() => trim_bench("\f\v\n\t\r "); // bad-perf ordering, all \w + +fn void trim_whitespace_ordered_extended() => trim_bench(" \n\t\r\f\v"); // proposed ordering, all \w +fn void trim_charset_whitespace() => trim_bench(ascii::WHITESPACE_SET); // use charset, all \w + +fn void trim_many() => trim_bench(" \n\t\r\f\v0123456789", WHITESPACE_NUMERIC_TARGET); // ordered, all \w + num +fn void trim_charset_many() => trim_bench(ascii::WHITESPACE_SET | ascii::NUMBER_SET, WHITESPACE_NUMERIC_TARGET); // set, all \w + num diff --git a/lib/std/core/runtime_benchmark.c3 b/lib/std/core/runtime_benchmark.c3 index dcf56a73f..b57467afc 100644 --- a/lib/std/core/runtime_benchmark.c3 +++ b/lib/std/core/runtime_benchmark.c3 @@ -51,12 +51,24 @@ fn void set_benchmark_func_iterations(String func, uint value) @builtin Clock benchmark_clock @local; NanoDuration benchmark_nano_seconds @local; +long cycle_start @local; +long cycle_stop @local; DString benchmark_log @local; bool benchmark_warming @local; uint this_iteration @local; -macro @start_benchmark() => benchmark_clock = std::time::clock::now(); -macro @end_benchmark() => benchmark_nano_seconds = benchmark_clock.mark(); +macro @start_benchmark() +{ + benchmark_clock = std::time::clock::now(); + cycle_start = $$sysclock(); +} + +macro @end_benchmark() +{ + benchmark_nano_seconds = benchmark_clock.mark(); + cycle_stop = $$sysclock(); +} + macro @log_benchmark(msg, args...) 
=> @pool() { if (benchmark_warming) return; @@ -85,10 +97,6 @@ fn bool run_benchmarks(BenchmarkUnit[] benchmarks) name.clear(); - long sys_clock_started; - long sys_clock_finished; - long sys_clocks; - foreach (unit : benchmarks) { defer name.clear(); @@ -104,15 +112,14 @@ fn bool run_benchmarks(BenchmarkUnit[] benchmarks) benchmark_warming = false; NanoDuration running_timer; - sys_clock_started = $$sysclock(); - benchmark_nano_seconds = {}; + long total_clocks; uint current_benchmark_iterations = bench_fn_iters[unit.name] ?? benchmark_max_iterations; char[] perc_str = { [0..19] = ' ', [20] = 0 }; int perc = 0; uint print_step = current_benchmark_iterations / 100; - for (this_iteration = 0; this_iteration < current_benchmark_iterations; ++this_iteration) + for (this_iteration = 0; this_iteration < current_benchmark_iterations; ++this_iteration, benchmark_nano_seconds = {}) { if (0 == this_iteration % print_step) // only print right about when the % will update { @@ -127,23 +134,31 @@ fn bool run_benchmarks(BenchmarkUnit[] benchmarks) unit.func() @inline; - if (!benchmark_nano_seconds) @end_benchmark(); + if (benchmark_nano_seconds == (NanoDuration){}) @end_benchmark(); // only mark when it wasn't already by the unit.func + + total_clocks += cycle_stop - cycle_start; running_timer += benchmark_nano_seconds; } - sys_clock_finished = $$sysclock(); - sys_clocks = sys_clock_finished - sys_clock_started; - - float clock_cycles = (float)sys_clocks / current_benchmark_iterations; + float clock_cycles = (float)total_clocks / current_benchmark_iterations; float measurement = (float)running_timer / current_benchmark_iterations; String[] units = { "nanoseconds", "microseconds", "milliseconds", "seconds" }; float adjusted_measurement = measurement; while (adjusted_measurement > 1_000) adjusted_measurement /= 1_000; + float adjusted_runtime_total = (float)running_timer; + while (adjusted_runtime_total > 1_000) adjusted_runtime_total /= 1_000; io::printf("\r%s ", 
name.str_view()); - io::printfn("[COMPLETE] %.2f %s, %.2f CPU clocks, %d iterations", - adjusted_measurement, units[math::min(3, (int)math::floor(math::log(measurement, 1_000)))], clock_cycles, current_benchmark_iterations); + io::printfn( + "[COMPLETE] %.2f %s, %.2f CPU clocks, %d iterations (runtime %.2f %s)", + adjusted_measurement, + units[math::min(3, (int)math::floor(math::log(measurement, 1_000)))], + clock_cycles, + current_benchmark_iterations, + adjusted_runtime_total, + units[math::min(3, (int)math::floor(math::log((float)running_timer, 1_000)))], + ); } io::printfn("\n%d benchmark%s run.\n", benchmarks.len, benchmarks.len > 1 ? "s" : ""); diff --git a/lib/std/core/string.c3 b/lib/std/core/string.c3 index bb309e2c4..14f19237d 100644 --- a/lib/std/core/string.c3 +++ b/lib/std/core/string.c3 @@ -213,7 +213,7 @@ fn String String.treplace(self, String needle, String new_str) @pure @return `a substring of the string passed in` *> -fn String String.trim(self, String to_trim = "\t\n\r ") +fn String String.trim(self, String to_trim = " \n\t\r\f\v") { return self.trim_left(to_trim).trim_right(to_trim); } @@ -243,7 +243,7 @@ fn String String.trim_charset(self, AsciiCharset to_trim = ascii::WHITESPACE_SET @pure @return `a substring of the string passed in` *> -fn String String.trim_left(self, String to_trim = "\t\n\r ") +fn String String.trim_left(self, String to_trim = " \n\t\r\f\v") { usz start = 0; usz len = self.len; @@ -260,7 +260,7 @@ fn String String.trim_left(self, String to_trim = "\t\n\r ") @pure @return `a substring of the string passed in` *> -fn String String.trim_right(self, String to_trim = "\t\n\r ") +fn String String.trim_right(self, String to_trim = " \n\t\r\f\v") { usz len = self.len; while (len > 0 && char_in_set(self[len - 1], to_trim)) len--;