Add form-feed and vertical tab to trim defaults (#2407)

* Add form-feed and vertical tab to `trim` defaults

* add some initial string-based benchmarking

* update to non-const string

* do not account for mem times in bench

* misc bench fixes to repair reporting times; improve trim tests

* ok last one for real..remove (void) casts

* finally, swap to more efficient default whitespace order in `trim`
This commit is contained in:
Zack Puhl
2025-08-25 08:23:14 -04:00
committed by GitHub
parent 3e641ab82b
commit 35c04cdc36
3 changed files with 80 additions and 19 deletions

View File

@@ -0,0 +1,46 @@
module string_trim_wars;
// Benchmark input padded on both sides with six whitespace characters:
// space, \n, \t, \r, \f, \v leading, and the same six in reverse order trailing.
// The core "a \tbcde" contains an interior space and tab.
const String WHITESPACE_TARGET = " \n\t\r\f\va \tbcde\v\f\r\t\n ";
// Benchmark input whose "a \tbcde" core is surrounded by runs of digits and
// spaces; passed explicitly by the whitespace-plus-digit benchmarks.
const String WHITESPACE_NUMERIC_TARGET = " 25290 0969 99a \tbcde12332 34 43 0000";
// Configures the benchmark runner for this module.
// NOTE(review): tagged @init, so presumably executed at program start-up
// before any benchmark runs — confirm.
fn void initialize_bench() @init
{
// Warm-up iteration count.
set_benchmark_warmup_iterations(64);
// Maximum iteration count: 1 << 24 = 16,777,216.
set_benchmark_max_iterations(1 << 24);
}
// Times one trim of `$target`, using `$trim_str` as the set of characters to strip.
//   $trim_str : compile-time value. A String selects String.trim; an
//               AsciiCharset selects String.trim_charset; any other type is
//               rejected with a compile-time error.
//   $target   : compile-time String to trim; defaults to WHITESPACE_TARGET.
// NOTE(review): the body runs inside @pool(), presumably so the temporary copy
// made by tcopy() is released when the macro finishes — confirm.
macro void trim_bench($trim_str, String $target = WHITESPACE_TARGET) => @pool()
{
String s1;
// Copy the constant before the timer starts so the copy is outside the
// measured region.
String s2 = $target.tcopy();
runtime::@start_benchmark();
// Compile-time dispatch on the type of the trim set.
$switch:
$case @typeis($trim_str, String):
s1 = s2.trim($trim_str);
$case @typeis($trim_str, AsciiCharset):
s1 = s2.trim_charset($trim_str);
$default: $error "Unable to determine the right String `trim` operation to use.";
$endswitch
// NOTE(review): presumably marks the trimmed result as used so the trim call
// is not optimised away — confirm.
@volatile_load(s1);
runtime::@end_benchmark();
}
module string_trim_wars @benchmark;
// Baseline: trim set is a single space, applied to the default WHITESPACE_TARGET.
fn void trim_control() => trim_bench(" "); // only spaces
// Trim set "\t\n\r ": tab, newline, carriage return, space — the String.trim
// default before form-feed and vertical tab were added.
fn void trim_whitespace_default() => trim_bench("\t\n\r "); // former default set
// Same four characters as "\t\n\r ", reordered so that space comes first.
// ("\w" in these comments is the author's shorthand for whitespace.)
fn void trim_whitespace_default_ordered() => trim_bench(" \n\t\r"); // default \w set, but ordered by expected freq
// Six-character whitespace set with form-feed and vertical tab first and space
// last; the author labels this the bad-performance ordering.
fn void trim_whitespace_bad() => trim_bench("\f\v\n\t\r "); // bad-perf ordering, all \w
// Six-character whitespace set with space first: " \n\t\r\f\v". This is the
// ordering used as the default trim set of String.trim / trim_left / trim_right.
fn void trim_whitespace_ordered_extended() => trim_bench(" \n\t\r\f\v"); // proposed ordering, all \w
// Passes an AsciiCharset instead of a String, so trim_bench dispatches to
// String.trim_charset; the target is the default WHITESPACE_TARGET.
fn void trim_charset_whitespace() => trim_bench(ascii::WHITESPACE_SET); // use charset, all \w
// String-based trim with a 16-character set (six whitespace characters plus the
// digits 0-9), applied to WHITESPACE_NUMERIC_TARGET.
fn void trim_many() => trim_bench(" \n\t\r\f\v0123456789", WHITESPACE_NUMERIC_TARGET); // ordered, all \w + num
// Charset-based counterpart of trim_many: the union of ascii::WHITESPACE_SET and
// ascii::NUMBER_SET, applied to WHITESPACE_NUMERIC_TARGET.
fn void trim_charset_many() => trim_bench(ascii::WHITESPACE_SET | ascii::NUMBER_SET, WHITESPACE_NUMERIC_TARGET); // set, all \w + num

View File

@@ -51,12 +51,24 @@ fn void set_benchmark_func_iterations(String func, uint value) @builtin
// Module-local benchmark state.
// Set by @start_benchmark; its mark() result is read by @end_benchmark.
Clock benchmark_clock @local;
// Elapsed time written by @end_benchmark.
NanoDuration benchmark_nano_seconds @local;
// $$sysclock() sample taken by @start_benchmark.
long cycle_start @local;
// $$sysclock() sample taken by @end_benchmark.
long cycle_stop @local;
DString benchmark_log @local;
// While true, @log_benchmark returns without logging.
bool benchmark_warming @local;
// Loop counter of the measured iteration loop in run_benchmarks.
uint this_iteration @local;
macro @start_benchmark() => benchmark_clock = std::time::clock::now();
macro @end_benchmark() => benchmark_nano_seconds = benchmark_clock.mark();
// Opens a measured region: stores the current time in benchmark_clock and a
// $$sysclock() sample in cycle_start.
// NOTE(review): $$sysclock() is presumably a CPU cycle/tick counter (the
// runner reports the difference as "CPU clocks") — confirm.
macro @start_benchmark()
{
benchmark_clock = std::time::clock::now();
cycle_start = $$sysclock();
}
// Closes a measured region: stores benchmark_clock.mark() in
// benchmark_nano_seconds and a $$sysclock() sample in cycle_stop.
// NOTE(review): the clock is marked before the cycle counter is sampled, so
// the cycle_stop - cycle_start interval also covers the mark() call itself.
macro @end_benchmark()
{
benchmark_nano_seconds = benchmark_clock.mark();
cycle_stop = $$sysclock();
}
macro @log_benchmark(msg, args...) => @pool()
{
if (benchmark_warming) return;
@@ -85,10 +97,6 @@ fn bool run_benchmarks(BenchmarkUnit[] benchmarks)
name.clear();
long sys_clock_started;
long sys_clock_finished;
long sys_clocks;
foreach (unit : benchmarks)
{
defer name.clear();
@@ -104,15 +112,14 @@ fn bool run_benchmarks(BenchmarkUnit[] benchmarks)
benchmark_warming = false;
NanoDuration running_timer;
sys_clock_started = $$sysclock();
benchmark_nano_seconds = {};
long total_clocks;
uint current_benchmark_iterations = bench_fn_iters[unit.name] ?? benchmark_max_iterations;
char[] perc_str = { [0..19] = ' ', [20] = 0 };
int perc = 0;
uint print_step = current_benchmark_iterations / 100;
for (this_iteration = 0; this_iteration < current_benchmark_iterations; ++this_iteration)
for (this_iteration = 0; this_iteration < current_benchmark_iterations; ++this_iteration, benchmark_nano_seconds = {})
{
if (0 == this_iteration % print_step) // only print right about when the % will update
{
@@ -127,23 +134,31 @@ fn bool run_benchmarks(BenchmarkUnit[] benchmarks)
unit.func() @inline;
if (!benchmark_nano_seconds) @end_benchmark();
if (benchmark_nano_seconds == (NanoDuration){}) @end_benchmark(); // only mark when it wasn't already by the unit.func
total_clocks += cycle_stop - cycle_start;
running_timer += benchmark_nano_seconds;
}
sys_clock_finished = $$sysclock();
sys_clocks = sys_clock_finished - sys_clock_started;
float clock_cycles = (float)sys_clocks / current_benchmark_iterations;
float clock_cycles = (float)total_clocks / current_benchmark_iterations;
float measurement = (float)running_timer / current_benchmark_iterations;
String[] units = { "nanoseconds", "microseconds", "milliseconds", "seconds" };
float adjusted_measurement = measurement;
while (adjusted_measurement > 1_000) adjusted_measurement /= 1_000;
float adjusted_runtime_total = (float)running_timer;
while (adjusted_runtime_total > 1_000) adjusted_runtime_total /= 1_000;
io::printf("\r%s ", name.str_view());
io::printfn("[COMPLETE] %.2f %s, %.2f CPU clocks, %d iterations",
adjusted_measurement, units[math::min(3, (int)math::floor(math::log(measurement, 1_000)))], clock_cycles, current_benchmark_iterations);
io::printfn(
"[COMPLETE] %.2f %s, %.2f CPU clocks, %d iterations (runtime %.2f %s)",
adjusted_measurement,
units[math::min(3, (int)math::floor(math::log(measurement, 1_000)))],
clock_cycles,
current_benchmark_iterations,
adjusted_runtime_total,
units[math::min(3, (int)math::floor(math::log((float)running_timer, 1_000)))],
);
}
io::printfn("\n%d benchmark%s run.\n", benchmarks.len, benchmarks.len > 1 ? "s" : "");

View File

@@ -213,7 +213,7 @@ fn String String.treplace(self, String needle, String new_str)
@pure
@return `a substring of the string passed in`
*>
fn String String.trim(self, String to_trim = "\t\n\r ")
fn String String.trim(self, String to_trim = " \n\t\r\f\v")
{
// Default set: space, newline, tab, carriage return, form-feed, vertical tab.
// Strip the left side first, then the right side, using the same set for both.
return self.trim_left(to_trim).trim_right(to_trim);
}
@@ -243,7 +243,7 @@ fn String String.trim_charset(self, AsciiCharset to_trim = ascii::WHITESPACE_SET
@pure
@return `a substring of the string passed in`
*>
fn String String.trim_left(self, String to_trim = "\t\n\r ")
fn String String.trim_left(self, String to_trim = " \n\t\r\f\v")
{
usz start = 0;
usz len = self.len;
@@ -260,7 +260,7 @@ fn String String.trim_left(self, String to_trim = "\t\n\r ")
@pure
@return `a substring of the string passed in`
*>
fn String String.trim_right(self, String to_trim = "\t\n\r ")
fn String String.trim_right(self, String to_trim = " \n\t\r\f\v")
{
usz len = self.len;
while (len > 0 && char_in_set(self[len - 1], to_trim)) len--;