optimize blake3 using a runtime for loop

Instruction count went from 60k to 9k.
No difference in speed at -O2 or higher.
This commit is contained in:
Manuel Barrio Linares
2026-02-15 22:10:53 -03:00
committed by Christoffer Lerno
parent 4b03a84b00
commit df030ac51c
2 changed files with 62 additions and 7 deletions

View File

@@ -0,0 +1,57 @@
module blake3_bench;
// Runs at program initialization (@init): configures the benchmark runner's
// warmup/iteration counts and prepares the BUFSZ-byte input buffer that the
// benchmarks in this file hash.
fn void initialize_bench() @init
{
	set_benchmark_warmup_iterations(3);
	set_benchmark_max_iterations(128);

	input = mem::alloc_array(char, BUFSZ);
	// Fill the buffer in place with a fixed byte pattern. Assigning a single
	// value to a slice sets every element, which avoids materializing a
	// BUFSZ-sized temporary literal just to copy it over.
	input[:BUFSZ] = 0xA5;
	input_slice = input[:BUFSZ];
}
// Runs at program finalization (@finalizer): releases the benchmark input
// buffer and clears both globals that referred to it.
fn void teardown_bench() @finalizer
{
	mem::free(input);
	input = null;
	// input_slice was a view over the same allocation; reset it so it does
	// not keep pointing at freed memory.
	input_slice = {};
}
// Backing allocation for the benchmark input; allocated in initialize_bench
// and freed in teardown_bench.
char* input;
// Slice over the first BUFSZ bytes of `input`; this is what the benchmark
// functions pass to the hash routines.
char[] input_slice;
// Size of the benchmark input in bytes (1024 * 1024).
const usz BUFSZ = 1024 * 1024;
module blake3_bench @benchmark;
import std::hash;
// Benchmark: times one blake3::hash call over the shared input slice.
fn void blake3_hash()
{
	// Only the hash call sits between the start/end markers.
	runtime::@start_benchmark();
	char[*] digest = blake3::hash(input_slice);
	runtime::@end_benchmark();

	// Volatile wipe of the result, done outside the timed region.
	// NOTE(review): presumably this keeps the optimizer from discarding the
	// hash computation; confirm.
	mem::zero_volatile(digest[..]);
}
// Benchmark: times one sha256::hash call over the same input slice, for
// comparison with the blake3 benchmark.
fn void compared_with_sha256()
{
	// Only the hash call sits between the start/end markers.
	runtime::@start_benchmark();
	char[*] digest = sha256::hash(input_slice);
	runtime::@end_benchmark();

	// Volatile wipe of the result, done outside the timed region.
	// NOTE(review): presumably this keeps the optimizer from discarding the
	// hash computation; confirm.
	mem::zero_volatile(digest[..]);
}
// Benchmark: times one sha512::hash call over the same input slice, for
// comparison with the blake3 benchmark.
fn void compared_with_sha512()
{
	// Only the hash call sits between the start/end markers.
	runtime::@start_benchmark();
	char[*] digest = sha512::hash(input_slice);
	runtime::@end_benchmark();

	// Volatile wipe of the result, done outside the timed region.
	// NOTE(review): presumably this keeps the optimizer from discarding the
	// hash computation; confirm.
	mem::zero_volatile(digest[..]);
}
// Benchmark: times one whirlpool::hash call over the same input slice, for
// comparison with the blake3 benchmark.
fn void compared_with_whirlpool()
{
	// Only the hash call sits between the start/end markers.
	runtime::@start_benchmark();
	char[*] digest = whirlpool::hash(input_slice);
	runtime::@end_benchmark();

	// Volatile wipe of the result, done outside the timed region.
	// NOTE(review): presumably this keeps the optimizer from discarding the
	// hash computation; confirm.
	mem::zero_volatile(digest[..]);
}

View File

@@ -606,13 +606,11 @@ fn void compress_pre(uint[] state, uint[] cv, char[BLOCK_SIZE] block, usz block_
state[13] = (uint)(counter >> 32);
state[14] = (uint)block_len;
state[15] = (uint)flags;
@round(state, &block_words[0], 0);
@round(state, &block_words[0], 1);
@round(state, &block_words[0], 2);
@round(state, &block_words[0], 3);
@round(state, &block_words[0], 4);
@round(state, &block_words[0], 5);
@round(state, &block_words[0], 6);
for (int i = 0; i < 7; i++)
{
@round(state, &block_words[0], (usz)i);
}
}
macro compress_in_place(uint[] cv, char[BLOCK_SIZE] block, usz block_len, ulong counter, char flags) @local