mirror of
https://github.com/c3lang/c3c.git
synced 2026-02-27 03:51:18 +00:00
optimize test_ct_intlog2 test and whirlpool hash (#2938)
* optimize `test_ct_intlog2` while still covering all 128 bit positions * refactor whirlpool to reduce code bloat: replaced the fully unrolled round loop with a runtime loop, reducing the instruction count by 80k in `process_block` and yielding an approx. 30% performance boost due to improved cache locality. * use compile-time arrays for `test_ct_intlog2`
This commit is contained in:
57
benchmarks/stdlib/hash/whirlpool.c3
Normal file
57
benchmarks/stdlib/hash/whirlpool.c3
Normal file
@@ -0,0 +1,57 @@
|
||||
module whirlpool_bench;
|
||||
|
||||
// Benchmark setup for this module, marked @init.
// Configures the benchmark iteration counts and builds the shared input
// buffer (`input` / `input_slice`) that the benchmark functions hash.
fn void initialize_bench() @init
|
||||
{
|
||||
// 3 warmup iterations, at most 128 benchmark iterations.
set_benchmark_warmup_iterations(3);
|
||||
set_benchmark_max_iterations(128);
|
||||
|
||||
// Allocate BUFSZ bytes for the input.
input = mem::alloc_array(char, BUFSZ);
|
||||
// Fill every byte of the buffer (indices 0..BUFSZ-1) with 0xA5.
input[:BUFSZ] = (char[]){ [0..BUFSZ-1] = 0xA5 }[..];
|
||||
// Slice view over the same BUFSZ bytes; this is what gets passed to the hash functions.
input_slice = input[:BUFSZ];
|
||||
}
|
||||
|
||||
// Benchmark teardown, marked @finalizer: releases the input buffer
// allocated by initialize_bench() and clears the pointer.
// NOTE(review): `input_slice` is not reset here, so after this runs it
// still refers to the freed buffer - confirm nothing reads it afterwards.
fn void teardown_bench() @finalizer
|
||||
{
|
||||
mem::free(input);
|
||||
input = null;
|
||||
}
|
||||
|
||||
// Input buffer for the benchmarks; allocated in initialize_bench() and freed in teardown_bench().
char* input;
|
||||
// Slice over the first BUFSZ bytes of `input`; passed to each hash function under test.
char[] input_slice;
|
||||
// Size of the benchmark input in bytes (1 MiB).
const usz BUFSZ = 1024 * 1024;
|
||||
|
||||
module whirlpool_bench @benchmark;
|
||||
|
||||
import std::hash;
|
||||
|
||||
// Benchmark: one whirlpool::hash call over the shared BUFSZ-byte input.
// Only the hash call sits between the start/end benchmark markers.
fn void whirlpool_hash()
|
||||
{
|
||||
runtime::@start_benchmark();
|
||||
char[*] myset = whirlpool::hash(input_slice);
|
||||
runtime::@end_benchmark();
|
||||
// Wipe the digest after the timed region.
// NOTE(review): presumably this also keeps the result "used" so the hash call is not optimized away - confirm.
mem::zero_volatile(myset[..]);
|
||||
}
|
||||
|
||||
// Comparison benchmark: one sha256::hash call over the same shared input,
// timed the same way as whirlpool_hash().
fn void compared_with_sha256()
|
||||
{
|
||||
runtime::@start_benchmark();
|
||||
char[*] myset = sha256::hash(input_slice);
|
||||
runtime::@end_benchmark();
|
||||
// Wipe the digest after the timed region.
mem::zero_volatile(myset[..]);
|
||||
}
|
||||
|
||||
// Comparison benchmark: one sha512::hash call over the same shared input,
// timed the same way as whirlpool_hash().
fn void compared_with_sha512()
|
||||
{
|
||||
runtime::@start_benchmark();
|
||||
char[*] myset = sha512::hash(input_slice);
|
||||
runtime::@end_benchmark();
|
||||
// Wipe the digest after the timed region.
mem::zero_volatile(myset[..]);
|
||||
}
|
||||
|
||||
// Comparison benchmark: one streebog::hash_512 call over the same shared
// input, timed the same way as whirlpool_hash().
fn void compared_with_streebog_512()
|
||||
{
|
||||
runtime::@start_benchmark();
|
||||
char[*] myset = streebog::hash_512(input_slice);
|
||||
runtime::@end_benchmark();
|
||||
// Wipe the digest after the timed region.
mem::zero_volatile(myset[..]);
|
||||
}
|
||||
@@ -128,15 +128,15 @@ fn char[HASH_SIZE] Whirlpool.final(&self)
|
||||
}
|
||||
|
||||
|
||||
macro ulong @w_op(#src, $shift) @private
|
||||
=> S_BOX[(0 * 256) + (int)(#src[($shift + 0) & 7] >> 56) ]
|
||||
^ S_BOX[(1 * 256) + (int)(#src[($shift + 7) & 7] >> 48) & 0xFF]
|
||||
^ S_BOX[(2 * 256) + (int)(#src[($shift + 6) & 7] >> 40) & 0xFF]
|
||||
^ S_BOX[(3 * 256) + (int)(#src[($shift + 5) & 7] >> 32) & 0xFF]
|
||||
^ S_BOX[(4 * 256) + (int)(#src[($shift + 4) & 7] >> 24) & 0xFF]
|
||||
^ S_BOX[(5 * 256) + (int)(#src[($shift + 3) & 7] >> 16) & 0xFF]
|
||||
^ S_BOX[(6 * 256) + (int)(#src[($shift + 2) & 7] >> 8) & 0xFF]
|
||||
^ S_BOX[(7 * 256) + (int)(#src[($shift + 1) & 7] >> 0) & 0xFF];
|
||||
// Computes one 64-bit output word as the XOR of eight S_BOX lookups.
//
// #src  - indexable sequence of eight ulong words (indices are taken modulo 8 via `& 7`).
// shift - index of the output word being produced.
//
// Lookup j (j = 0..7) reads S_BOX at offset j * 256 plus byte j of the word
// #src[(shift - j) & 7], counting bytes from the most significant one
// (shift amounts 56, 48, ..., 0). The first term needs no `& 0xFF` because
// `>> 56` already leaves only the top byte.
//
// NOTE(review): each index is written as `(j * 256) + (int)(...) & 0xFF`; this relies on
// `&` binding tighter than `+` (true for C3, unlike C) - confirm if porting this expression.
macro ulong @w_op(#src, shift) @private
|
||||
=> S_BOX[(0 * 256) + (int)(#src[(shift + 0) & 7] >> 56) ]
|
||||
^ S_BOX[(1 * 256) + (int)(#src[(shift + 7) & 7] >> 48) & 0xFF]
|
||||
^ S_BOX[(2 * 256) + (int)(#src[(shift + 6) & 7] >> 40) & 0xFF]
|
||||
^ S_BOX[(3 * 256) + (int)(#src[(shift + 5) & 7] >> 32) & 0xFF]
|
||||
^ S_BOX[(4 * 256) + (int)(#src[(shift + 4) & 7] >> 24) & 0xFF]
|
||||
^ S_BOX[(5 * 256) + (int)(#src[(shift + 3) & 7] >> 16) & 0xFF]
|
||||
^ S_BOX[(6 * 256) + (int)(#src[(shift + 2) & 7] >> 8) & 0xFF]
|
||||
^ S_BOX[(7 * 256) + (int)(#src[(shift + 1) & 7] >> 0) & 0xFF];
|
||||
|
||||
|
||||
const ulong[10] RC @private = {
|
||||
@@ -158,26 +158,33 @@ fn void Whirlpool.process_block(&self, char* block) @local
|
||||
ulong[2 * 8] k; // key
|
||||
ulong[2 * 8] state; // state
|
||||
|
||||
// NOTE: These loops are unrolled with C3's Chad-tier compile-time evaluation.
|
||||
$for var $round = 0; $round < 8; $round++:
|
||||
k[$round] = self.hash[$round];
|
||||
state[$round] = $$bswap(mem::load((ulong*)block + $round, 1)) ^ self.hash[$round];
|
||||
self.hash[$round] = state[$round];
|
||||
// NOTE: These loops are kept as $for to ensure initial setup is unrolled.
|
||||
$for var $i = 0; $i < 8; $i++:
|
||||
k[$i] = self.hash[$i];
|
||||
state[$i] = $$bswap(mem::load((ulong*)block + $i, 1)) ^ self.hash[$i];
|
||||
self.hash[$i] = state[$i];
|
||||
$endfor
|
||||
|
||||
$for var $round = 0; $round < ROUNDS; ++$round :
|
||||
var $m = $round % 2;
|
||||
// Use regular for loops for the rounds to avoid massive code bloat. 80K fewer instructions.
|
||||
for (int round = 0; round < ROUNDS; ++round)
|
||||
{
|
||||
int m = round % 2;
|
||||
int next_m = m ^ 1;
|
||||
ulong* pk = &k[m * 8];
|
||||
ulong* nk = &k[next_m * 8];
|
||||
ulong* ps = &state[m * 8];
|
||||
ulong* ns = &state[next_m * 8];
|
||||
|
||||
k[(($m ^ 1) * 8) + 0] = @w_op((&k[$m * 8]), 0) ^ RC[$round];
|
||||
nk[0] = @w_op(pk, 0) ^ RC[round];
|
||||
|
||||
$for var $i = 1; $i < 8; $i++ :
|
||||
k[(($m ^ 1) * 8) + $i] = @w_op((&k[$m * 8]), $i);
|
||||
nk[$i] = @w_op(pk, $i);
|
||||
$endfor
|
||||
|
||||
$for var $i = 0; $i < 8; $i++ :
|
||||
state[(($m ^ 1) * 8) + $i] = @w_op(&(state[$m * 8]), $i) ^ k[(($m ^ 1) * 8) + $i];
|
||||
$endfor
|
||||
ns[$i] = @w_op(ps, $i) ^ nk[$i];
|
||||
$endfor
|
||||
}
|
||||
|
||||
$for var $x = 0; $x < 8; $x++:
|
||||
self.hash[$x] ^= state[$x];
|
||||
|
||||
@@ -459,20 +459,33 @@ fn void test_log() @test
|
||||
|
||||
fn void test_ct_intlog2() @test @if($feature(SLOW_TESTS) && !env::OPENBSD)
|
||||
{
|
||||
uint128 actual, expected;
|
||||
usz[129] actual;
|
||||
$for var $x = 0; $x <= 128; ++$x :
|
||||
expected = (uint128)math::floor(math::log2($x));
|
||||
actual = (uint128)math::@intlog2($x);
|
||||
assert(expected == actual, "input %d: floor(log2($x)) -> %d is not equal to @intlog2($x) -> %d", $x, expected, actual);
|
||||
actual[$x] = (usz)math::@intlog2($x);
|
||||
$endfor
|
||||
for (usz i = 0; i <= 128; i++)
|
||||
{
|
||||
usz expected = (i <= 1) ? 0 : (usz)math::floor(math::log2(i));
|
||||
assert(actual[i] == expected, "input %d: expected %d, got %d", i, expected, actual[i]);
|
||||
}
|
||||
|
||||
var $logme = (uint128)1;
|
||||
$for var $x = 0; $x < 8192; ++$x :
|
||||
$logme *= 13;
|
||||
expected = (uint128)math::floor(math::log2((uint128)$logme));
|
||||
actual = (uint128)math::@intlog2((uint128)$logme);
|
||||
assert(expected == actual, "input %d (idx %d): floor(log2(|$logme|)) -> %d is not equal to @intlog2(|$logme|) -> %d", $logme, $x, expected, actual);
|
||||
usz[128] powers;
|
||||
$for var $i = 0; $i < 128; ++$i :
|
||||
powers[$i] = (usz)math::@intlog2((uint128)1 << $i);
|
||||
$endfor
|
||||
for (usz i = 0; i < 128; i++)
|
||||
{
|
||||
assert(powers[i] == i, "input 2^%d: expected %d, got %d", i, i, powers[i]);
|
||||
}
|
||||
|
||||
usz[127] complex_vals;
|
||||
$for var $i = 1; $i < 128; ++$i :
|
||||
complex_vals[$i - 1] = (usz)math::@intlog2(((uint128)1 << $i) | ((uint128)1 << ($i - 1)));
|
||||
$endfor
|
||||
for (usz i = 1; i < 128; i++)
|
||||
{
|
||||
assert(complex_vals[i - 1] == (usz)i, "input 2^%d + 2^%d: expected %d, got %d", i, i - 1, i, complex_vals[i - 1]);
|
||||
}
|
||||
}
|
||||
|
||||
fn void test_pow() @test
|
||||
|
||||
Reference in New Issue
Block a user