diff --git a/benchmarks/stdlib/hash/whirlpool.c3 b/benchmarks/stdlib/hash/whirlpool.c3 new file mode 100644 index 000000000..9b04bf556 --- /dev/null +++ b/benchmarks/stdlib/hash/whirlpool.c3 @@ -0,0 +1,57 @@ +module whirlpool_bench; + +fn void initialize_bench() @init +{ + set_benchmark_warmup_iterations(3); + set_benchmark_max_iterations(128); + + input = mem::alloc_array(char, BUFSZ); + input[:BUFSZ] = (char[]){ [0..BUFSZ-1] = 0xA5 }[..]; + input_slice = input[:BUFSZ]; +} + +fn void teardown_bench() @finalizer +{ + mem::free(input); + input = null; +} + +char* input; +char[] input_slice; +const usz BUFSZ = 1024 * 1024; + +module whirlpool_bench @benchmark; + +import std::hash; + +fn void whirlpool_hash() +{ + runtime::@start_benchmark(); + char[*] myset = whirlpool::hash(input_slice); + runtime::@end_benchmark(); + mem::zero_volatile(myset[..]); +} + +fn void compared_with_sha256() +{ + runtime::@start_benchmark(); + char[*] myset = sha256::hash(input_slice); + runtime::@end_benchmark(); + mem::zero_volatile(myset[..]); +} + +fn void compared_with_sha512() +{ + runtime::@start_benchmark(); + char[*] myset = sha512::hash(input_slice); + runtime::@end_benchmark(); + mem::zero_volatile(myset[..]); +} + +fn void compared_with_streebog_512() +{ + runtime::@start_benchmark(); + char[*] myset = streebog::hash_512(input_slice); + runtime::@end_benchmark(); + mem::zero_volatile(myset[..]); +} diff --git a/lib/std/hash/whirlpool/whirlpool.c3 b/lib/std/hash/whirlpool/whirlpool.c3 index d2d77a344..4101b42e4 100644 --- a/lib/std/hash/whirlpool/whirlpool.c3 +++ b/lib/std/hash/whirlpool/whirlpool.c3 @@ -128,15 +128,15 @@ fn char[HASH_SIZE] Whirlpool.final(&self) } -macro ulong @w_op(#src, $shift) @private - => S_BOX[(0 * 256) + (int)(#src[($shift + 0) & 7] >> 56) ] - ^ S_BOX[(1 * 256) + (int)(#src[($shift + 7) & 7] >> 48) & 0xFF] - ^ S_BOX[(2 * 256) + (int)(#src[($shift + 6) & 7] >> 40) & 0xFF] - ^ S_BOX[(3 * 256) + (int)(#src[($shift + 5) & 7] >> 32) & 0xFF] - ^ S_BOX[(4 * 
256) + (int)(#src[($shift + 4) & 7] >> 24) & 0xFF] - ^ S_BOX[(5 * 256) + (int)(#src[($shift + 3) & 7] >> 16) & 0xFF] - ^ S_BOX[(6 * 256) + (int)(#src[($shift + 2) & 7] >> 8) & 0xFF] - ^ S_BOX[(7 * 256) + (int)(#src[($shift + 1) & 7] >> 0) & 0xFF]; +macro ulong @w_op(#src, shift) @private + => S_BOX[(0 * 256) + (int)(#src[(shift + 0) & 7] >> 56) ] + ^ S_BOX[(1 * 256) + (int)(#src[(shift + 7) & 7] >> 48) & 0xFF] + ^ S_BOX[(2 * 256) + (int)(#src[(shift + 6) & 7] >> 40) & 0xFF] + ^ S_BOX[(3 * 256) + (int)(#src[(shift + 5) & 7] >> 32) & 0xFF] + ^ S_BOX[(4 * 256) + (int)(#src[(shift + 4) & 7] >> 24) & 0xFF] + ^ S_BOX[(5 * 256) + (int)(#src[(shift + 3) & 7] >> 16) & 0xFF] + ^ S_BOX[(6 * 256) + (int)(#src[(shift + 2) & 7] >> 8) & 0xFF] + ^ S_BOX[(7 * 256) + (int)(#src[(shift + 1) & 7] >> 0) & 0xFF]; const ulong[10] RC @private = { @@ -158,26 +158,33 @@ fn void Whirlpool.process_block(&self, char* block) @local ulong[2 * 8] k; // key ulong[2 * 8] state; // state - // NOTE: These loops are unrolled with C3's Chad-tier compile-time evaluation. - $for var $round = 0; $round < 8; $round++: - k[$round] = self.hash[$round]; - state[$round] = $$bswap(mem::load((ulong*)block + $round, 1)) ^ self.hash[$round]; - self.hash[$round] = state[$round]; + // NOTE: These loops are kept as $for to ensure initial setup is unrolled. + $for var $i = 0; $i < 8; $i++: + k[$i] = self.hash[$i]; + state[$i] = $$bswap(mem::load((ulong*)block + $i, 1)) ^ self.hash[$i]; + self.hash[$i] = state[$i]; $endfor - $for var $round = 0; $round < ROUNDS; ++$round : - var $m = $round % 2; + // Use regular for loops for the rounds to avoid massive code bloat. 80K fewer instructions. 
+ for (int round = 0; round < ROUNDS; ++round) + { + int m = round % 2; + int next_m = m ^ 1; + ulong* pk = &k[m * 8]; + ulong* nk = &k[next_m * 8]; + ulong* ps = &state[m * 8]; + ulong* ns = &state[next_m * 8]; - k[(($m ^ 1) * 8) + 0] = @w_op((&k[$m * 8]), 0) ^ RC[$round]; + nk[0] = @w_op(pk, 0) ^ RC[round]; $for var $i = 1; $i < 8; $i++ : - k[(($m ^ 1) * 8) + $i] = @w_op((&k[$m * 8]), $i); + nk[$i] = @w_op(pk, $i); $endfor $for var $i = 0; $i < 8; $i++ : - state[(($m ^ 1) * 8) + $i] = @w_op(&(state[$m * 8]), $i) ^ k[(($m ^ 1) * 8) + $i]; + ns[$i] = @w_op(ps, $i) ^ nk[$i]; $endfor - $endfor + } $for var $x = 0; $x < 8; $x++: self.hash[$x] ^= state[$x]; diff --git a/test/unit/stdlib/math/math.c3 b/test/unit/stdlib/math/math.c3 index 4c94c677a..4a42dd553 100644 --- a/test/unit/stdlib/math/math.c3 +++ b/test/unit/stdlib/math/math.c3 @@ -459,20 +459,33 @@ fn void test_log() @test fn void test_ct_intlog2() @test @if($feature(SLOW_TESTS) && !env::OPENBSD) { - uint128 actual, expected; + usz[129] actual; $for var $x = 0; $x <= 128; ++$x : - expected = (uint128)math::floor(math::log2($x)); - actual = (uint128)math::@intlog2($x); - assert(expected == actual, "input %d: floor(log2($x)) -> %d is not equal to @intlog2($x) -> %d", $x, expected, actual); + actual[$x] = (usz)math::@intlog2($x); $endfor + for (usz i = 0; i <= 128; i++) + { + usz expected = (i <= 1) ? 
0 : (usz)math::floor(math::log2(i)); + assert(actual[i] == expected, "input %d: expected %d, got %d", i, expected, actual[i]); + } - var $logme = (uint128)1; - $for var $x = 0; $x < 8192; ++$x : - $logme *= 13; - expected = (uint128)math::floor(math::log2((uint128)$logme)); - actual = (uint128)math::@intlog2((uint128)$logme); - assert(expected == actual, "input %d (idx %d): floor(log2(|$logme|)) -> %d is not equal to @intlog2(|$logme|) -> %d", $logme, $x, expected, actual); + usz[128] powers; + $for var $i = 0; $i < 128; ++$i : + powers[$i] = (usz)math::@intlog2((uint128)1 << $i); $endfor + for (usz i = 0; i < 128; i++) + { + assert(powers[i] == i, "input 2^%d: expected %d, got %d", i, i, powers[i]); + } + + usz[127] complex_vals; + $for var $i = 1; $i < 128; ++$i : + complex_vals[$i - 1] = (usz)math::@intlog2(((uint128)1 << $i) | ((uint128)1 << ($i - 1))); + $endfor + for (usz i = 1; i < 128; i++) + { + assert(complex_vals[i - 1] == (usz)i, "input 2^%d + 2^%d: expected %d, got %d", i, i - 1, i, complex_vals[i - 1]); + } } fn void test_pow() @test