stdlib: std::compression::zip and std::compression::deflate (#2930)

* stdlib: implement `std::compression::zip` and `std::compression::deflate`
  - C3 implementation of DEFLATE (RFC 1951) and ZIP archive handling.
  - Support for reading and writing archives using STORE and DEFLATE methods.
  - Decompression supports both fixed and dynamic Huffman blocks.
  - Compression using greedy LZ77 matching.
  - Zero dependencies on libc.
  - Stream-based entry reading and writing.
  - Full unit test coverage.

  NOTE: This is an initial implementation. Future improvements could be:
  - Optimization of the LZ77 matching (lazy matching).
  - Support for dynamic Huffman blocks in compression.
  - ZIP64 support for large files/archives.
  - Support for encryption and additional compression methods.
* optimizations+refactoring

  deflate:
  - replace linear search with hash-based match finding.
  - implement support for dynamic Huffman blocks using the Package-Merge algorithm.
  - add streaming decompression.
  - add buffered StreamBitReader.

  zip:
  - add ZIP64 support.
  - add CP437 and UTF-8 filename encoding detection.
  - add DOS date/time conversion and timestamp preservation.
  - add ZipEntryReader for streaming entry reads.
  - implement ZipArchive.extract and ZipArchive.recover helpers.

  other:
  - Add `set_modified_time` to std::io;
  - Add benchmarks and a few more unit tests.
* zip: add archive comment support

  add tests
* forgot to rename the benchmark :(
* detect utf8 names on weird zips

  fix method not passed to open_writer
* another edge case where directory doesn't end with /
* testing utilities
  - detect encrypted zip
  - `ZipArchive.open_writer` default to DEFLATE
* fix zip64 creation, add tests
* fix ZIP header endianness for big-endian compatibility

  Update ZipLFH, ZipCDH, ZipEOCD, Zip64EOCD, and Zip64Locator structs to use little-endian bitstruct types from std::core::bitorder
* fix ZipEntryReader position tracking and seek logic ZIP_METHOD_STORE added a test to track this
* add package-merge algorithm attribution

  Thanks @konimarti
* standalone deflate_benchmark.c3 against `miniz`
* fix integer overflows, leaks and improve safety
* a few safety for 32-bit systems and tests
* deflate compress optimization
* improve match finding, hash updates, and buffer usage
* use ulong for zip offsets
* style changes (#18)
* style changes
* update tests
* style changes in `deflate.c3`
* fix typo
* Allocator first. Some changes to deflate to use `copy_to`
* Fix missing conversion on 32 bits.
* Fix deflate stream. Formatting. Prefer switch over if-elseif
* - Stream functions now use long/ulong rather than isz/usz for seek/available.
  - `instream.seek` is replaced by `set_cursor` and `cursor`.
  - `instream.available`, `cursor` etc are long/ulong rather than isz/usz to be correct on 32-bit.
* Update to constdef
* Fix test

---------

Co-authored-by: Book-reader <thevoid@outlook.co.nz>
Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
2026-02-27 03:51:18 +00:00 · 2026-02-20 16:41:34 -03:00
parent 5055e86518
commit eae7d0c4a1
26 changed files with 3745 additions and 96 deletions
--- a/benchmarks/stdlib/compression/deflate.c3
+++ b/benchmarks/stdlib/compression/deflate.c3
@@ -0,0 +1,51 @@
+module deflate_benchmarks;
+import std::compression::deflate;
+
+const uint SMALL_ITERATIONS = 50000;
+const uint LARGE_ITERATIONS = 100;
+
+// Data to compress
+const char[] SMALL_DATA = { [0..1023] = 'A' };
+const char[] LARGE_DATA = { [0..1048575] = 'B' };
+
+char[] small_compressed;
+char[] large_compressed;
+
+fn void initialize_bench() @init
+{
+	small_compressed = deflate::compress(mem, SMALL_DATA)!!;
+	large_compressed = deflate::compress(mem, LARGE_DATA)!!;
+	set_benchmark_warmup_iterations(2);
+	set_benchmark_max_iterations(10);
+
+	set_benchmark_func_iterations($qnameof(deflate_compress_small), SMALL_ITERATIONS);
+	set_benchmark_func_iterations($qnameof(deflate_decompress_small), SMALL_ITERATIONS);
+	set_benchmark_func_iterations($qnameof(deflate_compress_large), LARGE_ITERATIONS);
+	set_benchmark_func_iterations($qnameof(deflate_decompress_large), LARGE_ITERATIONS);
+}
+
+// =======================================================================================
+module deflate_benchmarks @benchmark;
+
+import std::compression::deflate;
+import std::core::mem;
+
+fn void deflate_compress_small() => @pool()
+{
+	char[]? compressed = deflate::compress(tmem, SMALL_DATA);
+}
+
+fn void deflate_decompress_small() => @pool()
+{
+	char[]? decompressed = deflate::decompress(tmem, small_compressed);
+}
+
+fn void deflate_compress_large() => @pool()
+{
+	char[]? compressed = deflate::compress(tmem, LARGE_DATA);
+}
+
+fn void deflate_decompress_large() => @pool()
+{
+	char[]? decompressed = deflate::decompress(tmem, large_compressed);
+}
--- a/benchmarks/stdlib/crypto/crypto_shootout.c3
+++ b/benchmarks/stdlib/crypto/crypto_shootout.c3
@@ -32,8 +32,8 @@ fn void initialize_bench() @init
 		$qnameof(sha1_16)[..^4],
 		$qnameof(sha2_256_16)[..^4],
 		$qnameof(sha2_512_16)[..^4],
-		$qnameof(blake2s_256_16)[..^4],
-		$qnameof(blake2b_256_16)[..^4],
+		//$qnameof(blake2s_256_16)[..^4],
+		//$qnameof(blake2b_256_16)[..^4],
 		$qnameof(blake3_16)[..^4],
 		$qnameof(ripemd_160_16)[..^4],
 		$qnameof(whirlpool_16)[..^4],
@@ -68,8 +68,8 @@ fn void md5_16() => md5::hash(common_16);
 fn void sha1_16() => sha1::hash(common_16);
 fn void sha2_256_16() => sha256::hash(common_16);
 fn void sha2_512_16() => sha512::hash(common_16);
-fn void blake2s_256_16() => blake2::s(256, common_16);
-fn void blake2b_256_16() => blake2::b(256, common_16);
+//fn void blake2s_256_16() => blake2::s(256, common_16);
+//fn void blake2b_256_16() => blake2::b(256, common_16);
 fn void blake3_16() => blake3::hash(common_16);
 fn void ripemd_160_16() => ripemd::hash{160}(common_16);
 fn void whirlpool_16() => whirlpool::hash(common_16);
@@ -80,8 +80,8 @@ fn void md5_256() => md5::hash(common_256);
 fn void sha1_256() => sha1::hash(common_256);
 fn void sha2_256_256() => sha256::hash(common_256);
 fn void sha2_512_256() => sha512::hash(common_256);
-fn void blake2s_256_256() => blake2::s(256, common_256);
-fn void blake2b_256_256() => blake2::b(256, common_256);
+//fn void blake2s_256_256() => blake2::s(256, common_256);
+//fn void blake2b_256_256() => blake2::b(256, common_256);
 fn void blake3_256() => blake3::hash(common_256);
 fn void ripemd_160_256() => ripemd::hash{160}(common_256);
 fn void whirlpool_256() => whirlpool::hash(common_256);
@@ -92,8 +92,8 @@ fn void md5_4kib() => md5::hash(common_4kib);
 fn void sha1_4kib() => sha1::hash(common_4kib);
 fn void sha2_256_4kib() => sha256::hash(common_4kib);
 fn void sha2_512_4kib() => sha512::hash(common_4kib);
-fn void blake2s_256_4kib() => blake2::s(256, common_4kib);
-fn void blake2b_256_4kib() => blake2::b(256, common_4kib);
+//fn void blake2s_256_4kib() => blake2::s(256, common_4kib);
+//fn void blake2b_256_4kib() => blake2::b(256, common_4kib);
 fn void blake3_4kib() => blake3::hash(common_4kib);
 fn void ripemd_160_4kib() => ripemd::hash{160}(common_4kib);
 fn void whirlpool_4kib() => whirlpool::hash(common_4kib);
@@ -104,8 +104,8 @@ fn void md5_1mib() => md5::hash(common_1mib);
 fn void sha1_1mib() => sha1::hash(common_1mib);
 fn void sha2_256_1mib() => sha256::hash(common_1mib);
 fn void sha2_512_1mib() => sha512::hash(common_1mib);
-fn void blake2s_256_1mib() => blake2::s(256, common_1mib);
-fn void blake2b_256_1mib() => blake2::b(256, common_1mib);
+//fn void blake2s_256_1mib() => blake2::s(256, common_1mib);
+//fn void blake2b_256_1mib() => blake2::b(256, common_1mib);
 fn void blake3_1mib() => blake3::hash(common_1mib);
 fn void ripemd_160_1mib() => ripemd::hash{160}(common_1mib);
 fn void whirlpool_1mib() => whirlpool::hash(common_1mib);