stdlib: std::compression::zip and std::compression::deflate (#2930)

* stdlib: implement `std::compression::zip` and `std::compression::deflate`

- C3 implementation of DEFLATE (RFC 1951) and ZIP archive handling.
- Support for reading and writing archives using STORE and DEFLATE
methods.
- Decompression supports both fixed and dynamic Huffman blocks.
- Compression using greedy LZ77 matching.
- Zero dependencies on libc.
- Stream-based entry reading and writing.
- Full unit test coverage.

NOTE: This is an initial implementation. Future improvements could be:

- Optimization of the LZ77 matching (lazy matching).
- Support for dynamic Huffman blocks in compression.
- ZIP64 support for large files/archives.
- Support for encryption and additional compression methods.

* optimizations+refactoring

deflate:
- replace linear search with hash-based match finding.
- implement support for dynamic Huffman blocks using the Package-Merge
algorithm.
- add streaming decompression.
- add buffered StreamBitReader.

zip:
- add ZIP64 support.
- add CP437 and UTF-8 filename encoding detection.
- add DOS date/time conversion and timestamp preservation.
- add ZipEntryReader for streaming entry reads.
- implement ZipArchive.extract and ZipArchive.recover helpers.

other:
- Add `set_modified_time` to std::io;
- Add benchmarks and a few more unit tests.

* zip: add archive comment support

add tests

* forgot to rename the benchmark :(

* detect utf8 names on weird zips

fix method not passed to open_writer

* another edge case where directory doesn't end with /

* testing utilities

- detect encrypted zip
- `ZipArchive.open_writer` default to DEFLATE

* fix zip64 creation, add tests

* fix ZIP header endianness for big-endian compatibility

Update ZipLFH, ZipCDH, ZipEOCD, Zip64EOCD, and Zip64Locator structs to
use little-endian bitstruct types from std::core::bitorder

* fix ZipEntryReader position tracking and seek logic ZIP_METHOD_STORE

added a test to track this

* add package-merge algorithm attribution

Thanks @konimarti

* standalone deflate_benchmark.c3 against `miniz`

* fix integer overflows, leaks and improve safety

* a few safety for 32-bit systems and tests

* deflate compress optimization

* improve match finding, hash updates, and buffer usage

* use ulong for zip offsets

* style changes (#18)

* style changes

* update tests

* style changes in `deflate.c3`

* fix typo

* Allocator first. Some changes to deflate to use `copy_to`

* Fix missing conversion on 32 bits.

* Fix deflate stream. Formatting. Prefer switch over if-elseif

* - Stream functions now use long/ulong rather than isz/usz for seek/available.
- `instream.seek` is replaced by `set_cursor` and `cursor`.
- `instream.available`, `cursor` etc are long/ulong rather than isz/usz to be correct on 32-bit.

* Update to constdef

* Fix test

---------

Co-authored-by: Book-reader <thevoid@outlook.co.nz>
Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
This commit is contained in:
Manu Linares
2026-02-20 16:41:34 -03:00
committed by GitHub
parent 5055e86518
commit eae7d0c4a1
26 changed files with 3745 additions and 96 deletions

View File

@@ -0,0 +1,51 @@
module deflate_benchmarks;
import std::compression::deflate;
const uint SMALL_ITERATIONS = 50000;
const uint LARGE_ITERATIONS = 100;
// Data to compress
const char[] SMALL_DATA = { [0..1023] = 'A' };
const char[] LARGE_DATA = { [0..1048575] = 'B' };
char[] small_compressed;
char[] large_compressed;
fn void initialize_bench() @init
{
small_compressed = deflate::compress(mem, SMALL_DATA)!!;
large_compressed = deflate::compress(mem, LARGE_DATA)!!;
set_benchmark_warmup_iterations(2);
set_benchmark_max_iterations(10);
set_benchmark_func_iterations($qnameof(deflate_compress_small), SMALL_ITERATIONS);
set_benchmark_func_iterations($qnameof(deflate_decompress_small), SMALL_ITERATIONS);
set_benchmark_func_iterations($qnameof(deflate_compress_large), LARGE_ITERATIONS);
set_benchmark_func_iterations($qnameof(deflate_decompress_large), LARGE_ITERATIONS);
}
// =======================================================================================
module deflate_benchmarks @benchmark;
import std::compression::deflate;
import std::core::mem;
fn void deflate_compress_small() => @pool()
{
char[]? compressed = deflate::compress(tmem, SMALL_DATA);
}
fn void deflate_decompress_small() => @pool()
{
char[]? decompressed = deflate::decompress(tmem, small_compressed);
}
fn void deflate_compress_large() => @pool()
{
char[]? compressed = deflate::compress(tmem, LARGE_DATA);
}
fn void deflate_decompress_large() => @pool()
{
char[]? decompressed = deflate::decompress(tmem, large_compressed);
}

View File

@@ -32,8 +32,8 @@ fn void initialize_bench() @init
$qnameof(sha1_16)[..^4],
$qnameof(sha2_256_16)[..^4],
$qnameof(sha2_512_16)[..^4],
$qnameof(blake2s_256_16)[..^4],
$qnameof(blake2b_256_16)[..^4],
//$qnameof(blake2s_256_16)[..^4],
//$qnameof(blake2b_256_16)[..^4],
$qnameof(blake3_16)[..^4],
$qnameof(ripemd_160_16)[..^4],
$qnameof(whirlpool_16)[..^4],
@@ -68,8 +68,8 @@ fn void md5_16() => md5::hash(common_16);
fn void sha1_16() => sha1::hash(common_16);
fn void sha2_256_16() => sha256::hash(common_16);
fn void sha2_512_16() => sha512::hash(common_16);
fn void blake2s_256_16() => blake2::s(256, common_16);
fn void blake2b_256_16() => blake2::b(256, common_16);
//fn void blake2s_256_16() => blake2::s(256, common_16);
//fn void blake2b_256_16() => blake2::b(256, common_16);
fn void blake3_16() => blake3::hash(common_16);
fn void ripemd_160_16() => ripemd::hash{160}(common_16);
fn void whirlpool_16() => whirlpool::hash(common_16);
@@ -80,8 +80,8 @@ fn void md5_256() => md5::hash(common_256);
fn void sha1_256() => sha1::hash(common_256);
fn void sha2_256_256() => sha256::hash(common_256);
fn void sha2_512_256() => sha512::hash(common_256);
fn void blake2s_256_256() => blake2::s(256, common_256);
fn void blake2b_256_256() => blake2::b(256, common_256);
//fn void blake2s_256_256() => blake2::s(256, common_256);
//fn void blake2b_256_256() => blake2::b(256, common_256);
fn void blake3_256() => blake3::hash(common_256);
fn void ripemd_160_256() => ripemd::hash{160}(common_256);
fn void whirlpool_256() => whirlpool::hash(common_256);
@@ -92,8 +92,8 @@ fn void md5_4kib() => md5::hash(common_4kib);
fn void sha1_4kib() => sha1::hash(common_4kib);
fn void sha2_256_4kib() => sha256::hash(common_4kib);
fn void sha2_512_4kib() => sha512::hash(common_4kib);
fn void blake2s_256_4kib() => blake2::s(256, common_4kib);
fn void blake2b_256_4kib() => blake2::b(256, common_4kib);
//fn void blake2s_256_4kib() => blake2::s(256, common_4kib);
//fn void blake2b_256_4kib() => blake2::b(256, common_4kib);
fn void blake3_4kib() => blake3::hash(common_4kib);
fn void ripemd_160_4kib() => ripemd::hash{160}(common_4kib);
fn void whirlpool_4kib() => whirlpool::hash(common_4kib);
@@ -104,8 +104,8 @@ fn void md5_1mib() => md5::hash(common_1mib);
fn void sha1_1mib() => sha1::hash(common_1mib);
fn void sha2_256_1mib() => sha256::hash(common_1mib);
fn void sha2_512_1mib() => sha512::hash(common_1mib);
fn void blake2s_256_1mib() => blake2::s(256, common_1mib);
fn void blake2b_256_1mib() => blake2::b(256, common_1mib);
//fn void blake2s_256_1mib() => blake2::s(256, common_1mib);
//fn void blake2b_256_1mib() => blake2::b(256, common_1mib);
fn void blake3_1mib() => blake3::hash(common_1mib);
fn void ripemd_160_1mib() => ripemd::hash{160}(common_1mib);
fn void whirlpool_1mib() => whirlpool::hash(common_1mib);