stdlib: std::compression::zip and std::compression::deflate (#2930)

* stdlib: implement `std::compression::zip` and `std::compression::deflate`

- C3 implementation of DEFLATE (RFC 1951) and ZIP archive handling.
- Support for reading and writing archives using STORE and DEFLATE
methods.
- Decompression supports both fixed and dynamic Huffman blocks.
- Compression using greedy LZ77 matching.
- Zero dependencies on libc.
- Stream-based entry reading and writing.
- Full unit test coverage.

NOTE: This is an initial implementation. Future improvements could be:

- Optimization of the LZ77 matching (lazy matching).
- Support for dynamic Huffman blocks in compression.
- ZIP64 support for large files/archives.
- Support for encryption and additional compression methods.

* optimizations+refactoring

deflate:
- replace linear search with hash-based match finding.
- implement support for dynamic Huffman blocks using the Package-Merge
algorithm.
- add streaming decompression.
- add buffered StreamBitReader.

zip:
- add ZIP64 support.
- add CP437 and UTF-8 filename encoding detection.
- add DOS date/time conversion and timestamp preservation.
- add ZipEntryReader for streaming entry reads.
- implement ZipArchive.extract and ZipArchive.recover helpers.

other:
- Add `set_modified_time` to std::io.
- Add benchmarks and a few more unit tests.

* zip: add archive comment support

add tests

* forgot to rename the benchmark :(

* detect utf8 names on weird zips

fix method not passed to open_writer

* another edge case where directory doesn't end with /

* testing utilities

- detect encrypted zip
- `ZipArchive.open_writer` default to DEFLATE

* fix zip64 creation, add tests

* fix ZIP header endianness for big-endian compatibility

Update ZipLFH, ZipCDH, ZipEOCD, Zip64EOCD, and Zip64Locator structs to
use little-endian bitstruct types from std::core::bitorder

* fix ZipEntryReader position tracking and seek logic for ZIP_METHOD_STORE

added a test to track this

* add package-merge algorithm attribution

Thanks @konimarti

* standalone deflate_benchmark.c3 against `miniz`

* fix integer overflows, leaks and improve safety

* a few safety fixes for 32-bit systems and tests

* deflate compress optimization

* improve match finding, hash updates, and buffer usage

* use ulong for zip offsets

* style changes (#18)

* style changes

* update tests

* style changes in `deflate.c3`

* fix typo

* Allocator first. Some changes to deflate to use `copy_to`

* Fix missing conversion on 32 bits.

* Fix deflate stream. Formatting. Prefer switch over if-elseif

* - Stream functions now use long/ulong rather than isz/usz for seek/available.
- `instream.seek` is replaced by `set_cursor` and `cursor`.
- `instream.available`, `cursor` etc are long/ulong rather than isz/usz to be correct on 32-bit.

* Update to constdef

* Fix test

---------

Co-authored-by: Book-reader <thevoid@outlook.co.nz>
Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
This commit is contained in:
Manu Linares
2026-02-20 16:41:34 -03:00
committed by GitHub
parent 5055e86518
commit eae7d0c4a1
26 changed files with 3745 additions and 96 deletions

View File

@@ -0,0 +1,210 @@
module deflate_test @test;
import std::compression::deflate, std::io, std::math;
// Round-trips a short ASCII sentence through deflate::compress and
// deflate::decompress and checks that the output equals the input.
fn void test_deflate_basic()
{
	String text = "Hello, world! This is a test of the DEFLATE compression algorithm.";

	char[] packed = deflate::compress(mem, text)!!;
	defer free(packed.ptr);

	char[] unpacked = deflate::decompress(mem, packed)!!;
	defer free(unpacked.ptr);

	assert(text == (String)unpacked, "Decompressed data does not match original");
}
// Compresses 5000 bytes made of the digits '0'..'9' repeated over and over,
// requires the compressed size to be below a tenth of the input size, and
// then verifies that decompression restores the exact input.
fn void test_deflate_repetitive()
{
	usz len = 5000;
	char[] original = mem::malloc(len)[:len];
	defer free(original.ptr);

	// Fill with the repeating pattern "0123456789".
	foreach (idx, &slot : original)
	{
		*slot = (char)((idx % 10) + '0');
	}

	char[] packed = deflate::compress(mem, original)!!;
	defer free(packed.ptr);

	// The pattern is highly redundant, so a large size reduction is expected.
	usz size_limit = len / 10;
	assert(packed.len < size_limit, "Repetitive data should compress well");

	char[] unpacked = deflate::decompress(mem, packed)!!;
	defer free(unpacked.ptr);

	assert(unpacked.len == original.len, "Length mismatch");
	assert((String)unpacked == (String)original, "Data mismatch");
}
// Compressing and then decompressing an empty slice must succeed and
// yield zero bytes of output.
fn void test_deflate_empty()
{
	char[] nothing = {};

	char[] packed = deflate::compress(mem, nothing)!!;
	defer free(packed.ptr);

	char[] unpacked = deflate::decompress(mem, packed)!!;
	defer free(unpacked.ptr);

	assert(unpacked.len == 0, "Expected empty decompression");
}
// Round-trips 100000 bytes of 'A' through compress/decompress. The larger
// size is meant to exercise output-buffer growth in the inflater.
// Only compiled when the SLOW_TESTS feature is enabled.
fn void test_deflate_large_repetitive() @if($feature(SLOW_TESTS))
{
	usz len = 100000;
	char[] original = mem::malloc(len)[:len];
	defer free(original.ptr);
	mem::set(original.ptr, (char)'A', len);

	char[] compressed = deflate::compress(mem, original)!!;
	defer free(compressed.ptr);

	char[] decompressed = deflate::decompress(mem, compressed)!!;
	defer free(decompressed.ptr);

	assert(decompressed.len == len, "Length mismatch");
	// Compare the whole buffer, not just the first and last byte, so that
	// corruption anywhere in the middle of the output is detected too.
	assert((String)decompressed == (String)original, "Data mismatch");
}
// Round-trips 1024 bytes holding the byte ramp 0x00..0xFF repeated four
// times; this input has far less short-range repetition than the other
// test inputs. Only round-trip equality is checked, not the compressed size.
fn void test_deflate_random_ish()
{
	usz len = 1024;
	char[] original = mem::malloc(len)[:len];
	defer free(original.ptr);

	// Each byte is the low 8 bits of its own index.
	foreach (idx, &slot : original)
	{
		*slot = (char)(idx & 0xFF);
	}

	char[] packed = deflate::compress(mem, original)!!;
	defer free(packed.ptr);

	char[] unpacked = deflate::decompress(mem, packed)!!;
	defer free(unpacked.ptr);

	assert((String)unpacked == (String)original, "Data mismatch");
}
// Decompression must report an error when the block header is invalid.
fn void test_deflate_corrupted()
{
	char[] packed = deflate::compress(mem, "Some data")!!;
	defer free(packed.ptr);

	// Force bits 1-2 of the first byte (the block type field) to 0b11,
	// which is the reserved/invalid block type.
	packed[0] |= 0x06;

	char[]? outcome = deflate::decompress(mem, packed);
	bool succeeded = @ok(outcome);
	assert(!succeeded, "Expected decompression to fail for corrupted data");
}
// Exercises deflate::decompress_stream with real files as both the input
// and the output stream:
//   1. build an input of 50 copies of a fixed sentence and compress it,
//   2. write the compressed bytes to a scratch file and rewind it,
//   3. stream-decompress that file into a second scratch file,
//   4. read the second file back and compare it with the original input.
// Both scratch files are closed and deleted when the test scope exits.
fn void test_deflate_stream()
{
	@pool()
	{
		String base = "This is a streaming test for DEFLATE. ";
		usz base_len = base.len;
		usz count = 50;
		char[] original_arr = mem::malloc(base_len * count)[:base_len * count];
		defer free(original_arr.ptr);
		for (usz i = 0; i < count; i++)
		{
			mem::copy(original_arr.ptr + i * base_len, base.ptr, base_len);
		}
		String original = (String)original_arr;

		char[] compressed = deflate::compress(mem, original_arr)!!;
		defer free(compressed.ptr);

		// Use a temporary file on disk to test the streaming interface.
		File f = file::open("unittest_stream_deflate.bin", "wb+")!!;
		defer { (void)f.close(); (void)file::delete("unittest_stream_deflate.bin"); }
		// Make sure the whole compressed payload reached the file; a partial
		// write would otherwise surface later as a confusing decode error.
		usz written = f.write(compressed)!!;
		assert(written == compressed.len, "Short write of compressed data");
		f.seek(0, Seek.SET)!!;

		// Decompress using stream.
		File out_f = file::open("unittest_stream_out.bin", "wb+")!!;
		defer { (void)out_f.close(); (void)file::delete("unittest_stream_out.bin"); }
		deflate::decompress_stream(&f, &out_f)!!;

		// The output file was only written sequentially from offset 0, so the
		// current cursor position is the number of bytes produced.
		usz out_size = out_f.seek(0, Seek.CURSOR)!!;
		assert(out_size == original.len, "Length mismatch in streaming decompression");

		out_f.seek(0, Seek.SET)!!;
		char[] result = mem::malloc(out_size)[:out_size];
		defer free(result.ptr);
		// `result` is uninitialised memory, so verify the read filled all of it
		// before comparing contents.
		usz got = out_f.read(result)!!;
		assert(got == out_size, "Short read in streaming decompression");
		assert((String)result == original, "Data mismatch in streaming decompression");
	};
}
// Verifies that decompress_stream stops consuming input at the end of the
// DEFLATE data: a single trailing byte 'c' is appended after the compressed
// payload and must still be readable from the input stream afterwards.
fn void test_deflate_embedded_stream()
{
	// The ByteWriter below is set up with tinit(), i.e. it allocates from the
	// temp allocator; run inside @pool() like the sibling tests so that this
	// temporary memory is scoped to the test.
	@pool()
	{
		String base = "This is a streaming test for DEFLATE. ";
		char[] compressed = deflate::compress(mem, base[..])!!;
		defer free(compressed.ptr);

		// Build "compressed payload" + 'c'.
		usz append_len = compressed.len + 1;
		char[] append = mem::malloc(append_len)[:append_len];
		defer free(append.ptr);
		append[:compressed.len] = compressed[..];
		append[compressed.len..] = 'c';

		ByteReader reader;
		reader.init(append);
		ByteWriter writer;
		writer.tinit();

		deflate::decompress_stream(&reader, &writer)!!;

		assert(writer.str_view() == base);
		// The byte that follows the DEFLATE data must not have been consumed.
		assert(reader.read_byte()!! == 'c');
	};
}
// Drives an Inflater directly, pulling the decompressed output one byte per
// read() call, then checks that one further read reports 0 bytes and that
// the collected bytes equal the original text.
fn void test_deflate_incremental()
{
	@pool()
	{
		String original = "This is a test of incremental decompression. We will read it byte by byte.";
		char[] packed = deflate::compress(mem, original)!!;
		defer free(packed.ptr);

		// The compressed bytes are served to the inflater through a ByteReader.
		io::ByteReader source;
		source.init(packed);

		Inflater* inflater = mem::new(Inflater);
		char[] scratch = mem::malloc(8192)[:8192];
		inflater.init(&source, scratch);
		defer free(scratch.ptr);
		defer free(inflater);

		char[] collected = mem::malloc(original.len)[:original.len];
		defer free(collected.ptr);

		// Request exactly one byte at a time until the expected length is reached.
		foreach (&slot : collected)
		{
			char[1] cell;
			usz got = inflater.read(cell[..])!!;
			assert(got == 1, "Expected 1 byte");
			*slot = cell[0];
		}

		// After the last byte, another read must produce nothing.
		char[1] spare;
		usz leftover = inflater.read(spare[..])!!;
		assert(leftover == 0, "Expected EOF");

		assert((String)original == (String)collected, "Incremental decompression failed");
	};
}