mirror of
https://github.com/c3lang/c3c.git
synced 2026-02-27 12:01:16 +00:00
stdlib: std::compression::zip and std::compression::deflate (#2930)
* stdlib: implement `std::compression::zip` and `std::compression::deflate` - C3 implementation of DEFLATE (RFC 1951) and ZIP archive handling. - Support for reading and writing archives using STORE and DEFLATE methods. - Decompression supports both fixed and dynamic Huffman blocks. - Compression using greedy LZ77 matching. - Zero dependencies on libc. - Stream-based entry reading and writing. - Full unit test coverage. NOTE: This is an initial implementation. Future improvements could be: - Optimization of the LZ77 matching (lazy matching). - Support for dynamic Huffman blocks in compression. - ZIP64 support for large files/archives. - Support for encryption and additional compression methods. * optimizations+refactoring deflate: - replace linear search with hash-based match finding. - implement support for dynamic Huffman blocks using the Package-Merge algorithm. - add streaming decompression. - add buffered StreamBitReader. zip: - add ZIP64 support. - add CP437 and UTF-8 filename encoding detection. - add DOS date/time conversion and timestamp preservation. - add ZipEntryReader for streaming entry reads. - implement ZipArchive.extract and ZipArchive.recover helpers. other: - Add `set_modified_time` to std::io; - Add benchmarks and a few more unit tests. 
* zip: add archive comment support add tests * forgot to rename the benchmark :( * detect utf8 names on weird zips fix method not passed to open_writer * another edge case where directory doesn't end with / * testing utilities - detect encrypted zip - `ZipArchive.open_writer` default to DEFLATE * fix zip64 creation, add tests * fix ZIP header endianness for big-endian compatibility Update ZipLFH, ZipCDH, ZipEOCD, Zip64EOCD, and Zip64Locator structs to use little-endian bitstruct types from std::core::bitorder * fix ZipEntryReader position tracking and seek logic ZIP_METHOD_STORE added a test to track this * add package-merge algorithm attribution Thanks @konimarti * standalone deflate_benchmark.c3 against `miniz` * fix integer overflows, leaks and improve safety * a few safety for 32-bit systems and tests * deflate compress optimization * improve match finding, hash updates, and buffer usage * use ulong for zip offsets * style changes (#18) * style changes * update tests * style changes in `deflate.c3` * fix typo * Allocator first. Some changes to deflate to use `copy_to` * Fix missing conversion on 32 bits. * Fix deflate stream. Formatting. Prefer switch over if-elseif * - Stream functions now use long/ulong rather than isz/usz for seek/available. - `instream.seek` is replaced by `set_cursor` and `cursor`. - `instream.available`, `cursor` etc are long/ulong rather than isz/usz to be correct on 32-bit. * Update to constdef * Fix test --------- Co-authored-by: Book-reader <thevoid@outlook.co.nz> Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
This commit is contained in:
210
test/unit/stdlib/compression/deflate.c3
Normal file
210
test/unit/stdlib/compression/deflate.c3
Normal file
@@ -0,0 +1,210 @@
|
||||
module deflate_test @test;
|
||||
|
||||
import std::compression::deflate, std::io, std::math;
|
||||
|
||||
// Round-trips a short ASCII sentence through deflate::compress and
// deflate::decompress and requires the result to equal the input.
fn void test_deflate_basic()
{
	String text = "Hello, world! This is a test of the DEFLATE compression algorithm.";

	char[] packed = deflate::compress(mem, text)!!;
	defer free(packed.ptr);

	char[] unpacked = deflate::decompress(mem, packed)!!;
	defer free(unpacked.ptr);

	String round_trip = (String)unpacked;
	assert(round_trip == text, "Decompressed data does not match original");
}
|
||||
|
||||
// Compresses 5000 bytes made of the digits '0'..'9' repeated, checks that the
// compressed form is smaller than a tenth of the input, and verifies the
// round trip byte for byte.
fn void test_deflate_repetitive()
{
	usz len = 5000;
	char[] original = mem::malloc(len)[:len];
	defer free(original.ptr);

	// Fill with "0123456789" repeated.
	foreach (idx, &slot : original)
	{
		*slot = (char)((idx % 10) + '0');
	}

	char[] packed = deflate::compress(mem, original)!!;
	defer free(packed.ptr);

	// The period-10 pattern must shrink to under 10% of its size.
	usz size_limit = len / 10;
	assert(packed.len < size_limit, "Repetitive data should compress well");

	char[] unpacked = deflate::decompress(mem, packed)!!;
	defer free(unpacked.ptr);

	assert(unpacked.len == original.len, "Length mismatch");

	String expected = (String)original;
	String actual = (String)unpacked;
	assert(actual == expected, "Data mismatch");
}
|
||||
|
||||
// Compressing a zero-length slice must succeed, and decompressing the
// resulting stream must produce zero bytes.
fn void test_deflate_empty()
{
	char[] nothing = {};

	char[] packed = deflate::compress(mem, nothing)!!;
	defer free(packed.ptr);

	char[] unpacked = deflate::decompress(mem, packed)!!;
	defer free(unpacked.ptr);

	usz produced = unpacked.len;
	assert(produced == 0, "Expected empty decompression");
}
|
||||
|
||||
// Round-trips 100000 bytes of 'A'. The input is sized to make the
// decompressor grow its output buffer. Only compiled when the SLOW_TESTS
// feature is enabled.
fn void test_deflate_large_repetitive() @if($feature(SLOW_TESTS))
{
	// Test larger buffer to trigger reallocs in inflater
	usz len = 100000;
	char[] original = mem::malloc(len)[:len];
	defer free(original.ptr);

	mem::set(original.ptr, (char)'A', len);

	char[] compressed = deflate::compress(mem, original)!!;
	defer free(compressed.ptr);

	char[] decompressed = deflate::decompress(mem, compressed)!!;
	defer free(decompressed.ptr);

	assert(decompressed.len == len, "Length mismatch");

	// Compare the whole buffer, not just its first and last byte: damage
	// introduced while the output buffer grows would sit in the interior.
	assert((String)decompressed == (String)original, "Data mismatch");
}
|
||||
|
||||
// Round-trips 1024 bytes holding the ramp 0, 1, ..., 255 repeated, so every
// possible byte value occurs in the input.
fn void test_deflate_random_ish()
{
	usz len = 1024;
	char[] original = mem::malloc(len)[:len];
	defer free(original.ptr);

	// Low eight bits of the index: a ramp that wraps every 256 bytes.
	foreach (idx, &slot : original)
	{
		*slot = (char)(idx & 0xFF);
	}

	char[] packed = deflate::compress(mem, original)!!;
	defer free(packed.ptr);

	char[] unpacked = deflate::decompress(mem, packed)!!;
	defer free(unpacked.ptr);

	String expected = (String)original;
	String actual = (String)unpacked;
	assert(actual == expected, "Data mismatch");
}
|
||||
|
||||
// Damages the header of a valid stream and requires deflate::decompress to
// report an error instead of returning data.
fn void test_deflate_corrupted()
{
	char[] stream = deflate::compress(mem, "Some data")!!;
	defer free(stream.ptr);

	// Turn on bits 1-2 of the first byte so the block type reads as 3,
	// which is the reserved/invalid value.
	stream[0] |= 0x06;

	char[]? outcome = deflate::decompress(mem, stream);
	bool rejected = !@ok(outcome);
	assert(rejected, "Expected decompression to fail for corrupted data");
}
|
||||
|
||||
// Exercises deflate::decompress_stream with real files on both ends: the
// compressed bytes are written to one scratch file, decompressed into a
// second one, and the second file's contents are compared with the input.
// Both scratch files are created in the current directory and deleted when
// the test finishes.
fn void test_deflate_stream()
{
	@pool()
	{
		String base = "This is a streaming test for DEFLATE. ";
		usz base_len = base.len;
		usz count = 50;
		usz total_len = base_len * count;

		// Input is `base` repeated `count` times.
		char[] original_arr = mem::malloc(total_len)[:total_len];
		defer free(original_arr.ptr);
		for (usz i = 0; i < count; i++)
		{
			mem::copy(original_arr.ptr + i * base_len, base.ptr, base_len);
		}
		String original = (String)original_arr;

		char[] compressed = deflate::compress(mem, original_arr)!!;
		defer free(compressed.ptr);

		// Use a temporary file on disk to test the streaming interface
		File f = file::open("unittest_stream_deflate.bin", "wb+")!!;
		defer { (void)f.close(); (void)file::delete("unittest_stream_deflate.bin"); }

		f.write(compressed)!!;
		// Rewind so decompression starts at the first compressed byte.
		f.seek(0, Seek.SET)!!;

		// Decompress using stream
		File out_f = file::open("unittest_stream_out.bin", "wb+")!!;
		defer { (void)out_f.close(); (void)file::delete("unittest_stream_out.bin"); }

		deflate::decompress_stream(&f, &out_f)!!;

		// The output cursor now sits after the last written byte, so its
		// position is the decompressed size.
		usz out_size = out_f.seek(0, Seek.CURSOR)!!;
		assert(out_size == original.len, "Length mismatch in streaming decompression");

		out_f.seek(0, Seek.SET)!!;
		char[] result = mem::malloc(out_size)[:out_size];
		defer free(result.ptr);

		// `result` is uninitialized memory: make sure it was filled
		// completely before comparing, so a short read is reported as
		// such and not as a content mismatch.
		usz read_len = out_f.read(result)!!;
		assert(read_len == out_size, "Short read of streaming decompression output");

		assert((String)result == original, "Data mismatch in streaming decompression");
	};
}
|
||||
|
||||
// Checks that deflate::decompress_stream stops consuming input at the end
// of the DEFLATE stream: one extra byte 'c' is appended after the compressed
// data and must still be readable from the input afterwards.
fn void test_deflate_embedded_stream()
{
	// The ByteWriter below allocates from the temp allocator; scope it
	// with a pool like the other stream tests in this file do.
	@pool()
	{
		String base = "This is a streaming test for DEFLATE. ";

		char[] compressed = deflate::compress(mem, base[..])!!;
		defer free(compressed.ptr);

		// Compressed stream followed by a single sentinel byte.
		usz append_len = compressed.len + 1;
		char[] append = mem::malloc(append_len)[:append_len];
		defer free(append.ptr);

		append[:compressed.len] = compressed[..];
		append[compressed.len..] = 'c';

		ByteReader reader;
		reader.init(append);

		ByteWriter writer;
		writer.tinit();

		deflate::decompress_stream(&reader, &writer)!!;

		assert(writer.str_view() == base, "Data mismatch in embedded stream decompression");

		// The sentinel must be the next byte the reader yields.
		assert(reader.read_byte()!! == 'c', "Decompression consumed bytes past the end of the stream");
	};
}
|
||||
|
||||
// Drives an Inflater one output byte at a time: every read into a 1-byte
// buffer must yield exactly one byte until the whole message has been
// produced, after which a further read must yield zero bytes.
fn void test_deflate_incremental()
{
	@pool()
	{
		String original = "This is a test of incremental decompression. We will read it byte by byte.";
		char[] packed = deflate::compress(mem, original)!!;
		defer free(packed.ptr);

		// The compressed bytes are served from memory through a ByteReader.
		ByteReader source;
		source.init(packed);

		// 8192-byte buffer handed to Inflater.init together with the source.
		Inflater* inflater = mem::new(Inflater);
		char[] bit_buf = mem::malloc(8192)[:8192];
		defer free(bit_buf.ptr);
		inflater.init(&source, bit_buf);
		defer free(inflater);

		char[] unpacked = mem::malloc(original.len)[:original.len];
		defer free(unpacked.ptr);

		// One read call per expected output byte.
		foreach (&slot : unpacked)
		{
			char[1] one_byte;
			usz got = inflater.read(one_byte[..])!!;
			assert(got == 1, "Expected 1 byte");
			*slot = one_byte[0];
		}

		// With all data delivered, another read must produce nothing.
		char[1] extra;
		usz trailing = inflater.read(extra[..])!!;
		assert(trailing == 0, "Expected EOF");

		String actual = (String)unpacked;
		assert((String)original == actual, "Incremental decompression failed");
	};
}
|
||||
Reference in New Issue
Block a user