mirror of
https://github.com/c3lang/c3c.git
synced 2026-02-27 03:51:18 +00:00
* stdlib: implement `std::compression::zip` and `std::compression::deflate` - C3 implementation of DEFLATE (RFC 1951) and ZIP archive handling. - Support for reading and writing archives using STORE and DEFLATE methods. - Decompression supports both fixed and dynamic Huffman blocks. - Compression using greedy LZ77 matching. - Zero dependencies on libc. - Stream-based entry reading and writing. - Full unit test coverage. NOTE: This is an initial implementation. Future improvements could be: - Optimization of the LZ77 matching (lazy matching). - Support for dynamic Huffman blocks in compression. - ZIP64 support for large files/archives. - Support for encryption and additional compression methods. * optimizations+refactoring deflate: - replace linear search with hash-based match finding. - implement support for dynamic Huffman blocks using the Package-Merge algorithm. - add streaming decompression. - add buffered StreamBitReader. zip: - add ZIP64 support. - add CP437 and UTF-8 filename encoding detection. - add DOS date/time conversion and timestamp preservation. - add ZipEntryReader for streaming entry reads. - implement ZipArchive.extract and ZipArchive.recover helpers. other: - Add `set_modified_time` to std::io; - Add benchmarks and a few more unit tests. 
* zip: add archive comment support add tests * forgot to rename the benchmark :( * detect utf8 names on weird zips fix method not passed to open_writer * another edge case where directory doesn't end with / * testing utilities - detect encrypted zip - `ZipArchive.open_writer` default to DEFLATE * fix zip64 creation, add tests * fix ZIP header endianness for big-endian compatibility Update ZipLFH, ZipCDH, ZipEOCD, Zip64EOCD, and Zip64Locator structs to use little-endian bitstruct types from std::core::bitorder * fix ZipEntryReader position tracking and seek logic ZIP_METHOD_STORE added a test to track this * add package-merge algorithm attribution Thanks @konimarti * standalone deflate_benchmark.c3 against `miniz` * fix integer overflows, leaks and improve safety * a few safety for 32-bit systems and tests * deflate compress optimization * improve match finding, hash updates, and buffer usage * use ulong for zip offsets * style changes (#18) * style changes * update tests * style changes in `deflate.c3` * fix typo * Allocator first. Some changes to deflate to use `copy_to` * Fix missing conversion on 32 bits. * Fix deflate stream. Formatting. Prefer switch over if-elseif * - Stream functions now use long/ulong rather than isz/usz for seek/available. - `instream.seek` is replaced by `set_cursor` and `cursor`. - `instream.available`, `cursor` etc are long/ulong rather than isz/usz to be correct on 32-bit. * Update to constdef * Fix test --------- Co-authored-by: Book-reader <thevoid@outlook.co.nz> Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
211 lines
5.7 KiB
Plaintext
211 lines
5.7 KiB
Plaintext
module deflate_test @test;
|
|
|
|
import std::compression::deflate, std::io, std::math;
|
|
|
|
// Round-trip a short ASCII sentence through compress() and decompress()
// and require the output to equal the input exactly.
fn void test_deflate_basic()
{
	String text = "Hello, world! This is a test of the DEFLATE compression algorithm.";

	char[] packed = deflate::compress(mem, text)!!;
	defer free(packed.ptr);

	char[] unpacked = deflate::decompress(mem, packed)!!;
	defer free(unpacked.ptr);

	assert((String)unpacked == text, "Decompressed data does not match original");
}
|
|
|
|
// Compress 5000 bytes made of the repeating digits '0'..'9', require the
// result to be smaller than a tenth of the input, then round-trip it.
fn void test_deflate_repetitive()
{
	usz len = 5000;
	char[] original = mem::malloc(len)[:len];
	defer free(original.ptr);

	// Byte i holds the ASCII digit for (i mod 10).
	foreach (i, &slot : original)
	{
		*slot = (char)((i % 10) + '0');
	}

	char[] compressed = deflate::compress(mem, original)!!;
	defer free(compressed.ptr);

	// The compressed size must be below 10% of the input size.
	usz size_limit = len / 10;
	assert(compressed.len < size_limit, "Repetitive data should compress well");

	char[] decompressed = deflate::decompress(mem, compressed)!!;
	defer free(decompressed.ptr);

	assert(original.len == decompressed.len, "Length mismatch");
	assert((String)original == (String)decompressed, "Data mismatch");
}
|
|
|
|
// Zero-length input must survive a compress/decompress round trip and
// come back as zero bytes.
fn void test_deflate_empty()
{
	char[] nothing = {};

	char[] packed = deflate::compress(mem, nothing)!!;
	defer free(packed.ptr);

	char[] unpacked = deflate::decompress(mem, packed)!!;
	defer free(unpacked.ptr);

	assert(unpacked.len == 0, "Expected empty decompression");
}
|
|
|
|
// Round-trip 100000 bytes of 'A'. Only compiled when the SLOW_TESTS
// feature is enabled. The size is intended to exercise output-buffer
// growth in the decompressor.
fn void test_deflate_large_repetitive() @if($feature(SLOW_TESTS))
{
	usz len = 100000;
	char[] original = mem::malloc(len)[:len];
	defer free(original.ptr);

	mem::set(original.ptr, (char)'A', len);

	char[] compressed = deflate::compress(mem, original)!!;
	defer free(compressed.ptr);

	char[] decompressed = deflate::decompress(mem, compressed)!!;
	defer free(decompressed.ptr);

	assert(decompressed.len == len, "Length mismatch");
	// Compare the whole buffer, not just the two end bytes, so corruption
	// anywhere in the middle of the output is detected as well.
	assert((String)decompressed == (String)original, "Data mismatch");
}
|
|
|
|
// Round-trip 1024 bytes that cover every byte value: the buffer is a
// 0..255 ramp that wraps around (byte i holds i & 0xFF).
fn void test_deflate_random_ish()
{
	usz len = 1024;
	char[] original = mem::malloc(len)[:len];
	defer free(original.ptr);

	foreach (i, &slot : original)
	{
		*slot = (char)(i & 0xFF);
	}

	char[] packed = deflate::compress(mem, original)!!;
	defer free(packed.ptr);

	char[] unpacked = deflate::decompress(mem, packed)!!;
	defer free(unpacked.ptr);

	assert((String)unpacked == (String)original, "Data mismatch");
}
|
|
|
|
// Decompressing a stream whose first block header has been damaged must
// produce an error rather than a result.
fn void test_deflate_corrupted()
{
	char[] stream = deflate::compress(mem, "Some data")!!;
	defer free(stream.ptr);

	// Force bits 1-2 of the first byte (the block type field) to 0b11,
	// which is the reserved/invalid block type.
	stream[0] |= 0x06;

	char[]? outcome = deflate::decompress(mem, stream);
	assert(!@ok(outcome), "Expected decompression to fail for corrupted data");
}
|
|
|
|
// Stream-decompress from one on-disk file into another with
// deflate::decompress_stream and verify the bytes that were written.
// Both temporary files are closed and deleted when the test scope exits.
fn void test_deflate_stream()
{
	@pool()
	{
		String base = "This is a streaming test for DEFLATE. ";
		usz base_len = base.len;
		usz count = 50;

		// Build the input as `count` back-to-back copies of `base`.
		char[] original_arr = mem::malloc(base_len * count)[:base_len * count];
		defer free(original_arr.ptr);
		for (usz i = 0; i < count; i++)
		{
			mem::copy(original_arr.ptr + i * base_len, base.ptr, base_len);
		}
		String original = (String)original_arr;

		char[] compressed = deflate::compress(mem, original_arr)!!;
		defer free(compressed.ptr);

		// Use a temporary file on disk to test the streaming interface.
		File f = file::open("unittest_stream_deflate.bin", "wb+")!!;
		defer { (void)f.close(); (void)file::delete("unittest_stream_deflate.bin"); }

		// Write the compressed bytes, then rewind so they can be read back.
		f.write(compressed)!!;
		f.seek(0, Seek.SET)!!;

		// Decompress using stream.
		File out_f = file::open("unittest_stream_out.bin", "wb+")!!;
		defer { (void)out_f.close(); (void)file::delete("unittest_stream_out.bin"); }

		deflate::decompress_stream(&f, &out_f)!!;

		// The output cursor after decompression is taken as the number of
		// bytes written.
		usz out_size = out_f.seek(0, Seek.CURSOR)!!;
		assert(out_size == original.len, "Length mismatch in streaming decompression");

		out_f.seek(0, Seek.SET)!!;
		char[] result = mem::malloc(out_size)[:out_size];
		defer free(result.ptr);

		// `result` comes from malloc and is not zeroed, so a short read
		// would leave its tail undefined; require the full length.
		usz read_len = out_f.read(result)!!;
		assert(read_len == out_size, "Short read in streaming decompression");

		assert((String)result == original, "Data mismatch in streaming decompression");
	};
}
|
|
|
|
// A DEFLATE stream followed by unrelated data: after decompress_stream
// returns, the next byte read from the input must be the first byte that
// follows the compressed payload.
fn void test_deflate_embedded_stream()
{
	String base = "This is a streaming test for DEFLATE. ";

	char[] packed = deflate::compress(mem, base[..])!!;
	defer free(packed.ptr);

	// Input buffer = compressed payload + one trailing 'c' byte.
	usz framed_len = packed.len + 1;
	char[] framed = mem::malloc(framed_len)[:framed_len];
	defer free(framed.ptr);

	framed[:packed.len] = packed[..];
	framed[packed.len..] = 'c';

	ByteReader source;
	source.init(framed);

	ByteWriter sink;
	sink.tinit();

	deflate::decompress_stream(&source, &sink)!!;

	assert(sink.str_view() == base);

	// The trailing byte must still be available on the reader.
	assert(source.read_byte()!! == 'c');
}
|
|
|
|
// Drive an Inflater directly, pulling the output one byte per read()
// call, and check that it yields exactly the original text followed by
// a zero-length read.
fn void test_deflate_incremental()
{
	@pool()
	{
		String original = "This is a test of incremental decompression. We will read it byte by byte.";
		char[] compressed = deflate::compress(mem, original)!!;
		defer free(compressed.ptr);

		// Wrap the compressed bytes in an in-memory reader.
		ByteReader source;
		source.init(compressed);

		Inflater* inflater = mem::new(Inflater);
		char[] work_buf = mem::malloc(8192)[:8192];
		inflater.init(&source, work_buf);
		defer free(work_buf.ptr);
		defer free(inflater);

		char[] decompressed = mem::malloc(original.len)[:original.len];
		defer free(decompressed.ptr);

		// Fill each output slot from a separate single-byte read.
		foreach (&slot : decompressed)
		{
			char[1] single;
			usz got = inflater.read(single[..])!!;
			assert(got == 1, "Expected 1 byte");
			*slot = single[0];
		}

		// One more read should return 0 (or EOF)
		char[1] extra;
		assert(inflater.read(extra[..])!! == 0, "Expected EOF");

		assert((String)decompressed == (String)original, "Incremental decompression failed");
	};
}
|