stdlib: std::compression::zip and std::compression::deflate (#2930)

* stdlib: implement `std::compression::zip` and `std::compression::deflate` - C3 implementation of DEFLATE (RFC 1951) and ZIP archive handling. - Support for reading and writing archives using STORE and DEFLATE methods. - Decompression supports both fixed and dynamic Huffman blocks. - Compression using greedy LZ77 matching. - Zero dependencies on libc. - Stream-based entry reading and writing. - Full unit test coverage. NOTE: This is an initial implementation. Future improvements could be: - Optimization of the LZ77 matching (lazy matching). - Support for dynamic Huffman blocks in compression. - ZIP64 support for large files/archives. - Support for encryption and additional compression methods. * optimizations+refactoring deflate: - replace linear search with hash-based match finding. - implement support for dynamic Huffman blocks using the Package-Merge algorithm. - add streaming decompression. - add buffered StreamBitReader. zip: - add ZIP64 support. - add CP437 and UTF-8 filename encoding detection. - add DOS date/time conversion and timestamp preservation. - add ZipEntryReader for streaming entry reads. - implement ZipArchive.extract and ZipArchive.recover helpers. other: - Add `set_modified_time` to std::io; - Add benchmarks and a few more unit tests. * zip: add archive comment support add tests * forgot to rename the benchmark :( * detect utf8 names on weird zips fix method not passed to open_writer * another edge case where directory doesn't end with / * testing utilities - detect encrypted zip - `ZipArchive.open_writer` default to DEFLATE * fix zip64 creation, add tests * fix ZIP header endianness for big-endian compatibility Update ZipLFH, ZipCDH, ZipEOCD, Zip64EOCD, and Zip64Locator structs to use little-endian bitstruct types from std::core::bitorder * fix ZipEntryReader position tracking and seek logic ZIP_METHOD_STORE added a test to track this * add package-merge algorithm attribution Thanks @konimarti * standalone deflate_benchmark.c3 against `miniz` * fix integer overflows, leaks and improve safety * a few safety for 32-bit systems and tests * deflate compress optimization * improve match finding, hash updates, and buffer usage * use ulong for zip offsets * style changes (#18) * style changes * update tests * style changes in `deflate.c3` * fix typo * Allocator first. Some changes to deflate to use `copy_to` * Fix missing conversion on 32 bits. * Fix deflate stream. Formatting. Prefer switch over if-elseif * - Stream functions now use long/ulong rather than isz/usz for seek/available. - `instream.seek` is replaced by `set_cursor` and `cursor`. - `instream.available`, `cursor` etc are long/ulong rather than isz/usz to be correct on 32-bit. * Update to constdef * Fix test --------- Co-authored-by: Book-reader <thevoid@outlook.co.nz> Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
2026-02-27 03:51:18 +00:00 · 2026-02-20 16:41:34 -03:00
parent 5055e86518
commit eae7d0c4a1
26 changed files with 3745 additions and 96 deletions
--- a/test/compression/deflate_benchmark.c3
+++ b/test/compression/deflate_benchmark.c3
@@ -0,0 +1,207 @@
+// 1. `gcc -O3 -c dependencies/miniz/miniz.c -o build/miniz.o`
+// 2. `build/c3c -O3 compile-run test/compression/deflate_benchmark.c3 build/miniz.o`
+
+module deflate_benchmark;
+import std, std::time::clock;
+
+const int AMOUNT_OF_WORK = 10; // Increase this to scale test data sizes
+
+fn int main(String[] args)
+{
+	io::printf("\n%s%s DEFLATE BENCHMARK %s", Ansi.BOLD, Ansi.BG_CYAN, Ansi.RESET);
+	io::printfn(" Comparing C3 std::compression::deflate with miniz (in-process)\n");
+	io::printfn(" Work Scale: %dx\n", AMOUNT_OF_WORK);
+
+	io::printfn("%-26s | %7s | %7s | %7s | %7s | %-10s", "Test Case", "C3 Rat.", "Miz Rat.", "C3 MB/s", "Miz MB/s", "Winner");
+	io::printfn("---------------------------+---------+---------+---------+---------+-----------");
+
+	// Test 1: Redundant data
+	usz redundant_size = 10_000_000 * (usz)AMOUNT_OF_WORK;
+	char[] redundant = allocator::alloc_array(tmem, char, redundant_size);
+	mem::set(redundant.ptr, 'A', redundant_size);
+	run_bench(string::tformat("Redundant (%dMB 'A')", (int)(redundant_size / 1_000_000)), redundant);
+
+	// Test 2: Large Source Project (All .c files in src/compiler)
+	DString project_src;
+	Path src_dir = path::new(tmem, "src/compiler")!!;
+	PathList? compiler_files = path::ls(tmem, src_dir);
+	if (try files = compiler_files) {
+		for (int i = 0; i < AMOUNT_OF_WORK; i++) {
+			foreach (p : files) {
+				if (p.basename().ends_with(".c")) {
+					Path full_p = src_dir.tappend(p.str_view())!!;
+					if (try data = file::load_path(tmem, full_p)) {
+						project_src.append(data);
+					}
+				}
+			}
+		}
+	}
+	run_bench("Compiler Source (Bulk)", project_src.str_view());
+
+	// Test 3: Standard Library (All .c3 files in lib/std)
+	DString std_src;
+	for (int i = 0; i < AMOUNT_OF_WORK; i++) {
+		collect_files(path::new(tmem, "lib/std")!!, ".c3", &std_src);
+	}
+	run_bench("Stdlib Source (Bulk)", std_src.str_view());
+
+	// Test 4: Log Files (Simulated)
+	DString log_data;
+	for (int i = 0; i < 50_000 * AMOUNT_OF_WORK; i++) {
+		log_data.appendf("2024-02-13 21:30:%02d.%03d [INFO] Connection established from 192.168.1.%d\n", i % 60, i % 1000, i % 255);
+		log_data.appendf("2024-02-13 21:30:%02d.%03d [DEBUG] Buffer size: %d bytes\n", i % 60, i % 1000, (i * 123) % 4096);
+	}
+	run_bench("Log Files (Simulated)", log_data.str_view());
+
+	// Test 5: Web Content (Simulated HTML/CSS)
+	DString web_data;
+	web_data.append("<!DOCTYPE html><html><head><style>.item { color: red; margin: 10px; }</style></head><body>");
+	for (int i = 0; i < 1000 * AMOUNT_OF_WORK; i++) {
+		web_data.appendf("<div class='item' id='obj%d'>", i);
+		web_data.append("<h1>Title of the item</h1><p>This is some repetitive descriptive text that might appear on a web page.</p>");
+		web_data.append("<ul><li>Feature 1</li><li>Feature 2</li><li>Feature 3</li></ul></div>");
+	}
+	web_data.append("</body></html>");
+	run_bench("Web Content (Simulated)", web_data.str_view());
+
+	// Test 6: CSV Data (Simulated)
+	DString csv_data;
+	csv_data.append("id,name,value1,value2,status,category\n");
+	for (int i = 0; i < 20_000 * AMOUNT_OF_WORK; i++) {
+		csv_data.appendf("%d,Product_%d,%d.5,%d,\"%s\",\"%s\"\n", 
+			i, i % 100, i * 10, i % 500, 
+			i % 3 == 0 ? "Active" : "Inactive", 
+			i % 5 == 0 ? "Electronics" : "Home");
+	}
+	run_bench("CSV Data (Simulated)", csv_data.str_view());
+
+	// Test 7: Binary Data (Structured)
+	usz binary_size = 2_000_000 * (usz)AMOUNT_OF_WORK;
+	char[] binary = allocator::alloc_array(tmem, char, binary_size);
+	for (usz i = 0; i < binary.len; i += 4) {
+		uint val = (uint)i ^ 0xDEADBEEF;
+		mem::copy(binary.ptr + i, &val, 4);
+	}
+	run_bench("Binary Data (Structured)", binary);
+
+	// Test 8: Random Noise (1MB)
+	usz noise_size = 1_000_000 * (usz)AMOUNT_OF_WORK;
+	DString noise;
+	for (usz i = 0; i < noise_size; i++) {
+		noise.append((char)rand('z' - 'a' + 1) + 'a');
+	}
+	run_bench("Random Noise (Scaled)", noise.str_view());
+
+	// Test 9: Tiny File (Check overhead)
+	run_bench("Tiny File (asd.c3)", "module asd; fn void main() {}\n");
+
+	// Test 10: Natural Language (Repetitive)
+	String text = "The quick brown fox jumps over the lazy dog. ";
+	DString long_text;
+	for (int i = 0; i < 50_000 * AMOUNT_OF_WORK; i++) long_text.append(text);
+	run_bench("Natural Text (Scaled)", long_text.str_view());
+
+	if (args.len > 1) {
+		Path custom_p = path::new(tmem, args[1])!!;
+		if (try custom_data = file::load_path(tmem, custom_p)) {
+			run_bench(string::tformat("Custom: %s", custom_p.basename()), custom_data);
+		}
+	}
+	// Final Summary
+	double avg_c3 = totals.c3_speed_sum / totals.count;
+	double avg_miniz = totals.miniz_speed_sum / totals.count;
+	double total_factor = avg_c3 / avg_miniz;
+
+	io::printfn("\n%sOVERALL SUMMARY%s", Ansi.BOLD, Ansi.RESET);
+	io::printfn("  Average Throughput C3:    %8.1f MB/s", avg_c3);
+	io::printfn("  Average Throughput Miniz: %8.1f MB/s", avg_miniz);
+	io::printfn("  %sC3 is %.1fx faster on average!%s\n", Ansi.BOLD, total_factor, Ansi.RESET);
+
+	return 0;
+}
+
+struct BenchResult {
+	long time_ns;
+	usz size;
+	double ratio;
+	double throughput_mbs;
+}
+
+struct BenchTotal {
+	double c3_speed_sum;
+	double miniz_speed_sum;
+	int count;
+}
+BenchTotal totals;
+
+fn void run_bench(String title, char[] data)
+{
+	// C3 Bench
+	Clock start = clock::now();
+	char[] c3_compressed = deflate::compress(data, tmem)!!;
+	Clock end = clock::now();
+	BenchResult c3 = calculate_metrics(data.len, (long)(end - start), c3_compressed.len);
+
+	// Miniz Bench
+	usz miniz_size;
+	start = clock::now();
+	void* miniz_ptr = tdefl_compress_mem_to_heap(data.ptr, data.len, &miniz_size, MINIZ_FLAGS);
+	end = clock::now();
+	BenchResult miniz = calculate_metrics(data.len, (long)(end - start), miniz_size);
+	if (miniz_ptr) mz_free(miniz_ptr);
+
+	// Performance Delta
+	double speed_factor = c3.throughput_mbs / miniz.throughput_mbs;
+	
+	io::printf("%-26s | %6.2f%% | %6.2f%% | %7.1f | %7.1f | %s%s (%.1fx)%s\n",
+		title[:(min(title.len, 26))],
+		c3.ratio, miniz.ratio,
+		c3.throughput_mbs, miniz.throughput_mbs,
+		speed_factor > 1.0 ? Ansi.CYAN : Ansi.WHITE,
+		speed_factor > 1.0 ? "C3" : "Miniz",
+		speed_factor > 1.0 ? speed_factor : 1.0 / speed_factor,
+		Ansi.RESET);
+
+	totals.c3_speed_sum += c3.throughput_mbs;
+	totals.miniz_speed_sum += miniz.throughput_mbs;
+	totals.count++;
+}
+
+fn void collect_files(Path dir, String suffix, DString* out)
+{
+	PathList? items = path::ls(tmem, dir);
+	if (catch items) return;
+	foreach (p : items) {
+		Path full = dir.tappend(p.str_view())!!;
+		if (path::is_dir(full)) {
+			if (p.basename() != "." && p.basename() != "..") {
+				collect_files(full, suffix, out);
+			}
+		} else if (p.basename().ends_with(suffix)) {
+			if (try data = file::load_path(tmem, full)) {
+				out.append(data);
+			}
+		}
+	}
+}
+
+fn BenchResult calculate_metrics(usz original_len, long time_ns, usz compressed_len)
+{
+	BenchResult res;
+	res.time_ns = time_ns;
+	res.size = compressed_len;
+	res.ratio = (double)compressed_len / (double)original_len * 100.0;
+	res.throughput_mbs = (double)original_len / (1024.0 * 1024.0) / ((double)time_ns / 1_000_000_000.0);
+	return res;
+}
+
+// External Miniz bindings
+extern fn void* tdefl_compress_mem_to_heap(void* pSrc_buf, usz src_buf_len, usz* pOut_len, int flags);
+extern fn void mz_free(void* p);
+
+const int TDEFL_GREEDY_PARSING_FLAG = 0x04000;
+const int TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000; // Fastest init for miniz for a fair comparisson
+const int C3_EQUIVALENT_PROBES = 16; // C3 uses MAX_CHAIN = 16 as default (this should be exposed)
+
+const int MINIZ_FLAGS = C3_EQUIVALENT_PROBES | TDEFL_GREEDY_PARSING_FLAG | TDEFL_NONDETERMINISTIC_PARSING_FLAG;
--- a/test/compression/zip_compare_7z.c3
+++ b/test/compression/zip_compare_7z.c3
@@ -0,0 +1,194 @@
+<*
+ Compare `C3 zip` vs `7z` extraction
+ External dependencies: 7z, diff
+*>
+module verify_zip;
+import std;
+import libc;
+
+fn int main(String[] args)
+{
+	if (args.len < 2)
+	{
+		io::printfn("Usage: %s [-r|--recursive] [-o|--output <dir>] <zip_dir>", args[0]);
+		return 1;
+	}
+
+	bool recursive = false;
+	String zip_dir;
+	String output_dir;
+
+	for (int i = 1; i < args.len; i++)
+	{
+		String arg = args[i];
+		switch (arg)
+		{
+			case "-r":
+			case "--recursive":
+				recursive = true;
+			case "-o":
+			case "--output":
+				if (++i >= args.len)
+				{
+					io::printfn("Error: %s requires a directory path", arg);
+					return 1;
+				}
+				output_dir = args[i];
+			default:
+				if (arg.starts_with("-"))
+				{
+					io::printfn("Error: unknown option %s", arg);
+					return 1;
+				}
+				if (zip_dir)
+				{
+					io::printfn("Error: multiple zip directories specified ('%s' and '%s')", zip_dir, arg);
+					return 1;
+				}
+				zip_dir = arg;
+		}
+	}
+
+	if (!zip_dir)
+	{
+		io::printfn("Error: no zip directory specified.");
+		return 1;
+	}
+
+	return process_dir(zip_dir, recursive, output_dir);
+}
+
+fn int process_dir(String dir, bool recursive, String output_dir)
+{
+	PathList? files = path::ls(tmem, path::temp(dir)!!);
+	if (catch excuse = files)
+	{
+		io::printfn("Could not open directory: %s (Excuse: %s)", dir, excuse);
+		return 1;
+	}
+
+	foreach (p : files)
+	{
+		String name = p.basename();
+		if (name == "." || name == "..") continue;
+
+		String zip_path = path::temp(dir)!!.tappend(name)!!.str_view();
+
+		if (file::is_dir(zip_path))
+		{
+			if (recursive)
+			{
+				if (process_dir(zip_path, recursive, output_dir) != 0) return 1;
+			}
+			continue;
+		}
+
+		if (!name.ends_with(".zip")) continue;
+
+		ulong size = 0;
+		File? f = file::open(zip_path, "rb");
+		if (try fh = f)
+		{
+			(void)fh.seek(0, Seek.END);
+			size = fh.seek(0, Seek.CURSOR) ?? 0;
+			fh.close()!!;
+		}
+		io::printf("Verifying %-40s [%7d KB] ", name[:(min(name.len, 40))], size / 1024);
+
+		switch (verify_one(zip_path, output_dir))
+		{
+			case 0:
+				io::printfn("%sFAILED%s ❌", Ansi.RED, Ansi.RESET);
+				return 1;
+			case 1:
+				io::printfn("%sPASSED%s ✅", Ansi.GREEN, Ansi.RESET);
+			default:
+				io::printn();
+		}
+	}
+
+	return 0;
+}
+
+fn int verify_one(String zip_path, String output_dir)
+{
+	Path extract_root;
+	if (output_dir)
+	{
+		extract_root = path::temp(output_dir)!!;
+	}
+	else
+	{
+		extract_root = path::temp_directory(tmem)!!;
+	}
+
+	String name = (String)path::temp(zip_path)!!.basename();
+
+	Path temp_c3 = extract_root.tappend(name.tconcat("_c3"))!!;
+	Path temp_7z = extract_root.tappend(name.tconcat("_7z"))!!;
+
+	(void)path::mkdir(temp_c3, true);
+	(void)path::mkdir(temp_7z, true);
+
+	ZipArchive? archive = zip::open(zip_path, "r");
+	if (catch excuse = archive)
+	{
+		io::printfn("%sFAIL%s (open: %s)", Ansi.RED, Ansi.RESET, excuse);
+		return 0;
+	}
+	defer (void)archive.close();
+
+	Time start = time::now();
+	if (catch excuse = archive.extract(temp_c3.str_view()))
+	{
+		if (excuse == zip::ENCRYPTED_FILE)
+		{
+			io::printf("%sSKIPPED%s (Encrypted)", Ansi.YELLOW, Ansi.RESET);
+			return 2;
+		}
+		io::printfn("%sFAIL%s (extract: %s)", Ansi.RED, Ansi.RESET, excuse);
+		return 0;
+	}
+	Duration c3_time = time::now() - start;
+
+	start = time::now();
+	if (!extract_7z(zip_path, temp_7z.str_view()))
+	{
+		io::printfn("%sFAIL%s (7z extract)", Ansi.RED, Ansi.RESET);
+		return 0;
+	}
+	Duration p7_time = time::now() - start;
+
+	io::printf(" [C3: %5d ms, 7z: %5d ms]", (long)c3_time / 1000, (long)p7_time / 1000);
+
+	io::print(" Comparing... ");
+	if (!compare_dirs(temp_c3.str_view(), temp_7z.str_view()))
+	{
+		io::printfn("%sFAIL%s (Differences found)", Ansi.RED, Ansi.RESET);
+		return 0;
+	}
+
+	// keep files on error for manual verification
+	(void)path::rmtree(temp_c3);
+	(void)path::rmtree(temp_7z);
+
+	return 1;
+}
+
+fn bool extract_7z(String zip_path, String output_dir)
+{
+	String out_opt = "-o".tconcat(output_dir);
+	String[] cmd = { "7z", "x", zip_path, out_opt, "-y", "-bb0" };
+	SubProcess? proc = process::create(cmd, { .search_user_path = true });
+	if (catch excuse = proc) return false;
+	return (int)proc.join()!! == 0;
+}
+
+fn bool compare_dirs(String dir1, String dir2)
+{
+	String[] cmd = { "diff", "-r", dir1, dir2 };
+	SubProcess? proc = process::create(cmd, { .search_user_path = true, .inherit_stdio = true });
+	if (catch excuse = proc) return false;
+	int res = (int)proc.join()!!;
+	return res == 0;
+}
--- a/test/unit/stdlib/compression/deflate.c3
+++ b/test/unit/stdlib/compression/deflate.c3
@@ -0,0 +1,210 @@
+module deflate_test @test;
+
+import std::compression::deflate, std::io, std::math;
+
+fn void test_deflate_basic()
+{
+	String original = "Hello, world! This is a test of the DEFLATE compression algorithm.";
+	char[] compressed = deflate::compress(mem, original)!!;
+	defer free(compressed.ptr);
+
+	char[] decompressed = deflate::decompress(mem, compressed)!!;
+	defer free(decompressed.ptr);
+
+	assert((String)decompressed == original, "Decompressed data does not match original");
+}
+
+fn void test_deflate_repetitive()
+{
+	// 5000 bytes of repetitive data should compress very well
+	usz len = 5000;
+	char[] original = mem::malloc(len)[:len];
+	defer free(original.ptr);
+
+	for (usz i = 0; i < len; i++)
+	{
+		original[i] = (char)((i % 10) + '0');
+	}
+
+	char[] compressed = deflate::compress(mem, original)!!;
+	defer free(compressed.ptr);
+
+	// Check that we actually achieved some compression
+	assert(compressed.len < len / 10, "Repetitive data should compress well");
+
+	char[] decompressed = deflate::decompress(mem, compressed)!!;
+	defer free(decompressed.ptr);
+
+	assert(decompressed.len == original.len, "Length mismatch");
+	assert((String)decompressed == (String)original, "Data mismatch");
+}
+
+fn void test_deflate_empty()
+{
+	char[] original = {};
+	char[] compressed = deflate::compress(mem, original)!!;
+	defer free(compressed.ptr);
+
+	char[] decompressed = deflate::decompress(mem, compressed)!!;
+	defer free(decompressed.ptr);
+
+	assert(decompressed.len == 0, "Expected empty decompression");
+}
+
+fn void test_deflate_large_repetitive() @if($feature(SLOW_TESTS))
+{
+	// Test larger buffer to trigger reallocs in inflater
+	usz len = 100000;
+	char[] original = mem::malloc(len)[:len];
+	defer free(original.ptr);
+
+	mem::set(original.ptr, (char)'A', len);
+
+	char[] compressed = deflate::compress(mem, original)!!;
+	defer free(compressed.ptr);
+
+	char[] decompressed = deflate::decompress(mem, compressed)!!;
+	defer free(decompressed.ptr);
+
+	assert(decompressed.len == len, "Length mismatch");
+	assert(decompressed[0] == 'A' && decompressed[len-1] == 'A', "Data mismatch");
+}
+
+fn void test_deflate_random_ish()
+{
+	// Data that doesn't compress well
+	usz len = 1024;
+	char[] original = mem::malloc(len)[:len];
+	defer free(original.ptr);
+
+	for (usz i = 0; i < len; i++)
+	{
+		original[i] = (char)(i & 0xFF);
+	}
+
+	char[] compressed = deflate::compress(mem, original)!!;
+	defer free(compressed.ptr);
+
+	char[] decompressed = deflate::decompress(mem, compressed)!!;
+	defer free(decompressed.ptr);
+
+	assert((String)decompressed == (String)original, "Data mismatch");
+}
+
+fn void test_deflate_corrupted()
+{
+	char[] compressed = deflate::compress(mem, "Some data")!!;
+	defer free(compressed.ptr);
+
+	// Corrupt the block type (bits 1-2 of first byte) to 3 (reserved/invalid)
+	compressed[0] |= 0x06;
+
+	char[]? decompressed = deflate::decompress(mem, compressed);
+	assert(!@ok(decompressed), "Expected decompression to fail for corrupted data");
+}
+
+fn void test_deflate_stream()
+{
+	@pool()
+	{
+		String base = "This is a streaming test for DEFLATE. ";
+		usz base_len = base.len;
+		usz count = 50;
+		char[] original_arr = mem::malloc(base_len * count)[:base_len * count];
+		defer free(original_arr.ptr);
+		for (usz i = 0; i < count; i++) {
+			mem::copy(original_arr.ptr + i * base_len, base.ptr, base_len);
+		}
+		String original = (String)original_arr;
+
+		char[] compressed = deflate::compress(mem, original_arr)!!;
+		defer free(compressed.ptr);
+
+		// Use a temporary file on disk to test the streaming interface
+		File f = file::open("unittest_stream_deflate.bin", "wb+")!!;
+		defer { (void)f.close(); (void)file::delete("unittest_stream_deflate.bin"); }
+
+		f.write(compressed)!!;
+		f.seek(0, Seek.SET)!!;
+
+		// Decompress using stream
+		File out_f = file::open("unittest_stream_out.bin", "wb+")!!;
+		defer { (void)out_f.close(); (void)file::delete("unittest_stream_out.bin"); }
+
+		deflate::decompress_stream(&f, &out_f)!!;
+
+		usz out_size = out_f.seek(0, Seek.CURSOR)!!;
+		assert(out_size == original.len, "Length mismatch in streaming decompression");
+
+		out_f.seek(0, Seek.SET)!!;
+		char[] result = mem::malloc(out_size)[:out_size];
+		defer free(result.ptr);
+		out_f.read(result)!!;
+
+		assert((String)result == original, "Data mismatch in streaming decompression");
+	};
+}
+
+fn void test_deflate_embedded_stream()
+{
+	String base = "This is a streaming test for DEFLATE. ";
+
+	char[] compressed = deflate::compress(mem, base[..])!!;
+	defer free(compressed.ptr);
+
+	usz append_len = compressed.len + 1;
+	char[] append = mem::malloc(append_len)[:append_len];
+	defer free(append.ptr);
+
+	append[:compressed.len] = compressed[..];
+	append[compressed.len..] = 'c';
+
+	ByteReader reader;
+	reader.init(append);
+
+	ByteWriter writer;
+	writer.tinit();
+
+	deflate::decompress_stream(&reader, &writer)!!;
+
+	assert(writer.str_view() == base);
+
+	assert(reader.read_byte()!! == 'c');
+}
+
+fn void test_deflate_incremental()
+{
+	@pool()
+	{
+		String original = "This is a test of incremental decompression. We will read it byte by byte.";
+		char[] compressed = deflate::compress(mem, original)!!;
+		defer free(compressed.ptr);
+
+		// Use a ByteReader for the compressed data
+		io::ByteReader in_stream;
+		in_stream.init(compressed);
+
+		Inflater* inflater = mem::new(Inflater);
+		char[] bit_buf = mem::malloc(8192)[:8192];
+		inflater.init(&in_stream, bit_buf);
+		defer free(bit_buf.ptr);
+		defer free(inflater);
+
+		char[] decompressed = mem::malloc(original.len)[:original.len];
+		defer free(decompressed.ptr);
+
+		for (usz i = 0; i < original.len; i++)
+		{
+			char[1] one_byte;
+			usz n = inflater.read(one_byte[..])!!;
+			assert(n == 1, "Expected 1 byte");
+			decompressed[i] = one_byte[0];
+		}
+
+		// One more read should return 0 (or EOF)
+		char[1] extra;
+		assert(inflater.read(extra[..])!! == 0, "Expected EOF");
+
+		assert((String)original == (String)decompressed, "Incremental decompression failed");
+	};
+}
--- a/test/unit/stdlib/compression/zip.c3
+++ b/test/unit/stdlib/compression/zip.c3
@@ -0,0 +1,549 @@
+module zip_test @test;
+
+import std::io;
+import std::compression::zip;
+
+fn void test_zip_store()
+{
+	@pool()
+	{
+		// Create archive with uncompressed file
+		ZipArchive zip = zip::open(mem, "unittest_store.zip", "w")!!;
+		zip.write_file("test.txt", "Hello, World!", STORE)!!;
+		(void)zip.close();
+		defer (void)file::delete("unittest_store.zip");
+
+		// Read and verify
+		ZipArchive read_zip = zip::open(mem, "unittest_store.zip", "r")!!;
+		defer (void)read_zip.close();
+
+		assert(read_zip.count() == 1, "Expected 1 entry");
+
+		ZipEntry entry = read_zip.stat("test.txt")!!;
+		assert(entry.method == STORE, "Expected STORE method");
+		assert(entry.uncompressed_size == 13, "Expected 13 bytes");
+
+		char[] data = read_zip.read_file_all(mem, "test.txt")!!;
+		defer free(data);
+		assert((String)data == "Hello, World!", "Data mismatch");
+
+	};
+}
+
+fn void test_zip_deflate()
+{
+	@pool()
+	{
+		// Create archive with compressed file
+		ZipArchive zip = zip::open(mem, "unittest_deflate.zip", "w")!!;
+
+		ZipEntryWriter writer = zip.open_writer("compressed.txt", DEFLATE)!!;
+		String data = "This is a test. ";
+		for (int i = 0; i < 100; i++)
+		{
+			writer.write((char[])data)!!;
+		}
+		writer.close()!!;
+		(void)zip.close();
+		defer (void)file::delete("unittest_deflate.zip");
+
+		// Read and verify
+		ZipArchive read_zip = zip::open(mem, "unittest_deflate.zip", "r")!!;
+		defer (void)read_zip.close();
+
+		assert(read_zip.count() == 1, "Expected 1 entry");
+
+		ZipEntry entry = read_zip.stat("compressed.txt")!!;
+		assert(entry.method == DEFLATE, "Expected DEFLATE method");
+		assert(entry.uncompressed_size == 1600, "Expected 1600 bytes");
+
+		char[] decompressed = read_zip.read_file_all(mem, "compressed.txt")!!;
+		defer free(decompressed);
+		assert(decompressed.len == 1600, "Decompressed size mismatch");
+
+	};
+}
+
+fn void test_zip_directory()
+{
+	@pool()
+	{
+		// Create archive with directory
+		ZipArchive zip = zip::open(mem, "unittest_dir.zip", "w")!!;
+		zip.add_directory("docs")!!;
+		zip.write_file("docs/readme.txt", "README")!!;
+		(void)zip.close();
+		defer (void)file::delete("unittest_dir.zip");
+
+		// Read and verify
+		ZipArchive read_zip = zip::open(mem, "unittest_dir.zip", "r")!!;
+		defer (void)read_zip.close();
+
+		assert(read_zip.count() == 2, "Expected 2 entries");
+
+		ZipEntry dir_entry = read_zip.stat("docs/")!!;
+		assert(dir_entry.is_directory, "Expected directory");
+		assert(dir_entry.uncompressed_size == 0, "Directory should have 0 size");
+
+		ZipEntry file_entry = read_zip.stat("docs/readme.txt")!!;
+		assert(!file_entry.is_directory, "Expected file");
+
+		char[] data = read_zip.read_file_all(mem, "docs/readme.txt")!!;
+		defer free(data);
+		assert((String)data == "README", "Data mismatch");
+
+	};
+}
+
+fn void test_zip_crc32_verification()
+{
+	@pool()
+	{
+		// Create archive
+		ZipArchive zip = zip::open(mem, "unittest_crc.zip", "w")!!;
+		zip.write_file("data.txt", "Test data for CRC32")!!;
+		(void)zip.close();
+		defer (void)file::delete("unittest_crc.zip");
+
+		ZipArchive read_zip = zip::open(mem, "unittest_crc.zip", "r")!!;
+		defer (void)read_zip.close();
+
+		char[] data = read_zip.read_file_all(mem, "data.txt")!!;
+		defer free(data);
+		assert((String)data == "Test data for CRC32", "Data mismatch");
+
+	};
+}
+
+fn void test_zip_multiple_files()
+{
+	@pool()
+	{
+		// Create archive with multiple files
+		ZipArchive zip = zip::open(mem, "unittest_multi.zip", "w")!!;
+		zip.write_file("file1.txt", "First file")!!;
+		zip.write_file("file2.txt", "Second file")!!;
+		zip.write_file("file3.txt", "Third file")!!;
+		(void)zip.close();
+		defer (void)file::delete("unittest_multi.zip");
+
+		// Read and verify
+		ZipArchive read_zip = zip::open(mem, "unittest_multi.zip", "r")!!;
+		defer (void)read_zip.close();
+
+		assert(read_zip.count() == 3, "Expected 3 entries");
+
+		for (usz i = 0; i < read_zip.count(); i++)
+		{
+			ZipEntry entry = read_zip.stat_at(i)!!;
+			assert(!entry.is_directory, "Expected files only");
+		}
+
+		char[] data1 = read_zip.read_file_all(mem, "file1.txt")!!;
+		defer free(data1);
+		assert((String)data1 == "First file", "File1 mismatch");
+
+		char[] data2 = read_zip.read_file_all(mem, "file2.txt")!!;
+		defer free(data2);
+		assert((String)data2 == "Second file", "File2 mismatch");
+
+		char[] data3 = read_zip.read_file_all(mem, "file3.txt")!!;
+		defer free(data3);
+		assert((String)data3 == "Third file", "File3 mismatch");
+
+	};
+}
+
+fn void test_zip_streaming()
+{
+	@pool()
+	{
+		// Test streaming write
+		ZipArchive zip = zip::open(mem, "unittest_stream.zip", "w")!!;
+
+		ZipEntryWriter writer = zip.open_writer("stream.txt", DEFLATE)!!;
+		writer.write("Part 1. ")!!;
+		writer.write("Part 2. ")!!;
+		writer.write("Part 3.")!!;
+		writer.close()!!;
+
+		(void)zip.close();
+		defer (void)file::delete("unittest_stream.zip");
+
+		// Read and verify
+		ZipArchive read_zip = zip::open(mem, "unittest_stream.zip", "r")!!;
+		defer (void)read_zip.close();
+
+		char[] data = read_zip.read_file_all(mem, "stream.txt")!!;
+		defer free(data);
+		assert((String)data == "Part 1. Part 2. Part 3.", "Streaming write failed");
+
+	};
+}
+fn void test_zip_invalid_access()
+{
+	@pool()
+	{
+		// Test non-existent archive
+		ZipArchive? opt = zip::open(mem, "non_existent.zip", "r");
+		assert(!@ok(opt), "Expected error when opening non-existent file");
+
+		// Test non-existent entry
+		ZipArchive zip = zip::open(mem, "unittest_edge.zip", "w")!!;
+		zip.write_file("exists.txt", "data")!!;
+		(void)zip.close();
+		defer (void)file::delete("unittest_edge.zip");
+
+		ZipArchive read_zip = zip::open(mem, "unittest_edge.zip", "r")!!;
+		defer (void)read_zip.close();
+
+		ZipEntry? entry_opt = read_zip.stat("does_not_exist.txt");
+		assert(!@ok(entry_opt), "Expected ENTRY_NOT_FOUND");
+
+		char[]? data_opt = read_zip.read_file_all(mem, "does_not_exist.txt");
+		assert(!@ok(data_opt), "Expected error when reading non-existent file");
+		assert(!@ok(data_opt), "Expected error when reading non-existent file");
+
+	};
+}
+
+fn void test_zip_empty_archive()
+{
+	@pool()
+	{
+		// Create empty archive
+		ZipArchive zip = zip::open(mem, "unittest_empty.zip", "w")!!;
+		(void)zip.close();
+		defer (void)file::delete("unittest_empty.zip");
+
+		// Read empty archive
+		ZipArchive read_zip = zip::open(mem, "unittest_empty.zip", "r")!!;
+		defer (void)read_zip.close();
+
+		assert(read_zip.count() == 0, "Expected 0 entries");
+	};
+}
+
+fn void test_zip_recovery()
+{
+	@pool()
+	{
+		String path = "unittest_embedded_broken.zip";
+		// Create a "broken" ZIP (LFH + Data, but no Central Directory)
+		// Filename: "a", Data: "bc"
+		char[] broken_zip = {0x50,0x4B,0x03,0x04,0x14,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x38,0x2B,0xA9,0xC2,0x02,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x61,0x62,0x63};
+		file::save(path, broken_zip)!!;
+		defer (void)file::delete(path);
+
+		ZipArchive? normal = zip::open(mem, path, "r");
+		assert(!@ok(normal), "Normal open should fail on broken ZIP");
+
+		ZipArchive recovered = zip::recover(mem, path)!!;
+		defer (void)recovered.close();
+
+		assert(recovered.count() == 1, "Should have recovered 1 file");
+		char[] data = recovered.read_file_all(mem, "a")!!;
+		defer free(data);
+		assert((String)data == "bc", "Recovered data mismatch");
+	};
+}
+
+fn void test_zip_cp437()
+{
+	@pool()
+	{
+		String path = "unittest_embedded_cp437.zip";
+		// Create a ZIP with CP437 encoding (Bit 11 NOT set)
+		// Filename: 0x80 (Ç in CP437), Data: "x"
+		char[] cp437_zip = {0x50,0x4B,0x03,0x04,0x14,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x83,0x16,0xDC,0x8C,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x80,0x78};
+
+		file::save(path, cp437_zip)!!;
+		defer (void)file::delete(path);
+
+		ZipArchive recovered = zip::recover(mem, path)!!;
+		defer (void)recovered.close();
+
+		ZipEntry entry = recovered.stat_at(0)!!;
+		assert(entry.name == "Ç", "CP437 decoding failed");
+
+		char[] data = recovered.read_file_all(mem, "Ç")!!;
+		defer free(data);
+		assert((String)data == "x", "Data mismatch in CP437 test");
+	};
+}
+
+fn void test_zip_with_comment()
+{
+	@pool()
+	{
+		// Create a ZIP file with a comment
+		ZipArchive zip = zip::open(mem, "unittest_comment.zip", "w")!!;
+		zip.write_file("test.txt", "Hello, World!")!!;
+		(void)zip.close();
+		defer (void)file::delete("unittest_comment.zip");
+
+		char[] zip_data = file::load(mem, "unittest_comment.zip")!!;
+		defer free(zip_data);
+
+		isz eocd_pos = -1;
+		for (isz i = (isz)zip_data.len - 22; i >= 0; i--)
+		{
+			uint sig = mem::load((uint*)&zip_data[i], 1);
+			if (sig == 0x06054b50)
+			{
+				eocd_pos = i;
+				break;
+			}
+		}
+		assert(eocd_pos >= 0, "EOCD not found");
+
+		String comment = "This is a test comment!";
+		mem::store((ushort*)&zip_data[eocd_pos + 20], (ushort)comment.len, 1);
+
+		char[] new_zip = mem::new_array(char, zip_data.len + comment.len);
+		defer free(new_zip);
+		mem::copy(new_zip.ptr, zip_data.ptr, zip_data.len);
+		mem::copy(new_zip.ptr + zip_data.len, comment.ptr, comment.len);
+
+		file::save("unittest_comment.zip", new_zip[:zip_data.len + comment.len])!!;
+
+		// Try to open it
+		ZipArchive read_zip = zip::open(mem, "unittest_comment.zip", "r")!!;
+		defer (void)read_zip.close();
+
+		assert(read_zip.count() == 1, "Expected 1 entry");
+		assert(read_zip.comment == comment, "Comment mismatch");
+
+		char[] data = read_zip.read_file_all(mem, "test.txt")!!;
+		defer free(data);
+		assert((String)data == "Hello, World!", "Data mismatch with comment");
+	};
+}
+
+fn void test_zip_write_comment()
+{
+	@pool()
+	{
+		ZipArchive zip = zip::open(mem, "unittest_write_comment.zip", "w")!!;
+		zip.comment = String.copy("Created by C3 ZIP library", zip.allocator);
+		zip.write_file("test.txt", "Hello!")!!;
+		(void)zip.close();
+		defer (void)file::delete("unittest_write_comment.zip");
+
+		ZipArchive read_zip = zip::open(mem, "unittest_write_comment.zip", "r")!!;
+		defer (void)read_zip.close();
+
+		assert(read_zip.comment == "Created by C3 ZIP library", "Comment not preserved");
+		assert(read_zip.count() == 1, "Expected 1 entry");
+	};
+}
+
+fn void test_zip64_headers()
+{
+	@pool()
+	{
+		String filename = "unittest_zip64.zip";
+		ZipArchive zip = zip::open(mem, filename, "w")!!;
+
+		ZipEntryWriter writer = zip.open_writer("large.txt", STORE)!!;
+		writer.write("data")!!;
+
+		// Manually set the size to > 4GB to trigger ZIP64 headers in the Central Directory.
+		// This tests the fix for ZIP64 extra field serialization (ensuring no byte truncation).
+		writer.entry.uncompressed_size = 0x100000001;
+		writer.entry.compressed_size = 0x100000001;
+
+		writer.close()!!;
+		(void)zip.close();
+		defer (void)file::delete(filename);
+
+		ZipArchive read_zip = zip::open(mem, filename, "r")!!;
+		defer (void)read_zip.close();
+
+		ZipEntry entry = read_zip.stat("large.txt")!!;
+		assert(entry.uncompressed_size == 0x100000001, "Failed to read ZIP64 uncompressed size");
+		assert(entry.compressed_size == 0x100000001, "Failed to read ZIP64 compressed size");
+	};
+}
+
+fn void test_zip_utf8()
+{
+	@pool()
+	{
+		String filename = "unittest_utf8.zip";
+		String utf8_name = "测试_🚀.txt";
+		ZipArchive zip = zip::open(mem, filename, "w")!!;
+		zip.write_file(utf8_name, "content")!!;
+		(void)zip.close();
+		defer (void)file::delete(filename);
+
+		ZipArchive read_zip = zip::open(mem, filename, "r")!!;
+		defer (void)read_zip.close();
+
+		ZipEntry entry = read_zip.stat(utf8_name)!!;
+		assert(entry.name == utf8_name, "UTF-8 filename mismatch");
+	};
+}
+
+fn void test_zip_zero_length()
+{
+	@pool()
+	{
+		String filename = "unittest_zero.zip";
+		ZipArchive zip = zip::open(mem, filename, "w")!!;
+		zip.write_file("empty.txt", "")!!;
+		(void)zip.close();
+		defer (void)file::delete(filename);
+
+		ZipArchive read_zip = zip::open(mem, filename, "r")!!;
+		defer (void)read_zip.close();
+
+		ZipEntry entry = read_zip.stat("empty.txt")!!;
+		assert(entry.uncompressed_size == 0, "Size should be 0");
+
+		char[] data = read_zip.read_file_all(mem, "empty.txt")!!;
+		defer free(data);
+		assert(data.len == 0, "Read data should be empty");
+	};
+}
+
+fn void test_zip64_offset()
+{
+	@pool()
+	{
+		String filename = "unittest_zip64_offset.zip";
+		ZipArchive zip = zip::open(mem, filename, "w")!!;
+
+		ZipEntryWriter writer = zip.open_writer("offset_test.txt", STORE)!!;
+		writer.write("data")!!;
+
+		// Manually set offset to > 4GB to trigger ZIP64 headers in the Central Directory
+		writer.entry.offset = 0x100000005;
+
+		writer.close()!!;
+		(void)zip.close();
+		defer (void)file::delete(filename);
+
+		ZipArchive read_zip = zip::open(mem, filename, "r")!!;
+		defer (void)read_zip.close();
+
+		ZipEntry entry = read_zip.stat("offset_test.txt")!!;
+		assert(entry.offset == 0x100000005, "Failed to read ZIP64 offset");
+	};
+}
+
+fn void test_zip_reader_pos_and_seek()
+{
+	@pool()
+	{
+		String path = "unittest_reader.zip";
+		ZipArchive zip = zip::open(mem, path, "w")!!;
+		zip.write_file("test.txt", "0123456789", STORE)!!;
+		(void)zip.close();
+		defer (void)file::delete(path);
+
+		ZipArchive read_zip = zip::open(mem, path, "r")!!;
+		defer (void)read_zip.close();
+
+		ZipEntryReader reader = read_zip.open_reader("test.txt")!!;
+		defer (void)reader.close();
+
+		assert(reader.len() == 10, "Expected length 10");
+		assert(reader.available()!! == 10, "Expected 10 bytes available");
+		assert(reader.pos == 0, "Expected pos 0");
+
+		char[3] buf;
+		assert(reader.read(buf[..])!! == 3);
+		assert((String)buf[..] == "012", "Expected '012'");
+		assert(reader.pos == 3, "Expected pos 3");
+		assert(reader.available()!! == 7, "Expected 7 bytes available");
+
+		assert(reader.seek(2, Seek.CURSOR)!! == 5, "Expected seek to 5");
+		assert(reader.pos == 5, "Expected pos 5 after seek");
+		assert(reader.available()!! == 5, "Expected 5 bytes available after seek");
+
+		assert(reader.read(buf[..])!! == 3);
+		assert((String)buf[..] == "567", "Expected '567'");
+
+		assert(reader.seek(1, Seek.SET)!! == 1, "Expected seek to 1");
+		assert(reader.read(buf[..])!! == 3);
+		assert((String)buf[..] == "123", "Expected '123'");
+
+		assert(reader.seek(-2, Seek.END)!! == 8, "Expected seek to 8");
+		assert(reader.read(buf[..])!! == 2);
+		assert((String)buf[:2] == "89", "Expected '89'");
+		assert(reader.available()!! == 0, "Expected 0 bytes available at end");
+
+		// Edge case: Negative seek SET
+		assert(!@ok(reader.seek(-1, Seek.SET)), "Negative seek SET should fail");
+
+		// Edge case: Seek past end
+		assert(reader.seek(100, Seek.SET)!! == 10, "Seek past end should cap at size");
+		assert(reader.pos == 10, "Pos should be 10");
+	};
+}
+
+fn void test_zip_comment_boundary()
+{
+	@pool()
+	{
+		String filename = "unittest_comment_limit.zip";
+
+		// 1. Test exactly 65535 bytes (Should pass)
+		{
+			ZipArchive zip = zip::open(mem, filename, "w")!!;
+			char[] huge_comment = allocator::malloc(tmem, 65535)[:65535];
+			mem::set(huge_comment.ptr, (char)'C', 65535);
+			zip.comment = String.copy((String)huge_comment, zip.allocator);
+			zip.write_file("t.txt", "d")!!;
+			(void)zip.close();
+
+			ZipArchive read_zip = zip::open(mem, filename, "r")!!;
+			assert(read_zip.comment.len == 65535, "Comment length mismatch at 65535");
+			(void)read_zip.close();
+			(void)file::delete(filename);
+		}
+
+		// 2. Test 65536 bytes (Should fail with INVALID_ARGUMENT)
+		{
+			ZipArchive zip = zip::open(mem, filename, "w")!!;
+			char[] too_huge = allocator::malloc(tmem, 65536)[:65536];
+			mem::set(too_huge.ptr, (char)'X', 65536);
+			zip.comment = String.copy((String)too_huge, zip.allocator);
+			zip.write_file("t.txt", "d")!!;
+
+			fault res = @catch(zip.close());
+			assert(res == zip::INVALID_ARGUMENT, "Expected INVALID_ARGUMENT for 64k+1 comment");
+			(void)file::delete(filename);
+		}
+	};
+}
+
+fn void test_zip_reader_available_capping()
+{
+	@pool()
+	{
+		// We manually construct a reader to test the capping logic for huge entry sizes
+		// that might exist on 32-bit systems (where usz < 64-bit).
+		ZipEntryReader reader;
+		mem::set(&reader, 0, ZipEntryReader.sizeof);
+		reader.size = 0xFFFFFFFFFFFFFFFF;
+		reader.pos = 0;
+
+		usz avail = reader.available()!!;
+		assert(avail == usz.max, "Expected available to be capped at usz.max");
+
+		reader.pos = 100;
+		avail = reader.available()!!;
+		if (usz.max < 0xFFFFFFFFFFFFFFFF)
+		{
+			// triggers on 32-bit
+			assert(avail == usz.max, "Expected available to still be capped at usz.max");
+		}
+		else
+		{
+			// on 64-bit
+			assert(avail == usz.max - (usz)100, "Expected available size to be correct on 64-bit");
+		}
+	};
+}