stdlib: std::compression::zip and std::compression::deflate (#2930)

* stdlib: implement `std::compression::zip` and `std::compression::deflate` - C3 implementation of DEFLATE (RFC 1951) and ZIP archive handling. - Support for reading and writing archives using STORE and DEFLATE methods. - Decompression supports both fixed and dynamic Huffman blocks. - Compression using greedy LZ77 matching. - Zero dependencies on libc. - Stream-based entry reading and writing. - Full unit test coverage. NOTE: This is an initial implementation. Future improvements could be: - Optimization of the LZ77 matching (lazy matching). - Support for dynamic Huffman blocks in compression. - ZIP64 support for large files/archives. - Support for encryption and additional compression methods. * optimizations+refactoring deflate: - replace linear search with hash-based match finding. - implement support for dynamic Huffman blocks using the Package-Merge algorithm. - add streaming decompression. - add buffered StreamBitReader. zip: - add ZIP64 support. - add CP437 and UTF-8 filename encoding detection. - add DOS date/time conversion and timestamp preservation. - add ZipEntryReader for streaming entry reads. - implement ZipArchive.extract and ZipArchive.recover helpers. other: - Add `set_modified_time` to std::io; - Add benchmarks and a few more unit tests. 
* zip: add archive comment support; add tests * forgot to rename the benchmark :( * detect utf8 names on weird zips; fix method not passed to open_writer * another edge case where directory doesn't end with / * testing utilities - detect encrypted zip - `ZipArchive.open_writer` defaults to DEFLATE * fix zip64 creation, add tests * fix ZIP header endianness for big-endian compatibility: update ZipLFH, ZipCDH, ZipEOCD, Zip64EOCD, and Zip64Locator structs to use little-endian bitstruct types from std::core::bitorder * fix ZipEntryReader position tracking and seek logic (ZIP_METHOD_STORE); added a test to track this * add package-merge algorithm attribution. Thanks @konimarti * standalone deflate_benchmark.c3 against `miniz` * fix integer overflows, leaks and improve safety * a few safety fixes for 32-bit systems and tests * deflate compress optimization * improve match finding, hash updates, and buffer usage * use ulong for zip offsets * style changes (#18) * style changes * update tests * style changes in `deflate.c3` * fix typo * Allocator first. Some changes to deflate to use `copy_to` * Fix missing conversion on 32 bits. * Fix deflate stream. Formatting. Prefer switch over if-elseif * - Stream functions now use long/ulong rather than isz/usz for seek/available. - `instream.seek` is replaced by `set_cursor` and `cursor`. - `instream.available`, `cursor` etc. are long/ulong rather than isz/usz to be correct on 32-bit. * Update to constdef * Fix test --------- Co-authored-by: Book-reader <thevoid@outlook.co.nz> Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
2026-02-27 12:01:16 +00:00 · 2026-02-20 16:41:34 -03:00
parent 5055e86518
commit eae7d0c4a1
26 changed files with 3745 additions and 96 deletions
--- a/lib/std/core/dstring.c3
+++ b/lib/std/core/dstring.c3
@@ -658,9 +658,10 @@ fn usz? DString.read_from_stream(&self, InStream reader)
 	if (&reader.available)
 	{
 		usz total_read = 0;
-		while (usz available = reader.available()!)
+		while (ulong available = reader.available()!)
 		{
-			self.reserve(available);
+			if (available > isz.max) available = (ulong)isz.max;
+			self.reserve((usz)available);
 			StringData* data = self.data();
 			usz len = reader.read(data.chars[data.len..(data.capacity - 1)])!;
 			total_read += len;
--- a/lib/std/core/env.c3
+++ b/lib/std/core/env.c3
@@ -126,6 +126,7 @@ const bool ARCH_64_BIT = $$REGISTER_SIZE == 64;
 const bool LIBC = $$COMPILER_LIBC_AVAILABLE;
 const bool NO_LIBC = !LIBC && !CUSTOM_LIBC;
 const bool CUSTOM_LIBC = $$CUSTOM_LIBC;
+const bool OLD_IO = $feature(OLD_IO);
 const CompilerOptLevel COMPILER_OPT_LEVEL = CompilerOptLevel.from_ordinal($$COMPILER_OPT_LEVEL);
 const bool BIG_ENDIAN = $$PLATFORM_BIG_ENDIAN;
 const bool I128_NATIVE_SUPPORT = $$PLATFORM_I128_SUPPORTED;
--- a/lib/std/core/runtime_benchmark.c3
+++ b/lib/std/core/runtime_benchmark.c3
@@ -125,10 +125,11 @@ fn bool run_benchmarks(BenchmarkUnit[] benchmarks)
 		char[] perc_str = { [0..19] = ' ', [20] = 0 };
 		int perc = 0;
 		uint print_step = current_benchmark_iterations / 100;
+		if (print_step == 0) print_step = 1;

 		for (this_iteration = 0; this_iteration < current_benchmark_iterations; ++this_iteration, benchmark_nano_seconds = {})
 		{
-			if (0 == this_iteration % print_step)   // only print right about when the % will update
+			if (this_iteration % print_step == 0)   // only print right about when the % will update
 			{
 				perc_str[0..(uint)math::floor((this_iteration / (float)current_benchmark_iterations) * 20)] = '#';
 				perc = (uint)math::ceil(100 * (this_iteration / (float)current_benchmark_iterations));
--- a/lib/std/core/runtime_test.c3
+++ b/lib/std/core/runtime_test.c3
@@ -142,7 +142,7 @@ fn void mute_output() @local
    File* stderr = io::stderr();
 	*stderr = test_context.fake_stdout;
 	*stdout = test_context.fake_stdout;
-	(void)test_context.fake_stdout.seek(0, Seek.SET)!!;
+	(void)test_context.fake_stdout.set_cursor(0)!!;
 }

 fn void unmute_output(bool has_error) @local
@@ -155,7 +155,7 @@ fn void unmute_output(bool has_error) @local
 	*stderr = test_context.stored.stderr;
 	*stdout = test_context.stored.stdout;

-	usz log_size = test_context.fake_stdout.seek(0, Seek.CURSOR)!!;
+	ulong log_size = test_context.fake_stdout.cursor()!!;
 	if (has_error)
 	{
 		io::printn(test_context.has_ansi_codes ? "[\e[0;31mFAIL\e[0m]" : "[FAIL]");
@@ -165,7 +165,7 @@ fn void unmute_output(bool has_error) @local
 	{
 		test_context.fake_stdout.write_byte('\n')!!;
 		test_context.fake_stdout.write_byte('\0')!!;
-		(void)test_context.fake_stdout.seek(0, Seek.SET)!!;
+		test_context.fake_stdout.set_cursor(0)!!;

 		io::printfn("\n========== TEST LOG ============");
 		io::printfn("%s\n", test_context.current_test_name);