Files
c3c/lib/std/io/stream/bytebuffer.c3
Manu Linares eae7d0c4a1 stdlib: std::compression::zip and std::compression::deflate (#2930)
* stdlib: implement `std::compression::zip` and `std::compression::deflate`

- C3 implementation of DEFLATE (RFC 1951) and ZIP archive handling.
- Support for reading and writing archives using STORE and DEFLATE
methods.
- Decompression supports both fixed and dynamic Huffman blocks.
- Compression using greedy LZ77 matching.
- Zero dependencies on libc.
- Stream-based entry reading and writing.
- Full unit test coverage.

NOTE: This is an initial implementation. Future improvements could be:

- Optimization of the LZ77 matching (lazy matching).
- Support for dynamic Huffman blocks in compression.
- ZIP64 support for large files/archives.
- Support for encryption and additional compression methods.

* optimizations+refactoring

deflate:
- replace linear search with hash-based match finding.
- implement support for dynamic Huffman blocks using the Package-Merge
algorithm.
- add streaming decompression.
- add buffered StreamBitReader.

zip:
- add ZIP64 support.
- add CP437 and UTF-8 filename encoding detection.
- add DOS date/time conversion and timestamp preservation.
- add ZipEntryReader for streaming entry reads.
- implement ZipArchive.extract and ZipArchive.recover helpers.

other:
- Add `set_modified_time` to std::io.
- Add benchmarks and a few more unit tests.

* zip: add archive comment support

add tests

* forgot to rename the benchmark :(

* detect utf8 names on weird zips

fix method not passed to open_writer

* another edge case where directory doesn't end with /

* testing utilities

- detect encrypted zip
- `ZipArchive.open_writer` default to DEFLATE

* fix zip64 creation, add tests

* fix ZIP header endianness for big-endian compatibility

Update ZipLFH, ZipCDH, ZipEOCD, Zip64EOCD, and Zip64Locator structs to
use little-endian bitstruct types from std::core::bitorder

* fix ZipEntryReader position tracking and seek logic for ZIP_METHOD_STORE

added a test to track this

* add package-merge algorithm attribution

Thanks @konimarti

* standalone deflate_benchmark.c3 against `miniz`

* fix integer overflows, leaks and improve safety

* a few safety fixes for 32-bit systems and tests

* deflate compress optimization

* improve match finding, hash updates, and buffer usage

* use ulong for zip offsets

* style changes (#18)

* style changes

* update tests

* style changes in `deflate.c3`

* fix typo

* Allocator first. Some changes to deflate to use `copy_to`

* Fix missing conversion on 32 bits.

* Fix deflate stream. Formatting. Prefer switch over if-elseif

* - Stream functions now use long/ulong rather than isz/usz for seek/available.
- `instream.seek` is replaced by `set_cursor` and `cursor`.
- `instream.available`, `cursor` etc are long/ulong rather than isz/usz to be correct on 32-bit.

* Update to constdef

* Fix test

---------

Co-authored-by: Book-reader <thevoid@outlook.co.nz>
Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
2026-02-20 20:41:34 +01:00

158 lines
3.7 KiB
Plaintext

module std::io;
import std::math;
<*
Growable in-memory buffer implementing both InStream and OutStream.
Data written via the OutStream side is buffered and later consumed via
the InStream side. Invariant: read_idx <= write_idx <= bytes.len.
*>
struct ByteBuffer (InStream, OutStream)
{
Allocator allocator; // Backing allocator; unset when wrapping a caller-supplied buffer (init_with_buffer).
usz max_read; // Once read_idx reaches this, consumed bytes are compacted away (see shrink).
char[] bytes; // Backing storage; bytes[read_idx .. write_idx) is the unread data.
usz read_idx; // Next index to read from.
usz write_idx; // Next index to write to.
bool has_last; // True when the previous op was a successful read; enables pushback_byte.
}
<*
ByteBuffer provides a streamable read/write buffer.
max_read defines how many bytes might be kept before its internal buffer is shrunk.
@param allocator : "Allocator used for the internal byte storage."
@param max_read : "Number of consumed bytes to retain before the buffer is compacted."
@param initial_capacity : "Initial storage size; raised to at least 16, then rounded up to a power of 2 by grow."
@require self.bytes.len == 0 : "Buffer already initialized."
*>
fn ByteBuffer* ByteBuffer.init(&self, Allocator allocator, usz max_read, usz initial_capacity = 16)
{
*self = { .allocator = allocator, .max_read = max_read };
initial_capacity = max(initial_capacity, 16); // Enforce a sane minimum starting capacity.
self.grow(initial_capacity);
return self;
}
<*
Initialize the buffer using the temp allocator. See ByteBuffer.init.
*>
fn ByteBuffer* ByteBuffer.tinit(&self, usz max_read, usz initial_capacity = 16)
{
return self.init(tmem, max_read, initial_capacity);
}
<*
Wrap a caller-provided buffer. No allocator is stored, so free() will not
release the memory, and max_read is set to the full buffer length, meaning
compaction only happens once the entire buffer has been consumed.
NOTE(review): writing past the wrapped buffer's capacity reaches grow() with a
null allocator — presumably a checked runtime error; verify intended behavior.
@require buf.len > 0
@require self.bytes.len == 0 : "Buffer already initialized."
*>
fn ByteBuffer* ByteBuffer.init_with_buffer(&self, char[] buf)
{
*self = { .max_read = buf.len, .bytes = buf };
return self;
}
<*
Release the internal storage if this buffer owns it (no-op for buffers
created with init_with_buffer) and reset the struct to its zero state.
*>
fn void ByteBuffer.free(&self)
{
if (self.allocator) allocator::free(self.allocator, self.bytes);
*self = {};
}
<*
Append the given bytes to the buffer, growing the backing storage if the
remaining capacity is insufficient. Always writes the full slice.
@param bytes : "The data to append."
@return "The number of bytes written, i.e. bytes.len."
*>
fn usz? ByteBuffer.write(&self, char[] bytes) @dynamic
{
	usz len = bytes.len;
	usz free_space = self.bytes.len - self.write_idx;
	if (free_space < len) self.grow(len);
	self.bytes[self.write_idx:len] = bytes[..];
	self.write_idx += len;
	return len;
}
<*
Append a single byte to the buffer, growing the backing storage when full.
@param c : "The byte to append."
*>
fn void? ByteBuffer.write_byte(&self, char c) @dynamic
{
	// Full when the write index has reached the end of the storage.
	if (self.write_idx == self.bytes.len) self.grow(1);
	self.bytes[self.write_idx] = c;
	self.write_idx++;
}
<*
Read up to bytes.len bytes of unread data into the destination slice.
Returns the number of bytes copied (at least 1), or EOF when no unread
data remains.
*>
fn usz? ByteBuffer.read(&self, char[] bytes) @dynamic
{
usz readable = self.write_idx - self.read_idx;
if (readable == 0)
{
self.has_last = false; // Nothing was read, so pushback is not possible.
return io::EOF~;
}
usz n = min(readable, bytes.len);
bytes[:n] = self.bytes[self.read_idx:n];
self.read_idx += n;
self.has_last = n > 0; // n >= 1 here; enables pushback_byte.
self.shrink(); // May compact consumed bytes (keeps one byte for pushback).
return n;
}
<*
Read a single byte, or return EOF when no unread data remains.
*>
fn char? ByteBuffer.read_byte(&self) @dynamic
{
usz readable = self.write_idx - self.read_idx;
if (readable == 0)
{
self.has_last = false; // Nothing was read, so pushback is not possible.
return io::EOF~;
}
char c = self.bytes[self.read_idx];
self.read_idx++;
self.has_last = true; // Enables pushback_byte.
self.shrink(); // May compact consumed bytes (keeps one byte for pushback).
return c;
}
<*
Only the last byte of a successful read can be pushed back.
Fails with EOF when the previous operation was not a successful read.
*>
fn void? ByteBuffer.pushback_byte(&self) @dynamic
{
if (!self.has_last) return io::EOF~;
// shrink() always retains at least one consumed byte, so read_idx >= 1 here.
assert(self.read_idx > 0);
self.read_idx--;
self.has_last = false; // Only a single pushback is permitted.
}
<*
Return the current read position within the buffered data.
NOTE(review): after shrink() compaction, read_idx is relative to the
compacted buffer, not to the total number of bytes ever read — confirm
callers expect buffer-relative positions.
*>
fn long? ByteBuffer.cursor(&self) @dynamic
{
return self.read_idx;
}
<*
Move the read cursor. FROM_START and FROM_END take a non-negative offset
(FROM_END counts backwards from the end of written data); FROM_CURSOR takes
a signed delta. The resulting position must lie within [0, write_idx],
otherwise INVALID_POSITION is returned and the cursor is unchanged.
*>
fn void? ByteBuffer.set_cursor(&self, long offset, SeekOrigin whence = FROM_START) @dynamic
{
	switch (whence)
	{
		case FROM_START:
			if (offset < 0 || offset > self.write_idx) return INVALID_POSITION~;
			self.read_idx = (usz)offset;
		case FROM_CURSOR:
			if ((offset < 0 && self.read_idx < -offset) ||
				(offset > 0 && self.read_idx + offset > self.write_idx)) return INVALID_POSITION~;
			self.read_idx += (usz)offset;
		case FROM_END:
			// offset is the distance back from the end of the written data.
			if (offset < 0 || offset > self.write_idx) return INVALID_POSITION~;
			self.read_idx = self.write_idx - (usz)offset;
	}
	// Repositioning invalidates pushback: the byte before the new cursor is not
	// necessarily the last byte read, and read_idx may now legitimately be 0,
	// which would break pushback_byte's read_idx > 0 invariant.
	self.has_last = false;
}
<*
Legacy seek interface: forwards to set_cursor by mapping the Seek value to
the corresponding SeekOrigin, then returns the resulting cursor position.
*>
fn usz? ByteBuffer.seek(&self, isz offset, Seek seek) @dynamic
{
// Relies on Seek and SeekOrigin having matching ordinal order.
self.set_cursor(offset, (SeekOrigin)seek.ordinal)!;
return (usz)self.cursor();
}
<*
Return the number of unread bytes currently buffered.
*>
fn ulong? ByteBuffer.available(&self) @inline @dynamic
{
// Widen to ulong before subtracting so the result is correct on 32-bit usz.
return (ulong)self.write_idx - self.read_idx;
}
<*
Grow the backing storage so at least n additional bytes fit, rounding the
new capacity up to the next power of 2.
*>
fn void ByteBuffer.grow(&self, usz n)
{
// NOTE(review): bytes.len + n and next_power_of_2 can overflow for very
// large requests — confirm callers bound n.
n = math::next_power_of_2(self.bytes.len + n);
char* p = allocator::realloc(self.allocator, self.bytes, n);
self.bytes = p[:n];
}
<*
Compact the buffer once max_read consumed bytes have accumulated: move the
unread tail, plus the last consumed byte (retained so pushback_byte keeps
working), to the front of the storage.
*>
macro void ByteBuffer.shrink(&self)
{
if (self.read_idx >= self.max_read)
{
// Drop the read data besides the last byte (for pushback_byte).
usz readable = self.write_idx - self.read_idx;
// Overlapping forward copy: source starts at or after the destination.
self.bytes[:1 + readable] = self.bytes[self.read_idx - 1:1 + readable];
self.write_idx = 1 + readable;
self.read_idx = 1;
}
}