Files
c3c/lib/std/io/stream.c3
Manu Linares eae7d0c4a1 stdlib: std::compression::zip and std::compression::deflate (#2930)
* stdlib: implement `std::compression::zip` and `std::compression::deflate`

- C3 implementation of DEFLATE (RFC 1951) and ZIP archive handling.
- Support for reading and writing archives using STORE and DEFLATE
methods.
- Decompression supports both fixed and dynamic Huffman blocks.
- Compression using greedy LZ77 matching.
- Zero dependencies on libc.
- Stream-based entry reading and writing.
- Full unit test coverage.

NOTE: This is an initial implementation. Future improvements could be:

- Optimization of the LZ77 matching (lazy matching).
- Support for dynamic Huffman blocks in compression.
- ZIP64 support for large files/archives.
- Support for encryption and additional compression methods.

* optimizations+refactoring

deflate:
- replace linear search with hash-based match finding.
- implement support for dynamic Huffman blocks using the Package-Merge
algorithm.
- add streaming decompression.
- add buffered StreamBitReader.

zip:
- add ZIP64 support.
- add CP437 and UTF-8 filename encoding detection.
- add DOS date/time conversion and timestamp preservation.
- add ZipEntryReader for streaming entry reads.
- implement ZipArchive.extract and ZipArchive.recover helpers.

other:
- Add `set_modified_time` to std::io;
- Add benchmarks and a few more unit tests.

* zip: add archive comment support

add tests

* forgot to rename the benchmark :(

* detect utf8 names on weird zips

fix method not passed to open_writer

* another edge case where directory doesn't end with /

* testing utilities

- detect encrypted zip
- `ZipArchive.open_writer` default to DEFLATE

* fix zip64 creation, add tests

* fix ZIP header endianness for big-endian compatibility

Update ZipLFH, ZipCDH, ZipEOCD, Zip64EOCD, and Zip64Locator structs to
use little-endian bitstruct types from std::core::bitorder

* fix ZipEntryReader position tracking and seek logic for ZIP_METHOD_STORE

added a test to track this

* add package-merge algorithm attribution

Thanks @konimarti

* standalone deflate_benchmark.c3 against `miniz`

* fix integer overflows, leaks and improve safety

* a few safety fixes for 32-bit systems and tests

* deflate compress optimization

* improve match finding, hash updates, and buffer usage

* use ulong for zip offsets

* style changes (#18)

* style changes

* update tests

* style changes in `deflate.c3`

* fix typo

* Allocator first. Some changes to deflate to use `copy_to`

* Fix missing conversion on 32 bits.

* Fix deflate stream. Formatting. Prefer switch over if-elseif

* - Stream functions now use long/ulong rather than isz/usz for seek/available.
- `instream.seek` is replaced by `set_cursor` and `cursor`.
- `instream.available`, `cursor` etc are long/ulong rather than isz/usz to be correct on 32-bit.

* Update to constdef

* Fix test

---------

Co-authored-by: Book-reader <thevoid@outlook.co.nz>
Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
2026-02-20 20:41:34 +01:00

678 lines
14 KiB
Plaintext

module std::io;
import std::math;
// Function-pointer type for a set_cursor implementation: an untyped pointer
// (presumably the stream object), a signed offset, and the origin the offset is relative to.
// NOTE(review): the default origin is spelled START here but FROM_START on
// InStream.set_cursor — confirm both name the same SeekOrigin value.
alias SetCursorFn = fn void?(void*, long offset, SeekOrigin whence = START);
// Input stream interface. Only read and read_byte are mandatory; every method
// marked @optional may be missing, and helpers in this file probe for it with
// &s.method before calling it.
interface InStream
{
fn void? close() @optional;
// Current position; available() pairs it with set_cursor to measure the remaining bytes.
fn long? cursor() @optional;
// Reposition by offset relative to whence.
fn void? set_cursor(long offset, SeekOrigin whence = FROM_START) @optional;
// isz/usz positioning call; helpers in this file fall back to it when set_cursor is absent.
fn usz? seek(isz offset, Seek seek) @optional;
fn usz len() @optional;
fn ulong? size() @optional;
// Bytes still readable, when the stream can report it directly.
fn ulong? available() @optional;
// Read into buffer and return the number of bytes read.
fn usz? read(char[] buffer);
fn char? read_byte();
// Optional bulk-copy path that copy_to tries before falling back to a buffer.
fn usz? write_to(OutStream out) @optional;
fn void? pushback_byte() @optional;
}
// Output stream interface. Only write and write_byte are mandatory; the
// remaining methods are marked @optional.
interface OutStream
{
fn void? destroy() @optional;
fn void? close() @optional;
fn void? flush() @optional;
// Write bytes and return the number of bytes written.
fn usz? write(char[] bytes);
fn void? write_byte(char c);
// Optional bulk-copy path that copy_to tries when the source has no write_to.
fn usz? read_to(InStream in) @optional;
}
<*
 Report how many bytes lie between the current position and the end of the stream.

 The stream's own available() is used when it exists. Otherwise the distance is measured
 by jumping to the end with cursor/set_cursor, or failing that with seek, and the original
 position is restored on the success path. Streams offering none of these yield
 io::UNSUPPORTED_OPERATION.
*>
fn ulong? available(InStream s)
{
	// Preferred: the stream answers directly.
	if (&s.available) return s.available();

	if (&s.set_cursor && &s.cursor)
	{
		long start = s.cursor()!;
		s.set_cursor(0, FROM_END)!;
		ulong end = s.cursor()!;
		// Go back to where the caller left the stream.
		s.set_cursor(start)!;
		return end - start;
	}

	if (!&s.seek) return io::UNSUPPORTED_OPERATION~;

	usz start = s.seek(0, Seek.CURSOR)!;
	usz end = s.seek(0, Seek.END)!;
	s.seek(start, Seek.SET)!;
	return (ulong)end - (ulong)start;
}
// Compile-time check: true when #expr can be used to initialise an InStream variable.
macro bool @is_instream(#expr) @const
{
return $defined(InStream i = #expr);
}
// Compile-time check: true when the address of #expr cannot be taken, or when
// that address does not satisfy @is_instream.
macro bool @is_not_instream_if_ptr(#expr) @const
{
return !$defined(&#expr) ||| !@is_instream(&#expr);
}
// Compile-time check: true when #expr can be used to initialise an OutStream variable.
macro bool @is_outstream(#expr) @const
{
return $defined(OutStream s = #expr);
}
// Compile-time check: true when the address of #expr cannot be taken, or when
// that address does not satisfy @is_outstream.
macro bool @is_not_outstream_if_ptr(#expr) @const
{
return !$defined(&#expr) ||| !@is_outstream(&#expr);
}
<*
 Fill the object referenced by ref with raw bytes from the stream.

 Exactly ref.type.sizeof bytes are requested through read_all, so a short read
 surfaces as that function's error. The bytes are stored as-is; no byte-order
 conversion is visible here.

@param [&out] ref
@require @is_instream(stream) : "Expected a stream"
*>
macro usz? read_any(stream, any ref)
{
// View the referenced object as a byte slice of its own size.
return read_all(stream, ((char*)ref)[:ref.type.sizeof]);
}
<*
 Write the raw bytes of the object referenced by ref to the stream.

 Exactly ref.type.sizeof bytes are passed to write_all, so a partial write
 surfaces as that function's error.

@param [&in] ref : "the object to write."
@require @is_outstream(stream)
@ensure return == ref.type.sizeof
*>
macro usz? write_any(stream, any ref)
{
// View the referenced object as a byte slice of its own size.
return write_all(stream, ((char*)ref)[:ref.type.sizeof]);
}
<*
 Read exactly buffer.len bytes with a single call to stream.read.

 An empty buffer returns 0 without touching the stream. Any count other than
 buffer.len is reported as UNEXPECTED_EOF.

 @require @is_instream(stream)
*>
macro usz? read_all(stream, char[] buffer)
{
	if (!buffer.len) return 0;
	usz count = stream.read(buffer)!;
	// One read must deliver the whole buffer.
	return count == buffer.len ? count : UNEXPECTED_EOF~;
}
<*
 This function will read to the end of the stream.

 When the remaining size can be determined through available(), a buffer of exactly
 that size is allocated from the allocator and filled; it is freed again if a read
 fails. Otherwise the data is gathered through a ByteWriter and copy_to.
 The returned slice is allocated with the given allocator.

 @require @is_instream(stream)
*>
macro char[]? read_fully(Allocator allocator, stream)
{
	// Efficient path if it is possible to pre-allocate
	if (try len = available(stream))
	{
		char* data = allocator::malloc_try(allocator, len)!;
		defer catch allocator::free(allocator, data);
		usz read = 0;
		while (read < len)
		{
			usz n = stream.read(data[read:len - read])!;
			// A read that delivers nothing while bytes are still expected would
			// otherwise spin forever; treat it as a truncated stream.
			if (!n) return io::UNEXPECTED_EOF~;
			read += n;
		}
		return data[:len];
	}
	// NOTE(review): if copy_to fails, nothing visible here releases the writer's
	// storage — confirm whether a cleanup is needed.
	ByteWriter writer;
	writer.init(allocator);
	copy_to(stream, &writer)!;
	return writer.array_view();
}
<*
 Write the whole buffer with a single call to stream.write.

 An empty buffer returns 0 without touching the stream. Any count other than
 buffer.len is reported as INCOMPLETE_WRITE.

 @require @is_outstream(stream)
*>
macro usz? write_all(stream, char[] buffer)
{
	if (!buffer.len) return 0;
	usz count = stream.write(buffer)!;
	// One write must accept the whole buffer.
	return count == buffer.len ? count : INCOMPLETE_WRITE~;
}
<*
 Fill buffer one byte at a time through s.read_byte.

 Returns the number of bytes stored. io::EOF from read_byte ends the read early
 and returns the count so far; any other error is passed on.

 @require @is_instream(s)
*>
macro usz? read_using_read_byte(s, char[] buffer)
{
	for (usz filled = 0; filled < buffer.len; filled++)
	{
		char? next = s.read_byte();
		if (catch err = next)
		{
			// End of stream is a short read, not a failure.
			if (err == io::EOF) return filled;
			return err~;
		}
		buffer[filled] = next;
	}
	return buffer.len;
}
<*
 Write a single byte by handing a one-element array to s.write.

 Errors from write are passed on; the returned byte count is not inspected.

@require @is_outstream(s)
*>
macro void? write_byte_using_write(s, char c)
{
char[1] buff = { c };
s.write(&buff)!;
}
<*
 Read a single byte by asking s.read to fill a one-element array.

 Any count other than 1 is reported as io::EOF.

 @require @is_instream(s)
*>
macro char? read_byte_using_read(s)
{
	char[1] single;
	usz count = s.read(&single)!;
	return count == 1 ? single[0] : io::EOF~;
}
// Function-pointer type for a parameterless function returning one byte or an error.
alias ReadByteFn = fn char?();
<*
 Write a slice by calling s.write_byte once per element, in order.

 Stops at the first error; on success returns bytes.len.

 @require @is_outstream(s)
*>
macro usz? write_using_write_byte(s, char[] bytes)
{
	for (usz i = 0; i < bytes.len; i++)
	{
		s.write_byte(bytes[i])!;
	}
	return bytes.len;
}
<*
 Step the stream position back by one byte.

 Uses set_cursor with FROM_CURSOR when the stream provides it, otherwise seek with CURSOR.
*>
macro void? pushback_using_seek(s)
{
	if (!&s.set_cursor)
	{
		// No set_cursor available: use the seek call instead.
		s.seek(-1, CURSOR)!;
		return;
	}
	s.set_cursor(-1, FROM_CURSOR)!;
}
<*
 Copy from in to dst and return the result of whichever strategy ran.

 Strategies, in order: a caller-supplied non-empty buffer; the source's write_to;
 the destination's read_to; otherwise a temporary buffer whose size is chosen at
 compile time from env::MEMORY_ENV (4096, 1024 or 256 bytes).
*>
fn usz? copy_to(InStream in, OutStream dst, char[] buffer = {})
{
// An explicit buffer takes priority over the streams' own bulk-copy methods.
if (buffer.len) return copy_through_buffer(in, dst, buffer);
if (&in.write_to) return in.write_to(dst);
if (&dst.read_to) return dst.read_to(in);
$switch env::MEMORY_ENV:
$case NORMAL:
return copy_through_buffer(in, dst, &&(char[4096]){});
$case SMALL:
return copy_through_buffer(in, dst, &&(char[1024]){});
$case TINY:
$case NONE:
return copy_through_buffer(in, dst, &&(char[256]){});
$endswitch
}
<*
 Pump data from in to dst through the given buffer until the source is exhausted.

 The source is considered exhausted when read reports io::EOF or returns 0.
 Returns the total number of bytes copied; a destination that accepts fewer bytes
 than were read yields INCOMPLETE_WRITE.
*>
macro usz? copy_through_buffer(InStream in, OutStream dst, char[] buffer) @local
{
	usz copied;
	while (true)
	{
		usz? got = in.read(buffer);
		if (catch err = got)
		{
			// End of stream finishes the copy normally.
			if (err == io::EOF) return copied;
			return err~;
		}
		if (got == 0) return copied;
		usz accepted = dst.write(buffer[:got])!;
		copied += got;
		if (accepted != got) return INCOMPLETE_WRITE~;
	}
}
// Indexed by integer size in bytes: the maximum number of varint bytes for
// 2-, 4- and 8-byte integers (3, 5 and 10). Other indices are left unset.
const char[*] MAX_VARS @private = { [2] = 3, [4] = 5, [8] = 10 };
<*
 Decode a variable-length integer from the stream into the integer x_ptr points at,
 returning the number of bytes consumed.

 Each byte contributes its low 7 bits, least significant group first; a byte with the
 high bit clear ends the value. For signed targets the decoded value is then mapped
 back zigzag-style (even values to x >> 1, odd values to the complement of x >> 1).
 A stream that ends mid-value yields io::UNEXPECTED_EOF; a value that needs more bytes
 than the target type allows yields math::OVERFLOW.

@require @is_instream(stream)
@require $kindof(x_ptr) == POINTER && $typeof(x_ptr).inner.kindof.is_int()
*>
macro usz? read_varint(stream, x_ptr)
{
var $Type = $typeof(x_ptr).inner;
// Upper bound on encoded length for this integer width.
const MAX = MAX_VARS[$Type.sizeof];
$Type x;
uint shift;
usz n;
for (usz i = 0; i < MAX; i++)
{
char? c = stream.read_byte();
if (catch err = c)
{
// Running out of input inside a value is a truncation, not a clean EOF.
if (err == io::EOF) return io::UNEXPECTED_EOF~;
return err~;
}
n++;
// High bit clear: this is the final byte.
if (c & 0x80 == 0)
{
// On the last permitted byte only 0 or 1 is accepted; anything larger
// leaves the loop and is reported as overflow.
if (i + 1 == MAX && c > 1) break;
x |= c << shift;
$if $Type.kindof == SIGNED_INT:
x = x & 1 == 0 ? x >> 1 : ~(x >> 1);
$endif
*x_ptr = x;
return n;
}
// Continuation byte: keep the low 7 bits and move on to the next group.
x |= (c & 0x7F) << shift;
shift += 7;
}
return math::OVERFLOW~;
}
<*
 Encode x as a variable-length integer and write it with write_all.

 The value is emitted 7 bits at a time, least significant group first, with the high
 bit set on every byte except the last. Returns the result of write_all.

 NOTE(review): read_varint applies a zigzag-style decode for signed targets, but no
 matching encode is visible here, and a negative x fails the x >= 0x80 test
 immediately — confirm how signed values are expected to round-trip.

@require @is_outstream(stream)
@require $kindof(x).is_int()
*>
macro usz? write_varint(stream, x)
{
var $Type = $typeof(x);
// Upper bound on encoded length for this integer width.
const MAX = MAX_VARS[$Type.sizeof];
char[MAX] buffer @noinit;
usz i;
while (x >= 0x80)
{
// Low 7 bits plus the continuation flag.
buffer[i] = (char)(x | 0x80);
x >>= 7;
i++;
}
// Final byte, continuation flag clear.
buffer[i] = (char)x;
return write_all(stream, buffer[:i + 1]);
}
<*
 Read two bytes and combine them as a big-endian ushort (first byte is the high byte).

@require @is_instream(stream)
*>
macro ushort? read_be_ushort(stream)
{
char hi_byte = stream.read_byte()!;
char lo_byte = stream.read_byte()!;
return (ushort)(hi_byte << 8 | lo_byte);
}
<*
 Read two bytes and combine them as a little-endian ushort (first byte is the low byte).

@require @is_instream(stream)
*>
macro ushort? read_le_ushort(stream)
{
char lo_byte = stream.read_byte()!;
char hi_byte = stream.read_byte()!;
return (ushort)(hi_byte << 8 | lo_byte);
}
<*
 Signed counterpart of read_be_ushort: returns its result as a short.

@require @is_instream(stream)
*>
macro short? read_be_short(stream)
{
return read_be_ushort(stream);
}
<*
 Signed counterpart of read_le_ushort: returns its result as a short.

@require @is_instream(stream)
*>
macro short? read_le_short(stream)
{
return read_le_ushort(stream);
}
<*
 Write a 16-bit value as two bytes, high byte first.

 @require @is_outstream(stream)
*>
macro void? write_be_short(stream, ushort s)
{
	char high = (char)(s >> 8);
	char low = (char)s;
	stream.write_byte(high)!;
	stream.write_byte(low)!;
}
<*
 Write a 16-bit value as two bytes, low byte first.

 @require @is_outstream(stream)
*>
macro void? write_le_short(stream, ushort s)
{
	char low = (char)s;
	char high = (char)(s >> 8);
	stream.write_byte(low)!;
	stream.write_byte(high)!;
}
<*
 Read four bytes and assemble them as a big-endian uint (first byte is most significant).

 @require @is_instream(stream)
*>
macro uint? read_be_uint(stream)
{
	uint val;
	for (int i = 0; i < 4; i++)
	{
		// Make room for the next, less significant byte.
		val = val << 8 | stream.read_byte()!;
	}
	return val;
}
<*
 Read four bytes and assemble them as a little-endian uint (first byte is least significant).

 @require @is_instream(stream)
*>
macro uint? read_le_uint(stream)
{
	uint val;
	for (uint shift = 0; shift < 32; shift += 8)
	{
		val |= (uint)stream.read_byte()! << shift;
	}
	return val;
}
<*
 Signed counterpart of read_be_uint: returns its result as an int.

@require @is_instream(stream)
*>
macro int? read_be_int(stream)
{
return read_be_uint(stream);
}
<*
 Signed counterpart of read_le_uint: returns its result as an int.

@require @is_instream(stream)
*>
macro int? read_le_int(stream)
{
return read_le_uint(stream);
}
<*
 Write a 32-bit value as four bytes, most significant byte first.

 @require @is_outstream(stream)
*>
macro void? write_be_int(stream, uint s)
{
	for (int shift = 24; shift >= 0; shift -= 8)
	{
		stream.write_byte((char)(s >> shift))!;
	}
}
<*
 Write a 32-bit value as four bytes, least significant byte first.

 @require @is_outstream(stream)
*>
macro void? write_le_int(stream, uint s)
{
	for (int shift = 0; shift < 32; shift += 8)
	{
		stream.write_byte((char)(s >> shift))!;
	}
}
<*
 Read eight bytes and assemble them as a big-endian ulong (first byte is most significant).

 @require @is_instream(stream)
*>
macro ulong? read_be_ulong(stream)
{
	ulong val;
	for (int i = 0; i < 8; i++)
	{
		// Make room for the next, less significant byte.
		val = val << 8 | stream.read_byte()!;
	}
	return val;
}
<*
 Read eight bytes and assemble them as a little-endian ulong (first byte is least significant).

 @require @is_instream(stream)
*>
macro ulong? read_le_ulong(stream)
{
	ulong val;
	for (uint shift = 0; shift < 64; shift += 8)
	{
		val |= (ulong)stream.read_byte()! << shift;
	}
	return val;
}
<*
 Signed counterpart of read_be_ulong: returns its result as a long.

@require @is_instream(stream)
*>
macro long? read_be_long(stream)
{
return read_be_ulong(stream);
}
<*
 Signed counterpart of read_le_ulong: returns its result as a long.

@require @is_instream(stream)
*>
macro long? read_le_long(stream)
{
return read_le_ulong(stream);
}
<*
 Write a 64-bit value as eight bytes, most significant byte first.

 @require @is_outstream(stream)
*>
macro void? write_be_long(stream, ulong s)
{
	for (int shift = 56; shift >= 0; shift -= 8)
	{
		stream.write_byte((char)(s >> shift))!;
	}
}
<*
 Write a 64-bit value as eight bytes, least significant byte first.

 @require @is_outstream(stream)
*>
macro void? write_le_long(stream, ulong s)
{
	for (int shift = 0; shift < 64; shift += 8)
	{
		stream.write_byte((char)(s >> shift))!;
	}
}
<*
 Read sixteen bytes and assemble them as a big-endian uint128 (first byte is most significant).

 @require @is_instream(stream)
*>
macro uint128? read_be_uint128(stream)
{
	uint128 val;
	for (int i = 0; i < 16; i++)
	{
		// Make room for the next, less significant byte.
		val = val << 8 | stream.read_byte()!;
	}
	return val;
}
<*
 Read sixteen bytes and assemble them as a little-endian uint128 (first byte is least significant).

 @require @is_instream(stream)
*>
macro uint128? read_le_uint128(stream)
{
	uint128 val;
	for (uint shift = 0; shift < 128; shift += 8)
	{
		val |= (uint128)stream.read_byte()! << shift;
	}
	return val;
}
<*
 Signed counterpart of read_be_uint128: returns its result as an int128.

@require @is_instream(stream)
*>
macro int128? read_be_int128(stream)
{
return read_be_uint128(stream);
}
<*
 Signed counterpart of read_le_uint128: returns its result as an int128.

@require @is_instream(stream)
*>
macro int128? read_le_int128(stream)
{
return read_le_uint128(stream);
}
<*
 Write a 128-bit value as sixteen bytes, most significant byte first.

 @require @is_outstream(stream)
*>
macro void? write_be_int128(stream, uint128 s)
{
	for (int shift = 120; shift >= 0; shift -= 8)
	{
		stream.write_byte((char)(s >> shift))!;
	}
}
<*
 Write a 128-bit value as sixteen bytes, least significant byte first.

 @require @is_outstream(stream)
*>
macro void? write_le_int128(stream, uint128 s)
{
	for (int shift = 0; shift < 128; shift += 8)
	{
		stream.write_byte((char)(s >> shift))!;
	}
}
<*
 Write a byte array prefixed by its length in a single byte.

 Returns the count reported by stream.write plus one for the length byte.

 @require @is_outstream(stream)
 @require data.len < 256 : "Data exceeded 255"
*>
macro usz? write_tiny_bytearray(stream, char[] data)
{
	// Length prefix first, then the payload.
	stream.write_byte((char)data.len)!;
	usz payload = stream.write(data)!;
	return payload + 1;
}
<*
 Read a byte array that is prefixed by its length in a single byte.

 A zero length returns an empty slice without allocating. Otherwise a buffer of that
 length is allocated from the allocator and filled through read_all; the buffer is
 released again if the payload cannot be read.

 @require @is_instream(stream)
*>
macro char[]? read_tiny_bytearray(stream, Allocator allocator)
{
	int len = stream.read_byte()!;
	if (!len) return {};
	char[] data = allocator::alloc_array(allocator, char, len);
	// Do not leak the buffer when the payload is truncated or the read fails.
	defer catch allocator::free(allocator, data.ptr);
	io::read_all(stream, data)!;
	return data;
}
<*
 Write a byte array prefixed by its length as a big-endian 16-bit value.

 Returns the count reported by stream.write plus two for the length prefix.
 The length must fit the 16-bit prefix, i.e. be at most 65535.

 @require @is_outstream(stream)
 @require data.len < 0x10000 : "Data exceeded 65535"
*>
macro usz? write_short_bytearray(stream, char[] data)
{
	// Length prefix first, then the payload.
	io::write_be_short(stream, (ushort)data.len)!;
	return stream.write(data) + 2;
}
<*
 Read a byte array that is prefixed by its length as a big-endian 16-bit value.

 A zero length returns an empty slice without allocating. Otherwise a buffer of that
 length is allocated from the allocator and filled through read_all; the buffer is
 released again if the payload cannot be read.

 @require @is_instream(stream)
*>
macro char[]? read_short_bytearray(stream, Allocator allocator)
{
	int len = io::read_be_ushort(stream)!;
	if (!len) return {};
	char[] data = allocator::alloc_array(allocator, char, len);
	// Do not leak the buffer when the payload is truncated or the read fails.
	defer catch allocator::free(allocator, data.ptr);
	io::read_all(stream, data)!;
	return data;
}
<*
 Advance the stream by the given number of bytes.

 For a value of the InStream interface type the choice is made at run time: set_cursor
 if present, otherwise seek, otherwise the bytes are read and discarded one at a time.
 For concrete stream types the same choice is made at compile time.

 @require @is_instream(stream)
*>
macro void? skip(stream, usz bytes)
{
	if (!bytes) return;
	$switch:
		$case $typeof(stream) == InStream:
			if (!&stream.seek && !&stream.set_cursor)
			{
				// No positioning support: consume the bytes one by one.
				for (usz i = 0; i < bytes; i++)
				{
					stream.read_byte()!;
				}
				return;
			}
			if (!&stream.set_cursor)
			{
				stream.seek(bytes, CURSOR)!;
				return;
			}
			stream.set_cursor(bytes, FROM_CURSOR)!;
		$case $defined(stream.set_cursor):
			stream.set_cursor(bytes, FROM_CURSOR)!;
		$case $defined(stream.seek):
			stream.seek(bytes, CURSOR)!;
		$default:
			// No positioning support: consume the bytes one by one.
			for (usz i = 0; i < bytes; i++)
			{
				stream.read_byte()!;
			}
	$endswitch
}
<*
Wrap bytes for reading using io functions.

 Returns a ByteReader initialised with the slice and 0 (presumably the starting
 read index — confirm against ByteReader's field order). The slice is passed through
 unchanged; no copy is visible here.
*>
fn ByteReader wrap_bytes(char[] bytes)
{
return { bytes, 0 };
}