// Benchmark setup; the `@init` attribute makes this run automatically at program start.
// It pre-compresses both data sets so the decompress benchmarks have ready-made input,
// then configures the benchmark runner.
fn void initialize_bench() @init
{
	// `!!` turns a compression failure into a panic: without valid input the
	// decompress benchmarks would be meaningless.
	// The buffers come from the heap allocator (`mem`) and are stored in module
	// globals; nothing in this file frees them.
	small_compressed = deflate::compress(mem, SMALL_DATA)!!;
	large_compressed = deflate::compress(mem, LARGE_DATA)!!;
	// Runner-wide settings (exact semantics are defined by the benchmark runtime, not visible here).
	set_benchmark_warmup_iterations(2);
	set_benchmark_max_iterations(10);

	// Per-function iteration counts: many iterations for the 1 KiB input,
	// few for the 1 MiB input.
	set_benchmark_func_iterations($qnameof(deflate_compress_small), SMALL_ITERATIONS);
	set_benchmark_func_iterations($qnameof(deflate_decompress_small), SMALL_ITERATIONS);
	set_benchmark_func_iterations($qnameof(deflate_compress_large), LARGE_ITERATIONS);
	set_benchmark_func_iterations($qnameof(deflate_decompress_large), LARGE_ITERATIONS);
}
decompressed = deflate::decompress(tmem, large_compressed); +} diff --git a/benchmarks/stdlib/crypto/crypto_shootout.c3 b/benchmarks/stdlib/crypto/crypto_shootout.c3 index 8b7f17920..f9e781085 100644 --- a/benchmarks/stdlib/crypto/crypto_shootout.c3 +++ b/benchmarks/stdlib/crypto/crypto_shootout.c3 @@ -32,8 +32,8 @@ fn void initialize_bench() @init $qnameof(sha1_16)[..^4], $qnameof(sha2_256_16)[..^4], $qnameof(sha2_512_16)[..^4], - $qnameof(blake2s_256_16)[..^4], - $qnameof(blake2b_256_16)[..^4], + //$qnameof(blake2s_256_16)[..^4], + //$qnameof(blake2b_256_16)[..^4], $qnameof(blake3_16)[..^4], $qnameof(ripemd_160_16)[..^4], $qnameof(whirlpool_16)[..^4], @@ -68,8 +68,8 @@ fn void md5_16() => md5::hash(common_16); fn void sha1_16() => sha1::hash(common_16); fn void sha2_256_16() => sha256::hash(common_16); fn void sha2_512_16() => sha512::hash(common_16); -fn void blake2s_256_16() => blake2::s(256, common_16); -fn void blake2b_256_16() => blake2::b(256, common_16); +//fn void blake2s_256_16() => blake2::s(256, common_16); +//fn void blake2b_256_16() => blake2::b(256, common_16); fn void blake3_16() => blake3::hash(common_16); fn void ripemd_160_16() => ripemd::hash{160}(common_16); fn void whirlpool_16() => whirlpool::hash(common_16); @@ -80,8 +80,8 @@ fn void md5_256() => md5::hash(common_256); fn void sha1_256() => sha1::hash(common_256); fn void sha2_256_256() => sha256::hash(common_256); fn void sha2_512_256() => sha512::hash(common_256); -fn void blake2s_256_256() => blake2::s(256, common_256); -fn void blake2b_256_256() => blake2::b(256, common_256); +//fn void blake2s_256_256() => blake2::s(256, common_256); +//fn void blake2b_256_256() => blake2::b(256, common_256); fn void blake3_256() => blake3::hash(common_256); fn void ripemd_160_256() => ripemd::hash{160}(common_256); fn void whirlpool_256() => whirlpool::hash(common_256); @@ -92,8 +92,8 @@ fn void md5_4kib() => md5::hash(common_4kib); fn void sha1_4kib() => sha1::hash(common_4kib); fn void 
<*
 DEFLATE compression implementation (RFC 1951).

 API:
 - fn char[]? decompress(Allocator allocator, char[] input)
 - fn void? decompress_stream(InStream input, OutStream output)
 - fn char[]? compress(Allocator allocator, char[] input)
 - struct Inflater: InStream for pull-based decompression (use `init` then `read`).
*>
<*
 Read a single decompressed byte.

 Returns `io::EOF` once the stream has been fully decompressed and drained.
 Any other fault raised by `read` (for example `CORRUPTED_DATA`) is passed
 through unchanged, so a damaged stream is not mistaken for a clean end of data.

 @return `The next byte of decompressed output.`
*>
fn char? Inflater.read_byte(&self) @dynamic
{
	char[1] b;
	// Rethrow decode failures instead of folding them into EOF.
	usz n = self.read(&b)!;
	// With a one-byte buffer, `read` yields 0 only when the inflater is done.
	if (n == 0) return io::EOF~;
	return b[0];
}
<*
 Inflate a raw DEFLATE stream, pulling compressed bytes from `input` and
 pushing the decompressed bytes to `output`.

 The inflater state and its 8 KiB input buffer are temp-allocated and are
 released when the function returns (`@pool`).

 @param input : `Source of compressed data.`
 @param output : `Destination for decompressed data.`
*>
fn void? decompress_stream(InStream input, OutStream output) => @pool()
{
	// Staging buffer handed to the inflater's bit reader.
	char[] scratch = mem::tnew(char[8192]);
	Inflater* state = mem::tnew(Inflater);
	state.init(input, scratch);
	// The Inflater is itself an InStream, so a plain stream copy drives the decoding.
	io::copy_to(state, output)!;
}
= 0xFFFFFFFF; + + uint[] prev = allocator::alloc_array(tmem, uint, WINDOW_SIZE); + + uint[] lit_freqs = mem::temp_array(uint, 286); + uint[] dist_freqs = mem::temp_array(uint, 30); + Token[] tokens = allocator::alloc_array(tmem, Token, input.len + 1); + usz token_count = 0; + + uint hash = 0; + if (input.len >= 2) + { + hash = ((uint)input[0] << 5) ^ (uint)input[1]; + } + + usz pos = 0; + while (pos < input.len) + { + uint best_len = 0; + uint best_dist = 0; + + if (pos + 2 < input.len) + { + hash = ((hash << 5) ^ (uint)input[pos + 2]) & HASH_MASK; + uint match_head = head[hash]; + head[hash] = (uint)pos; + prev[pos & 0x7FFF] = match_head; + + uint chain_len = 0; + uint curr = match_head; + while (curr != 0xFFFFFFFF && (uint)pos - curr < WINDOW_SIZE && chain_len < MAX_CHAIN) + { + chain_len++; + if (pos + best_len < input.len && input[curr + best_len] == input[pos + best_len]) + { + if (best_len < 3 || mem::load((ushort*)&input[pos + best_len - 1], 1) == mem::load((ushort*)&input[curr + best_len - 1], 1)) + { + uint match_len = 0; + while (match_len + 8 <= MAX_MATCH && pos + match_len + 8 <= input.len) + { + if (mem::load((ulong*)&input[curr + match_len], 1) == mem::load((ulong*)&input[pos + match_len], 1)) + { + match_len += 8; + continue; + } + break; + } + + while (match_len < MAX_MATCH && + pos + match_len < input.len && + input[curr + match_len] == input[pos + match_len]) + { + match_len++; + } + + if (match_len >= MIN_MATCH && match_len > best_len) + { + best_len = match_len; + best_dist = (uint)pos - curr; + if (best_len >= NICE_MATCH) break; + } + } + } + curr = prev[curr & 0x7FFF]; + if (best_len >= GOOD_MATCH) chain_len++; + } + } + + if (best_len >= MIN_MATCH) + { + uint len_code; + switch (best_len) + { + case 3..10: len_code = 257 + best_len - 3; + case 11..18: len_code = 265 + (best_len - 11) / 2; + case 19..34: len_code = 269 + (best_len - 19) / 4; + case 35..66: len_code = 273 + (best_len - 35) / 8; + case 67..130: len_code = 277 + (best_len - 67) / 
16; + case 131..257:len_code = 281 + (best_len - 131) / 32; + default: len_code = 285; + } + lit_freqs[len_code]++; + + uint dist_code; + switch (best_dist) + { + case 1..4: dist_code = best_dist - 1; + case 5..8: dist_code = 4 + (best_dist - 5) / 2; + case 9..16: dist_code = 6 + (best_dist - 9) / 4; + case 17..32: dist_code = 8 + (best_dist - 17) / 8; + case 33..64: dist_code = 10 + (best_dist - 33) / 16; + case 65..128: dist_code = 12 + (best_dist - 65) / 32; + case 129..256: dist_code = 14 + (best_dist - 129) / 64; + case 257..512: dist_code = 16 + (best_dist - 257) / 128; + case 513..1024: dist_code = 18 + (best_dist - 513) / 256; + case 1025..2048: dist_code = 20 + (best_dist - 1025) / 512; + case 2049..4096: dist_code = 22 + (best_dist - 2049) / 1024; + case 4097..8192: dist_code = 24 + (best_dist - 4097) / 2048; + case 8193..16384: dist_code = 26 + (best_dist - 8193) / 4096; + default: dist_code = 28 + (best_dist - 16385) / 8192; + } + dist_freqs[dist_code]++; + tokens[token_count++] = { (ushort)best_len, (ushort)best_dist }; + + uint limit = best_len - 1; + if (pos + limit + 2 < input.len) + { + for (uint k = 0; k < limit; k++) + { + pos++; + hash = ((hash << 5) ^ (uint)input[pos + 2]) & HASH_MASK; + prev[pos & 0x7FFF] = head[hash]; + head[hash] = (uint)pos; + } + } + else + { + for (uint k = 0; k < limit; k++) + { + pos++; + if (pos + 2 < input.len) + { + hash = ((hash << 5) ^ (uint)input[pos + 2]) & HASH_MASK; + prev[pos & 0x7FFF] = head[hash]; + head[hash] = (uint)pos; + } + } + } + pos++; + } + else + { + lit_freqs[(uint)input[pos]]++; + tokens[token_count++] = { (ushort)input[pos], 0 }; + pos++; + } + } + lit_freqs[256] = 1; + tokens[token_count++] = { 256, 0 }; + + Huffman lit_huff, dist_huff; + lit_huff.build_from_freqs(lit_freqs, 15); + + uint total_dist = 0; + foreach (f : dist_freqs) total_dist += f; + if (total_dist == 0) dist_freqs[0] = 1; + dist_huff.build_from_freqs(dist_freqs, 15); + + BitWriter writer; + writer.init(allocator, 
math::max((usz)input.len / 2, (usz)1024)); + writer.write_bits(1, 1); + writer.write_bits(2, 2); + + char[] lit_lens = lit_huff.get_lengths(285); + char[] dist_lens = dist_huff.get_lengths(29); + + usz hlit_count = 286; + while (hlit_count > 257 && lit_lens[hlit_count - 1] == 0) hlit_count--; + usz hdist_count = 30; + while (hdist_count > 1 && dist_lens[hdist_count - 1] == 0) hdist_count--; + + writer.write_bits((uint)hlit_count - 257, 5); + writer.write_bits((uint)hdist_count - 1, 5); + + List{Token} tree_tokens; tree_tokens.tinit(); + uint[] tree_freqs = mem::temp_array(uint, 19); + + char[] all_lens = mem::temp_array(char, hlit_count + hdist_count); + all_lens[:hlit_count] = lit_lens[:hlit_count]; + all_lens[hlit_count:hdist_count] = dist_lens[:hdist_count]; + + for (usz i = 0; i < all_lens.len;) + { + char len = all_lens[i]; + usz count = 1; + while (i + count < all_lens.len && all_lens[i + count] == len) count++; + + if (len == 0) + { + while (count >= 11) + { + uint c = (uint)math::min(count, (usz)138); + tree_tokens.push({ 18, (ushort)(c - 11) }); + tree_freqs[18]++; + i += c; count -= c; + } + while (count >= 3) + { + uint c = (uint)math::min(count, (usz)10); + tree_tokens.push({ 17, (ushort)(c - 3) }); + tree_freqs[17]++; + i += c; count -= c; + } + } + else if (count >= 4) + { + tree_tokens.push({ (ushort)len, 0 }); + tree_freqs[(uint)len]++; + i++; count--; + while (count >= 3) + { + uint c = (uint)math::min(count, (usz)6); + tree_tokens.push({ 16, (ushort)(c - 3) }); + tree_freqs[16]++; + i += c; count -= c; + } + } + + while (count--) + { + tree_tokens.push({ (ushort)len, 0 }); + tree_freqs[(uint)len]++; + i++; + } + } + + Huffman tree_huff; + tree_huff.build_from_freqs(tree_freqs, 7); + char[] tree_lens = tree_huff.get_lengths(18); + + usz hclen_count = 19; + while (hclen_count > 4 && tree_lens[ORDER[hclen_count - 1]] == 0) hclen_count--; + writer.write_bits((uint)hclen_count - 4, 4); + + for (usz i = 0; i < hclen_count; i++) + { + 
writer.write_bits(tree_lens[ORDER[i]], 3); + } + + Code[19] tree_codes; + gen_canonical_codes(&tree_codes, tree_lens); + foreach (t : tree_tokens) + { + writer.write_huffman(tree_codes[t.val].code, tree_codes[t.val].len); + switch (t.val) + { + case 16: + writer.write_bits(t.dist, 2); + case 17: + writer.write_bits(t.dist, 3); + case 18: + writer.write_bits(t.dist, 7); + } + } + + Code[286] lit_codes; + gen_canonical_codes(&lit_codes, lit_lens[:hlit_count]); + Code[30] dist_codes; + gen_canonical_codes(&dist_codes, dist_lens[:hdist_count]); + + foreach (t : tokens[:token_count]) + { + if (t.dist == 0) + { + writer.write_huffman(lit_codes[t.val].code, lit_codes[t.val].len); + } + else + { + uint best_len = t.val; + uint best_dist = t.dist; + + uint len_code; + uint len_extra_bits = 0; + uint len_extra = 0; + switch (best_len) + { + case 3..10: len_code = 257 + best_len - 3; + case 11..18: len_code = 265 + (best_len - 11) / 2; len_extra_bits = 1; len_extra = (best_len - 11) % 2; + case 19..34: len_code = 269 + (best_len - 19) / 4; len_extra_bits = 2; len_extra = (best_len - 19) % 4; + case 35..66: len_code = 273 + (best_len - 35) / 8; len_extra_bits = 3; len_extra = (best_len - 35) % 8; + case 67..130: len_code = 277 + (best_len - 67) / 16; len_extra_bits = 4; len_extra = (best_len - 67) % 16; + case 131..257:len_code = 281 + (best_len - 131) / 32; len_extra_bits = 5; len_extra = (best_len - 131) % 32; + default: len_code = 285; + } + writer.write_huffman(lit_codes[len_code].code, lit_codes[len_code].len); + if (len_extra_bits > 0) { writer.write_bits(len_extra, len_extra_bits); } + + uint dist_code; + uint dist_extra_bits = 0; + uint dist_extra = 0; + switch (best_dist) + { + case 1..4: dist_code = best_dist - 1; + case 5..8: dist_code = 4 + (best_dist - 5) / 2; dist_extra_bits = 1; dist_extra = (best_dist - 5) % 2; + case 9..16: dist_code = 6 + (best_dist - 9) / 4; dist_extra_bits = 2; dist_extra = (best_dist - 9) % 4; + case 17..32: dist_code = 8 + (best_dist - 
<*
 Buffered bit reader over an InStream.
 Bytes are staged in `buffer`, then shifted into `bit_buf`, from which bits
 are handed out starting at the least significant bit.
*>
struct StreamBitReader @private
{
	InStream stream;            // Source of compressed bytes.
	char[] buffer;              // Caller-supplied staging buffer filled by refill().
	usz buf_pos;                // Index of the next unread byte in `buffer`.
	usz buf_len;                // Number of valid bytes currently in `buffer`.
	ulong bit_buf;              // Bit accumulator; the next bit to deliver is bit 0.
	uint nbits;                 // Number of valid bits held in `bit_buf`.
	SetCursorFn set_cursor_fn;  // The stream's `set_cursor` method captured in init(); close() checks it for null before using it to seek backwards.
}
<*
 Give back to the underlying stream every byte that was fetched but never
 consumed, so the stream cursor ends up right after the last byte the
 decoder actually used.

 Two places can hold unconsumed bytes: the tail of the staging buffer, and
 whole bytes that `read_bits` prefetched into the bit accumulator (it loads
 up to 64 bits at a time). Both are counted. The seek is best-effort: it is
 skipped when the stream has no `set_cursor`, and a failing seek is ignored.
 Calling `close` again is harmless because the returned bytes are forgotten.
*>
fn void StreamBitReader.close(&self)
{
	// Bytes left in the staging buffer plus whole bytes parked in the accumulator.
	// The partially consumed byte (nbits % 8 leftover bits) is not given back.
	long unread = (long)(self.buf_len - self.buf_pos) + (long)(self.nbits / 8);
	if (unread > 0 && self.set_cursor_fn)
	{
		(void)self.set_cursor_fn(self.stream, -unread, FROM_CURSOR);
	}
	// Forget what was handed back so a second close() cannot rewind twice.
	self.buf_pos = self.buf_len;
	self.nbits %= 8;
	self.bit_buf &= (1UL << self.nbits) - 1;
}
<*
 Append one Huffman code to the output.

 `write_bits` emits the least significant bit first, whereas DEFLATE stores
 Huffman codes starting from their most significant bit, so the code is
 bit-reversed before being written: it is moved to the top of the 32-bit
 word and the whole word is reversed, leaving the reversed code in the low
 `len` bits.

 A zero `len` would make the shift amount equal to the full word width,
 which is not a valid shift; DEFLATE codes are 1 to 15 bits long.

 @param code : `The code value, right-aligned.`
 @param len : `The code length in bits.`
 @require len >= 1 && len <= 15
*>
fn void BitWriter.write_huffman(&self, uint code, uint len) @private
{
	uint rev = bits::reverse(code << (32 - len));
	self.write_bits(rev, len);
}
+*> +fn char[] pkg_merge(Allocator allocator, uint[] freqs, uint max_bits) @private => @pool() +{ + List {IndexMap} map; + map.tinit(); + + usz n = 0; + foreach (i, freq : freqs) + { + if (freq == 0) continue; + map.push({i, freq}); + n++; + } + + if (n == 0) return {}; + + sort::quicksort(map.array_view()); + + if (n == 1) + { + char[] blen = allocator::new_array(allocator, char, freqs.len); + blen[map[0].index] = 1; + return blen; + } + + uint[] hist = mem::temp_array(uint, n); + foreach (i, m : map) hist[i] = m.freq; + + usz max_elements = 2 * n; + uint[] current = mem::temp_array(uint, max_elements); + uint[] previous = mem::temp_array(uint, max_elements); + ulong[] is_merged = mem::temp_array(ulong, max_elements); + + previous[:n] = hist[:n]; + usz num_previous = n; + is_merged[:max_elements] = 0; + usz num_relevant = 2 * n - 2; + + ulong mask = 1; + for (uint bits = max_bits - 1; bits > 0; bits--) + { + num_previous &= (usz)~1; + current[0] = hist[0]; + current[1] = hist[1]; + uint sum = current[0] + current[1]; + + usz num_current = 2; + usz num_hist = 2; + usz num_merged = 0; + + while (true) + { + if (num_hist < n && hist[num_hist] <= sum) + { + current[num_current++] = hist[num_hist++]; + continue; + } + + is_merged[num_current] |= mask; + current[num_current] = sum; + num_current++; + + num_merged++; + if (num_merged * 2 >= num_previous) break; + + sum = previous[num_merged * 2] + previous[num_merged * 2 + 1]; + } + + while (num_hist < n) current[num_current++] = hist[num_hist++]; + mask <<= 1; + + if (num_previous >= num_relevant) + { + bool keep_going = false; + for (usz i = num_relevant; i > 1; i--) + { + if (previous[i - 1] != current[i - 1]) + { + keep_going = true; + break; + } + } + if (!keep_going) break; + } + + @swap(previous, current); + num_previous = num_current; + } + + mask >>= 1; + hist[..] 
<*
 Assign canonical Huffman codes from a table of code lengths.

 Codes of the same length are consecutive values handed out in symbol order,
 and each length starts where the previous, shorter length left off
 (shifted left by one bit). Symbols with length 0 receive code 0 / length 0.

 @param codes : `Output table; must have room for blen.len entries.`
 @param blen : `Code length per symbol, 0 for unused symbols.`
*>
fn void gen_canonical_codes(Code* codes, char[] blen) @private
{
	// How many symbols use each code length; slot 0 stays empty because
	// unused symbols are skipped.
	ushort[16] per_length;
	foreach (l : blen)
	{
		if (l == 0) continue;
		per_length[l]++;
	}

	// First code value for every length.
	ushort[16] first_code;
	ushort running = 0;
	for (uint width = 1; width < 16; width++)
	{
		running = (running + per_length[width - 1]) << 1;
		first_code[width] = running;
	}

	// Hand out codes in symbol order.
	foreach (uint sym, l : blen)
	{
		Code* slot = &codes[sym];
		if (l == 0)
		{
			slot.code = 0;
			slot.len = 0;
			continue;
		}
		uint value = first_code[l];
		first_code[l]++;
		slot.code = (ushort)value;
		slot.len = (ushort)l;
	}
}
<*
 Advance the inflater state machine by one step.

 Each call does one small unit of work for the current state: it parses a
 block header, reads one code length, decodes one symbol, or emits at most
 one output byte into the window. `Inflater.read` calls this repeatedly and
 drains the window in between.

 Returns `CORRUPTED_DATA` for malformed input; faults from the bit reader
 are passed through.
*>
fn void? Inflater.step(&self) @private
{
	switch (self.state)
	{
		case START_BLOCK:
			// Block header: 1 bit "final block" flag, 2 bits block type.
			self.final = self.reader.read_bits(1)! != 0;
			switch (self.reader.read_bits(2)!)
			{
				case 0:
					self.state = READ_STORED_LEN;
				case 1:
					// Fixed Huffman tables, built once in Inflater.init.
					self.lit_ptr = &lit_fixed_global;
					self.dist_ptr = &dist_fixed_global;
					self.state = DECODE_SYMBOL;
				case 2:
					self.state = READ_DYN_COUNTS;
				default:
					return CORRUPTED_DATA~;
			}

		case READ_STORED_LEN:
			// Stored blocks start on a byte boundary with LEN and its one's complement.
			self.reader.align();
			self.stored_len = self.reader.read_bits(16)!;
			uint nlen = self.reader.read_bits(16)!;
			if (self.stored_len != (~nlen & 0xFFFF)) return CORRUPTED_DATA~;
			if (self.stored_len == 0)
			{
				self.state = self.final ? DONE : START_BLOCK;
				break;
			}
			self.state = COPY_STORED;

		case COPY_STORED:
			// One raw byte per step.
			char c = (char)self.reader.read_bits(8)!;
			self.write_byte(c);
			self.stored_len--;
			if (self.stored_len == 0)
			{
				self.state = self.final ? DONE : START_BLOCK;
			}

		case READ_DYN_COUNTS:
			self.hlit = self.reader.read_bits(5)! + 257;
			self.hdist = self.reader.read_bits(5)! + 1;
			self.hclen = self.reader.read_bits(4)! + 4;
			self.dyn_i = 0;
			self.code_lengths = {};
			self.state = READ_DYN_CODELENS;

		case READ_DYN_CODELENS:
			// Code-length code lengths arrive in the permuted ORDER, one per step.
			self.code_lengths[ORDER[self.dyn_i]] = (char)self.reader.read_bits(3)!;
			self.dyn_i++;
			if (self.dyn_i >= self.hclen)
			{
				self.dyn_code_huff.build(&self.code_lengths);
				self.dyn_i = 0;
				self.dyn_num_lengths = self.hlit + self.hdist;
				if (self.dyn_num_lengths > 320) return CORRUPTED_DATA~;
				self.lit_dist_lengths[..] = 0;
				self.state = READ_DYN_TREES;
			}

		case READ_DYN_TREES:
			// One code-length symbol per step: a literal length (0..15) or a repeat code (16..18).
			ushort sym = self.dyn_code_huff.decode_stream(&self.reader)!;
			switch (sym)
			{
				case 0..15:
					if (self.dyn_i >= 320) return CORRUPTED_DATA~;
					self.lit_dist_lengths[self.dyn_i++] = (char)sym;
				case 16:
					// Repeat the previous length 3..6 times; needs a previous entry.
					if (self.dyn_i == 0) return CORRUPTED_DATA~;
					uint repeat_count = self.reader.read_bits(2)! + 3;
					char prev = self.lit_dist_lengths[self.dyn_i - 1];
					if (self.dyn_i + repeat_count > self.dyn_num_lengths || self.dyn_i + repeat_count > 320) return CORRUPTED_DATA~;
					for (uint k = 0; k < repeat_count; k++) self.lit_dist_lengths[self.dyn_i++] = prev;
				case 17:
					// Run of 3..10 zero lengths.
					uint zero_len = self.reader.read_bits(3)! + 3;
					if (self.dyn_i + zero_len > self.dyn_num_lengths || self.dyn_i + zero_len > 320) return CORRUPTED_DATA~;
					for (uint k = 0; k < zero_len; k++) self.lit_dist_lengths[self.dyn_i++] = 0;
				case 18:
					// Run of 11..138 zero lengths.
					uint zero_len = self.reader.read_bits(7)! + 11;
					if (self.dyn_i + zero_len > self.dyn_num_lengths || self.dyn_i + zero_len > 320) return CORRUPTED_DATA~;
					for (uint k = 0; k < zero_len; k++) self.lit_dist_lengths[self.dyn_i++] = 0;
			}
			if (self.dyn_i >= self.dyn_num_lengths)
			{
				uint hlit = self.hlit;
				uint hdist = self.hdist;
				uint total = hlit + hdist;
				if (hlit > 286 || hdist > 32 || total > 320 || hlit > total)
				{
					return CORRUPTED_DATA~;
				}
				// The first hlit lengths describe literal/length codes, the rest distance codes.
				self.dyn_lit.build(self.lit_dist_lengths[0:hlit]);
				self.dyn_dist.build(self.lit_dist_lengths[hlit:hdist]);
				self.lit_ptr = &self.dyn_lit;
				self.dist_ptr = &self.dyn_dist;
				self.state = DECODE_SYMBOL;
			}

		case DECODE_SYMBOL:
			ushort symbol = self.lit_ptr.decode_stream(&self.reader)!;
			switch
			{
				case symbol < 256:
					self.write_byte((char)symbol);
				case symbol == 256:
					// End of block.
					self.state = self.final ? DONE : START_BLOCK;
				case symbol <= 285:
					uint len_idx = symbol - 257;
					self.match_len = LENGTH_BASE[len_idx] + self.reader.read_bits(LENGTH_EXTRA[len_idx])!;
					self.state = READ_DIST_SYM;
				default:
					// 286 and 287 exist in the fixed tree but are not valid symbols.
					return CORRUPTED_DATA~;
			}

		case READ_DIST_SYM:
			ushort dist_sym = self.dist_ptr.decode_stream(&self.reader)!;
			// Distance symbols 30 and 31 are decodable (the fixed tree gives them codes
			// and a dynamic header may declare 32 distance codes) but they are reserved
			// and never valid in compressed data. Their table entries are 0, which would
			// yield a match distance of 0 and copy stale window bytes, so reject them.
			if (dist_sym >= 30) return CORRUPTED_DATA~;
			self.match_dist = DIST_BASE[dist_sym] + self.reader.read_bits(DIST_EXTRA[dist_sym])!;
			self.state = COPY_MATCH;

		case COPY_MATCH:
			// A match may not reach back before the start of the output.
			if (self.match_dist > self.pos) return CORRUPTED_DATA~;
			// One byte per step; copying byte-wise lets a match overlap its own output.
			char c = self.window[(usz)((self.pos - self.match_dist) & 0xFFFF)];
			self.write_byte(c);
			self.match_len--;
			if (self.match_len == 0)
			{
				self.state = DECODE_SYMBOL;
			}

		case DONE:
			self.done = true;
			self.reader.close();
	}
	return;
}
open(Allocator allocator, String path, String mode = "r") + - fn ZipArchive? recover(Allocator allocator, String path, ) + - fn void? ZipArchive.extract(&self, String output_dir) + - fn ZipEntry? ZipArchive.stat(&self, String filename) + - fn char[]? ZipArchive.read_file_all(&self, Allocator allocator, String filename) + - fn void? ZipArchive.close(&self) +*> +module std::compression::zip; +import std::io, std::collections::list, std::hash::crc32, std::time, std::math; +import std::encoding::codepage, std::compression::deflate; +import libc; + +faultdef + INVALID_ARGUMENT, + IO_ERROR, + CORRUPTED_DATA, + ENTRY_NOT_FOUND, + ENCRYPTED_FILE; + +<* + Describes a single entry within a ZIP archive. +*> +struct ZipEntry +{ + String name; + ulong uncompressed_size; + ulong compressed_size; + bool is_directory; + bool is_encrypted; + uint crc32; + ulong offset; + ZipMethod method; + ushort last_mod_time; + ushort last_mod_date; +} + +fn Time ZipEntry.time(&self) => dos_date_time_to_time(self.last_mod_date, self.last_mod_time); + +alias ZipEntryList = List{ZipEntry}; + +struct ZipArchive +{ + File* file; + Allocator allocator; + ZipEntryList entries; + String mode; + String path; + String comment; +} + +constdef ZipMethod : UShortLE +{ + STORE = {0}, + DEFLATE = {8}, +} + +<* + Opens a ZIP archive. + @param allocator : `The allocator to use.` + @param path : `The path to the ZIP file.` + @param mode : `Opening mode ("r", "w", "w+").` + @return `The opened archive.` + @require mode == "r" || mode == "w" || mode == "w+" +*> +fn ZipArchive? open(Allocator allocator, String path, String mode = "r") +{ + if (mode == "w" || mode == "w+") + { + File f = file::open(path, mode == "w+" ? 
"w+" : "w+b")!; + + ZipArchive archive; + archive.allocator = allocator; + archive.file = allocator::new(allocator, File, f); + archive.entries.init(allocator); + archive.path = path.copy(allocator); + archive.mode = mode.copy(allocator); + return archive; + } + if (mode != "r") return INVALID_ARGUMENT~; + + File f = file::open(path, "rb")!; + defer (catch err) (void)f.close(); + + ulong file_size = f.size()!!; + + if (file_size < ZipEOCD.sizeof) return CORRUPTED_DATA~; + + ulong search_start = file_size > (ulong)(ZipEOCD.sizeof + 65535) + ? file_size - (ulong)(ZipEOCD.sizeof + 65535) + : 0; + + ZipEOCD eocd; + bool found = false; + + for (ulong pos = file_size - (ulong)ZipEOCD.sizeof; pos >= search_start; pos--) + { + if (pos > (ulong)isz.max) return io::OVERFLOW~; + f.set_cursor(pos)!; + UIntLE sig; + if (io::read_any(&f, &sig)! != 4) break; + + if (sig.val == ZIP_EOCD_SIG) + { + f.set_cursor(pos)!; + if (io::read_any(&f, &eocd)! == ZipEOCD.sizeof) + { + ulong expected_end = pos + ZipEOCD.sizeof + eocd.comment_len.val; + if (expected_end == file_size) + { + found = true; + break; + } + } + } + if (pos == 0) break; + } + + if (!found) return CORRUPTED_DATA~; + + ZipArchive archive; + archive.allocator = allocator; + archive.file = allocator::new(allocator, File, f); + archive.entries.init(allocator); + archive.path = path.copy(allocator); + archive.mode = mode.copy(allocator); + + if (eocd.comment_len.val > 0) + { + char[] comment_data = allocator::alloc_array(allocator, char, (usz)eocd.comment_len.val); + defer allocator::free(allocator, comment_data); + if (archive.file.read(comment_data)! 
== (usz)eocd.comment_len.val) + { + archive.comment = codepage::decode(allocator, comment_data, CP437)!!; + } + } + + defer catch (void)archive.close(); + + if (eocd.cd_offset.val > (uint)isz.max) return io::OVERFLOW~; + archive.file.set_cursor(eocd.cd_offset.val)!; + + usz num_entries = eocd.num_entries.val; + + + // ZIP64 check + if (eocd.num_entries.val == 0xFFFF || eocd.cd_offset.val == 0xFFFFFFFF) + { + isz locator_pos = (isz)file_size - ZipEOCD.sizeof - Zip64Locator.sizeof; + if (locator_pos >= 0) + { + archive.file.set_cursor(locator_pos)!; + Zip64Locator locator; + if (try n = archive.file.read(((char*)&locator)[:Zip64Locator.sizeof])) + { + if (n == Zip64Locator.sizeof && locator.signature.val == ZIP64_LOCATOR_SIG) + { + if (locator.offset_eocd.val > (ulong)isz.max) return io::OVERFLOW~; + archive.file.set_cursor(locator.offset_eocd.val)!; + Zip64EOCD eocd64; + io::read_any(archive.file, &eocd64)!; + if (eocd64.signature.val == ZIP64_EOCD_SIG) + { + if (eocd64.offset_cd.val > (ulong)isz.max) return io::OVERFLOW~; + archive.file.set_cursor(eocd64.offset_cd.val)!!; + num_entries = (usz)eocd64.count_total.val; + } + } + } + } + } + + for (usz i = 0; i < num_entries; i++) + { + ZipCDH cdh; + if (io::read_any(archive.file, &cdh)! != ZipCDH.sizeof) break; + if (cdh.signature.val != ZIP_CDH_SIG) break; + + char[] raw_name = allocator::alloc_array(allocator, char, cdh.filename_len.val); + if (archive.file.read(raw_name)! 
!= (usz)cdh.filename_len.val) + { + allocator::free(allocator, raw_name); + break; + } + + String name; + bool is_utf8 = (cdh.flags.val & 0x0800) != 0; + if (is_utf8 || is_valid_utf8(raw_name)) + { + name = (String)raw_name; + } + else + { + name = (String)codepage::decode(allocator, raw_name, CP437)!!; + allocator::free(allocator, raw_name.ptr); + } + + char[] extra_field; + if (cdh.extra_field_len.val > 0) + { + extra_field = allocator::alloc_array(allocator, char, cdh.extra_field_len.val); + archive.file.read(extra_field)!; + } + + archive.file.set_cursor(cdh.comment_len.val, FROM_CURSOR)!; + + ulong uncompressed_size = cdh.uncompressed_size.val; + ulong compressed_size = cdh.compressed_size.val; + ulong offset = cdh.relative_offset.val; + + if (cdh.uncompressed_size.val == 0xFFFFFFFF || cdh.compressed_size.val == 0xFFFFFFFF || cdh.relative_offset.val == 0xFFFFFFFF) + { + ByteReader reader = { .bytes = extra_field }; + while (reader.available()! >= 4) + { + ushort id = io::read_le_ushort(&reader)!; + ushort size = io::read_le_ushort(&reader)!; + + if (id == ZIP64_EXTRA_ID) + { + int remaining = size; + if (cdh.uncompressed_size.val == 0xFFFFFFFF && remaining >= 8) + { + uncompressed_size = io::read_le_ulong(&reader)!; + remaining -= 8; + } + if (cdh.compressed_size.val == 0xFFFFFFFF && remaining >= 8) + { + compressed_size = io::read_le_ulong(&reader)!; + remaining -= 8; + } + if (cdh.relative_offset.val == 0xFFFFFFFF && remaining >= 8) + { + offset = io::read_le_ulong(&reader)!; + remaining -= 8; + } + break; + } + reader.set_cursor(size, FROM_CURSOR)!; + } + } + if (extra_field.len > 0) allocator::free(allocator, extra_field); + + bool is_directory = name.ends_with("/") || name.ends_with("\\"); + if (!is_directory) + { + ushort host_system = cdh.version_made_by.val >> 8; + if (host_system == 0 || host_system == 10) // MS-DOS or NTFS + { + if ((cdh.external_attr.val & 0x10) != 0) is_directory = true; + } + else if (host_system == 3) // Unix + { + if 
(((cdh.external_attr.val >> 16) & 0x4000) != 0) is_directory = true; + } + } + + ZipEntry entry = { + .name = name, + .uncompressed_size = uncompressed_size, + .compressed_size = compressed_size, + .crc32 = cdh.crc32.val, + .offset = offset, + .method = cdh.method, + .last_mod_time = cdh.last_mod_time.val, + .last_mod_date = cdh.last_mod_date.val, + .is_directory = is_directory, + .is_encrypted = (cdh.flags.val & 1) != 0 + }; + + archive.entries.push(entry); + } + + return archive; +} + +<* + Recovers a ZIP archive by scanning for Local File Headers + when the Central Directory is missing or corrupted. + @param path : `The path to the broken ZIP file.` + @param allocator : `The allocator to use.` + @return `The recovered archive.` +*> +fn ZipArchive? recover(Allocator allocator, String path) +{ + File f = file::open(path, "rb")!; + defer (catch err) (void)f.close(); + + ZipArchive archive = { + .allocator = allocator, + .file = allocator::new(allocator, File, f), + .path = path.copy(allocator), + .mode = "r".copy(allocator) + }; + archive.entries.init(allocator); + defer (catch err) (void)archive.close(); + + char[4] sig_buf; + while (true) + { + long offset = archive.file.cursor()!!; + usz n = archive.file.read(sig_buf[..])!; + if (n < 4) break; + + if (bitorder::read(sig_buf, UIntLE) != ZIP_LFH_SIG) + { + archive.file.set_cursor(-3, FROM_CURSOR)!!; + continue; + } + + ZipLFH lfh; + if (archive.file.read(((char*)&lfh.version_needed)[:ZipLFH.sizeof - 4])! != ZipLFH.sizeof - 4) break; + lfh.signature.val = ZIP_LFH_SIG; + + char[] raw_name = allocator::alloc_array(allocator, char, lfh.filename_len.val); + if (archive.file.read(raw_name)! 
!= (usz)lfh.filename_len.val) + { + allocator::free(allocator, raw_name); + break; + } + + String name; + if ((lfh.flags.val & 0x0800) != 0 || is_valid_utf8(raw_name)) + { + name = (String)raw_name; + } + else + { + name = (String)codepage::decode(allocator, raw_name, CP437)!!; + allocator::free(allocator, raw_name.ptr); + } + + archive.file.set_cursor(lfh.extra_field_len.val, FROM_CURSOR)!!; + + ZipEntry entry = { + .name = name, + .compressed_size = lfh.compressed_size.val, + .uncompressed_size = lfh.uncompressed_size.val, + .crc32 = lfh.crc32.val, + .offset = offset, + .method = lfh.method, + .last_mod_time = lfh.last_mod_time.val, + .last_mod_date = lfh.last_mod_date.val, + .is_directory = name.ends_with("/") || name.ends_with("\\"), + .is_encrypted = (lfh.flags.val & 1) != 0 + }; + + archive.entries.push(entry); + + if (lfh.compressed_size.val > 0 && (ulong)lfh.compressed_size.val > (ulong)isz.max) return io::OVERFLOW~; + archive.file.set_cursor(lfh.compressed_size.val, FROM_CURSOR)!!; + } + + if (archive.entries.len() == 0) return CORRUPTED_DATA~; + return archive; +} + +<* + Closes the ZIP archive, writing the central directory if in write mode. +*> +fn void? 
ZipArchive.close(&self) +{ + defer { + (void)self.file.close(); + + foreach (&entry : self.entries) + { + allocator::free(self.allocator, entry.name); + } + self.entries.free(); + allocator::free(self.allocator, self.mode); + allocator::free(self.allocator, self.path); + if (self.comment.len > 0) allocator::free(self.allocator, self.comment); + if (self.file) allocator::free(self.allocator, self.file); + } + + if (self.mode.starts_with("w")) + { + self.file.flush()!; + ulong cd_offset = self.file.cursor()!; + ulong cd_size = 0; + + for (usz i = 0; i < self.entries.len(); i++) + { + ZipEntry* entry = self.entries.get_ref(i); + ZipCDH cdh = { + .signature.val = ZIP_CDH_SIG, + .version_made_by.val = 45, // 4.5 for ZIP64 + .version_needed.val = 45, + .method = entry.method, + .last_mod_time.val = entry.last_mod_time, + .last_mod_date.val = entry.last_mod_date, + .crc32.val = entry.crc32, + .filename_len.val = (ushort)entry.name.len, + }; + + bool is_zip64 = entry.uncompressed_size >= 0xFFFFFFFF || entry.compressed_size >= 0xFFFFFFFF || entry.offset >= 0xFFFFFFFF; + + char[] extra_data; + if (is_zip64) + { + cdh.compressed_size.val = 0xFFFFFFFF; + cdh.uncompressed_size.val = 0xFFFFFFFF; + cdh.relative_offset.val = 0xFFFFFFFF; + + // Header(4) + Uncomp(8) + Comp(8) + Offset(8) + ushort extra_size = 28; + extra_data = allocator::alloc_array(self.allocator, char, extra_size); + bitorder::write(ZIP64_EXTRA_ID, extra_data[:2], UShortLE); + bitorder::write((ushort)(extra_size - 4), extra_data[2:2], UShortLE); + bitorder::write(entry.uncompressed_size, extra_data[4:8], ULongLE); + bitorder::write(entry.compressed_size, extra_data[12:8], ULongLE); + bitorder::write(entry.offset, extra_data[20:8], ULongLE); + + cdh.extra_field_len.val = extra_size; + } + else + { + cdh.compressed_size.val = (uint)entry.compressed_size; + cdh.uncompressed_size.val = (uint)entry.uncompressed_size; + cdh.relative_offset.val = (uint)entry.offset; + } + + // Set external attributes (MS-DOS 
compatibility). + // 0x10 is the DOS directory attribute. + cdh.external_attr.val = (uint)(entry.is_directory ? 0x10 : 0); + + io::write_any(self.file, &cdh)!; + self.file.write(entry.name)!; + if (is_zip64) + { + self.file.write(extra_data)!; + allocator::free(self.allocator, extra_data); + } + + ulong entry_record_size = (ulong)(ZipCDH.sizeof + entry.name.len + cdh.extra_field_len.val); + if (cd_size > (ulong.max - entry_record_size)) return io::OVERFLOW~; + cd_size += entry_record_size; + } + + bool cd_zip64 = self.entries.len() >= 0xFFFF || cd_size >= 0xFFFFFFFF || cd_offset >= 0xFFFFFFFF; + + if (cd_zip64) + { + ulong eocd64_offset = self.file.cursor()!; + + Zip64EOCD eocd64 = { + .signature.val = ZIP64_EOCD_SIG, + .size.val = (ulong)(Zip64EOCD.sizeof - 12), + .version_made.val = 45, + .version_needed.val = 45, + .count_this_disk.val = (ulong)self.entries.len(), + .count_total.val = (ulong)self.entries.len(), + .size_cd.val = cd_size, + .offset_cd.val = cd_offset, + }; + + io::write_any(self.file, &eocd64)!; + + Zip64Locator locator = { + .signature.val = ZIP64_LOCATOR_SIG, + .disk_start.val = 0, + .offset_eocd.val = eocd64_offset, + .total_disks.val = 1, + }; + + io::write_any(self.file, &locator)!; + } + + char[] encoded_comment; + if (self.comment.len > 0) + { + char[]? res = codepage::encode(self.allocator, self.comment, CodePage.CP437); + if (try res) + { + encoded_comment = res; + if (encoded_comment.len > 0xFFFF) + { + allocator::free(self.allocator, encoded_comment.ptr); + return INVALID_ARGUMENT~; + } + } + } + defer if (encoded_comment.ptr) allocator::free(self.allocator, encoded_comment); + + ZipEOCD eocd = { + .signature.val = ZIP_EOCD_SIG, + .num_entries_this_disk.val = (ushort)(self.entries.len() >= 0xFFFF ? 0xFFFF : (ushort)self.entries.len()), + .num_entries.val = (ushort)(self.entries.len() >= 0xFFFF ? 0xFFFF : (ushort)self.entries.len()), + .cd_size.val = (uint)(cd_size >= 0xFFFFFFFF ? 
0xFFFFFFFF : (uint)cd_size), + .cd_offset.val = (uint)(cd_offset >= 0xFFFFFFFF ? 0xFFFFFFFF : (uint)cd_offset), + .comment_len.val = (ushort)encoded_comment.len, + }; + + io::write_any(self.file, &eocd)!; + + if (encoded_comment.len > 0) + { + self.file.write(encoded_comment)!; + } + } +} + +<* + Extracts the entire archive to the specified directory. + @param output_dir : `The directory to extract to.` +*> +fn void? ZipArchive.extract(&self, String output_dir) => @pool() +{ + for (usz i = 0; i < self.count(); i++) + { + ZipEntry entry = self.stat_at(i) ?? ENTRY_NOT_FOUND~!; + String out_path_str; + if (try tmp = path::temp(output_dir)) + { + if (try combined = tmp.tappend(entry.name)) + { + out_path_str = combined.str_view(); + } + else + { + return IO_ERROR~; + } + } + else + { + return IO_ERROR~; + } + + if (entry.is_directory) + { + (void)path::mkdir(out_path_str, true); + } + else + { + if (try tmp = path::temp(out_path_str)) + { + if (try parent = tmp.parent()) + { + (void)path::mkdir(parent.str_view(), true); + } + } + + ZipEntryReader reader = self.open_reader(entry.name)!; + defer (void)reader.close(); + + File f = file::open(out_path_str, "wb")!; + defer (void)f.close(); + char[65536] buf; + while (true) + { + usz? 
res = reader.read(&buf); + if (catch excuse = res) + { + if (excuse == io::EOF) break; + return excuse~; + } + usz n = res; + if (n == 0) break; + f.write(buf[:n])!; + } + f.close()!; + + file::set_modified_time(out_path_str, (Time_t)entry.time().to_seconds())!; + } + } + + // Set directory timestamps (reverse order for subdirectories) + for (usz i = self.count(); i > 0; i--) + { + ZipEntry entry; + if (try res = self.stat_at(i - 1)) + { + entry = res; + } + else + { + continue; + } + if (!entry.is_directory) continue; + + if (try tmp = path::temp(output_dir)) + { + if (try combined = tmp.tappend(entry.name)) + { + String out_path_str = (String)combined.str_view(); + file::set_modified_time(out_path_str, (Time_t)entry.time().to_seconds())!; + } + } + } +} + +fn usz ZipArchive.count(&self) => self.entries.len(); + +<* + Returns metadata for the entry at the given index. + @require index < self.count() +*> +fn ZipEntry? ZipArchive.stat_at(&self, usz index) +{ + if (index >= self.entries.len()) return ENTRY_NOT_FOUND~; + return self.entries.get(index); +} + +<* + Returns metadata for the entry with the given filename. +*> +fn ZipEntry? ZipArchive.stat(&self, String filename) +{ + for (usz i = 0; i < self.entries.len(); i++) + { + ZipEntry entry = self.entries.get(i); + if (entry.name == filename) return entry; + } + return ENTRY_NOT_FOUND~; +} + + +<* + Reads an entire file from the archive. + @param allocator : `The allocator to use.` + @param filename : `The name of the file to read.` + @return `The uncompressed file data.` +*> +fn char[]? 
ZipArchive.read_file_all(&self, Allocator allocator, String filename)
+{
+    ZipEntryReader reader = self.open_reader(filename)!;
+    defer (void)reader.close();
+    ZipEntry entry = self.stat(filename)!;
+
+    char[] data;
+    // On any failure below, release whatever result buffer was already allocated.
+    defer catch if (data.ptr) allocator::free(allocator, data);
+
+    if (reader.method == STORE)
+    {
+        if (reader.size > (ulong)usz.max) return io::OVERFLOW~;
+        data = allocator::alloc_array(allocator, char, (usz)reader.size);
+        // Fill the whole buffer. A single read may come back short, and a
+        // zero-length entry must not issue a read at all because
+        // ZipEntryReader.read reports a 0-byte read as io::EOF.
+        usz filled = 0;
+        while (filled < data.len)
+        {
+            usz? res = reader.read(data[filled..]);
+            if (catch excuse = res)
+            {
+                // Running dry before `size` bytes arrived means the entry is truncated.
+                if (excuse == io::EOF) return CORRUPTED_DATA~;
+                return excuse~;
+            }
+            filled += res;
+        }
+    }
+    else if (reader.method == DEFLATE)
+    {
+        if (entry.compressed_size == 0)
+        {
+            // ZipEntryWriter records a DEFLATE entry with no payload when nothing
+            // was written to it, so there is no stream to inflate. Any other
+            // combination of sizes cannot be decoded.
+            if (entry.uncompressed_size != 0) return CORRUPTED_DATA~;
+        }
+        else
+        {
+            if (reader.adapter.start_offset > (ulong)isz.max) return io::OVERFLOW~;
+            self.file.set_cursor(reader.adapter.start_offset)!;
+            if (entry.compressed_size > (ulong)usz.max) return io::OVERFLOW~;
+            char[] compressed = allocator::alloc_array(allocator, char, (usz)entry.compressed_size);
+            defer allocator::free(allocator, compressed);
+            // A short read would hand a truncated stream to the decompressor.
+            if (self.file.read(compressed)! != compressed.len) return CORRUPTED_DATA~;
+            data = deflate::decompress(allocator, compressed)!;
+        }
+    }
+    else
+    {
+        // Same answer ZipEntryReader.read gives for methods it cannot decode.
+        return io::UNSUPPORTED_OPERATION~;
+    }
+
+    // Verify the payload against the CRC stored in the archive's entry record.
+    Crc32 crc;
+    crc.init();
+    crc.update(data);
+    if (~crc.result != entry.crc32) return CORRUPTED_DATA~;
+
+    return data;
+}
+
+
+<*
+ Adds a directory entry to the archive.
+ A trailing slash is appended to the name when it is missing; the entry is
+ written with the STORE method and flagged as a directory.
+ @param dirname : `The directory name to record in the archive.`
+*>
+fn void? ZipArchive.add_directory(&self, String dirname)
+{
+    String dir_name = dirname;
+    if (!dirname.ends_with("/"))
+    {
+        dir_name = string::tformat("%s/", dirname);
+    }
+
+    ZipEntryWriter writer = self.open_writer(dir_name, STORE)!;
+    writer.entry.is_directory = true;
+    writer.close()!;
+}
+
+<*
+ Writes an entire file to the archive.
+ @param filename : `The name of the file to create.`
+ @param data : `The data to write.`
+ @param method : `Compression method.`
+*>
+fn void? 
ZipArchive.write_file(&self, String filename, char[] data, ZipMethod method = DEFLATE) +{ + ZipEntryWriter writer = self.open_writer(filename, method)!; + + writer.write(data)!; + writer.close()!; +} + +struct ZipEntryReader (InStream) +{ + ulong size; // Uncompressed size + ulong pos; // Uncompressed position + ZipMethod method; + // For DEFLATE + Inflater* inflater; + ArchiveStreamAdapter adapter; + char* bit_buf; +} + +fn usz? ZipEntryReader.read(&self, char[] buffer) @dynamic +{ + if (self.method == STORE) + { + usz n = self.adapter.read(buffer)!; + if (n == 0) return io::EOF~; + self.pos += n; + return n; + } + else if (self.method == DEFLATE) + { + if (self.inflater == null) + { + self.inflater = allocator::new(self.adapter.archive.allocator, Inflater); + self.bit_buf = allocator::alloc_array(self.adapter.archive.allocator, char, 8192); + self.inflater.init(&self.adapter, self.bit_buf[:8192]); + } + + usz n = self.inflater.read(buffer)!; + if (n == 0) return io::EOF~; + self.pos += n; + return n; + } + + return io::UNSUPPORTED_OPERATION~; +} + +fn void? ZipEntryReader.close(&self) @dynamic +{ + if (self.method == DEFLATE && self.inflater != null) + { + allocator::free(self.adapter.archive.allocator, self.bit_buf); + allocator::free(self.adapter.archive.allocator, self.inflater); + self.inflater = null; + self.bit_buf = null; + } + return; +} + +fn char? ZipEntryReader.read_byte(&self) @dynamic +{ + char[1] b; + usz n = self.read(&b)!; + if (n == 0) return io::EOF~; + return b[0]; +} + +fn usz ZipEntryReader.len(&self) @dynamic +{ + if (self.size > (ulong)usz.max) return usz.max; + return (usz)self.size; +} + +fn ulong? ZipEntryReader.available(&self) @dynamic +{ + return self.size - self.pos; +} + +fn usz? ZipEntryReader.seek(&self, isz offset, Seek seek) @dynamic +{ + self.set_cursor((long)offset, (SeekOrigin)seek.ordinal)!; + long size = self.cursor()!; + if (size > (ulong)usz.max) return io::OVERFLOW~; + return (usz)size; +} + +fn long? 
ZipEntryReader.cursor(&self) @dynamic
+{
+    // Position is counted in uncompressed bytes delivered so far.
+    return self.pos;
+}
+
+<*
+ Moves the read position of a STORE entry.
+ Targets past the end of the entry are clamped to the end. Targets before the
+ start are rejected with io::INVALID_ARGUMENT. DEFLATE entries cannot be
+ repositioned and yield io::UNSUPPORTED_OPERATION.
+ @param offset : `Signed distance from the chosen origin.`
+ @param seek : `Origin the offset is measured from.`
+*>
+fn void? ZipEntryReader.set_cursor(&self, long offset, SeekOrigin seek) @dynamic
+{
+    if (self.method == DEFLATE) return io::UNSUPPORTED_OPERATION~;
+
+    ulong base;
+    switch (seek)
+    {
+        case FROM_START:
+            base = 0;
+        case FROM_CURSOR:
+            base = self.pos;
+        case FROM_END:
+            base = self.size;
+    }
+
+    ulong new_pos;
+    if (offset < 0)
+    {
+        // Magnitude of the negative offset, computed so that long.min cannot overflow.
+        ulong back = (ulong)(-(offset + 1)) + 1;
+        // Doing this subtraction unchecked would wrap around and then be
+        // clamped to the END of the entry, so reject it explicitly.
+        if (back > base) return io::INVALID_ARGUMENT~;
+        new_pos = base - back;
+    }
+    else
+    {
+        ulong forward = (ulong)offset;
+        // Clamp to the end without ever forming an overflowing sum.
+        if (forward > self.size || base > self.size - forward)
+        {
+            new_pos = self.size;
+        }
+        else
+        {
+            new_pos = base + forward;
+        }
+    }
+
+    self.pos = new_pos;
+    // Keep the underlying archive window in step with the logical position.
+    self.adapter.pos = new_pos;
+}
+
+<*
+ Opens a reader for an entry.
+ Pending writes are flushed, then the entry's local file header is read to
+ locate where its payload starts.
+ @param filename : `The name of the file to read.`
+ @return `A reader for the entry's data.`
+*>
+fn ZipEntryReader? ZipArchive.open_reader(&self, String filename)
+{
+    ZipEntry? entry = self.stat(filename);
+    if (catch entry) return ENTRY_NOT_FOUND~;
+
+    if (entry.is_encrypted) return ENCRYPTED_FILE~;
+
+    self.file.flush()!;
+    if (entry.offset > (ulong)isz.max) return io::OVERFLOW~;
+    self.file.set_cursor(entry.offset)!;
+    ZipLFH lfh;
+    // A partially read header would leave the length fields zeroed and
+    // silently misplace the payload offset.
+    if (io::read_any(self.file, &lfh)! != ZipLFH.sizeof) return CORRUPTED_DATA~;
+    if (lfh.signature.val != ZIP_LFH_SIG) return CORRUPTED_DATA~;
+
+    ZipEntryReader reader;
+
+    reader.adapter.archive = self;
+    // The payload follows the fixed header, the file name and the extra field.
+    ulong start_offset = entry.offset + ZipLFH.sizeof + lfh.filename_len.val + lfh.extra_field_len.val;
+    if (start_offset > (ulong)isz.max) return io::OVERFLOW~;
+    reader.adapter.start_offset = start_offset;
+    // For STORE: adapter.size is uncompressed size.
+    // For DEFLATE: adapter.size is compressed size.
+    reader.adapter.size = entry.method == STORE
+        ? entry.uncompressed_size
+        : entry.compressed_size;
+    reader.size = entry.uncompressed_size;
+    reader.method = entry.method;
+
+    return reader;
+}
+
+// Streaming writer for a single new archive entry.
+struct ZipEntryWriter (OutStream)
+{
+    ZipArchive* archive;   // Archive the entry is appended to
+    ZipEntry entry;        // Metadata pushed onto archive.entries on close
+    Crc32 crc;             // Running checksum of the uncompressed bytes
+    ZipLFH lfh;            // Local header, rewritten on close with final sizes and CRC
+    ulong lfh_offset;      // File position of the local header
+    char[] buffer;         // Bytes staged for compression (non-STORE entries)
+    usz capacity;          // Allocated size behind `buffer`
+}
+
+fn usz? 
ZipEntryWriter.write(&self, char[] bytes) @dynamic
+{
+    if (bytes.len == 0) return 0;
+
+    self.crc.update(bytes);
+    self.entry.uncompressed_size += (ulong)bytes.len;
+
+    if (self.entry.method == STORE)
+    {
+        // Stored entries go straight to the archive file.
+        usz n = self.archive.file.write(bytes)!;
+        self.entry.compressed_size += n;
+        return n;
+    }
+    else
+    {
+        // Other entries are staged in memory and compressed in close().
+        usz new_len = self.buffer.len + bytes.len;
+        if (new_len > self.capacity)
+        {
+            // Grow geometrically from a 4 KiB floor, falling back to the exact
+            // size when doubling would overflow.
+            usz new_cap = self.capacity;
+            if (new_cap < 4096) new_cap = 4096;
+            while (new_cap < new_len)
+            {
+                if (new_cap > usz.max / 2) { new_cap = new_len; break; }
+                new_cap *= 2;
+            }
+
+            char* p = allocator::realloc_array(self.archive.allocator, self.buffer.ptr, char, new_cap);
+            self.buffer = p[:self.buffer.len]; // Keep length as used size
+            self.capacity = new_cap;
+        }
+
+        mem::copy(self.buffer.ptr + self.buffer.len, bytes.ptr, bytes.len);
+        // Update slice length
+        self.buffer = self.buffer.ptr[:new_len];
+        return bytes.len;
+    }
+}
+
+<*
+ Writes a single byte to the entry.
+ @param c : `The byte to write.`
+*>
+fn void? ZipEntryWriter.write_byte(&self, char c) @dynamic
+{
+    char[1] b = { c };
+    self.write(&b)!;
+}
+
+<*
+ Finishes the entry: compresses and writes any staged data, rewrites the
+ local file header with the final CRC and sizes, and registers the entry
+ with the archive.
+*>
+fn void? ZipEntryWriter.close(&self) @dynamic
+{
+    if (self.entry.method == DEFLATE)
+    {
+        if (self.buffer.len > 0)
+        {
+            // The staging buffer is finished with once this scope ends,
+            // whether or not compression and the write succeed.
+            defer
+            {
+                allocator::free(self.archive.allocator, self.buffer);
+                self.buffer = {};
+                self.capacity = 0;
+            }
+            char[]? data = deflate::compress(self.archive.allocator, self.buffer);
+            if (catch data) return IO_ERROR~;
+            defer allocator::free(self.archive.allocator, data);
+            self.archive.file.write(data)!;
+            self.entry.compressed_size = data.len;
+        }
+        else
+        {
+            // Nothing was written, so no deflate stream exists. Record the
+            // entry as STORE rather than as DEFLATE with a 0-byte payload.
+            self.entry.method = STORE;
+            self.lfh.method = STORE;
+        }
+    }
+
+    self.entry.crc32 = ~self.crc.result;
+
+    self.lfh.crc32.val = self.entry.crc32;
+
+    // ZIP64 sentinel: readers typically fallback to Central Directory for actual sizes.
+    self.lfh.compressed_size.val = (uint)math::min(self.entry.compressed_size, (ulong)0xFFFFFFFF);
+    self.lfh.uncompressed_size.val = (uint)math::min(self.entry.uncompressed_size, (ulong)0xFFFFFFFF);
+
+    // Remember where the payload ended so writing can resume there.
+    long end_pos = self.archive.file.cursor()!;
+
+    if (self.lfh_offset > (ulong)isz.max) return io::OVERFLOW~;
+    self.archive.file.set_cursor(self.lfh_offset)!;
+    io::write_any(self.archive.file, &self.lfh)!;
+
+    self.archive.file.set_cursor(end_pos)!;
+
+    self.archive.entries.push(self.entry);
+}
+
+<*
+ Opens a writer for a new entry.
+ The local file header and the name are written immediately; sizes and CRC
+ are filled in when the writer is closed. Names longer than 65535 bytes do
+ not fit the header's length field and are rejected with INVALID_ARGUMENT.
+ @param filename : `The name of the file to create in the archive.`
+ @param method : `Compression method.`
+ @return `A writer for the new entry.`
+*>
+fn ZipEntryWriter? ZipArchive.open_writer(&self, String filename, ZipMethod method = DEFLATE)
+{
+    if (!self.mode.starts_with("w")) return IO_ERROR~;
+    // filename_len is a 16-bit field; casting a longer name would truncate
+    // the recorded length and corrupt the header.
+    if (filename.len > 0xFFFF) return INVALID_ARGUMENT~;
+
+    ZipEntryWriter writer;
+    writer.archive = self;
+    writer.entry.name = filename.copy(self.allocator);
+    defer catch allocator::free(self.allocator, writer.entry.name);
+
+    writer.entry.method = method;
+    writer.entry.offset = self.file.cursor()!;
+    writer.crc.init();
+    writer.lfh_offset = writer.entry.offset;
+
+    writer.lfh = {
+        .signature.val = ZIP_LFH_SIG,
+        .version_needed.val = 20,
+        .method = method,
+        .flags.val = 0x0800, // UTF-8 flag
+        .filename_len.val = (ushort)filename.len,
+    };
+
+    Time now = time::now();
+    writer.entry.last_mod_time = time_to_dos_time(now);
+    writer.entry.last_mod_date = time_to_dos_date(now);
+    writer.lfh.last_mod_time.val = writer.entry.last_mod_time;
+    writer.lfh.last_mod_date.val = writer.entry.last_mod_date;
+
+    io::write_any(self.file, &writer.lfh)!;
+    self.file.write(filename)!;
+
+    return writer;
+}
+
+// -----------------------------------------------------------------------------
+// PRIVATE IMPLEMENTATION
+// -----------------------------------------------------------------------------
+
+struct ArchiveStreamAdapter (InStream) @private
+{
+    ZipArchive* archive;
+    ulong start_offset;
+    ulong size;
+    ulong pos;
+}
+
+<*
+ Reads from the adapter's window of the archive file.
+ At most `size - pos` bytes are delivered; 0 is returned once the window is
+ exhausted. The shared file cursor is repositioned before every read.
+ @param buffer : `Destination for the bytes read.`
+ @return `The number of bytes read.`
+*>
+fn usz? ArchiveStreamAdapter.read(&self, char[] buffer) @dynamic
+{
+    if (self.pos >= self.size) return 0;
+
+    usz to_read = (usz)math::min((ulong)buffer.len, self.size - self.pos);
+    if (to_read == 0) return 0;
+
+    // `pos` is relative to the window; translate it to a file position.
+    ulong abs_pos = self.start_offset + self.pos;
+    if (abs_pos > (ulong)isz.max) return io::OVERFLOW~;
+
+    // Note: ZipArchive shared file handle access is not thread-safe.
+    self.archive.file.set_cursor(abs_pos)!;
+    usz n = self.archive.file.read(buffer[:to_read])!;
+    self.pos += n;
+    return n;
+}
+
+<*
+ Reads one byte from the window.
+ Yields io::EOF only when the window is exhausted; any other fault raised by
+ the underlying read is passed on unchanged.
+*>
+fn char? ArchiveStreamAdapter.read_byte(&self) @dynamic
+{
+    char[1] b;
+    // Rethrow instead of masking real I/O failures as end-of-stream.
+    usz n = self.read(b[..])!;
+    if (n == 0) return io::EOF~;
+    return b[0];
+}
+
+// Local file header: the fixed-size record written in front of each entry's data.
+struct ZipLFH @packed @private
+{
+    UIntLE signature;
+    UShortLE version_needed;
+    UShortLE flags;
+    ZipMethod method;
+    UShortLE last_mod_time;
+    UShortLE last_mod_date;
+    UIntLE crc32;
+    UIntLE compressed_size;
+    UIntLE uncompressed_size;
+    UShortLE filename_len;
+    UShortLE extra_field_len;
+}
+
+// Central directory header: one fixed-size record per entry in the central directory.
+struct ZipCDH @packed @private
+{
+    UIntLE signature;
+    UShortLE version_made_by;
+    UShortLE version_needed;
+    UShortLE flags;
+    ZipMethod method;
+    UShortLE last_mod_time;
+    UShortLE last_mod_date;
+    UIntLE crc32;
+    UIntLE compressed_size;
+    UIntLE uncompressed_size;
+    UShortLE filename_len;
+    UShortLE extra_field_len;
+    UShortLE comment_len;
+    UShortLE disk_number_start;
+    UShortLE internal_attr;
+    UIntLE external_attr;
+    UIntLE relative_offset;
+}
+
+// End of central directory record, followed in the file by the archive comment.
+struct ZipEOCD @packed @private
+{
+    UIntLE signature;
+    UShortLE disk_number;
+    UShortLE cd_disk_number;
+    UShortLE num_entries_this_disk;
+    UShortLE num_entries;
+    UIntLE cd_size;
+    UIntLE cd_offset;
+    UShortLE comment_len;
+}
+
+// Record signatures, compared against little-endian values read from the file.
+const uint ZIP_LFH_SIG @private = 0x04034B50;
+const uint ZIP_CDH_SIG @private = 0x02014B50;
+const uint ZIP_EOCD_SIG @private = 0x06054B50;
+const uint ZIP64_EOCD_SIG @private = 0x06064B50;
+const uint ZIP64_LOCATOR_SIG @private = 0x07064B50;
+
+// Header id of the ZIP64 extended-information extra field.
+const ushort ZIP64_EXTRA_ID @private = 
0x0001; + +struct Zip64EOCD @packed @private +{ + UIntLE signature; + ULongLE size; // Size of remaining record + UShortLE version_made; + UShortLE version_needed; + UIntLE disk_num; + UIntLE disk_start; + ULongLE count_this_disk; + ULongLE count_total; + ULongLE size_cd; + ULongLE offset_cd; + // char[] custom_data; +} + +struct Zip64Locator @packed @private +{ + UIntLE signature; + UIntLE disk_start; + ULongLE offset_eocd; + UIntLE total_disks; +} + +struct Zip64ExtraField @private +{ + ushort header_id; + ushort size; + ulong uncompressed_size; + ulong compressed_size; + ulong offset; + uint disk_start; +} + +fn ushort time_to_dos_time(Time t) @private +{ + long seconds = (long)t / 1_000_000; + int s = (int)(seconds % 60); + int m = (int)((seconds / 60) % 60); + int h = (int)((seconds / 3600) % 24); + return (ushort)(((h & 0x1F) << 11) | ((m & 0x3F) << 5) | ((s / 2) & 0x1F)); +} + +fn ushort time_to_dos_date(Time t) @private +{ + long seconds = (long)t / 1_000_000; + long days = seconds / 86400; + + if (days < 3652) return (ushort)((0 << 9) | (1 << 5) | 1); // 1980-01-01 + + days -= 3652; + int year = 0; + while (true) + { + int y = 1980 + year; + bool is_leap = (y % 4 == 0 && y % 100 != 0) || (y % 400 == 0); + int year_days = is_leap ? 
366 : 365; + if (days < (long)year_days) break; + days -= (long)year_days; + year++; + } + + int[12] month_days = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; + int y = 1980 + year; + if ((y % 4 == 0 && y % 100 != 0) || (y % 400 == 0)) month_days[1] = 29; + + int month = 0; + while (days >= (long)month_days[month]) + { + days -= (long)month_days[month]; + month++; + } + + int day = (int)days + 1; + month++; + return (ushort)(((year & 0x7F) << 9) | ((month & 0xF) << 5) | (day & 0x1F)); +} + +fn Time dos_date_time_to_time(ushort dos_date, ushort dos_time) @private +{ + int sec = (int)((dos_time & 0x1F) * 2); + int min = (int)((dos_time >> 5) & 0x3F); + int hour = (int)((dos_time >> 11) & 0x1F); + + int day = (int)(dos_date & 0x1F); + int month = (int)((dos_date >> 5) & 0xF); + int year = (int)((dos_date >> 9) & 0x7F) + 1980; + + if (day == 0) day = 1; + if (month == 0) month = 1; + if (month > 12) month = 12; + + // 1970 to 1980 is 3652 days + long total_days = 3652; + + for (int y = 1980; y < year; y++) + { + bool is_leap = (y % 4 == 0 && y % 100 != 0) || (y % 400 == 0); + total_days += is_leap ? 
366 : 365; + } + + bool is_leap_year = (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0); + int[12] mdays = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; + if (is_leap_year) mdays[1] = 29; + + for (int i = 0; i < month - 1; i++) + { + total_days += mdays[i]; + } + + total_days += (long)(day - 1); + + long total_seconds = total_days * 86400; + total_seconds += (long)hour * 3600; + total_seconds += (long)min * 60; + total_seconds += (long)sec; + + return (Time)(total_seconds * 1_000_000); +} + +fn bool is_valid_utf8(char[] bytes) @private +{ + usz i = 0; + while (i < bytes.len) + { + char lead = bytes[i]; + switch (lead) + { + case 0x00..0x7F: + i++; + case 0xC2..0xDF: + if (i + 1 >= bytes.len || (bytes[i + 1] & 0xC0) != 0x80) return false; + i += 2; + case 0xE0..0xEF: + if (i + 2 >= bytes.len) return false; + char b1 = bytes[i + 1]; + char b2 = bytes[i + 2]; + if ((b1 & 0xC0) != 0x80 || (b2 & 0xC0) != 0x80) return false; + if (lead == 0xE0 && b1 < 0xA0) return false; + if (lead == 0xED && b1 >= 0xA0) return false; + i += 3; + case 0xF0..0xF4: + if (i + 3 >= bytes.len) return false; + char b1 = bytes[i + 1]; + char b2 = bytes[i + 2]; + char b3 = bytes[i + 3]; + if ((b1 & 0xC0) != 0x80 || (b2 & 0xC0) != 0x80 || (b3 & 0xC0) != 0x80) return false; + if (lead == 0xF0 && b1 < 0x90) return false; + if (lead == 0xF4 && b1 >= 0x90) return false; + i += 4; + default: + return false; + } + } + return true; +} diff --git a/lib/std/core/dstring.c3 b/lib/std/core/dstring.c3 index 495880c1f..59dd5aa2c 100644 --- a/lib/std/core/dstring.c3 +++ b/lib/std/core/dstring.c3 @@ -658,9 +658,10 @@ fn usz? DString.read_from_stream(&self, InStream reader) if (&reader.available) { usz total_read = 0; - while (usz available = reader.available()!) + while (ulong available = reader.available()!) 
{ - self.reserve(available); + if (available > isz.max) available = (ulong)isz.max; + self.reserve((usz)available); StringData* data = self.data(); usz len = reader.read(data.chars[data.len..(data.capacity - 1)])!; total_read += len; diff --git a/lib/std/core/env.c3 b/lib/std/core/env.c3 index 27c8ab19c..c0e12ddb3 100644 --- a/lib/std/core/env.c3 +++ b/lib/std/core/env.c3 @@ -126,6 +126,7 @@ const bool ARCH_64_BIT = $$REGISTER_SIZE == 64; const bool LIBC = $$COMPILER_LIBC_AVAILABLE; const bool NO_LIBC = !LIBC && !CUSTOM_LIBC; const bool CUSTOM_LIBC = $$CUSTOM_LIBC; +const bool OLD_IO = $feature(OLD_IO); const CompilerOptLevel COMPILER_OPT_LEVEL = CompilerOptLevel.from_ordinal($$COMPILER_OPT_LEVEL); const bool BIG_ENDIAN = $$PLATFORM_BIG_ENDIAN; const bool I128_NATIVE_SUPPORT = $$PLATFORM_I128_SUPPORTED; diff --git a/lib/std/core/runtime_benchmark.c3 b/lib/std/core/runtime_benchmark.c3 index b8abc1a8b..c1828e692 100644 --- a/lib/std/core/runtime_benchmark.c3 +++ b/lib/std/core/runtime_benchmark.c3 @@ -125,10 +125,11 @@ fn bool run_benchmarks(BenchmarkUnit[] benchmarks) char[] perc_str = { [0..19] = ' ', [20] = 0 }; int perc = 0; uint print_step = current_benchmark_iterations / 100; + if (print_step == 0) print_step = 1; for (this_iteration = 0; this_iteration < current_benchmark_iterations; ++this_iteration, benchmark_nano_seconds = {}) { - if (0 == this_iteration % print_step) // only print right about when the % will update + if (this_iteration % print_step == 0) // only print right about when the % will update { perc_str[0..(uint)math::floor((this_iteration / (float)current_benchmark_iterations) * 20)] = '#'; perc = (uint)math::ceil(100 * (this_iteration / (float)current_benchmark_iterations)); diff --git a/lib/std/core/runtime_test.c3 b/lib/std/core/runtime_test.c3 index 1c3bbe0c0..33ae06cb6 100644 --- a/lib/std/core/runtime_test.c3 +++ b/lib/std/core/runtime_test.c3 @@ -142,7 +142,7 @@ fn void mute_output() @local File* stderr = io::stderr(); *stderr = 
test_context.fake_stdout; *stdout = test_context.fake_stdout; - (void)test_context.fake_stdout.seek(0, Seek.SET)!!; + (void)test_context.fake_stdout.set_cursor(0)!!; } fn void unmute_output(bool has_error) @local @@ -155,7 +155,7 @@ fn void unmute_output(bool has_error) @local *stderr = test_context.stored.stderr; *stdout = test_context.stored.stdout; - usz log_size = test_context.fake_stdout.seek(0, Seek.CURSOR)!!; + ulong log_size = test_context.fake_stdout.cursor()!!; if (has_error) { io::printn(test_context.has_ansi_codes ? "[\e[0;31mFAIL\e[0m]" : "[FAIL]"); @@ -165,7 +165,7 @@ fn void unmute_output(bool has_error) @local { test_context.fake_stdout.write_byte('\n')!!; test_context.fake_stdout.write_byte('\0')!!; - (void)test_context.fake_stdout.seek(0, Seek.SET)!!; + test_context.fake_stdout.set_cursor(0)!!; io::printfn("\n========== TEST LOG ============"); io::printfn("%s\n", test_context.current_test_name); diff --git a/lib/std/io/file.c3 b/lib/std/io/file.c3 index 2069673d7..d04497606 100644 --- a/lib/std/io/file.c3 +++ b/lib/std/io/file.c3 @@ -39,11 +39,16 @@ fn bool is_dir(String path) return os::native_is_dir(path); } -fn usz? get_size(String path) +fn ulong? get_size(String path) { return os::native_file_size(path); } +fn void? set_modified_time(String path, Time_t time) +{ + return os::native_set_modified_time(path, time); +} + fn void? delete(String filename) { return os::native_remove(filename) @inline; @@ -63,10 +68,25 @@ fn void? File.reopen(&self, String filename, String mode) *> fn usz? File.seek(&self, isz offset, Seek seek_mode = Seek.SET) @dynamic { - os::native_fseek(self.file, offset, seek_mode)!; - return os::native_ftell(self.file); + os::native_fseek(self.file, offset, (SeekOrigin)seek_mode.ordinal)!; + return (usz)os::native_ftell(self.file); } +<* + @require self.file != null +*> +fn void? 
File.set_cursor(&self, long offset, SeekOrigin whence = FROM_START) @dynamic +{ + return os::native_fseek(self.file, offset, whence); +} + +<* + @require self.file != null +*> +fn long? File.cursor(&self) @dynamic +{ + return os::native_ftell(self.file); +} /* Implement later @@ -118,6 +138,14 @@ fn void? File.close(&self) @inline @dynamic self.file = null; } +fn ulong? File.size(&self) @dynamic +{ + long curr = self.cursor()!; + defer (void)self.set_cursor(curr); + self.set_cursor(0, FROM_END)!; + return self.cursor()!; +} + <* @require self.file != null *> @@ -171,9 +199,8 @@ fn char[]? load_buffer(String filename, char[] buffer) { File file = open(filename, "rb")!; defer (void)file.close(); - usz len = file.seek(0, END)!; + long len = file.size()!; if (len > buffer.len) return io::OVERFLOW~; - file.seek(0, SET)!; usz read = 0; while (read < len) { @@ -187,16 +214,16 @@ fn char[]? load(Allocator allocator, String filename) { File file = open(filename, "rb")!; defer (void)file.close(); - usz len = file.seek(0, END)!; - file.seek(0, SET)!; - char* data = allocator::malloc_try(allocator, len)!; + ulong len = file.size()!; + if (len > usz.max) return io::OUT_OF_SPACE~; + char* data = allocator::malloc_try(allocator, (usz)len)!; defer catch allocator::free(allocator, data); usz read = 0; - while (read < len) + while (read < (usz)len) { - read += file.read(data[read:len - read])!; + read += file.read(data[read:(usz)len - read])!; } - return data[:len]; + return data[:(usz)len]; } fn char[]? load_path(Allocator allocator, Path path) => load(allocator, path.str_view()); diff --git a/lib/std/io/file_mmap.c3 b/lib/std/io/file_mmap.c3 index 6dbbf7c62..267133761 100644 --- a/lib/std/io/file_mmap.c3 +++ b/lib/std/io/file_mmap.c3 @@ -45,10 +45,9 @@ fn FileMmap? 
mmap_file(File file, usz offset = 0, usz len = 0, VirtualMemoryAcce { if (len == 0) { - usz cur = file.seek(0, CURSOR)!; - defer file.seek(cur, SET)!!; - usz file_size = file.seek(0, END)!; - len = file_size - offset; + ulong new_len = file.size()! - offset; + if (new_len > (ulong)isz.max) return mem::OUT_OF_MEMORY~; + len = (usz)new_len; } // get the page size diff --git a/lib/std/io/io.c3 b/lib/std/io/io.c3 index 48d1e6111..a3b19416f 100644 --- a/lib/std/io/io.c3 +++ b/lib/std/io/io.c3 @@ -11,6 +11,14 @@ enum Seek END } +enum SeekOrigin +{ + FROM_START, + FROM_CURSOR, + FROM_END +} + + faultdef ALREADY_EXISTS, BUSY, diff --git a/lib/std/io/os/file_libc.c3 b/lib/std/io/os/file_libc.c3 index 49855ae24..f853e0af5 100644 --- a/lib/std/io/os/file_libc.c3 +++ b/lib/std/io/os/file_libc.c3 @@ -49,16 +49,16 @@ fn void*? native_freopen(void* file, String filename, String mode) @inline => @ return file ?: file_open_errno()~; } -fn void? native_fseek(void* file, isz offset, Seek seek_mode) @inline +fn void? native_fseek(void* file, long offset, SeekOrigin seek_mode) @inline { if (libc::fseek(file, (SeekIndex)offset, seek_mode.ordinal)) return file_seek_errno()~; } -fn usz? native_ftell(CFile file) @inline +fn long? native_ftell(CFile file) @inline { long index = libc::ftell(file); - return index >= 0 ? (usz)index : file_seek_errno()~; + return index >= 0 ? index : file_seek_errno()~; } fn usz? native_fwrite(CFile file, char[] buffer) @inline @@ -123,3 +123,22 @@ macro fault file_seek_errno() @local } } + +struct Utimbuf +{ + Time_t actime; + Time_t modtime; +} + +extern fn int utime(char* filename, void* times) @if(!env::WIN32); +extern fn int _wutime(WChar* filename, void* times) @if(env::WIN32); + +fn void? 
native_set_modified_time(String filename, libc::Time_t time) => @stack_mem(256; Allocator smem) +{ + Utimbuf times = { time, time }; + $if env::WIN32: + if (_wutime(filename.to_wstring(smem)!, &times)) return io::GENERAL_ERROR~; + $else + if (utime(filename.zstr_copy(smem), &times)) return io::GENERAL_ERROR~; + $endif +} diff --git a/lib/std/io/os/file_nolibc.c3 b/lib/std/io/os/file_nolibc.c3 index 25c37fbf1..e073d24bb 100644 --- a/lib/std/io/os/file_nolibc.c3 +++ b/lib/std/io/os/file_nolibc.c3 @@ -4,12 +4,13 @@ import libc; alias FopenFn = fn void*?(String, String); alias FreopenFn = fn void*?(void*, String, String); alias FcloseFn = fn void?(void*); -alias FseekFn = fn void?(void*, isz, Seek); -alias FtellFn = fn usz?(void*); +alias FseekFn = fn void?(void*, long, SeekOrigin); +alias FtellFn = fn long?(void*); alias FwriteFn = fn usz?(void*, char[] buffer); alias FreadFn = fn usz?(void*, char[] buffer); alias RemoveFn = fn void?(String); alias FputcFn = fn void?(int, void*); +alias SetModifiedTimeFn = fn void?(String, libc::Time_t); FopenFn native_fopen_fn @weak @if(!$defined(native_fopen_fn)); FcloseFn native_fclose_fn @weak @if(!$defined(native_fclose_fn)); @@ -20,6 +21,7 @@ FwriteFn native_fwrite_fn @weak @if(!$defined(native_fwrite_fn)); FreadFn native_fread_fn @weak @if(!$defined(native_fread_fn)); RemoveFn native_remove_fn @weak @if(!$defined(native_remove_fn)); FputcFn native_fputc_fn @weak @if(!$defined(native_fputc_fn)); +SetModifiedTimeFn native_set_modified_time_fn @weak @if(!$defined(native_set_modified_time_fn)); <* @require mode.len > 0 @@ -52,13 +54,13 @@ fn void*? native_freopen(void* file, String filename, String mode) @inline return io::UNSUPPORTED_OPERATION~; } -fn void? native_fseek(void* file, isz offset, Seek seek_mode) @inline +fn void? 
native_fseek(void* file, long offset, SeekOrigin whence) @inline { - if (native_fseek_fn) return native_fseek_fn(file, offset, seek_mode); + if (native_fseek_fn) return native_fseek_fn(file, offset, whence); return io::UNSUPPORTED_OPERATION~; } -fn usz? native_ftell(CFile file) @inline +fn ulong? native_ftell(CFile file) @inline { if (native_ftell_fn) return native_ftell_fn(file); return io::UNSUPPORTED_OPERATION~; @@ -81,3 +83,9 @@ fn void? native_fputc(CInt c, CFile stream) @inline if (native_fputc_fn) return native_fputc_fn(c, stream); return io::UNSUPPORTED_OPERATION~; } + +fn void? native_set_modified_time(String filename, libc::Time_t time) @inline +{ + if (native_set_modified_time_fn) return native_set_modified_time_fn(filename, time); + return io::UNSUPPORTED_OPERATION~; +} diff --git a/lib/std/io/os/fileinfo.c3 b/lib/std/io/os/fileinfo.c3 index cb720d7d3..347119f47 100644 --- a/lib/std/io/os/fileinfo.c3 +++ b/lib/std/io/os/fileinfo.c3 @@ -47,14 +47,15 @@ fn usz? native_file_size(String path) @if(env::WIN32) => @pool() return (usz)size.quadPart; } -fn usz? native_file_size(String path) @if(!env::WIN32 && !env::DARWIN && !env::LINUX && !env::ANDROID && !env::BSD_FAMILY) +fn ulong? native_file_size(String path) @if(!env::WIN32 && !env::DARWIN && !env::LINUX && !env::ANDROID && !env::BSD_FAMILY) { File f = file::open(path, "r")!; defer (void)f.close(); - return f.seek(0, Seek.END)!; + f.set_cursor(0, FROM_END)!; + return f.cursor(); } -fn usz? native_file_size(String path) @if(env::DARWIN || env::LINUX || env::ANDROID || env::BSD_FAMILY) +fn ulong? native_file_size(String path) @if(env::DARWIN || env::LINUX || env::ANDROID || env::BSD_FAMILY) { Stat stat; native_stat(&stat, path)!; diff --git a/lib/std/io/path.c3 b/lib/std/io/path.c3 index f2ede5689..ff0ca5586 100644 --- a/lib/std/io/path.c3 +++ b/lib/std/io/path.c3 @@ -36,7 +36,7 @@ fn Path? 
cwd(Allocator allocator) fn bool is_dir(Path path) => os::native_is_dir(path.str_view()); fn bool is_file(Path path) => os::native_is_file(path.str_view()); -fn usz? file_size(Path path) => os::native_file_size(path.str_view()); +fn ulong? file_size(Path path) => os::native_file_size(path.str_view()); fn bool exists(Path path) => os::native_file_or_dir_exists(path.str_view()); fn Path? tcwd() => cwd(tmem) @inline; diff --git a/lib/std/io/stream.c3 b/lib/std/io/stream.c3 index 48ccd94bd..5a57ec884 100644 --- a/lib/std/io/stream.c3 +++ b/lib/std/io/stream.c3 @@ -1,12 +1,20 @@ module std::io; import std::math; + + +alias SetCursorFn = fn void?(void*, long offset, SeekOrigin whence = START); + + interface InStream { fn void? close() @optional; + fn long? cursor() @optional; + fn void? set_cursor(long offset, SeekOrigin whence = FROM_START) @optional; fn usz? seek(isz offset, Seek seek) @optional; fn usz len() @optional; - fn usz? available() @optional; + fn ulong? size() @optional; + fn ulong? available() @optional; fn usz? read(char[] buffer); fn char? read_byte(); fn usz? write_to(OutStream out) @optional; @@ -24,15 +32,23 @@ interface OutStream fn usz? read_to(InStream in) @optional; } -fn usz? available(InStream s) +fn ulong? available(InStream s) { if (&s.available) return s.available(); + if (&s.set_cursor && &s.cursor) + { + long curr = s.cursor()!; + s.set_cursor(0, FROM_END)!; + ulong len = s.cursor()!; + s.set_cursor(curr)!; + return len - curr; + } if (&s.seek) { usz curr = s.seek(0, Seek.CURSOR)!; usz len = s.seek(0, Seek.END)!; s.seek(curr, Seek.SET)!; - return len - curr; + return (ulong)len - (ulong)curr; } return io::UNSUPPORTED_OPERATION~; } @@ -177,6 +193,11 @@ macro usz? write_using_write_byte(s, char[] bytes) macro void? pushback_using_seek(s) { + if (&s.set_cursor) + { + s.set_cursor(-1, FROM_CURSOR)!; + return; + } s.seek(-1, CURSOR)!; } @@ -407,11 +428,11 @@ macro ulong? 
read_le_ulong(stream) { ulong val = (ulong)stream.read_byte()!; val += (ulong)stream.read_byte()! << 8; - val += (ulong)stream.read_byte()! << 16; + val += (ulong)stream.read_byte()! << 16; val += (ulong)stream.read_byte()! << 24; val += (ulong)stream.read_byte()! << 32; val += (ulong)stream.read_byte()! << 40; - val += (ulong)stream.read_byte()! << 48; + val += (ulong)stream.read_byte()! << 48; return val + (ulong)stream.read_byte()! << 56; } @@ -621,24 +642,30 @@ macro void? skip(stream, usz bytes) { if (!bytes) return; $switch: - $case !$defined(stream.seek): - for (usz i = 0; i < bytes; i++) - { - stream.read()!; - } - return; $case $typeof(stream) == InStream: - if (!&stream.seek) - { - for (usz i = 0; i < bytes; i++) - { - stream.read()!; - } - return; - } + if (!&stream.seek && !&stream.set_cursor) + { + for (usz i = 0; i < bytes; i++) + { + stream.read()!; + } + return; + } + if (!&stream.set_cursor) + { + stream.seek(bytes, CURSOR)!; + return; + } + stream.set_cursor(bytes, FROM_CURSOR)!; + $case $defined(stream.set_cursor): + stream.set_cursor(bytes, FROM_CURSOR)!; + $case $defined(stream.seek): stream.seek(bytes, CURSOR)!; $default: - stream.seek(bytes, CURSOR)!; + for (usz i = 0; i < bytes; i++) + { + stream.read()!; + } $endswitch } diff --git a/lib/std/io/stream/bytebuffer.c3 b/lib/std/io/stream/bytebuffer.c3 index b7832bab0..43034788f 100644 --- a/lib/std/io/stream/bytebuffer.c3 +++ b/lib/std/io/stream/bytebuffer.c3 @@ -104,28 +104,37 @@ fn void? ByteBuffer.pushback_byte(&self) @dynamic self.has_last = false; } -fn usz? ByteBuffer.seek(&self, isz offset, Seek seek) @dynamic +fn long? 
ByteBuffer.cursor(&self) @dynamic { - switch (seek) - { - case SET: - if (offset < 0 || offset > self.write_idx) return INVALID_POSITION~; - self.read_idx = offset; - return offset; - case CURSOR: - if ((offset < 0 && self.read_idx < -offset) || - (offset > 0 && self.read_idx + offset > self.write_idx)) return INVALID_POSITION~; - self.read_idx += offset; - case END: - if (offset < 0 || offset > self.write_idx) return INVALID_POSITION~; - self.read_idx = self.write_idx - offset; - } return self.read_idx; } -fn usz? ByteBuffer.available(&self) @inline @dynamic +fn void? ByteBuffer.set_cursor(&self, long offset, SeekOrigin whence = FROM_START) @dynamic { - return self.write_idx - self.read_idx; + switch (whence) + { + case FROM_START: + if (offset < 0 || offset > self.write_idx) return INVALID_POSITION~; + self.read_idx = (usz)offset; + case FROM_CURSOR: + if ((offset < 0 && self.read_idx < -offset) || + (offset > 0 && self.read_idx + offset > self.write_idx)) return INVALID_POSITION~; + self.read_idx += (usz)offset; + case FROM_END: + if (offset < 0 || offset > self.write_idx) return INVALID_POSITION~; + self.read_idx = self.write_idx - (usz)offset; + } +} + +fn usz? ByteBuffer.seek(&self, isz offset, Seek seek) @dynamic +{ + self.set_cursor(offset, (SeekOrigin)seek.ordinal)!; + return (usz)self.cursor(); +} + +fn ulong? ByteBuffer.available(&self) @inline @dynamic +{ + return (ulong)self.write_idx - self.read_idx; } fn void ByteBuffer.grow(&self, usz n) diff --git a/lib/std/io/stream/bytereader.c3 b/lib/std/io/stream/bytereader.c3 index d74ae1171..32a649691 100644 --- a/lib/std/io/stream/bytereader.c3 +++ b/lib/std/io/stream/bytereader.c3 @@ -41,16 +41,26 @@ fn void? ByteReader.pushback_byte(&self) @dynamic fn usz? ByteReader.seek(&self, isz offset, Seek seek) @dynamic { - isz new_index; - switch (seek) + self.set_cursor((long)offset, (SeekOrigin)seek.ordinal)!; + return (usz)self.cursor(); +} + +fn long? 
ByteReader.cursor(&self) @dynamic +{ + return self.index; +} + +fn void? ByteReader.set_cursor(&self, long offset, SeekOrigin whence = FROM_START) @dynamic +{ + long new_index; + switch (whence) { - case SET: new_index = offset; - case CURSOR: new_index = self.index + offset; - case END: new_index = self.bytes.len + offset; + case FROM_START: new_index = offset; + case FROM_CURSOR: new_index = self.index + offset; + case FROM_END: new_index = self.bytes.len + offset; } - if (new_index < 0) return INVALID_POSITION~; - self.index = new_index; - return new_index; + if (new_index < 0 || new_index > self.bytes.len) return INVALID_POSITION~; + self.index = (usz)new_index; } fn usz? ByteReader.write_to(&self, OutStream writer) @dynamic @@ -62,7 +72,7 @@ fn usz? ByteReader.write_to(&self, OutStream writer) @dynamic return written; } -fn usz? ByteReader.available(&self) @inline @dynamic +fn ulong? ByteReader.available(&self) @inline @dynamic { return max(0, self.bytes.len - self.index); -} \ No newline at end of file +} diff --git a/lib/std/io/stream/bytewriter.c3 b/lib/std/io/stream/bytewriter.c3 index 146c54d27..ffebc71f6 100644 --- a/lib/std/io/stream/bytewriter.c3 +++ b/lib/std/io/stream/bytewriter.c3 @@ -86,9 +86,10 @@ fn usz? ByteWriter.read_from(&self, InStream reader) @dynamic usz start_index = self.index; if (&reader.available) { - while (usz available = reader.available()!) + while (ulong available = reader.available()!) { - self.ensure_capacity(self.index + available)!; + if (available > usz.max) return OUT_OF_SPACE~; + self.ensure_capacity(self.index + (usz)available)!; usz read = reader.read(self.bytes[self.index..])!; self.index += read; } diff --git a/lib/std/io/stream/limitreader.c3 b/lib/std/io/stream/limitreader.c3 index 3e03d6274..b5fe229a7 100644 --- a/lib/std/io/stream/limitreader.c3 +++ b/lib/std/io/stream/limitreader.c3 @@ -38,7 +38,7 @@ fn char? LimitReader.read_byte(&self) @dynamic return self.wrapped_stream.read_byte(); } -fn usz? 
LimitReader.available(&self) @inline @dynamic +fn ulong? LimitReader.available(&self) @inline @dynamic { return self.limit; } \ No newline at end of file diff --git a/lib/std/os/linux/linux.c3 b/lib/std/os/linux/linux.c3 index 0804ea351..3bf757799 100644 --- a/lib/std/os/linux/linux.c3 +++ b/lib/std/os/linux/linux.c3 @@ -186,7 +186,7 @@ fn ulong? elf_module_image_base(String path) @local bool is_little_endian = file.read_byte()! == 1; // Actually, not supported. if (!is_little_endian) return backtrace::IMAGE_NOT_FOUND~; - file.seek(0)!; + file.set_cursor(0)!; if (is_64) { Elf64_Ehdr file_header; @@ -195,7 +195,7 @@ fn ulong? elf_module_image_base(String path) @local for (isz i = 0; i < file_header.e_phnum; i++) { Elf64_Phdr header; - file.seek((usz)file_header.e_phoff + (usz)file_header.e_phentsize * i)!; + file.set_cursor(file_header.e_phoff + (long)file_header.e_phentsize * i)!; io::read_any(&file, &header)!; if (header.p_type == PT_PHDR) return header.p_vaddr - header.p_offset; } @@ -207,7 +207,7 @@ fn ulong? elf_module_image_base(String path) @local for (isz i = 0; i < file_header.e_phnum; i++) { Elf32_Phdr header; - file.seek(file_header.e_phoff + (usz)file_header.e_phentsize * i)!; + file.set_cursor(file_header.e_phoff + (long)file_header.e_phentsize * i)!; io::read_any(&file, &header)!; if (header.p_type == PT_PHDR) return (ulong)header.p_vaddr - header.p_offset; } diff --git a/releasenotes.md b/releasenotes.md index 6492affe9..5ddf490c2 100644 --- a/releasenotes.md +++ b/releasenotes.md @@ -33,6 +33,9 @@ - Add `array::even`, `array::odd`, and `array::unlace` macros. #2892 - Add discrete and continuous distributions in `std::math::distributions`. - Add bitorder functions `store_le`, `load_le`, `store_be`, `store_le`. +- Stream functions now use long/ulong rather than isz/usz for seek/available. +- `instream.seek` is replaced by `set_cursor` and `cursor`. +- `instream.available`, `cursor` etc are long/ulong rather than isz/usz to be correct on 32-bit. 
### Fixes - Add error message if directory with output file name already exists diff --git a/test/compression/deflate_benchmark.c3 b/test/compression/deflate_benchmark.c3 new file mode 100644 index 000000000..5a9abd45c --- /dev/null +++ b/test/compression/deflate_benchmark.c3 @@ -0,0 +1,207 @@ +// 1. `gcc -O3 -c dependencies/miniz/miniz.c -o build/miniz.o` +// 2. `build/c3c -O3 compile-run test/compression/deflate_benchmark.c3 build/miniz.o` + +module deflate_benchmark; +import std, std::time::clock; + +const int AMOUNT_OF_WORK = 10; // Increase this to scale test data sizes + +fn int main(String[] args) +{ + io::printf("\n%s%s DEFLATE BENCHMARK %s", Ansi.BOLD, Ansi.BG_CYAN, Ansi.RESET); + io::printfn(" Comparing C3 std::compression::deflate with miniz (in-process)\n"); + io::printfn(" Work Scale: %dx\n", AMOUNT_OF_WORK); + + io::printfn("%-26s | %7s | %7s | %7s | %7s | %-10s", "Test Case", "C3 Rat.", "Miz Rat.", "C3 MB/s", "Miz MB/s", "Winner"); + io::printfn("---------------------------+---------+---------+---------+---------+-----------"); + + // Test 1: Redundant data + usz redundant_size = 10_000_000 * (usz)AMOUNT_OF_WORK; + char[] redundant = allocator::alloc_array(tmem, char, redundant_size); + mem::set(redundant.ptr, 'A', redundant_size); + run_bench(string::tformat("Redundant (%dMB 'A')", (int)(redundant_size / 1_000_000)), redundant); + + // Test 2: Large Source Project (All .c files in src/compiler) + DString project_src; + Path src_dir = path::new(tmem, "src/compiler")!!; + PathList? 
compiler_files = path::ls(tmem, src_dir); + if (try files = compiler_files) { + for (int i = 0; i < AMOUNT_OF_WORK; i++) { + foreach (p : files) { + if (p.basename().ends_with(".c")) { + Path full_p = src_dir.tappend(p.str_view())!!; + if (try data = file::load_path(tmem, full_p)) { + project_src.append(data); + } + } + } + } + } + run_bench("Compiler Source (Bulk)", project_src.str_view()); + + // Test 3: Standard Library (All .c3 files in lib/std) + DString std_src; + for (int i = 0; i < AMOUNT_OF_WORK; i++) { + collect_files(path::new(tmem, "lib/std")!!, ".c3", &std_src); + } + run_bench("Stdlib Source (Bulk)", std_src.str_view()); + + // Test 4: Log Files (Simulated) + DString log_data; + for (int i = 0; i < 50_000 * AMOUNT_OF_WORK; i++) { + log_data.appendf("2024-02-13 21:30:%02d.%03d [INFO] Connection established from 192.168.1.%d\n", i % 60, i % 1000, i % 255); + log_data.appendf("2024-02-13 21:30:%02d.%03d [DEBUG] Buffer size: %d bytes\n", i % 60, i % 1000, (i * 123) % 4096); + } + run_bench("Log Files (Simulated)", log_data.str_view()); + + // Test 5: Web Content (Simulated HTML/CSS) + DString web_data; + web_data.append("
"); + for (int i = 0; i < 1000 * AMOUNT_OF_WORK; i++) { + web_data.appendf("This is some repetitive descriptive text that might appear on a web page.
"); + web_data.append("