Files
c3c/lib/std/os/linux/linux.c3
Manu Linares eae7d0c4a1 stdlib: std::compression::zip and std::compression::deflate (#2930)
* stdlib: implement `std::compression::zip` and `std::compression::deflate`

- C3 implementation of DEFLATE (RFC 1951) and ZIP archive handling.
- Support for reading and writing archives using STORE and DEFLATE
methods.
- Decompression supports both fixed and dynamic Huffman blocks.
- Compression using greedy LZ77 matching.
- Zero dependencies on libc.
- Stream-based entry reading and writing.
- Full unit test coverage.

NOTE: This is an initial implementation. Future improvements could be:

- Optimization of the LZ77 matching (lazy matching).
- Support for dynamic Huffman blocks in compression.
- ZIP64 support for large files/archives.
- Support for encryption and additional compression methods.

* optimizations+refactoring

deflate:
- replace linear search with hash-based match finding.
- implement support for dynamic Huffman blocks using the Package-Merge
algorithm.
- add streaming decompression.
- add buffered StreamBitReader.

zip:
- add ZIP64 support.
- add CP437 and UTF-8 filename encoding detection.
- add DOS date/time conversion and timestamp preservation.
- add ZipEntryReader for streaming entry reads.
- implement ZipArchive.extract and ZipArchive.recover helpers.

other:
- Add `set_modified_time` to std::io;
- Add benchmarks and a few more unit tests.

* zip: add archive comment support

add tests

* forgot to rename the benchmark :(

* detect utf8 names on weird zips

fix method not passed to open_writer

* another edge case where directory doesn't end with /

* testing utilities

- detect encrypted zip
- `ZipArchive.open_writer` default to DEFLATE

* fix zip64 creation, add tests

* fix ZIP header endianness for big-endian compatibility

Update ZipLFH, ZipCDH, ZipEOCD, Zip64EOCD, and Zip64Locator structs to
use little-endian bitstruct types from std::core::bitorder

* fix ZipEntryReader position tracking and seek logic for ZIP_METHOD_STORE

added a test to track this

* add package-merge algorithm attribution

Thanks @konimarti

* standalone deflate_benchmark.c3 against `miniz`

* fix integer overflows, leaks and improve safety

* a few safety fixes for 32-bit systems and tests

* deflate compress optimization

* improve match finding, hash updates, and buffer usage

* use ulong for zip offsets

* style changes (#18)

* style changes

* update tests

* style changes in `deflate.c3`

* fix typo

* Allocator first. Some changes to deflate to use `copy_to`

* Fix missing conversion on 32 bits.

* Fix deflate stream. Formatting. Prefer switch over if-elseif

* - Stream functions now use long/ulong rather than isz/usz for seek/available.
- `instream.seek` is replaced by `set_cursor` and `cursor`.
- `instream.available`, `cursor` etc are long/ulong rather than isz/usz to be correct on 32-bit.

* Update to constdef

* Fix test

---------

Co-authored-by: Book-reader <thevoid@outlook.co.nz>
Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
2026-02-20 20:41:34 +01:00

318 lines
8.8 KiB
Plaintext

module std::os::linux @if(env::LINUX);
import libc, std::os, std::io, std::collections::list, std::net::os;
// https://man7.org/linux/man-pages/man3/inet_ntop.3.html
// Binding for libc inet_ntop(): converts a binary network address into its
// text form, written into a caller-supplied buffer.
// Parameters, in order (per the man page): address family, pointer to the
// binary address, destination buffer, size of the destination buffer.
// NOTE(review): the man page declares the return type as `const char*`
// (the destination buffer, or NULL on error), not `char**` - confirm before
// relying on the static type of the returned value.
extern fn char** inet_ntop(int, void*, char*, Socklen_t);
// https://linux.die.net/man/3/ntohs
// The four declarations below bind the libc byte-order conversion helpers.
// "hton*" converts host order to network order, "ntoh*" converts back;
// the "l" variants operate on uint, the "s" variants on ushort.
<*
* The htonl() function converts the unsigned integer hostlong from host byte order to network byte order.
*>
extern fn uint htonl(uint hostlong);
<*
* The htons() function converts the unsigned short integer hostshort from host byte order to network byte order.
*>
extern fn ushort htons(ushort hostshort);
<*
* The ntohl() function converts the unsigned integer netlong from network byte order to host byte order.
*>
extern fn uint ntohl(uint netlong);
<*
* The ntohs() function converts the unsigned short integer netshort from network byte order to host byte order.
*>
extern fn ushort ntohs(ushort netshort);
// https://man7.org/linux/man-pages/man3/bzero.3.html
// Parameters, in order: pointer to the start of the area (s), number of bytes to clear (n).
<*
* The bzero() function erases the data in the n bytes of the memory
* starting at the location pointed to by s, by writing zeros (bytes
* containing '\0') to that area.
*>
extern fn void bzero(char*, usz);
// https://man7.org/linux/man-pages/man2/readlink.2.html
// Binding for readlink(2): stores the target of the symbolic link `path`
// into `buf`, writing at most `bufsize` bytes.
// Per the man page the stored text is NOT null-terminated; the return value
// is the number of bytes placed in `buf`, or -1 on error.
extern fn isz readlink(ZString path, char* buf, usz bufsize);
// Program header type value that elf_module_image_base() searches for.
// Per elf(5) this is the entry describing the program header table itself.
const PT_PHDR = 6;
// Length in bytes of the e_ident array that starts both ELF file headers below.
const EI_NIDENT = 16;
// Scalar types used by the 32-bit ELF structures below.
alias Elf32_Half = ushort; // 16-bit unsigned value
alias Elf32_Word = uint; // 32-bit unsigned value
alias Elf32_Addr = uint; // 32-bit address
alias Elf32_Off = uint; // 32-bit file offset
// 32-bit ELF file header, read from offset 0 of the file by
// elf_module_image_base(). Field descriptions follow elf(5); the field
// order is the on-disk layout and must not be changed.
struct Elf32_Ehdr
{
char[EI_NIDENT] e_ident; // identification bytes (magic, class, data encoding, ...)
Elf32_Half e_type; // object file type
Elf32_Half e_machine; // target architecture
Elf32_Word e_version; // file version
Elf32_Addr e_entry; // entry point address
Elf32_Off e_phoff; // file offset of the program header table
Elf32_Off e_shoff; // file offset of the section header table
Elf32_Word e_flags; // processor-specific flags
Elf32_Half e_ehsize; // size of this header; checked against Elf32_Ehdr.sizeof
Elf32_Half e_phentsize; // size of one program header table entry
Elf32_Half e_phnum; // number of program header table entries
Elf32_Half e_shentsize; // size of one section header table entry
Elf32_Half e_shnum; // number of section header table entries
Elf32_Half e_shstrndx; // section header index of the section name string table
}
// 32-bit ELF program header (one entry of the program header table).
// Field descriptions follow elf(5); the field order is the on-disk layout.
struct Elf32_Phdr
{
Elf32_Word p_type; // segment type; compared against PT_PHDR
Elf32_Off p_offset; // file offset of the segment
Elf32_Addr p_vaddr; // virtual address of the segment
Elf32_Addr p_paddr; // physical address of the segment
Elf32_Word p_filesz; // size of the segment in the file
Elf32_Word p_memsz; // size of the segment in memory
Elf32_Word p_flags; // segment flags
Elf32_Word p_align; // segment alignment
}
// 32-bit ELF section header. Not referenced by the functions visible in this
// file. Field descriptions follow elf(5); the field order is the on-disk layout.
struct Elf32_Shdr
{
Elf32_Word sh_name; // index of the section name in the section name string table
Elf32_Word sh_type; // section type
Elf32_Word sh_flags; // section flags
Elf32_Addr sh_addr; // address of the section in memory, if loaded
Elf32_Off sh_offset; // file offset of the section contents
Elf32_Word sh_size; // size of the section in bytes
Elf32_Word sh_link; // section header table index link
Elf32_Word sh_info; // extra, type-dependent information
Elf32_Word sh_addralign; // required alignment of the section
Elf32_Word sh_entsize; // entry size for sections holding fixed-size entries
}
// Scalar types used by the 64-bit ELF structures below.
alias Elf64_Addr = ulong; // 64-bit address
alias Elf64_Half = ushort; // 16-bit unsigned value
alias Elf64_Off = ulong; // 64-bit file offset
alias Elf64_Word = uint; // 32-bit unsigned value
alias Elf64_Sword = int; // 32-bit signed value
alias Elf64_Sxword = long; // 64-bit signed value
alias Elf64_Lword = ulong; // 64-bit unsigned value
alias Elf64_Xword = ulong; // 64-bit unsigned value
// 64-bit ELF file header, read from offset 0 of the file by
// elf_module_image_base(). Field descriptions follow elf(5); the field
// order is the on-disk layout and must not be changed.
struct Elf64_Ehdr
{
char[EI_NIDENT] e_ident; // identification bytes (magic, class, data encoding, ...)
Elf64_Half e_type; // object file type
Elf64_Half e_machine; // target architecture
Elf64_Word e_version; // file version
Elf64_Addr e_entry; // entry point address
Elf64_Off e_phoff; // file offset of the program header table
Elf64_Off e_shoff; // file offset of the section header table
Elf64_Word e_flags; // processor-specific flags
Elf64_Half e_ehsize; // size of this header; checked against Elf64_Ehdr.sizeof
Elf64_Half e_phentsize; // size of one program header table entry
Elf64_Half e_phnum; // number of program header table entries
Elf64_Half e_shentsize; // size of one section header table entry
Elf64_Half e_shnum; // number of section header table entries
Elf64_Half e_shstrndx; // section header index of the section name string table
}
// 64-bit ELF program header (one entry of the program header table).
// Unlike Elf32_Phdr, p_flags directly follows p_type here.
// Field descriptions follow elf(5); the field order is the on-disk layout.
struct Elf64_Phdr
{
Elf64_Word p_type; // segment type; compared against PT_PHDR
Elf64_Word p_flags; // segment flags
Elf64_Off p_offset; // file offset of the segment
Elf64_Addr p_vaddr; // virtual address of the segment
Elf64_Addr p_paddr; // physical address of the segment
Elf64_Xword p_filesz; // size of the segment in the file
Elf64_Xword p_memsz; // size of the segment in memory
Elf64_Xword p_align; // segment alignment
}
// 64-bit ELF section header. Not referenced by the functions visible in this
// file. Field descriptions follow elf(5); the field order is the on-disk layout.
struct Elf64_Shdr
{
Elf64_Word sh_name; // index of the section name in the section name string table
Elf64_Word sh_type; // section type
Elf64_Xword sh_flags; // section flags
Elf64_Addr sh_addr; // address of the section in memory, if loaded
Elf64_Off sh_offset; // file offset of the section contents
Elf64_Xword sh_size; // size of the section in bytes
Elf64_Word sh_link; // section header table index link
Elf64_Word sh_info; // extra, type-dependent information
Elf64_Xword sh_addralign; // required alignment of the section
Elf64_Xword sh_entsize; // entry size for sections holding fixed-size entries
}
// Binding for dladdr(3): fills `info` with details about the loaded object
// (and nearest symbol) that contains `addr`.
// backtrace_add_element() treats a return value of 0 as "no object found".
extern fn CInt dladdr(void* addr, Linux_Dl_info* info);
// Result record filled in by dladdr().
// dli_sname may be null; backtrace_add_from_dlinfo() substitutes "???" in that case.
struct Linux_Dl_info
{
ZString dli_fname; /* Pathname of shared object */
void* dli_fbase; /* Base address of shared object */
ZString dli_sname; /* Name of nearest symbol */
void* dli_saddr; /* Address of nearest symbol */
}
// Callback signatures for the dl_iterate_phdr bindings below: the arguments are
// a pointer to the per-object info record, a size value and the caller's `data`
// pointer (per dl_iterate_phdr(3) the size is that of the info record).
alias Dl_iterate_phdr_callback64 = fn CInt(Linux_dl_phdr_info_64*, usz, void*);
alias Dl_iterate_phdr_callback32 = fn CInt(Linux_dl_phdr_info_32*, usz, void*);
// Bindings for iterating over the loaded shared objects, one per ELF class.
// NOTE(review): libc exports this function as `dl_iterate_phdr`; no symbol-name
// override is visible on these declarations - confirm how the 64/32 names
// resolve at link time.
extern fn CInt dl_iterate_phdr64(Dl_iterate_phdr_callback64 callback, void* data);
extern fn CInt dl_iterate_phdr32(Dl_iterate_phdr_callback32 callback, void* data);
// Per-object record handed to a Dl_iterate_phdr_callback64.
// Field descriptions follow dl_iterate_phdr(3).
struct Linux_dl_phdr_info_64
{
Elf64_Addr dlpi_addr; // base address of the object
ZString dlpi_name; // name of the object
Elf64_Phdr* dlpi_phdr; // pointer to the object's program header array
Elf64_Half dlpi_phnum; // number of entries in dlpi_phdr
ulong dlpi_adds; // counter of object additions
ulong dlpi_subs; // counter of object removals
usz dlpi_tsl_modid; // TLS module id (libc spells this field dlpi_tls_modid)
void* dlpi_tls_data; // address of the calling thread's TLS block for this object
}
// Per-object record handed to a Dl_iterate_phdr_callback32.
// Field descriptions follow dl_iterate_phdr(3).
struct Linux_dl_phdr_info_32
{
Elf32_Addr dlpi_addr; // base address of the object
ZString dlpi_name; // name of the object
Elf32_Phdr* dlpi_phdr; // pointer to the object's program header array
Elf32_Half dlpi_phnum; // number of entries in dlpi_phdr
ulong dlpi_adds; // counter of object additions
ulong dlpi_subs; // counter of object removals
usz dlpi_tsl_modid; // TLS module id (libc spells this field dlpi_tls_modid)
void* dlpi_tls_data; // address of the calling thread's TLS block for this object
}
<*
 Compute the image base of the ELF file at `path`.

 The file must start with the ELF magic and be little-endian, otherwise
 backtrace::IMAGE_NOT_FOUND is returned. The program headers are scanned for
 the first PT_PHDR entry and its `p_vaddr - p_offset` is returned; 0 is
 returned when no such entry exists. I/O failures are propagated.
*>
fn ulong? elf_module_image_base(String path) @local
{
	File elf = file::open(path, "rb")!;
	defer (void)elf.close();

	// e_ident[0..3]: magic number.
	char[4] magic;
	io::read_all(&elf, &magic)!;
	if (magic != { 0x7f, 'E', 'L', 'F'}) return backtrace::IMAGE_NOT_FOUND~;

	// e_ident[4] selects the ELF class (2 means 64-bit), e_ident[5] the byte order (1 means little-endian).
	char elf_class = elf.read_byte()!;
	char byte_order = elf.read_byte()!;
	// Big-endian images are not supported.
	if (byte_order != 1) return backtrace::IMAGE_NOT_FOUND~;

	// Rewind so the complete file header can be read in one go.
	elf.set_cursor(0)!;

	if (elf_class != 2)
	{
		Elf32_Ehdr ehdr;
		io::read_any(&elf, &ehdr)!;
		if (ehdr.e_ehsize != Elf32_Ehdr.sizeof) return backtrace::IMAGE_NOT_FOUND~;
		for (isz idx = 0; idx < ehdr.e_phnum; idx++)
		{
			Elf32_Phdr phdr;
			elf.set_cursor(ehdr.e_phoff + (long)ehdr.e_phentsize * idx)!;
			io::read_any(&elf, &phdr)!;
			if (phdr.p_type == PT_PHDR) return (ulong)phdr.p_vaddr - phdr.p_offset;
		}
		return 0;
	}

	Elf64_Ehdr ehdr;
	io::read_any(&elf, &ehdr)!;
	if (ehdr.e_ehsize != Elf64_Ehdr.sizeof) return backtrace::IMAGE_NOT_FOUND~;
	for (isz idx = 0; idx < ehdr.e_phnum; idx++)
	{
		Elf64_Phdr phdr;
		elf.set_cursor(ehdr.e_phoff + (long)ehdr.e_phentsize * idx)!;
		io::read_any(&elf, &phdr)!;
		if (phdr.p_type == PT_PHDR) return phdr.p_vaddr - phdr.p_offset;
	}
	return 0;
}
<*
 Symbolize `addr` against the running executable and append the resulting
 entries to `list`.

 The executable path is resolved by running `realpath -e /proc/<pid>/exe`, and
 the address is then handed to `addr2line`. Failures of either external
 command are propagated to the caller.
*>
fn void? backtrace_add_from_exec(Allocator allocator, BacktraceList* list, void* addr) @local
{
	// Two separate stack buffers: the resolved path has to stay intact while
	// the output of addr2line is captured, and no heap copy is needed for it.
	char[1024] path_buf @noinit;
	char[1024] buf @noinit;
	String exec_path = process::execute_stdout_to_buffer(&path_buf, {"realpath", "-e", string::bformat(&&(char[64]){}, "/proc/%d/exe", posix::getpid())})!;
	String addr2line = process::execute_stdout_to_buffer(&buf, {"addr2line", "-p", "-i", "-C", "-f", "-e", exec_path, string::bformat(&&(char[64]){}, "0x%x", addr)})!;
	// backtrace_add_addr2line() copies the object name for every entry it
	// pushes, so passing a borrowed view avoids leaking an extra allocation.
	return backtrace_add_addr2line(allocator, list, addr, addr2line, exec_path, "???");
}
<*
 Symbolize `addr` using the object reported by dladdr() in `info` and append
 the resulting entries to `list`.

 The runtime address is translated into an address relative to the object
 file (load base subtracted, ELF image base added) before it is passed to
 `addr2line`. Errors from reading the ELF file or running addr2line propagate.
*>
fn void? backtrace_add_from_dlinfo(Allocator allocator, BacktraceList* list, void* addr, Linux_Dl_info* info) @local
{
	char[1024] output @noinit;
	String module_path = info.dli_fname.str_view();
	ulong image_base = elf_module_image_base(module_path)!;
	void* module_addr = addr - (uptr)info.dli_fbase + (uptr)image_base;

	// Fall back to a placeholder when dladdr() found no nearby symbol.
	String symbol = "???";
	if (info.dli_sname) symbol = info.dli_sname.str_view();

	// The address handed to addr2line is one byte before the translated address.
	String addr2line = process::execute_stdout_to_buffer(&output, {"addr2line", "-p", "-i", "-C", "-f", "-e", module_path, string::bformat(&&(char[64]){}, "0x%x", module_addr - 1)})!;
	return backtrace_add_addr2line(allocator, list, addr, addr2line, module_path, symbol);
}
<*
 Parse one "function at file:line" segment of addr2line output into a Backtrace.

 The segment is trimmed and split on " at "; both halves must be present,
 otherwise NOT_FOUND is returned. When the location half contains a ':' and
 no '?', the text after the last ':' is parsed as the line number and the
 text before it becomes the file; otherwise file is empty and line is 0.
 All strings stored in the result are copies made with `allocator`.
 `func_name` is accepted but not used here.
*>
fn Backtrace? backtrace_line_parse(Allocator allocator, String string, String obj_name, String func_name, bool is_inlined)
{
	Splitter pieces = string.trim().tokenize(" at ");
	String symbol = pieces.next() ?? NOT_FOUND~!;
	String location = pieces.next() ?? NOT_FOUND~!;

	String source_file = "";
	uint line_number = 0;
	bool has_position = location.contains(":") && !location.contains("?");
	if (has_position)
	{
		usz colon = location.rindex_of_char(':')!;
		source_file = location[:colon];
		line_number = location[colon + 1..].to_uint()!;
	}

	Backtrace parsed = {
		.function = symbol.copy(allocator),
		.object_file = obj_name.copy(allocator),
		.file = source_file.copy(allocator),
		.line = line_number,
		.allocator = allocator,
		.is_inline = is_inlined
	};
	return parsed;
}
<*
 Turn the output of `addr2line -p -i` for one address into backtrace entries
 and append them to `list`.

 The output is split on "(inlined by)": the first segment is the innermost
 function and every following segment is the caller it was inlined into, so
 only the last segment is a real (non-inlined) frame. A segment that cannot
 be parsed is replaced by a placeholder entry built from `func_name`,
 `obj_name` and `addr`.
*>
fn void? backtrace_add_addr2line(Allocator allocator, BacktraceList* list, void* addr, String addr2line, String obj_name, String func_name) @local
{
	Splitter splitter = addr2line.tokenize("(inlined by)");
	while (try part = splitter.next())
	{
		// Every segment except the final one was inlined into the next one.
		bool is_inline = !splitter.at_end();
		Backtrace? trace = backtrace_line_parse(allocator, part, obj_name, func_name, is_inline);
		if (catch trace)
		{
			// Parsing failed: keep the frame, but without source information.
			list.push({
				.function = func_name.copy(allocator),
				.object_file = obj_name.copy(allocator),
				.offset = (uptr)addr,
				.file = "".copy(allocator),
				.line = 0,
				.allocator = allocator,
				.is_inline = is_inline
			});
			continue;
		}
		list.push(trace);
	}
}
<*
 Symbolize a single return address and append the result to `list`.

 A null address yields backtrace::BACKTRACE_UNKNOWN. Otherwise dladdr() is
 consulted: when it reports an object, that object is used for symbolization,
 and when it returns 0 the running executable is used instead.
*>
fn void? backtrace_add_element(Allocator allocator, BacktraceList *list, void* addr) @local
{
	if (addr == null)
	{
		list.push(backtrace::BACKTRACE_UNKNOWN);
		return;
	}
	Linux_Dl_info dl_info;
	bool resolved = dladdr(addr, &dl_info) != 0;
	if (resolved) return backtrace_add_from_dlinfo(allocator, list, addr, &dl_info);
	return backtrace_add_from_exec(allocator, list, addr);
}
<*
 Symbolize every address in `backtrace` and return the collected entries.

 The list is created with `allocator` and an initial capacity equal to the
 number of addresses. If symbolizing any address fails, all entries gathered
 so far and the list itself are freed before the error is returned.
*>
fn BacktraceList? symbolize_backtrace(Allocator allocator, void*[] backtrace)
{
	BacktraceList frames;
	frames.init(allocator, backtrace.len);
	defer catch
	{
		// Release partial results on the error path only.
		foreach (frame : frames) frame.free();
		frames.free();
	}
	for (usz i = 0; i < backtrace.len; i++)
	{
		backtrace_add_element(allocator, &frames, backtrace[i])!;
	}
	return frames;
}