Files
c3c/lib/std/core/str.c3
2023-01-26 16:24:18 +01:00

380 lines
8.2 KiB
C

module std::core::str;
// Pointer to character data, kept distinct from a plain char*.
// The functions in this file treat it as zero-terminated.
define ZString = distinct char*;
// A string is a slice of chars (pointer + length), not zero-terminated by type.
define String = char[];
// Element types of the UTF-32 and UTF-16 buffers produced and consumed
// by the conversion functions in this file.
define Char32 = uint;
define Char16 = ushort;
// Constants describing UTF-16 surrogate pairs (0xD800 / 0xDC00 bases,
// 10 payload bits per surrogate).
// NOTE(review): not referenced in the visible part of this file; presumably
// used by UTF-16 conversion code elsewhere - confirm before removing.
private const uint SURROGATE_OFFSET = 0x10000;
private const uint SURROGATE_GENERIC_MASK = 0xF800;
private const uint SURROGATE_MASK = 0xFC00;
private const uint SURROGATE_CODEPOINT_MASK = 0x03FF;
private const uint SURROGATE_BITS = 10;
private const uint SURROGATE_LOW_VALUE = 0xDC00;
private const uint SURROGATE_HIGH_VALUE = 0xD800;
// Faults returned by the string-to-integer conversions (to_integer and
// the to_int / to_uint family built on it).
fault NumberConversion
{
// The input was empty or consisted only of spaces and tabs.
EMPTY_STRING,
// A leading '-' was given for a type whose minimum value is 0.
NEGATIVE_VALUE,
// Digits were missing after a sign or base prefix, or a character
// was not a valid digit for the selected base.
MALFORMED_INTEGER,
// The parsed value does not fit in the requested integer type.
INTEGER_OVERFLOW,
}
// Builds a VarString holding every element of `s` in order, with `joiner`
// inserted between consecutive elements.
// An empty list yields a null VarString.
fn VarString join(String[] s, String joiner)
{
	usz count = s.len;
	if (count == 0) return (VarString)null;
	// Reserve everything up front: one joiner per element (one more than
	// strictly required) plus the length of every piece.
	usz capacity = joiner.len * count;
	for (usz i = 0; i < count; i++)
	{
		capacity += s[i].len;
	}
	VarString joined = string::new_with_capacity(capacity);
	joined.append(s[0]);
	for (usz i = 1; i < count; i++)
	{
		joined.append(joiner);
		joined.append(s[i]);
	}
	return joined;
}
// Returns true when the byte `c` occurs anywhere in `set`.
macro bool char_in_set(char c, String set)
{
	for (usz i = 0; i < set.len; i++)
	{
		if (set[i] == c) return true;
	}
	return false;
}
// Returns true when `c` is a space or a horizontal tab.
private macro char_is_space_tab(char c)
{
	bool is_space = c == ' ';
	bool is_tab = c == '\t';
	return is_space || is_tab;
}
// Parses `string` as an integer of type `$Type`.
//
// Accepted form: optional leading spaces/tabs, an optional '+' or '-',
// an optional base prefix ("0x"/"0X" hex, "0b"/"0B" binary, "0o"/"0O"
// octal) and then one or more digits of that base. Without a prefix the
// number is read as decimal.
//
// Faults:
//   NumberConversion.EMPTY_STRING      - nothing but spaces/tabs.
//   NumberConversion.NEGATIVE_VALUE    - '-' given for a type with minimum 0.
//   NumberConversion.MALFORMED_INTEGER - missing digits or an invalid digit.
//   NumberConversion.INTEGER_OVERFLOW  - the value does not fit in `$Type`.
private macro to_integer($Type, String string)
{
	usz len = string.len;
	usz index = 0;
	char* ptr = string.ptr;
	// Skip leading spaces and tabs.
	while (index < len && char_is_space_tab(ptr[index])) index++;
	if (len == index) return NumberConversion.EMPTY_STRING!;
	bool is_negative;
	switch (string[index])
	{
		case '-':
			if ($Type.min == 0) return NumberConversion.NEGATIVE_VALUE!;
			is_negative = true;
			index++;
		case '+':
			index++;
		default:
			break;
	}
	if (len == index) return NumberConversion.MALFORMED_INTEGER!;
	$Type base = 10;
	if (string[index] == '0')
	{
		index++;
		// A lone "0" (optionally signed) is simply zero.
		if (index == len) return ($Type)0;
		switch (string[index])
		{
			case 'x':
			case 'X':
				base = 16;
				index++;
			case 'b':
			case 'B':
				base = 2;
				index++;
			case 'o':
			case 'O':
				base = 8;
				index++;
			default:
				break;
		}
		// A base prefix must be followed by at least one digit.
		if (len == index) return NumberConversion.MALFORMED_INTEGER!;
	}
	$Type value = 0;
	while (index != len)
	{
		// Translate the character into its digit value (0..15). Each range
		// is tested explicitly so that characters lying between '9' and 'A'
		// are rejected, and hex letters map to 10..15.
		char c = {|
			char ch = string[index++];
			if (ch >= '0' && ch <= '9') return (char)(ch - '0');
			if (base != 16) return NumberConversion.MALFORMED_INTEGER!;
			if (ch >= 'A' && ch <= 'F') return (char)(ch - 'A' + 10);
			if (ch >= 'a' && ch <= 'f') return (char)(ch - 'a' + 10);
			return NumberConversion.MALFORMED_INTEGER!;
		|}?;
		// The digit is at most 15, so it fits in every integer type.
		$Type digit = ($Type)c;
		if (digit >= base) return NumberConversion.MALFORMED_INTEGER!;
		value = {|
			// Overflow is checked BEFORE multiplying, against the type's
			// limits; inspecting a wrapped result afterwards misses cases
			// where the wrapped value still moved in the expected direction.
			if (is_negative)
			{
				// Negative numbers accumulate downwards so that $Type.min is reachable.
				if (value < ($Type.min + digit) / base) return NumberConversion.INTEGER_OVERFLOW!;
				$Type new_value = value * base - digit;
				return new_value;
			}
			if (value > ($Type.max - digit) / base) return NumberConversion.INTEGER_OVERFLOW!;
			$Type new_value = value * base + digit;
			return new_value;
		|}?;
	}
	return value;
}
// Signed conversions: each parses `string` via to_integer for the named
// type and returns the value or a NumberConversion fault.

// Parses `string` as an int128.
fn int128! to_int128(String string)
{
	return to_integer(int128, string);
}

// Parses `string` as a long.
fn long! to_long(String string)
{
	return to_integer(long, string);
}

// Parses `string` as an int.
fn int! to_int(String string)
{
	return to_integer(int, string);
}

// Parses `string` as a short.
fn short! to_short(String string)
{
	return to_integer(short, string);
}

// Parses `string` as an ichar.
fn ichar! to_ichar(String string)
{
	return to_integer(ichar, string);
}
// Unsigned conversions: each parses `str` via to_integer for the named
// type. A leading '-' yields NumberConversion.NEGATIVE_VALUE because the
// minimum of these types is 0.

// Parses `str` as a uint128.
fn uint128! to_uint128(String str)
{
	return to_integer(uint128, str);
}

// Parses `str` as a ulong.
fn ulong! to_ulong(String str)
{
	return to_integer(ulong, str);
}

// Parses `str` as a uint.
fn uint! to_uint(String str)
{
	return to_integer(uint, str);
}

// Parses `str` as a ushort.
fn ushort! to_ushort(String str)
{
	return to_integer(ushort, str);
}

// Parses `str` as a char (the unsigned 8-bit type).
fn char! to_uchar(String str)
{
	return to_integer(char, str);
}
// Returns the sub-slice of `string` that remains after removing every
// leading and trailing byte contained in `to_trim`. No copy is made.
// A string made up entirely of trimmed bytes yields an empty slice.
fn String trim(String string, String to_trim = "\t\n\r ")
{
	usz total = string.len;
	usz first = 0;
	// Advance past the leading run of trimmable bytes.
	for (; first < total; first++)
	{
		if (!char_in_set(string[first], to_trim)) break;
	}
	if (first == total) return string[:0];
	// Walk back from the end; `last` is an inclusive index.
	usz last = total - 1;
	for (; last > first; last--)
	{
		if (!char_in_set(string[last], to_trim)) break;
	}
	return string[first..last];
}
// Returns true when the first `needle.len` bytes of `s` equal `needle`.
// An empty needle always matches.
fn bool starts_with(String s, String needle)
{
	usz needed = needle.len;
	if (s.len < needed) return false;
	for (usz i = 0; i < needed; i++)
	{
		if (s[i] != needle[i]) return false;
	}
	return true;
}
// Same as split, but the resulting array is allocated with the temp allocator.
fn String[] tsplit(String s, String needle)
{
	return split(s, needle, mem::temp_allocator()) @inline;
}
// Splits `s` at every occurrence of `needle` and returns the pieces as
// an array allocated from `allocator`. The pieces are slices into `s`,
// not copies.
//
// Scanning stops once the remaining input is empty, so an empty input
// produces no pieces and an empty piece after a trailing needle is not
// emitted. When index_of reports no match (which includes an empty
// needle), the whole remainder becomes the final piece.
fn String[] split(String s, String needle, Allocator* allocator = mem::current_allocator())
{
	usz capacity = 16;
	usz count = 0;
	String* parts = allocator.alloc(String.sizeof * capacity)!!;
	while (s.len != 0)
	{
		String part = void;
		usz! found = index_of(s, needle);
		if (try found)
		{
			// Cut off the text before the needle and continue after it.
			part = s[:found];
			s = s[found + needle.len..];
		}
		else
		{
			// No further needle: the remainder is the last piece.
			part = s;
			s = s[:0];
		}
		// Double the storage whenever it is full.
		if (count == capacity)
		{
			capacity *= 2;
			parts = allocator.realloc(parts, String.sizeof * capacity)!!;
		}
		parts[count] = part;
		count++;
	}
	return parts[:count];
}
// Returns the start index of the LAST occurrence of `needle` in `s`.
// Faults with SearchResult.MISSING when `needle` is empty, longer than
// `s`, or does not occur.
fn usz! rindex_of(String s, String needle)
{
	usz needed = needle.len;
	if (!needed || needed > s.len) return SearchResult.MISSING!;
	// Try every candidate start position from the rightmost one down to 0.
	// `start` is decremented at the top of the loop so the unsigned
	// counter never goes below zero.
	usz start = s.len - needed + 1;
	while (start > 0)
	{
		start--;
		// Compare the needle front to back against the window at `start`.
		usz matched = 0;
		while (matched < needed && s[start + matched] == needle[matched]) matched++;
		if (matched == needed) return start;
	}
	return SearchResult.MISSING!;
}
// Returns the start index of the FIRST occurrence of `needle` in `s`.
// Faults with SearchResult.MISSING when `needle` is empty, longer than
// `s`, or does not occur.
fn usz! index_of(String s, String needle)
{
	usz needed = needle.len;
	if (!needed || needed > s.len) return SearchResult.MISSING!;
	// Every candidate start position is tested independently, so a failed
	// partial match can never hide a match that begins inside it
	// (e.g. "ab" in "aab").
	usz last_start = s.len - needed;
	for (usz start = 0; start <= last_start; start++)
	{
		usz matched = 0;
		while (matched < needed && s[start + matched] == needle[matched]) matched++;
		if (matched == needed) return start;
	}
	return SearchResult.MISSING!;
}
// Copies the bytes of `s` into a new buffer obtained from `allocator`,
// appends a zero byte, and returns the buffer as a ZString.
// A failed allocation is force-unwrapped with `!!`.
fn ZString copy_zstring(String s, Allocator* allocator = mem::current_allocator())
{
	usz byte_count = s.len;
	// One extra byte for the terminator.
	char* buffer = allocator.alloc(byte_count + 1)!!;
	mem::copy(buffer, s.ptr, byte_count);
	buffer[byte_count] = 0;
	return (ZString)buffer;
}
// Copies `s` into a new buffer obtained from `allocator` and returns it
// as a String of the same length. A zero byte is stored directly after
// the returned slice.
fn String copyz(String s, Allocator* allocator = mem::current_allocator())
{
	usz byte_count = s.len;
	// One extra byte for the terminator that follows the slice.
	char* buffer = allocator.alloc(byte_count + 1)!!;
	mem::copy(buffer, s.ptr, byte_count);
	buffer[byte_count] = 0;
	return buffer[:byte_count];
}
// Same as copy_zstring, using the temp allocator for the copy.
fn ZString tcopy_zstring(String s)
{
	ZString duplicate = copy_zstring(s, mem::temp_allocator());
	return duplicate;
}
// Returns true when `a` and `b` have the same length and identical bytes.
fn bool compare(String a, String b)
{
	usz len = a.len;
	if (len != b.len) return false;
	for (usz i = 0; i < len; i++)
	{
		if (a[i] != b[i]) return false;
	}
	return true;
}
// Faults for the Unicode conversion functions.
// NOTE(review): none of these is raised directly in the visible part of
// this file; presumably they come from the conv:: helpers - confirm there.
fault UnicodeResult
{
INVALID_UTF8,
INVALID_UTF16,
CONVERSION_FAILED,
}
// Counts the bytes of `utf8` that are not UTF-8 continuation bytes
// (bit pattern 10xxxxxx). For well-formed UTF-8 this is the number of
// code points. The input is not validated.
fn usz utf8_codepoints(String utf8)
{
	usz count = 0;
	for (usz i = 0; i < utf8.len; i++)
	{
		// Continuation bytes belong to a sequence that was already counted.
		if ((utf8[i] & 0xC0) == 0x80) continue;
		count++;
	}
	return count;
}
// Converts `utf8` into a newly allocated Char32 array taken from
// `allocator`. A zero element is stored directly after the returned slice.
// Allocation and conversion faults are passed on to the caller.
// NOTE(review): when the conversion faults, the buffer is not released
// in this function - confirm whether callers or the allocator handle that.
fn Char32[]! utf8to32(String utf8, Allocator* allocator = mem::current_allocator())
{
	usz count = conv::utf8_codepoints(utf8);
	// One extra element for the terminator.
	Char32* buffer = allocator.alloc(Char32.sizeof * (count + 1))?;
	conv::utf8to32_unsafe(utf8, buffer)?;
	buffer[count] = 0;
	return buffer[:count];
}
// Converts `utf32` into a newly allocated String taken from `allocator`.
// A zero byte is stored directly after the returned slice.
// A failed allocation is force-unwrapped with `!!`.
fn String utf32to8(Char32[] utf32, Allocator* allocator = mem::current_allocator())
{
	usz byte_count = conv::utf8len_for_utf32(utf32);
	// One extra byte for the terminator.
	char* buffer = allocator.alloc(byte_count + 1)!!;
	conv::utf32to8_unsafe(utf32, buffer);
	buffer[byte_count] = 0;
	return buffer[:byte_count];
}
// Converts `utf8` into a newly allocated Char16 array taken from
// `allocator`. A zero element is stored directly after the returned slice.
// Allocation and conversion faults are passed on to the caller.
fn Char16[]! utf8to16(String utf8, Allocator* allocator = mem::current_allocator())
{
	usz unit_count = conv::utf16len_for_utf8(utf8);
	// One extra element for the terminator.
	Char16* buffer = allocator.alloc((unit_count + 1) * Char16.sizeof)?;
	conv::utf8to16_unsafe(utf8, buffer)?;
	buffer[unit_count] = 0;
	return buffer[:unit_count];
}
// Converts `utf16` into a newly allocated String taken from `allocator`.
// A zero byte is stored directly after the returned slice.
// Allocation and conversion faults are passed on to the caller.
fn String! utf16to8(Char16[] utf16, Allocator* allocator = mem::current_allocator())
{
	usz byte_count = conv::utf8len_for_utf16(utf16);
	// One extra byte for the terminator.
	char* buffer = allocator.alloc(byte_count + 1)?;
	conv::utf16to8_unsafe(utf16, buffer)?;
	buffer[byte_count] = 0;
	return buffer[:byte_count];
}
// Returns a copy of `s` allocated from `allocator`. The copy is made by
// copy_zstring, so a zero byte follows the returned slice.
fn String copy(String s, Allocator* allocator = mem::current_allocator())
{
	ZString duplicate = copy_zstring(s, allocator) @inline;
	usz byte_count = s.len;
	return duplicate[:byte_count];
}
// Returns a copy of `s` made by tcopy_zstring (temp allocator), so a
// zero byte follows the returned slice.
fn String tcopy(String s)
{
	ZString duplicate = tcopy_zstring(s) @inline;
	usz byte_count = s.len;
	return duplicate[:byte_count];
}
// Returns `s1` followed by `s2` in a new buffer obtained with tmalloc.
// A zero byte is stored directly after the returned slice.
fn String tconcat(String s1, String s2)
{
	usz first_len = s1.len;
	usz second_len = s2.len;
	usz total = first_len + second_len;
	// One extra byte for the terminator.
	char* buffer = tmalloc(total + 1);
	mem::copy(buffer, s1.ptr, first_len);
	mem::copy(buffer + first_len, s2.ptr, second_len);
	buffer[total] = 0;
	return buffer[:total];
}
// Returns `s1` followed by `s2` in a new buffer obtained with malloc.
// A zero byte is stored directly after the returned slice.
fn String concat(String s1, String s2)
{
	usz first_len = s1.len;
	usz second_len = s2.len;
	usz total = first_len + second_len;
	// One extra byte for the terminator.
	char* buffer = malloc(total + 1);
	mem::copy(buffer, s1.ptr, first_len);
	mem::copy(buffer + first_len, s2.ptr, second_len);
	buffer[total] = 0;
	return buffer[:total];
}
// Returns a String view over the bytes of `str`, up to but not including
// the terminating zero byte. No copy is made.
//
// The slice length is the BYTE count. ZString.len skips UTF-8
// continuation bytes, so using it as a slice length would cut off the
// end of any string containing multi-byte characters.
fn String ZString.as_str(ZString str)
{
	char* ptr = (char*)str;
	usz byte_count = 0;
	while (ptr[byte_count] != 0) byte_count++;
	return ptr[:byte_count];
}
// Walks `str` up to its terminating zero byte and counts the bytes that
// are not UTF-8 continuation bytes (bit pattern 10xxxxxx). For
// well-formed UTF-8 this is the number of code points, which differs
// from the byte length whenever multi-byte characters are present.
fn usz ZString.len(ZString str)
{
	usz count = 0;
	for (char* cursor = (char*)str; cursor[0] != 0; cursor++)
	{
		if ((cursor[0] & 0xC0) != 0x80) count++;
	}
	return count;
}