Files
c3c/lib/std/core/string.c3

293 lines
6.7 KiB
C

module std::core::string;
// Zero-terminated string handle: a distinct, inline wrapper around char*.
typedef ZString = distinct inline char*;
// 32-bit character unit (alias of uint).
typedef Char32 = uint;
// 16-bit character unit (alias of ushort); element type of the buffers produced by to_utf16.
typedef Char16 = ushort;
// Module-private surrogate constants.
// NOTE(review): none of these are referenced in the visible part of this file;
// presumably they are consumed by the UTF-16 conversion helpers - confirm.
const uint SURROGATE_OFFSET @private = 0x10000;
const uint SURROGATE_GENERIC_MASK @private = 0xF800;
const uint SURROGATE_MASK @private = 0xFC00;
const uint SURROGATE_CODEPOINT_MASK @private = 0x03FF;
const uint SURROGATE_BITS @private = 10;
const uint SURROGATE_LOW_VALUE @private = 0xDC00;
const uint SURROGATE_HIGH_VALUE @private = 0xD800;
// Fault values for string-to-number conversion.
// NOTE(review): nothing in the visible part of this file raises these;
// the conversion routines that do are presumably defined elsewhere - confirm.
fault NumberConversion
{
EMPTY_STRING,
NEGATIVE_VALUE,
MALFORMED_INTEGER,
INTEGER_OVERFLOW,
MALFORMED_FLOAT,
FLOAT_OUT_OF_RANGE,
}
/**
 * Format the variadic arguments according to fmt and return the result as a
 * String copied with the allocator passed in using.
 *
 * The text is first built in a DString backed by a scratch allocator that only
 * exists inside the stack_mem scope (requested with size 256); only the final
 * copy is made with the caller's allocator.
 **/
macro String printf(String fmt, ..., Allocator* using = mem::heap())
{
	@stack_mem(256; Allocator* scratch)
	{
		DString builder;
		builder.init(.using = scratch);
		builder.printf(fmt, $vasplat());
		// Copy out before the scratch allocator goes out of scope.
		return builder.copy_str(using);
	};
}
/**
 * Format the variadic arguments according to fmt into a DString set up with
 * tinit, and return that DString's string view directly (no extra copy is made).
 **/
macro String tprintf(String fmt, ...)
{
	DString builder;
	builder.tinit();
	builder.printf(fmt, $vasplat());
	return builder.str();
}
/**
 * Report whether the character c occurs anywhere in set.
 * An empty set never contains anything.
 **/
macro bool char_in_set(char c, String set)
{
	for (usz pos = 0; pos < set.len; pos++)
	{
		if (set[pos] == c) return true;
	}
	return false;
}
/**
 * Return the sub-string that remains after dropping every leading and trailing
 * character that occurs in to_trim. The result is a view into the original
 * string; if every character is trimmed, a zero-length view is returned.
 *
 * @param [in] string
 * @param [in] to_trim
 **/
fn String String.trim(String string, String to_trim = "\t\n\r ")
{
	usz total = string.len;
	usz first = 0;
	// Advance past the leading run of trimmable characters.
	for (; first < total; first++)
	{
		if (!char_in_set(string[first], to_trim)) break;
	}
	// Nothing left (this also covers the empty input).
	if (first == total) return string[:0];
	usz last = total - 1;
	// Walk back from the end; string[first] is known to be kept, so stop there.
	for (; last > first; last--)
	{
		if (!char_in_set(string[last], to_trim)) break;
	}
	return string[first..last];
}
/**
 * Check whether string begins with needle. An empty needle always matches.
 *
 * @param [in] string
 * @param [in] needle
 **/
fn bool String.starts_with(String string, String needle)
{
	usz prefix_len = needle.len;
	if (prefix_len == 0) return true;
	if (prefix_len > string.len) return false;
	String head = string[:prefix_len];
	return head == needle;
}
/**
 * Check whether string finishes with needle. An empty needle always matches.
 *
 * @param [in] string
 * @param [in] needle
 **/
fn bool String.ends_with(String string, String needle)
{
	usz suffix_len = needle.len;
	if (suffix_len == 0) return true;
	if (suffix_len > string.len) return false;
	// Compare the last suffix_len characters against the needle.
	String tail = string[^suffix_len..];
	return tail == needle;
}
/**
 * Remove needle from the front of string when string starts with it.
 * If the needle is empty or is not a prefix, the string is returned unchanged.
 *
 * @param [in] string
 * @param [in] needle
 **/
fn String String.strip(String string, String needle)
{
	usz prefix_len = needle.len;
	if (prefix_len && string.starts_with(needle))
	{
		return string[prefix_len..];
	}
	return string;
}
/**
 * Remove needle from the back of string when string ends with it.
 * If the needle is empty or is not a suffix, the string is returned unchanged.
 *
 * @param [in] string
 * @param [in] needle
 **/
fn String String.strip_end(String string, String needle)
{
	if (needle.len && string.ends_with(needle))
	{
		// Slice by length rather than by end index so a zero-length result is expressible.
		usz kept = string.len - needle.len;
		return string[:kept];
	}
	return string;
}
/**
 * Cut a string into pieces at every occurrence of the needle,
 * e.g "a|b|c" split with "|" yields { "a", "b", "c" }.
 * The pieces are views into s; only the array holding them is allocated.
 *
 * @param [in] s
 * @param [in] needle
 * @param [&inout] using "The allocator, defaults to the heap allocator"
 * @param max "Max number of elements, 0 means no limit, defaults to 0"
 * @require needle.len > 0 "The needle must be at least 1 character long"
 * @ensure return.len > 0
 **/
fn String[] String.split(String s, String needle, usz max = 0, Allocator* using = mem::heap())
{
	usz capacity = 16;
	usz count = 0;
	String* parts = malloc(String, capacity, .using = using);
	bool done = false;
	do
	{
		// Once max - 1 pieces exist, the search is skipped so the remainder becomes the last piece.
		// With max == 0 the subtraction wraps around, so the limit is never reached.
		usz! found = count == max - 1 ? SearchResult.MISSING! : s.index_of(needle);
		String piece @noinit;
		if (try found)
		{
			piece = s[:found];
			s = s[found + needle.len..];
		}
		else
		{
			// No further separator: whatever is left is the final piece.
			piece = s;
			done = true;
		}
		// Double the backing array when it is full.
		if (count == capacity)
		{
			capacity *= 2;
			parts = realloc(parts, String.sizeof * capacity, .using = using);
		}
		parts[count++] = piece;
	} while (!done);
	return parts[:count];
}
/**
 * Same as String.split, except that the result array is always
 * allocated with the temporary allocator.
 *
 * @param [in] s
 * @param [in] needle
 * @param max "Max number of elements, 0 means no limit, defaults to 0"
 **/
fn String[] String.tsplit(String s, String needle, usz max = 0) => s.split(needle, max, mem::temp()) @inline;
/**
 * Find the index of the first incidence of a string.
 *
 * Every feasible start position is tried in ascending order and the needle is
 * compared in full at each one, so an occurrence that overlaps a failed partial
 * match (e.g. "ab" in "aab") is still found. Returns SearchResult.MISSING when
 * the needle does not occur, including when it is longer than s.
 *
 * @param [in] s
 * @param [in] needle
 * @pure
 * @ensure return < s.len
 * @require needle.len > 0 "The needle must be len 1 or more"
 **/
fn usz! String.index_of(String s, String needle)
{
	usz needed = needle.len;
	// A needle longer than the haystack cannot match; this also guards the subtraction below.
	if (needed > s.len) return SearchResult.MISSING!;
	usz last_start = s.len - needed;
	for (usz start = 0; start <= last_start; start++)
	{
		usz matched = 0;
		// start + matched stays below s.len because matched < needed and start <= last_start.
		while (matched < needed && s[start + matched] == needle[matched]) matched++;
		if (matched == needed) return start;
	}
	return SearchResult.MISSING!;
}
/**
 * Find the index of the last incidence of a string.
 *
 * Every feasible start position is tried in descending order and the needle is
 * compared in full at each one, so an occurrence that overlaps a failed partial
 * match (e.g. "ab" in "abb") is still found. The returned index is where the
 * needle starts. Returns SearchResult.MISSING when the needle does not occur,
 * including when it is longer than s.
 *
 * @param [in] s
 * @param [in] needle
 * @pure
 * @ensure return < s.len
 * @require needle.len > 0 "The needle must be len 1 or more"
 **/
fn usz! String.rindex_of(String s, String needle)
{
	usz needed = needle.len;
	// A needle longer than the haystack cannot match; this also guards the subtraction below.
	if (needed > s.len) return SearchResult.MISSING!;
	// The counter is kept one ahead of the candidate position so the
	// unsigned value never has to drop below zero to terminate the loop.
	for (usz next = s.len - needed + 1; next > 0; next--)
	{
		usz start = next - 1;
		usz matched = 0;
		// start + matched stays below s.len because matched < needed and start <= s.len - needed.
		while (matched < needed && s[start + matched] == needle[matched]) matched++;
		if (matched == needed) return start;
	}
	return SearchResult.MISSING!;
}
/**
 * Duplicate s into a buffer of s.len + 1 bytes obtained from the given
 * allocator, write a terminating zero after the copied bytes, and return
 * the buffer as a ZString.
 **/
fn ZString String.zstr_copy(String s, Allocator* using = mem::heap())
{
	usz size = s.len;
	char* buffer = malloc(size + 1, .using = using);
	// The terminator sits right after the copied bytes.
	buffer[size] = 0;
	mem::copy(buffer, s.ptr, size);
	return (ZString)buffer;
}
/**
 * Build a new string holding the bytes of s1 followed by the bytes of s2.
 * The buffer comes from the given allocator and is one byte longer than the
 * returned string; that extra byte is set to zero.
 **/
fn String String.concat(String s1, String s2, Allocator* using = mem::heap())
{
	usz first_len = s1.len;
	usz second_len = s2.len;
	usz total = first_len + second_len;
	char* buffer = malloc(total + 1, .using = using);
	mem::copy(buffer, s1.ptr, first_len);
	mem::copy(buffer + first_len, s2.ptr, second_len);
	buffer[total] = 0;
	return (String)buffer[:total];
}
/**
 * Concatenate s1 and s2 like String.concat, using the temporary allocator.
 **/
fn String String.tconcat(String s1, String s2)
{
	return s1.concat(s2, mem::temp());
}
/**
 * Make a zero-terminated copy of s like String.zstr_copy, using the temporary allocator.
 **/
fn ZString String.zstr_tcopy(String s)
{
	return s.zstr_copy(mem::temp()) @inline;
}
/**
 * Duplicate s into a buffer obtained from the given allocator.
 * The buffer is one byte longer than the returned string and
 * that extra byte is set to zero.
 **/
fn String String.copy(String s, Allocator* using = mem::heap())
{
	usz size = s.len;
	char* buffer = malloc(size + 1, .using = using);
	buffer[size] = 0;
	mem::copy(buffer, s.ptr, size);
	return (String)buffer[:size];
}
/**
 * Duplicate s like String.copy, using the temporary allocator.
 **/
fn String String.tcopy(String s)
{
	return s.copy(mem::temp()) @inline;
}
/**
 * View the ZString as a String via as_str and duplicate it with the given allocator.
 **/
fn String ZString.copy(ZString z, Allocator* using = mem::heap())
{
	return z.as_str().copy(using) @inline;
}
/**
 * View the ZString as a String via as_str and duplicate it with the temporary allocator.
 **/
fn String ZString.tcopy(ZString z)
{
	return z.as_str().copy(mem::temp()) @inline;
}
/**
 * Convert s to a newly allocated array of Char16 units.
 * The buffer holds one unit more than the returned slice and that extra
 * unit is set to zero. Allocation and conversion faults are passed on.
 **/
fn Char16[]! String.to_utf16(String s, Allocator* using = mem::heap())
{
	usz units = conv::utf16len_for_utf8(s);
	Char16* buffer = malloc_checked(Char16, units + 1, .using = using)?;
	// NOTE(review): if the conversion faults, the function returns here without
	// releasing buffer - confirm whether that is intended.
	conv::utf8to16_unsafe(s, buffer)?;
	buffer[units] = 0;
	return buffer[:units];
}
/**
 * Convert an array of Char16 units to a newly allocated String.
 * The buffer is one byte longer than the returned string and that extra
 * byte is set to zero. Allocation and conversion faults are passed on.
 **/
fn String! from_utf16(Char16[] utf16, Allocator* using = mem::heap())
{
	usz byte_len = conv::utf8len_for_utf16(utf16);
	char* buffer = malloc_checked(byte_len + 1, .using = using)?;
	// NOTE(review): if the conversion faults, the function returns here without
	// releasing buffer - confirm whether that is intended.
	conv::utf16to8_unsafe(utf16, buffer)?;
	buffer[byte_len] = 0;
	return (String)buffer[:byte_len];
}
/**
 * Convert s like String.to_utf16, using the temporary allocator.
 **/
fn Char16[]! String.to_temp_utf16(String s)
{
	return s.to_utf16(mem::temp());
}