Files
c3c/lib/std/core/string.c3
Christoffer Lerno e293c435af 0.6.0: init_new/init_temp removed. LinkedList API rewritten. List "pop" and "remove" function now return Optionals. RingBuffer API rewritten. Allocator interface changed. Deprecated Allocator, DString and mem functions removed. "identity" functions are now constants for Matrix and Complex numbers. @default implementations for interfaces removed. any* => any, same for interfaces. Emit local/private globals as "private" in LLVM, following C "static". Updated enum syntax. Add support [rgba] properties in vectors. Improved checks of aliased "void". Subarray -> slice. Fix of llvm codegen enum check. Improved alignment handling. Add --output-dir #1155. Removed List/Object append. GenericList renamed AnyList. Remove unused "unwrap". Fixes to cond. Optimize output in dead branches. Better checking of operator methods. Disallow any from implementing dynamic methods. Check for operator mismatch. Remove unnecessary bitfield. Remove numbering in --list* commands Old style enum declaration for params/type, but now the type is optional. Add note on #1086. Allow making distinct types out of "void", "typeid", "anyfault" and faults. Remove system linker build options. "Try" expressions must be simple expressions. Add optimized build to Mac tests. Register int. assert(false) only allowed in unused branches or in tests. Compile time failed asserts is a compile time error. Remove current_block_is_target. Bug when assigning an optional from an optional. Remove unused emit_zstring. Simplify phi code. Remove unnecessary unreachable blocks and remove unnecessary current_block NULL assignments. Proper handling of '.' and Win32 '//server' paths. Add "no discard" to expression blocks with a return value. Detect "unsigned >= 0" as errors. Fix issue with distinct void as a member #1147. Improve callstack debug information #1184. Fix issue with absolute output-dir paths. Lambdas were not type checked thoroughly #1185. Fix compilation warning #1187. Request jump table using @jump for switches. Path normalization - fix possible null terminator out of bounds. Improved error messages on inlined macros.
Upgrade of mingw in CI. Fix problems using reflection on interface types #1203. Improved debug information on defer. $foreach doesn't create an implicit syntactic scope.
Error if `@if` depends on `@if`. Updated Linux stacktrace. Fix of default argument stacktrace. Allow linking libraries directly by file path. Improve inlining warning messages. Added `index_of_char_from`. Compiler crash using enum nameof from different module #1205. Removed unused fields in find_msvc. Use vswhere to find msvc. Update tests for LLVM 19
2024-06-12 10:14:26 +02:00

740 lines
18 KiB
C

module std::core::string;
import std::ascii;
distinct ZString = inline char*;
distinct WString = inline Char16*;
def Char32 = uint;
def Char16 = ushort;
fault UnicodeResult
{
INVALID_UTF8,
INVALID_UTF16,
CONVERSION_FAILED,
}
const uint SURROGATE_OFFSET @private = 0x10000;
const uint SURROGATE_GENERIC_MASK @private = 0xF800;
const uint SURROGATE_MASK @private = 0xFC00;
const uint SURROGATE_CODEPOINT_MASK @private = 0x03FF;
const uint SURROGATE_BITS @private = 10;
const uint SURROGATE_LOW_VALUE @private = 0xDC00;
const uint SURROGATE_HIGH_VALUE @private = 0xD800;
fault NumberConversion
{
EMPTY_STRING,
NEGATIVE_VALUE,
MALFORMED_INTEGER,
INTEGER_OVERFLOW,
MALFORMED_FLOAT,
FLOAT_OUT_OF_RANGE,
}
/**
* Return a temporary String created using the formatting function.
*
* @param [in] fmt `The formatting string`
**/
macro String tformat(String fmt, ...)
{
DString str = dstring::temp_with_capacity(fmt.len + $vacount * 8);
str.appendf(fmt, $vasplat());
return str.str_view();
}
/**
* Return a temporary ZString created using the formatting function.
*
* @param [in] fmt `The formatting string`
**/
macro ZString tformat_zstr(String fmt, ...)
{
DString str = dstring::temp_with_capacity(fmt.len + $vacount * 8);
str.appendf(fmt, $vasplat());
return str.zstr_view();
}
/**
* Return a new String created using the formatting function.
*
* @param [in] fmt `The formatting string`
* @param [inout] allocator `The allocator to use`
**/
macro String new_format(String fmt, ..., Allocator allocator = allocator::heap())
{
@pool(allocator)
{
DString str = dstring::temp_with_capacity(fmt.len + $vacount * 8);
str.appendf(fmt, $vasplat());
return str.copy_str(allocator);
};
}
/**
* Return a new ZString created using the formatting function.
*
* @param [in] fmt `The formatting string`
* @param [inout] allocator `The allocator to use`
**/
macro ZString new_format_zstr(String fmt, ..., Allocator allocator = allocator::heap())
{
@pool(allocator)
{
DString str = dstring::temp_with_capacity(fmt.len + $vacount * 8);
str.appendf(fmt, $vasplat());
return str.copy_zstr(allocator);
};
}
/**
* Check if a character is in a set.
*
* @param c `the character to check`
* @param [in] set `The formatting string`
* @pure
* @return `True if a character is in the set`
**/
macro bool char_in_set(char c, String set)
{
foreach (ch : set) if (ch == c) return true;
return false;
}
fn String join_new(String[] s, String joiner, Allocator allocator = allocator::heap())
{
if (!s)
{
return (String)allocator::new_array(allocator, char, 2)[:0];
}
usz total_size = joiner.len * s.len;
foreach (String* &str : s)
{
total_size += str.len;
}
@pool(allocator)
{
DString res = dstring::temp_with_capacity(total_size);
res.append(s[0]);
foreach (String* &str : s[1..])
{
res.append(joiner);
res.append(*str);
}
return res.copy_str(allocator);
};
}
/**
* Remove characters from the front and end of a string.
*
* @param [in] string `The string to trim`
* @param [in] to_trim `The set of characters to trim, defaults to whitespace`
* @pure
* @return `a substring of the string passed in`
**/
fn String String.trim(string, String to_trim = "\t\n\r ")
{
usz start = 0;
usz len = string.len;
while (start < len && char_in_set(string[start], to_trim)) start++;
if (start == len) return string[:0];
usz end = len - 1;
while (end > start && char_in_set(string[end], to_trim)) end--;
return string[start..end];
}
/**
* Check if the String starts with the needle.
*
* @param [in] string
* @param [in] needle
* @pure
* @return `'true' if the string starts with the needle`
**/
fn bool String.starts_with(string, String needle)
{
if (needle.len > string.len) return false;
if (!needle.len) return true;
return string[:needle.len] == needle;
}
/**
* Check if the String ends with the needle.
*
* @param [in] string
* @param [in] needle
* @pure
* @return `'true' if the string ends with the needle`
**/
fn bool String.ends_with(string, String needle)
{
if (needle.len > string.len) return false;
if (!needle.len) return true;
return string[^needle.len..] == needle;
}
/**
* Strip the front of the string if the prefix exists.
*
* @param [in] string
* @param [in] needle
* @pure
* @return `the substring with the prefix removed`
**/
fn String String.strip(string, String needle)
{
if (!needle.len || !string.starts_with(needle)) return string;
return string[needle.len..];
}
/**
* Strip the end of the string if the suffix exists.
*
* @param [in] string
* @param [in] needle
* @pure
* @return `the substring with the suffix removed`
**/
fn String String.strip_end(string, String needle)
{
if (!needle.len || !string.ends_with(needle)) return string;
// Note that this is the safe way if we want to support zero length.
return string[:(string.len - needle.len)];
}
/**
* Split a string into parts, e.g "a|b|c" split with "|" yields { "a", "b", "c" }
*
* @param [in] s
* @param [in] needle
* @param [&inout] allocator "The allocator, defaults to the heap allocator"
* @param max "Max number of elements, 0 means no limit, defaults to 0"
* @require needle.len > 0 "The needle must be at least 1 character long"
* @ensure return.len > 0
**/
fn String[] String.split(s, String needle, usz max = 0, Allocator allocator = allocator::heap())
{
usz capacity = 16;
usz i = 0;
String* holder = allocator::alloc_array(allocator, String, capacity);
bool no_more = false;
while (!no_more)
{
usz! index = i == max - 1 ? SearchResult.MISSING? : s.index_of(needle);
String res @noinit;
if (try index)
{
res = s[:index];
s = s[index + needle.len..];
}
else
{
res = s;
no_more = true;
}
if (i == capacity)
{
capacity *= 2;
holder = allocator::realloc(allocator, holder, String.sizeof * capacity);
}
holder[i++] = res;
}
return holder[:i];
}
/**
* This function is identical to String.split, but implicitly uses the
* temporary allocator.
*
* @param [in] s
* @param [in] needle
* @param max "Max number of elements, 0 means no limit, defaults to 0"
**/
fn String[] String.tsplit(s, String needle, usz max = 0)
{
return s.split(needle, max, allocator::temp()) @inline;
}
/**
* Check if a substring is found in the string.
* @param [in] s
* @param [in] needle "The string to look for."
* @pure
* @return "true if the string contains the substring, false otherwise"
**/
fn bool String.contains(s, String needle)
{
return @ok(s.index_of(needle));
}
/**
* Find the index of the first incidence of a string.
*
* @param [in] s
* @param needle "The character to look for"
* @pure
* @ensure return < s.len
* @return "the index of the needle"
* @return! SearchResult.MISSING "if the needle cannot be found"
**/
fn usz! String.index_of_char(s, char needle)
{
foreach (i, c : s)
{
if (c == needle) return i;
}
return SearchResult.MISSING?;
}
/**
* Find the index of the first incidence of a character.
*
* @param [in] s
* @param needle "The character to look for"
* @param start_index "The index to start with, may exceed max index."
* @pure
* @ensure return < s.len
* @return "the index of the needle"
* @return! SearchResult.MISSING "if the needle cannot be found starting from the start_index"
**/
fn usz! String.index_of_char_from(s, char needle, usz start_index)
{
usz len = s.len;
if (len <= start_index) return SearchResult.MISSING?;
for (usz i = start_index; i < len; i++)
{
if (s[i] == needle) return i;
}
return SearchResult.MISSING?;
}
/**
* Find the index of the first incidence of a character starting from the end.
*
* @param [in] s
* @param needle "the character to find"
* @pure
* @ensure return < s.len
* @return "the index of the needle"
* @return! SearchResult.MISSING "if the needle cannot be found"
**/
fn usz! String.rindex_of_char(s, char needle)
{
foreach_r (i, c : s)
{
if (c == needle) return i;
}
return SearchResult.MISSING?;
}
/**
* Find the index of the first incidence of a string.
*
* @param [in] s
* @param [in] needle
* @pure
* @ensure return < s.len
* @require needle.len > 0 : "The needle must be len 1 or more"
* @return "the index of the needle"
* @return! SearchResult.MISSING "if the needle cannot be found"
**/
fn usz! String.index_of(s, String needle)
{
usz needed = needle.len;
if (needed > 0 && s.len >= needed)
{
char first = needle[0];
foreach (i, c: s[..^needed])
{
if (c == first && s[i:needed] == needle) return i;
}
}
return SearchResult.MISSING?;
}
/**
* Find the index of the last incidence of a string.
*
* @param [in] s
* @param [in] needle
* @pure
* @ensure return < s.len
* @require needle.len > 0 "The needle must be len 1 or more"
* @return "the index of the needle"
* @return! SearchResult.MISSING "if the needle cannot be found"
**/
fn usz! String.rindex_of(s, String needle)
{
usz needed = needle.len;
if (needed > 0 && s.len >= needed)
{
char first = needle[0];
foreach_r (i, c: s[..^needed])
{
if (c == first && s[i:needed] == needle) return i;
}
}
return SearchResult.MISSING?;
}
fn String ZString.str_view(str)
{
return (String)(str[:str.len()]);
}
fn usz ZString.char_len(str)
{
usz len = 0;
char* ptr = (char*)str;
while (char c = ptr++[0])
{
if (c & 0xC0 != 0x80) len++;
}
return len;
}
fn usz ZString.len(str)
{
usz len = 0;
char* ptr = (char*)str;
while (char c = ptr++[0]) len++;
return len;
}
fn ZString String.zstr_copy(s, Allocator allocator = allocator::heap())
{
usz len = s.len;
char* str = allocator::malloc(allocator, len + 1);
mem::copy(str, s.ptr, len);
str[len] = 0;
return (ZString)str;
}
fn String String.concat(s1, String s2, Allocator allocator = allocator::heap())
{
usz full_len = s1.len + s2.len;
char* str = allocator::malloc(allocator, full_len + 1);
usz s1_len = s1.len;
mem::copy(str, s1.ptr, s1_len);
mem::copy(str + s1_len, s2.ptr, s2.len);
str[full_len] = 0;
return (String)str[:full_len];
}
fn String String.tconcat(s1, String s2) => s1.concat(s2, allocator::temp());
fn ZString String.zstr_tcopy(s) => s.zstr_copy(allocator::temp()) @inline;
fn String String.copy(s, Allocator allocator = allocator::heap())
{
usz len = s.len;
char* str = allocator::malloc(allocator, len + 1);
mem::copy(str, s.ptr, len);
str[len] = 0;
return (String)str[:len];
}
fn void String.free(&s, Allocator allocator = allocator::heap())
{
if (!s.len) return;
allocator::free(allocator, s.ptr);
*s = "";
}
fn String String.tcopy(s) => s.copy(allocator::temp()) @inline;
fn String ZString.copy(z, Allocator allocator = allocator::temp())
{
return z.str_view().copy(allocator) @inline;
}
fn String ZString.tcopy(z)
{
return z.str_view().copy(allocator::temp()) @inline;
}
/**
* Convert an UTF-8 string to UTF-16
* @return "The UTF-16 string as a slice, allocated using the given allocator"
* @return! UnicodeResult.INVALID_UTF8 "If the string contained an invalid UTF-8 sequence"
* @return! AllocationFailure "If allocation of the string fails"
**/
fn Char16[]! String.to_new_utf16(s, Allocator allocator = allocator::heap())
{
usz len16 = conv::utf16len_for_utf8(s);
Char16* data = allocator::alloc_array_try(allocator, Char16, len16 + 1)!;
conv::utf8to16_unsafe(s, data)!;
data[len16] = 0;
return data[:len16];
}
/**
* Convert an UTF-8 string to UTF-16
* @return "The UTF-16 string as a slice, allocated using the given allocator"
* @return! UnicodeResult.INVALID_UTF8 "If the string contained an invalid UTF-8 sequence"
* @return! AllocationFailure "If allocation of the string fails"
**/
fn Char16[]! String.to_temp_utf16(s)
{
return s.to_new_utf16(allocator::temp());
}
fn WString! String.to_new_wstring(s, Allocator allocator = allocator::heap())
{
return (WString)s.to_new_utf16(allocator).ptr;
}
fn WString! String.to_temp_wstring(s)
{
return (WString)s.to_temp_utf16().ptr;
}
fn Char32[]! String.to_new_utf32(s, Allocator allocator = allocator::heap())
{
usz codepoints = conv::utf8_codepoints(s);
Char32* data = allocator::alloc_array_try(allocator, Char32, codepoints + 1)!;
conv::utf8to32_unsafe(s, data)!;
data[codepoints] = 0;
return data[:codepoints];
}
fn Char32[]! String.to_temp_utf32(s)
{
return s.to_new_utf32(allocator::temp());
}
/**
* Convert a string to ASCII lower case.
*
* @param [inout] s
* @pure
**/
fn void String.convert_ascii_to_lower(s)
{
foreach (&c : s) if (c.is_upper() @pure) *c += 'a' - 'A';
}
fn String String.new_ascii_to_lower(s, Allocator allocator = allocator::heap())
{
String copy = s.copy(allocator);
copy.convert_ascii_to_lower();
return copy;
}
fn String String.temp_ascii_to_lower(s, Allocator allocator = allocator::heap())
{
return s.new_ascii_to_lower(allocator::temp());
}
/**
* Convert a string to ASCII upper case.
*
* @param [inout] s
* @pure
**/
fn void String.convert_ascii_to_upper(s)
{
foreach (&c : s) if (c.is_lower() @pure) *c -= 'a' - 'A';
}
/**
* Returns a string converted to ASCII upper case.
*
* @param [in] s
* @param [inout] allocator
*
* @return `a new String converted to ASCII upper case.`
**/
fn String String.new_ascii_to_upper(s, Allocator allocator = allocator::heap())
{
String copy = s.copy(allocator);
copy.convert_ascii_to_upper();
return copy;
}
fn StringIterator String.iterator(s)
{
return { s, 0 };
}
/**
* @param [in] s
* @return `a temporary String converted to ASCII upper case.`
**/
fn String String.temp_ascii_to_upper(s)
{
return s.new_ascii_to_upper(allocator::temp());
}
fn String! new_from_utf32(Char32[] utf32, Allocator allocator = allocator::heap())
{
usz len = conv::utf8len_for_utf32(utf32);
char* data = allocator::malloc_try(allocator, len + 1)!;
defer catch allocator::free(allocator, data);
conv::utf32to8_unsafe(utf32, data);
data[len] = 0;
return (String)data[:len];
}
fn String! new_from_utf16(Char16[] utf16, Allocator allocator = allocator::heap())
{
usz len = conv::utf8len_for_utf16(utf16);
char* data = allocator::malloc_try(allocator, len + 1)!;
defer catch allocator::free(allocator, data);
conv::utf16to8_unsafe(utf16, data)!;
data[len] = 0;
return (String)data[:len];
}
fn String! new_from_wstring(WString wstring, Allocator allocator = allocator::heap())
{
usz utf16_len;
while (wstring[utf16_len] != 0) utf16_len++;
Char16[] utf16 = wstring[:utf16_len];
return new_from_utf16(utf16, allocator);
}
fn String! temp_from_wstring(WString wstring) => new_from_wstring(wstring, allocator::temp()) @inline;
fn String! temp_from_utf16(Char16[] utf16) => new_from_utf16(utf16, allocator::temp()) @inline;
fn usz String.utf8_codepoints(s)
{
usz len = 0;
foreach (char c : s)
{
if (c & 0xC0 != 0x80) len++;
}
return len;
}
/**
* @require (base <= 10 && base > 1) || base == 16 : "Unsupported base"
**/
macro String.to_integer(string, $Type, int base = 10)
{
usz len = string.len;
usz index = 0;
char* ptr = string.ptr;
while (index < len && ascii::is_blank_m(ptr[index])) index++;
if (len == index) return NumberConversion.EMPTY_STRING?;
bool is_negative;
switch (string[index])
{
case '-':
if ($Type.min == 0) return NumberConversion.NEGATIVE_VALUE?;
is_negative = true;
index++;
case '+':
index++;
default:
break;
}
if (len == index) return NumberConversion.MALFORMED_INTEGER?;
$Type base_used = ($Type)base;
if (string[index] == '0' && base == 10)
{
index++;
if (index == len) return ($Type)0;
switch (string[index])
{
case 'x':
case 'X':
base_used = 16;
index++;
case 'b':
case 'B':
base_used = 2;
index++;
case 'o':
case 'O':
base_used = 8;
index++;
default:
break;
}
if (len == index) return NumberConversion.MALFORMED_INTEGER?;
}
$Type value = 0;
while (index != len)
{
char c = {|
char ch = string[index++];
if (base_used != 16 || ch < 'A') return (char)(ch - '0');
if (ch <= 'F') return (char)(ch - 'A' + 10);
if (ch < 'a') return NumberConversion.MALFORMED_INTEGER?;
if (ch > 'f') return NumberConversion.MALFORMED_INTEGER?;
return (char)(ch - 'a' + 10);
|}!;
if (c >= base_used) return NumberConversion.MALFORMED_INTEGER?;
value = {|
if (is_negative)
{
$Type new_value = value * base_used - c;
if (new_value > value) return NumberConversion.INTEGER_OVERFLOW?;
return new_value;
}
$Type new_value = value * base_used + c;
if (new_value < value) return NumberConversion.INTEGER_OVERFLOW?;
return new_value;
|}!;
}
return value;
}
fn int128! String.to_int128(s, int base = 10) => s.to_integer(int128, base);
fn long! String.to_long(s, int base = 10) => s.to_integer(long, base);
fn int! String.to_int(s, int base = 10) => s.to_integer(int, base);
fn short! String.to_short(s, int base = 10) => s.to_integer(short, base);
fn ichar! String.to_ichar(s, int base = 10) => s.to_integer(ichar, base);
fn uint128! String.to_uint128(s, int base = 10) => s.to_integer(uint128, base);
fn ulong! String.to_ulong(s, int base = 10) => s.to_integer(ulong, base);
fn uint! String.to_uint(s, int base = 10) => s.to_integer(uint, base);
fn ushort! String.to_ushort(s, int base = 10) => s.to_integer(ushort, base);
fn char! String.to_uchar(s, int base = 10) => s.to_integer(char, base);
fn double! String.to_double(s) => s.to_real(double);
fn float! String.to_float(s) => s.to_real(float);
fn Splitter String.splitter(self, String split)
{
return Splitter { self, split, 0 };
}
struct Splitter
{
String string;
String split;
usz current;
}
fn void Splitter.reset(&self)
{
self.current = 0;
}
fn String! Splitter.next(&self)
{
usz len = self.string.len;
usz current = self.current;
if (current >= len) return IteratorResult.NO_MORE_ELEMENT?;
String remaining = self.string[current..];
usz! next = remaining.index_of(self.split);
if (try next)
{
defer self.current = current + next + self.split.len;
return remaining[:next];
}
self.current = len;
return remaining;
}