mirror of
https://github.com/c3lang/c3c.git
synced 2026-02-27 12:01:16 +00:00
1178 lines
28 KiB
Plaintext
1178 lines
28 KiB
Plaintext
module std::core::string;
|
|
import std::io;
|
|
import std::core::mem::allocator;
|
|
|
|
|
|
// String is a distinct, inline wrapper around a char slice (`char[]`);
// the typedef is only emitted when `String` is not already defined.
typedef String @if(!$defined(String)) = inline char[];
|
|
<*
|
|
ZString is a pointer to a zero terminated array of chars.
|
|
|
|
Use ZString when you need to interop with C zero terminated strings.
|
|
*>
|
|
typedef ZString = inline char*;
|
|
|
|
<*
|
|
WString is a pointer to a zero terminated array of Char16.
|
|
|
|
Depending on the platform, this may or may not correspond to wchar_t.
|
|
For Windows, wchar_t is generally 16 bits, on MacOS it is 32 bits.
|
|
However, for both MacOS and Linux, regular C strings (ZString)
|
|
will be UTF-8 encoded, so there is no need to use the wchar_t versions
|
|
of functions outside of encoding functions.
|
|
*>
|
|
typedef WString = inline Char16*;
|
|
|
|
<*
|
|
Char32 is a UTF32 codepoint
|
|
*>
|
|
alias Char32 = uint;
|
|
|
|
<*
|
|
Char16 is a UTF16 "character"
|
|
*>
|
|
alias Char16 = ushort;
|
|
|
|
<*
|
|
Common faults used with strings
|
|
*>
|
|
faultdef INVALID_UTF8, INVALID_UTF16, CONVERSION_FAILED,
|
|
EMPTY_STRING, NEGATIVE_VALUE, MALFORMED_INTEGER,
|
|
INTEGER_OVERFLOW, MALFORMED_FLOAT, FLOAT_OUT_OF_RANGE;
|
|
|
|
|
|
<*
|
|
Create a pointer to an UTF32 encoded string at compile time.
|
|
|
|
@param $string : "The string to encode"
|
|
*>
|
|
// Encodes `$string` with the `$$wstr32` builtin and yields a pointer to the first element.
macro Char32* @wstring32(String $string) @builtin => (Char32*)&&$$wstr32($string);
|
|
|
|
<*
|
|
Create a slice of an UTF32 encoded string at compile time.
|
|
|
|
@param $string : "The string to encode"
|
|
*>
|
|
// Encodes `$string` with `$$wstr32` and slices off the final element of the encoded array.
macro Char32[] @char32(String $string) @builtin => $$wstr32($string)[..^2];
|
|
|
|
<*
|
|
Create a WString (an UTF16 encoded string) at compile time.
|
|
|
|
@param $string : "The string to encode"
|
|
*>
|
|
// Encodes `$string` with the `$$wstr16` builtin and yields it as a WString pointer.
macro WString @wstring(String $string) @builtin => (WString)&&$$wstr16($string);
|
|
|
|
<*
|
|
Create a slice of an UTF16 encoded string at compile time.
|
|
|
|
@param $string : "The string to encode"
|
|
*>
|
|
// Encodes `$string` with `$$wstr16` and slices off the final element of the encoded array.
macro Char16[] @char16(String $string) @builtin => $$wstr16($string)[..^2];
|
|
|
|
// Compile-time formatting: forwards the format and all variadic arguments to `$$sprintf`.
macro String @sprintf(String $format, ...) @builtin @const => $$sprintf($format, $vasplat);
|
|
<*
|
|
Return a temporary ZString created using the formatting function.
|
|
|
|
@param [in] fmt : `The formatting string`
|
|
*>
|
|
fn ZString tformat_zstr(String fmt, args...) @format(0)
{
	// Initial capacity guess: the format itself plus 8 bytes per argument.
	usz capacity = fmt.len + args.len * 8;
	DString builder = dstring::temp_with_capacity(capacity);
	builder.appendf(fmt, ...args);
	return builder.zstr_view();
}
|
|
|
|
<*
|
|
Return a new String created using the formatting function.
|
|
|
|
@param [inout] allocator : `The allocator to use`
|
|
@param [in] fmt : `The formatting string`
|
|
*>
|
|
fn String format(Allocator allocator, String fmt, args...) @format(1)
{
	@pool()
	{
		// Build in a temp DString, then copy the result out with the caller's allocator.
		usz capacity = fmt.len + args.len * 8;
		DString builder = dstring::temp_with_capacity(capacity);
		builder.appendf(fmt, ...args);
		return builder.copy_str(allocator);
	};
}
|
|
|
|
<*
|
|
Return a String created using the formatting function, backed by the supplied buffer.
|
|
|
|
@param [inout] buffer : `The buffer to use`
|
|
@param [in] fmt : `The formatting string`
|
|
*>
|
|
fn String bformat(char[] buffer, String fmt, args...) @format(1)
{
	// The DString allocates through an allocator wrapping `buffer`.
	usz capacity = fmt.len + args.len * 8;
	DString builder = dstring::new_with_capacity(allocator::wrap(buffer), capacity);
	builder.appendf(fmt, ...args);
	return builder.str_view();
}
|
|
|
|
<*
|
|
Return a temporary String created using the formatting function.
|
|
|
|
@param [in] fmt : `The formatting string`
|
|
*>
|
|
fn String tformat(String fmt, args...) @format(0)
{
	// Initial capacity guess: the format itself plus 8 bytes per argument.
	usz capacity = fmt.len + args.len * 8;
	DString builder = dstring::temp_with_capacity(capacity);
	builder.appendf(fmt, ...args);
	return builder.str_view();
}
|
|
|
|
<*
|
|
Check if a character is in a set.
|
|
|
|
@param c : `the character to check`
|
|
@param [in] set : `The set of characters to check against`
|
|
@pure
|
|
@return `True if a character is in the set`
|
|
*>
|
|
macro bool char_in_set(char c, String set)
{
	// Linear scan: the set is treated as an unordered bag of bytes.
	for (usz i = 0; i < set.len; i++)
	{
		if (set[i] == c) return true;
	}
	return false;
}
|
|
|
|
fn String join(Allocator allocator, String[] s, String joiner)
{
	// Nothing to join: return an empty string that still belongs to `allocator`.
	if (!s) return (String)allocator::new_array(allocator, char, 2)[:0];

	// Capacity estimate: every part plus one joiner per part.
	usz capacity = joiner.len * s.len;
	foreach (part : s) capacity += part.len;

	@pool()
	{
		DString builder = dstring::temp_with_capacity(capacity);
		builder.append(s[0]);
		for (usz i = 1; i < s.len; i++)
		{
			builder.append(joiner);
			builder.append(s[i]);
		}
		return builder.copy_str(allocator);
	};
}
|
|
|
|
<*
|
|
Replace all instances of one substring with a different string.
|
|
|
|
@param [in] self
|
|
@param [in] needle : `The string to be replaced`
|
|
@param [in] new_str : `The replacement string`
|
|
@param [&inout] allocator : `The allocator to use for the String`
|
|
@return "The new string with the elements replaced"
|
|
*>
|
|
fn String String.replace(self, Allocator allocator, String needle, String new_str) @nodiscard
{
	@pool()
	{
		// Split on the needle and re-join with the replacement using temp memory.
		String[] split = self.tsplit(needle);
		// Copy the result with the caller-supplied allocator (previously the
		// global `mem` was used here, silently ignoring the `allocator` argument).
		return dstring::join(tmem, split, new_str).copy_str(allocator);
	};
}
|
|
|
|
<*
|
|
Replace all instances of one substring with a different string, allocating the new string on the temp allocator.
|
|
|
|
@param [in] self
|
|
@param [in] needle : `The string to be replaced`
|
|
@param [in] new_str : `The replacement string`
|
|
@return "The new string with the elements replaced"
|
|
*>
|
|
fn String String.treplace(self, String needle, String new_str)
{
	// Split on the needle, then join the pieces with the replacement, all in temp memory.
	return dstring::join(tmem, self.tsplit(needle), new_str).str_view();
}
|
|
|
|
|
|
<*
|
|
Remove characters from the front and end of a string.
|
|
|
|
@param [in] self : `The string to trim`
|
|
@param [in] to_trim : `The set of characters to trim, defaults to whitespace`
|
|
@pure
|
|
@return `a substring of the string passed in`
|
|
*>
|
|
fn String String.trim(self, String to_trim = "\t\n\r ")
{
	// Trim the front first, then the back of what remains.
	String front_trimmed = self.trim_left(to_trim);
	return front_trimmed.trim_right(to_trim);
}
|
|
|
|
<*
|
|
Remove characters from the front of a string.
|
|
|
|
@param [in] self : `The string to trim`
|
|
@param [in] to_trim : `The set of characters to trim, defaults to whitespace`
|
|
@pure
|
|
@return `a substring of the string passed in`
|
|
*>
|
|
fn String String.trim_left(self, String to_trim = "\t\n\r ")
{
	usz total = self.len;
	usz first_kept = 0;
	// Advance past every leading character that belongs to the trim set.
	for (; first_kept < total; first_kept++)
	{
		if (!char_in_set(self[first_kept], to_trim)) break;
	}
	// Everything was trimmed: return an empty slice at the start.
	if (first_kept == total) return self[:0];
	return self[first_kept..];
}
|
|
|
|
<*
|
|
Remove characters from the end of a string.
|
|
|
|
@param [in] self : `The string to trim`
|
|
@param [in] to_trim : `The set of characters to trim, defaults to whitespace`
|
|
@pure
|
|
@return `a substring of the string passed in`
|
|
*>
|
|
fn String String.trim_right(self, String to_trim = "\t\n\r ")
{
	usz keep = self.len;
	// Shrink the kept length while the last kept character is in the trim set.
	for (; keep > 0; keep--)
	{
		if (!char_in_set(self[keep - 1], to_trim)) break;
	}
	return self[:keep];
}
|
|
|
|
<*
|
|
Check if the String starts with the prefix.
|
|
|
|
@param [in] self
|
|
@param [in] prefix
|
|
@pure
|
|
@return `'true' if the string starts with the prefix`
|
|
*>
|
|
fn bool String.starts_with(self, String prefix)
{
	usz n = prefix.len;
	// The empty prefix matches every string.
	if (n == 0) return true;
	if (n > self.len) return false;
	return self[:n] == prefix;
}
|
|
|
|
<*
|
|
Check if the String ends with the suffix.
|
|
|
|
@param [in] self
|
|
@param [in] suffix
|
|
@pure
|
|
@return `'true' if the string ends with the suffix`
|
|
*>
|
|
fn bool String.ends_with(self, String suffix)
{
	usz n = suffix.len;
	// The empty suffix matches every string.
	if (n == 0) return true;
	if (n > self.len) return false;
	return self[^n..] == suffix;
}
|
|
|
|
<*
|
|
Strip the front of the string if the prefix exists.
|
|
|
|
@param [in] self
|
|
@param [in] prefix
|
|
@pure
|
|
@return `the substring with the prefix removed`
|
|
*>
|
|
fn String String.strip(self, String prefix)
{
	// Nothing to strip for an empty or non-matching prefix.
	if (prefix.len == 0) return self;
	if (!self.starts_with(prefix)) return self;
	return self[prefix.len..];
}
|
|
|
|
<*
|
|
Strip the end of the string if the suffix exists.
|
|
|
|
@param [in] self
|
|
@param [in] suffix
|
|
@pure
|
|
@return `the substring with the suffix removed`
|
|
*>
|
|
fn String String.strip_end(self, String suffix)
{
	// Nothing to strip for an empty or non-matching suffix.
	if (suffix.len == 0) return self;
	if (!self.ends_with(suffix)) return self;
	// Slice by explicit length so that a zero-length result is handled safely.
	usz kept = self.len - suffix.len;
	return self[:kept];
}
|
|
|
|
<*
|
|
Split a string into parts, e.g "a|b|c" split with "|" yields { "a", "b", "c" }
|
|
|
|
@param [in] self
|
|
@param [in] delimiter
|
|
@param max : "Max number of elements, 0 means no limit, defaults to 0"
|
|
@param skip_empty : "True to skip empty elements"
|
|
@param [&inout] allocator : "The allocator to use for the String[]"
|
|
|
|
@require delimiter.len > 0 : "The delimiter must be at least 1 character long"
|
|
@ensure return.len > 0 || skip_empty
|
|
*>
|
|
fn String[] String.split(self, Allocator allocator, String delimiter, usz max = 0, bool skip_empty = false)
{
	usz capacity = 16;
	usz count = 0;
	String* parts = allocator::alloc_array(allocator, String, capacity);
	bool done = false;
	while (!done)
	{
		// Once `max - 1` parts are stored the search is skipped, so the remainder
		// becomes the final part. With max == 0, `max - 1` wraps and never matches.
		usz? found = count == max - 1 ? NOT_FOUND? : self.index_of(delimiter);
		String piece @noinit;
		if (try found)
		{
			piece = self[:found];
			self = self[found + delimiter.len..];
		}
		else
		{
			piece = self;
			done = true;
		}
		if (skip_empty && !piece.len) continue;
		// Double the backing storage when it is full.
		if (count == capacity)
		{
			capacity *= 2;
			parts = allocator::realloc(allocator, parts, String.sizeof * capacity);
		}
		parts[count] = piece;
		count++;
	}
	return parts[:count];
}
|
|
|
|
|
|
<*
|
|
This function is identical to String.split, but implicitly uses the
|
|
temporary allocator.
|
|
|
|
@param [in] s
|
|
@param [in] delimiter
|
|
@param max : "Max number of elements, 0 means no limit, defaults to 0"
|
|
@param skip_empty : "True to skip empty elements"
|
|
*>
|
|
fn String[] String.tsplit(s, String delimiter, usz max = 0, bool skip_empty = false)
{
	// Same as `split`, with the temp allocator supplied.
	return s.split(tmem, delimiter, max, skip_empty) @inline;
}
|
|
|
|
faultdef BUFFER_EXCEEDED;
|
|
|
|
<*
|
|
Split a string into parts, e.g "a|b|c" split with "|" yields { "a", "b", "c" }
|
|
|
|
@param [in] s
|
|
@param [in] delimiter
|
|
@param [inout] buffer
|
|
@param max : "Max number of elements, 0 means no limit, defaults to 0"
|
|
@require delimiter.len > 0 : "The delimiter must be at least 1 character long"
|
|
@ensure return.len > 0 || skip_empty
|
|
@return? BUFFER_EXCEEDED : `If there are more elements than would fit the buffer`
|
|
*>
|
|
fn String[]? String.split_to_buffer(s, String delimiter, String[] buffer, usz max = 0, bool skip_empty = false)
{
	usz limit = buffer.len;
	usz count = 0;
	bool done = false;
	while (!done)
	{
		// Once `max - 1` parts are stored the search is skipped, so the remainder
		// becomes the final part. With max == 0, `max - 1` wraps and never matches.
		usz? found = count == max - 1 ? NOT_FOUND? : s.index_of(delimiter);
		String piece @noinit;
		if (try found)
		{
			piece = s[:found];
			s = s[found + delimiter.len..];
		}
		else
		{
			piece = s;
			done = true;
		}
		if (skip_empty && !piece.len) continue;
		// No room left for another part.
		if (count == limit) return BUFFER_EXCEEDED?;
		buffer[count] = piece;
		count++;
	}
	return buffer[:count];
}
|
|
|
|
<*
|
|
Check if a substring is found in the string.
|
|
|
|
@param [in] s
|
|
@param [in] substr : "The string to look for."
|
|
@pure
|
|
@return "true if the string contains the substring, false otherwise"
|
|
*>
|
|
fn bool String.contains(s, String substr)
{
	// Any fault from `index_of` means the substring is absent.
	if (catch s.index_of(substr)) return false;
	return true;
}
|
|
|
|
<*
|
|
Check how many non-overlapping instances of a substring there is.
|
|
|
|
If the substring has zero length, the number of matches is zero.
|
|
|
|
@param [in] self : "The string to search"
|
|
@param [in] substr : "The string to look for."
|
|
@pure
|
|
@return "The number of times matched"
|
|
*>
|
|
fn usz String.count(self, String substr)
{
	usz needed = substr.len;
	// An empty needle never matches.
	if (needed == 0) return 0;
	usz matches = 0;
	// Find the next match, then continue after it so matches never overlap.
	while (try at = self.index_of(substr))
	{
		matches++;
		self = self[at + needed..];
	}
	return matches;
}
|
|
|
|
<*
|
|
Find the index of the first incidence of a character.
|
|
|
|
@param [in] self
|
|
@param character : "The character to look for"
|
|
@pure
|
|
@ensure return < self.len
|
|
@return "the index of the character"
|
|
@return? NOT_FOUND : "if the character cannot be found"
|
|
*>
|
|
fn usz? String.index_of_char(self, char character)
{
	// Forward scan, returning the first matching position.
	for (usz i = 0; i < self.len; i++)
	{
		if (self[i] == character) return i;
	}
	return NOT_FOUND?;
}
|
|
|
|
<*
|
|
Find the index of the first incidence of a one of the chars.
|
|
|
|
@param [in] self
|
|
@param [in] characters : "The characters to look for"
|
|
@pure
|
|
@ensure return < self.len
|
|
@return "the index of the character"
|
|
@return? NOT_FOUND : "if the character cannot be found"
|
|
*>
|
|
fn usz? String.index_of_chars(String self, char[] characters)
{
	// For each position, test the character against every candidate.
	for (usz i = 0; i < self.len; i++)
	{
		char c = self[i];
		for (usz j = 0; j < characters.len; j++)
		{
			if (characters[j] == c) return i;
		}
	}
	return NOT_FOUND?;
}
|
|
|
|
<*
|
|
Find the index of the first incidence of a character.
|
|
|
|
@param [in] self
|
|
@param character : "The character to look for"
|
|
@param start_index : "The index to start with, may exceed max index."
|
|
@pure
|
|
@ensure return < self.len
|
|
@return "the index of the character"
|
|
@return? NOT_FOUND : "if the character cannot be found starting from the start_index"
|
|
*>
|
|
fn usz? String.index_of_char_from(self, char character, usz start_index)
{
	// A start index at or beyond the end skips the loop and falls through to NOT_FOUND.
	usz i = start_index;
	while (i < self.len)
	{
		if (self[i] == character) return i;
		i++;
	}
	return NOT_FOUND?;
}
|
|
|
|
<*
|
|
Find the index of the first incidence of a character starting from the end.
|
|
|
|
@param [in] self
|
|
@param character : "the character to find"
|
|
@pure
|
|
@ensure return < self.len
|
|
@return "the index of the character"
|
|
@return? NOT_FOUND : "if the character cannot be found"
|
|
*>
|
|
fn usz? String.rindex_of_char(self, char character)
{
	// Walk backwards; `pos` is one past the index being inspected so it can stay unsigned.
	for (usz pos = self.len; pos > 0; pos--)
	{
		if (self[pos - 1] == character) return pos - 1;
	}
	return NOT_FOUND?;
}
|
|
|
|
<*
|
|
Find the index of the first incidence of a string.
|
|
|
|
@param [in] self
|
|
@param [in] substr
|
|
@pure
|
|
@ensure return < self.len
|
|
@require substr.len > 0 : "The string must be len 1 or more"
|
|
@return "the index of the substring"
|
|
@return? NOT_FOUND : "if the substring cannot be found"
|
|
*>
|
|
fn usz? String.index_of(self, String substr)
{
	usz needed = substr.len;
	if (needed == 0 || self.len < needed) return NOT_FOUND?;
	char first = substr[0];
	// Last position at which a full-length match could still start.
	usz last_start = self.len - needed;
	for (usz i = 0; i <= last_start; i++)
	{
		// Cheap first-byte test before the full comparison.
		if (self[i] == first && self[i : needed] == substr) return i;
	}
	return NOT_FOUND?;
}
|
|
|
|
<*
|
|
Find the index of the last incidence of a string.
|
|
|
|
@param [in] self
|
|
@param [in] substr
|
|
@pure
|
|
@ensure return < self.len
|
|
@require substr.len > 0 : "The substring must be len 1 or more"
|
|
@return "the index of the substring"
|
|
@return? NOT_FOUND : "if the substring cannot be found"
|
|
*>
|
|
fn usz? String.rindex_of(self, String substr)
{
	usz needed = substr.len;
	if (needed == 0 || self.len < needed) return NOT_FOUND?;
	char first = substr[0];
	// `start` is one past the candidate index so the countdown can stay unsigned.
	for (usz start = self.len - needed + 1; start > 0; start--)
	{
		usz at = start - 1;
		if (self[at] == first && self[at : needed] == substr) return at;
	}
	return NOT_FOUND?;
}
|
|
|
|
fn bool ZString.eq(self, ZString other) @operator(==)
{
	char* lhs = self;
	char* rhs = other;
	// Identical pointers (including both null) are equal; one null is not.
	if (lhs == rhs) return true;
	if (!lhs || !rhs) return false;
	// Advance in lockstep while bytes agree; reaching the terminator means equal.
	while (*lhs == *rhs)
	{
		if (!*lhs) return true;
		lhs++;
		rhs++;
	}
	return false;
}
|
|
|
|
fn String ZString.str_view(self)
{
	// View over the bytes before the terminator; no copy is made.
	usz n = self.len();
	return (String)(self[:n]);
}
|
|
|
|
fn usz ZString.char_len(str)
{
	usz count = 0;
	// Count every byte that is not of the form 10xxxxxx.
	for (char* p = (char*)str; *p; p++)
	{
		if (*p & 0xC0 != 0x80) count++;
	}
	return count;
}
|
|
|
|
fn usz ZString.len(self)
{
	// Count bytes up to, but not including, the zero terminator.
	char* bytes = (char*)self;
	usz n = 0;
	while (bytes[n]) n++;
	return n;
}
|
|
|
|
fn usz WString.len(self)
{
	// Count Char16 units up to, but not including, the zero terminator.
	Char16* units = (Char16*)self;
	usz n = 0;
	while (units[n]) n++;
	return n;
}
|
|
|
|
fn ZString String.zstr_copy(self, Allocator allocator)
{
	usz n = self.len;
	// One extra byte for the zero terminator.
	char* buffer = allocator::malloc(allocator, n + 1);
	mem::copy(buffer, self.ptr, n);
	buffer[n] = 0;
	return (ZString)buffer;
}
|
|
|
|
fn String String.concat(self, Allocator allocator, String s2)
{
	usz first_len = self.len;
	usz second_len = s2.len;
	usz total = first_len + second_len;
	// One extra byte so the result is also zero terminated.
	char* buffer = allocator::malloc(allocator, total + 1);
	mem::copy(buffer, self.ptr, first_len);
	mem::copy(buffer + first_len, s2.ptr, second_len);
	buffer[total] = 0;
	return (String)buffer[:total];
}
|
|
|
|
fn String String.tconcat(self, String s2)
{
	// Concatenate using the temp allocator.
	return self.concat(tmem, s2);
}
|
|
|
|
|
|
fn ZString String.zstr_tcopy(self)
{
	// Zero-terminated copy on the temp allocator.
	return self.zstr_copy(tmem) @inline;
}
|
|
|
|
<*
|
|
Copy this string, by duplicating the string, always adding a zero byte
|
|
sentinel, so that it safely can be converted to a ZString by a
|
|
cast.
|
|
*>
|
|
fn String String.copy(self, Allocator allocator)
{
	usz n = self.len;
	// Allocate one byte more than the content and store a zero sentinel after it.
	char* buffer = allocator::malloc(allocator, n + 1);
	mem::copy(buffer, self.ptr, n);
	buffer[n] = 0;
	return (String)buffer[:n];
}
|
|
|
|
fn void String.free(&self, Allocator allocator)
{
	// A null pointer means there is nothing to release.
	if (self.ptr)
	{
		allocator::free(allocator, self.ptr);
		// Reset to the empty string so the freed pointer is no longer held.
		*self = "";
	}
}
|
|
|
|
fn String String.tcopy(self)
{
	// Copy using the temp allocator.
	return self.copy(tmem) @inline;
}
|
|
|
|
// Copies the zero-terminated string into a new String owned by `allocator`.
fn String ZString.copy(self, Allocator allocator) => self.str_view().copy(allocator) @inline;
|
|
|
|
// Copies the zero-terminated string into a new String on the temp allocator.
fn String ZString.tcopy(self) => self.str_view().copy(tmem) @inline;
|
|
|
|
<*
|
|
Convert an UTF-8 string to UTF-16
|
|
@return "The UTF-16 string as a slice, allocated using the given allocator"
|
|
@return? INVALID_UTF8 : "If the string contained an invalid UTF-8 sequence"
|
|
*>
|
|
fn Char16[]? String.to_utf16(self, Allocator allocator)
{
	usz len16 = conv::utf16len_for_utf8(self);
	// One extra unit for the zero terminator.
	Char16* data = allocator::alloc_array_try(allocator, Char16, len16 + 1)!;
	// Release the buffer if the conversion below fails, so the error path
	// does not leak (same pattern as `from_utf16`).
	defer catch allocator::free(allocator, data);
	conv::utf8to16_unsafe(self, data)!;
	data[len16] = 0;
	return data[:len16];
}
|
|
|
|
fn Char16[]? String.to_temp_utf16(self)
{
	// UTF-16 conversion using the temp allocator.
	return self.to_utf16(tmem);
}
|
|
|
|
fn WString? String.to_wstring(self, Allocator allocator)
{
	// Convert, rethrowing any fault, then expose the pointer as a WString.
	Char16[] utf16 = self.to_utf16(allocator)!;
	return (WString)utf16.ptr;
}
|
|
|
|
fn WString? String.to_temp_wstring(self)
{
	// WString conversion using the temp allocator.
	return self.to_wstring(tmem);
}
|
|
|
|
fn Char32[]? String.to_utf32(self, Allocator allocator)
{
	usz codepoints = conv::utf8_codepoints(self);
	// One extra element for the zero terminator.
	Char32* data = allocator::alloc_array_try(allocator, Char32, codepoints + 1)!;
	// Release the buffer if the conversion below fails, so the error path
	// does not leak (same pattern as `from_utf16`).
	defer catch allocator::free(allocator, data);
	conv::utf8to32_unsafe(self, data)!;
	data[codepoints] = 0;
	return data[:codepoints];
}
|
|
|
|
fn Char32[]? String.to_temp_utf32(self)
{
	// UTF-32 conversion using the temp allocator.
	return self.to_utf32(tmem);
}
|
|
|
|
<*
|
|
Convert a string to ASCII lower case in place.
|
|
|
|
@param [inout] self
|
|
@pure
|
|
*>
|
|
fn void String.convert_to_lower(self)
{
	// Shift each ASCII upper case byte by the distance between 'A' and 'a'.
	for (usz i = 0; i < self.len; i++)
	{
		if (self[i].is_upper() @pure) self[i] += 'a' - 'A';
	}
}
|
|
|
|
fn String String.to_lower_copy(self, Allocator allocator)
{
	// Duplicate first, then lower-case the duplicate in place.
	String lowered = self.copy(allocator);
	lowered.convert_to_lower();
	return lowered;
}
|
|
|
|
// Lower-cased copy allocated on the temp allocator.
fn String String.to_lower_tcopy(self) => self.to_lower_copy(tmem);
|
|
|
|
<*
|
|
Convert a string to ASCII upper case.
|
|
|
|
@param [inout] self
|
|
@pure
|
|
*>
|
|
fn void String.convert_to_upper(self)
{
	// Shift each ASCII lower case byte by the distance between 'a' and 'A'.
	for (usz i = 0; i < self.len; i++)
	{
		if (self[i].is_lower() @pure) self[i] -= 'a' - 'A';
	}
}
|
|
|
|
<*
|
|
Returns a string converted to ASCII upper case.
|
|
|
|
@param [in] self
|
|
@param [inout] allocator
|
|
|
|
@return `a new String converted to ASCII upper case.`
|
|
*>
|
|
fn String String.to_upper_copy(self, Allocator allocator)
{
	// Duplicate first, then upper-case the duplicate in place.
	String raised = self.copy(allocator);
	raised.convert_to_upper();
	return raised;
}
|
|
|
|
fn String String.capitalize_copy(self, Allocator allocator)
{
	String result = self.copy(allocator);
	// Only a leading ASCII lower case letter needs changing.
	if (result.len == 0 || !result[0].is_lower()) return result;
	// Clearing bit 0x20 maps 'a'..'z' onto 'A'..'Z'.
	result[0] &= (char)~0x20;
	return result;
}
|
|
|
|
<*
|
|
Convert a string from `snake_case` to PascalCase.
|
|
|
|
@param [in] self
|
|
@return `"FooBar" from "foo_bar" the resulting pointer may safely be cast to ZString.`
|
|
*>
|
|
fn String String.snake_to_pascal_copy(self, Allocator allocator)
{
	Splitter words = self.tokenize("_");
	// The result is never longer than the input; one extra byte holds the terminator.
	char[] output = allocator::alloc_array(allocator, char, self.len + 1);
	usz written = 0;
	while (try word = words.next())
	{
		assert(word.len > 0);
		// Upper-case the first letter of the word...
		char head = word[0];
		if (head.is_lower()) head = head.to_upper();
		output[written++] = head;
		// ...and copy the rest of the word unchanged.
		String tail = word[1..];
		output[written:tail.len] = tail[..];
		written += tail.len;
	}
	output[written] = 0;
	return (String)output[:written];
}
|
|
|
|
<*
|
|
Modifies the current string in place from `snake_case` to PascalCase.
|
|
|
|
@param [inout] self
|
|
*>
|
|
fn void String.convert_snake_to_pascal(&self)
{
	Splitter words = self.tokenize("_");
	// The output is written over the input's own storage, at or before the read position.
	String output = *self;
	usz written = 0;
	while (try word = words.next())
	{
		assert(word.len > 0);
		// Upper-case the first letter of the word...
		char head = word[0];
		if (head.is_lower()) head = head.to_upper();
		output[written++] = head;
		// ...and move the rest of the word down unchanged.
		String tail = word[1..];
		output[written:tail.len] = tail[..];
		written += tail.len;
	}
	// Shrink the string to the characters actually written.
	*self = output[:written];
}
|
|
|
|
<*
|
|
Convert a string from `PascalCase` to `snake_case`.
|
|
|
|
@param [in] self
|
|
@return `"foo_bar" from "FooBar" the resulting pointer may safely be cast to ZString.`
|
|
*>
|
|
fn String String.pascal_to_snake_copy(self, Allocator allocator)
{
	@pool()
	{
		DString d;
		// Capacity guess: room for underscores amounting to half the input length.
		d.init(tmem, (usz)(self.len * 1.5));
		foreach (i, c : self)
		{
			if (!c.is_upper())
			{
				d.append_char(c);
				continue;
			}
			// An upper case letter (not at index 0) starts a new word when it follows
			// a lower case letter or digit, or when it precedes a lower case letter.
			if (i > 0)
			{
				char prev = self[i - 1];
				bool after_word = prev.is_lower() || prev.is_digit();
				bool before_lower = i < self.len - 1 && self[i + 1].is_lower();
				if (after_word || before_lower) d.append_char('_');
			}
			d.append_char(c.to_lower());
		}
		return d.copy_str(allocator);
	};
}
|
|
|
|
|
|
fn StringIterator String.iterator(self)
{
	// Iterator initialized with this string and 0 as its second field.
	StringIterator it = { self, 0 };
	return it;
}
|
|
|
|
<*
|
|
@param [in] self
|
|
@return `a temporary String converted to ASCII upper case.`
|
|
*>
|
|
// Upper-cased copy allocated on the temp allocator.
fn String String.to_upper_tcopy(self) => self.to_upper_copy(tmem);
|
|
|
|
fn String? from_utf32(Allocator allocator, Char32[] utf32)
{
	usz utf8_len = conv::utf8len_for_utf32(utf32);
	// One extra byte for the zero terminator.
	char* buffer = allocator::malloc_try(allocator, utf8_len + 1)!;
	defer catch allocator::free(allocator, buffer);
	conv::utf32to8_unsafe(utf32, buffer);
	buffer[utf8_len] = 0;
	return (String)buffer[:utf8_len];
}
|
|
|
|
fn String? from_utf16(Allocator allocator, Char16[] utf16)
{
	usz utf8_len = conv::utf8len_for_utf16(utf16);
	// One extra byte for the zero terminator.
	char* buffer = allocator::malloc_try(allocator, utf8_len + 1)!;
	// Free the buffer again if the conversion reports a fault.
	defer catch allocator::free(allocator, buffer);
	conv::utf16to8_unsafe(utf16, buffer)!;
	buffer[utf8_len] = 0;
	return (String)buffer[:utf8_len];
}
|
|
|
|
fn String? from_wstring(Allocator allocator, WString wstring)
{
	// Measure up to the zero terminator, then convert that many Char16 units.
	usz units = wstring.len();
	return from_utf16(allocator, wstring[:units]);
}
|
|
|
|
fn String? tfrom_wstring(WString wstring)
{
	// WString to String using the temp allocator.
	return from_wstring(tmem, wstring) @inline;
}

fn String? tfrom_utf16(Char16[] utf16)
{
	// UTF-16 slice to String using the temp allocator.
	return from_utf16(tmem, utf16) @inline;
}
|
|
|
|
fn usz String.utf8_codepoints(s)
{
	usz count = 0;
	// Count every byte that is not of the form 10xxxxxx.
	for (usz i = 0; i < s.len; i++)
	{
		if (s[i] & 0xC0 != 0x80) count++;
	}
	return count;
}
|
|
|
|
<*
|
|
Determine whether the current string actually points to a ZString-like string.
|
|
|
|
This is done by looking at the byte one step after the end of the string. If this
|
|
is zero, it is considered zero terminated.
|
|
|
|
This function can safely be used with data pointing to null. However, it will not
|
|
work correctly if the pointer is invalid, for example it is already freed.
|
|
*>
|
|
fn bool String.is_zstr(self) @deprecated("Unsafe, use copy instead")
{
	// A null pointer is never zero terminated.
	if (!self.ptr) return false;
	// Reads the byte one past the end of the slice.
	return self.ptr[self.len] == 0;
}
|
|
|
|
<*
|
|
Return a pointer to the string *iff* it is a pointer
|
|
to a zero terminated string, otherwise return a temp allocated zstring copy.
|
|
|
|
This function is suitable if you are converting strings to ZString on the temp
|
|
allocator, but suspect that the String might actually already point to zero
|
|
terminated data.
|
|
|
|
The function looks one step beyond the end of the slice to determine this,
|
|
which means that if that data is then modified after this call, this function
|
|
might behave incorrectly.
|
|
|
|
For this reason, try to ensure that the resulting ZString is immediately used.
|
|
|
|
@ensure return[self.len] == 0
|
|
*>
|
|
// Always returns a zero-terminated copy on the temp allocator.
fn ZString String.quick_zstr(self) @deprecated("Unsafe, use zstr_tcopy instead") => self.zstr_tcopy();
|
|
|
|
|
|
<*
|
|
Convert a number to a given base. If the base is not given, then
|
|
it will be inferred from the number if the string starts with 0x 0o or 0b and the
|
|
base is given as 10.
|
|
|
|
Furthermore it will skip any blanks before the number. Trailing blanks are not skipped and yield MALFORMED_INTEGER.
|
|
|
|
@param $Type : "The type to convert to"
|
|
@param base : "The base to convert to"
|
|
@require base > 0 && base <= 16 : "Unsupported base"
|
|
@return? MALFORMED_INTEGER : "When the value has some illegal character"
|
|
@return? INTEGER_OVERFLOW : "If the value does not fit in the given type"
|
|
@return? EMPTY_STRING : "If the string was empty"
|
|
@return? NEGATIVE_VALUE : "If the type was unsigned, and the value had a - prefix"
|
|
*>
|
|
macro String.to_integer(self, $Type, int base = 10)
{
	usz len = self.len;
	usz index = 0;
	char* ptr = self.ptr;
	// Skip leading blanks only.
	while (index < len && ptr[index].is_blank()) index++;
	if (len == index) return EMPTY_STRING?;
	// Optional sign; a '-' is rejected up front for unsigned target types.
	bool is_negative;
	switch (self[index])
	{
		case '-':
			if ($Type.min == 0) return NEGATIVE_VALUE?;
			is_negative = true;
			index++;
		case '+':
			index++;
		default:
			break;
	}
	if (len == index) return MALFORMED_INTEGER?;
	$Type base_used = ($Type)base;
	// With the default base 10, a leading 0x / 0b / 0o prefix selects the base.
	if (self[index] == '0' && base == 10)
	{
		index++;
		if (index == len) return ($Type)0;
		switch (self[index])
		{
			case 'x':
			case 'X':
				base_used = 16;
				index++;
			case 'b':
			case 'B':
				base_used = 2;
				index++;
			case 'o':
			case 'O':
				base_used = 8;
				index++;
			default:
				break;
		}
		if (len == index) return MALFORMED_INTEGER?;
	}
	$Type value = 0;
	while (index != len)
	{
		char c = self[index++];
		// Map the character to its digit value; anything that is not a digit
		// ends up >= base_used (possibly through wrap-around) and is rejected below.
		switch
		{
			case base_used < 10 || c < 'A': c -= '0';
			case c <= 'F': c -= 'A' - 10;
			case c < 'a' || c > 'f': return MALFORMED_INTEGER?;
			default: c -= 'a' - 10;
		}
		if (c >= base_used) return MALFORMED_INTEGER?;
		$Type digit = ($Type)c;
		// Check the bound BEFORE multiplying. Comparing the wrapped result with the
		// old value misses cases where the product wraps past it (e.g. "300" as char).
		if (is_negative)
		{
			if (value < ($Type.min + digit) / base_used) return INTEGER_OVERFLOW?;
			value = value * base_used - digit;
		}
		else
		{
			if (value > ($Type.max - digit) / base_used) return INTEGER_OVERFLOW?;
			value = value * base_used + digit;
		}
	}
	return value;
}
|
|
|
|
// Signed integer parsers: each forwards to `to_integer` with its own result type.
fn int128? String.to_int128(self, int base = 10)
{
	return self.to_integer(int128, base);
}

fn long? String.to_long(self, int base = 10)
{
	return self.to_integer(long, base);
}

fn int? String.to_int(self, int base = 10)
{
	return self.to_integer(int, base);
}

fn short? String.to_short(self, int base = 10)
{
	return self.to_integer(short, base);
}

fn ichar? String.to_ichar(self, int base = 10)
{
	return self.to_integer(ichar, base);
}
|
|
|
|
// Unsigned integer parsers: each forwards to `to_integer` with its own result type.
fn uint128? String.to_uint128(self, int base = 10)
{
	return self.to_integer(uint128, base);
}

fn ulong? String.to_ulong(self, int base = 10)
{
	return self.to_integer(ulong, base);
}

fn uint? String.to_uint(self, int base = 10)
{
	return self.to_integer(uint, base);
}

fn ushort? String.to_ushort(self, int base = 10)
{
	return self.to_integer(ushort, base);
}

fn char? String.to_uchar(self, int base = 10)
{
	return self.to_integer(char, base);
}
|
|
|
|
// Floating point parsers: each forwards to `to_real` with its own result type.
fn double? String.to_double(self)
{
	return self.to_real(double);
}

fn float? String.to_float(self)
{
	return self.to_real(float);
}
|
|
|
|
|
|
<*
|
|
Create a Splitter to track tokenizing of a string. Tokenize will turn "foo:bar::baz" into
|
|
"foo", "bar" and "baz", if you want the empty string to be present, use `tokenize_all`
|
|
instead.
|
|
|
|
@param [in] split : "The string to use for splitting"
|
|
@return "A Splitter to track the state"
|
|
*>
|
|
fn Splitter String.tokenize(self, String split)
{
	// Fields not named here keep their zero value.
	Splitter state = { .string = self, .split = split, .type = TOKENIZE };
	return state;
}
|
|
|
|
<*
|
|
Create a Splitter to track tokenizing of a string. Unlike `tokenize`, empty tokens
are kept, so "foo:bar::baz" yields "foo", "bar", "" and "baz".
|
|
|
|
@param [in] split : "The string to use for splitting"
|
|
@param skip_last : "Set to true to not include the last empty token if present (default: false)"
|
|
@return "A Splitter to track the state"
|
|
*>
|
|
fn Splitter String.tokenize_all(self, String split, bool skip_last = false)
{
	// `skip_last` selects which of the two "keep empty tokens" modes is used.
	SplitterType mode = skip_last ? SplitterType.TOKENIZE_ALL_SKIP_LAST : SplitterType.TOKENIZE_ALL;
	return { .string = self, .split = split, .type = mode };
}
|
|
|
|
// Deprecated alias for `tokenize_all` with `skip_last` enabled.
fn Splitter String.splitter(self, String split) @deprecated("Use tokenize_all instead") => self.tokenize_all(split, skip_last: true);
|
|
|
|
<*
|
|
This macro will create a string description of a struct.
|
|
|
|
@param [&inout] allocator : "The allocator to use"
|
|
@param x : "The struct to create a description of"
|
|
*>
|
|
macro String from_struct(Allocator allocator, x)
{
	DString s;
	// Build the description with a 512-byte stack allocator, then copy it out.
	@stack_mem(512; Allocator scratch)
	{
		s.init(allocator: scratch);
		io::fprint(&s, x)!!;
		return s.copy_str(allocator);
	};
}
|
|
|
|
<*
|
|
This macro will create a temporary string description of a struct.
|
|
|
|
@param x : "The struct to create a description of"
|
|
*>
|
|
macro String tfrom_struct(x)
{
	// Same as `from_struct`, with the result on the temp allocator.
	return from_struct(tmem, x);
}
|
|
|
|
// UTF-16 surrogate constants (private to this module).
// NOTE(review): not referenced in the visible part of this file; confirm where they are used.
const uint SURROGATE_OFFSET @private         = 0x10000;
const uint SURROGATE_GENERIC_MASK @private   = 0xF800;
const uint SURROGATE_MASK @private           = 0xFC00;
const uint SURROGATE_CODEPOINT_MASK @private = 0x03FF;
const uint SURROGATE_BITS @private           = 10;
const uint SURROGATE_LOW_VALUE @private      = 0xDC00;
const uint SURROGATE_HIGH_VALUE @private     = 0xD800;
|
|
|
|
// Selects how a Splitter treats empty tokens.
enum SplitterType
{
	// Empty tokens are skipped.
	TOKENIZE,
	// Empty tokens are returned, including one at the end of the string.
	TOKENIZE_ALL,
	// Empty tokens are returned, except that none is produced at the end of the string.
	TOKENIZE_ALL_SKIP_LAST
}
|
|
|
|
<*
|
|
Splitter handles tokenizing strings.
|
|
*>
|
|
struct Splitter
{
	// The string being tokenized.
	String string;
	// The delimiter searched for between tokens.
	String split;
	// Offset into `string` where the next search starts.
	usz current;
	// Controls the handling of empty tokens.
	SplitterType type;
	// NOTE(review): not read or written by the code visible in this file.
	int last_index;
}
|
|
|
|
fn void Splitter.reset(&self)
{
	// Rewind to the start of the string; the other fields are left untouched.
	self.current = 0;
}
|
|
|
|
fn String? Splitter.next(&self)
{
	while (true)
	{
		usz len = self.string.len;
		usz current = self.current;
		// Past the end: everything, including any final empty token, was returned.
		if (current > len) return NO_MORE_ELEMENT?;
		if (current == len)
		{
			// Only TOKENIZE_ALL produces an empty token at the end of the string.
			// NOTE(review): this point is also reached after a final token that had
			// no trailing delimiter, so e.g. "a:b" yields "a", "b", "" in
			// TOKENIZE_ALL mode. Confirm whether that is intended.
			if (self.type != TOKENIZE_ALL) return NO_MORE_ELEMENT?;
			self.current++;
			// For an empty input `current` is 0 and `current - 1` would wrap around,
			// so return an empty slice at the start instead.
			if (!current) return self.string[:0];
			return self.string[current - 1:0];
		}
		String remaining = self.string[current..];
		usz? next = remaining.index_of(self.split);
		if (try next)
		{
			// Continue after the delimiter on the following call.
			self.current = current + next + self.split.len;
			// In TOKENIZE mode an empty token is skipped.
			if (!next && self.type == TOKENIZE) continue;
			return remaining[:next];
		}
		// No more delimiters: the rest of the string is the final token.
		self.current = len;
		return remaining;
	}
}
|