module std::core::string;
import std::ascii;

distinct ZString = inline char*;
distinct WString = inline Char16*;
def Char32 = uint;
def Char16 = ushort;

fault UnicodeResult
{
	INVALID_UTF8,
	INVALID_UTF16,
	CONVERSION_FAILED,
}

const uint SURROGATE_OFFSET @private = 0x10000;
const uint SURROGATE_GENERIC_MASK @private = 0xF800;
const uint SURROGATE_MASK @private = 0xFC00;
const uint SURROGATE_CODEPOINT_MASK @private = 0x03FF;
const uint SURROGATE_BITS @private = 10;
const uint SURROGATE_LOW_VALUE @private = 0xDC00;
const uint SURROGATE_HIGH_VALUE @private = 0xD800;

fault NumberConversion
{
	EMPTY_STRING,
	NEGATIVE_VALUE,
	MALFORMED_INTEGER,
	INTEGER_OVERFLOW,
	MALFORMED_FLOAT,
	FLOAT_OUT_OF_RANGE,
}

/**
 * Format into a temporary DString and return a view of its contents.
 * The initial capacity is a guess: the format length plus 8 bytes per vararg.
 **/
macro String tformat(String fmt, ...)
{
	DString str = dstring::temp_with_capacity(fmt.len + $vacount * 8);
	str.appendf(fmt, $vasplat());
	return str.str_view();
}

/**
 * Format into a temporary DString, then copy the result using the
 * supplied allocator (the heap allocator by default).
 **/
macro String new_format(String fmt, ..., Allocator* allocator = mem::heap())
{
	@pool(allocator)
	{
		DString str = dstring::temp_with_capacity(fmt.len + $vacount * 8);
		str.appendf(fmt, $vasplat());
		return str.copy_str(allocator);
	};
}

/**
 * Return true if the character c occurs anywhere in set.
 **/
macro bool char_in_set(char c, String set)
{
	foreach (ch : set) if (ch == c) return true;
	return false;
}

/**
 * Join the strings in s, placing joiner between consecutive elements.
 * The result is copied using the supplied allocator.
 **/
fn String join_new(String[] s, String joiner, Allocator* allocator = mem::heap())
{
	// Nothing to join: hand back a zero length view of a small zeroed allocation.
	if (!s)
	{
		return (String)allocator.new_zero_array(char, 2)[:0];
	}

	// Upper bound on the result size (counts one joiner more than is needed).
	usz total_size = joiner.len * s.len;
	foreach (String* &str : s)
	{
		total_size += str.len;
	}

	@pool(allocator)
	{
		DString res = dstring::temp_with_capacity(total_size);
		res.append(s[0]);
		foreach (String* &str : s[1..])
		{
			res.append(joiner);
			res.append(*str);
		}
		return res.copy_str(allocator);
	};
}

/**
 * Return the sub-range of the string with every leading and trailing
 * character that appears in to_trim removed. No copy is made.
 *
 * @param [in] string
 * @param [in] to_trim
 **/
fn String String.trim(string, String to_trim = "\t\n\r ")
{
	usz start = 0;
	usz len = string.len;
	while (start < len && char_in_set(string[start], to_trim)) start++;
	// Everything was trimmed away (or the string was empty).
	if (start == len) return string[:0];
	usz end = len - 1;
	while (end > start && char_in_set(string[end], to_trim)) end--;
	return string[start..end];
}

/**
 * Check whether the string begins with needle. An empty needle always matches.
 *
 * @param [in] string
 * @param [in] needle
 **/
fn bool String.starts_with(string, String needle)
{
	if (needle.len > string.len) return false;
	if (!needle.len) return true;
	return string[:needle.len] == needle;
}

/**
 * Check whether the string ends with needle. An empty needle always matches.
 *
 * @param [in] string
 * @param [in] needle
 **/
fn bool String.ends_with(string, String needle)
{
	if (needle.len > string.len) return false;
	if (!needle.len) return true;
	return string[^needle.len..] == needle;
}

/**
 * Strip the front of the string if the prefix exists.
 *
 * @param [in] string
 * @param [in] needle
 **/
fn String String.strip(string, String needle)
{
	if (!needle.len || !string.starts_with(needle)) return string;
	return string[needle.len..];
}

/**
 * Strip the end of the string if the suffix exists.
 *
 * @param [in] string
 * @param [in] needle
 **/
fn String String.strip_end(string, String needle)
{
	if (!needle.len || !string.ends_with(needle)) return string;
	// Note that this is the safe way if we want to support zero length.
	return string[:(string.len - needle.len)];
}

/**
 * Split a string into parts, e.g "a|b|c" split with "|" yields { "a", "b", "c" }
 *
 * @param [in] s
 * @param [in] needle
 * @param [&inout] allocator "The allocator, defaults to the heap allocator"
 * @param max "Max number of elements, 0 means no limit, defaults to 0"
 * @require needle.len > 0 "The needle must be at least 1 character long"
 * @ensure return.len > 0
 **/
fn String[] String.split(s, String needle, usz max = 0, Allocator* allocator = mem::heap())
{
	usz capacity = 16;
	usz i = 0;
	String* holder = allocator.new_array(String, capacity);
	bool no_more = false;
	while (!no_more)
	{
		// Once max - 1 parts are stored, stop searching so the rest of the
		// string becomes the final part. With max == 0 the unsigned
		// subtraction wraps around, so the limit is effectively never hit.
		usz! index = i == max - 1 ? SearchResult.MISSING? : s.index_of(needle);
		String res @noinit;
		if (try index)
		{
			res = s[:index];
			s = s[index + needle.len..];
		}
		else
		{
			res = s;
			no_more = true;
		}
		// Grow the holder geometrically when it is full.
		if (i == capacity)
		{
			capacity *= 2;
			holder = allocator.realloc(holder, String.sizeof * capacity);
		}
		holder[i++] = res;
	}
	return holder[:i];
}

/**
 * This function is identical to String.split, but implicitly uses the
 * temporary allocator.
 *
 * @param [in] s
 * @param [in] needle
 * @param max "Max number of elements, 0 means no limit, defaults to 0"
 **/
fn String[] String.tsplit(s, String needle, usz max = 0)
{
	return s.split(needle, max, mem::temp()) @inline;
}

/**
 * Return true if needle can be found in s using String.index_of.
 **/
fn bool String.contains(s, String needle)
{
	return @ok(s.index_of(needle));
}

/**
 * Find the index of the first incidence of a character.
 *
 * @param [in] s
 * @pure
 * @ensure return < s.len
 * @return "the index of the needle"
 * @return! SearchResult.MISSING "if the needle cannot be found"
 **/
fn usz! String.index_of_char(s, char needle)
{
	foreach (i, c : s)
	{
		if (c == needle) return i;
	}
	return SearchResult.MISSING?;
}

/**
 * Find the index of the last incidence of a character.
 *
 * @param [in] s
 * @pure
 * @ensure return < s.len
 * @return "the index of the needle"
 * @return! SearchResult.MISSING "if the needle cannot be found"
 **/
fn usz! String.rindex_of_char(s, char needle)
{
	foreach_r (i, c : s)
	{
		if (c == needle) return i;
	}
	return SearchResult.MISSING?;
}

/**
 * Find the index of the first incidence of a string.
 *
 * @param [in] s
 * @param [in] needle
 * @pure
 * @ensure return < s.len
 * @require needle.len > 0 : "The needle must be len 1 or more"
 * @return "the index of the needle"
 * @return! SearchResult.MISSING "if the needle cannot be found"
 **/
fn usz! String.index_of(s, String needle)
{
	usz needed = needle.len;
	if (needed > 0 && s.len >= needed)
	{
		char first = needle[0];
		// Only start positions that leave room for the whole needle are visited;
		// the first character is compared before the full slice comparison.
		foreach (i, c: s[..^needed])
		{
			if (c == first && s[i:needed] == needle) return i;
		}
	}
	return SearchResult.MISSING?;
}

/**
 * Find the index of the last incidence of a string.
 *
 * @param [in] s
 * @param [in] needle
 * @pure
 * @ensure return < s.len
 * @require needle.len > 0 "The needle must be len 1 or more"
 * @return "the index of the needle"
 * @return! SearchResult.MISSING "if the needle cannot be found"
 **/
fn usz! String.rindex_of(s, String needle)
{
	usz needed = needle.len;
	if (needed > 0 && s.len >= needed)
	{
		char first = needle[0];
		// Same scan as index_of, but walking the candidate positions backwards.
		foreach_r (i, c: s[..^needed])
		{
			if (c == first && s[i:needed] == needle) return i;
		}
	}
	return SearchResult.MISSING?;
}

/**
 * View the zero terminated string as a String without copying.
 **/
fn String ZString.str_view(str)
{
	return (String)(str[:str.len()]);
}

/**
 * Count the bytes up to the terminating zero that are not of the
 * form 10xxxxxx, i.e. skip UTF-8 continuation bytes.
 **/
fn usz ZString.char_len(str)
{
	usz len = 0;
	char* ptr = (char*)str;
	while (char c = ptr++[0])
	{
		if (c & 0xC0 != 0x80) len++;
	}
	return len;
}

/**
 * Count the bytes before the terminating zero.
 **/
fn usz ZString.len(str)
{
	usz len = 0;
	char* ptr = (char*)str;
	while (char c = ptr++[0]) len++;
	return len;
}

/**
 * Copy the string into a newly allocated, zero terminated buffer.
 **/
fn ZString String.zstr_copy(s, Allocator* allocator = mem::heap())
{
	usz len = s.len;
	char* str = allocator.alloc(len + 1);
	mem::copy(str, s.ptr, len);
	str[len] = 0;
	return (ZString)str;
}

/**
 * Concatenate s1 and s2 into a newly allocated buffer. A zero byte is
 * written after the data but is not included in the returned length.
 **/
fn String String.concat(s1, String s2, Allocator* allocator = mem::heap())
{
	usz full_len = s1.len + s2.len;
	char* str = allocator.alloc(full_len + 1);
	usz s1_len = s1.len;
	mem::copy(str, s1.ptr, s1_len);
	mem::copy(str + s1_len, s2.ptr, s2.len);
	str[full_len] = 0;
	return (String)str[:full_len];
}

/**
 * Same as String.concat, using the temporary allocator.
 **/
fn String String.tconcat(s1, String s2) => s1.concat(s2, mem::temp());

/**
 * Same as String.zstr_copy, using the temporary allocator.
 **/
fn ZString String.zstr_tcopy(s) => s.zstr_copy(mem::temp()) @inline;

/**
 * Copy the string into a newly allocated buffer. A zero byte is written
 * after the data but is not included in the returned length.
 **/
fn String String.copy(s, Allocator* allocator = mem::heap())
{
	usz len = s.len;
	char* str = allocator.alloc(len + 1);
	mem::copy(str, s.ptr, len);
	str[len] = 0;
	return (String)str[:len];
}

/**
 * Free the string data using the allocator and reset the string to "".
 * Strings of length zero are left untouched and nothing is freed for them.
 **/
fn void String.free(&s, Allocator* allocator = mem::heap())
{
	if (!s.len) return;
	allocator.free(s.ptr);
	*s = "";
}

/**
 * Same as String.copy, using the temporary allocator.
 **/
fn String String.tcopy(s) => s.copy(mem::temp()) @inline;

/**
 * Copy the zero terminated string into a new String.
 **/
fn String ZString.copy(z, Allocator* allocator = mem::temp())
{
	// NOTE(review): the default here is the temp allocator, whereas String.copy
	// defaults to the heap allocator. Confirm that this difference is intended.
	return z.str_view().copy(allocator) @inline;
}

/**
 * Copy the zero terminated string into a new String using the temporary allocator.
 **/
fn String ZString.tcopy(z)
{
	return z.str_view().copy(mem::temp()) @inline;
}

/**
 * Convert an UTF-8 string to UTF-16
 * @return "The UTF-16 string as a slice, allocated using the given allocator"
 * @return! UnicodeResult.INVALID_UTF8 "If the string contained an invalid UTF-8 sequence"
 * @return! AllocationFailure "If allocation of the string fails"
 **/
fn Char16[]! String.to_new_utf16(s, Allocator* allocator = mem::heap())
{
	usz len16 = conv::utf16len_for_utf8(s);
	Char16* data = allocator.new_array_checked(Char16, len16 + 1)!;
	// Release the buffer again if the conversion below fails.
	defer catch allocator.free(data);
	conv::utf8to16_unsafe(s, data)!;
	data[len16] = 0;
	return data[:len16];
}

/**
 * Convert an UTF-8 string to UTF-16
 * @return "The UTF-16 string as a slice, allocated using the temporary allocator"
 * @return! UnicodeResult.INVALID_UTF8 "If the string contained an invalid UTF-8 sequence"
 * @return! AllocationFailure "If allocation of the string fails"
 **/
fn Char16[]! String.to_temp_utf16(s)
{
	return s.to_new_utf16(mem::temp());
}

/**
 * Convert to UTF-16 with String.to_new_utf16 and return the data pointer as a WString.
 **/
fn WString! String.to_new_wstring(s, Allocator* allocator = mem::heap())
{
	return (WString)s.to_new_utf16(allocator).ptr;
}

/**
 * Convert to UTF-16 with String.to_temp_utf16 and return the data pointer as a WString.
 **/
fn WString! String.to_temp_wstring(s)
{
	return (WString)s.to_temp_utf16().ptr;
}

/**
 * Convert an UTF-8 string to UTF-32. A zero element is written after the
 * data but is not included in the returned slice.
 **/
fn Char32[]! String.to_new_utf32(s, Allocator* allocator = mem::heap())
{
	usz codepoints = conv::utf8_codepoints(s);
	Char32* data = allocator.new_array(Char32, codepoints + 1);
	// Release the buffer again if the conversion below fails.
	defer catch allocator.free(data);
	conv::utf8to32_unsafe(s, data)!;
	data[codepoints] = 0;
	return data[:codepoints];
}

/**
 * Same as String.to_new_utf32, using the temporary allocator.
 **/
fn Char32[]! String.to_temp_utf32(s)
{
	return s.to_new_utf32(mem::temp());
}

/**
 * Convert every character for which is_upper() holds to lower case, in place.
 **/
fn void String.convert_ascii_to_lower(s)
{
	foreach (&c : s) if (c.is_upper()) *c += 'a' - 'A';
}

/**
 * Return a lower case copy of the string, allocated with the given allocator.
 **/
fn String String.new_ascii_to_lower(s, Allocator* allocator = mem::heap())
{
	String copy = s.copy(allocator);
	copy.convert_ascii_to_lower();
	return copy;
}

/**
 * Return a lower case copy of the string, allocated with the temporary allocator.
 **/
fn String String.temp_ascii_to_lower(s, Allocator* allocator = mem::heap())
{
	// NOTE(review): the allocator parameter is ignored, the temp allocator is
	// always used. It is kept only so that existing callers keep compiling.
	return s.new_ascii_to_lower(mem::temp());
}

/**
 * Convert every character for which is_lower() holds to upper case, in place.
 **/
fn void String.convert_ascii_to_upper(s)
{
	foreach (&c : s) if (c.is_lower()) *c -= 'a' - 'A';
}

/**
 * Return an upper case copy of the string, allocated with the given allocator.
 **/
fn String String.new_ascii_to_upper(s, Allocator* allocator = mem::heap())
{
	String copy = s.copy(allocator);
	copy.convert_ascii_to_upper();
	return copy;
}

/**
 * Return an upper case copy of the string, allocated with the temporary allocator.
 **/
fn String String.temp_ascii_to_upper(s)
{
	return s.new_ascii_to_upper(mem::temp());
}

/**
 * Create a new UTF-8 string from UTF-32 data. A zero byte is written after
 * the data but is not included in the returned length.
 **/
fn String! new_from_utf32(Char32[] utf32, Allocator* allocator = mem::heap())
{
	usz len = conv::utf8len_for_utf32(utf32);
	char* data = allocator.alloc_checked(len + 1)!;
	defer catch allocator.free(data);
	conv::utf32to8_unsafe(utf32, data);
	data[len] = 0;
	return (String)data[:len];
}

/**
 * Create a new UTF-8 string from UTF-16 data. A zero byte is written after
 * the data but is not included in the returned length.
 **/
fn String! new_from_utf16(Char16[] utf16, Allocator* allocator = mem::heap())
{
	usz len = conv::utf8len_for_utf16(utf16);
	char* data = allocator.alloc_checked(len + 1)!;
	defer catch allocator.free(data);
	conv::utf16to8_unsafe(utf16, data)!;
	data[len] = 0;
	return (String)data[:len];
}

/**
 * Create a new UTF-8 string from a zero terminated UTF-16 string.
 **/
fn String! new_from_wstring(WString wstring, Allocator* allocator = mem::heap())
{
	// Measure the length up to the terminating zero element.
	usz utf16_len;
	while (wstring[utf16_len] != 0) utf16_len++;
	Char16[] utf16 = wstring[:utf16_len];
	return new_from_utf16(utf16, allocator);
}

/**
 * Same as new_from_wstring, using the temporary allocator.
 **/
fn String! temp_from_wstring(WString wstring) => new_from_wstring(wstring, mem::temp()) @inline;

/**
 * Same as new_from_utf16, using the temporary allocator.
 **/
fn String! temp_from_utf16(Char16[] utf16) => new_from_utf16(utf16, mem::temp()) @inline;

/**
 * Count the bytes of the string that are not of the form 10xxxxxx,
 * i.e. skip UTF-8 continuation bytes.
 **/
fn usz String.utf8_codepoints(s)
{
	usz len = 0;
	foreach (char c : s)
	{
		if (c & 0xC0 != 0x80) len++;
	}
	return len;
}

/**
 * Parse the string as an integer of the given type.
 *
 * Leading characters accepted by ascii::is_blank_m are skipped, then an
 * optional sign and an optional base prefix are read: 0x or 0X selects
 * base 16, 0b or 0B base 2, 0o or 0O base 8, anything else is base 10.
 * Every remaining character must be a digit of the selected base.
 *
 * The result is optional and fails with one of:
 * NumberConversion.EMPTY_STRING if nothing follows the blanks,
 * NumberConversion.NEGATIVE_VALUE on a minus sign for a type whose min is 0,
 * NumberConversion.MALFORMED_INTEGER on a missing or invalid digit,
 * NumberConversion.INTEGER_OVERFLOW if the wrap-around check triggers.
 **/
macro String.to_integer(string, $Type)
{
	usz len = string.len;
	usz index = 0;
	char* ptr = string.ptr;
	while (index < len && ascii::is_blank_m(ptr[index])) index++;
	if (len == index) return NumberConversion.EMPTY_STRING?;
	bool is_negative;
	switch (string[index])
	{
		case '-':
			if ($Type.min == 0) return NumberConversion.NEGATIVE_VALUE?;
			is_negative = true;
			index++;
		case '+':
			index++;
		default:
			break;
	}
	if (len == index) return NumberConversion.MALFORMED_INTEGER?;
	$Type base = 10;
	if (string[index] == '0')
	{
		index++;
		// A lone "0" (after the optional sign) is simply zero.
		if (index == len) return ($Type)0;
		switch (string[index])
		{
			case 'x':
			case 'X':
				base = 16;
				index++;
			case 'b':
			case 'B':
				base = 2;
				index++;
			case 'o':
			case 'O':
				base = 8;
				index++;
			default:
				break;
		}
		// A base prefix must be followed by at least one digit.
		if (len == index) return NumberConversion.MALFORMED_INTEGER?;
	}
	$Type value = 0;
	while (index != len)
	{
		// Decode one digit. Only '0'..'9' and, for base 16, 'A'..'F' and
		// 'a'..'f' are accepted; the hex letters map to the values 10..15.
		char c = {|
			char ch = string[index++];
			if (ch >= '0' && ch <= '9') return (char)(ch - '0');
			if (base != 16) return NumberConversion.MALFORMED_INTEGER?;
			if (ch >= 'A' && ch <= 'F') return (char)(ch - 'A' + 10);
			if (ch >= 'a' && ch <= 'f') return (char)(ch - 'a' + 10);
			return NumberConversion.MALFORMED_INTEGER?;
		|}!;
		// Rejects decimal digits that are too large for base 2 and base 8.
		if (c >= base) return NumberConversion.MALFORMED_INTEGER?;
		// NOTE(review): overflow is detected by checking that the accumulated
		// value did not move in the wrong direction. This looks like a heuristic
		// that may miss some wrap-arounds, confirm before relying on it.
		value = {|
			if (is_negative)
			{
				$Type new_value = value * base - c;
				if (new_value > value) return NumberConversion.INTEGER_OVERFLOW?;
				return new_value;
			}
			$Type new_value = value * base + c;
			if (new_value < value) return NumberConversion.INTEGER_OVERFLOW?;
			return new_value;
		|}!;
	}
	return value;
}

// Typed convenience wrappers around String.to_integer and String.to_real.
fn int128! String.to_int128(s) => s.to_integer(int128);
fn long! String.to_long(s) => s.to_integer(long);
fn int! String.to_int(s) => s.to_integer(int);
fn short! String.to_short(s) => s.to_integer(short);
fn ichar! String.to_ichar(s) => s.to_integer(ichar);

fn uint128! String.to_uint128(s) => s.to_integer(uint128);
fn ulong! String.to_ulong(s) => s.to_integer(ulong);
fn uint! String.to_uint(s) => s.to_integer(uint);
fn ushort! String.to_ushort(s) => s.to_integer(ushort);
fn char! String.to_uchar(s) => s.to_integer(char);

fn double! String.to_double(s) => s.to_real(double);
fn float! String.to_float(s) => s.to_real(float);