Updated grammar. Removal of elif. Removal of ':' ';' in some ct statements. Empty faults is now an error. Remove "define" for types. Remove "private". Better errors on incorrect bitstruct syntax. Introduction of wildcard type rather than optional wildcard. Removal of scaled vector type. mkdir and rmdir. Disallow define @Foo() = { @inline }. Add handling for @optreturn and change it to @return!. Restrict interface style functions. Updated x64 ABI. stdlib updates to string. Removed deprecated functions. Update how variadics are implemented. Extended error messages. x86 ABI fixes. Shift check fixes. '!' and '?' are flipped. No trailing ',' allowed in functions. Fix to string parsing. Allow l suffix. Simplifying flatpath. any replaces variant, anyfault replaces anyerr. Allow getting the underlying type of anyfault. De-duplicate string constants. Fix of readme. Extended list. Fix of "(MyEnum)x + 1". Clock and DateTime types. Fixes to array concat.

2026-02-27 12:01:16 +00:00 · 2023-03-23 14:49:51 +01:00
parent d14e778232
commit 809321e20c
270 changed files with 8777 additions and 7237 deletions
--- a/lib/std/core/string.c3
+++ b/lib/std/core/string.c3
@@ -1,9 +1,17 @@
 module std::core::string;
+import std::ascii;

 typedef ZString = distinct inline char*;
 typedef Char32 = uint;
 typedef Char16 = ushort;

+fault UnicodeResult
+{
+	INVALID_UTF8,
+	INVALID_UTF16,
+	CONVERSION_FAILED,
+}
+
 const uint SURROGATE_OFFSET @private = 0x10000;
 const uint SURROGATE_GENERIC_MASK @private = 0xF800;
 const uint SURROGATE_MASK @private = 0xFC00;
@@ -47,6 +55,31 @@ macro bool char_in_set(char c, String set)
 	return false;
 }

+fn String join(String[] s, String joiner, Allocator* using = mem::heap()) // Concatenates the elements of `s`, placing `joiner` between consecutive elements.
+{
+	if (!s)
+	{
+		return (String)(calloc(char, 2, .using = using)[:0]); // Zero-length slice over a fresh 2-char allocation made with `using`.
+	}
+
+	usz total_size = joiner.len * s.len; // Capacity estimate: counts one joiner per element, one more than is actually appended.
+	foreach (String* &str : s)
+	{
+		total_size += str.len;
+	}
+	@stack_mem(256; Allocator* mem) // NOTE(review): presumably provides a 256-byte stack-backed scratch allocator as `mem` — confirm against @stack_mem.
+	{
+		DString res = dstring::new_with_capacity(total_size, .using = mem); // The builder lives in the scratch allocator, not in `using`.
+		res.append(s[0]);
+		foreach (String* &str : s[1..]) // Every element after the first is preceded by the joiner.
+		{
+			res.append(joiner);
+			res.append(*str);
+		}
+		return res.copy_str(using); // The returned String is a copy made with the caller's allocator.
+	};
+}
+
 /**
 * @param [in] string
 * @param [in] to_trim
@@ -128,7 +161,7 @@ fn String[] String.split(String s, String needle, usz max = 0, Allocator* using
 	bool no_more = false;
 	while (!no_more)
 	{
-		usz! index = i == max - 1 ? SearchResult.MISSING! : s.index_of(needle);
+		usz! index = i == max - 1 ? SearchResult.MISSING? : s.index_of(needle);
 		String res @noinit;
 		if (try index)
 		{
@@ -163,6 +196,11 @@ fn String[] String.tsplit(String s, String needle, usz max = 0)
 	return s.split(needle, max, mem::temp()) @inline;
 }

+fn bool String.contains(String s, String needle) // Reports whether `needle` can be found in `s`.
+{
+	return @ok(s.index_of(needle)); // Delegates to index_of, so its `needle.len > 0` requirement applies here as well.
+}
+
 /**
 * Find the index of the first incidence of a string.
 *
@@ -170,7 +208,9 @@ fn String[] String.tsplit(String s, String needle, usz max = 0)
 * @param [in] needle
 * @pure
 * @ensure return < s.len
- * @require needle.len > 0 "The needle must be len 1 or more"
+ * @require needle.len > 0 : "The needle must be len 1 or more"
+ * @return "the index of the needle"
+ * @return! SearchResult.MISSING "if the needle cannot be found"
 **/
 fn usz! String.index_of(String s, String needle)
 {
@@ -194,7 +234,7 @@ fn usz! String.index_of(String s, String needle)
 			search = needle[0];
 		}
 	}
-	return SearchResult.MISSING!;
+	return SearchResult.MISSING?;
 }

 /**
@@ -205,6 +245,8 @@ fn usz! String.index_of(String s, String needle)
 * @pure
 * @ensure return < s.len
 * @require needle.len > 0 "The needle must be len 1 or more"
+ * @return "the index of the needle"
+ * @return! SearchResult.MISSING "if the needle cannot be found"
 **/
 fn usz! String.rindex_of(String s, String needle)
 {
@@ -228,9 +270,34 @@ fn usz! String.rindex_of(String s, String needle)
 			search = needle[^1];
 		}
 	}
-	return SearchResult.MISSING!;
+	return SearchResult.MISSING?;
 }

+fn String ZString.as_str(ZString str) // Views the zero-terminated string as a String; no bytes are copied.
+{
+	return (String)((char*)str)[:str.len()]; // Slice starts at the original pointer; its length is the byte count before the terminator.
+}
+
+fn usz ZString.char_len(ZString str) // Counts the bytes before the terminator that are not UTF-8 continuation bytes (10xxxxxx).
+{
+	usz codepoints = 0;
+	char* bytes = (char*)str;
+	for (usz i = 0; bytes[i] != 0; i++) // Stops at the zero terminator.
+	{
+		if ((bytes[i] & 0xC0) != 0x80) codepoints++;
+	}
+	return codepoints;
+}
+
+fn usz ZString.len(ZString str) // Number of bytes that precede the zero terminator.
+{
+	char* bytes = (char*)str;
+	usz count = 0;
+	while (bytes[count] != 0) count++;
+	return count;
+}
+
+
 fn ZString String.zstr_copy(String s, Allocator* using = mem::heap())
 {
    usz len = s.len;
@@ -270,23 +337,154 @@ fn String String.tcopy(String s) => s.copy(mem::temp()) @inline;
 fn String ZString.copy(ZString z, Allocator* using = mem::heap()) => z.as_str().copy(using) @inline;
 fn String ZString.tcopy(ZString z) => z.as_str().copy(mem::temp()) @inline;

+/**
+ * Convert a UTF-8 string to UTF-16
+ * @return "The UTF-16 string as a slice, allocated using the given allocator"
+ * @return! UnicodeResult.INVALID_UTF8 "If the string contained an invalid UTF-8 sequence"
+ * @return! AllocationFailure "If allocation of the string fails"
+ **/
 fn Char16[]! String.to_utf16(String s, Allocator* using = mem::heap())
 {
 	usz len16 = conv::utf16len_for_utf8(s);
-	Char16* data = malloc_checked(Char16, len16 + 1, .using = using)?;
-	conv::utf8to16_unsafe(s, data)?;
+	Char16* data = malloc_checked(Char16, len16 + 1, .using = using)!; // One extra element for the terminator written below.
+	defer catch free(data, .using = using); // Release the buffer if the conversion below faults, as from_utf16 does.
+	conv::utf8to16_unsafe(s, data)!;
 	data[len16] = 0;
 	return data[:len16];
 }

+/**
+ * Convert a UTF-8 string to UTF-32.
+ * The slice has one element per code point reported by conv::utf8_codepoints, and a zero
+ * element is written directly after it. A fault from the allocation or from the
+ * conversion is returned to the caller; the buffer is released in the latter case.
+ * @return "The UTF-32 string as a slice, allocated using the given allocator"
+ **/
+fn Char32[]! String.to_utf32(String s, Allocator* using = mem::heap())
+{
+	usz codepoints = conv::utf8_codepoints(s);
+	Char32* data = malloc_checked(Char32, codepoints + 1, .using = using)!;
+	defer catch free(data, .using = using); // Release the buffer if the conversion below faults, as from_utf16 does.
+	conv::utf8to32_unsafe(s, data)!;
+	data[codepoints] = 0;
+	return data[:codepoints];
+}
+
+/**
+ * Convert a UTF-32 slice to a UTF-8 String allocated with the given allocator.
+ * A zero byte is written directly after the returned slice.
+ **/
+fn String! from_utf32(Char32[] utf32, Allocator* using = mem::heap())
+{
+	usz len = conv::utf8len_for_utf32(utf32);
+	char* data = malloc_checked(len + 1, .using = using)!; // `!` returns an allocation fault to the caller.
+	defer catch free(data, .using = using); // Frees the buffer if the function leaves with a fault after this point.
+	conv::utf32to8_unsafe(utf32, data); // NOTE(review): result is not checked with `!`, unlike the UTF-16 path — confirm this call cannot fault.
+	data[len] = 0;
+	return (String)data[:len];
+}
+
 fn String! from_utf16(Char16[] utf16, Allocator* using = mem::heap())
 {
 	usz len = conv::utf8len_for_utf16(utf16);
-	char* data = malloc_checked(len + 1, .using = using)?;
-	conv::utf16to8_unsafe(utf16, data)?;
+	char* data = malloc_checked(len + 1, .using = using)!; // One extra byte for the terminator written below; `!` returns an allocation fault.
+	defer catch free(data, .using = using); // Frees the buffer if the conversion below faults.
+	conv::utf16to8_unsafe(utf16, data)!;
 	data[len] = 0;
 	return (String)data[:len];
 }

+fn String! from_zutf16(Char16* utf16_pointer, Allocator* using = mem::heap()) // Converts zero-terminated UTF-16 by measuring it and delegating to from_utf16.
+{
+	usz units = 0;
+	while (utf16_pointer[units] != 0) units++; // Count code units up to, not including, the terminator.
+	return from_utf16(utf16_pointer[:units], using);
+}
+
+fn usz String.utf8_codepoints(String s) // Counts the bytes of `s` that are not UTF-8 continuation bytes (10xxxxxx).
+{
+	usz count = 0;
+	for (usz i = 0; i < s.len; i++)
+	{
+		if ((s[i] & 0xC0) != 0x80) count++;
+	}
+	return count;
+}
+
+
+/**
+ * Parse the string as an integer of the given type.
+ *
+ * Leading blanks (as decided by ascii::is_blank_m) are skipped, then an optional '-' or '+'
+ * sign and an optional base prefix are read: 0x/0X for base 16, 0b/0B for base 2 and
+ * 0o/0O for base 8. Without a prefix the digits are decimal. Every remaining character
+ * up to the end of the string must be a digit of the selected base.
+ *
+ * @return "the parsed value as $Type"
+ * @return! NumberConversion.EMPTY_STRING "if nothing follows the leading blanks"
+ * @return! NumberConversion.NEGATIVE_VALUE "if a '-' sign is given and $Type is unsigned"
+ * @return! NumberConversion.MALFORMED_INTEGER "if digits are missing or a character is not a digit of the base"
+ * @return! NumberConversion.INTEGER_OVERFLOW "if the value does not fit in $Type"
+ **/
+macro String.to_integer(String string, $Type)
+{
+	usz len = string.len;
+	usz index = 0;
+	char* ptr = string.ptr;
+	while (index < len && ascii::is_blank_m(ptr[index])) index++;
+	if (len == index) return NumberConversion.EMPTY_STRING?;
+	bool is_negative;
+	switch (string[index])
+	{
+		case '-':
+			if ($Type.min == 0) return NumberConversion.NEGATIVE_VALUE?; // An unsigned type has a minimum of 0.
+			is_negative = true;
+			index++;
+		case '+':
+			index++;
+		default:
+			break;
+	}
+	if (len == index) return NumberConversion.MALFORMED_INTEGER?;
+	$Type base = 10;
+	if (string[index] == '0')
+	{
+		index++;
+		if (index == len) return ($Type)0;
+		switch (string[index]) // Non-empty C3 cases do not fall through; only the empty labels share the next body.
+		{
+			case 'x':
+			case 'X':
+				base = 16;
+				index++;
+			case 'b':
+			case 'B':
+				base = 2;
+				index++;
+			case 'o':
+			case 'O':
+				base = 8;
+				index++;
+			default:
+				break;
+		}
+		if (len == index) return NumberConversion.MALFORMED_INTEGER?;
+	}
+	$Type value = 0;
+	while (index != len)
+	{
+		// Map the character to its digit value. Letters map to 10..15 whatever the base is;
+		// the `c >= base` check below rejects them for bases smaller than 16.
+		char c = {|
+			char ch = string[index++];
+			// char is unsigned: a character below '0' wraps to a large value and fails the base check.
+			if (ch <= '9') return (char)(ch - '0');
+			// ':'..'@' and everything above 'f' are never digits.
+			if (ch < 'A' || ch > 'f') return NumberConversion.MALFORMED_INTEGER?;
+			if (ch <= 'F') return (char)(ch - 'A' + 10);
+			if (ch < 'a') return NumberConversion.MALFORMED_INTEGER?;
+			return (char)(ch - 'a' + 10);
+		|}!;
+		if (c >= base) return NumberConversion.MALFORMED_INTEGER?;
+		// Check the range before multiplying: comparing the wrapped result with the old value
+		// misses overflows where the wrapped product is still larger (or smaller, when negative).
+		value = {|
+			if (is_negative)
+			{
+				// Negative values accumulate downwards, which lets $Type.min itself be parsed.
+				if (value < ($Type.min + c) / base) return NumberConversion.INTEGER_OVERFLOW?;
+				$Type new_value = value * base - c;
+				return new_value;
+			}
+			if (value > ($Type.max - c) / base) return NumberConversion.INTEGER_OVERFLOW?;
+			$Type new_value = value * base + c;
+			return new_value;
+		|}!;
+	}
+	return value;
+}
+

 fn Char16[]! String.to_temp_utf16(String s) => s.to_utf16(mem::temp());
+
+// Signed parsers: each instantiates String.to_integer for one type and returns its value or fault.
+fn int128! String.to_int128(String s) => s.to_integer(int128);
+fn long! String.to_long(String s) => s.to_integer(long);
+fn int! String.to_int(String s) => s.to_integer(int);
+fn short! String.to_short(String s) => s.to_integer(short);
+fn ichar! String.to_ichar(String s) => s.to_integer(ichar);
+
+// Unsigned parsers: for these types to_integer rejects a leading '-' with NEGATIVE_VALUE.
+fn uint128! String.to_uint128(String s) => s.to_integer(uint128);
+fn ulong! String.to_ulong(String s) => s.to_integer(ulong);
+fn uint! String.to_uint(String s) => s.to_integer(uint);
+fn ushort! String.to_ushort(String s) => s.to_integer(ushort);
+fn char! String.to_uchar(String s) => s.to_integer(char);
+
+// Floating-point parsers. NOTE(review): String.to_real is not part of the visible diff — confirm where it is defined.
+fn double! String.to_double(String s) => s.to_real(double);
+fn float! String.to_float(String s) => s.to_real(float);