From 9643a7c2b29e5f9e4838e6a71acced345bbe951f Mon Sep 17 00:00:00 2001
From: Pierre Curto <pierre.curto@gmail.com>
Date: Thu, 5 Oct 2023 19:12:47 +0200
Subject: [PATCH] add DString.insert_at (#1026)

* add DString.insert_at
* make conv::utf32to8 more C3-like
---
 lib/std/core/conv.c3             | 27 ++++++------
 lib/std/core/dstring.c3          | 73 ++++++++++++++++++--------------
 test/unit/stdlib/conv_tests.c3   |  2 +-
 test/unit/stdlib/core/dstring.c3 | 64 ++++++++++++++++++++++++++++
 4 files changed, 118 insertions(+), 48 deletions(-)
 create mode 100644 test/unit/stdlib/core/dstring.c3

diff --git a/lib/std/core/conv.c3 b/lib/std/core/conv.c3
index 62356ff3d..bd563abca 100644
--- a/lib/std/core/conv.c3
+++ b/lib/std/core/conv.c3
@@ -12,29 +12,28 @@ const uint UTF16_SURROGATE_HIGH_VALUE @private = 0xD800;
 /**
  * @param c `The utf32 codepoint to convert`
  * @param [out] output `the resulting buffer`
- * @param available `the size available`
  **/
-fn usz! char32_to_utf8(Char32 c, char* output, usz available)
+fn usz! char32_to_utf8(Char32 c, char[] output)
 {
-	if (!available) return UnicodeResult.CONVERSION_FAILED?;
+	if (!output.len) return UnicodeResult.CONVERSION_FAILED?;
 	switch (true)
 	{
 		case c <= 0x7f:
 			output[0] = (char)c;
 			return 1;
 		case c <= 0x7ff:
-			if (available < 2) return UnicodeResult.CONVERSION_FAILED?;
+			if (output.len < 2) return UnicodeResult.CONVERSION_FAILED?;
 			output[0] = (char)(0xC0 | c >> 6);
 			output[1] = (char)(0x80 | (c & 0x3F));
 			return 2;
 		case c <= 0xffff:
-			if (available < 3) return UnicodeResult.CONVERSION_FAILED?;
+			if (output.len < 3) return UnicodeResult.CONVERSION_FAILED?;
 			output[0] = (char)(0xE0 | c >> 12);
 			output[1] = (char)(0x80 | (c >> 6 & 0x3F));
 			output[2] = (char)(0x80 | (c & 0x3F));
 			return 3;
 		case c <= 0x10ffff:
-			if (available < 4) return UnicodeResult.CONVERSION_FAILED?;
+			if (output.len < 4) return UnicodeResult.CONVERSION_FAILED?;
 			output[0] = (char)(0xF0 | c >> 18);
 			output[1] = (char)(0x80 | (c >> 12 & 0x3F));
 			output[2] = (char)(0x80 | (c >> 6 & 0x3F));
@@ -297,19 +296,17 @@ fn usz utf16len_for_utf32(Char32[] utf32)
  * @param [out] utf8_buffer
  * @return `the number of bytes written.`
  **/
-fn usz! utf32to8(Char32[] utf32, String utf8_buffer)
+fn usz! utf32to8(Char32[] utf32, char[] utf8_buffer)
 {
-	usz len = utf8_buffer.len;
-	char* ptr = utf8_buffer.ptr;
-	foreach (Char32 uc : utf32)
+	char[] buffer = utf8_buffer; // Unwritten remainder of the output; shrinks as bytes are emitted.
+	foreach (uc : utf32)
 	{
-		usz used = char32_to_utf8(uc, ptr, len) @inline!;
-		len -= used;
-		ptr += used;
+		usz used = char32_to_utf8(uc, buffer) @inline!; // A failure (e.g. remainder too small) is propagated to the caller.
+		buffer = buffer[used..]; // Skip past the bytes just written.
 	}
 	// Zero terminate if there is space.
-	if (len > 0) ptr[0] = 0;
-	return utf8_buffer.len - len;
+	if (buffer.len > 0) buffer[0] = 0;
+	return utf8_buffer.len - buffer.len; // Bytes written; the optional terminator is not counted.
 }
 
 /**
diff --git a/lib/std/core/dstring.c3 b/lib/std/core/dstring.c3
index c9bd51574..3bb02a960 100644
--- a/lib/std/core/dstring.c3
+++ b/lib/std/core/dstring.c3
@@ -96,7 +96,7 @@ fn void DString.chop(self, usz new_size)
 
 fn String DString.str_view(self)
 {
-	StringData* data = (StringData*)self;
+	StringData* data = self.data();
 	if (!data) return "";
 	return (String)data.chars[:data.len];
 }
@@ -134,36 +134,14 @@ fn void DString.append_repeat(&self, char c, usz times)
  */
 fn void DString.append_char32(&self, Char32 c)
 {
-	if (c < 0x7f)
-	{
-		self.reserve(1);
-		StringData* data = self.data();
-		data.chars[data.len++] = (char)c;
-		return;
-	}
-	if (c < 0x7ff)
-	{
-		self.reserve(2);
-		StringData* data = self.data();
-		data.chars[data.len++] = (char)(0xC0 | c >> 6);
-		data.chars[data.len++] = (char)(0x80 | (c & 0x3F));
-		return;
-	}
-	if (c < 0xffff)
-	{
-		self.reserve(3);
-		StringData* data = self.data();
-		data.chars[data.len++] = (char)(0xE0 | c >> 12);
-		data.chars[data.len++] = (char)(0x80 | (c >> 6 & 0x3F));
-		data.chars[data.len++] = (char)(0x80 | (c & 0x3F));
-		return;
-	}
-	self.reserve(4);
+	char[4] buffer @noinit; // Scratch space for one encoded code point (UTF-8 needs at most 4 bytes).
+	char* p = &buffer;
+	conv::char32_to_utf8_unsafe(c, &p); // Presumably encodes c at p and advances p - helper not shown here, confirm.
+	usz n = p - (char*)&buffer; // Distance p moved, i.e. the encoded length in bytes.
+	self.reserve(n);
 	StringData* data = self.data();
-	data.chars[data.len++] = (char)(0xF0 | c >> 18);
-	data.chars[data.len++] = (char)(0x80 | (c >> 12 & 0x3F));
-	data.chars[data.len++] = (char)(0x80 | (c >> 6 & 0x3F));
-	data.chars[data.len++] = (char)(0x80 | (c & 0x3F));
+	data.chars[data.len:n] = buffer[:n]; // Copy the encoded bytes right after the current content.
+	data.len += n;
 }
 
 fn DString DString.tcopy(&self) => self.copy(mem::temp());
@@ -175,8 +153,8 @@ fn DString DString.copy(self, Allocator* using = null)
 		if (using) return new_with_capacity(0, using);
 		return (DString)null;
 	}
-	if (!using) using = mem::heap();
 	StringData* data = self.data();
+	if (!using) using = mem::heap();
 	DString new_string = new_with_capacity(data.capacity, using);
 	mem::copy((char*)new_string.data(), (char*)data, StringData.sizeof + data.len);
 	return new_string;
@@ -267,7 +245,7 @@ fn Char32[] DString.copy_utf32(&self, Allocator* using = mem::heap())
 
 fn void DString.append_string(&self, DString str)
 {
-	StringData* other = (StringData*)str;
+	StringData* other = str.data();
 	if (!other) return;
 	self.append(str.str_view());
 }
@@ -315,6 +293,37 @@ macro void DString.append(&self, value)
 	$endswitch
 }
 
+fn void DString.insert_at(&self, usz index, String s) // Insert `s` at byte offset `index`; `s` may be a view into this very string.
+{
+	if (s.len == 0) return; // Nothing to insert.
+	self.reserve(s.len); // NOTE(review): if reserve can reallocate, an `s` aliasing this string would presumably dangle - confirm.
+	StringData* data = self.data();
+	usz len = self.len(); // Length before the insertion.
+	if (data.chars[:len].ptr == s.ptr)
+	{
+		// `s` starts at the first byte of this string: treated as a no-op, whatever `index` is.
+		return;
+	}
+	index = min(index, len); // An index past the end is clamped, i.e. the insert becomes an append.
+	data.len += s.len;
+
+	char* start = data.chars[index:s.len].ptr; // First byte of the gap that will receive `s`.
+	mem::move(start + s.len, start, len - index); // Shift the tail right by s.len to open the gap.
+	switch
+	{
+		case s.ptr <= start && start < s.ptr + s.len:
+			// `s` spans the insertion point: copy back to front so each byte is read before it is overwritten.
+			foreach_r (i, c : s)
+			{
+				data.chars[index + i] = c;
+			}
+		case start <= s.ptr && s.ptr < start + len:
+			// `s` lay in the tail that was just shifted, so its bytes now live s.len further right.
+			mem::move(start, s.ptr + s.len, s.len);
+		default: // Neither aliasing case applies: copy `s` as it is.
+			mem::move(start, s, s.len);
+	}
+}
 
 fn usz! DString.printf(&self, String format, args...) @maydiscard
 {
diff --git a/test/unit/stdlib/conv_tests.c3 b/test/unit/stdlib/conv_tests.c3
index 5cfa1d70e..2ea5720aa 100644
--- a/test/unit/stdlib/conv_tests.c3
+++ b/test/unit/stdlib/conv_tests.c3
@@ -4,7 +4,7 @@ import std::io;
 fn void! comparison_helper_32_to_8(Char32 c32, String expected_output)
 {
 	char[8] out;
-	usz len = conv::char32_to_utf8(c32, &out, 4)!;
+	usz len = conv::char32_to_utf8(c32, &out)!;
 	assert(len == expected_output.len, "Len should be 1");
 	foreach (i, c : expected_output)
 	{
diff --git a/test/unit/stdlib/core/dstring.c3 b/test/unit/stdlib/core/dstring.c3
new file mode 100644
index 000000000..b8cfb879b
--- /dev/null
+++ b/test/unit/stdlib/core/dstring.c3
@@ -0,0 +1,64 @@
+module std::core::dstring::tests @test;
+
+fn void test_insert_at() // insert_at with string literals: empty insert, insert at the start, insert in the middle.
+{
+	DString str = dstring::tnew(" world");
+	String s;
+
+	str.insert_at(0, ""); // An empty insert must leave the string unchanged.
+	s = str.str_view();
+	assert(s == " world", "got '%s'; want ' world'", s);
+
+	str.insert_at(0, "hello"); // Insert at the very start.
+	s = str.str_view();
+	assert(s == "hello world", "got '%s'; want 'hello world'", s);
+
+	str.insert_at(5, " shiny"); // Insert in the middle; the tail " world" is shifted right.
+	s = str.str_view();
+	assert(s == "hello shiny world", "got '%s'; want 'hello shiny world'", s);
+}
+
+fn void test_insert_at_overlaps() // insert_at where the inserted string is a view into the DString itself.
+{
+	DString str = dstring::tnew("abc");
+	String s;
+	String v;
+
+	str.insert_at(0, "bc"); // Baseline: same content as the aliasing cases below, but from a literal.
+	s = str.str_view();
+	assert(s == "bcabc", "got '%s'; want 'bcabc'", s);
+
+	// Inserting the string's own full view at index 0 is a no-op.
+	str.chop(0);
+	str.append("abc");
+	v = str.str_view();
+	str.insert_at(0, v);
+	s = str.str_view();
+	assert(s == "abc", "got '%s'; want 'abc'", s);
+
+	// Inserted string is a view into the tail that gets shifted by the insert.
+	str.chop(0);
+	str.append("abc");
+	v = str.str_view()[1..];
+	assert(v == "bc");
+	str.insert_at(0, v);
+	s = str.str_view();
+	assert(s == "bcabc", "got '%s'; want 'bcabc'", s);
+
+	// Inserted string is a view that spans the insertion point.
+	str.chop(0);
+	str.append("abc");
+	v = str.str_view()[1..];
+	str.insert_at(2, v);
+	s = str.str_view();
+	assert(s == "abbcc", "got '%s'; want 'abbcc'", s);
+
+	str.chop(0); // Last case: a tail view inserted at the start; chop(3) then keeps only the inserted copy.
+	str.append("abcdef");
+	v = str.str_view()[3..];
+	assert(v == "def");
+	str.insert_at(0, v);
+	str.chop(3);
+	s = str.str_view();
+	assert(s == "def", "got '%s'; want 'def'", s);
+}
\ No newline at end of file