From 9643a7c2b29e5f9e4838e6a71acced345bbe951f Mon Sep 17 00:00:00 2001 From: Pierre Curto Date: Thu, 5 Oct 2023 19:12:47 +0200 Subject: [PATCH] add DString.insert_at (#1026) * add DString.insert * make conv::utf32to8 more C3-like --- lib/std/core/conv.c3 | 27 ++++++------ lib/std/core/dstring.c3 | 73 ++++++++++++++++++-------------- test/unit/stdlib/conv_tests.c3 | 2 +- test/unit/stdlib/core/dstring.c3 | 64 ++++++++++++++++++++++++++++ 4 files changed, 118 insertions(+), 48 deletions(-) create mode 100644 test/unit/stdlib/core/dstring.c3 diff --git a/lib/std/core/conv.c3 b/lib/std/core/conv.c3 index 62356ff3d..bd563abca 100644 --- a/lib/std/core/conv.c3 +++ b/lib/std/core/conv.c3 @@ -12,29 +12,28 @@ const uint UTF16_SURROGATE_HIGH_VALUE @private = 0xD800; /** * @param c `The utf32 codepoint to convert` * @param [out] output `the resulting buffer` - * @param available `the size available` **/ -fn usz! char32_to_utf8(Char32 c, char* output, usz available) +fn usz! char32_to_utf8(Char32 c, char[] output) { - if (!available) return UnicodeResult.CONVERSION_FAILED?; + if (!output.len) return UnicodeResult.CONVERSION_FAILED?; switch (true) { case c <= 0x7f: output[0] = (char)c; return 1; case c <= 0x7ff: - if (available < 2) return UnicodeResult.CONVERSION_FAILED?; + if (output.len < 2) return UnicodeResult.CONVERSION_FAILED?; output[0] = (char)(0xC0 | c >> 6); output[1] = (char)(0x80 | (c & 0x3F)); return 2; case c <= 0xffff: - if (available < 3) return UnicodeResult.CONVERSION_FAILED?; + if (output.len < 3) return UnicodeResult.CONVERSION_FAILED?; output[0] = (char)(0xE0 | c >> 12); output[1] = (char)(0x80 | (c >> 6 & 0x3F)); output[2] = (char)(0x80 | (c & 0x3F)); return 3; case c <= 0x10ffff: - if (available < 4) return UnicodeResult.CONVERSION_FAILED?; + if (output.len < 4) return UnicodeResult.CONVERSION_FAILED?; output[0] = (char)(0xF0 | c >> 18); output[1] = (char)(0x80 | (c >> 12 & 0x3F)); output[2] = (char)(0x80 | (c >> 6 & 0x3F)); @@ -297,19 +296,17 @@ fn usz utf16len_for_utf32(Char32[] utf32) * @param [out] utf8_buffer * @return `the number of bytes written.` **/ -fn usz! utf32to8(Char32[] utf32, String utf8_buffer) +fn usz! utf32to8(Char32[] utf32, char[] utf8_buffer) { - usz len = utf8_buffer.len; - char* ptr = utf8_buffer.ptr; - foreach (Char32 uc : utf32) + char[] buffer = utf8_buffer; + foreach (uc : utf32) { - usz used = char32_to_utf8(uc, ptr, len) @inline!; - len -= used; - ptr += used; + usz used = char32_to_utf8(uc, buffer) @inline!; + buffer = buffer[used..]; } // Zero terminate if there is space. - if (len > 0) ptr[0] = 0; - return utf8_buffer.len - len; + if (buffer.len > 0) buffer[0] = 0; + return utf8_buffer.len - buffer.len; } /** diff --git a/lib/std/core/dstring.c3 b/lib/std/core/dstring.c3 index c9bd51574..3bb02a960 100644 --- a/lib/std/core/dstring.c3 +++ b/lib/std/core/dstring.c3 @@ -96,7 +96,7 @@ fn void DString.chop(self, usz new_size) fn String DString.str_view(self) { - StringData* data = (StringData*)self; + StringData* data = self.data(); if (!data) return ""; return (String)data.chars[:data.len]; } @@ -134,36 +134,14 @@ fn void DString.append_repeat(&self, char c, usz times) */ fn void DString.append_char32(&self, Char32 c) { - if (c < 0x7f) - { - self.reserve(1); - StringData* data = self.data(); - data.chars[data.len++] = (char)c; - return; - } - if (c < 0x7ff) - { - self.reserve(2); - StringData* data = self.data(); - data.chars[data.len++] = (char)(0xC0 | c >> 6); - data.chars[data.len++] = (char)(0x80 | (c & 0x3F)); - return; - } - if (c < 0xffff) - { - self.reserve(3); - StringData* data = self.data(); - data.chars[data.len++] = (char)(0xE0 | c >> 12); - data.chars[data.len++] = (char)(0x80 | (c >> 6 & 0x3F)); - data.chars[data.len++] = (char)(0x80 | (c & 0x3F)); - return; - } - self.reserve(4); + char[4] buffer @noinit; + char* p = &buffer; + conv::char32_to_utf8_unsafe(c, &p); + usz n = p - (char*)&buffer; + self.reserve(n); StringData* data = self.data(); - data.chars[data.len++] = (char)(0xF0 | c >> 18); - data.chars[data.len++] = (char)(0x80 | (c >> 12 & 0x3F)); - data.chars[data.len++] = (char)(0x80 | (c >> 6 & 0x3F)); - data.chars[data.len++] = (char)(0x80 | (c & 0x3F)); + data.chars[data.len:n] = buffer[:n]; + data.len += n; } fn DString DString.tcopy(&self) => self.copy(mem::temp()); @@ -175,8 +153,8 @@ fn DString DString.copy(self, Allocator* using = null) if (using) return new_with_capacity(0, using); return (DString)null; } - if (!using) using = mem::heap(); StringData* data = self.data(); + if (!using) using = mem::heap(); DString new_string = new_with_capacity(data.capacity, using); mem::copy((char*)new_string.data(), (char*)data, StringData.sizeof + data.len); return new_string; @@ -267,7 +245,7 @@ fn Char32[] DString.copy_utf32(&self, Allocator* using = mem::heap()) fn void DString.append_string(&self, DString str) { - StringData* other = (StringData*)str; + StringData* other = str.data(); if (!other) return; self.append(str.str_view()); } @@ -315,6 +293,37 @@ macro void DString.append(&self, value) $endswitch } +fn void DString.insert_at(&self, usz index, String s) +{ + if (s.len == 0) return; + self.reserve(s.len); + StringData* data = self.data(); + usz len = self.len(); + if (data.chars[:len].ptr == s.ptr) + { + // Source and destination are the same: nothing to do. + return; + } + index = min(index, len); + data.len += s.len; + + char* start = data.chars[index:s.len].ptr; // area to insert into + mem::move(start + s.len, start, len - index); // move existing data + switch + { + case s.ptr <= start && start < s.ptr + s.len: + // Overlapping areas. + foreach_r (i, c : s) + { + data.chars[index + i] = c; + } + case start <= s.ptr && s.ptr < start + len: + // Source has moved. + mem::move(start, s.ptr + s.len, s.len); + default: + mem::move(start, s, s.len); + } +} fn usz! DString.printf(&self, String format, args...) @maydiscard { diff --git a/test/unit/stdlib/conv_tests.c3 b/test/unit/stdlib/conv_tests.c3 index 5cfa1d70e..2ea5720aa 100644 --- a/test/unit/stdlib/conv_tests.c3 +++ b/test/unit/stdlib/conv_tests.c3 @@ -4,7 +4,7 @@ import std::io; fn void! comparison_helper_32_to_8(Char32 c32, String expected_output) { char[8] out; - usz len = conv::char32_to_utf8(c32, &out, 4)!; + usz len = conv::char32_to_utf8(c32, &out)!; assert(len == expected_output.len, "Len should be 1"); foreach (i, c : expected_output) { diff --git a/test/unit/stdlib/core/dstring.c3 b/test/unit/stdlib/core/dstring.c3 new file mode 100644 index 000000000..b8cfb879b --- /dev/null +++ b/test/unit/stdlib/core/dstring.c3 @@ -0,0 +1,64 @@ +module std::core::dstring::tests @test; + +fn void test_insert_at() +{ + DString str = dstring::tnew(" world"); + String s; + + str.insert_at(0, ""); + s = str.str_view(); + assert(s == " world", "got '%s'; want ' world'", s); + + str.insert_at(0, "hello"); + s = str.str_view(); + assert(s == "hello world", "got '%s'; want 'hello world'", s); + + str.insert_at(5, " shiny"); + s = str.str_view(); + assert(s == "hello shiny world", "got '%s'; want 'hello shiny world'", s); +} + +fn void test_insert_at_overlaps() +{ + DString str = dstring::tnew("abc"); + String s; + String v; + + str.insert_at(0, "bc"); + s = str.str_view(); + assert(s == "bcabc", "got '%s'; want 'bcabc'", s); + + // Inserted string is unchanged. + str.chop(0); + str.append("abc"); + v = str.str_view(); + str.insert_at(0, v); + s = str.str_view(); + assert(s == "abc", "got '%s'; want 'abc'", s); + + // Inserted string is part of the tail. + str.chop(0); + str.append("abc"); + v = str.str_view()[1..]; + assert(v == "bc"); + str.insert_at(0, v); + s = str.str_view(); + assert(s == "bcabc", "got '%s'; want 'bcabc'", s); + + // Inserted string is part of the head. + str.chop(0); + str.append("abc"); + v = str.str_view()[1..]; + str.insert_at(2, v); + s = str.str_view(); + assert(s == "abbcc", "got '%s'; want 'abbcc'", s); + + str.chop(0); + str.append("abcdef"); + v = str.str_view()[3..]; + assert(v == "def"); + str.insert_at(0, v); + str.chop(3); + s = str.str_view(); + assert(s == "def", "got '%s'; want 'def'", s); +} \ No newline at end of file