add DString.insert_at (#1026)

* add DString.insert_at
* make conv::utf32to8 more C3-like
This commit is contained in:
Pierre Curto
2023-10-05 19:12:47 +02:00
committed by GitHub
parent d16ad0b4c7
commit 9643a7c2b2
4 changed files with 118 additions and 48 deletions

View File

@@ -12,29 +12,28 @@ const uint UTF16_SURROGATE_HIGH_VALUE @private = 0xD800;
/**
* @param c `The utf32 codepoint to convert`
* @param [out] output `the resulting buffer`
* @param available `the size available`
**/
fn usz! char32_to_utf8(Char32 c, char* output, usz available)
fn usz! char32_to_utf8(Char32 c, char[] output)
{
if (!available) return UnicodeResult.CONVERSION_FAILED?;
if (!output.len) return UnicodeResult.CONVERSION_FAILED?;
switch (true)
{
case c <= 0x7f:
output[0] = (char)c;
return 1;
case c <= 0x7ff:
if (available < 2) return UnicodeResult.CONVERSION_FAILED?;
if (output.len < 2) return UnicodeResult.CONVERSION_FAILED?;
output[0] = (char)(0xC0 | c >> 6);
output[1] = (char)(0x80 | (c & 0x3F));
return 2;
case c <= 0xffff:
if (available < 3) return UnicodeResult.CONVERSION_FAILED?;
if (output.len < 3) return UnicodeResult.CONVERSION_FAILED?;
output[0] = (char)(0xE0 | c >> 12);
output[1] = (char)(0x80 | (c >> 6 & 0x3F));
output[2] = (char)(0x80 | (c & 0x3F));
return 3;
case c <= 0x10ffff:
if (available < 4) return UnicodeResult.CONVERSION_FAILED?;
if (output.len < 4) return UnicodeResult.CONVERSION_FAILED?;
output[0] = (char)(0xF0 | c >> 18);
output[1] = (char)(0x80 | (c >> 12 & 0x3F));
output[2] = (char)(0x80 | (c >> 6 & 0x3F));
@@ -297,19 +296,17 @@ fn usz utf16len_for_utf32(Char32[] utf32)
* @param [out] utf8_buffer
* @return `the number of bytes written.`
**/
fn usz! utf32to8(Char32[] utf32, String utf8_buffer)
fn usz! utf32to8(Char32[] utf32, char[] utf8_buffer)
{
usz len = utf8_buffer.len;
char* ptr = utf8_buffer.ptr;
foreach (Char32 uc : utf32)
char[] buffer = utf8_buffer;
foreach (uc : utf32)
{
usz used = char32_to_utf8(uc, ptr, len) @inline!;
len -= used;
ptr += used;
usz used = char32_to_utf8(uc, buffer) @inline!;
buffer = buffer[used..];
}
// Zero terminate if there is space.
if (len > 0) ptr[0] = 0;
return utf8_buffer.len - len;
if (buffer.len > 0) buffer[0] = 0;
return utf8_buffer.len - buffer.len;
}
/**

View File

@@ -96,7 +96,7 @@ fn void DString.chop(self, usz new_size)
fn String DString.str_view(self)
{
StringData* data = (StringData*)self;
StringData* data = self.data();
if (!data) return "";
return (String)data.chars[:data.len];
}
@@ -134,36 +134,14 @@ fn void DString.append_repeat(&self, char c, usz times)
*/
fn void DString.append_char32(&self, Char32 c)
{
if (c < 0x7f)
{
self.reserve(1);
StringData* data = self.data();
data.chars[data.len++] = (char)c;
return;
}
if (c < 0x7ff)
{
self.reserve(2);
StringData* data = self.data();
data.chars[data.len++] = (char)(0xC0 | c >> 6);
data.chars[data.len++] = (char)(0x80 | (c & 0x3F));
return;
}
if (c < 0xffff)
{
self.reserve(3);
StringData* data = self.data();
data.chars[data.len++] = (char)(0xE0 | c >> 12);
data.chars[data.len++] = (char)(0x80 | (c >> 6 & 0x3F));
data.chars[data.len++] = (char)(0x80 | (c & 0x3F));
return;
}
self.reserve(4);
char[4] buffer @noinit;
char* p = &buffer;
conv::char32_to_utf8_unsafe(c, &p);
usz n = p - (char*)&buffer;
self.reserve(n);
StringData* data = self.data();
data.chars[data.len++] = (char)(0xF0 | c >> 18);
data.chars[data.len++] = (char)(0x80 | (c >> 12 & 0x3F));
data.chars[data.len++] = (char)(0x80 | (c >> 6 & 0x3F));
data.chars[data.len++] = (char)(0x80 | (c & 0x3F));
data.chars[data.len:n] = buffer[:n];
data.len += n;
}
fn DString DString.tcopy(&self) => self.copy(mem::temp());
@@ -175,8 +153,8 @@ fn DString DString.copy(self, Allocator* using = null)
if (using) return new_with_capacity(0, using);
return (DString)null;
}
if (!using) using = mem::heap();
StringData* data = self.data();
if (!using) using = mem::heap();
DString new_string = new_with_capacity(data.capacity, using);
mem::copy((char*)new_string.data(), (char*)data, StringData.sizeof + data.len);
return new_string;
@@ -267,7 +245,7 @@ fn Char32[] DString.copy_utf32(&self, Allocator* using = mem::heap())
fn void DString.append_string(&self, DString str)
{
StringData* other = (StringData*)str;
StringData* other = str.data();
if (!other) return;
self.append(str.str_view());
}
@@ -315,6 +293,37 @@ macro void DString.append(&self, value)
$endswitch
}
// Insert the bytes of `s` into this string at byte offset `index`, shifting
// the existing tail to the right. An `index` past the end is clamped to the
// current length, i.e. the call appends. An empty `s` is a no-op.
//
// `s` is allowed to be a view into this string's own storage; the overlap
// cases are handled below. When `s` starts exactly at the beginning of this
// string's storage the call returns without modifying the string.
fn void DString.insert_at(&self, usz index, String s)
{
	if (s.len == 0) return;
	// `s` may point into our own buffer. If reserving has to grow the buffer
	// the storage may be relocated, which would leave `s` dangling, so record
	// where `s` sits relative to the buffer before reserving.
	bool aliased = false;
	usz offset = 0;
	StringData* old = self.data();
	if (old)
	{
		char* base = old.chars[:old.len].ptr;
		if (base <= s.ptr && s.ptr < base + old.len)
		{
			aliased = true;
			offset = (usz)(s.ptr - base);
		}
	}
	self.reserve(s.len);
	StringData* data = self.data();
	usz len = self.len();
	// Re-derive the view from the (possibly relocated) buffer.
	if (aliased) s = (String)data.chars[offset:s.len];
	if (data.chars[:len].ptr == s.ptr)
	{
		// Source and destination are the same: nothing to do.
		return;
	}
	index = min(index, len);
	data.len += s.len;
	char* start = data.chars[index:s.len].ptr; // area to insert into
	mem::move(start + s.len, start, len - index); // move existing data
	switch
	{
		case s.ptr <= start && start < s.ptr + s.len:
			// Overlapping areas: the source begins at or before the insertion
			// point and reaches into it. Copy back to front so no source byte
			// is overwritten before it has been read.
			foreach_r (i, c : s)
			{
				data.chars[index + i] = c;
			}
		case start <= s.ptr && s.ptr < start + len:
			// Source has moved: it was part of the tail that was just shifted
			// right by s.len, so read it from its new location.
			mem::move(start, s.ptr + s.len, s.len);
		default:
			// Source does not overlap the shifted region.
			mem::move(start, s, s.len);
	}
}
fn usz! DString.printf(&self, String format, args...) @maydiscard
{

View File

@@ -4,7 +4,7 @@ import std::io;
fn void! comparison_helper_32_to_8(Char32 c32, String expected_output)
{
char[8] out;
usz len = conv::char32_to_utf8(c32, &out, 4)!;
usz len = conv::char32_to_utf8(c32, &out)!;
assert(len == expected_output.len, "Len should be 1");
foreach (i, c : expected_output)
{

View File

@@ -0,0 +1,64 @@
module std::core::dstring::tests @test;
// Exercises DString.insert_at with sources that do not alias the string:
// an empty insertion, an insertion at the front and one in the middle.
fn void test_insert_at()
{
	DString buffer = dstring::tnew(" world");

	// Empty insertion: contents must be unchanged.
	buffer.insert_at(0, "");
	String view = buffer.str_view();
	assert(view == " world", "got '%s'; want ' world'", view);

	// Insertion at the very beginning.
	buffer.insert_at(0, "hello");
	view = buffer.str_view();
	assert(view == "hello world", "got '%s'; want 'hello world'", view);

	// Insertion in the middle of the existing contents.
	buffer.insert_at(5, " shiny");
	view = buffer.str_view();
	assert(view == "hello shiny world", "got '%s'; want 'hello shiny world'", view);
}
// Exercises DString.insert_at when the inserted String is (or may be) a view
// into the DString's own storage.
fn void test_insert_at_overlaps()
{
	DString buffer = dstring::tnew("abc");

	// Source is a literal, so it shares no memory with the destination.
	buffer.insert_at(0, "bc");
	String got = buffer.str_view();
	assert(got == "bcabc", "got '%s'; want 'bcabc'", got);

	// Source is a view of the whole string: the string is left unchanged.
	buffer.chop(0);
	buffer.append("abc");
	String source = buffer.str_view();
	buffer.insert_at(0, source);
	got = buffer.str_view();
	assert(got == "abc", "got '%s'; want 'abc'", got);

	// Source lies inside the tail that gets shifted by the insertion.
	buffer.chop(0);
	buffer.append("abc");
	source = buffer.str_view()[1..];
	assert(source == "bc");
	buffer.insert_at(0, source);
	got = buffer.str_view();
	assert(got == "bcabc", "got '%s'; want 'bcabc'", got);

	// Source starts before the insertion point and extends across it.
	buffer.chop(0);
	buffer.append("abc");
	source = buffer.str_view()[1..];
	buffer.insert_at(2, source);
	got = buffer.str_view();
	assert(got == "abbcc", "got '%s'; want 'abbcc'", got);

	// Source is the end of the string, inserted at the front; only the
	// inserted prefix is kept for the comparison.
	buffer.chop(0);
	buffer.append("abcdef");
	source = buffer.str_view()[3..];
	assert(source == "def");
	buffer.insert_at(0, source);
	buffer.chop(3);
	got = buffer.str_view();
	assert(got == "def", "got '%s'; want 'def'", got);
}