Add `skip_empty` to the String split family and add `String.split_to_buffer`.

* `String.split`, `String.new_split` and `String.tsplit` take a trailing
  `bool skip_empty = false`; when true, zero-length elements are not stored
  in the result.
* `String.split_to_buffer` writes the parts into a caller-supplied `String[]`
  and fails with `SplitResult.BUFFER_EXCEEDED` when the buffer is full.
* The three fault doc blocks in builtin.c3 switch from `<* *>` to `/* */`.
* Release notes and unit tests are updated to match.

diff --git a/lib/std/core/builtin.c3 b/lib/std/core/builtin.c3
index ccbd5f79d..e8aea39d3 100644
--- a/lib/std/core/builtin.c3
+++ b/lib/std/core/builtin.c3
@@ -4,21 +4,22 @@
 module std::core::builtin;
 import libc, std::hash, std::io, std::os::backtrace;
 
-<*
+/*
  Use `IteratorResult` when reading the end of an iterator, or accessing a result out of bounds.
-*>
+*/
 fault IteratorResult { NO_MORE_ELEMENT }
 
-<*
+/*
  Use `SearchResult` when trying to return a value from some collection but the element is missing.
-*>
+*/
 fault SearchResult { MISSING }
 
-<*
+/*
  Use `CastResult` when an attempt at conversion fails.
-*>
+*/
 fault CastResult { TYPE_MISMATCH }
 
+
 def VoidFn = fn void();
 
 <*
diff --git a/lib/std/core/string.c3 b/lib/std/core/string.c3
index 1c764d601..e7764d39c 100644
--- a/lib/std/core/string.c3
+++ b/lib/std/core/string.c3
@@ -219,12 +219,14 @@ fn String String.strip_end(string, String needle)
 
  @param [in] s
  @param [in] needle
- @param [&inout] allocator "The allocator to use for the String[]"
  @param max "Max number of elements, 0 means no limit, defaults to 0"
+ @param skip_empty "True to skip empty elements"
+ @param [&inout] allocator "The allocator to use for the String[]"
+
  @require needle.len > 0 "The needle must be at least 1 character long"
  @ensure return.len > 0
 *>
-fn String[] String.split(s, String needle, usz max = 0, Allocator allocator = allocator::heap())
+fn String[] String.split(s, String needle, usz max = 0, Allocator allocator = allocator::heap(), bool skip_empty = false)
 {
 	usz capacity = 16;
 	usz i = 0;
@@ -244,6 +246,11 @@ fn String[] String.split(s, String needle, usz max = 0, Allocator allocator = al
 			res = s;
 			no_more = true;
 		}
+		if (!res.len && skip_empty)
+		{
+			continue;
+		}
+
 		if (i == capacity)
 		{
 			capacity *= 2;
@@ -261,10 +268,11 @@ fn String[] String.split(s, String needle, usz max = 0, Allocator allocator = al
  @param [in] s
  @param [in] needle
  @param max "Max number of elements, 0 means no limit, defaults to 0"
+ @param skip_empty "True to skip empty elements"
  @require needle.len > 0 "The needle must be at least 1 character long"
  @ensure return.len > 0
 *>
-fn String[] String.new_split(s, String needle, usz max = 0) => s.split(needle, max, allocator::heap()) @inline;
+fn String[] String.new_split(s, String needle, usz max = 0, bool skip_empty = false) => s.split(needle, max, allocator::heap(), skip_empty) @inline;
 
 <*
  This function is identical to String.split, but implicitly uses the
@@ -273,8 +281,55 @@ fn String[] String.new_split(s, String needle, usz max = 0) => s.split(needle, m
  @param [in] s
  @param [in] needle
  @param max "Max number of elements, 0 means no limit, defaults to 0"
+ @param skip_empty "True to skip empty elements"
 *>
-fn String[] String.tsplit(s, String needle, usz max = 0) => s.split(needle, max, allocator::temp()) @inline;
+fn String[] String.tsplit(s, String needle, usz max = 0, bool skip_empty = false) => s.split(needle, max, allocator::temp(), skip_empty) @inline;
+
+fault SplitResult { BUFFER_EXCEEDED }
+
+<*
+ Split a string into parts, e.g "a|b|c" split with "|" yields { "a", "b", "c" }
+
+ @param [in] s
+ @param [in] needle
+ @param [inout] buffer
+ @param max "Max number of elements, 0 means no limit, defaults to 0"
+ @param skip_empty "True to skip empty elements"
+ @require needle.len > 0 "The needle must be at least 1 character long"
+ @ensure return.len > 0
+ @return! SplitResult.BUFFER_EXCEEDED `If there are more elements than would fit the buffer`
+*>
+fn String[]! String.split_to_buffer(s, String needle, String[] buffer, usz max = 0, bool skip_empty = false)
+{
+	usz max_capacity = buffer.len;
+	usz i = 0;
+	bool no_more = false;
+	while (!no_more)
+	{
+		usz! index = i == max - 1 ? SearchResult.MISSING? : s.index_of(needle);
+		String res @noinit;
+		if (try index)
+		{
+			res = s[:index];
+			s = s[index + needle.len..];
+		}
+		else
+		{
+			res = s;
+			no_more = true;
+		}
+		if (!res.len && skip_empty)
+		{
+			continue;
+		}
+		if (i == max_capacity)
+		{
+			return SplitResult.BUFFER_EXCEEDED?;
+		}
+		buffer[i++] = res;
+	}
+	return buffer[:i];
+}
 
 <*
  Check if a substring is found in the string.
diff --git a/releasenotes.md b/releasenotes.md
index ffd7a4640..e30bd1424 100644
--- a/releasenotes.md
+++ b/releasenotes.md
@@ -15,6 +15,7 @@ None
 - Updates to `Slice2d`, like `get_xy` and others.
 - Added `iter()` `value_iter()` and `key_iter()` to HashMap.
 - Add "tokenizer" to String.
+- Add "skip_empty" to split methods. Add split_to_buffer method.
 
 ## 0.6.5 Change list
 
diff --git a/test/unit/stdlib/core/string.c3 b/test/unit/stdlib/core/string.c3
index e9ed50f58..fe1fc88eb 100644
--- a/test/unit/stdlib/core/string.c3
+++ b/test/unit/stdlib/core/string.c3
@@ -85,6 +85,54 @@ fn void test_split()
 	assert(strings[1] == "b||c|");
 }
 
+fn void test_split_skip_empty()
+{
+	String test = "abc|b||c|";
+	String[] strings = test.split("|", skip_empty: true);
+	assert(strings.len == 3);
+	assert(strings[0] == "abc");
+	assert(strings[1] == "b");
+	assert(strings[2] == "c");
+	strings = test.split("|", 2, skip_empty: true);
+	assert(strings.len == 2);
+	assert(strings[0] == "abc");
+	assert(strings[1] == "b||c|");
+}
+
+fn void! test_split_to_buffer_skip_empty()
+{
+	String[10] buffer;
+	String test = "abc|b||c|";
+	String[] strings = test.split_to_buffer("|", &buffer, skip_empty: true)!;
+	assert(strings.len == 3);
+	assert(strings[0] == "abc");
+	assert(strings[1] == "b");
+	assert(strings[2] == "c");
+	strings = test.split("|", 2, skip_empty: true);
+	assert(strings.len == 2);
+	assert(strings[0] == "abc");
+	assert(strings[1] == "b||c|");
+}
+
+fn void! test_split_to_buffer()
+{
+	String[5] b;
+	String test = "abc|b||c|";
+	String[] strings = test.split_to_buffer("|", &b)!;
+	assert(strings.len == 5);
+	assert(strings[0] == "abc");
+	assert(strings[1] == "b");
+	assert(strings[2] == "");
+	assert(strings[3] == "c");
+	assert(strings[4] == "");
+	String[4] c;
+	assert(@catch(test.split_to_buffer("|", &c)) == SplitResult.BUFFER_EXCEEDED);
+	strings = test.split("|", 2);
+	assert(strings.len == 2);
+	assert(strings[0] == "abc");
+	assert(strings[1] == "b||c|");
+}
+
 fn void! test_index_of()
 {
 	String test = "hello world hello";