From 701d6a074653e3b43e456a89a164ddd70e200b76 Mon Sep 17 00:00:00 2001
From: Pierre Curto
Date: Tue, 1 Aug 2023 10:47:21 +0200
Subject: [PATCH] std/lib/io: add Scanner (#904)

* std/lib/io: add Scanner

Signed-off-by: Pierre Curto

* lib/std/core: use existing methods in String.convert_ascii_to_{lower,
 upper}

Signed-off-by: Pierre Curto

---------

Signed-off-by: Pierre Curto
---
 lib/std/core/string.c3         |   4 +-
 lib/std/io/stream/scanner.c3   | 108 +++++++++++++++++++++++++++++++++
 test/unit/stdlib/io/scanner.c3 |  55 +++++++++++++++++
 3 files changed, 165 insertions(+), 2 deletions(-)
 create mode 100644 lib/std/io/stream/scanner.c3
 create mode 100644 test/unit/stdlib/io/scanner.c3

diff --git a/lib/std/core/string.c3 b/lib/std/core/string.c3
index 141f130d4..5868fc828 100644
--- a/lib/std/core/string.c3
+++ b/lib/std/core/string.c3
@@ -404,7 +404,7 @@ fn Char32[]! String.to_utf32(s, Allocator* using = mem::heap())
 
 fn void String.convert_ascii_to_lower(s)
 {
-	foreach (&c : s) if (*c >= 'A' && *c <= 'Z') *c += 'a' - 'A';
+	foreach (&c : s) if (c.is_upper()) *c += 'a' - 'A';
 }
 
 fn String String.ascii_to_lower(s, Allocator* using = mem::heap())
@@ -416,7 +416,7 @@ fn String String.ascii_to_lower(s, Allocator* using = mem::heap())
 
 fn void String.convert_ascii_to_upper(s)
 {
-	foreach (&c : s) if (*c >= 'a' && *c <= 'z') *c -= 'a' - 'A';
+	foreach (&c : s) if (c.is_lower()) *c -= 'a' - 'A';
 }
 
 fn String String.ascii_to_upper(s, Allocator* using = mem::heap())
diff --git a/lib/std/io/stream/scanner.c3 b/lib/std/io/stream/scanner.c3
new file mode 100644
index 000000000..9e05e43a9
--- /dev/null
+++ b/lib/std/io/stream/scanner.c3
@@ -0,0 +1,108 @@
+module std::io;
+
+struct Scanner
+{
+	Stream reader;
+	char[] buf;
+	// Start of the data in buf that scan has not returned yet.
+	usz pattern_idx;
+	// End of the valid data in buf.
+	usz read_idx;
+}
+
+/**
+ * Scanner provides a way to read delimited data (with newlines as the default).
+ * The supplied buffer must be at least as large as the expected data length
+ * including its pattern.
+ * @require buffer.len > 0 "Non-empty buffer required."
+ **/
+fn void Scanner.init(&self, Stream reader, char[] buffer)
+{
+	*self = { .reader = reader, .buf = buffer };
+}
+
+/**
+ * Return and clear any remaining unscanned data.
+ * The returned slice points into the scanner's buffer.
+ **/
+fn char[] Scanner.flush(&self)
+{
+	assert(self.read_idx >= self.pattern_idx);
+	usz n = self.read_idx - self.pattern_idx;
+	char[] buf = self.buf[self.pattern_idx:n];
+	self.pattern_idx = 0;
+	self.read_idx = 0;
+	return buf;
+}
+
+/**
+ * Scan the stream for the next occurrence of the pattern and return the data
+ * up to the match. The returned slice points into the scanner's buffer, which
+ * a later call to scan may overwrite.
+ * Fails with SearchResult.MISSING when no match is found; the unmatched data
+ * can then be retrieved with flush.
+ * @require pattern.len > 0 "Non-empty pattern required."
+ * @require self.buf.len > pattern.len "Pattern too large."
+ **/
+fn char[]! Scanner.scan(&self, String pattern = "\n")
+{
+	if (self.read_idx == 0)
+	{
+		// First read.
+		self.read_idx = self.refill(self.buf)!;
+		self.pattern_idx = 0;
+	}
+	assert(self.read_idx >= self.pattern_idx);
+	usz n = self.read_idx - self.pattern_idx;
+	char[] buf = self.buf[self.pattern_idx:n];
+	if (try i = self.find(buf, pattern))
+	{
+		self.pattern_idx += i + pattern.len;
+		return buf[:i];
+	}
+	if (self.pattern_idx == 0 || self.read_idx < self.buf.len)
+	{
+		// pattern_idx == 0: the search already started at the front of the buffer.
+		// read_idx < buf.len: the buffer is not full, so no further read is attempted.
+		return SearchResult.MISSING?;
+	}
+	// Split pattern not found: maximize the search and try one more time.
+	// Move the unscanned data to the front of the buffer to make room.
+	self.buf[:n] = buf[..];
+	self.pattern_idx = 0;
+	// Keep read_idx in step right away: if the refill below fails, flush
+	// must only return the n bytes that were kept.
+	self.read_idx = n;
+
+	buf = self.buf[n..];
+	usz p = self.refill(buf)!;
+	self.read_idx = n + p;
+
+	// Search the kept and the new data together, as the pattern may straddle both.
+	buf = self.buf[:n + p];
+	usz i = self.find(buf, pattern)!;
+	self.pattern_idx = i + pattern.len;
+
+	return buf[:i];
+}
+
+// Search buf for pattern using String.index_of.
+macro usz! Scanner.find(&self, buf, pattern) @private
+{
+	return ((String)buf).index_of(pattern);
+}
+
+// Read from the stream into buf and return the value of the reader's read call.
+// IoError.EOF is turned into SearchResult.MISSING, other errors are passed on.
+macro usz! Scanner.refill(&self, buf) @private
+{
+	usz! n = self.reader.read(buf);
+	if (catch err = n)
+	{
+		case IoError.EOF:
+			return SearchResult.MISSING?;
+		default:
+			return err?;
+	}
+	return n;
+}
\ No newline at end of file
diff --git a/test/unit/stdlib/io/scanner.c3 b/test/unit/stdlib/io/scanner.c3
new file mode 100644
index 000000000..bf41f1370
--- /dev/null
+++ b/test/unit/stdlib/io/scanner.c3
@@ -0,0 +1,55 @@
+module scanner_test @test;
+import std::collections::list;
+import std::io;
+
+def Results = List(<String>);
+
+struct ScanTest
+{
+	String in;
+	String[] out;
+	String left_over;
+}
+
+fn void! test_scanner()
+{
+	ScanTest[] tcases = {
+		{"aa,,bb", {"aa"}, "bb"},
+		{"a,,b,,", {"a", "b"}, ""},
+		{"ab,,c", {"ab"}, "c"},
+		{"ab,,cd,,e", {"ab", "cd"}, "e"},
+		// The stream ends exactly when the buffer needs refilling.
+		{"ab,,", {"ab"}, ""},
+		// The second pattern straddles two reads.
+		{",,a,,b", {"", "a"}, "b"},
+	};
+	foreach (tc : tcases)
+	{
+		ByteReader br;
+		br.init(tc.in);
+		Scanner sc;
+		char[4] buffer; // max match (2) + pattern length (2)
+		sc.init(br.as_stream(), buffer[..]);
+
+		Results results;
+		while LOOP: (true)
+		{
+			char[]! res = sc.scan(",,");
+			if (catch err = res)
+			{
+				case SearchResult.MISSING:
+					break LOOP;
+				default:
+					return err?;
+			}
+			String str = (String)res;
+			results.push(str.tconcat(""));
+		}
+
+		String[] got = results.array_view();
+		assert(got == tc.out, "got %s; want %s", got, tc.out);
+		char[] fl = sc.flush();
+		String left_over = (String)fl;
+		assert(left_over == tc.left_over, "%s -> %s", tc.in, left_over);
+	}
+}
\ No newline at end of file