From 8a09b2e5f7562ccf7b7eb966dd4b676c24ba0488 Mon Sep 17 00:00:00 2001 From: Christoffer Lerno Date: Fri, 2 May 2025 18:06:28 +0200 Subject: [PATCH] std::ascii moved into std::core::ascii. Old _m variants are deprecated, as is uint methods. --- lib/std/ascii.c3 | 91 +++++--------- lib/std/core/ascii.c3 | 114 ++++++++++++++++++ lib/std/core/string.c3 | 3 +- lib/std/encoding/json.c3 | 1 - lib/std/net/inetaddr.c3 | 1 - releasenotes.md | 1 + .../switch/switch_in_defer_macro.c3t | 37 ++++-- test/unit/stdlib/core/ascii.c3 | 28 +++++ 8 files changed, 196 insertions(+), 80 deletions(-) create mode 100644 lib/std/core/ascii.c3 create mode 100644 test/unit/stdlib/core/ascii.c3 diff --git a/lib/std/ascii.c3 b/lib/std/ascii.c3 index 412e09b95..373b72a69 100644 --- a/lib/std/ascii.c3 +++ b/lib/std/ascii.c3 @@ -1,77 +1,42 @@ +<* This module is scheduled for removal, use std::core::ascii *> module std::ascii; macro bool in_range_m(c, start, len) => (uint)(c - start) < len; -macro bool is_lower_m(c) => in_range_m(c, 0x61, 26); -macro bool is_upper_m(c) => in_range_m(c, 0x41, 26); -macro bool is_digit_m(c) => in_range_m(c, 0x30, 10); +macro bool is_lower_m(c) => in_range_m(c, 0x61, 26); +macro bool is_upper_m(c) => in_range_m(c, 0x41, 26); +macro bool is_digit_m(c) => in_range_m(c, 0x30, 10); macro bool is_bdigit_m(c) => in_range_m(c, 0x30, 2); macro bool is_odigit_m(c) => in_range_m(c, 0x30, 8); macro bool is_xdigit_m(c) => in_range_m(c | 32, 0x61, 6) || is_digit_m(c); -macro bool is_alpha_m(c) => in_range_m(c | 32, 0x61, 26); -macro bool is_print_m(c) => in_range_m(c, 0x20, 95); -macro bool is_graph_m(c) => in_range_m(c, 0x21, 94); -macro bool is_space_m(c) => in_range_m(c, 0x9, 5) || c == 0x20; -macro bool is_alnum_m(c) => is_alpha_m(c) || is_digit_m(c); -macro bool is_punct_m(c) => !is_alnum_m(c) && is_graph_m(c); -macro bool is_blank_m(c) => c == 0x20 || c == 0x9; -macro bool is_cntrl_m(c) => c < 0x20 || c == 0x7f; +macro bool is_alpha_m(c) => in_range_m(c | 32, 0x61, 26); 
+macro bool is_print_m(c) => in_range_m(c, 0x20, 95); +macro bool is_graph_m(c) => in_range_m(c, 0x21, 94); +macro bool is_space_m(c) => in_range_m(c, 0x9, 5) || c == 0x20; +macro bool is_alnum_m(c) => is_alpha_m(c) || is_digit_m(c); +macro bool is_punct_m(c) => !is_alnum_m(c) && is_graph_m(c); +macro bool is_blank_m(c) => c == 0x20 || c == 0x9; +macro bool is_cntrl_m(c) => c < 0x20 || c == 0x7f; macro to_lower_m(c) => is_upper_m(c) ? c + 0x20 : c; macro to_upper_m(c) => is_lower_m(c) ? c - 0x20 : c; fn bool in_range(char c, char start, char len) => in_range_m(c, start, len); -fn bool is_lower(char c) => is_lower_m(c); -fn bool is_upper(char c) => is_upper_m(c); -fn bool is_digit(char c) => is_digit_m(c); -fn bool is_bdigit(char c) => is_bdigit_m(c); -fn bool is_odigit(char c) => is_odigit_m(c); -fn bool is_xdigit(char c) => is_xdigit_m(c); -fn bool is_alpha(char c) => is_alpha_m(c); -fn bool is_print(char c) => is_print_m(c); -fn bool is_graph(char c) => is_graph_m(c); -fn bool is_space(char c) => is_space_m(c); -fn bool is_alnum(char c) => is_alnum_m(c); -fn bool is_punct(char c) => is_punct_m(c); -fn bool is_blank(char c) => is_blank_m(c); -fn bool is_cntrl(char c) => is_cntrl_m(c); -fn char to_lower(char c) => (char)to_lower_m(c); -fn char to_upper(char c) => (char)to_upper_m(c); fn bool char.in_range(char c, char start, char len) => in_range_m(c, start, len); -fn bool char.is_lower(char c) => is_lower_m(c); -fn bool char.is_upper(char c) => is_upper_m(c); -fn bool char.is_digit(char c) => is_digit_m(c); -fn bool char.is_bdigit(char c) => is_bdigit_m(c); -fn bool char.is_odigit(char c) => is_odigit_m(c); -fn bool char.is_xdigit(char c) => is_xdigit_m(c); -fn bool char.is_alpha(char c) => is_alpha_m(c); -fn bool char.is_print(char c) => is_print_m(c); -fn bool char.is_graph(char c) => is_graph_m(c); -fn bool char.is_space(char c) => is_space_m(c); -fn bool char.is_alnum(char c) => is_alnum_m(c); -fn bool char.is_punct(char c) => is_punct_m(c); -fn bool 
char.is_blank(char c) => is_blank_m(c); -fn bool char.is_cntrl(char c) => is_cntrl_m(c); -fn char char.to_lower(char c) => (char)to_lower_m(c); -fn char char.to_upper(char c) => (char)to_upper_m(c); -<* - @require c.is_xdigit() -*> -fn char char.from_hex(char c) => c.is_digit() ? c - '0' : 10 + (c | 0x20) - 'a'; fn bool uint.in_range(uint c, uint start, uint len) => in_range_m(c, start, len); -fn bool uint.is_lower(uint c) => is_lower_m(c); -fn bool uint.is_upper(uint c) => is_upper_m(c); -fn bool uint.is_digit(uint c) => is_digit_m(c); -fn bool uint.is_bdigit(uint c) => is_bdigit_m(c); -fn bool uint.is_odigit(uint c) => is_odigit_m(c); -fn bool uint.is_xdigit(uint c) => is_xdigit_m(c); -fn bool uint.is_alpha(uint c) => is_alpha_m(c); -fn bool uint.is_print(uint c) => is_print_m(c); -fn bool uint.is_graph(uint c) => is_graph_m(c); -fn bool uint.is_space(uint c) => is_space_m(c); -fn bool uint.is_alnum(uint c) => is_alnum_m(c); -fn bool uint.is_punct(uint c) => is_punct_m(c); -fn bool uint.is_blank(uint c) => is_blank_m(c); -fn bool uint.is_cntrl(uint c) => is_cntrl_m(c); -fn uint uint.to_lower(uint c) => (uint)to_lower_m(c); -fn uint uint.to_upper(uint c) => (uint)to_upper_m(c); +fn bool uint.is_lower(uint c) @deprecated => is_lower_m(c); +fn bool uint.is_upper(uint c) @deprecated => is_upper_m(c); +fn bool uint.is_digit(uint c) @deprecated => is_digit_m(c); +fn bool uint.is_bdigit(uint c) @deprecated => is_bdigit_m(c); +fn bool uint.is_odigit(uint c) @deprecated => is_odigit_m(c); +fn bool uint.is_xdigit(uint c) @deprecated => is_xdigit_m(c); +fn bool uint.is_alpha(uint c) @deprecated => is_alpha_m(c); +fn bool uint.is_print(uint c) @deprecated => is_print_m(c); +fn bool uint.is_graph(uint c) @deprecated => is_graph_m(c); +fn bool uint.is_space(uint c) @deprecated => is_space_m(c); +fn bool uint.is_alnum(uint c) @deprecated => is_alnum_m(c); +fn bool uint.is_punct(uint c) @deprecated => is_punct_m(c); +fn bool uint.is_blank(uint c) @deprecated => is_blank_m(c); 
+fn bool uint.is_cntrl(uint c) @deprecated => is_cntrl_m(c); +fn uint uint.to_lower(uint c) @deprecated => (uint)to_lower_m(c); +fn uint uint.to_upper(uint c) @deprecated => (uint)to_upper_m(c); diff --git a/lib/std/core/ascii.c3 b/lib/std/core/ascii.c3 new file mode 100644 index 000000000..68db253af --- /dev/null +++ b/lib/std/core/ascii.c3 @@ -0,0 +1,114 @@ +<* + This module contains utils for handling ASCII characters. They only operate on + characters corresponding to 0-127. +*> +module std::core::ascii; + +macro bool @is_lower(c) => ASCII_LOOKUP[c].lower; // Is a-z +macro bool @is_upper(c) => ASCII_LOOKUP[c].upper; // Is A-Z +macro bool @is_digit(c) => ASCII_LOOKUP[c].digit; // Is 0-9 +macro bool @is_bdigit(c) => ASCII_LOOKUP[c].bin_digit; // Is 0-1 +macro bool @is_odigit(c) => ASCII_LOOKUP[c].oct_digit; // Is 0-7 +macro bool @is_xdigit(c) => ASCII_LOOKUP[c].hex_digit; // Is 0-9 or a-f or A-F +macro bool @is_alpha(c) => ASCII_LOOKUP[c].alpha; // Is a-z or A-Z +macro bool @is_print(c) => ASCII_LOOKUP[c].printable; // Is a printable character (space or higher and < 127) +macro bool @is_graph(c) => ASCII_LOOKUP[c].graph; // Does it show any graphics (printable but not space) +macro bool @is_space(c) => ASCII_LOOKUP[c].space; // Is it a space character: space, tab, linefeed etc +macro bool @is_alnum(c) => ASCII_LOOKUP[c].alphanum; // Is it alpha or digit +macro bool @is_punct(c) => ASCII_LOOKUP[c].punct; // Is it "graph" but not digit or letter +macro bool @is_blank(c) => ASCII_LOOKUP[c].blank; // Is it a blank space: space or tab +macro bool @is_cntrl(c) => ASCII_LOOKUP[c].control; // Is it a control character: before space or 127 +macro char @to_lower(c) => c + TO_LOWER[c]; // Convert A-Z to a-z if found +macro char @to_upper(c) => c - TO_UPPER[c]; // Convert a-z to A-Z if found + +fn bool is_lower(char c) => @is_lower(c); // Is a-z +fn bool is_upper(char c) => @is_upper(c); // Is A-Z +fn bool is_digit(char c) => @is_digit(c); // Is 0-9 +fn bool is_bdigit(char c) 
=> @is_bdigit(c); // Is 0-1 +fn bool is_odigit(char c) => @is_odigit(c); // Is 0-7 +fn bool is_xdigit(char c) => @is_xdigit(c); // Is 0-9 or a-f or A-F +fn bool is_alpha(char c) => @is_alpha(c); // Is a-z or A-Z +fn bool is_print(char c) => @is_print(c); // Is a printable character (space or higher and < 127) +fn bool is_graph(char c) => @is_graph(c); // Does it show any graphics (printable but not space) +fn bool is_space(char c) => @is_space(c); // Is it a space character: space, tab, linefeed etc +fn bool is_alnum(char c) => @is_alnum(c); // Is it alpha or digit +fn bool is_punct(char c) => @is_punct(c); // Is it "graph" but not digit or letter +fn bool is_blank(char c) => @is_blank(c); // Is it a blank space: space or tab +fn bool is_cntrl(char c) => @is_cntrl(c); // Is it a control character: before space or 127 +fn char to_lower(char c) => @to_lower(c); // Convert A-Z to a-z if found +fn char to_upper(char c) => @to_upper(c); // Convert a-z to A-Z if found + +// The following methods are macro methods for the same functions +macro bool char.is_lower(char c) => @is_lower(c); +macro bool char.is_upper(char c) => @is_upper(c); +macro bool char.is_digit(char c) => @is_digit(c); +macro bool char.is_bdigit(char c) => @is_bdigit(c); +macro bool char.is_odigit(char c) => @is_odigit(c); +macro bool char.is_xdigit(char c) => @is_xdigit(c); +macro bool char.is_alpha(char c) => @is_alpha(c); +macro bool char.is_print(char c) => @is_print(c); +macro bool char.is_graph(char c) => @is_graph(c); +macro bool char.is_space(char c) => @is_space(c); +macro bool char.is_alnum(char c) => @is_alnum(c); +macro bool char.is_punct(char c) => @is_punct(c); +macro bool char.is_blank(char c) => @is_blank(c); +macro bool char.is_cntrl(char c) => @is_cntrl(c); +macro char char.to_lower(char c) => @to_lower(c); +macro char char.to_upper(char c) => @to_upper(c); + +<* + Convert a-f/A-F/0-9 to the appropriate hex value. 
+ + @require c.is_xdigit() + @ensure return >= 0 && return <= 15 +*> +macro char char.from_hex(char c) => HEX_VALUE[c]; + +<* + Bitstruct containing the different properties of a character +*> +bitstruct CharType : ushort @private +{ + bool lower; + bool upper; + bool digit; + bool bin_digit; + bool hex_digit; + bool oct_digit; + bool alpha; + bool alphanum; + bool space; + bool printable; + bool blank; + bool punct; + bool control; + bool graph; +} + +const CharType[256] ASCII_LOOKUP @private = { + [0..31] = { .control }, + [9..13] = { .control, .space }, + ['\t'] = { .control, .space, .blank }, + [' '] = { .space, .printable, .blank }, + [33..126] = { .printable, .graph, .punct }, + ['0'..'9'] = { .printable, .graph, .alphanum, .hex_digit, .digit }, + ['2'..'7'] = { .printable, .graph, .alphanum, .hex_digit, .digit, .oct_digit }, + ['0'..'1'] = { .printable, .graph, .alphanum, .hex_digit, .digit, .oct_digit, .bin_digit }, + ['A'..'Z'] = { .printable, .graph, .alphanum, .alpha, .upper }, + ['A'..'F'] = { .printable, .graph, .alphanum, .alpha, .upper, .hex_digit }, + ['a'..'z'] = { .printable, .graph, .alphanum, .alpha, .lower }, + ['a'..'f'] = { .printable, .graph, .alphanum, .alpha, .lower, .hex_digit }, + [127] = { .control }, +}; + +const char[256] HEX_VALUE = { + ['0'] = 0, ['1'] = 1, ['2'] = 2, ['3'] = 3, ['4'] = 4, + ['5'] = 5, ['6'] = 6, ['7'] = 7, ['8'] = 8, ['9'] = 9, + ['A'] = 10, ['B'] = 11, ['C'] = 12, ['D'] = 13, ['E'] = 14, + ['F'] = 15, ['a'] = 10, ['b'] = 11, ['c'] = 12, ['d'] = 13, + ['e'] = 14, ['f'] = 15 +}; + +const char[256] TO_UPPER @private = { ['a'..'z'] = 'a' - 'A' }; +const char[256] TO_LOWER @private = { ['A'..'Z'] = 'a' - 'A' }; + diff --git a/lib/std/core/string.c3 b/lib/std/core/string.c3 index 2aade2d25..1022e4af4 100644 --- a/lib/std/core/string.c3 +++ b/lib/std/core/string.c3 @@ -1,5 +1,4 @@ module std::core::string; -import std::ascii; import std::io; typedef String @if(!$defined(String)) = inline char[]; @@ -787,7 +786,7 @@ macro 
String.to_integer(self, $Type, int base = 10) usz len = self.len; usz index = 0; char* ptr = self.ptr; - while (index < len && ascii::is_blank_m(ptr[index])) index++; + while (index < len && ptr[index].is_blank()) index++; if (len == index) return EMPTY_STRING?; bool is_negative; switch (self[index]) diff --git a/lib/std/encoding/json.c3 b/lib/std/encoding/json.c3 index 461f12929..0c72d7964 100644 --- a/lib/std/encoding/json.c3 +++ b/lib/std/encoding/json.c3 @@ -3,7 +3,6 @@ // a copy of which can be found in the LICENSE_STDLIB file. module std::encoding::json; import std::io; -import std::ascii; import std::collections::object; faultdef UNEXPECTED_CHARACTER, INVALID_ESCAPE_SEQUENCE, DUPLICATE_MEMBERS, INVALID_NUMBER; diff --git a/lib/std/net/inetaddr.c3 b/lib/std/net/inetaddr.c3 index 96f7c2977..7f38d7827 100644 --- a/lib/std/net/inetaddr.c3 +++ b/lib/std/net/inetaddr.c3 @@ -1,6 +1,5 @@ module std::net; import std::io; -import std::ascii; enum IpProtocol : char (AIFamily ai_family) { diff --git a/releasenotes.md b/releasenotes.md index 3da48fbc7..7f80cbd18 100644 --- a/releasenotes.md +++ b/releasenotes.md @@ -7,6 +7,7 @@ ### Stdlib changes - Added `String.quick_ztr` and `String.is_zstr` +- std::ascii moved into std::core::ascii. Old _m variants are deprecated, as are the uint methods. 
## 0.7.1 Change list diff --git a/test/test_suite/switch/switch_in_defer_macro.c3t b/test/test_suite/switch/switch_in_defer_macro.c3t index 866f0c355..e547c3697 100644 --- a/test/test_suite/switch/switch_in_defer_macro.c3t +++ b/test/test_suite/switch/switch_in_defer_macro.c3t @@ -692,6 +692,7 @@ fn void test() @.str.4 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1 @.str.5 = private unnamed_addr constant [3 x i8] c"*/\00", align 1 @"lexer_test.Comment$end" = linkonce constant [2 x %"char[]"] [%"char[]" { ptr @.str.4, i64 1 }, %"char[]" { ptr @.str.5, i64 2 }], align 8 +@std.core.ascii.ASCII_LOOKUP = extern_weak constant [256 x i16], align 16 @"$ct.std.io.ByteReader" = linkonce global %.introspect { i8 9, i64 0, ptr null, i64 24, i64 0, i64 2, [0 x i64] zeroinitializer }, align 8 @std.core.mem.allocator.thread_allocator = extern_weak thread_local global %any, align 8 @@ -702,31 +703,41 @@ entry: br i1 %eq, label %and.rhs, label %and.phi and.rhs: ; preds = %entry - %2 = call i8 @std.ascii.char.is_alpha(i8 zeroext %1) - %3 = trunc i8 %2 to i1 + %zext = zext i8 %1 to i64 + %ptroffset = getelementptr inbounds [2 x i8], ptr @std.core.ascii.ASCII_LOOKUP, i64 %zext + %2 = load i16, ptr %ptroffset, align 2 + %lshrl = lshr i16 %2, 6 + %3 = and i16 1, %lshrl + %trunc = trunc i16 %3 to i8 + %4 = trunc i8 %trunc to i1 br label %and.phi and.phi: ; preds = %and.rhs, %entry - %val = phi i1 [ false, %entry ], [ %3, %and.rhs ] + %val = phi i1 [ false, %entry ], [ %4, %and.rhs ] br i1 %val, label %or.phi, label %or.rhs or.rhs: ; preds = %and.phi %lt = icmp ult i64 0, %0 - br i1 %lt, label %and.rhs1, label %and.phi2 + br i1 %lt, label %and.rhs1, label %and.phi6 and.rhs1: ; preds = %or.rhs - %4 = call i8 @std.ascii.char.is_alnum(i8 zeroext %1) - %5 = trunc i8 %4 to i1 - br label %and.phi2 + %zext2 = zext i8 %1 to i64 + %ptroffset3 = getelementptr inbounds [2 x i8], ptr @std.core.ascii.ASCII_LOOKUP, i64 %zext2 + %5 = load i16, ptr %ptroffset3, align 2 + %lshrl4 = lshr i16 
%5, 7 + %6 = and i16 1, %lshrl4 + %trunc5 = trunc i16 %6 to i8 + %7 = trunc i8 %trunc5 to i1 + br label %and.phi6 -and.phi2: ; preds = %and.rhs1, %or.rhs - %val3 = phi i1 [ false, %or.rhs ], [ %5, %and.rhs1 ] +and.phi6: ; preds = %and.rhs1, %or.rhs + %val7 = phi i1 [ false, %or.rhs ], [ %7, %and.rhs1 ] br label %or.phi -or.phi: ; preds = %and.phi2, %and.phi - %val4 = phi i1 [ true, %and.phi ], [ %val3, %and.phi2 ] - %6 = zext i1 %val4 to i8 - ret i8 %6 +or.phi: ; preds = %and.phi6, %and.phi + %val8 = phi i1 [ true, %and.phi ], [ %val7, %and.phi6 ] + %8 = zext i1 %val8 to i8 + ret i8 %8 } ; Function Attrs: nounwind uwtable diff --git a/test/unit/stdlib/core/ascii.c3 b/test/unit/stdlib/core/ascii.c3 new file mode 100644 index 000000000..39f705889 --- /dev/null +++ b/test/unit/stdlib/core/ascii.c3 @@ -0,0 +1,28 @@ +module std::core::ascii @test; +import std::hash; + +fn void test_all() +{ + long x = 0; + Crc64 check; + check.init(); + for (char c = 0; c < 255; c++) + { + check.updatec(c); + if (c.is_upper()) check.updatec(1); + if (c.is_lower()) check.updatec(2); + if (c.is_alpha()) check.updatec(4); + if (c.is_bdigit()) check.updatec(8); + if (c.is_odigit()) check.updatec(16); + if (c.is_xdigit()) check.updatec(16); + if (c.is_digit()) check.updatec(32); + if (c.is_graph()) check.updatec(64); + check.updatec(128); + if (c.is_punct()) check.updatec(1); + if (c.is_cntrl()) check.updatec(2); + if (c.is_space()) check.updatec(4); + check.updatec(c.to_upper()); + check.updatec(c.to_lower()); + } + test::eq(check.final(), 7327699757963224526UL); +} \ No newline at end of file