From 8a09b2e5f7562ccf7b7eb966dd4b676c24ba0488 Mon Sep 17 00:00:00 2001
From: Christoffer Lerno <christoffer@aegik.com>
Date: Fri, 2 May 2025 18:06:28 +0200
Subject: [PATCH] std::ascii moved into std::core::ascii. Old _m variants are
 deprecated, as are the uint methods.

---
 lib/std/ascii.c3                              |  91 +++++---------
 lib/std/core/ascii.c3                         | 114 ++++++++++++++++++
 lib/std/core/string.c3                        |   3 +-
 lib/std/encoding/json.c3                      |   1 -
 lib/std/net/inetaddr.c3                       |   1 -
 releasenotes.md                               |   1 +
 .../switch/switch_in_defer_macro.c3t          |  37 ++++--
 test/unit/stdlib/core/ascii.c3                |  28 +++++
 8 files changed, 196 insertions(+), 80 deletions(-)
 create mode 100644 lib/std/core/ascii.c3
 create mode 100644 test/unit/stdlib/core/ascii.c3

diff --git a/lib/std/ascii.c3 b/lib/std/ascii.c3
index 412e09b95..373b72a69 100644
--- a/lib/std/ascii.c3
+++ b/lib/std/ascii.c3
@@ -1,77 +1,42 @@
+<* This module is scheduled for removal, use std::core::ascii *>
 module std::ascii;
 
 macro bool in_range_m(c, start, len) => (uint)(c - start) < len;
-macro bool is_lower_m(c) => in_range_m(c, 0x61, 26);
-macro bool is_upper_m(c) => in_range_m(c, 0x41, 26);
-macro bool is_digit_m(c) => in_range_m(c, 0x30, 10);
+macro bool is_lower_m(c)  => in_range_m(c, 0x61, 26);
+macro bool is_upper_m(c)  => in_range_m(c, 0x41, 26);
+macro bool is_digit_m(c)  => in_range_m(c, 0x30, 10);
 macro bool is_bdigit_m(c) => in_range_m(c, 0x30, 2);
 macro bool is_odigit_m(c) => in_range_m(c, 0x30, 8);
 macro bool is_xdigit_m(c) => in_range_m(c | 32, 0x61, 6) || is_digit_m(c);
-macro bool is_alpha_m(c) => in_range_m(c | 32, 0x61, 26);
-macro bool is_print_m(c) => in_range_m(c, 0x20, 95);
-macro bool is_graph_m(c) => in_range_m(c, 0x21, 94);
-macro bool is_space_m(c) => in_range_m(c, 0x9, 5) || c == 0x20;
-macro bool is_alnum_m(c) => is_alpha_m(c) || is_digit_m(c);
-macro bool is_punct_m(c) => !is_alnum_m(c) && is_graph_m(c);
-macro bool is_blank_m(c) => c == 0x20 || c == 0x9;
-macro bool is_cntrl_m(c) => c < 0x20 || c == 0x7f;
+macro bool is_alpha_m(c)  => in_range_m(c | 32, 0x61, 26);
+macro bool is_print_m(c)  => in_range_m(c, 0x20, 95);
+macro bool is_graph_m(c)  => in_range_m(c, 0x21, 94);
+macro bool is_space_m(c)  => in_range_m(c, 0x9, 5) || c == 0x20;
+macro bool is_alnum_m(c)  => is_alpha_m(c) || is_digit_m(c);
+macro bool is_punct_m(c)  => !is_alnum_m(c) && is_graph_m(c);
+macro bool is_blank_m(c)  => c == 0x20 || c == 0x9;
+macro bool is_cntrl_m(c)  => c < 0x20 || c == 0x7f;
 macro to_lower_m(c) => is_upper_m(c) ? c + 0x20 : c;
 macro to_upper_m(c) => is_lower_m(c) ? c - 0x20 : c;
 
 fn bool in_range(char c, char start, char len) => in_range_m(c, start, len);
-fn bool is_lower(char c) => is_lower_m(c);
-fn bool is_upper(char c) => is_upper_m(c);
-fn bool is_digit(char c) => is_digit_m(c);
-fn bool is_bdigit(char c) => is_bdigit_m(c);
-fn bool is_odigit(char c) => is_odigit_m(c);
-fn bool is_xdigit(char c) => is_xdigit_m(c);
-fn bool is_alpha(char c) => is_alpha_m(c);
-fn bool is_print(char c) => is_print_m(c);
-fn bool is_graph(char c) => is_graph_m(c);
-fn bool is_space(char c) => is_space_m(c);
-fn bool is_alnum(char c) => is_alnum_m(c);
-fn bool is_punct(char c) => is_punct_m(c);
-fn bool is_blank(char c) => is_blank_m(c);
-fn bool is_cntrl(char c) => is_cntrl_m(c);
-fn char to_lower(char c) => (char)to_lower_m(c);
-fn char to_upper(char c) => (char)to_upper_m(c);
 
 fn bool char.in_range(char c, char start, char len) => in_range_m(c, start, len);
-fn bool char.is_lower(char c) => is_lower_m(c);
-fn bool char.is_upper(char c) => is_upper_m(c);
-fn bool char.is_digit(char c) => is_digit_m(c);
-fn bool char.is_bdigit(char c) => is_bdigit_m(c);
-fn bool char.is_odigit(char c) => is_odigit_m(c);
-fn bool char.is_xdigit(char c) => is_xdigit_m(c);
-fn bool char.is_alpha(char c) => is_alpha_m(c);
-fn bool char.is_print(char c) => is_print_m(c);
-fn bool char.is_graph(char c) => is_graph_m(c);
-fn bool char.is_space(char c) => is_space_m(c);
-fn bool char.is_alnum(char c) => is_alnum_m(c);
-fn bool char.is_punct(char c) => is_punct_m(c);
-fn bool char.is_blank(char c) => is_blank_m(c);
-fn bool char.is_cntrl(char c) => is_cntrl_m(c);
-fn char char.to_lower(char c) => (char)to_lower_m(c);
-fn char char.to_upper(char c) => (char)to_upper_m(c);
-<*
- @require c.is_xdigit()
-*>
-fn char char.from_hex(char c) => c.is_digit() ? c - '0' : 10 + (c | 0x20) - 'a';
 
 fn bool uint.in_range(uint c, uint start, uint len) => in_range_m(c, start, len);
-fn bool uint.is_lower(uint c) => is_lower_m(c);
-fn bool uint.is_upper(uint c) => is_upper_m(c);
-fn bool uint.is_digit(uint c) => is_digit_m(c);
-fn bool uint.is_bdigit(uint c) => is_bdigit_m(c);
-fn bool uint.is_odigit(uint c) => is_odigit_m(c);
-fn bool uint.is_xdigit(uint c) => is_xdigit_m(c);
-fn bool uint.is_alpha(uint c) => is_alpha_m(c);
-fn bool uint.is_print(uint c) => is_print_m(c);
-fn bool uint.is_graph(uint c) => is_graph_m(c);
-fn bool uint.is_space(uint c) => is_space_m(c);
-fn bool uint.is_alnum(uint c) => is_alnum_m(c);
-fn bool uint.is_punct(uint c) => is_punct_m(c);
-fn bool uint.is_blank(uint c) => is_blank_m(c);
-fn bool uint.is_cntrl(uint c) => is_cntrl_m(c);
-fn uint uint.to_lower(uint c) => (uint)to_lower_m(c);
-fn uint uint.to_upper(uint c) => (uint)to_upper_m(c);
+fn bool uint.is_lower(uint c)  @deprecated => is_lower_m(c);
+fn bool uint.is_upper(uint c)  @deprecated => is_upper_m(c);
+fn bool uint.is_digit(uint c)  @deprecated => is_digit_m(c);
+fn bool uint.is_bdigit(uint c) @deprecated => is_bdigit_m(c);
+fn bool uint.is_odigit(uint c) @deprecated => is_odigit_m(c);
+fn bool uint.is_xdigit(uint c) @deprecated => is_xdigit_m(c);
+fn bool uint.is_alpha(uint c)  @deprecated => is_alpha_m(c);
+fn bool uint.is_print(uint c)  @deprecated => is_print_m(c);
+fn bool uint.is_graph(uint c)  @deprecated => is_graph_m(c);
+fn bool uint.is_space(uint c)  @deprecated => is_space_m(c);
+fn bool uint.is_alnum(uint c)  @deprecated => is_alnum_m(c);
+fn bool uint.is_punct(uint c)  @deprecated => is_punct_m(c);
+fn bool uint.is_blank(uint c)  @deprecated => is_blank_m(c);
+fn bool uint.is_cntrl(uint c)  @deprecated => is_cntrl_m(c);
+fn uint uint.to_lower(uint c)  @deprecated => (uint)to_lower_m(c);
+fn uint uint.to_upper(uint c)  @deprecated => (uint)to_upper_m(c);
diff --git a/lib/std/core/ascii.c3 b/lib/std/core/ascii.c3
new file mode 100644
index 000000000..68db253af
--- /dev/null
+++ b/lib/std/core/ascii.c3
@@ -0,0 +1,114 @@
+<*
+ This module contains utils for handling ASCII characters. They only operate on
+ characters corresponding to 0-127.
+*>
+module std::core::ascii;
+
+macro bool @is_lower(c)  => ASCII_LOOKUP[c].lower;      // Is a-z
+macro bool @is_upper(c)  => ASCII_LOOKUP[c].upper;      // Is A-Z
+macro bool @is_digit(c)  => ASCII_LOOKUP[c].digit;      // Is 0-9
+macro bool @is_bdigit(c) => ASCII_LOOKUP[c].bin_digit;  // Is 0-1
+macro bool @is_odigit(c) => ASCII_LOOKUP[c].oct_digit;  // Is 0-7
+macro bool @is_xdigit(c) => ASCII_LOOKUP[c].hex_digit;  // Is 0-9 or a-f or A-F
+macro bool @is_alpha(c)  => ASCII_LOOKUP[c].alpha;      // Is a-z or A-Z
+macro bool @is_print(c)  => ASCII_LOOKUP[c].printable;  // Is a printable character (space or higher and < 127)
+macro bool @is_graph(c)  => ASCII_LOOKUP[c].graph;      // Does it show any graphics (printable but not space)
+macro bool @is_space(c)  => ASCII_LOOKUP[c].space;      // Is it a whitespace character: space or 9-13 (tab, linefeed etc)
+macro bool @is_alnum(c)  => ASCII_LOOKUP[c].alphanum;   // Is it alpha or digit
+macro bool @is_punct(c)  => ASCII_LOOKUP[c].punct;      // Is it "graph" but not digit or letter
+macro bool @is_blank(c)  => ASCII_LOOKUP[c].blank;      // Is it a blank space: space or tab
+macro bool @is_cntrl(c)  => ASCII_LOOKUP[c].control;    // Is it a control character: 0-31 or 127
+macro char @to_lower(c)  => c + TO_LOWER[c];            // Convert A-Z to a-z, any other value is returned unchanged
+macro char @to_upper(c)  => c - TO_UPPER[c];            // Convert a-z to A-Z, any other value is returned unchanged
+
+fn bool is_lower(char c)  => @is_lower(c);  // Is a-z
+fn bool is_upper(char c)  => @is_upper(c);  // Is A-Z
+fn bool is_digit(char c)  => @is_digit(c);  // Is 0-9
+fn bool is_bdigit(char c) => @is_bdigit(c); // Is 0-1
+fn bool is_odigit(char c) => @is_odigit(c); // Is 0-7
+fn bool is_xdigit(char c) => @is_xdigit(c); // Is 0-9 or a-f or A-F
+fn bool is_alpha(char c)  => @is_alpha(c);  // Is a-z or A-Z
+fn bool is_print(char c)  => @is_print(c);  // Is a printable character (space or higher and < 127)
+fn bool is_graph(char c)  => @is_graph(c);  // Does it show any graphics (printable but not space)
+fn bool is_space(char c)  => @is_space(c);  // Is it a whitespace character: space or 9-13 (tab, linefeed etc)
+fn bool is_alnum(char c)  => @is_alnum(c);  // Is it alpha or digit
+fn bool is_punct(char c)  => @is_punct(c);  // Is it "graph" but not digit or letter
+fn bool is_blank(char c)  => @is_blank(c);  // Is it a blank space: space or tab
+fn bool is_cntrl(char c)  => @is_cntrl(c);  // Is it a control character: 0-31 or 127
+fn char to_lower(char c)  => @to_lower(c);  // Convert A-Z to a-z, any other value is returned unchanged
+fn char to_upper(char c)  => @to_upper(c);  // Convert a-z to A-Z, any other value is returned unchanged
+
+// Macro methods on char that forward to the @-macros of the same name, allowing e.g. c.is_lower()
+macro bool char.is_lower(char c)  => @is_lower(c);
+macro bool char.is_upper(char c)  => @is_upper(c);
+macro bool char.is_digit(char c)  => @is_digit(c);
+macro bool char.is_bdigit(char c) => @is_bdigit(c);
+macro bool char.is_odigit(char c) => @is_odigit(c);
+macro bool char.is_xdigit(char c) => @is_xdigit(c);
+macro bool char.is_alpha(char c)  => @is_alpha(c);
+macro bool char.is_print(char c)  => @is_print(c);
+macro bool char.is_graph(char c)  => @is_graph(c);
+macro bool char.is_space(char c)  => @is_space(c);
+macro bool char.is_alnum(char c)  => @is_alnum(c);
+macro bool char.is_punct(char c)  => @is_punct(c);
+macro bool char.is_blank(char c)  => @is_blank(c);
+macro bool char.is_cntrl(char c)  => @is_cntrl(c);
+macro char char.to_lower(char c)  => @to_lower(c);
+macro char char.to_upper(char c)  => @to_upper(c);
+
+<*
+ Convert a-f/A-F/0-9 to the appropriate hex value.
+
+ @require c.is_xdigit()
+ @ensure return >= 0 && return <= 15
+*>
+macro char char.from_hex(char c) => HEX_VALUE[c];
+
+<*
+ Bitstruct containing the different properties of a character
+*>
+bitstruct CharType : ushort @private
+{
+	bool lower;     // a-z
+	bool upper;     // A-Z
+	bool digit;     // 0-9
+	bool bin_digit; // 0-1
+	bool hex_digit; // 0-9, a-f, A-F
+	bool oct_digit; // 0-7
+	bool alpha;     // a-z, A-Z
+	bool alphanum;  // Letter or digit
+	bool space;     // Space or 9-13
+	bool printable; // 32-126
+	bool blank;     // Space or tab
+	bool punct;     // 33-126 except letters and digits
+	bool control;   // 0-31 or 127
+	bool graph;     // 33-126
+}
+
+const CharType[256] ASCII_LOOKUP @private = { // Indexed by character value. Written so that a later, narrower range replaces the flags of an earlier, wider one
+	[0..31]    = { .control },
+	[9..13]    = { .control, .space },
+	['\t']     = { .control, .space, .blank },
+	[' ']      = { .space, .printable, .blank },
+	[33..126]  = { .printable, .graph, .punct }, // Baseline for 33-126; digits and letters are replaced below and so lose .punct
+	['0'..'9'] = { .printable, .graph, .alphanum, .hex_digit, .digit },
+	['2'..'7'] = { .printable, .graph, .alphanum, .hex_digit, .digit, .oct_digit },
+	['0'..'1'] = { .printable, .graph, .alphanum, .hex_digit, .digit, .oct_digit, .bin_digit },
+	['A'..'Z'] = { .printable, .graph, .alphanum, .alpha, .upper },
+	['A'..'F'] = { .printable, .graph, .alphanum, .alpha, .upper, .hex_digit },
+	['a'..'z'] = { .printable, .graph, .alphanum, .alpha, .lower },
+	['a'..'f'] = { .printable, .graph, .alphanum, .alpha, .lower, .hex_digit },
+	[127]      = { .control },
+}; // Entries 128-255 are not listed here
+
+const char[256] HEX_VALUE = { // Maps '0'-'9', 'A'-'F' and 'a'-'f' to 0-15. NOTE(review): unlike the other tables this is not @private - confirm that is intended
+	['0'] =  0, ['1'] =  1, ['2'] =  2, ['3'] =  3, ['4'] =  4,
+	['5'] =  5, ['6'] =  6, ['7'] =  7, ['8'] =  8, ['9'] =  9,
+	['A'] = 10, ['B'] = 11, ['C'] = 12, ['D'] = 13, ['E'] = 14,
+	['F'] = 15, ['a'] = 10, ['b'] = 11, ['c'] = 12, ['d'] = 13,
+	['e'] = 14, ['f'] = 15
+};
+
+const char[256] TO_UPPER @private = { ['a'..'z'] = 'a' - 'A' }; // Offset that @to_upper subtracts: 'a' - 'A' for a-z
+const char[256] TO_LOWER @private  = { ['A'..'Z'] = 'a' - 'A' }; // Offset that @to_lower adds: 'a' - 'A' for A-Z
+
diff --git a/lib/std/core/string.c3 b/lib/std/core/string.c3
index 2aade2d25..1022e4af4 100644
--- a/lib/std/core/string.c3
+++ b/lib/std/core/string.c3
@@ -1,5 +1,4 @@
 module std::core::string;
-import std::ascii;
 import std::io;
 
 typedef String @if(!$defined(String)) = inline char[];
@@ -787,7 +786,7 @@ macro String.to_integer(self, $Type, int base = 10)
 	usz len = self.len;
 	usz index = 0;
 	char* ptr = self.ptr;
-	while (index < len && ascii::is_blank_m(ptr[index])) index++;
+	while (index < len && ptr[index].is_blank()) index++;
 	if (len == index) return EMPTY_STRING?;
 	bool is_negative;
 	switch (self[index])
diff --git a/lib/std/encoding/json.c3 b/lib/std/encoding/json.c3
index 461f12929..0c72d7964 100644
--- a/lib/std/encoding/json.c3
+++ b/lib/std/encoding/json.c3
@@ -3,7 +3,6 @@
 // a copy of which can be found in the LICENSE_STDLIB file.
 module std::encoding::json;
 import std::io;
-import std::ascii;
 import std::collections::object;
 
 faultdef UNEXPECTED_CHARACTER, INVALID_ESCAPE_SEQUENCE, DUPLICATE_MEMBERS, INVALID_NUMBER;
diff --git a/lib/std/net/inetaddr.c3 b/lib/std/net/inetaddr.c3
index 96f7c2977..7f38d7827 100644
--- a/lib/std/net/inetaddr.c3
+++ b/lib/std/net/inetaddr.c3
@@ -1,6 +1,5 @@
 module std::net;
 import std::io;
-import std::ascii;
 
 enum IpProtocol : char (AIFamily ai_family)
 {
diff --git a/releasenotes.md b/releasenotes.md
index 3da48fbc7..7f80cbd18 100644
--- a/releasenotes.md
+++ b/releasenotes.md
@@ -7,6 +7,7 @@
 
 ### Stdlib changes
 - Added `String.quick_ztr` and `String.is_zstr`
+- `std::ascii` moved into `std::core::ascii`. Old `_m` variants are deprecated, as are the `uint` methods.
 
 ## 0.7.1 Change list
 
diff --git a/test/test_suite/switch/switch_in_defer_macro.c3t b/test/test_suite/switch/switch_in_defer_macro.c3t
index 866f0c355..e547c3697 100644
--- a/test/test_suite/switch/switch_in_defer_macro.c3t
+++ b/test/test_suite/switch/switch_in_defer_macro.c3t
@@ -692,6 +692,7 @@ fn void test()
 @.str.4 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1
 @.str.5 = private unnamed_addr constant [3 x i8] c"*/\00", align 1
 @"lexer_test.Comment$end" = linkonce constant [2 x %"char[]"] [%"char[]" { ptr @.str.4, i64 1 }, %"char[]" { ptr @.str.5, i64 2 }], align 8
+@std.core.ascii.ASCII_LOOKUP = extern_weak constant [256 x i16], align 16
 @"$ct.std.io.ByteReader" = linkonce global %.introspect { i8 9, i64 0, ptr null, i64 24, i64 0, i64 2, [0 x i64] zeroinitializer }, align 8
 @std.core.mem.allocator.thread_allocator = extern_weak thread_local global %any, align 8
 
@@ -702,31 +703,41 @@ entry:
   br i1 %eq, label %and.rhs, label %and.phi
 
 and.rhs:                                          ; preds = %entry
-  %2 = call i8 @std.ascii.char.is_alpha(i8 zeroext %1)
-  %3 = trunc i8 %2 to i1
+  %zext = zext i8 %1 to i64
+  %ptroffset = getelementptr inbounds [2 x i8], ptr @std.core.ascii.ASCII_LOOKUP, i64 %zext
+  %2 = load i16, ptr %ptroffset, align 2
+  %lshrl = lshr i16 %2, 6
+  %3 = and i16 1, %lshrl
+  %trunc = trunc i16 %3 to i8
+  %4 = trunc i8 %trunc to i1
   br label %and.phi
 
 and.phi:                                          ; preds = %and.rhs, %entry
-  %val = phi i1 [ false, %entry ], [ %3, %and.rhs ]
+  %val = phi i1 [ false, %entry ], [ %4, %and.rhs ]
   br i1 %val, label %or.phi, label %or.rhs
 
 or.rhs:                                           ; preds = %and.phi
   %lt = icmp ult i64 0, %0
-  br i1 %lt, label %and.rhs1, label %and.phi2
+  br i1 %lt, label %and.rhs1, label %and.phi6
 
 and.rhs1:                                         ; preds = %or.rhs
-  %4 = call i8 @std.ascii.char.is_alnum(i8 zeroext %1)
-  %5 = trunc i8 %4 to i1
-  br label %and.phi2
+  %zext2 = zext i8 %1 to i64
+  %ptroffset3 = getelementptr inbounds [2 x i8], ptr @std.core.ascii.ASCII_LOOKUP, i64 %zext2
+  %5 = load i16, ptr %ptroffset3, align 2
+  %lshrl4 = lshr i16 %5, 7
+  %6 = and i16 1, %lshrl4
+  %trunc5 = trunc i16 %6 to i8
+  %7 = trunc i8 %trunc5 to i1
+  br label %and.phi6
 
-and.phi2:                                         ; preds = %and.rhs1, %or.rhs
-  %val3 = phi i1 [ false, %or.rhs ], [ %5, %and.rhs1 ]
+and.phi6:                                         ; preds = %and.rhs1, %or.rhs
+  %val7 = phi i1 [ false, %or.rhs ], [ %7, %and.rhs1 ]
   br label %or.phi
 
-or.phi:                                           ; preds = %and.phi2, %and.phi
-  %val4 = phi i1 [ true, %and.phi ], [ %val3, %and.phi2 ]
-  %6 = zext i1 %val4 to i8
-  ret i8 %6
+or.phi:                                           ; preds = %and.phi6, %and.phi
+  %val8 = phi i1 [ true, %and.phi ], [ %val7, %and.phi6 ]
+  %8 = zext i1 %val8 to i8
+  ret i8 %8
 }
 
 ; Function Attrs: nounwind uwtable
diff --git a/test/unit/stdlib/core/ascii.c3 b/test/unit/stdlib/core/ascii.c3
new file mode 100644
index 000000000..39f705889
--- /dev/null
+++ b/test/unit/stdlib/core/ascii.c3
@@ -0,0 +1,28 @@
+module std::core::ascii @test;
+import std::hash;
+
+fn void test_all() // Folds classification and case-conversion results per character into one Crc64 value and compares it to a fixed constant
+{
+	long x = 0; // NOTE(review): not used anywhere in this function
+	Crc64 check;
+	check.init();
+	for (char c = 0; c < 255; c++) // Covers 0-254 only; 255 itself is never checked
+	{
+		check.updatec(c);
+		if (c.is_upper()) check.updatec(1);
+		if (c.is_lower()) check.updatec(2);
+		if (c.is_alpha()) check.updatec(4);
+		if (c.is_bdigit()) check.updatec(8);
+		if (c.is_odigit()) check.updatec(16);
+		if (c.is_xdigit()) check.updatec(16); // NOTE(review): same tag as is_odigit above; changing it would change the expected value below
+    	if (c.is_digit()) check.updatec(32);
+		if (c.is_graph()) check.updatec(64);
+		check.updatec(128); // Unconditional update between the two groups of tags
+		if (c.is_punct()) check.updatec(1);
+		if (c.is_cntrl()) check.updatec(2);
+		if (c.is_space()) check.updatec(4);
+		check.updatec(c.to_upper());
+		check.updatec(c.to_lower());
+	}
+	test::eq(check.final(), 7327699757963224526UL); // Expected checksum of everything fed to `check` above
+}
\ No newline at end of file