std::ascii moved into std::core::ascii. Old _m variants are deprecated, as are the uint methods.

This commit is contained in:
Christoffer Lerno
2025-05-02 18:06:28 +02:00
parent bfccc303d1
commit 8a09b2e5f7
8 changed files with 196 additions and 80 deletions

View File

@@ -1,3 +1,4 @@
<* This module is scheduled for removal, use std::core::ascii *>
module std::ascii;
// Range check done with a single comparison: true when `c - start`, cast to uint, is below `len`.
// A `c` smaller than `start` makes the difference negative/wrapped before the cast, which is
// presumably what makes it fail the `< len` test for the small lengths used here.
macro bool in_range_m(c, start, len) => (uint)(c - start) < len;
@@ -19,59 +20,23 @@ macro to_lower_m(c) => is_upper_m(c) ? c + 0x20 : c;
// Yields `c - 0x20` when `is_lower_m(c)` holds, otherwise `c` unchanged.
macro to_upper_m(c) => is_lower_m(c) ? c - 0x20 : c;
fn bool in_range(char c, char start, char len) => in_range_m(c, start, len);
fn bool is_lower(char c) => is_lower_m(c);
fn bool is_upper(char c) => is_upper_m(c);
fn bool is_digit(char c) => is_digit_m(c);
fn bool is_bdigit(char c) => is_bdigit_m(c);
fn bool is_odigit(char c) => is_odigit_m(c);
fn bool is_xdigit(char c) => is_xdigit_m(c);
fn bool is_alpha(char c) => is_alpha_m(c);
fn bool is_print(char c) => is_print_m(c);
fn bool is_graph(char c) => is_graph_m(c);
fn bool is_space(char c) => is_space_m(c);
fn bool is_alnum(char c) => is_alnum_m(c);
fn bool is_punct(char c) => is_punct_m(c);
fn bool is_blank(char c) => is_blank_m(c);
fn bool is_cntrl(char c) => is_cntrl_m(c);
fn char to_lower(char c) => (char)to_lower_m(c);
fn char to_upper(char c) => (char)to_upper_m(c);
fn bool char.in_range(char c, char start, char len) => in_range_m(c, start, len);
fn bool char.is_lower(char c) => is_lower_m(c);
fn bool char.is_upper(char c) => is_upper_m(c);
fn bool char.is_digit(char c) => is_digit_m(c);
fn bool char.is_bdigit(char c) => is_bdigit_m(c);
fn bool char.is_odigit(char c) => is_odigit_m(c);
fn bool char.is_xdigit(char c) => is_xdigit_m(c);
fn bool char.is_alpha(char c) => is_alpha_m(c);
fn bool char.is_print(char c) => is_print_m(c);
fn bool char.is_graph(char c) => is_graph_m(c);
fn bool char.is_space(char c) => is_space_m(c);
fn bool char.is_alnum(char c) => is_alnum_m(c);
fn bool char.is_punct(char c) => is_punct_m(c);
fn bool char.is_blank(char c) => is_blank_m(c);
fn bool char.is_cntrl(char c) => is_cntrl_m(c);
fn char char.to_lower(char c) => (char)to_lower_m(c);
fn char char.to_upper(char c) => (char)to_upper_m(c);
<*
 Convert a single hexadecimal digit character to its numeric value.

 @require c.is_xdigit()
*>
fn char char.from_hex(char c)
{
	// Decimal digits map directly onto 0-9.
	if (c.is_digit()) return c - '0';
	// OR-ing with 0x20 turns 'A'-'F' into 'a'-'f', so both cases share one formula.
	return 10 + (c | 0x20) - 'a';
}
fn bool uint.in_range(uint c, uint start, uint len) => in_range_m(c, start, len);
fn bool uint.is_lower(uint c) => is_lower_m(c);
fn bool uint.is_upper(uint c) => is_upper_m(c);
fn bool uint.is_digit(uint c) => is_digit_m(c);
fn bool uint.is_bdigit(uint c) => is_bdigit_m(c);
fn bool uint.is_odigit(uint c) => is_odigit_m(c);
fn bool uint.is_xdigit(uint c) => is_xdigit_m(c);
fn bool uint.is_alpha(uint c) => is_alpha_m(c);
fn bool uint.is_print(uint c) => is_print_m(c);
fn bool uint.is_graph(uint c) => is_graph_m(c);
fn bool uint.is_space(uint c) => is_space_m(c);
fn bool uint.is_alnum(uint c) => is_alnum_m(c);
fn bool uint.is_punct(uint c) => is_punct_m(c);
fn bool uint.is_blank(uint c) => is_blank_m(c);
fn bool uint.is_cntrl(uint c) => is_cntrl_m(c);
fn uint uint.to_lower(uint c) => (uint)to_lower_m(c);
fn uint uint.to_upper(uint c) => (uint)to_upper_m(c);
// Deprecated `uint` method forms. Each one forwards to the `_m` macro of the same name.
// This module is scheduled for removal; new code should use std::core::ascii instead.
fn bool uint.is_lower(uint c) @deprecated => is_lower_m(c);
fn bool uint.is_upper(uint c) @deprecated => is_upper_m(c);
fn bool uint.is_digit(uint c) @deprecated => is_digit_m(c);
fn bool uint.is_bdigit(uint c) @deprecated => is_bdigit_m(c);
fn bool uint.is_odigit(uint c) @deprecated => is_odigit_m(c);
fn bool uint.is_xdigit(uint c) @deprecated => is_xdigit_m(c);
fn bool uint.is_alpha(uint c) @deprecated => is_alpha_m(c);
fn bool uint.is_print(uint c) @deprecated => is_print_m(c);
fn bool uint.is_graph(uint c) @deprecated => is_graph_m(c);
fn bool uint.is_space(uint c) @deprecated => is_space_m(c);
fn bool uint.is_alnum(uint c) @deprecated => is_alnum_m(c);
fn bool uint.is_punct(uint c) @deprecated => is_punct_m(c);
fn bool uint.is_blank(uint c) @deprecated => is_blank_m(c);
fn bool uint.is_cntrl(uint c) @deprecated => is_cntrl_m(c);
fn uint uint.to_lower(uint c) @deprecated => (uint)to_lower_m(c);
fn uint uint.to_upper(uint c) @deprecated => (uint)to_upper_m(c);

114
lib/std/core/ascii.c3 Normal file
View File

@@ -0,0 +1,114 @@
<*
This module contains utils for handling ASCII characters. They only operate on
characters corresponding to 0-127.
*>
module std::core::ascii;
// Classification macros. Each one reads a single flag from ASCII_LOOKUP, using `c` directly
// as the index; the table has 256 entries, so `c` must be in the range 0-255.
macro bool @is_lower(c) => ASCII_LOOKUP[c].lower; // Is a-z
macro bool @is_upper(c) => ASCII_LOOKUP[c].upper; // Is A-Z
macro bool @is_digit(c) => ASCII_LOOKUP[c].digit; // Is 0-9
macro bool @is_bdigit(c) => ASCII_LOOKUP[c].bin_digit; // Is 0-1
macro bool @is_odigit(c) => ASCII_LOOKUP[c].oct_digit; // Is 0-7
macro bool @is_xdigit(c) => ASCII_LOOKUP[c].hex_digit; // Is 0-9 or a-f or A-F
macro bool @is_alpha(c) => ASCII_LOOKUP[c].alpha; // Is a-z or A-Z
macro bool @is_print(c) => ASCII_LOOKUP[c].printable; // Is a printable character (space or higher, and < 127)
macro bool @is_graph(c) => ASCII_LOOKUP[c].graph; // Does it show any graphics (printable but not space)
macro bool @is_space(c) => ASCII_LOOKUP[c].space; // Is it a space character: space, tab, linefeed etc.
macro bool @is_alnum(c) => ASCII_LOOKUP[c].alphanum; // Is it alpha or digit
macro bool @is_punct(c) => ASCII_LOOKUP[c].punct; // Is it "graph" but not digit or letter
macro bool @is_blank(c) => ASCII_LOOKUP[c].blank; // Is it a blank space: space or tab
macro bool @is_cntrl(c) => ASCII_LOOKUP[c].control; // Is it a control character: before space, or 127
// Case conversion adds or subtracts the per-character offset stored in TO_LOWER / TO_UPPER.
// Only A-Z (resp. a-z) are listed in those tables; other entries are presumably zero, which
// leaves every other character unchanged.
macro char @to_lower(c) => c + TO_LOWER[c]; // Convert A-Z to a-z if found
macro char @to_upper(c) => c - TO_UPPER[c]; // Convert a-z to A-Z if found
// Function forms of the character tests and case conversions.
// Each one simply forwards to the `@`-prefixed macro of the same name.
fn bool is_lower(char c) { return @is_lower(c); }    // True for a-z
fn bool is_upper(char c) { return @is_upper(c); }    // True for A-Z
fn bool is_digit(char c) { return @is_digit(c); }    // True for 0-9
fn bool is_bdigit(char c) { return @is_bdigit(c); }  // True for the binary digits 0-1
fn bool is_odigit(char c) { return @is_odigit(c); }  // True for the octal digits 0-7
fn bool is_xdigit(char c) { return @is_xdigit(c); }  // True for the hex digits 0-9, a-f and A-F
fn bool is_alpha(char c) { return @is_alpha(c); }    // True for a-z and A-Z
fn bool is_print(char c) { return @is_print(c); }    // True for printable characters (space or higher, and < 127)
fn bool is_graph(char c) { return @is_graph(c); }    // True for characters with a visible glyph (printable but not space)
fn bool is_space(char c) { return @is_space(c); }    // True for whitespace: space, tab, linefeed etc.
fn bool is_alnum(char c) { return @is_alnum(c); }    // True for letters and digits
fn bool is_punct(char c) { return @is_punct(c); }    // True for "graph" characters that are neither digit nor letter
fn bool is_blank(char c) { return @is_blank(c); }    // True for space and tab
fn bool is_cntrl(char c) { return @is_cntrl(c); }    // True for control characters: below space, or 127
fn char to_lower(char c) { return @to_lower(c); }    // Maps A-Z to a-z, other characters are returned as-is
fn char to_upper(char c) { return @to_upper(c); }    // Maps a-z to A-Z, other characters are returned as-is
// Method forms on `char`, so that callers can write `c.is_lower()`, `c.to_upper()` etc.
// Each is a macro that expands to the `@`-prefixed macro of the same name.
macro bool char.is_lower(char c) => @is_lower(c);
macro bool char.is_upper(char c) => @is_upper(c);
macro bool char.is_digit(char c) => @is_digit(c);
macro bool char.is_bdigit(char c) => @is_bdigit(c);
macro bool char.is_odigit(char c) => @is_odigit(c);
macro bool char.is_xdigit(char c) => @is_xdigit(c);
macro bool char.is_alpha(char c) => @is_alpha(c);
macro bool char.is_print(char c) => @is_print(c);
macro bool char.is_graph(char c) => @is_graph(c);
macro bool char.is_space(char c) => @is_space(c);
macro bool char.is_alnum(char c) => @is_alnum(c);
macro bool char.is_punct(char c) => @is_punct(c);
macro bool char.is_blank(char c) => @is_blank(c);
macro bool char.is_cntrl(char c) => @is_cntrl(c);
macro char char.to_lower(char c) => @to_lower(c);
macro char char.to_upper(char c) => @to_upper(c);
<*
 Convert a hexadecimal digit character (0-9, a-f or A-F) to its numeric value
 by looking it up in HEX_VALUE.

 The result is an unsigned `char`, so only the upper bound needs to be stated
 in the postcondition.

 @require c.is_xdigit()
 @ensure return <= 15
*>
macro char char.from_hex(char c) => HEX_VALUE[c];
<*
 Bitstruct containing the different properties of a character, one flag per
 character class. ASCII_LOOKUP holds one CharType per byte value.

 Bits appear to be assigned in declaration order, so reordering the fields
 would change the binary layout of the lookup table.
*>
bitstruct CharType : ushort @private
{
bool lower; // a-z
bool upper; // A-Z
bool digit; // 0-9
bool bin_digit; // 0-1
bool hex_digit; // 0-9, a-f, A-F
bool oct_digit; // 0-7
bool alpha; // a-z, A-Z
bool alphanum; // letter or digit
bool space; // whitespace: 9-13 and ' '
bool printable; // ' ' up to and including 126
bool blank; // tab or ' '
bool punct; // 33-126 that is neither letter nor digit
bool control; // 0-31 and 127
bool graph; // 33-126
}
// Character class flags for every byte value, indexed by the character code.
// The initializer ranges overlap on purpose: a broad range is given first and narrower
// ranges that follow are expected to replace those entries (e.g. 33..126 starts out as
// punctuation, then digits and letters are overwritten). Do not reorder the entries.
// Indices 128-255 are not listed at all.
const CharType[256] ASCII_LOOKUP @private = {
[0..31] = { .control },
[9..13] = { .control, .space },
['\t'] = { .control, .space, .blank },
[' '] = { .space, .printable, .blank },
[33..126] = { .printable, .graph, .punct },
['0'..'9'] = { .printable, .graph, .alphanum, .hex_digit, .digit },
['2'..'7'] = { .printable, .graph, .alphanum, .hex_digit, .digit, .oct_digit },
['0'..'1'] = { .printable, .graph, .alphanum, .hex_digit, .digit, .oct_digit, .bin_digit },
['A'..'Z'] = { .printable, .graph, .alphanum, .alpha, .upper },
['A'..'F'] = { .printable, .graph, .alphanum, .alpha, .upper, .hex_digit },
['a'..'z'] = { .printable, .graph, .alphanum, .alpha, .lower },
['a'..'f'] = { .printable, .graph, .alphanum, .alpha, .lower, .hex_digit },
[127] = { .control },
};
// Numeric value of each hexadecimal digit character, indexed by the character code.
// Only 0-9, A-F and a-f are listed.
const char[256] HEX_VALUE = {
	// Decimal digits
	['0'] = 0x0, ['1'] = 0x1, ['2'] = 0x2, ['3'] = 0x3,
	['4'] = 0x4, ['5'] = 0x5, ['6'] = 0x6, ['7'] = 0x7,
	['8'] = 0x8, ['9'] = 0x9,
	// Letters, upper and lower case paired by value
	['A'] = 0xA, ['a'] = 0xA,
	['B'] = 0xB, ['b'] = 0xB,
	['C'] = 0xC, ['c'] = 0xC,
	['D'] = 0xD, ['d'] = 0xD,
	['E'] = 0xE, ['e'] = 0xE,
	['F'] = 0xF, ['f'] = 0xF
};
// Per-character case offsets, indexed by the character code.
// 0x20 is the distance between an upper-case letter and its lower-case form ('a' - 'A').
const char[256] TO_UPPER @private = {
	['a'..'z'] = 0x20
};
const char[256] TO_LOWER @private = {
	['A'..'Z'] = 0x20
};

View File

@@ -1,5 +1,4 @@
module std::core::string;
import std::ascii;
import std::io;
typedef String @if(!$defined(String)) = inline char[];
@@ -787,7 +786,7 @@ macro String.to_integer(self, $Type, int base = 10)
usz len = self.len;
usz index = 0;
char* ptr = self.ptr;
while (index < len && ascii::is_blank_m(ptr[index])) index++;
while (index < len && ptr[index].is_blank()) index++;
if (len == index) return EMPTY_STRING?;
bool is_negative;
switch (self[index])

View File

@@ -3,7 +3,6 @@
// a copy of which can be found in the LICENSE_STDLIB file.
module std::encoding::json;
import std::io;
import std::ascii;
import std::collections::object;
faultdef UNEXPECTED_CHARACTER, INVALID_ESCAPE_SEQUENCE, DUPLICATE_MEMBERS, INVALID_NUMBER;

View File

@@ -1,6 +1,5 @@
module std::net;
import std::io;
import std::ascii;
enum IpProtocol : char (AIFamily ai_family)
{

View File

@@ -7,6 +7,7 @@
### Stdlib changes
- Added `String.quick_ztr` and `String.is_zstr`
- std::ascii moved into std::core::ascii. Old _m variants are deprecated, as are the uint methods.
## 0.7.1 Change list

View File

@@ -692,6 +692,7 @@ fn void test()
@.str.4 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1
@.str.5 = private unnamed_addr constant [3 x i8] c"*/\00", align 1
@"lexer_test.Comment$end" = linkonce constant [2 x %"char[]"] [%"char[]" { ptr @.str.4, i64 1 }, %"char[]" { ptr @.str.5, i64 2 }], align 8
@std.core.ascii.ASCII_LOOKUP = extern_weak constant [256 x i16], align 16
@"$ct.std.io.ByteReader" = linkonce global %.introspect { i8 9, i64 0, ptr null, i64 24, i64 0, i64 2, [0 x i64] zeroinitializer }, align 8
@std.core.mem.allocator.thread_allocator = extern_weak thread_local global %any, align 8
@@ -702,31 +703,41 @@ entry:
br i1 %eq, label %and.rhs, label %and.phi
and.rhs: ; preds = %entry
%2 = call i8 @std.ascii.char.is_alpha(i8 zeroext %1)
%3 = trunc i8 %2 to i1
%zext = zext i8 %1 to i64
%ptroffset = getelementptr inbounds [2 x i8], ptr @std.core.ascii.ASCII_LOOKUP, i64 %zext
%2 = load i16, ptr %ptroffset, align 2
%lshrl = lshr i16 %2, 6
%3 = and i16 1, %lshrl
%trunc = trunc i16 %3 to i8
%4 = trunc i8 %trunc to i1
br label %and.phi
and.phi: ; preds = %and.rhs, %entry
%val = phi i1 [ false, %entry ], [ %3, %and.rhs ]
%val = phi i1 [ false, %entry ], [ %4, %and.rhs ]
br i1 %val, label %or.phi, label %or.rhs
or.rhs: ; preds = %and.phi
%lt = icmp ult i64 0, %0
br i1 %lt, label %and.rhs1, label %and.phi2
br i1 %lt, label %and.rhs1, label %and.phi6
and.rhs1: ; preds = %or.rhs
%4 = call i8 @std.ascii.char.is_alnum(i8 zeroext %1)
%5 = trunc i8 %4 to i1
br label %and.phi2
%zext2 = zext i8 %1 to i64
%ptroffset3 = getelementptr inbounds [2 x i8], ptr @std.core.ascii.ASCII_LOOKUP, i64 %zext2
%5 = load i16, ptr %ptroffset3, align 2
%lshrl4 = lshr i16 %5, 7
%6 = and i16 1, %lshrl4
%trunc5 = trunc i16 %6 to i8
%7 = trunc i8 %trunc5 to i1
br label %and.phi6
and.phi2: ; preds = %and.rhs1, %or.rhs
%val3 = phi i1 [ false, %or.rhs ], [ %5, %and.rhs1 ]
and.phi6: ; preds = %and.rhs1, %or.rhs
%val7 = phi i1 [ false, %or.rhs ], [ %7, %and.rhs1 ]
br label %or.phi
or.phi: ; preds = %and.phi2, %and.phi
%val4 = phi i1 [ true, %and.phi ], [ %val3, %and.phi2 ]
%6 = zext i1 %val4 to i8
ret i8 %6
or.phi: ; preds = %and.phi6, %and.phi
%val8 = phi i1 [ true, %and.phi ], [ %val7, %and.phi6 ]
%8 = zext i1 %val8 to i8
ret i8 %8
}
; Function Attrs: nounwind uwtable

View File

@@ -0,0 +1,28 @@
module std::core::ascii @test;
import std::hash;
// Regression test for the character classification tables: for every byte value 0-254 it
// feeds the character, a tag for each test that matched, and both case conversions into a
// CRC64, then compares the digest with a fixed expected value.
//
// NOTE(review): the value 255 is never visited (the loop stops at `c < 255`), and
// is_odigit / is_xdigit both feed the tag 16, so the digest cannot tell them apart.
// is_print, is_alnum and is_blank are not exercised here. Changing any of this changes
// the expected digest below.
fn void test_all()
{
	Crc64 check;
	check.init();
	for (char c = 0; c < 255; c++)
	{
		check.updatec(c);
		if (c.is_upper()) check.updatec(1);
		if (c.is_lower()) check.updatec(2);
		if (c.is_alpha()) check.updatec(4);
		if (c.is_bdigit()) check.updatec(8);
		if (c.is_odigit()) check.updatec(16);
		if (c.is_xdigit()) check.updatec(16);
		if (c.is_digit()) check.updatec(32);
		if (c.is_graph()) check.updatec(64);
		// Unconditional separator between the two groups of tags, which reuse 1, 2 and 4.
		check.updatec(128);
		if (c.is_punct()) check.updatec(1);
		if (c.is_cntrl()) check.updatec(2);
		if (c.is_space()) check.updatec(4);
		check.updatec(c.to_upper());
		check.updatec(c.to_lower());
	}
	test::eq(check.final(), 7327699757963224526UL);
}