Files
c3c/lib/std/core/ascii.c3
Zack Puhl 3f7a547d8a Merge AsciiCharset CT/non-CT Functions (#2688)
* Merge AsciiCharset CT/non-CT Functions

* release notes

* incorporate helpful review feedback

* re-separate 'create_set' and 'contains' but keep 'combine_sets'; update tests

* tabs (annoying IDE)

* Restored old code verbatim for smaller diff. Split combine_sets into easier to macro/function for runtime / macro version, this also allows for more easy type checks.

---------

Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
2026-01-15 21:39:48 +01:00

157 lines
6.4 KiB
Plaintext

<*
This module contains utils for handling ASCII characters. They only operate on
characters corresponding to 0-127.
*>
module std::core::ascii;
macro bool @is_lower(c) => ASCII_LOOKUP[c].lower; // Is a-z
macro bool @is_upper(c) => ASCII_LOOKUP[c].upper; // Is A-Z
macro bool @is_digit(c) => ASCII_LOOKUP[c].digit; // Is 0-9
macro bool @is_bdigit(c) => ASCII_LOOKUP[c].bin_digit; // Is 0-1
macro bool @is_odigit(c) => ASCII_LOOKUP[c].oct_digit; // Is 0-7
macro bool @is_xdigit(c) => ASCII_LOOKUP[c].hex_digit; // Is 0-9 or a-f or A-F
macro bool @is_alpha(c) => ASCII_LOOKUP[c].alpha; // Is a-z or A-Z
macro bool @is_print(c) => ASCII_LOOKUP[c].printable; // Is a printable character (space or higher and < 127
macro bool @is_graph(c) => ASCII_LOOKUP[c].graph; // Does it show any graphics (printable but not space)
macro bool @is_space(c) => ASCII_LOOKUP[c].space; // Is it a space character: space, tab, linefeed etc
macro bool @is_alnum(c) => ASCII_LOOKUP[c].alphanum; // Is it alpha or digit
macro bool @is_punct(c) => ASCII_LOOKUP[c].punct; // Is it "graph" but not digit or letter
macro bool @is_blank(c) => ASCII_LOOKUP[c].blank; // Is it a blank space: space or tab
macro bool @is_cntrl(c) => ASCII_LOOKUP[c].control; // Is it a control character: before space or 127
macro char @to_lower(c) => c + TO_LOWER[c]; // Convert A-Z to a-z if found
macro char @to_upper(c) => c - TO_UPPER[c]; // Convert a-z to A-Z if found
fn bool is_lower(char c) => @is_lower(c); // Is a-z
fn bool is_upper(char c) => @is_upper(c); // Is A-Z
fn bool is_digit(char c) => @is_digit(c); // Is 0-9
fn bool is_bdigit(char c) => @is_bdigit(c); // Is 0-1
fn bool is_odigit(char c) => @is_odigit(c); // Is 0-7
fn bool is_xdigit(char c) => @is_xdigit(c); // Is 0-9 or a-f or A-F
fn bool is_alpha(char c) => @is_alpha(c); // Is a-z or A-Z
fn bool is_print(char c) => @is_print(c); // Is a printable character (space or higher and < 127
fn bool is_graph(char c) => @is_graph(c); // Does it show any graphics (printable but not space)
fn bool is_space(char c) => @is_space(c); // Is it a space character: space, tab, linefeed etc
fn bool is_alnum(char c) => @is_alnum(c); // Is it alpha or digit
fn bool is_punct(char c) => @is_punct(c); // Is it "graph" but not digit or letter
fn bool is_blank(char c) => @is_blank(c); // Is it a blank space: space or tab
fn bool is_cntrl(char c) => @is_cntrl(c); // Is it a control character: before space or 127
fn char to_lower(char c) => @to_lower(c); // Convert A-Z to a-z if found
fn char to_upper(char c) => @to_upper(c); // Convert a-z to A-Z if found
// The following methods are macro methods for the same functions
macro bool char.is_lower(char c) => @is_lower(c);
macro bool char.is_upper(char c) => @is_upper(c);
macro bool char.is_digit(char c) => @is_digit(c);
macro bool char.is_bdigit(char c) => @is_bdigit(c);
macro bool char.is_odigit(char c) => @is_odigit(c);
macro bool char.is_xdigit(char c) => @is_xdigit(c);
macro bool char.is_alpha(char c) => @is_alpha(c);
macro bool char.is_print(char c) => @is_print(c);
macro bool char.is_graph(char c) => @is_graph(c);
macro bool char.is_space(char c) => @is_space(c);
macro bool char.is_alnum(char c) => @is_alnum(c);
macro bool char.is_punct(char c) => @is_punct(c);
macro bool char.is_blank(char c) => @is_blank(c);
macro bool char.is_cntrl(char c) => @is_cntrl(c);
macro char char.to_lower(char c) => @to_lower(c);
macro char char.to_upper(char c) => @to_upper(c);
<*
Convert a-f/A-F/0-9 to the appropriate hex value.
@require c.is_xdigit()
@ensure return >= 0 && return <= 15
*>
macro char char.from_hex(char c) => HEX_VALUE[c];
<*
Bitstruct containing the different properties of a character
*>
bitstruct CharType : ushort @private
{
bool lower;
bool upper;
bool digit;
bool bin_digit;
bool hex_digit;
bool oct_digit;
bool alpha;
bool alphanum;
bool space;
bool printable;
bool blank;
bool punct;
bool control;
bool graph;
}
const CharType[256] ASCII_LOOKUP @private = {
[0..31] = { .control },
[9..13] = { .control, .space },
['\t'] = { .control, .space, .blank },
[' '] = { .space, .printable, .blank },
[33..126] = { .printable, .graph, .punct },
['0'..'9'] = { .printable, .graph, .alphanum, .hex_digit, .digit },
['2'..'7'] = { .printable, .graph, .alphanum, .hex_digit, .digit, .oct_digit },
['0'..'1'] = { .printable, .graph, .alphanum, .hex_digit, .digit, .oct_digit, .bin_digit },
['A'..'Z'] = { .printable, .graph, .alphanum, .alpha, .upper },
['A'..'F'] = { .printable, .graph, .alphanum, .alpha, .upper, .hex_digit },
['a'..'z'] = { .printable, .graph, .alphanum, .alpha, .lower },
['a'..'f'] = { .printable, .graph, .alphanum, .alpha, .lower, .hex_digit },
[127] = { .control },
};
const char[256] HEX_VALUE = {
['0'] = 0, ['1'] = 1, ['2'] = 2, ['3'] = 3, ['4'] = 4,
['5'] = 5, ['6'] = 6, ['7'] = 7, ['8'] = 8, ['9'] = 9,
['A'] = 10, ['B'] = 11, ['C'] = 12, ['D'] = 13, ['E'] = 14,
['F'] = 15, ['a'] = 10, ['b'] = 11, ['c'] = 12, ['d'] = 13,
['e'] = 14, ['f'] = 15
};
const char[256] TO_UPPER @private = { ['a'..'z'] = 'a' - 'A' };
const char[256] TO_LOWER @private = { ['A'..'Z'] = 'a' - 'A' };
typedef AsciiCharset = uint128;
macro AsciiCharset @create_set(String $string) @const
{
AsciiCharset $set;
$foreach $c : $string:
$set |= 1ULL << $c;
$endforeach
return $set;
}
fn AsciiCharset create_set(String string)
{
AsciiCharset set;
foreach (c : string) set |= (AsciiCharset)1ULL << c;
return set;
}
macro bool AsciiCharset.@contains($set, char $c) @const => !!($c < 128) & !!($set & (AsciiCharset)(1ULL << $c));
macro AsciiCharset @combine_sets(AsciiCharset $first, AsciiCharset... $sets) @const
{
var $res = $first;
$foreach $c : $sets:
$res |= $c;
$endforeach
return $res;
}
fn AsciiCharset combine_sets(AsciiCharset first, AsciiCharset... sets)
{
foreach (c : sets) first |= c;
return first;
}
macro bool AsciiCharset.contains(set, char c) => !!(c < 128) & !!(set & (AsciiCharset)(1ULL << c));
const AsciiCharset WHITESPACE_SET = @create_set("\t\n\v\f\r ");
const AsciiCharset NUMBER_SET = @create_set("0123456789");
const AsciiCharset ALPHA_UPPER_SET = @create_set("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
const AsciiCharset ALPHA_LOWER_SET = @create_set("abcdefghijklmnopqrstuvwxyz");
const AsciiCharset ALPHA_SET = @combine_sets(ALPHA_UPPER_SET, ALPHA_LOWER_SET);
const AsciiCharset ALPHANUMERIC_SET = @combine_sets(ALPHA_SET, NUMBER_SET);