mirror of
https://github.com/c3lang/c3c.git
synced 2026-02-27 20:11:17 +00:00
* Merge AsciiCharset CT/non-CT Functions * release notes * incorporate helpful review feedback * re-separate 'create_set' and 'contains' but keep 'combine_sets'; update tests * tabs (annoying IDE) * Restored old code verbatim for smaller diff. Split combine_sets into easier to macro/function for runtime / macro version, this also allows for more easy type checks. --------- Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
157 lines
6.4 KiB
Plaintext
157 lines
6.4 KiB
Plaintext
<*
|
|
This module contains utils for handling ASCII characters. They only operate on
|
|
characters corresponding to 0-127.
|
|
*>
|
|
module std::core::ascii;
|
|
|
|
// Classification macros: each one indexes ASCII_LOOKUP with `c` and reads a
// single flag from the resulting CharType entry.

// Is a-z
macro bool @is_lower(c) { return ASCII_LOOKUP[c].lower; }
// Is A-Z
macro bool @is_upper(c) { return ASCII_LOOKUP[c].upper; }
// Is 0-9
macro bool @is_digit(c) { return ASCII_LOOKUP[c].digit; }
// Is 0-1
macro bool @is_bdigit(c) { return ASCII_LOOKUP[c].bin_digit; }
// Is 0-7
macro bool @is_odigit(c) { return ASCII_LOOKUP[c].oct_digit; }
// Is 0-9 or a-f or A-F
macro bool @is_xdigit(c) { return ASCII_LOOKUP[c].hex_digit; }
// Is a-z or A-Z
macro bool @is_alpha(c) { return ASCII_LOOKUP[c].alpha; }
// Is a printable character (space or higher and < 127)
macro bool @is_print(c) { return ASCII_LOOKUP[c].printable; }
// Does it show any graphics (printable but not space)
macro bool @is_graph(c) { return ASCII_LOOKUP[c].graph; }
// Is it a space character: space, tab, linefeed etc
macro bool @is_space(c) { return ASCII_LOOKUP[c].space; }
// Is it alpha or digit
macro bool @is_alnum(c) { return ASCII_LOOKUP[c].alphanum; }
// Is it "graph" but not digit or letter
macro bool @is_punct(c) { return ASCII_LOOKUP[c].punct; }
// Is it a blank space: space or tab
macro bool @is_blank(c) { return ASCII_LOOKUP[c].blank; }
// Is it a control character: before space or 127
macro bool @is_cntrl(c) { return ASCII_LOOKUP[c].control; }

// Case conversion macros: TO_LOWER / TO_UPPER hold the offset to apply for
// letters of the opposite case and zero for every other index.

// Convert A-Z to a-z if found
macro char @to_lower(c) { return c + TO_LOWER[c]; }
// Convert a-z to A-Z if found
macro char @to_upper(c) { return c - TO_UPPER[c]; }
|
|
|
|
// Function forms of the classification macros; each one forwards to the
// macro of the same name.

// Is a-z
fn bool is_lower(char c) { return @is_lower(c); }
// Is A-Z
fn bool is_upper(char c) { return @is_upper(c); }
// Is 0-9
fn bool is_digit(char c) { return @is_digit(c); }
// Is 0-1
fn bool is_bdigit(char c) { return @is_bdigit(c); }
// Is 0-7
fn bool is_odigit(char c) { return @is_odigit(c); }
// Is 0-9 or a-f or A-F
fn bool is_xdigit(char c) { return @is_xdigit(c); }
// Is a-z or A-Z
fn bool is_alpha(char c) { return @is_alpha(c); }
// Is a printable character (space or higher and < 127)
fn bool is_print(char c) { return @is_print(c); }
// Does it show any graphics (printable but not space)
fn bool is_graph(char c) { return @is_graph(c); }
// Is it a space character: space, tab, linefeed etc
fn bool is_space(char c) { return @is_space(c); }
// Is it alpha or digit
fn bool is_alnum(char c) { return @is_alnum(c); }
// Is it "graph" but not digit or letter
fn bool is_punct(char c) { return @is_punct(c); }
// Is it a blank space: space or tab
fn bool is_blank(char c) { return @is_blank(c); }
// Is it a control character: before space or 127
fn bool is_cntrl(char c) { return @is_cntrl(c); }
// Convert A-Z to a-z if found
fn char to_lower(char c) { return @to_lower(c); }
// Convert a-z to A-Z if found
fn char to_upper(char c) { return @to_upper(c); }
|
|
|
|
// Method forms on `char`: each macro method forwards to the free-standing
// macro of the same name, so `c.is_digit()` and `@is_digit(c)` are the same check.

// Is a-z
macro bool char.is_lower(char c) { return @is_lower(c); }
// Is A-Z
macro bool char.is_upper(char c) { return @is_upper(c); }
// Is 0-9
macro bool char.is_digit(char c) { return @is_digit(c); }
// Is 0-1
macro bool char.is_bdigit(char c) { return @is_bdigit(c); }
// Is 0-7
macro bool char.is_odigit(char c) { return @is_odigit(c); }
// Is 0-9 or a-f or A-F
macro bool char.is_xdigit(char c) { return @is_xdigit(c); }
// Is a-z or A-Z
macro bool char.is_alpha(char c) { return @is_alpha(c); }
// Is a printable character (space or higher and < 127)
macro bool char.is_print(char c) { return @is_print(c); }
// Does it show any graphics (printable but not space)
macro bool char.is_graph(char c) { return @is_graph(c); }
// Is it a space character: space, tab, linefeed etc
macro bool char.is_space(char c) { return @is_space(c); }
// Is it alpha or digit
macro bool char.is_alnum(char c) { return @is_alnum(c); }
// Is it "graph" but not digit or letter
macro bool char.is_punct(char c) { return @is_punct(c); }
// Is it a blank space: space or tab
macro bool char.is_blank(char c) { return @is_blank(c); }
// Is it a control character: before space or 127
macro bool char.is_cntrl(char c) { return @is_cntrl(c); }
// Convert A-Z to a-z if found
macro char char.to_lower(char c) { return @to_lower(c); }
// Convert a-z to A-Z if found
macro char char.to_upper(char c) { return @to_upper(c); }
|
|
|
|
<*
 Look up the numeric value (0-15) of a hexadecimal digit character
 (0-9, a-f or A-F) in the HEX_VALUE table.

 @require c.is_xdigit()
 @ensure return >= 0 && return <= 15
*>
macro char char.from_hex(char c)
{
	return HEX_VALUE[c];
}
|
|
|
|
<*
 Set of boolean classification flags describing one character.
 ASCII_LOOKUP stores one CharType per character code and the
 classification macros each read exactly one of these flags.
*>
bitstruct CharType : ushort @private
{
	bool lower;      // a-z
	bool upper;      // A-Z
	bool digit;      // 0-9
	bool bin_digit;  // 0-1
	bool hex_digit;  // 0-9, a-f, A-F
	bool oct_digit;  // 0-7
	bool alpha;      // a-z or A-Z
	bool alphanum;   // alpha or digit
	bool space;      // space, tab, linefeed etc
	bool printable;  // space or higher and < 127
	bool blank;      // space or tab
	bool punct;      // "graph" but not digit or letter
	bool control;    // before space, or 127
	bool graph;      // printable but not space
}
|
|
|
|
// Classification table indexed by character code.
//
// The initializers go from broad ranges to narrower ones that overlap them;
// the table relies on a later initializer replacing an earlier one for the
// same index, so the order of the entries below must be preserved.
// Indices 128..255 have no initializer.
const CharType[256] ASCII_LOOKUP @private = {
	// Control characters, then the whitespace subset, then tab specifically.
	[0..31]    = { .control },
	[9..13]    = { .control, .space },
	['\t']     = { .control, .space, .blank },
	// Space is printable but has no graphic.
	[' ']      = { .space, .printable, .blank },
	// Every graphic character starts out as punctuation; digits and letters
	// are overwritten below.
	[33..126]  = { .printable, .graph, .punct },
	// Digits: all decimal, then the octal subset, then the binary subset.
	['0'..'9'] = { .printable, .graph, .alphanum, .hex_digit, .digit },
	['2'..'7'] = { .printable, .graph, .alphanum, .hex_digit, .digit, .oct_digit },
	['0'..'1'] = { .printable, .graph, .alphanum, .hex_digit, .digit, .oct_digit, .bin_digit },
	// Upper case letters, then the hex-digit subset A-F.
	['A'..'Z'] = { .printable, .graph, .alphanum, .alpha, .upper },
	['A'..'F'] = { .printable, .graph, .alphanum, .alpha, .upper, .hex_digit },
	// Lower case letters, then the hex-digit subset a-f.
	['a'..'z'] = { .printable, .graph, .alphanum, .alpha, .lower },
	['a'..'f'] = { .printable, .graph, .alphanum, .alpha, .lower, .hex_digit },
	// 127 is the one control character above the printable range.
	[127]      = { .control },
};
|
|
|
|
// Numeric value of each hexadecimal digit character, indexed by character code.
// Only 0-9, A-F and a-f have an initializer.
const char[256] HEX_VALUE = {
	// Decimal digits
	['0'] = 0, ['1'] = 1, ['2'] = 2, ['3'] = 3, ['4'] = 4,
	['5'] = 5, ['6'] = 6, ['7'] = 7, ['8'] = 8, ['9'] = 9,
	// Upper case hex digits
	['A'] = 10, ['B'] = 11, ['C'] = 12, ['D'] = 13, ['E'] = 14, ['F'] = 15,
	// Lower case hex digits
	['a'] = 10, ['b'] = 11, ['c'] = 12, ['d'] = 13, ['e'] = 14, ['f'] = 15
};
|
|
|
|
// Case conversion offsets, indexed by character code.
// TO_UPPER holds the distance between the two cases for a-z; @to_upper subtracts it.
const char[256] TO_UPPER @private = {
	['a'..'z'] = 'a' - 'A'
};
// TO_LOWER holds the same distance for A-Z; @to_lower adds it.
const char[256] TO_LOWER @private = {
	['A'..'Z'] = 'a' - 'A'
};
|
|
|
|
// A set of ASCII characters stored as a 128-bit mask: create_set sets bit `c`
// for each character `c` it is given, and contains tests that same bit.
typedef AsciiCharset = uint128;
|
|
|
|
<*
 Build an AsciiCharset at compile time from the characters of $string.
 For every character c in $string, bit c of the result is set.
 The characters are expected to be ASCII (0-127), matching the 128 bits
 available in an AsciiCharset.
*>
macro AsciiCharset @create_set(String $string) @const
{
	AsciiCharset $set;
	$foreach $c : $string:
		// Cast to AsciiCharset before shifting, exactly as the runtime
		// create_set does, so that bit positions 64..127 are produced by a
		// 128-bit shift rather than depending on a ulong operand being widened.
		$set |= (AsciiCharset)1ULL << $c;
	$endforeach
	return $set;
}
|
|
|
|
<*
 Build an AsciiCharset at runtime from the characters of a string.
 For every character ch in the string, bit ch of the result is set;
 an empty string yields the empty set.
*>
fn AsciiCharset create_set(String string)
{
	AsciiCharset result;
	foreach (ch : string)
	{
		// The cast comes first so the shift is done on the 128-bit type.
		result |= (AsciiCharset)1ULL << ch;
	}
	return result;
}
|
|
|
|
<*
 Compile-time membership test: true when $c is below 128 and bit $c of $set is set.

 The single bit is built by shifting an AsciiCharset operand (as create_set does)
 instead of shifting a ulong and casting afterwards, so characters 64..127 address
 the upper half of the mask. Because `&` does not short-circuit, the shift is
 evaluated even when the range check fails; masking the index with 127 keeps the
 shift amount inside the 128-bit width, and the range check still makes the
 result false for $c >= 128.
*>
macro bool AsciiCharset.@contains($set, char $c) @const => !!($c < 128) & !!($set & ((AsciiCharset)1ULL << ($c & 127)));
|
|
|
|
<*
 Compile-time union of one or more charsets: starts from $first and
 ORs in every additional set passed in $sets.
*>
macro AsciiCharset @combine_sets(AsciiCharset $first, AsciiCharset... $sets) @const
{
	var $combined = $first;
	$foreach $other : $sets:
		$combined |= $other;
	$endforeach
	return $combined;
}
|
|
<*
 Runtime union of one or more charsets: starts from `first` and ORs in
 every additional set passed in `sets`. With no additional sets the
 result is `first` itself.
*>
fn AsciiCharset combine_sets(AsciiCharset first, AsciiCharset... sets)
{
	AsciiCharset combined = first;
	foreach (other : sets)
	{
		combined |= other;
	}
	return combined;
}
|
|
|
|
<*
 Runtime membership test: true when c is below 128 and bit c of set is set.

 The single bit is built by shifting an AsciiCharset operand (as create_set does)
 instead of shifting a ulong and casting afterwards, so characters 64..127 address
 the upper half of the mask. The test is kept branch-free with `&`, which means
 the shift is evaluated even when the range check fails; masking the index with
 127 keeps the shift amount inside the 128-bit width, and the range check still
 makes the result false for c >= 128.
*>
macro bool AsciiCharset.contains(set, char c) => !!(c < 128) & !!(set & ((AsciiCharset)1ULL << (c & 127)));
|
|
|
|
// Predefined character sets, all computed at compile time.

// Tab, linefeed, vertical tab, form feed, carriage return and space.
const AsciiCharset WHITESPACE_SET   = @create_set("\t\n\v\f\r ");
// Decimal digits 0-9.
const AsciiCharset NUMBER_SET       = @create_set("0123456789");
// Upper case letters A-Z.
const AsciiCharset ALPHA_UPPER_SET  = @create_set("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
// Lower case letters a-z.
const AsciiCharset ALPHA_LOWER_SET  = @create_set("abcdefghijklmnopqrstuvwxyz");
// Union of the upper and lower case letter sets.
const AsciiCharset ALPHA_SET        = @combine_sets(ALPHA_UPPER_SET, ALPHA_LOWER_SET);
// Union of the letter and digit sets.
const AsciiCharset ALPHANUMERIC_SET = @combine_sets(ALPHA_SET, NUMBER_SET);
|