Files
c3c/test/test_suite/switch/switch_in_defer_macro.c3t
Christoffer Lerno acc4a900f5 - New const enum declaration syntax.
- New enum associated value syntax.
2026-02-12 14:43:56 +01:00

838 lines
20 KiB
Plaintext

// #target: macos-x64
module lexer::faults;
// Lexer faults for a string, rune or comment that is not properly terminated.
faultdef UNTERMINATED_STRING, UNTERMINATED_RUNE, UNTERMINATED_COMMENT;
// Trie fault: a new node's index would not fit in the trie's Index type.
faultdef TRIE_FULL;
<*
@require Token.kindof == ENUM && $typefrom(Token.inner).kindof == UNSIGNED_INT
@require $defined((Token){}.token)
@require $defined((Comment){}.start) && $defined((Comment){}.end)
*>
module lexer <Token, Comment>;
import std::io;
import trie;
// Classification of the lexeme returned by Lexer.next and Lexer.peek.
// The kind tells which member of the Lexer payload union is meaningful.
enum Kind : char
{
UNKNOWN, // nothing could be parsed
UINT, // unsigned integer, payload in Lexer.x
STRING, // quoted string, payload in Lexer.string
RUNE, // single-quoted character, payload in Lexer.rune
TOKEN, // entry of the Token enum, payload in Lexer.token
IDENTIFIER, // identifier accepted by the Ident callback, payload in Lexer.string
}
// Trie mapping token spellings to Token values; node indices are ushort.
alias TokenTrie = trie::Trie{Token, ushort};
// Callback deciding whether byte `c` may appear at position `index` of an identifier.
alias Ident = fn bool (usz index, char c);
// Lexer state: input source, token lookup structures, one-lexeme lookahead,
// position bookkeeping and the payload of the most recent lexeme.
struct Lexer
{
Allocator allocator; // used for `buf` and for the DStrings built while parsing
InStream reader; // input stream the bytes are read from
char[] buf; // scratch buffer sized to the longest token spelling / comment end marker
TokenTrie tokens; // token spellings registered in Lexer.init
Ident ident; // identifier character predicate used by parse_ident
bool peeked; // true while `kind` holds a lexeme read by peek and not yet consumed by next
Kind kind; // kind of the peeked lexeme
uint line; // incremented by skip_whitespace for every '\n'
uint column; // advanced by skip_whitespace and the number/string parsers
uint span; // set by the individual parsers for the lexeme they produced
union
{
ulong x; // UINT
String string; // STRING, IDENTIFIER
char rune; // RUNE
Token token; // TOKEN
}
}
// Prepares the lexer to read from `reader`.
//
// Every Token spelling is registered in a trie, and a scratch buffer large
// enough for the longest token spelling or comment end marker is allocated
// from `using`. `ident` is the predicate later used to recognise identifiers.
// Returns a fault if a trie insertion or the buffer allocation fails.
fn void? Lexer.init(&self, InStream reader, Ident ident, Allocator using = mem)
{
	TokenTrie trie;
	// Trie.set requires the root node to exist, so the trie must be
	// initialised before the first insertion.
	trie.init(8, using);
	// Do not leak the trie nodes if a later step fails.
	defer catch trie.nodes.free();
	ushort max_token;
	foreach (tok : Token.values)
	{
		String name = tok.token;
		assert(name.len > 0 && name.len <= ushort.max);
		trie.set(name, tok)!;
		max_token = max(max_token, (ushort)name.len);
	}
	foreach (tok : Comment.values)
	{
		String end = tok.end;
		assert(end.len > 0 && end.len <= ushort.max);
		max_token = max(max_token, (ushort)end.len);
	}
	char* buf = allocator::malloc_try(using, max_token)!;
	*self = {
		.allocator = using,
		.reader = reader,
		.buf = buf[:max_token],
		.tokens = trie,
		// Without this the identifier callback would stay null.
		.ident = ident,
	};
}
// Releases the memory owned by the lexer (scratch buffer and trie nodes)
// and resets it to the zero state.
fn void? Lexer.free(&self)
{
	allocator::free(self.allocator, self.buf);
	// The token trie owns a node list that must be released as well.
	self.tokens.nodes.free();
	*self = {};
}
// Reads the next lexeme and returns its kind; the payload is stored in the
// union member matching that kind.
//
// A lexeme previously obtained through peek is returned first. Otherwise
// leading whitespace is skipped and the first byte selects the parser:
// digit -> UINT, '"' -> STRING, '\'' -> RUNE, anything else is tried as a
// token (a token that opens a comment consumes the comment as well) and then
// as an identifier. Reader faults, including end of input, are returned.
fn Kind? Lexer.next(&self)
{
	if (self.peeked)
	{
		self.peeked = false;
		return self.kind;
	}
	self.skip_whitespace()!;
	char c = self.reader.read_byte()!;
	switch
	{
		case c.is_digit():
			self.reader.pushback_byte()!!;
			self.x = self.parse_uint()!;
			return UINT;
		case c == '"':
			self.string = self.parse_string(c)!;
			return STRING;
		case c == '\'':
			self.rune = self.parse_rune()!;
			return RUNE;
		default:
			// Token and identifier parsing start from the first byte,
			// so it has to be handed back to the reader.
			self.reader.pushback_byte()!!;
	}
	if (try tok = self.parse_token())
	{
		foreach (comment : Comment.values)
		{
			if (comment.start == tok)
			{
				self.parse_comment(comment.end)!;
				break;
			}
		}
		self.token = tok;
		return TOKEN;
	}
	if (try tok = self.parse_ident())
	{
		self.string = tok;
		return IDENTIFIER;
	}
	return UNKNOWN;
}
// Returns the kind of the upcoming lexeme without consuming it.
// The lexeme is read once and remembered until the next call to next.
fn Kind? Lexer.peek(&self)
{
	if (self.peeked) return self.kind;
	Kind upcoming = self.next()!;
	self.kind = upcoming;
	self.peeked = true;
	return upcoming;
}
// Consumes the next lexeme and returns it if it is one of `tokens`.
// Returns `err` when the lexeme is not a token or is not in the list;
// faults from next are passed through.
fn Token? Lexer.expect(&self, fault err, Token... tokens)
{
	Kind kind = self.next()!;
	if (kind != TOKEN) return err~;
	foreach (candidate : tokens)
	{
		if (candidate == self.token) return candidate;
	}
	return err~;
}
// Consumes whitespace, updating line and column, and stops in front of the
// first non-space byte. Resets span to zero. Reader faults are returned.
fn void? Lexer.skip_whitespace(&self) @private
{
	self.span = 0;
	while (true)
	{
		char ch = self.reader.read_byte()!;
		if (ch.is_space())
		{
			if (ch == '\n')
			{
				self.line++;
				self.column = 0;
			}
			else
			{
				self.column++;
			}
			continue;
		}
		// First non-space byte: give it back and stop.
		self.reader.pushback_byte()!!;
		return;
	}
}
// Parses an unsigned integer literal starting at the current reader position.
//
// The prefixes "0x", "0b" and "0o" select hexadecimal, binary and octal;
// everything else is parsed as decimal. Bytes inspected only to detect a
// prefix are handed back to the reader so no digit is lost.
fn ulong? Lexer.parse_uint(&self) @private
{
	char c = self.reader.read_byte()!;
	if (c != '0')
	{
		// Not a prefix: the byte is the first decimal digit.
		self.reader.pushback_byte()!!;
		self.span = 0;
		return self.parse_decimal()!;
	}
	char? next = self.reader.read_byte();
	if (catch err = next)
	{
		// A lone "0" at the end of the input is the number zero.
		if (err == io::EOF)
		{
			self.span = 1;
			return 0;
		}
		return err~;
	}
	switch (next)
	{
		case 'x':
			self.span = 2;
			return self.parse_hexadecimal()!;
		case 'b':
			self.span = 2;
			return self.parse_binary()!;
		case 'o':
			self.span = 2;
			return self.parse_octal()!;
	}
	// No prefix: the leading zero does not change the value, but the byte
	// after it belongs to the number (or to the following lexeme).
	self.reader.pushback_byte()!!;
	self.span = 0;
	return self.parse_decimal()!;
}
// Parses a run of decimal digits into a ulong.
fn ulong? Lexer.parse_decimal(&self) @private
{
	return @parse_uint(self, ulong; ulong x, char c, bool ok)
	{
		ok = c.is_digit();
		if (ok) x = 10 * x + (ulong)(c - '0');
	};
}
// Parses a run of hexadecimal digits (0-9, a-f, A-F) into a ulong.
fn ulong? Lexer.parse_hexadecimal(&self) @private
{
	return @parse_uint(self, ulong; ulong x, char c, bool ok)
	{
		switch
		{
			case c.is_digit():
				x = x * 16 + (ulong)(c - '0');
				ok = true;
			case c.is_xdigit():
				// Remaining hex digits are letters: 'a'/'A' is 10, 'f'/'F' is 15.
				c -= c >= 'a' ? 'a' : 'A';
				x = x * 16 + (ulong)c + 10;
				ok = true;
		}
	};
}
// Parses a run of binary digits ('0' and '1') into a ulong.
fn ulong? Lexer.parse_binary(&self) @private
{
	return @parse_uint(self, ulong; ulong x, char c, bool ok)
	{
		// Only 0 and 1 are binary digits; any other byte ends the number.
		if (c == '0' || c == '1')
		{
			x = x * 2 + (ulong)(c - '0');
			ok = true;
		}
	};
}
// Parses a run of octal digits into a ulong.
fn ulong? Lexer.parse_octal(&self) @private
{
	return @parse_uint(self, ulong; ulong x, char c, bool ok)
	{
		ok = c.is_odigit();
		if (ok) x = 8 * x + (ulong)(c - '0');
	};
}
// Shared digit loop of the integer parsers.
//
// Reads bytes from self.reader and hands each one to the trailing body as
// (x, c, ok): the body folds the byte into the accumulator `x` and sets `ok`
// when the byte was a valid digit. The loop ends at end of input or at the
// first byte the body rejects; the rejected byte is pushed back. On return
// self.span holds the number of columns advanced by this call.
macro @parse_uint(self, $Type; @body(x, c, ok)) @private
{
$Type x;
// Remember where the number started to compute the span afterwards.
uint column = self.column;
while (true)
{
char? c = self.reader.read_byte();
if (catch err = c)
{
// End of input simply ends the number; other faults are returned.
if (err == io::EOF) break;
return err~;
}
else
{
self.column++;
// Underscores are accepted as digit separators and skipped.
if (c == '_') continue;
$Type xx = x;
bool ok;
@body(x, c, ok);
// A value smaller than before means the accumulator wrapped around.
// NOTE(review): a wrap that lands above the previous value is not detected by this comparison.
if (xx > x) return string::INTEGER_OVERFLOW~;
if (!ok)
{
// Not a digit: undo the column step and give the byte back.
self.column--;
self.reader.pushback_byte()!!;
break;
}
}
}
self.span = self.column - column;
return x;
}
// Parses the remainder of a string literal whose opening `quote` has already
// been consumed and returns a view of the collected characters.
//
// An immediately following quote yields the empty string. Otherwise the
// characters are accumulated in a DString allocated from self.allocator; the
// returned String is a view into that DString, which is not freed here.
// End of input inside the literal yields faults::UNTERMINATED_STRING (see
// read_char_for_string).
fn String? Lexer.parse_string(&self, char quote) @private
{
char c = self.read_char_for_string()!;
if (c == quote) return "";
DString str;
str.init(self.allocator, 8);
// `prev` is the previously read byte; it starts out as zero.
char prev;
while (true)
{
// NOTE(review): this overwrites the byte read before the loop, which was only
// compared against `quote` and is never appended - confirm this is intended.
c = self.read_char_for_string()!;
if (prev != '\\')
{
// NOTE(review): on the first iteration `prev` is still zero, so a zero byte is appended - confirm.
str.append_char(prev);
if (c == quote)
{
self.span = (uint)str.len();
// The two extra columns account for the surrounding quotes.
self.column += self.span + 2;
return str.str_view();
}
prev = c;
continue;
}
// The previous byte was a backslash: `c` is the escaped character.
prev = c;
switch (c)
{
case 'a':
case 'b':
case '\\':
case '\n':
case '\f':
case '\r':
case '\v':
case '"':
case '\'':
// Listed characters are appended as they are.
break;
// Any other character is replaced by a backslash.
default: c = '\\';
}
str.append_char(c);
}
}
// Parses a rune whose opening quote was already consumed: one byte followed
// by the closing single quote. Returns faults::UNTERMINATED_RUNE when the
// second byte is not a single quote.
fn char? Lexer.parse_rune(&self) @private
{
	char value = self.reader.read_byte()!;
	char closing = self.reader.read_byte()!;
	return closing == '\'' ? value : faults::UNTERMINATED_RUNE~;
}
// Reads one byte for the string parser, turning end of input into
// faults::UNTERMINATED_STRING. Other reader faults are passed through.
macro char? Lexer.read_char_for_string(&self) @private
{
	char? next = self.reader.read_byte();
	if (catch problem = next)
	{
		if (problem != io::EOF) return problem~;
		return faults::UNTERMINATED_STRING~;
	}
	return next;
}
// Tries to read the longest registered token at the current position.
//
// Up to buf.len bytes are read ahead and looked up in the token trie. On
// exit the deferred unread hands back every byte that is not part of the
// token: all of them when no token matched, otherwise the bytes read beyond
// the token's spelling.
fn Token? Lexer.parse_token(&self) @private
{
	usz n = self.reader.read(self.buf)!;
	defer self.unread(n);
	Token tok = self.tokens.get_best(self.buf[:n])!;
	// Only `n` bytes were actually read (possibly fewer than buf.len near the
	// end of the input), so the surplus is measured against `n`.
	n -= tok.token.len;
	return tok;
}
// Consumes input up to and including the comment end marker `end`.
//
// The bytes in front of the marker are collected and exposed through
// self.string. End of input before the marker yields
// faults::UNTERMINATED_COMMENT; other reader faults are passed through.
fn void? Lexer.parse_comment(&self, String end) @private
{
	DString collected;
	collected.init(self.allocator, 8);
	// Window of exactly end.len bytes that is slid over the input.
	char[] window = self.buf[:end.len];
	while (true)
	{
		if (catch err = io::read_all(self.reader, window))
		{
			bool at_end = err == io::UNEXPECTED_EOF || err == io::EOF;
			if (at_end) return faults::UNTERMINATED_COMMENT~;
			return err~;
		}
		if (end == (String)window)
		{
			self.string = collected.str_view();
			return;
		}
		// No match: keep the first byte and retry one position further on.
		collected.append_char(window[0]);
		self.unread(window.len - 1);
	}
}
// Hands the last `n` bytes read back to the reader.
//
// n == 1 uses a single pushback_byte. For n > 1 the reader is moved backwards
// with seek when `&self.reader.seek` is non-null; otherwise pushback_byte is
// called n times. No case matches n == 0, so nothing happens then. Failures
// of the reader operations are force-unwrapped with `!!`.
macro Lexer.unread(self, n) @private
{
switch
{
case n == 1:
self.reader.pushback_byte()!!;
case n > 1:
// Prefer one relative seek when the reader provides seek.
if (&self.reader.seek)
{
self.reader.seek(-n, CURSOR)!!;
break;
}
// Fallback: push the bytes back one at a time.
for (; n > 0; n--) self.reader.pushback_byte()!!;
}
}
// Reads an identifier: the longest run of bytes accepted by the ident
// callback, which receives the position within the identifier and the byte.
//
// Returns a view into a DString allocated from self.allocator. End of input
// ends the identifier; other reader faults are returned.
fn String? Lexer.parse_ident(&self) @private
{
	DString str;
	str.init(self.allocator, 8);
	while (true)
	{
		char? c = self.reader.read_byte();
		if (catch err = c)
		{
			if (err == io::EOF) return str.str_view();
			return err~;
		}
		if (!self.ident(str.len(), c))
		{
			// The rejected byte belongs to the following lexeme, so give it
			// back. When nothing was collected the byte stays consumed, so a
			// caller looping over next still makes progress.
			if (str.len() > 0) self.reader.pushback_byte()!!;
			return str.str_view();
		}
		str.append_char(c);
	}
}
module lexer_test;
import lexer;
import std::io;
// Instantiations of the generic lexer module for this test's Token and Comment enums.
alias Lexer = lexer::Lexer{Token, Comment};
alias Kind = lexer::Kind{Token, Comment};
// Tokens known to the test lexer; `token` is the spelling registered in the lexer's trie.
enum Token : char (String token)
{
KEYWORD1 { "keyword1" },
KEYWORD2 { "keyword2" },
SINGLE { "//" }, // start of the SINGLE comment
MULTI { "/*" }, // start of the MULTI comment
}
// Comment styles: `start` is the token that opens the comment and `end` is
// the text that closes it.
enum Comment : char (Token start, String end)
{
SINGLE { SINGLE, "\n" },
MULTI { MULTI, "*/" },
}
// Identifier predicate passed to the lexer: the first character (i == 0) must
// be alphabetic, every later character must be alphanumeric.
// The expected IR for this function is pinned in the expect section of this test.
fn bool is_ident_char(usz i, char c)
{
return (i == 0 && c.is_alpha()) || (i > 0 && c.is_alnum());
}
// One test case for integer lexing.
struct UintTest
{
String in; // source text handed to the lexer
ulong out; // value for this input; not read by lex_uint as written
}
// Runs the lexer over every case in `tcases` and asserts that the first
// lexeme is a UINT. Faults from Lexer.init and Lexer.next are returned.
// The expected IR for this function is pinned in the expect section of this test.
fn void? lex_uint()
{
// The case table is empty, so the loop body only needs to compile.
UintTest[] tcases = {};
foreach (tc : tcases)
{
ByteReader br;
Lexer lex;
lex.init(br.init((char[])tc.in), &is_ident_char)!;
Kind kind = lex.next()!;
assert(kind == UINT, "got %s; want %s", kind, Kind.UINT);
}
}
module lexer2;
import std::io;
// Entry point of the lexer2 module: prints a greeting and exits with status 0.
fn int main(String[] args)
{
	String greeting = "Hello, World!";
	io::printn(greeting);
	return 0;
}
<*
@require Index.kindof == TypeKind.UNSIGNED_INT
*>
module trie <Value, Index>;
import std::collections::list;
import trie::bitmap;
import lexer::faults;
// Storage for all nodes of a trie; nodes refer to each other by index into this list.
alias TrieNodeList = List{TrieNode};
// Node indices visited from the root while descending, used by deletion.
alias TriePath @private = List{Index};
// Byte-keyed trie. All nodes live in `nodes`; index 0 is the root.
struct Trie
{
TrieNodeList nodes;
}
// A single trie node.
struct TrieNode
{
Index[256] children; // child node index per key byte; 0 means no child
Value value; // value stored at this node, meaningful only when `valid`
bool valid; // true when a key ends at this node
}
// Initialises the trie with room for `initial_capacity` nodes allocated from
// `using` and creates the root node at index 0.
fn void Trie.init(&self, usz initial_capacity = 8, Allocator using = mem)
{
	TrieNodeList nodes;
	nodes.init(using, initial_capacity);
	nodes.push((TrieNode){});
	*self = { .nodes = nodes };
}
<*
 Returns the value stored under exactly this key, or NOT_FOUND.

 @require self.nodes.len() > 0
*>
fn Value? Trie.get(&self, char[] key)
{
	TrieNode* root = &self.nodes[0];
	return root.get(self, key);
}
<*
 Returns the value of the longest stored key that is a prefix of the
 given key, or NOT_FOUND when no stored key is a prefix.

 @require self.nodes.len() > 0
*>
fn Value? Trie.get_best(&self, char[] key)
{
	TrieNode* root = &self.nodes[0];
	if (key.len > 0) return root.get_best(self, key, 0);
	// Empty key: only a value stored at the root can match.
	if (root.valid) return root.value;
	return NOT_FOUND~;
}
<*
 Stores the value under the key, creating intermediate nodes as needed.
 Returns the TRIE_FULL fault when a new node index would not fit in Index.

 @require self.nodes.len() > 0
*>
fn void? Trie.set(&self, char[] key, Value value)
{
	// Return the fault to the caller; this function is declared optional
	// so that callers can handle a full trie themselves.
	self.nodes[0].set(self, key, value)!;
}
<*
 Removes the key from the trie. Deleting the empty key only clears the
 value stored at the root.

 @require self.nodes.len() > 0
*>
fn void? Trie.del(&self, char[] key)
{
	if (key.len == 0)
	{
		(&self.nodes[0]).valid = false;
		return;
	}
	TriePath path;
	// TrieNode.del receives the path by value and pushes one index per key
	// byte onto its copy. Reserving room for the root plus every key byte up
	// front keeps those pushes within the storage allocated here, so the
	// deferred free below still refers to the live allocation.
	path.init(self.nodes.allocator, key.len + 1);
	defer path.free();
	path.push(0);
	self.nodes[0].del(self, key, path)!;
}
// Exact lookup below this node: follows `key` byte by byte and returns the
// value of the node where the key ends, or NOT_FOUND.
fn Value? TrieNode.get(&self, Trie *t, char[] key) @private
{
	if (key.len > 0)
	{
		Index child = self.children[key[0]];
		if (child == 0) return NOT_FOUND~;
		return t.nodes[child].get(t, key[1..]);
	}
	// Key exhausted: this node must hold a value.
	if (self.valid) return self.value;
	return NOT_FOUND~;
}
// Longest-prefix lookup below this node. `result` is the index of the deepest
// valid node seen so far (0 when none); descent stops when the key is
// exhausted or no child exists for the next byte.
fn Value? TrieNode.get_best(&self, Trie *t, char[] key, Index result) @private
{
	if (key.len > 0)
	{
		Index child_idx = self.children[key[0]];
		if (child_idx != 0)
		{
			TrieNode* child = &t.nodes[child_idx];
			if (child.valid) result = child_idx;
			return child.get_best(t, key[1..], result);
		}
	}
	// Cannot descend any further: report the best match found, if any.
	if (result == 0) return NOT_FOUND~;
	return t.nodes[result].value;
}
// Stores `value` under `key` below this node, creating missing child nodes.
// Returns faults::TRIE_FULL when the index of a new node would exceed Index.max.
fn void? TrieNode.set(&self, Trie *t, char[] key, Value value) @private
{
if (key.len == 0)
{
// Whole key consumed: the value lives at this node.
self.value = value;
self.valid = true;
return;
}
char c = key[0];
Index idx = self.children[c];
if (idx == 0)
{
// No child for this byte yet: the new node gets the next free index.
usz new_idx = t.nodes.len();
// Index 0 is the "no child" marker, so a child can never get index 0.
assert(new_idx != 0);
if (new_idx > Index.max) return faults::TRIE_FULL~;
idx = (Index)new_idx;
// The link is written through `self` before the push below.
// NOTE(review): presumably push may move the node storage, which would leave `self` dangling afterwards - confirm.
self.children[c] = idx;
t.nodes.push((TrieNode){});
}
// Continue via the node list instead of a cached pointer.
t.nodes[idx].set(t, key[1..], value)!;
}
// Deletion helper. While key bytes remain it descends, recording each visited
// node index in `path`; a missing child ends the operation without changes.
// Once the key is exhausted it walks the recorded path backwards and unlinks
// the terminal node from its ancestors.
// `path` is passed by value, so pushes made here act on this call's copy.
fn void? TrieNode.del(&self, Trie* t, char[] key, TriePath path) @private
{
if (key.len > 0)
{
char c = key[0];
Index idx = self.children[c];
if (idx != 0)
{
path.push(idx);
t.nodes[idx].del(t, key[1..], path)!;
}
return;
}
// `current` is the node where the key ends.
Index current = path.pop()!;
for (isz i = path.len() - 1; i >= 0; i--)
{
Index parent = path[i];
bool is_empty = true;
TrieNode* node = &t.nodes[parent];
foreach (j, p : node.children)
{
if (p == current)
{
// Remove the link to the deleted node and mark that node as holding no value.
node.children[j] = 0;
(&t.nodes[p]).valid = false;
}
is_empty = is_empty && node.children[j] == 0;
}
// Stop as soon as an ancestor still has other children.
if (!is_empty) break;
// NOTE(review): `current` is not reassigned to `parent` here, so further
// iterations compare against the same index - confirm this is intended.
// NOTE(review): the terminal node is unlinked even if it has children of its own - confirm.
}
}
module trie::bitmap @private;
// Set of byte values stored as 256 bits in four 64-bit words.
// The two low bits of a byte select the word, the remaining six bits
// select the bit inside that word.
struct Bitmap
{
	ulong[4] data;
}

// Returns true when `x` is in the set.
fn bool Bitmap.get(&self, char x)
{
	char word = x & 0b11;
	// The bit index ranges over 0..63, so the mask must be 64 bits wide.
	ulong mask = (ulong)1 << (x >> 2);
	return (self.data[word] & mask) != 0;
}

// Adds `x` to the set.
fn void Bitmap.set(&self, char x)
{
	char word = x & 0b11;
	// The bit index ranges over 0..63, so the mask must be 64 bits wide.
	ulong mask = (ulong)1 << (x >> 2);
	self.data[word] |= mask;
}

// Returns true when no value is in the set.
fn bool Bitmap.is_empty(&self)
{
	foreach (word : self.data)
	{
		if (word != 0) return false;
	}
	return true;
}
module trie_test;
import trie;
// Trie instantiation used by the test: String values, char node indices.
alias Trie = trie::Trie{String, char};
// Exercises the trie: lookup of a missing key, insertion, exact lookup,
// longest-prefix lookup and deletion.
fn void test()
{
	Trie trie;
	trie.init();

	// A fresh trie does not contain "a".
	String? before = trie.get("a");
	if (try before) unreachable("key 'a' should not exist");

	// Insert and read back a one-byte key.
	if (catch trie.set("a", "va")) unreachable("key 'a' not added");
	String found = trie.get("a")!!;
	assert(found == "va");

	// Insert a longer key; its proper prefix is not a key by itself.
	if (catch trie.set("hello world", "hi")) unreachable("key not added");
	found = trie.get("hello world")!!;
	assert(found == "hi");
	if (try invalid = trie.get("hello")) unreachable("key 'hello' should not exist");

	// Insert the prefix as its own key.
	if (catch trie.set("hello", "hoi")) unreachable("key not added");
	String best = trie.get("hello")!!;
	assert(best == "hoi");

	// The longest-prefix lookup prefers the longer key.
	best = trie.get_best("hello world")!!;
	assert(best == "hi");

	// After deletion "a" is gone again.
	trie.del("a")!!;
	String? after = trie.get("a");
	if (try after) unreachable("key 'a' should not exist");
}
/* #expect: lexer_test.ll
%.introspect = type { i8, i64, ptr, i64, i64, i64, [0 x i64] }
%"char[]" = type { ptr, i64 }
%any = type { ptr, i64 }
%"UintTest[]" = type { ptr, i64 }
%UintTest = type { %"char[]", i64 }
%ByteReader = type { %"char[]", i64 }
%"Lexer{Token, Comment}" = type { %any, %any, %"char[]", %"Trie{Token, ushort}", ptr, i8, i8, i32, i32, i32, %.anon }
%"Trie{Token, ushort}" = type { %"List{TrieNode{Token, ushort}}" }
%"List{TrieNode{Token, ushort}}" = type { i64, i64, %any, ptr }
%.anon = type { %"char[]" }
@"$ct.lexer_test.UintTest" = linkonce global %.introspect { i8 10, i64 0, ptr null, i64 24, i64 0, i64 2, [0 x i64] zeroinitializer }, align 8
@.enum.KEYWORD1 = internal constant [9 x i8] c"KEYWORD1\00", align 1
@.enum.KEYWORD2 = internal constant [9 x i8] c"KEYWORD2\00", align 1
@.enum.SINGLE = internal constant [7 x i8] c"SINGLE\00", align 1
@.enum.MULTI = internal constant [6 x i8] c"MULTI\00", align 1
@"$ct.char" = linkonce global %.introspect { i8 3, i64 0, ptr null, i64 1, i64 0, i64 0, [0 x i64] zeroinitializer }, align 8
@"$ct.lexer_test.Token" = linkonce global { i8, i64, ptr, i64, i64, i64, [4 x %"char[]"] } { i8 8, i64 0, ptr null, i64 1, i64 ptrtoint (ptr @"$ct.char" to i64), i64 4, [4 x %"char[]"] [%"char[]" { ptr @.enum.KEYWORD1, i64 8 }, %"char[]" { ptr @.enum.KEYWORD2, i64 8 }, %"char[]" { ptr @.enum.SINGLE, i64 6 }, %"char[]" { ptr @.enum.MULTI, i64 5 }] }, align 8
@.str = private unnamed_addr constant [9 x i8] c"keyword1\00", align 1
@.str.1 = private unnamed_addr constant [9 x i8] c"keyword2\00", align 1
@.str.2 = private unnamed_addr constant [3 x i8] c"//\00", align 1
@.str.3 = private unnamed_addr constant [3 x i8] c"/*\00", align 1
@"lexer_test.Token$token" = linkonce constant [4 x %"char[]"] [%"char[]" { ptr @.str, i64 8 }, %"char[]" { ptr @.str.1, i64 8 }, %"char[]" { ptr @.str.2, i64 2 }, %"char[]" { ptr @.str.3, i64 2 }], align 8
@"$ct.lexer_test.Comment" = linkonce global { i8, i64, ptr, i64, i64, i64, [2 x %"char[]"] } { i8 8, i64 0, ptr null, i64 1, i64 ptrtoint (ptr @"$ct.char" to i64), i64 2, [2 x %"char[]"] [%"char[]" { ptr @.enum.SINGLE, i64 6 }, %"char[]" { ptr @.enum.MULTI, i64 5 }] }, align 8
@"lexer_test.Comment$start" = linkonce constant [2 x i8] c"\02\03", align 1
@.str.4 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1
@.str.5 = private unnamed_addr constant [3 x i8] c"*/\00", align 1
@"lexer_test.Comment$end" = linkonce constant [2 x %"char[]"] [%"char[]" { ptr @.str.4, i64 1 }, %"char[]" { ptr @.str.5, i64 2 }], align 8
@std.core.ascii.ASCII_LOOKUP = extern_weak constant [256 x i16], align 16
@"$ct.std.io.ByteReader" = linkonce global %.introspect { i8 10, i64 0, ptr null, i64 24, i64 0, i64 2, [0 x i64] zeroinitializer }, align 8
@std.core.mem.allocator.thread_allocator = extern_weak thread_local global %any, align 8
define zeroext i8 @lexer_test.is_ident_char(i64 %0, i8 zeroext %1) #0 {
entry:
%eq = icmp eq i64 0, %0
br i1 %eq, label %and.rhs, label %and.phi
and.rhs: ; preds = %entry
%zext = zext i8 %1 to i64
%ptroffset = getelementptr inbounds [2 x i8], ptr @std.core.ascii.ASCII_LOOKUP, i64 %zext
%2 = load i16, ptr %ptroffset, align 2
%lshrl = lshr i16 %2, 6
%3 = and i16 1, %lshrl
%trunc = trunc i16 %3 to i8
%4 = trunc i8 %trunc to i1
br label %and.phi
and.phi: ; preds = %and.rhs, %entry
%val = phi i1 [ false, %entry ], [ %4, %and.rhs ]
br i1 %val, label %or.phi, label %or.rhs
or.rhs: ; preds = %and.phi
%lt = icmp ult i64 0, %0
br i1 %lt, label %and.rhs1, label %and.phi6
and.rhs1: ; preds = %or.rhs
%zext2 = zext i8 %1 to i64
%ptroffset3 = getelementptr inbounds [2 x i8], ptr @std.core.ascii.ASCII_LOOKUP, i64 %zext2
%5 = load i16, ptr %ptroffset3, align 2
%lshrl4 = lshr i16 %5, 7
%6 = and i16 1, %lshrl4
%trunc5 = trunc i16 %6 to i8
%7 = trunc i8 %trunc5 to i1
br label %and.phi6
and.phi6: ; preds = %and.rhs1, %or.rhs
%val7 = phi i1 [ false, %or.rhs ], [ %7, %and.rhs1 ]
br label %or.phi
or.phi: ; preds = %and.phi6, %and.phi
%val8 = phi i1 [ true, %and.phi ], [ %val7, %and.phi6 ]
%8 = zext i1 %val8 to i8
ret i8 %8
}
; Function Attrs: nounwind uwtable
define i64 @lexer_test.lex_uint() #0 {
entry:
%tcases = alloca %"UintTest[]", align 8
%.anon = alloca i64, align 8
%tc = alloca %UintTest, align 8
%br = alloca %ByteReader, align 8
%lex = alloca %"Lexer{Token, Comment}", align 8
%error_var = alloca i64, align 8
%taddr = alloca %any, align 8
%kind = alloca i8, align 1
%error_var8 = alloca i64, align 8
%retparam = alloca i8, align 1
store %"UintTest[]" zeroinitializer, ptr %tcases, align 8
%ptradd = getelementptr inbounds i8, ptr %tcases, i64 8
%0 = load i64, ptr %ptradd, align 8
store i64 0, ptr %.anon, align 8
br label %loop.cond
loop.cond: ; preds = %noerr_block13, %entry
%1 = load i64, ptr %.anon, align 8
%lt = icmp ult i64 %1, %0
br i1 %lt, label %loop.body, label %loop.exit
loop.body: ; preds = %loop.cond
%2 = load ptr, ptr %tcases, align 8
%3 = load i64, ptr %.anon, align 8
%ptroffset = getelementptr inbounds [24 x i8], ptr %2, i64 %3
call void @llvm.memcpy.p0.p0.i32(ptr align 8 %tc, ptr align 8 %ptroffset, i32 24, i1 false)
call void @llvm.memset.p0.i64(ptr align 8 %br, i8 0, i64 24, i1 false)
call void @llvm.memset.p0.i64(ptr align 8 %lex, i8 0, i64 128, i1 false)
%lo = load ptr, ptr %tc, align 8
%ptradd1 = getelementptr inbounds i8, ptr %tc, i64 8
%hi = load i64, ptr %ptradd1, align 8
%4 = call ptr @std.io.ByteReader.init(ptr %br, ptr %lo, i64 %hi)
%5 = insertvalue %any undef, ptr %4, 0
%6 = insertvalue %any %5, i64 ptrtoint (ptr @"$ct.std.io.ByteReader" to i64), 1
%7 = call ptr @llvm.threadlocal.address.p0(ptr @std.core.mem.allocator.thread_allocator)
store %any %6, ptr %taddr, align 8
%lo2 = load i64, ptr %taddr, align 8
%ptradd3 = getelementptr inbounds i8, ptr %taddr, i64 8
%hi4 = load ptr, ptr %ptradd3, align 8
%lo5 = load i64, ptr %7, align 8
%ptradd6 = getelementptr inbounds i8, ptr %7, i64 8
%hi7 = load ptr, ptr %ptradd6, align 8
%8 = call i64 @"lexer.Lexer$lexer_test.Token$lexer_test.Comment$.init"(ptr %lex, i64 %lo2, ptr %hi4, ptr @lexer_test.is_ident_char, i64 %lo5, ptr %hi7)
%not_err = icmp eq i64 %8, 0
%9 = call i1 @llvm.expect.i1(i1 %not_err, i1 true)
br i1 %9, label %after_check, label %assign_optional
assign_optional: ; preds = %loop.body
store i64 %8, ptr %error_var, align 8
br label %guard_block
after_check: ; preds = %loop.body
br label %noerr_block
guard_block: ; preds = %assign_optional
%10 = load i64, ptr %error_var, align 8
ret i64 %10
noerr_block: ; preds = %after_check
%11 = call i64 @"lexer.Lexer$lexer_test.Token$lexer_test.Comment$.next"(ptr %retparam, ptr %lex)
%not_err9 = icmp eq i64 %11, 0
%12 = call i1 @llvm.expect.i1(i1 %not_err9, i1 true)
br i1 %12, label %after_check11, label %assign_optional10
assign_optional10: ; preds = %noerr_block
store i64 %11, ptr %error_var8, align 8
br label %guard_block12
after_check11: ; preds = %noerr_block
br label %noerr_block13
guard_block12: ; preds = %assign_optional10
%13 = load i64, ptr %error_var8, align 8
ret i64 %13
noerr_block13: ; preds = %after_check11
%14 = load i8, ptr %retparam, align 1
store i8 %14, ptr %kind, align 1
%15 = load i8, ptr %kind, align 1
%eq = icmp eq i8 %15, 1
call void @llvm.assume(i1 %eq)
%16 = load i64, ptr %.anon, align 8
%addnuw = add nuw i64 %16, 1
store i64 %addnuw, ptr %.anon, align 8
br label %loop.cond
loop.exit: ; preds = %loop.cond
ret i64 0
}