Files
c3c/src/compiler/parser.c
Christoffer Lerno 990918b609 LLVM Codegen
2019-11-20 17:09:25 +01:00

3247 lines
79 KiB
C

// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "compiler_internal.h"
// NOTE(review): presumably a cap on the number of doc-comment rows; it is not
// referenced in the part of the file shown here — confirm at its use site.
const int MAX_DOCS_ROWS = 1024;
// Starts out as an invalid token. parse_type_or_expr() overwrites it with the
// identifier that precedes '::' while doing its type-vs-expression lookahead.
Token module = { .type = TOKEN_INVALID_TOKEN };
// Forward declarations: these parse functions are used before their
// definitions appear in this translation unit.
static Ast *parse_stmt(void);
static Expr *parse_expr(void);
static Expr *parse_paren_expr(void);
static Expr *parse_precedence(Precedence precedence);
static Expr *parse_initializer_list(void);
static Expr *parse_initializer(void);
static bool parse_type_or_expr(Expr **exprPtr, Type **typePtr);
static Decl *parse_top_level(void);
// Handler signature stored in the rule table. An infix handler receives the
// already parsed left-hand side (see parse_precedence_with_left_side).
// NOTE(review): what a prefix handler receives is not visible here — confirm.
typedef Expr *(*ParseFn)(Expr *);
// One entry of the expression rule table: the handlers for a token type and
// the precedence compared against while extending an expression.
typedef struct
{
ParseFn prefix;
ParseFn infix;
Precedence precedence;
} ParseRule;
// Rule table indexed by token type (one slot per value up to TOKEN_EOF);
// the definition lives elsewhere.
extern ParseRule rules[TOKEN_EOF + 1];
/**
 * Extend an already parsed expression with infix operators.
 *
 * As long as the rule for the current token has a precedence of at least
 * `precedence`, that rule's infix handler is applied to the expression built
 * so far. The loop stops early if the expression is no longer ok.
 *
 * @param left_side the expression parsed so far.
 * @param precedence the minimum precedence an operator needs to bind.
 * @return the extended expression, or the first not-ok expression encountered.
 */
static inline Expr *parse_precedence_with_left_side(Expr *left_side, Precedence precedence)
{
    Expr *current = left_side;
    while (rules[tok.type].precedence >= precedence)
    {
        if (!expr_ok(current)) break;
        ParseFn handler = rules[tok.type].infix;
        current = handler(current);
    }
    return current;
}
// --- Parser base methods
/**
 * Advance past the current token if it has the given type.
 *
 * @param type the token type to match against the current token.
 * @return true if the token matched and was consumed, false otherwise
 *         (in which case nothing is consumed and no error is reported).
 */
bool try_consume(TokenType type)
{
    if (tok.type != type) return false;
    advance();
    return true;
}
/**
 * Consume the current token if it has the given type, otherwise report a
 * formatted error on the current token's span.
 *
 * @param type the token type required.
 * @param message printf-style format for the error, followed by its arguments.
 * @return true if the token was consumed, false if an error was reported.
 */
bool consume(TokenType type, const char *message, ...)
{
    bool consumed = try_consume(type);
    if (!consumed)
    {
        va_list arg_list;
        va_start(arg_list, message);
        sema_verror_range(tok.span, message, arg_list);
        va_end(arg_list);
    }
    return consumed;
}
/**
 * Consume a lower-case identifier, reporting an error if the current token
 * is something else.
 *
 * @param name what the identifier denotes; only used in the error text.
 * @return true if an identifier was consumed, false after reporting an error.
 */
static inline bool consume_ident(const char* name)
{
    switch (tok.type)
    {
        case TOKEN_IDENT:
            advance();
            return true;
        case TOKEN_TYPE_IDENT:
        case TOKEN_CONST_IDENT:
            SEMA_ERROR(tok, "A %s cannot start with a capital letter.", name);
            return false;
        default:
            SEMA_ERROR(tok, "A %s was expected.", name);
            return false;
    }
}
/**
 * Check that the current token is a lower-case identifier WITHOUT consuming
 * it; the caller is responsible for advancing.
 *
 * @param name what the identifier denotes; only used in the error text.
 * @return true if the current token is an identifier, false after reporting
 *         an error.
 */
static inline bool expect_ident(const char* name)
{
    if (tok.type == TOKEN_IDENT) return true;
    if (tok.type == TOKEN_TYPE_IDENT || tok.type == TOKEN_CONST_IDENT)
    {
        SEMA_ERROR(tok, "A %s cannot start with a capital letter.", name);
        return false;
    }
    SEMA_ERROR(tok, "A %s was expected.", name);
    return false;
}
/**
 * Consume a type-style identifier (TOKEN_TYPE_IDENT).
 *
 * Plain identifiers and all-upper-case identifiers get dedicated error
 * messages; any other mismatch gets the generic "should be followed by"
 * message.
 *
 * @param type the kind of construct being named; only used in the error text.
 * @return true if the name was consumed, false after reporting an error.
 */
static bool consume_type_name(const char* type)
{
    switch (tok.type)
    {
        case TOKEN_IDENT:
            SEMA_ERROR(tok, "Names of %ss must start with an upper case letter.", type);
            return false;
        case TOKEN_CONST_IDENT:
            SEMA_ERROR(tok, "Names of %ss cannot be all upper case.", type);
            return false;
        default:
            return consume(TOKEN_TYPE_IDENT, "'%s' should be followed by the name of the %s.", type, type);
    }
}
/**
 * Consume an all-upper-case identifier (TOKEN_CONST_IDENT).
 *
 * @param type the kind of construct being named; only used in the error text.
 * @return true if the name was consumed, false after reporting an error.
 */
static bool consume_const_name(const char* type)
{
    switch (tok.type)
    {
        case TOKEN_IDENT:
        case TOKEN_TYPE_IDENT:
            SEMA_ERROR(tok, "Names of %ss must be all upper case.", type);
            return false;
        default:
            return consume(TOKEN_CONST_IDENT, "'%s' should be followed by the name of the %s.", type, type);
    }
}
/**
 * Error recovery: skip the current token, then keep skipping until a token
 * that can start a top level construct (or the end of file) is current.
 * This is the slow path, so it is deliberately not inlined.
 */
static void recover_top_level(void)
{
    for (advance(); tok.type != TOKEN_EOF; advance())
    {
        switch (tok.type)
        {
            case TOKEN_PUBLIC:
            case TOKEN_FUNC:
            case TOKEN_CONST:
            case TOKEN_TYPEDEF:
            case TOKEN_ERROR_TYPE:
            case TOKEN_STRUCT:
            case TOKEN_IMPORT:
            case TOKEN_UNION:
            case TOKEN_ENUM:
            case TOKEN_MACRO:
                // Found something a top level declaration can start with.
                return;
            default:
                break;
        }
    }
}
/**
 * Check that the current token has the given type WITHOUT consuming it.
 *
 * @param token_type the required token type.
 * @return true on a match, false after reporting "Expected '<token>'.".
 */
static inline bool expect(TokenType token_type)
{
    bool matches = tok.type == token_type;
    if (!matches)
    {
        SEMA_ERROR(tok, "Expected '%s'.", token_type_to_string(token_type));
    }
    return matches;
}
/**
 * Report a formatted error on the span of `next_tok`.
 *
 * NOTE(review): the other helpers in this file treat `tok` as the current
 * token and `next_tok` as the lookahead; confirm that reporting on
 * `next_tok` is really what callers of this function expect.
 *
 * @param message printf-style format, followed by its arguments.
 */
void error_at_current(const char* message, ...)
{
    va_list arg_list;
    va_start(arg_list, message);
    sema_verror_range(next_tok.span, message, arg_list);
    va_end(arg_list);
}
// --- Parsing
// All of the helpers below may `return _res;` from the ENCLOSING function, so
// `_res` must match that function's return type.

// Return _res unless the current token is an identifier (token is not consumed).
#define EXPECT_IDENT_FOR_OR(_name, _res) do { if (!expect_ident(_name)) return _res; } while(0)
// Return _res unless the current token is _tok (token is not consumed).
#define EXPECT_OR(_tok, _res) do { if (!expect(_tok)) return _res; } while(0)
// Like EXPECT_OR, but also advances past the token on success.
#define CONSUME_OR(_tok, _res) do { if (!expect(_tok)) return _res; advance(); } while(0)
// Return _type with the custom error _message unless the current token is _tok (not consumed).
#define TRY_EXPECT_OR(_tok, _message, _type) do { if (tok.type != _tok) { SEMA_ERROR(tok, _message); return _type; } } while(0)
// Consume _tok, or report _message and return _type.
#define TRY_CONSUME_OR(_tok, _message, _type) do { if (!consume(_tok, _message)) return _type; } while(0)
// TRY_CONSUME_OR specialised for functions returning Ast*.
#define TRY_CONSUME(_tok, _message) TRY_CONSUME_OR(_tok, _message, &poisoned_ast)
// Consume the statement terminator, or report "Expected ';'" and return _res.
#define TRY_CONSUME_EOS_OR(_res) TRY_CONSUME_OR(TOKEN_EOS, "Expected ';'", _res)
#define TRY_CONSUME_EOS() TRY_CONSUME_EOS_OR(&poisoned_ast)
#define TRY_CONSUME_LBRACE() TRY_CONSUME(TOKEN_LBRACE, "Expected '{'")
// The TRY_<kind>_OR helpers use the `({ ... })` statement-expression form
// (a GCC/Clang extension): the argument is evaluated once; if the result is
// not ok the enclosing function returns _res, otherwise the macro yields the value.
#define TRY_AST_OR(_ast_stmt, _res) ({ Ast* _ast = (_ast_stmt); if (!ast_ok(_ast)) return _res; _ast; })
#define TRY_AST(_ast_stmt) TRY_AST_OR(_ast_stmt, &poisoned_ast)
#define TRY_EXPR_OR(_expr_stmt, _res) ({ Expr* _expr = (_expr_stmt); if (!expr_ok(_expr)) return _res; _expr; })
#define TRY_TYPE_OR(_type_stmt, _res) ({ Type* _type = (_type_stmt); if (!type_ok(_type)) return _res; _type; })
#define TRY_DECL_OR(_decl_stmt, _res) ({ Decl* _decl = (_decl_stmt); if (!decl_ok(_decl)) return _res; _decl; })
// Consume a ',' if present; otherwise the current token must be ')' (which is
// left unconsumed), else report an error and return _res.
#define COMMA_RPAREN_OR(_res) \
do { if (!try_consume(TOKEN_COMMA) && tok.type != TOKEN_RPAREN) { \
SEMA_ERROR(tok, "Expected ',' or ')'"); return _res; } } while(0)
/**
 * compound_stmt ::= '{' statement* '}'
 *
 * @return the compound statement, or a poisoned Ast if the '{' is missing or
 *         any contained statement fails to parse.
 */
static Ast* parse_compound_stmt()
{
    CONSUME_OR(TOKEN_LBRACE, &poisoned_ast);
    Ast *block = AST_NEW(AST_COMPOUND_STMT, tok);
    for (;;)
    {
        if (try_consume(TOKEN_RBRACE)) break;
        Ast *child = TRY_AST(parse_stmt());
        block->compound_stmt.stmts = VECADD(block->compound_stmt.stmts, child);
    }
    return block;
}
/**
 * Parse a block delimited by TOKEN_LPARBRA / TOKEN_RPARBRA into a compound
 * statement. The function starts with the TODO marker, i.e. it is flagged as
 * unfinished.
 *
 * @return the compound statement, or a poisoned Ast on failure.
 */
static Ast* parse_function_block()
{
    TODO;
    CONSUME_OR(TOKEN_LPARBRA, &poisoned_ast);
    Ast *block = AST_NEW(AST_COMPOUND_STMT, tok);
    for (;;)
    {
        if (try_consume(TOKEN_RPARBRA)) break;
        Ast *child = TRY_AST(parse_stmt());
        block->compound_stmt.stmts = VECADD(block->compound_stmt.stmts, child);
    }
    return block;
}
static Path *parse_path(void)
{
if (tok.type != TOKEN_IDENT || next_tok.type != TOKEN_SCOPE) return NULL;
Path *path = malloc_arena(sizeof(Path));
memset(path, 0, sizeof(Path));
path->sub_module = tok;
if (tok.type == TOKEN_IDENT && next_tok.type == TOKEN_SCOPE)
{
advance();
advance();
path->module = path->sub_module;
path->sub_module = tok;
}
return path;
}
/**
* base_type
* : VOID
* | BOOL
* | CHAR
* | BYTE
* | SHORT
* | USHORT
* | INT
* | UINT
* | LONG
* | ULONG
* | FLOAT
* | DOUBLE
* | TYPE_IDENT
* | ident_scope TYPE_IDENT
* | TYPE '(' constant_expression ')'
* ;
*
* Assume prev_token is the type.
* @return Type (poisoned if fails)
*/
static inline Type *parse_base_type(void)
{
Path *path = parse_path();
if (path)
{
Type *type = type_new(TYPE_USER_DEFINED);
type->unresolved.path = path;
type->name_loc = tok;
if (!consume_type_name("types")) return &poisoned_type;
return type;
}
Type *type;
switch (tok.type)
{
case TOKEN_TYPE_IDENT:
type = TYPE_UNRESOLVED(tok);
break;
case TOKEN_TYPE:
advance_and_verify(TOKEN_TYPE);
CONSUME_OR(TOKEN_LPAREN, &poisoned_type);
{
type = type_new(TYPE_EXPRESSION);
type->unresolved_type_expr = TRY_EXPR_OR(parse_initializer(), &poisoned_type);
}
EXPECT_OR(TOKEN_RPAREN, &poisoned_type);
break;
case TOKEN_VOID:
type = type_void;
break;
case TOKEN_BOOL:
type = type_bool;
break;
case TOKEN_BYTE:
type = type_byte;
break;
case TOKEN_CHAR:
type = type_char;
break;
case TOKEN_DOUBLE:
type = type_double;
break;
case TOKEN_FLOAT:
type = type_float;
break;
case TOKEN_INT:
type = type_int;
break;
case TOKEN_ISIZE:
type = type_isize;
break;
case TOKEN_LONG:
type = type_long;
break;
case TOKEN_SHORT:
type = type_short;
break;
case TOKEN_UINT:
type = type_uint;
break;
case TOKEN_ULONG:
type = type_ulong;
break;
case TOKEN_USHORT:
type = type_ushort;
break;
case TOKEN_USIZE:
type = type_usize;
break;
case TOKEN_C_SHORT:
type = type_c_short;
break;
case TOKEN_C_INT:
type = type_c_int;
break;
case TOKEN_C_LONG:
type = type_c_long;
break;
case TOKEN_C_LONGLONG:
type = type_c_longlong;
break;
case TOKEN_C_USHORT:
type = type_c_ushort;
break;
case TOKEN_C_UINT:
type = type_c_uint;
break;
case TOKEN_C_ULONG:
type = type_c_ulong;
break;
case TOKEN_C_ULONGLONG:
type = type_c_ulonglong;
break;
default:
SEMA_ERROR(tok, "A type name was expected here.");
type = &poisoned_type;
break;
}
advance();
return type;
}
/**
 * array_type_index
 *		: '[' constant_expression ']'   -> TYPE_ARRAY with an unresolved length
 *		| '[' ']'                       -> TYPE_VARARRAY with len 0
 *		| '[' '+' ']'                   -> TYPE_INC_ARRAY
 *		;
 *
 * The current token must be '['.
 *
 * @param type the element type to wrap, may not be poisoned.
 * @return the wrapping array type (poisoned if fails)
 */
static inline Type *parse_array_type_index(Type *type)
{
    assert(type_ok(type));
    advance_and_verify(TOKEN_LBRACKET);

    if (try_consume(TOKEN_PLUS))
    {
        CONSUME_OR(TOKEN_RBRACKET, &poisoned_type);
        Type *incremental = type_new(TYPE_INC_ARRAY);
        incremental->base = type;
        // The incremental array inherits the resolve status of its element type.
        incremental->resolve_status = type->resolve_status;
        return incremental;
    }

    if (try_consume(TOKEN_RBRACKET))
    {
        Type *var_array = type_new(TYPE_VARARRAY);
        var_array->base = type;
        var_array->len = 0;
        return var_array;
    }

    Type *fixed = type_new(TYPE_ARRAY);
    fixed->base = type;
    fixed->unresolved_len = TRY_EXPR_OR(parse_expr(), &poisoned_type);
    CONSUME_OR(TOKEN_RBRACKET, &poisoned_type);
    return fixed;
}
/**
 * type_expression
 *		: base_type
 *		| type_expression '*'
 *		| type_expression array_type_index
 *
 * Parses a base type and then wraps it once for every trailing '*' or
 * '[...]' suffix. Parsing stops at the first token that is neither, or as
 * soon as the type becomes poisoned.
 *
 * @return Type, poisoned if parsing is invalid.
 */
static Type *parse_type_expression(void)
{
    Type *current = parse_base_type();
    for (;;)
    {
        if (current->type_kind == TYPE_POISONED) return current;
        if (tok.type == TOKEN_LBRACKET)
        {
            current = parse_array_type_index(current);
            continue;
        }
        if (tok.type == TOKEN_STAR)
        {
            advance();
            Type *pointer = type_new(TYPE_POINTER);
            assert(current);
            pointer->base = current;
            current = pointer;
            continue;
        }
        return current;
    }
}
/**
 * Parse the variable part of a declaration once its type has been parsed:
 * a name, optionally followed by '=' initializer and further names.
 *
 * `pending` is the declaration that was named most recently and has not yet
 * been closed by '=' or ','; it is NULL directly after either of those.
 * When a second name appears the result is turned into a DECL_MULTI_DECL
 * that collects all the individual declarations.
 *
 * Parsing ends (without consuming) at ')' or ';', or after an initializer
 * that is not followed by ','. Any other token is parsed as a new type that
 * applies to the names that follow.
 *
 * @param local true gives the variables VISIBLE_LOCAL, otherwise VISIBLE_MODULE.
 * @param type the already parsed type of the first variable.
 * @return the declaration (single or multi), poisoned on error.
 */
static inline Decl *parse_decl_after_type(bool local, Type *type)
{
    if (tok.type == TOKEN_LPAREN)
    {
        SEMA_ERROR(tok, "Expected '{'.");
        return &poisoned_decl;
    }

    EXPECT_IDENT_FOR_OR("variable_name", &poisoned_decl);
    Token first_name = tok;
    advance();

    Visibility visibility = local ? VISIBLE_LOCAL : VISIBLE_MODULE;
    Decl *pending = decl_new_var(first_name, type, VARDECL_LOCAL, visibility);
    Decl *result = pending;

    for (;;)
    {
        switch (tok.type)
        {
            case TOKEN_RPAREN:
            case TOKEN_EOS:
                if (pending) return result;
                SEMA_ERROR(tok, "Expected an identifier before '%s'.", token_type_to_string(tok.type));
                return &poisoned_decl;
            case TOKEN_EQ:
                if (!pending)
                {
                    SEMA_ERROR(tok, "Expected an identifier before '='.");
                    return &poisoned_decl;
                }
                advance_and_verify(TOKEN_EQ);
                pending->var.init_expr = TRY_EXPR_OR(parse_initializer(), &poisoned_decl);
                pending = NULL;
                if (!try_consume(TOKEN_COMMA)) return result;
                continue;
            case TOKEN_COMMA:
                if (!pending)
                {
                    SEMA_ERROR(tok, "Expected identifier.");
                    return &poisoned_decl;
                }
                advance();
                pending = NULL;
                continue;
            case TOKEN_IDENT:
            {
                Decl *added = decl_new_var(tok, type, VARDECL_LOCAL, visibility);
                advance();
                if (result->decl_kind != DECL_MULTI_DECL)
                {
                    // Second name seen: wrap the first declaration in a multi declaration.
                    Decl *multi = decl_new(DECL_MULTI_DECL, result->name, visibility);
                    multi->multi_decl = VECADD(multi->multi_decl, result);
                    multi->multi_decl = VECADD(multi->multi_decl, added);
                    result = multi;
                }
                else
                {
                    result->multi_decl = VECADD(result->multi_decl, added);
                }
                pending = added;
                continue;
            }
            default:
                // Anything else must be a new type for the names that follow.
                type = TRY_TYPE_OR(parse_type_expression(), &poisoned_decl);
                continue;
        }
    }
}
/**
 * declaration ::= ('local' | 'const')? type variable ('=' expr)?
 *
 * A leading 'local' selects local visibility; a leading 'const' marks the
 * resulting variable as VARDECL_CONST.
 *
 * NOTE(review): the const marking is written to `decl->var.kind` even when
 * parse_decl_after_type() returned a DECL_MULTI_DECL — confirm that is valid
 * for the Decl layout.
 *
 * @return Decl* (poisoned on error)
 */
static Decl *parse_decl(void)
{
    bool local = false;
    bool constant = false;
    switch (tok.type)
    {
        case TOKEN_LOCAL:
            local = true;
            advance();
            break;
        case TOKEN_CONST:
            constant = true;
            advance();
            break;
        default:
            break;
    }

    Type *type = TRY_TYPE_OR(parse_type_expression(), &poisoned_decl);
    Decl *decl = TRY_DECL_OR(parse_decl_after_type(local, type), &poisoned_decl);
    if (constant)
    {
        decl->var.kind = VARDECL_CONST;
    }
    return decl;
}
/**
* declaration_stmt
* : declaration ';'
* ;
*
* @return Ast* (poisoned if parsing fails)
*/
static Ast *parse_declaration_stmt(void)
{
Ast *decl_stmt = AST_NEW(AST_DECLARE_STMT, tok);
decl_stmt->declare_stmt = TRY_DECL_OR(parse_decl(), &poisoned_ast);
CONSUME_OR(TOKEN_EOS, &poisoned_ast);
return decl_stmt;
}
// Classification of what a lookahead found next.
// NOTE(review): not referenced in the part of the file shown here — confirm
// it is still used before relying on it.
typedef enum
{
NEXT_WAS_ERROR,
NEXT_WAS_EXPR,
NEXT_WAS_LABEL,
NEXT_WAS_DECL
} ExprCheck;
/**
* expr_stmt ::= expression EOS
* @return Ast* poisoned if expression fails to parse.
*/
static Ast *parse_expr_stmt(void)
{
Ast *stmt = AST_NEW(AST_EXPR_STMT, tok);
stmt->expr_stmt = TRY_EXPR_OR(parse_expr(), &poisoned_ast);
TRY_CONSUME_EOS();
return stmt;
}
/**
 * expression_list
 *	: expression
 *	| expression_list ',' expression
 *	;
 *
 * Each expression is wrapped in its own AST_EXPR_STMT and collected in an
 * AST_STMT_LIST.
 *
 * @return Ast * the statement list, poisoned if any expression fails to parse.
 */
static inline Ast *parse_expression_list(void)
{
    Ast *list = new_ast(AST_STMT_LIST, tok);
    Ast **collected = NULL;
    for (;;)
    {
        Expr *expr = TRY_EXPR_OR(parse_expr(), &poisoned_ast);
        Ast *wrapper = new_ast(AST_EXPR_STMT, expr->loc);
        wrapper->expr_stmt = expr;
        collected = VECADD(collected, wrapper);
        if (!try_consume(TOKEN_COMMA)) break;
    }
    list->stmt_list = collected;
    return list;
}
/**
 * decl_or_expr_list
 *	: expression_list
 *	| declaration_list
 *	;
 *
 * Appends the parsed statements to *stmt_list: one AST_EXPR_STMT per
 * expression of a comma separated expression list, or a single
 * AST_DECLARE_STMT for a declaration.
 *
 * The failure paths return `false`. They previously returned `&poisoned_ast`,
 * which is a non-null pointer and therefore converted to `true` in this
 * bool-returning function, hiding the failure from callers.
 *
 * @param stmt_list the vector to append the statements to.
 * @return true on success, false if parsing failed (error already reported).
 */
static inline bool parse_decl_expr_list(Ast ***stmt_list)
{
    Expr *expr = NULL;
    Type *type = NULL;

    if (!parse_type_or_expr(&expr, &type)) return false;

    if (!expr)
    {
        // A type was found, so this is a declaration.
        Decl *decl = TRY_DECL_OR(parse_decl_after_type(false, type), false);
        Ast *stmt = new_ast(AST_DECLARE_STMT, decl->name);
        stmt->declare_stmt = decl;
        *stmt_list = VECADD(*stmt_list, stmt);
        return true;
    }

    while (1)
    {
        Ast *stmt = new_ast(AST_EXPR_STMT, expr->loc);
        stmt->expr_stmt = expr;
        *stmt_list = VECADD(*stmt_list, stmt);
        if (!try_consume(TOKEN_COMMA)) break;
        expr = TRY_EXPR_OR(parse_expr(), false);
    }
    return true;
}
/**
 * control_expression
 *	: decl_or_expr_list
 *	| declaration_list ';' decl_or_expr_list
 *	;
 *
 * A ';' is only accepted after a declaration; if the first list ended in an
 * expression, a following ';' is reported as an error.
 *
 * @return Ast* an AST_STMT_LIST, poisoned on error.
 */
static inline Ast *parse_control_expression()
{
    Ast *list = AST_NEW(AST_STMT_LIST, tok);
    Ast ***stmts = &list->stmt_list;

    if (!parse_decl_expr_list(stmts)) return &poisoned_ast;

    assert(*stmts != NULL);
    bool ended_with_expr = VECLAST(*stmts)->ast_kind == AST_EXPR_STMT;
    if (ended_with_expr)
    {
        if (tok.type != TOKEN_EOS) return list;
        SEMA_ERROR(tok, "Unexpected ';'.");
        return &poisoned_ast;
    }

    // A declaration may be followed by ';' and a second list.
    if (try_consume(TOKEN_EOS) && !parse_decl_expr_list(stmts)) return &poisoned_ast;
    return list;
}
/**
 * if_stmt
 * 	: IF '(' control_expression ')' statement
 *	| IF '(' control_expression ')' compound_statement ELSE compound_statement
 * 	;
 *
 * An 'else' is only attached when the then-body is a compound statement, and
 * the else-body must itself start with '{'.
 *
 * @return Ast* the if statement, poisoned on error.
 */
static inline Ast* parse_if_stmt(void)
{
    Ast *if_ast = AST_NEW(AST_IF_STMT, tok);
    advance_and_verify(TOKEN_IF);
    CONSUME_OR(TOKEN_LPAREN, &poisoned_ast);
    Ast *condition = TRY_AST(parse_control_expression());
    CONSUME_OR(TOKEN_RPAREN, &poisoned_ast);
    Ast *then_body = TRY_AST(parse_stmt());
    if_ast->if_stmt.cond = condition;
    if_ast->if_stmt.then_body = then_body;

    bool has_else = then_body->ast_kind == AST_COMPOUND_STMT && tok.type == TOKEN_ELSE;
    if (has_else)
    {
        advance_and_verify(TOKEN_ELSE);
        if (tok.type != TOKEN_LBRACE)
        {
            SEMA_ERROR(tok, "'{' was expected after 'else'.");
            return &poisoned_ast;
        }
        if_ast->if_stmt.else_body = TRY_AST(parse_stmt());
    }
    return if_ast;
}
/**
 * while_stmt
 *	: WHILE '(' control_expression ')' statement
 *	;
 *
 * @return Ast* the while statement, poisoned on error.
 */
static inline Ast* parse_while_stmt(void)
{
    Ast *loop = AST_NEW(AST_WHILE_STMT, tok);
    advance_and_verify(TOKEN_WHILE);
    CONSUME_OR(TOKEN_LPAREN, &poisoned_ast);
    Ast *condition = TRY_AST(parse_control_expression());
    loop->while_stmt.cond = condition;
    CONSUME_OR(TOKEN_RPAREN, &poisoned_ast);
    Ast *body = TRY_AST(parse_stmt());
    loop->while_stmt.body = body;
    return loop;
}
/**
 * defer
 * 	: DEFER statement
 * 	;
 *
 * Only the plain `defer statement` form is handled by this function.
 *
 * @return Ast* the defer statement, poisoned if the body fails to parse.
 */
static inline Ast* parse_defer_stmt(void)
{
    Ast *defer_ast = AST_NEW(AST_DEFER_STMT, tok);
    advance_and_verify(TOKEN_DEFER);
    Ast *body = TRY_AST(parse_stmt());
    defer_ast->defer_stmt.body = body;
    return defer_ast;
}
/**
 * catch
 * 	: catch '(' ERROR ident ')' statement
 * 	| catch '(' type_expression ident ')' statement
 * 	;
 *
 * The error parameter is stored as a VARDECL_PARAM declaration; its type is
 * NULL when the generic 'error' keyword form is used.
 *
 * @return Ast* the catch statement, poisoned on error.
 */
static inline Ast* parse_catch_stmt(void)
{
    Ast *catch_stmt = AST_NEW(AST_CATCH_STMT, tok);
    advance_and_verify(TOKEN_CATCH);

    CONSUME_OR(TOKEN_LPAREN, &poisoned_ast);

    Type *type = NULL;
    if (!try_consume(TOKEN_ERROR_TYPE))
    {
        type = TRY_TYPE_OR(parse_type_expression(), &poisoned_ast);
    }

    EXPECT_IDENT_FOR_OR("error parameter", &poisoned_ast);
    Decl *decl = decl_new_var(tok, type, VARDECL_PARAM, VISIBLE_LOCAL);
    catch_stmt->catch_stmt.error_param = decl;
    // EXPECT_IDENT_FOR_OR only checks the token, so step past the parameter
    // name here; otherwise the ')' check below would see the identifier.
    advance();

    CONSUME_OR(TOKEN_RPAREN, &poisoned_ast);
    catch_stmt->catch_stmt.body = TRY_AST(parse_stmt());
    return catch_stmt;
}
// Parsing of 'asm' statements is not implemented: the body consists solely of
// the project's TODO marker.
// NOTE(review): what TODO expands to is not visible here (presumably an abort) — confirm.
static inline Ast* parse_asm_stmt(void)
{
TODO
}
/**
* do_stmt
* : DO statement WHILE '(' expression ')' ';'
*/
static inline Ast* parse_do_stmt(void)
{
Ast *do_ast = AST_NEW(AST_DO_STMT, tok);
advance_and_verify(TOKEN_DO);
do_ast->do_stmt.body = TRY_AST(parse_stmt());
CONSUME_OR(TOKEN_WHILE, &poisoned_ast);
CONSUME_OR(TOKEN_LPAREN, &poisoned_ast);
do_ast->do_stmt.expr = TRY_EXPR_OR(parse_expr(), &poisoned_ast);
CONSUME_OR(TOKEN_RPAREN, &poisoned_ast);
CONSUME_OR(TOKEN_EOS, &poisoned_ast);
return do_ast;
}
/**
 * switch
 *  : SWITCH '(' control_expression ')' compound_statement
 *  ;
 *
 * @return Ast* the switch statement, poisoned on error.
 */
static inline Ast* parse_switch_stmt(void)
{
    Ast *switch_node = AST_NEW(AST_SWITCH_STMT, tok);
    advance_and_verify(TOKEN_SWITCH);
    CONSUME_OR(TOKEN_LPAREN, &poisoned_ast);
    Ast *condition = TRY_AST(parse_control_expression());
    switch_node->switch_stmt.cond = condition;
    CONSUME_OR(TOKEN_RPAREN, &poisoned_ast);
    Ast *body = TRY_AST(parse_compound_stmt());
    switch_node->switch_stmt.body = body;
    return switch_node;
}
/**
 * for_statement
 * 	: FOR '(' decl_or_expr_list ';' expression_statement ')' statement
 *  | FOR '(' decl_or_expr_list ';' expression_statement expression_list ')' statement
 *  ;
 *
 * decl_or_expr_list
 *	: expression_list
 *	| declaration_list
 *	;
 *
 * The init list and the optional condition expression are stored together in
 * an AST_COND_STMT; the optional increment list is stored separately.
 *
 * A failed increment list now poisons the whole statement right away instead
 * of being stored and followed by a secondary "Expected ')'" style error.
 *
 * @return Ast* the for statement, poisoned on error.
 */
static inline Ast* parse_for_stmt(void)
{
    Ast *ast = AST_NEW(AST_FOR_STMT, tok);
    advance_and_verify(TOKEN_FOR);
    CONSUME_OR(TOKEN_LPAREN, &poisoned_ast);

    Ast *cond = new_ast(AST_COND_STMT, tok);

    if (!parse_decl_expr_list(&cond->cond_stmt.stmts)) return &poisoned_ast;
    CONSUME_OR(TOKEN_EOS, &poisoned_ast);

    // The condition expression may be left out.
    if (tok.type != TOKEN_EOS)
    {
        cond->cond_stmt.expr = TRY_EXPR_OR(parse_expr(), &poisoned_ast);
    }
    ast->for_stmt.cond = cond;
    CONSUME_OR(TOKEN_EOS, &poisoned_ast);

    // The increment list may be left out as well.
    if (!try_consume(TOKEN_RPAREN))
    {
        ast->for_stmt.incr = TRY_AST(parse_expression_list());
        CONSUME_OR(TOKEN_RPAREN, &poisoned_ast);
    }

    ast->for_stmt.body = TRY_AST(parse_stmt());
    return ast;
}
/**
 * Parse a constant expression, i.e. an expression starting at
 * PREC_CONDITIONAL precedence.
 *
 * @return Expr* whatever parse_precedence() produced.
 */
static inline Expr* parse_constant_expr(void)
{
    Expr *expr = parse_precedence(PREC_CONDITIONAL);
    return expr;
}
/**
 * case_stmt
 * 	: CASE constant_expression ':'
 * 	;
 *
 * @return Ast* the case label, poisoned on error.
 */
static inline Ast* parse_case_stmt(void)
{
    Ast *case_ast = AST_NEW(AST_CASE_STMT, tok);
    // Step over the 'case' keyword.
    advance();
    case_ast->case_stmt.expr = TRY_EXPR_OR(parse_constant_expr(), &poisoned_ast);
    if (!consume(TOKEN_COLON, "Missing ':' after case")) return &poisoned_ast;
    return case_ast;
}
/**
 * goto_stmt
 * 	: GOTO CONST_IDENT ';'
 * 	;
 *
 * The node is created after 'goto' has been consumed, so it is constructed
 * from the token that is expected to be the label name.
 *
 * @return Ast* the goto statement, poisoned on error.
 */
static inline Ast* parse_goto_stmt(void)
{
    advance_and_verify(TOKEN_GOTO);
    Ast *goto_ast = AST_NEW(AST_GOTO_STMT, tok);
    if (!consume_const_name("label")) return &poisoned_ast;
    if (!expect(TOKEN_EOS)) return &poisoned_ast;
    advance();
    return goto_ast;
}
/**
 * continue_stmt
 * 	: CONTINUE ';'
 * 	;
 *
 * @return Ast* the continue statement, poisoned if the ';' is missing.
 */
static inline Ast* parse_continue_stmt(void)
{
    Ast *continue_ast = AST_NEW(AST_CONTINUE_STMT, tok);
    advance_and_verify(TOKEN_CONTINUE);
    if (!expect(TOKEN_EOS)) return &poisoned_ast;
    advance();
    return continue_ast;
}
/**
 * next_stmt
 * 	: NEXT ';'
 * 	;
 *
 * @return Ast* the next statement, poisoned if the ';' is missing.
 */
static inline Ast* parse_next_stmt(void)
{
    Ast *next_ast = AST_NEW(AST_NEXT_STMT, tok);
    advance_and_verify(TOKEN_NEXT);
    if (!expect(TOKEN_EOS)) return &poisoned_ast;
    advance();
    return next_ast;
}
/**
 * break_stmt
 * 	: BREAK ';'
 * 	;
 *
 * @return Ast* the break statement, poisoned if the ';' is missing.
 */
static inline Ast* parse_break_stmt(void)
{
    Ast *break_ast = AST_NEW(AST_BREAK_STMT, tok);
    advance_and_verify(TOKEN_BREAK);
    if (!expect(TOKEN_EOS)) return &poisoned_ast;
    advance();
    return break_ast;
}
/**
* CTSWITCH '(' expression ')' '{' ct_switch_body '}'
*
* ct_switch_body
* : ct_case_statement
* | ct_switch_body ct_case_statement
* ;
*
* ct_case_statement
* : CTCASE type_list ':' statement
* | CTDEFAULT ':' statement
* ;
*
* @return
*/
static inline Ast* parse_ct_switch_stmt(void)
{
Ast *ast = AST_NEW(AST_CT_SWITCH_STMT, tok);
advance_and_verify(TOKEN_CT_SWITCH);
ast->ct_switch_stmt.cond = TRY_EXPR_OR(parse_paren_expr(), &poisoned_ast);
CONSUME_OR(TOKEN_LBRACE, &poisoned_ast);
Ast **switch_statements = NULL;
Ast *stmt = &poisoned_ast;
while (stmt)
{
switch (tok.type)
{
case TOKEN_CT_CASE:
stmt = AST_NEW(AST_CT_CASE_STMT, tok);
advance();
while (1)
{
Type *type = TRY_TYPE_OR(parse_type_expression(), &poisoned_ast);
vec_add(stmt->ct_case_stmt.types, type);
if (!try_consume(TOKEN_COMMA)) break;
}
CONSUME_OR(TOKEN_COLON, &poisoned_ast);
stmt->ct_case_stmt.body = TRY_AST_OR(parse_stmt(), &poisoned_ast);
vec_add(switch_statements, stmt);
break;
case TOKEN_DEFAULT:
stmt = AST_NEW(AST_CT_CASE_STMT, tok);
advance();
CONSUME_OR(TOKEN_COLON, &poisoned_ast);
stmt->ct_default_stmt = TRY_AST_OR(parse_stmt(), &poisoned_ast);
vec_add(switch_statements, stmt);
break;
case TOKEN_RBRACE:
stmt = NULL;
break;
default:
SEMA_ERROR(tok, "Expected $case or $default.");
return &poisoned_ast;
}
}
CONSUME_OR(TOKEN_RBRACE, &poisoned_ast);
ast->ct_switch_stmt.body = switch_statements;
return ast;
}
/**
 * ct_else_stmt
 *  : $else compound_statement
 *  ;
 *
 * The body is stored in the `ct_elif_stmt.then` slot of the node.
 *
 * @return Ast* the $else node, poisoned if the body fails to parse.
 */
static inline Ast* parse_ct_else_stmt(void)
{
    Ast *else_ast = AST_NEW(AST_CT_ELSE_STMT, tok);
    advance_and_verify(TOKEN_CT_ELSE);
    Ast *body = TRY_AST(parse_compound_stmt());
    else_ast->ct_elif_stmt.then = body;
    return else_ast;
}
/**
 * ct_elif_stmt
 * 	: $elif '(' expression ')' compound_statement
 * 	;
 *
 * A directly following $elif or $else is parsed recursively and chained via
 * the `elif` field.
 *
 * @return Ast* the $elif node, poisoned on error.
 */
static inline Ast *parse_ct_elif_stmt(void)
{
    Ast *elif_ast = AST_NEW(AST_CT_ELIF_STMT, tok);
    advance_and_verify(TOKEN_CT_ELIF);

    elif_ast->ct_elif_stmt.expr = TRY_EXPR_OR(parse_paren_expr(), &poisoned_ast);
    elif_ast->ct_elif_stmt.then = TRY_AST(parse_compound_stmt());

    switch (tok.type)
    {
        case TOKEN_CT_ELIF:
            elif_ast->ct_elif_stmt.elif = TRY_AST(parse_ct_elif_stmt());
            break;
        case TOKEN_CT_ELSE:
            elif_ast->ct_elif_stmt.elif = TRY_AST(parse_ct_else_stmt());
            break;
        default:
            break;
    }
    return elif_ast;
}
/**
 * ct_if_stmt
 * 	: $if '(' expression ')' compound_stmt
 * 	| $if '(' expression ')' compound_stmt elif_stmt
 * 	| $if '(' expression ')' compound_stmt else_stmt
 * 	;
 *
 * A directly following $elif or $else is chained via the `elif` field.
 *
 * @return Ast* the $if node, poisoned on error.
 */
static inline Ast* parse_ct_if_stmt(void)
{
    Ast *if_ast = AST_NEW(AST_CT_IF_STMT, tok);
    advance_and_verify(TOKEN_CT_IF);

    if_ast->ct_if_stmt.expr = TRY_EXPR_OR(parse_paren_expr(), &poisoned_ast);
    if_ast->ct_if_stmt.then = TRY_AST(parse_compound_stmt());

    switch (tok.type)
    {
        case TOKEN_CT_ELIF:
            if_ast->ct_if_stmt.elif = TRY_AST(parse_ct_elif_stmt());
            break;
        case TOKEN_CT_ELSE:
            if_ast->ct_if_stmt.elif = TRY_AST(parse_ct_else_stmt());
            break;
        default:
            break;
    }
    return if_ast;
}
/**
 * ct_for_stmt
 * 		: CTFOR '(' CT_IDENT IN expression ')' statement
 * 		| CTFOR '(' CT_IDENT, CT_IDENT IN expression ')' statement
 * 		;
 *
 * The two-variable form is detected by a ',' in the lookahead token; its
 * first variable is stored as the index, the other as the value.
 *
 * @return Ast* the compile time for statement, poisoned on error.
 */
static inline Ast* parse_ct_for_stmt(void)
{
    Ast *for_ast = AST_NEW(AST_CT_FOR_STMT, tok);
    advance_and_verify(TOKEN_CT_FOR);
    CONSUME_OR(TOKEN_LPAREN, &poisoned_ast);

    bool has_index = next_tok.type == TOKEN_COMMA;
    if (has_index)
    {
        for_ast->ct_for_stmt.index = tok;
        if (!consume(TOKEN_CT_IDENT, "Expected a compile time index variable")) return &poisoned_ast;
        advance_and_verify(TOKEN_COMMA);
    }

    for_ast->ct_for_stmt.value = tok;
    if (!consume(TOKEN_CT_IDENT, "Expected a compile time variable")) return &poisoned_ast;
    if (!consume(TOKEN_IN, "Expected 'in'.")) return &poisoned_ast;

    for_ast->ct_for_stmt.expr = TRY_EXPR_OR(parse_expr(), &poisoned_ast);
    CONSUME_OR(TOKEN_RPAREN, &poisoned_ast);
    for_ast->ct_for_stmt.body = TRY_AST(parse_stmt());
    return for_ast;
}
/**
 * return_stmt
 * 		: RETURN expression ';'
 * 		| RETURN ';'
 * 		;
 *
 * The node is created after 'return' has been consumed. A bare `return;`
 * leaves the expression NULL.
 *
 * @return Ast* if parsing fails it is poisoned
 */
static Ast *parse_return_stmt(void)
{
    advance_and_verify(TOKEN_RETURN);

    Ast *ret = AST_NEW(AST_RETURN_STMT, tok);
    ret->exit = EXIT_RETURN;
    ret->return_stmt.defer = 0;
    ret->return_stmt.expr = NULL;

    if (!try_consume(TOKEN_EOS))
    {
        ret->return_stmt.expr = TRY_EXPR_OR(parse_expr(), &poisoned_ast);
        CONSUME_OR(TOKEN_EOS, &poisoned_ast);
    }
    return ret;
}
static Ast *parse_throw_stmt(void)
{
Ast *ast = AST_NEW(AST_THROW_STMT, tok);
advance_and_verify(TOKEN_THROW);
ast->throw_stmt = TRY_EXPR_OR(parse_expr(), &poisoned_ast);
CONSUME_OR(TOKEN_EOS, &poisoned_ast);
return ast;
}
static Ast *parse_volatile_stmt(void)
{
Ast *ast = AST_NEW(AST_VOLATILE_STMT, tok);
ast->volatile_stmt = TRY_AST_OR(parse_compound_stmt(), &poisoned_ast);
return ast;
}
/**
 * default_stmt
 * 		: DEFAULT ':'
 * 		;
 *
 * @return Ast* the default label, poisoned if the ':' is missing.
 */
static Ast *parse_default_stmt(void)
{
    Ast *default_ast = AST_NEW(AST_DEFAULT_STMT, tok);
    advance_and_verify(TOKEN_DEFAULT);
    if (!consume(TOKEN_COLON, "Expected ':' after 'default'.")) return &poisoned_ast;
    return default_ast;
}
/**
 * Tell whether a token type starts a statement that parse_stmt() accepts
 * directly after 'try': switch, if, for, while, do or return.
 *
 * @param type the token type to check.
 * @return true for one of the statement keywords above, false otherwise.
 */
bool is_valid_try_statement(TokenType type)
{
    return type == TOKEN_SWITCH
        || type == TOKEN_IF
        || type == TOKEN_FOR
        || type == TOKEN_WHILE
        || type == TOKEN_DO
        || type == TOKEN_RETURN;
}
/**
 * label_stmt
 * 		: CONST_IDENT ':'
 * 		;
 *
 * Both tokens are consumed with advance_and_verify(), so the caller must
 * already have checked that they are present.
 *
 * @return Ast* the label node, created from the label name token.
 */
static inline Ast *parse_label_stmt(void)
{
    Ast *label = AST_NEW(AST_LABEL, tok);
    advance_and_verify(TOKEN_CONST_IDENT);
    advance_and_verify(TOKEN_COLON);
    return label;
}
/**
 * Look at the lookahead token to decide whether a type identifier starts an
 * expression rather than a declaration.
 *
 * @return true if the lookahead token is '.' or '('.
 */
static inline bool is_expr_after_type_ident(void)
{
    switch (next_tok.type)
    {
        case TOKEN_DOT:
        case TOKEN_LPAREN:
            return true;
        default:
            return false;
    }
}
/**
 * Parse either a type or an expression, depending on what the tokens turn
 * out to be. Exactly one of the two out parameters is written.
 *
 * - A type keyword or type identifier is parsed as a type, unless it is
 *   followed by '.' or '(' — then it is parsed as an expression.
 * - `ident '::' TypeIdent` is parsed as a type (again unless followed by '.'
 *   or '('). The lexer state is saved and restored around the lookahead, and
 *   the file-level `module` token is set to the leading identifier.
 * - `type(<expression>)` produces a TYPE_EXPRESSION type, whereas
 *   `type(<type>)` produces an EXPR_TYPE expression which is then extended
 *   with any infix operators that follow.
 * - Everything else is parsed as an expression.
 *
 * @param exprPtr receives the expression, if one was parsed.
 * @param typePtr receives the type, if one was parsed.
 * @return true if the parsed type / expression is ok, false otherwise.
 */
static bool parse_type_or_expr(Expr **exprPtr, Type **typePtr)
{
    switch (tok.type)
    {
        case TOKEN_VOID: case TOKEN_BYTE: case TOKEN_BOOL: case TOKEN_CHAR:
        case TOKEN_DOUBLE: case TOKEN_FLOAT:
        case TOKEN_INT: case TOKEN_ISIZE: case TOKEN_LONG: case TOKEN_SHORT:
        case TOKEN_UINT: case TOKEN_ULONG: case TOKEN_USHORT: case TOKEN_USIZE:
        case TOKEN_QUAD:
        case TOKEN_C_SHORT: case TOKEN_C_INT: case TOKEN_C_LONG: case TOKEN_C_LONGLONG:
        case TOKEN_C_USHORT: case TOKEN_C_UINT: case TOKEN_C_ULONG: case TOKEN_C_ULONGLONG:
        case TOKEN_TYPE_IDENT:
            if (is_expr_after_type_ident()) break;
            *typePtr = parse_type_expression();
            return type_ok(*typePtr);
        case TOKEN_IDENT:
        {
            if (next_tok.type != TOKEN_SCOPE) break;
            // We need a little lookahead to see if this is type or expression.
            lexer_store_state();
            module = tok;
            advance();
            advance();
            bool is_type = tok.type == TOKEN_TYPE_IDENT && !is_expr_after_type_ident();
            lexer_restore_state();
            if (!is_type) break;
            *typePtr = parse_type_expression();
            return type_ok(*typePtr);
        }
        case TOKEN_TYPE:
        {
            Token start = tok;
            advance_and_verify(TOKEN_TYPE);
            CONSUME_OR(TOKEN_LPAREN, false);
            Expr *inner_expr = NULL;
            Type *inner_type = NULL;
            if (!parse_type_or_expr(&inner_expr, &inner_type)) return false;
            CONSUME_OR(TOKEN_RPAREN, false);
            if (!inner_expr)
            {
                // type(<type>) is an expression whose value is the type.
                Expr *type_expr = expr_new(EXPR_TYPE, start);
                type_expr->type_expr.type = inner_type;
                *exprPtr = parse_precedence_with_left_side(type_expr, PREC_ASSIGNMENT);
                return expr_ok(*exprPtr);
            }
            // type(<expression>) is a type that is resolved from the expression.
            Type *wrapped = type_new(TYPE_EXPRESSION);
            wrapped->unresolved_type_expr = inner_expr;
            *typePtr = wrapped;
            return true;
        }
        default:
            break;
    }
    *exprPtr = parse_expr();
    return expr_ok(*exprPtr);
}
/**
 * Parse a statement that can be either a declaration or an expression
 * statement, letting parse_type_or_expr() decide which one it is. Both forms
 * must be terminated by ';'.
 *
 * @return Ast* an AST_DECLARE_STMT or AST_EXPR_STMT, poisoned on error.
 */
static inline Ast *parse_decl_or_expr_stmt(void)
{
    Expr *expr = NULL;
    Type *type = NULL;

    if (!parse_type_or_expr(&expr, &type)) return &poisoned_ast;

    if (!expr)
    {
        Decl *decl = TRY_DECL_OR(parse_decl_after_type(false, type), &poisoned_ast);
        Ast *decl_stmt = AST_NEW(AST_DECLARE_STMT, decl->name);
        decl_stmt->declare_stmt = decl;
        CONSUME_OR(TOKEN_EOS, &poisoned_ast);
        return decl_stmt;
    }

    CONSUME_OR(TOKEN_EOS, &poisoned_ast);
    Ast *expr_stmt = new_ast(AST_EXPR_STMT, expr->loc);
    expr_stmt->expr_stmt = expr;
    return expr_stmt;
}
/**
 * Parse a single statement by dispatching on the current token.
 *
 * The switch lists every handled token kind explicitly and has no `default`
 * label. Tokens that can never start a statement produce an error, are
 * skipped (except at end of file) and yield a poisoned Ast.
 *
 * @return Ast* the parsed statement, poisoned on error.
 */
static Ast *parse_stmt(void)
{
    switch (tok.type)
    {
        case TOKEN_LBRACE:
            return parse_compound_stmt();
        case TOKEN_LPARBRA:
            return parse_function_block();

        case TOKEN_HALF: case TOKEN_QUAD:
            SEMA_ERROR(next_tok, "Type is unsupported by platform.");
            advance();
            return &poisoned_ast;

        // A type name starts a declaration, unless it is followed by '.' or '{'.
        case TOKEN_VOID: case TOKEN_BYTE: case TOKEN_BOOL: case TOKEN_CHAR:
        case TOKEN_DOUBLE: case TOKEN_FLOAT:
        case TOKEN_INT: case TOKEN_ISIZE: case TOKEN_LONG: case TOKEN_SHORT:
        case TOKEN_UINT: case TOKEN_ULONG: case TOKEN_USHORT: case TOKEN_USIZE:
        case TOKEN_C_SHORT: case TOKEN_C_INT: case TOKEN_C_LONG: case TOKEN_C_LONGLONG:
        case TOKEN_C_USHORT: case TOKEN_C_UINT: case TOKEN_C_ULONG: case TOKEN_C_ULONGLONG:
        case TOKEN_TYPE_IDENT:
            return (next_tok.type == TOKEN_DOT || next_tok.type == TOKEN_LBRACE)
                   ? parse_expr_stmt()
                   : parse_declaration_stmt();

        case TOKEN_LOCAL:   // Local means declaration!
        case TOKEN_CONST:   // Const means declaration!
            return parse_declaration_stmt();

        case TOKEN_TYPE:
            return parse_decl_or_expr_stmt();

        case TOKEN_CONST_IDENT:
            return next_tok.type == TOKEN_COLON ? parse_label_stmt() : parse_expr_stmt();

        case TOKEN_IDENT:
            return next_tok.type == TOKEN_SCOPE ? parse_decl_or_expr_stmt() : parse_expr_stmt();

        case TOKEN_RETURN:      return parse_return_stmt();
        case TOKEN_IF:          return parse_if_stmt();
        case TOKEN_WHILE:       return parse_while_stmt();
        case TOKEN_DEFER:       return parse_defer_stmt();
        case TOKEN_SWITCH:      return parse_switch_stmt();
        case TOKEN_GOTO:        return parse_goto_stmt();
        case TOKEN_DO:          return parse_do_stmt();
        case TOKEN_FOR:         return parse_for_stmt();
        case TOKEN_CATCH:       return parse_catch_stmt();

        case TOKEN_TRY:
        {
            // 'try' not followed by a statement keyword is a try expression.
            if (!is_valid_try_statement(next_tok.type)) return parse_expr_stmt();
            Token try_token = tok;
            advance();
            Ast *inner = TRY_AST(parse_stmt());
            Ast *try_ast = AST_NEW(AST_TRY_STMT, try_token);
            try_ast->try_stmt = inner;
            return try_ast;
        }

        case TOKEN_CONTINUE:    return parse_continue_stmt();
        case TOKEN_CASE:        return parse_case_stmt();
        case TOKEN_BREAK:       return parse_break_stmt();
        case TOKEN_NEXT:        return parse_next_stmt();
        case TOKEN_ASM:         return parse_asm_stmt();
        case TOKEN_DEFAULT:     return parse_default_stmt();
        case TOKEN_CT_IF:       return parse_ct_if_stmt();
        case TOKEN_CT_SWITCH:   return parse_ct_switch_stmt();
        case TOKEN_CT_FOR:      return parse_ct_for_stmt();
        case TOKEN_THROW:       return parse_throw_stmt();
        case TOKEN_VOLATILE:    return parse_volatile_stmt();

        // Tokens that can start an expression.
        case TOKEN_STAR: case TOKEN_AMP: case TOKEN_INTEGER:
        case TOKEN_BIT_NOT: case TOKEN_BIT_OR: case TOKEN_BIT_XOR:
        case TOKEN_LPAREN: case TOKEN_MINUS: case TOKEN_NOT: case TOKEN_OR: case TOKEN_PLUS:
        case TOKEN_MINUSMINUS: case TOKEN_PLUSPLUS:
        case TOKEN_AT_IDENT: case TOKEN_HASH_IDENT: case TOKEN_CT_IDENT:
        case TOKEN_STRING: case TOKEN_REAL: case TOKEN_CAST:
        case TOKEN_FALSE: case TOKEN_NIL: case TOKEN_TRUE:
            return parse_expr_stmt();

        case TOKEN_INVALID_TOKEN:
            advance();
            return &poisoned_ast;

        // Tokens that can never start a statement.
        case TOKEN_AT: case TOKEN_COLON: case TOKEN_COMMA: case TOKEN_EQ:
        case TOKEN_GREATER: case TOKEN_DIV: case TOKEN_DOLLAR: case TOKEN_DOT:
        case TOKEN_HASH: case TOKEN_LESS: case TOKEN_LBRACKET: case TOKEN_MOD:
        case TOKEN_QUESTION: case TOKEN_AND: case TOKEN_ARROW:
        case TOKEN_BIT_AND_ASSIGN: case TOKEN_BIT_OR_ASSIGN: case TOKEN_BIT_XOR_ASSIGN:
        case TOKEN_DIV_ASSIGN: case TOKEN_DOTDOT: case TOKEN_ELVIS: case TOKEN_EQEQ:
        case TOKEN_GREATER_EQ: case TOKEN_LESS_EQ:
        case TOKEN_MINUS_ASSIGN: case TOKEN_MOD_ASSIGN: case TOKEN_MULT_ASSIGN:
        case TOKEN_NOT_EQUAL: case TOKEN_PLUS_ASSIGN: case TOKEN_ELIPSIS: case TOKEN_SCOPE:
        case TOKEN_SHR: case TOKEN_SHL:
        case TOKEN_AND_ASSIGN: case TOKEN_OR_ASSIGN: case TOKEN_SHR_ASSIGN: case TOKEN_SHL_ASSIGN:
        case TOKEN_ALIAS: case TOKEN_AS: case TOKEN_ELSE: case TOKEN_ENUM:
        case TOKEN_ERROR_TYPE: case TOKEN_FUNC: case TOKEN_GENERIC: case TOKEN_IMPORT:
        case TOKEN_MACRO: case TOKEN_MODULE: case TOKEN_PUBLIC: case TOKEN_STRUCT:
        case TOKEN_THROWS: case TOKEN_TYPEDEF: case TOKEN_UNION: case TOKEN_UNTIL:
        case TOKEN_ATTRIBUTE: case TOKEN_VAR:
        case TOKEN_AT_PARAM: case TOKEN_AT_THROWS: case TOKEN_AT_RETURN:
        case TOKEN_AT_ENSURE: case TOKEN_AT_REQUIRE: case TOKEN_AT_PURE:
        case TOKEN_AT_CONST: case TOKEN_AT_REQPARSE: case TOKEN_AT_DEPRECATED:
        case TOKEN_DOCS_START: case TOKEN_DOCS_END: case TOKEN_DOCS_EOL: case TOKEN_DOCS_LINE:
        case TOKEN_CT_CASE: case TOKEN_CT_ELIF: case TOKEN_CT_ELSE: case TOKEN_CT_DEFAULT:
        case TOKEN_RPARBRA: case TOKEN_IN:
            SEMA_ERROR(tok, "Unexpected '%s' found when expecting a statement.", token_type_to_string(tok.type));
            advance();
            return &poisoned_ast;

        case TOKEN_RPAREN: case TOKEN_RBRACE: case TOKEN_RBRACKET:
            SEMA_ERROR(tok, "Mismatched '%s' found.", token_type_to_string(tok.type));
            advance();
            return &poisoned_ast;

        case TOKEN_EOS:
            // A lone ';' is an empty statement.
            advance();
            return AST_NEW(AST_NOP_STMT, tok);

        case TOKEN_EOF:
            sema_error_at(tok.span.loc - 1, "Reached the end of the file when expecting a statement.");
            return &poisoned_ast;
    }
}
/**
 *
 * module_param
 * 		: CT_IDENT
 *		| HASH_IDENT
 *		;
 *
 * module_params
 * 		: module_param
 * 		| module_params ',' module_param
 * 		;
 *
 * Parses an optional parenthesised parameter list. Besides the two kinds in
 * the grammar above, the code also accepts AT_IDENT and TYPE_IDENT tokens.
 *
 * The token stored for each parameter is the validated current token (`tok`).
 * Previously `next_tok` was stored, i.e. the ',' or ')' following the
 * parameter. For the same reason the identifier / comma errors are now
 * reported on `tok`, the token that was actually inspected.
 *
 * @param tokens set to NULL first, then to the vector of parameter tokens.
 * @return true if there is no parameter list or it parsed correctly, false
 *         after reporting an error.
 */
static inline bool parse_optional_module_params(Token **tokens)
{
    *tokens = NULL;

    // No parameter list at all is fine.
    if (!try_consume(TOKEN_LPAREN)) return true;

    if (try_consume(TOKEN_RPAREN))
    {
        SEMA_ERROR(tok, "Generic parameter list cannot be empty.");
        return false;
    }

    while (1)
    {
        switch (tok.type)
        {
            case TOKEN_IDENT:
                sema_error_range(tok.span, "The module parameter must be a $ or #-prefixed name, did you forgot the '$'?");
                return false;
            case TOKEN_COMMA:
                sema_error_range(tok.span, "Unexpected ','");
                return false;
            case TOKEN_AT_IDENT:
            case TOKEN_CT_IDENT:
            case TOKEN_HASH_IDENT:
            case TOKEN_TYPE_IDENT:
                break;
            default:
                SEMA_ERROR(tok, "Only generic parameters are allowed here as parameters to the module.");
                return false;
        }
        // Record the parameter itself, then step past it.
        *tokens = VECADD(*tokens, tok);
        advance();
        if (!try_consume(TOKEN_COMMA))
        {
            return consume(TOKEN_RPAREN, "Expected ')'.");
        }
    }
}
/**
 * module
 *		: MODULE IDENT ';'
 *		| MODULE IDENT '(' module_params ')' ';'
 *
 * Sets the module of the current context. If the file does not start with
 * 'module', the module is set from the filename instead.
 */
static inline void parse_module(void)
{
	// No explicit module statement.
	if (!try_consume(TOKEN_MODULE))
	{
		context_set_module_from_filename(current_context);
		return;
	}

	// The current token should be the module name.
	Token module_name = tok;
	if (!consume(TOKEN_IDENT, "After 'module' the name of the module should be placed."))
	{
		context_set_module(current_context, (Token) {.type = TOKEN_INVALID_TOKEN}, NULL);
		recover_top_level();
		return;
	}

	// An optional '(' ... ')' makes this a generic module.
	Token *generic_parameters = NULL;
	bool params_ok = parse_optional_module_params(&generic_parameters);

	// The module is registered whether or not the parameter list was accepted.
	context_set_module(current_context, module_name, generic_parameters);
	if (!params_ok)
	{
		recover_top_level();
		return;
	}
	TRY_CONSUME_EOS_OR();
}
/**
 * Only call this if the first '(' has been detected.
 *
 * macro_invocation_list ::= '(' ')'
 *                         | '(' macro_parameter (',' macro_parameter)* ')'
 *
 * macro_parameter ::= type
 *                   | type_or_expr
 *                   | expr
 *
 * NOTE(review): the function is still marked TODO. Only the 'expr' alternative
 * of macro_parameter is handled below; type parameters are not parsed yet.
 *
 * @param result receives the vector of parsed parameter expressions (NULL if the list is empty).
 * @return true if the list parsed, false after an error was reported.
 */
static inline bool parse_macro_parameter_list(Expr*** result)
{
	TODO
	advance_and_verify(TOKEN_LPAREN);
	*result = NULL;
	// Keep reading parameters until the closing ')' is consumed.
	while (!try_consume(TOKEN_RPAREN))
	{
		if (try_consume(TOKEN_COMMA))
		{
			sema_error_range(tok.span, "There was an empty value here, did you accidentally add a ',' too many?");
			return false;
		}
		Expr *expr = parse_expr();
		if (expr->expr_kind == EXPR_POISONED) return false;
		*result = VECADD(*result, expr);
		COMMA_RPAREN_OR(false);
	}
	return true;
}
/**
 * import
 *		: IMPORT IDENT ';'
 *		| IMPORT IDENT AS IDENT ';'
 *		| IMPORT IDENT AS IDENT LOCAL ';'
 *		| IMPORT IDENT LOCAL ';'
 *
 * // TODO macro parameters (after grammar is updated)
 *
 * The import is added to the current context before the terminating ';' is checked.
 *
 * @return true if import succeeded
 */
static inline bool parse_import()
{
	advance_and_verify(TOKEN_IMPORT);

	Token module_name = tok;
	TRY_CONSUME_OR(TOKEN_IDENT, "Import statement should be followed by the name of the module to import.", false);

	Expr **generic_parameters = NULL;

	/* MACRO params here
	if (tok.type == TOKEN_LPAREN)
	{
		if (!parse_macro_parameter_list(&generic_parameters)) return false;
	}*/

	// Optional 'as <alias>'.
	Token alias = {};
	bool has_alias = try_consume(TOKEN_AS);
	if (has_alias)
	{
		alias = tok;
		if (!consume_ident("alias")) return false;
	}

	// Optional 'local'; combine it with the alias flag to pick the import type.
	bool is_local = try_consume(TOKEN_LOCAL);
	ImportType import_type;
	if (has_alias)
	{
		import_type = is_local ? IMPORT_TYPE_ALIAS_LOCAL : IMPORT_TYPE_ALIAS;
	}
	else
	{
		import_type = is_local ? IMPORT_TYPE_LOCAL : IMPORT_TYPE_FULL;
	}

	context_add_import(current_context, module_name, alias, import_type, generic_parameters);
	TRY_CONSUME_EOS_OR(false);
	return true;
}
/**
 * Parses one expression: runs the prefix rule registered for the current token
 * and then hands the result to parse_precedence_with_left_side together with
 * the requested precedence.
 *
 * @param precedence the precedence passed on to the infix loop.
 * @return the parsed expression; &poisoned_expr if the current token has no prefix
 *         rule, or whatever failing expression the prefix rule returned.
 */
static Expr *parse_precedence(Precedence precedence)
{
	// The prefix rule is looked up from the current token.
	ParseFn prefix = rules[tok.type].prefix;
	if (!prefix)
	{
		SEMA_ERROR(tok, "An expression was expected.");
		return &poisoned_expr;
	}

	Expr *lhs = prefix(NULL);
	return expr_ok(lhs) ? parse_precedence_with_left_side(lhs, precedence) : lhs;
}
/**
 * Parses an expression at assignment precedence, optionally wrapped as
 * 'try <expr>' or 'try <expr> else <expr>'.
 *
 * @return the expression (an EXPR_TRY node when 'try' was present), or
 *         &poisoned_expr if a sub-expression failed.
 */
static inline Expr* parse_expr(void)
{
	Token first_token = tok;
	bool has_try = try_consume(TOKEN_TRY);

	Expr *inner = TRY_EXPR_OR(parse_precedence(PREC_ASSIGNMENT), &poisoned_expr);
	if (!has_try) return inner;

	// Wrap the expression; the node is located at the 'try' token.
	Expr *try_expr = expr_new(EXPR_TRY, first_token);
	try_expr->try_expr.expr = inner;
	if (try_consume(TOKEN_ELSE))
	{
		try_expr->try_expr.else_expr = TRY_EXPR_OR(parse_precedence(PREC_ASSIGNMENT), &poisoned_expr);
	}
	return try_expr;
}
/**
 * Parses '(' expression ')'.
 *
 * @return the inner expression, or &poisoned_expr if either parenthesis
 *         or the expression itself is not accepted.
 */
static inline Expr *parse_paren_expr(void)
{
	CONSUME_OR(TOKEN_LPAREN, &poisoned_expr);
	Expr *inner = TRY_EXPR_OR(parse_expr(), &poisoned_expr);
	CONSUME_OR(TOKEN_RPAREN, &poisoned_expr);
	return inner;
}
/**
 * imports
 *		: import_decl
 *		| imports import_decl
 *		;
 *
 * Parses consecutive import statements; after a failed import,
 * recover_top_level() is called before looking for the next one.
 */
static inline void parse_imports(void)
{
	while (tok.type == TOKEN_IMPORT)
	{
		if (parse_import()) continue;
		recover_top_level();
	}
}
/**
 * const_decl
 *		: 'const' CT_IDENT '=' const_expr ';'
 *		| 'const' type IDENT '=' const_expr ';'
 *		;
 *
 * @param visibility the visibility given to the new declaration.
 * @return the constant declaration, or &poisoned_decl after an error.
 */
static inline Decl *parse_const_declaration(Visibility visibility)
{
	advance_and_verify(TOKEN_CONST);

	// For a compile time constant the current token is already the name.
	Decl *decl = decl_new_var(tok, NULL, VARDECL_CONST, visibility);

	if (tok.type == TOKEN_CT_IDENT)
	{
		if (!is_all_upper(tok.string))
		{
			SEMA_ERROR(tok, "Compile time constants must be all upper characters.");
			return &poisoned_decl;
		}
		// Step past the name, otherwise the '=' check below can never succeed.
		advance_and_verify(TOKEN_CT_IDENT);
	}
	else
	{
		// Runtime constant: per the grammar the type comes first, then the name.
		decl->var.type = TRY_TYPE_OR(parse_type_expression(), &poisoned_decl);
		decl->name = tok;
		if (!consume_const_name("constant")) return &poisoned_decl;
	}

	CONSUME_OR(TOKEN_EQ, &poisoned_decl);

	decl->var.init_expr = TRY_EXPR_OR(parse_initializer(), &poisoned_decl);

	CONSUME_OR(TOKEN_EOS, &poisoned_decl);
	return decl;
}
/**
 * global_declaration
 *		: type_expression IDENT ';'
 *		| type_expression IDENT '=' expression ';'
 *		;
 *
 * @param visibility the visibility given to the new declaration.
 * @return the global variable declaration, or &poisoned_decl after an error.
 */
static inline Decl *parse_global_declaration(Visibility visibility)
{
	Type *var_type = TRY_TYPE_OR(parse_type_expression(), &poisoned_decl);

	// The declaration is named after the token following the type.
	Decl *global = decl_new_var(tok, var_type, VARDECL_GLOBAL, visibility);
	if (!consume_ident("global variable")) return &poisoned_decl;

	// Optional initializer.
	if (try_consume(TOKEN_EQ))
	{
		global->var.init_expr = TRY_EXPR_OR(parse_initializer(), &poisoned_decl);
	}

	TRY_CONSUME_EOS_OR(&poisoned_decl);
	return global;
}
/**
 * attribute_list
 *		: attribute
 *		| attribute_list attribute
 *		;
 *
 * attribute
 *		: AT_IDENT
 *		| path AT_IDENT
 *		| AT_IDENT '(' constant_expression ')'
 *		| path AT_IDENT '(' constant_expression ')'
 *		;
 *
 * Collects the attributes into parent_decl->attributes. An attribute whose name
 * string is the same pointer as an earlier one is reported as a repeat; the path
 * is not part of that comparison.
 *
 * @param parent_decl the declaration receiving the attributes.
 * @return true if parsing succeeded, false if recovery is needed
 */
static inline bool parse_attributes(Decl *parent_decl)
{
	parent_decl->attributes = NULL;

	while (tok.type == TOKEN_AT_IDENT || (tok.type == TOKEN_IDENT && next_tok.type == TOKEN_SCOPE))
	{
		Path *path = parse_path();

		Attr *attr = malloc_arena(sizeof(Attr));
		attr->name = tok;
		attr->path = path;
		// Set explicitly: nothing in this function shows that malloc_arena zeroes memory,
		// and an attribute without '(' would otherwise carry an indeterminate pointer.
		attr->expr = NULL;

		TRY_CONSUME_OR(TOKEN_AT_IDENT, "Expected an attribute", false);

		// Optional argument.
		if (tok.type == TOKEN_LPAREN)
		{
			attr->expr = TRY_EXPR_OR(parse_paren_expr(), false);
		}

		// Names are compared by pointer here.
		// NOTE(review): presumably token strings are interned - confirm.
		const char *name = attr->name.string;
		VECEACH(parent_decl->attributes, i)
		{
			Attr *other_attr = parent_decl->attributes[i];
			if (other_attr->name.string == name)
			{
				SEMA_ERROR(attr->name, "Repeat of attribute '%s' here.", name);
				return false;
			}
		}
		parent_decl->attributes = VECADD(parent_decl->attributes, attr);
	}
	return true;
}
/**
 * Expects the current token to be the opening '{'.
 *
 * struct_body
 *		: '{' struct_declaration_list '}'
 *		;
 *
 * struct_declaration_list
 *		: struct_member_declaration
 *		| struct_declaration_list struct_member_declaration
 *		;
 *
 * struct_member_declaration
 *		: type_expression identifier_list opt_attributes ';'
 *		| struct_or_union IDENT opt_attributes struct_body
 *		| struct_or_union opt_attributes struct_body
 *		;
 *
 * NOTE(review): attributes after an identifier_list are in the grammar but are
 * not parsed by this function.
 *
 * @param parent the direct parent.
 * @param visible_parent the visible parent when checking duplicate symbols.
 * @return true if the body parsed, false after an error was reported.
 */
bool parse_struct_body(Decl *parent, Decl *visible_parent)
{
	CONSUME_OR(TOKEN_LBRACE, false);

	while (tok.type != TOKEN_RBRACE)
	{
		TokenType token_type = tok.type;
		if (token_type == TOKEN_STRUCT || token_type == TOKEN_UNION)
		{
			DeclKind decl_kind = decl_from_token(token_type);
			Decl *member;
			// Decide now whether the nested type is named: once its name and attributes
			// have been consumed the current token no longer tells us.
			bool is_named = next_tok.type == TOKEN_IDENT;
			if (!is_named)
			{
				// Anonymous nested struct/union: located at the keyword, with no name.
				Token name_replacement = tok;
				name_replacement.string = NULL;
				member = decl_new_user_defined_type(name_replacement, decl_kind, parent->visibility);
				advance();
			}
			else
			{
				advance();
				member = decl_new_user_defined_type(tok, decl_kind, parent->visibility);
				Decl *other = struct_find_name(visible_parent, tok.string);
				if (other)
				{
					SEMA_ERROR(tok, "Duplicate member '%s' found.", tok.string);
					sema_prev_at_range(other->name.span, "Previous declaration with the same name was here.");
					decl_poison(visible_parent);
					decl_poison(other);
					decl_poison(member);
				}
				advance_and_verify(TOKEN_IDENT);
			}
			if (!parse_attributes(member)) return false;
			parent->strukt.members = VECADD(parent->strukt.members, member);
			// A named nested type becomes the visible parent for its own members;
			// members of an anonymous one are checked against the enclosing visible parent.
			if (!parse_struct_body(member, is_named ? member : visible_parent))
			{
				decl_poison(visible_parent);
				return false;
			}
			continue;
		}

		Type *type = TRY_TYPE_OR(parse_type_expression(), false);

		while (1)
		{
			EXPECT_OR(TOKEN_IDENT, false);
			Decl *member = decl_new_var(tok, type, VARDECL_MEMBER, parent->visibility);
			Decl *other = struct_find_name(visible_parent, member->name.string);
			if (other)
			{
				SEMA_ERROR(member->name, "Duplicate member '%s' found.", member->name.string);
				sema_prev_at_range(other->name.span, "Previous declaration with the same name was here.");
				decl_poison(visible_parent);
				decl_poison(other);
				decl_poison(member);
			}
			parent->strukt.members = VECADD(parent->strukt.members, member);
			advance();
			if (tok.type != TOKEN_COMMA) break;
			// Consume the ',' so the next iteration starts at the following member name.
			advance();
		}
		CONSUME_OR(TOKEN_EOS, false);
	}
	advance_and_verify(TOKEN_RBRACE);
	return true;
}
/**
 * struct_declaration
 *		: struct_or_union TYPE_IDENT opt_attributes struct_body
 *		;
 *
 * The current token is taken as the struct/union keyword.
 *
 * @param visibility the visibility given to the new declaration.
 * @return the struct or union declaration, or &poisoned_decl after an error.
 */
static inline Decl *parse_struct_declaration(Visibility visibility)
{
	TokenType keyword = tok.type;
	advance();

	const char *kind_name = struct_union_name_from_token(keyword);

	Token name = tok;
	if (!consume_type_name(kind_name)) return &poisoned_decl;

	Decl *decl = decl_new_user_defined_type(name, decl_from_token(keyword), visibility);

	// Attributes first, then the body; the top-level type is its own visible parent.
	if (!parse_attributes(decl) || !parse_struct_body(decl, decl))
	{
		return &poisoned_decl;
	}

	DEBUG_LOG("Parsed %s %s completely.", kind_name, name.string);
	return decl;
}
/**
* Parse statements up to the next '}', 'case' or 'default'
*/
static inline Ast *parse_generics_statements(void)
{
Ast *ast = AST_NEW(AST_COMPOUND_STMT, tok);
while (tok.type != TOKEN_RBRACE && tok.type != TOKEN_CASE && tok.type != TOKEN_DEFAULT)
{
Ast *stmt = TRY_AST_OR(parse_stmt(), &poisoned_ast);
ast->compound_stmt.stmts = VECADD(ast->compound_stmt.stmts, stmt);
}
return ast;
}
/**
 * generics_declaration
 *		: GENERIC opt_path IDENT '(' macro_argument_list ')' '{' generics_body '}'
 *		| GENERIC type_expression opt_path IDENT '(' macro_argument_list ')' '{' generics_body '}'
 *		;
 *
 * opt_path
 *		:
 *		| path
 *		;
 *
 * @param visibility the visibility given to the new declaration.
 * @return the generic declaration, or &poisoned_decl after an error.
 */
static inline Decl *parse_generics_declaration(Visibility visibility)
{
	advance_and_verify(TOKEN_GENERIC);

	// Anything other than an identifier here is parsed as the return type.
	Type *rtype = NULL;
	if (tok.type != TOKEN_IDENT)
	{
		rtype = TRY_TYPE_OR(parse_type_expression(), &poisoned_decl);
	}

	Path *path = parse_path();
	Decl *decl = decl_new_user_defined_type(tok, DECL_GENERIC, visibility);
	decl->generic_decl.path = path;
	if (!consume_ident("generic function name")) return &poisoned_decl;
	decl->generic_decl.rtype = rtype;

	// Parameter names.
	Token *parameters = NULL;
	CONSUME_OR(TOKEN_LPAREN, &poisoned_decl);
	while (!try_consume(TOKEN_RPAREN))
	{
		if (tok.type != TOKEN_IDENT)
		{
			SEMA_ERROR(tok, "Expected an identifier.");
			// This function returns a Decl *, so failure is signalled with the
			// poisoned decl rather than 'false' (which would be a NULL pointer).
			return &poisoned_decl;
		}
		parameters = VECADD(parameters, tok);
		advance();
		COMMA_RPAREN_OR(&poisoned_decl);
	}

	// Body: a sequence of 'case <types>:' and 'default:' sections.
	CONSUME_OR(TOKEN_LBRACE, &poisoned_decl);
	Ast **cases = NULL;
	while (!try_consume(TOKEN_RBRACE))
	{
		if (tok.type == TOKEN_CASE)
		{
			Ast *generic_case = AST_NEW(AST_GENERIC_CASE_STMT, tok);
			advance_and_verify(TOKEN_CASE);
			Type **types = NULL;
			while (!try_consume(TOKEN_COLON))
			{
				Type *type = TRY_TYPE_OR(parse_type_expression(), &poisoned_decl);
				types = VECADD(types, type);
				if (!try_consume(TOKEN_COMMA) && tok.type != TOKEN_COLON)
				{
					SEMA_ERROR(tok, "Expected ',' or ':'.");
					return &poisoned_decl;
				}
			}
			generic_case->generic_case_stmt.types = types;
			generic_case->generic_case_stmt.body = TRY_AST_OR(parse_generics_statements(), &poisoned_decl);
			cases = VECADD(cases, generic_case);
			continue;
		}
		if (tok.type == TOKEN_DEFAULT)
		{
			Ast *generic_case = AST_NEW(AST_GENERIC_DEFAULT_STMT, tok);
			advance_and_verify(TOKEN_DEFAULT);
			CONSUME_OR(TOKEN_COLON, &poisoned_decl);
			generic_case->generic_default_stmt = TRY_AST_OR(parse_generics_statements(), &poisoned_decl);
			cases = VECADD(cases, generic_case);
			continue;
		}
		SEMA_ERROR(tok, "Expected 'case' or 'default'.");
		return &poisoned_decl;
	}
	decl->generic_decl.cases = cases;
	decl->generic_decl.parameters = parameters;
	return decl;
}
/**
 * param_declaration
 *		: type_expression
 *		| type_expression IDENT
 *		| type_expression IDENT '=' initializer
 *		;
 *
 * @param parent_visibility visibility given to the parameter declaration.
 * @param parameters the parameter vector the new parameter is appended to.
 * @param type_only if true, a parameter without a name is accepted.
 * @return true if the parameter was parsed and appended, false after an error.
 */
static inline bool parse_param_decl(Visibility parent_visibility, Decl*** parameters, bool type_only)
{
	Type *type = TRY_TYPE_OR(parse_type_expression(), false);
	Decl *param = decl_new_var(tok, type, VARDECL_PARAM, parent_visibility);

	// No identifier after the type: the parameter is unnamed.
	if (!try_consume(TOKEN_IDENT))
	{
		param->name.string = NULL;
	}

	const char *name = param->name.string;

	if (!name && !type_only)
	{
		SEMA_ERROR(tok, "The function parameter must be named.");
		return false;
	}
	// Only a named parameter may carry a default value.
	if (name && try_consume(TOKEN_EQ))
	{
		param->var.init_expr = TRY_EXPR_OR(parse_initializer(), false);
	}
	if (param->name.string)
	{
		VECEACH(*parameters, i)
		{
			if ((*parameters)[i]->name.string == name)
			{
				// The new parameter has not been appended yet, so its 1-based position
				// is the current size plus one. The casts match the %d conversions.
				SEMA_ERROR(param->name, "Duplicate parameter name '%s' - parameter %d and %d clash.",
				           name, (int)(i + 1), (int)(vec_size(*parameters) + 1));
				return false;
			}
		}
	}
	*parameters = VECADD(*parameters, param);
	return true;
}
/**
 * throw_declaration
 *		: THROWS
 *		| THROWS error_list
 *		;
 *
 * opt_throw_declaration
 *		: throw_declaration
 *		|
 *		;
 *
 * Stores the listed error type tokens in signature->throws. The field is only
 * written when a 'throws' clause was parsed without error.
 *
 * @param signature the function signature to update.
 * @return true if there was no clause or it parsed, false after an error.
 */
static inline bool parse_opt_throw_declaration(FunctionSignature *signature)
{
	// Catch the likely misspelling.
	if (tok.type == TOKEN_THROW)
	{
		SEMA_ERROR(tok, "Did you mean 'throws'?");
		return false;
	}

	if (!try_consume(TOKEN_THROWS)) return true;

	// Collect comma separated type identifiers.
	Token *error_types = NULL;
	while (tok.type == TOKEN_TYPE_IDENT)
	{
		error_types = VECADD(error_types, tok);
		advance();
		if (!try_consume(TOKEN_COMMA)) break;
	}

	// Diagnose what follows the list.
	if (tok.type == TOKEN_TYPE_IDENT)
	{
		SEMA_ERROR(tok, "Expected ',' between each error type.");
		return false;
	}
	if (tok.type == TOKEN_IDENT || tok.type == TOKEN_CONST_IDENT)
	{
		SEMA_ERROR(tok, "Expected an error type.");
		return false;
	}

	signature->throws = error_types;
	return true;
}
/**
 * parameter_type_list
 *		: parameter_list
 *		| parameter_list ',' ELLIPSIS
 *		| parameter_list ',' type_expression ELLIPSIS
 *		;
 *
 * opt_parameter_type_list
 *		: '(' ')'
 *		| '(' parameter_type_list ')'
 *		;
 *
 * parameter_list
 *		: param_declaration
 *		| parameter_list ',' param_declaration
 *		;
 *
 * @param parent_visibility visibility passed on to each parameter declaration.
 * @param signature the signature receiving the parameters and the variadic flag.
 * @param is_interface passed to parse_param_decl as its 'type_only' argument.
 * @return true if the list parsed, false after an error was reported.
 */
static inline bool parse_opt_parameter_type_list(Visibility parent_visibility, FunctionSignature *signature, bool is_interface)
{
	CONSUME_OR(TOKEN_LPAREN, false);
	while (!try_consume(TOKEN_RPAREN))
	{
		// Reaching here with the flag already set means something follows the '...'.
		// Checking at the top of the iteration lets a trailing '...' directly before
		// ')' through; checking after consuming it would reject every variadic list.
		if (signature->variadic)
		{
			SEMA_ERROR(tok, "Variadic arguments should be the last in a parameter list.");
			return false;
		}
		if (try_consume(TOKEN_ELIPSIS))
		{
			signature->variadic = true;
		}
		else
		{
			if (!parse_param_decl(parent_visibility, &(signature->params), is_interface)) return false;
		}
		// Without a ',' the list must end here.
		if (!try_consume(TOKEN_COMMA))
		{
			EXPECT_OR(TOKEN_RPAREN, false);
		}
	}
	return true;
}
// Maps a token type to the attribute domain it names. Token types without an
// entry are zero, which parse_attribute_declaration treats as "not a domain".
static AttributeDomains TOKEN_TO_ATTR[TOKEN_EOF + 1] = {
		[TOKEN_STRUCT] = ATTR_STRUCT,
		[TOKEN_UNION] = ATTR_UNION,
		[TOKEN_ENUM] = ATTR_ENUM,
		[TOKEN_ERROR_TYPE] = ATTR_ERROR,
		[TOKEN_TYPEDEF] = ATTR_TYPEDEF,
		[TOKEN_FUNC] = ATTR_FUNC,
		[TOKEN_VAR] = ATTR_VAR,
		[TOKEN_CONST] = ATTR_CONST,
};
/**
 * attribute_declaration
 *		: ATTRIBUTE attribute_domains AT_IDENT ';'
 *		| ATTRIBUTE attribute_domains AT_IDENT '(' parameter_type_list ')' ';'
 *		;
 *
 * attribute_domains
 *		: attribute_domain
 *		| attribute_domains ',' attribute_domain
 *		;
 *
 * attribute_domain
 *		: FUNC
 *		| VAR
 *		| ENUM
 *		| STRUCT
 *		| UNION
 *		| TYPEDEF
 *		| CONST
 *		| ERROR
 *		;
 *
 * NOTE(review): the collected domain mask is not stored on the declaration
 * anywhere in this function - confirm whether a field for it exists.
 *
 * @param visibility the visibility given to the new declaration.
 * @return the attribute declaration, or &poisoned_decl after an error.
 */
static inline Decl *parse_attribute_declaration(Visibility visibility)
{
	advance_and_verify(TOKEN_ATTRIBUTE);
	AttributeDomains domains = 0;
	AttributeDomains last_domain = TOKEN_TO_ATTR[tok.type];
	while (last_domain)
	{
		// Remember the domain token so a duplicate is reported at the right place.
		Token domain_token = tok;
		advance();
		if ((domains & last_domain) != 0)
		{
			// Report and carry on with the list. The ',' handling below must still run,
			// otherwise 'last_domain' never changes and the loop keeps eating tokens.
			SEMA_ERROR(domain_token, "'%.*s' appeared more than once.", (int)domain_token.span.length, domain_token.start);
		}
		domains |= last_domain;
		if (!try_consume(TOKEN_COMMA)) break;
		last_domain = TOKEN_TO_ATTR[tok.type];
	}

	// Capture the name before it is consumed so the declaration is named after it.
	Token name = tok;
	TRY_CONSUME_OR(TOKEN_AT_IDENT, "Expected an attribute name.", &poisoned_decl);
	Decl *decl = decl_new(DECL_ATTRIBUTE, name, visibility);

	// Zero here means either no domain at all or a ',' that was not followed by one.
	if (last_domain == 0)
	{
		SEMA_ERROR(name, "Expected at least one domain for attribute '%s'.", decl->name.string);
		return &poisoned_decl;
	}

	// The parameter list is optional in the grammar.
	if (tok.type == TOKEN_LPAREN)
	{
		if (!parse_opt_parameter_type_list(visibility, &decl->attr.attr_signature, false)) return &poisoned_decl;
	}

	TRY_CONSUME_EOS_OR(&poisoned_decl);
	return decl;
}
/**
 * func_typedef
 *		: FUNC type_expression opt_parameter_type_list
 *		| FUNC type_expression opt_parameter_type_list throw_declaration
 *		;
 *
 * Fills in the function signature of a typedef declaration. The parameter list
 * is parsed with 'is_interface' set to true.
 *
 * @param decl the typedef declaration to fill in.
 * @param visibility visibility passed on to the parameter declarations.
 * @return true on success, false after an error was reported.
 */
static inline bool parse_func_typedef(Decl *decl, Visibility visibility)
{
	decl->typedef_decl.is_func = true;
	advance_and_verify(TOKEN_FUNC);

	Type *rtype = TRY_TYPE_OR(parse_type_expression(), false);
	decl->typedef_decl.function_signature.rtype = rtype;

	// Parameters first; the optional throws clause is only tried if they parsed.
	return parse_opt_parameter_type_list(visibility, &(decl->typedef_decl.function_signature), true)
	       && parse_opt_throw_declaration(&(decl->typedef_decl.function_signature));
}
/**
 * Parses 'typedef' followed by either a function signature (see parse_func_typedef)
 * or a type expression, then 'as', the new type name and ';'.
 *
 * @param visibility the visibility given to the new declaration.
 * @return the typedef declaration, or &poisoned_decl after an error.
 */
static inline Decl *parse_typedef_declaration(Visibility visibility)
{
	// Created at the 'typedef' token; the name is replaced once it has been read.
	Decl *decl = decl_new_user_defined_type(tok, DECL_TYPEDEF, visibility);
	advance_and_verify(TOKEN_TYPEDEF);

	if (tok.type != TOKEN_FUNC)
	{
		decl->typedef_decl.type = TRY_TYPE_OR(parse_type_expression(), &poisoned_decl);
		decl->typedef_decl.is_func = false;
	}
	else if (!parse_func_typedef(decl, visibility))
	{
		return &poisoned_decl;
	}

	CONSUME_OR(TOKEN_AS, &poisoned_decl);

	decl->name = tok;
	if (!consume_type_name("typedef")) return &poisoned_decl;

	CONSUME_OR(TOKEN_EOS, &poisoned_decl);
	return decl;
}
/**
 * Parses 'macro', an optional return type, the '@'-prefixed macro name, a
 * parenthesised parameter list and the body statement. Each parameter is a
 * name token (IDENT, AT_IDENT, CT_IDENT or HASH_IDENT), optionally preceded
 * by a type.
 *
 * @param visibility the visibility given to the macro and its parameters.
 * @return the macro declaration, or &poisoned_decl after an error.
 */
static inline Decl *parse_macro_declaration(Visibility visibility)
{
	advance_and_verify(TOKEN_MACRO);

	// Anything other than the macro name here is parsed as the return type.
	Type *rtype = NULL;
	if (tok.type != TOKEN_AT_IDENT)
	{
		rtype = TRY_TYPE_OR(parse_type_expression(), &poisoned_decl);
	}

	Decl *decl = decl_new(DECL_MACRO, tok, visibility);
	decl->macro_decl.rtype = rtype;
	TRY_CONSUME_OR(TOKEN_AT_IDENT, "Expected a macro name starting with '@'", &poisoned_decl);

	CONSUME_OR(TOKEN_LPAREN, &poisoned_decl);
	Decl **params = NULL;
	while (!try_consume(TOKEN_RPAREN))
	{
		Type *parm_type = NULL;

		// Loop until a name token is current. The first non-name token is parsed
		// as the parameter type; a second one is an error.
		while (tok.type != TOKEN_IDENT && tok.type != TOKEN_AT_IDENT
		       && tok.type != TOKEN_CT_IDENT && tok.type != TOKEN_HASH_IDENT)
		{
			if (parm_type)
			{
				SEMA_ERROR(tok, "Expected a macro parameter");
				return &poisoned_decl;
			}
			parm_type = TRY_TYPE_OR(parse_type_expression(), &poisoned_decl);
		}

		Decl *param = decl_new_var(tok, parm_type, VARDECL_PARAM, visibility);
		advance();
		params = VECADD(params, param);
		COMMA_RPAREN_OR(&poisoned_decl);
	}
	decl->macro_decl.parameters = params;
	decl->macro_decl.body = TRY_AST_OR(parse_stmt(), &poisoned_decl);
	return decl;
}
/**
 * Expects the current token to be 'func'.
 *
 * func_name
 *		: path TYPE_IDENT '.' IDENT
 *		| TYPE_IDENT '.' IDENT
 *		| IDENT
 *		;
 *
 * func_definition
 *		: func_declaration compound_statement
 *		| func_declaration ';'
 *		;
 *
 * func_declaration
 *		: FUNC type_expression func_name '(' opt_parameter_type_list ')' opt_attributes
 *		| FUNC type_expression func_name '(' opt_parameter_type_list ')' throw_declaration opt_attributes
 *		;
 *
 * NOTE(review): opt_attributes is in the grammar but is not parsed here.
 *
 * @param visibility the visibility given to the function.
 * @param is_interface if true, a body is rejected and ';' is required instead;
 *        the flag is also passed to the parameter list parser.
 * @return the function declaration, or &poisoned_decl after an error.
 */
static inline Decl *parse_func_definition(Visibility visibility, bool is_interface)
{
	advance_and_verify(TOKEN_FUNC);

	// This function returns a Decl *, so failure is the poisoned decl, not 'false'.
	Type *return_type = TRY_TYPE_OR(parse_type_expression(), &poisoned_decl);

	Decl *func = decl_new_user_defined_type(tok, DECL_FUNC, visibility);
	func->func.function_signature.rtype = return_type;

	Path *path = parse_path();
	if (path || tok.type == TOKEN_TYPE_IDENT)
	{
		// Special case, actually an extension
		TRY_EXPECT_OR(TOKEN_TYPE_IDENT, "A type was expected after '::'.", &poisoned_decl);
		Type *type = type_new(TYPE_USER_DEFINED);
		type->unresolved.path = path;
		type->name_loc = tok;
		func->func.type_parent = type;
		advance_and_verify(TOKEN_TYPE_IDENT);

		TRY_CONSUME_OR(TOKEN_DOT, "Expected '.' after the type in a method function.", &poisoned_decl);
	}

	EXPECT_IDENT_FOR_OR("function name", &poisoned_decl);
	func->name = tok;
	advance_and_verify(TOKEN_IDENT);

	if (!parse_opt_parameter_type_list(visibility, &(func->func.function_signature), is_interface)) return &poisoned_decl;

	if (!parse_opt_throw_declaration(&(func->func.function_signature))) return &poisoned_decl;

	if (is_interface)
	{
		if (tok.type == TOKEN_LBRACE)
		{
			// Report at the '{' itself, which is the current token.
			SEMA_ERROR(tok, "Functions bodies are not allowed in interface files.");
			return &poisoned_decl;
		}
		TRY_CONSUME_OR(TOKEN_EOS, "Expected ';' after function declaration.", &poisoned_decl);
		return func;
	}

	TRY_EXPECT_OR(TOKEN_LBRACE, "Expected the beginning of a block with '{'", &poisoned_decl);

	func->func.body = TRY_AST_OR(parse_compound_stmt(), &poisoned_decl);

	DEBUG_LOG("Finished parsing function %s", func->name.string);
	return func;
}
/**
 * error_declaration
 *		: ERROR TYPE_IDENT '{' error_list '}'
 *		;
 *
 * A constant whose name repeats an earlier one is reported, poisoned together
 * with the error declaration, and still appended to the list.
 *
 * @param visibility the visibility given to the error type and its constants.
 * @return the error declaration, or &poisoned_decl after a syntax error.
 */
static inline Decl *parse_error_declaration(Visibility visibility)
{
	advance_and_verify(TOKEN_ERROR_TYPE);

	Decl *error_decl = decl_new_user_defined_type(tok, DECL_ERROR, visibility);
	if (!consume_type_name("error type")) return &poisoned_decl;

	CONSUME_OR(TOKEN_LBRACE, &poisoned_decl);

	while (tok.type == TOKEN_CONST_IDENT)
	{
		Decl *constant = decl_new(DECL_ERROR_CONSTANT, tok, error_decl->visibility);
		constant->error_constant.parent = error_decl;

		// Look for an earlier constant with the same name.
		VECEACH(error_decl->error.error_constants, i)
		{
			Decl *previous = error_decl->error.error_constants[i];
			if (previous->name.string != tok.string) continue;

			SEMA_ERROR(tok, "This error is declared twice.");
			sema_prev_at_range(previous->name.span, "The previous declaration was here.");
			decl_poison(constant);
			decl_poison(error_decl);
			break;
		}

		error_decl->error.error_constants = VECADD(error_decl->error.error_constants, constant);
		advance_and_verify(TOKEN_CONST_IDENT);
		if (!try_consume(TOKEN_COMMA)) break;
	}

	// A differently cased identifier where a constant was expected.
	if (tok.type == TOKEN_TYPE_IDENT || tok.type == TOKEN_IDENT)
	{
		SEMA_ERROR(tok, "Errors must be all upper case.");
		return &poisoned_decl;
	}

	CONSUME_OR(TOKEN_RBRACE, &poisoned_decl);
	return error_decl;
}
/**
 * Expects the current token to be 'enum'.
 *
 * enum ::= ENUM TYPE_NAME (':' type)? '{' enum_def (',' enum_def)* ','? '}'
 *
 * enum_def ::= CAPS_IDENT ('=' const_expr)?
 *
 * TODO enum extra data?
 *
 * The underlying type is type_int unless one is given after ':'. A constant
 * whose name repeats an earlier one is reported and poisoned, but still appended.
 *
 * @param visibility the visibility given to the enum and its constants.
 * @return the enum declaration, or &poisoned_decl after a syntax error.
 */
static inline Decl *parse_enum_declaration(Visibility visibility)
{
	advance_and_verify(TOKEN_ENUM);

	Decl *decl = decl_new_user_defined_type(tok, DECL_ENUM, visibility);

	if (!consume_type_name("enum")) return &poisoned_decl;

	Type *type = NULL;
	if (try_consume(TOKEN_COLON))
	{
		type = TRY_TYPE_OR(parse_base_type(), &poisoned_decl);
	}

	// This function returns a Decl *, so failure is the poisoned decl, not 'false'.
	CONSUME_OR(TOKEN_LBRACE, &poisoned_decl);

	decl->enums.type = type ? type : type_int;
	while (!try_consume(TOKEN_RBRACE))
	{
		Decl *enum_const = decl_new(DECL_ENUM_CONSTANT, tok, decl->visibility);
		enum_const->enum_constant.parent = decl;
		VECEACH(decl->enums.values, i)
		{
			Decl *other_constant = decl->enums.values[i];
			if (other_constant->name.string == tok.string)
			{
				SEMA_ERROR(tok, "This enum constant is declared twice.");
				sema_prev_at_range(other_constant->name.span, "The previous declaration was here.");
				decl_poison(enum_const);
				break;
			}
		}
		if (!consume_const_name("enum constant"))
		{
			return &poisoned_decl;
		}
		if (try_consume(TOKEN_EQ))
		{
			enum_const->enum_constant.expr = TRY_EXPR_OR(parse_expr(), &poisoned_decl);
		}
		decl->enums.values = VECADD(decl->enums.values, enum_const);
		// Allow trailing ','
		if (!try_consume(TOKEN_COMMA))
		{
			EXPECT_OR(TOKEN_RBRACE, &poisoned_decl);
		}
	}
	return decl;
}
/**
 * Parses '{' top_level* '}' and appends every accepted declaration to *decls.
 * A NULL result from parse_top_level is ignored; a declaration rejected by
 * decl_ok() triggers recover_top_level().
 *
 * @param decls the declaration vector to append to.
 * @return true if both braces were found, false otherwise.
 */
static inline bool parse_conditional_top_level(Decl ***decls)
{
	CONSUME_OR(TOKEN_LBRACE, false);
	while (tok.type != TOKEN_RBRACE && tok.type != TOKEN_EOF)
	{
		Decl *decl = parse_top_level();
		if (!decl) continue;
		if (!decl_ok(decl))
		{
			recover_top_level();
			continue;
		}
		vec_add(*decls, decl);
	}
	CONSUME_OR(TOKEN_RBRACE, false);
	return true;
}
/**
 * Parses a top level compile time conditional: the TOKEN_CT_IF token with a
 * parenthesised condition and a '{ ... }' block, followed by any number of
 * TOKEN_CT_ELIF branches and an optional TOKEN_CT_ELSE block. Each later
 * branch is linked through the 'elif' field of the branch before it.
 *
 * @return the DECL_CT_IF declaration, or &poisoned_decl after an error.
 */
static inline Decl *parse_ct_if_top_level(void)
{
	Decl *ct = decl_new(DECL_CT_IF, tok, VISIBLE_LOCAL);
	advance_and_verify(TOKEN_CT_IF);
	ct->ct_if_decl.expr = TRY_EXPR_OR(parse_paren_expr(), &poisoned_decl);

	if (!parse_conditional_top_level(&ct->ct_if_decl.then)) return &poisoned_decl;

	// 'tail' is the branch that the next elif/else gets attached to.
	CtIfDecl *tail = &ct->ct_if_decl;
	while (tok.type == TOKEN_CT_ELIF)
	{
		advance_and_verify(TOKEN_CT_ELIF);
		Decl *elif_decl = decl_new(DECL_CT_ELIF, tok, VISIBLE_LOCAL);
		elif_decl->ct_elif_decl.expr = TRY_EXPR_OR(parse_paren_expr(), &poisoned_decl);
		if (!parse_conditional_top_level(&elif_decl->ct_elif_decl.then)) return &poisoned_decl;
		tail->elif = elif_decl;
		tail = &elif_decl->ct_elif_decl;
	}

	if (tok.type != TOKEN_CT_ELSE) return ct;

	advance_and_verify(TOKEN_CT_ELSE);
	Decl *else_decl = decl_new(DECL_CT_ELSE, tok, VISIBLE_LOCAL);
	tail->elif = else_decl;
	if (!parse_conditional_top_level(&else_decl->ct_else_decl)) return &poisoned_decl;
	return ct;
}
/**
 * Parses IDENT '+=' initializer at the top level into a DECL_ARRAY_VALUE
 * declaration named after the identifier. No terminating ';' is consumed here.
 *
 * @return the declaration, or &poisoned_decl after an error.
 */
static inline Decl *parse_incremental_array(void)
{
	Token array_name = tok;
	advance_and_verify(TOKEN_IDENT);

	CONSUME_OR(TOKEN_PLUS_ASSIGN, &poisoned_decl);

	Decl *array_value = decl_new(DECL_ARRAY_VALUE, array_name, VISIBLE_LOCAL);
	array_value->incr_array_decl = TRY_EXPR_OR(parse_initializer(), &poisoned_decl);
	return array_value;
}
/**
 * Reports an error at the current token if an explicit 'public' or 'local'
 * visibility was given where none is allowed.
 *
 * @param visibility the visibility that was parsed before the current token.
 * @return false if the visibility was VISIBLE_PUBLIC or VISIBLE_LOCAL, true otherwise.
 */
static inline bool check_no_visibility_before(Visibility visibility)
{
	if (visibility == VISIBLE_PUBLIC)
	{
		SEMA_ERROR(tok, "Unexpected 'public' before '%.*s'.", tok.span.length, tok.start);
		return false;
	}
	if (visibility == VISIBLE_LOCAL)
	{
		SEMA_ERROR(tok, "Unexpected 'local' before '%.*s'.", tok.span.length, tok.start);
		return false;
	}
	return true;
}
/**
* top_level
* : struct_declaration
* | enum_declaration
* | error_declaration
* | const_declaration
* | global_declaration
* | macro_declaration
* | func_definition
* | generics_declaration
* | typedef_declaration
* | conditional_compilation
* | attribute_declaration
* ;
* @param visibility
* @return true if parsing worked
*/
static inline Decl *parse_top_level(void)
{
Visibility visibility = VISIBLE_MODULE;
switch (tok.type)
{
case TOKEN_PUBLIC:
visibility = VISIBLE_PUBLIC;
advance();
break;
case TOKEN_LOCAL:
visibility = VISIBLE_LOCAL;
advance();
break;
default:
break;
}
switch (tok.type)
{
case TOKEN_ATTRIBUTE:
return parse_attribute_declaration(visibility);
case TOKEN_FUNC:
return parse_func_definition(visibility, false);
case TOKEN_CT_IF:
if (!check_no_visibility_before(visibility)) return false;
return parse_ct_if_top_level();
case TOKEN_CONST:
return parse_const_declaration(visibility);
case TOKEN_STRUCT:
case TOKEN_UNION:
return parse_struct_declaration(visibility);
case TOKEN_GENERIC:
return parse_generics_declaration(visibility);
case TOKEN_MACRO:
return parse_macro_declaration(visibility);
case TOKEN_ENUM:
return parse_enum_declaration(visibility);
case TOKEN_ERROR_TYPE:
return parse_error_declaration(visibility);
case TOKEN_TYPEDEF:
return parse_typedef_declaration(visibility);
case TOKEN_TYPE:
case TOKEN_TYPE_IDENT:
// All of these start type
return parse_global_declaration(visibility);
case TOKEN_IDENT:
if (!check_no_visibility_before(visibility)) return false;
return parse_incremental_array();
case TOKEN_EOF:
assert(visibility != VISIBLE_MODULE);
sema_error_at(tok.span.loc - 1, "Expected a top level declaration'.");
return &poisoned_decl;
default:
// We could have included all fundamental types above, but do it here instead.
if (token_is_type(tok.type))
{
return parse_global_declaration(visibility);
}
error_at_current("Unexpected token found");
return &poisoned_decl;
}
}
/**
 * Parses the file currently being lexed: the module statement, the imports,
 * and then top level declarations until EOF. Declarations accepted by decl_ok()
 * are registered as globals in the current context; after any other result
 * recover_top_level() is called.
 */
void parse_current(void)
{
	// Two advances fill both the current token and the lookahead.
	advance();
	advance();

	parse_module();
	parse_imports();

	while (tok.type != TOKEN_EOF)
	{
		Decl *decl = parse_top_level();
		if (!decl_ok(decl))
		{
			recover_top_level();
			continue;
		}
		context_register_global_decl(current_context, decl);
	}
}
/**
 * Parses the file belonging to the given context: hands the file to the lexer,
 * pushes the context and then runs parse_current().
 *
 * @param context the context whose 'file' is parsed.
 */
void parse_file(Context *context)
{
	lexer_add_file_for_lexing(context->file);
	context_push(context);
	parse_current();
}
// Returns _expr from the enclosing function when expr_ok() rejects it.
// Note that _expr is evaluated twice in that case.
#define CHECK_EXPR(_expr) do { if (!expr_ok(_expr)) return _expr; } while(0)
/**
 * Infix rule for 'cond ? a : b' and the elvis form 'cond ?: b'.
 * For elvis the 'then' branch is left as NULL.
 *
 * @param left_side the already parsed condition.
 * @return the EXPR_CONDITIONAL node, or &poisoned_expr after an error.
 */
static Expr *parse_conditional_expr(Expr *left_side)
{
	assert(expr_ok(left_side));
	Expr *ternary = EXPR_NEW_EXPR(EXPR_CONDITIONAL, left_side);
	ternary->conditional_expr.cond = left_side;

	if (!try_consume(TOKEN_ELVIS))
	{
		// Full form: '?' then-expression ':'
		advance_and_verify(TOKEN_QUESTION);
		Expr *then_branch = TRY_EXPR_OR(parse_precedence(PREC_CONDITIONAL + 1), &poisoned_expr);
		ternary->conditional_expr.then_expr = then_branch;
		CONSUME_OR(TOKEN_COLON, &poisoned_expr);
	}
	else
	{
		ternary->conditional_expr.then_expr = NULL;
	}

	Expr *else_branch = TRY_EXPR_OR(parse_precedence(PREC_CONDITIONAL + 1), &poisoned_expr);
	ternary->conditional_expr.else_expr = else_branch;
	return ternary;
}
/**
 * Prefix rule for unary operators. The operand is parsed at the precedence
 * registered in 'rules' for the operator token.
 *
 * @param left must be NULL, a prefix rule has no left hand side.
 * @return the EXPR_UNARY node, or the failing operand expression.
 */
static Expr *parse_unary_expr(Expr *left)
{
	assert(!left && "Did not expect a left hand side!");

	TokenType op = tok.type;
	Expr *unary = EXPR_NEW_TOKEN(EXPR_UNARY, tok);
	unary->unary_expr.operator = op;
	advance();

	Expr *operand = parse_precedence(rules[op].precedence);
	CHECK_EXPR(operand);

	unary->unary_expr.expr = operand;
	return unary;
}
/**
 * Infix rule for postfix operators: wraps the left hand side in an
 * EXPR_POST_UNARY node carrying the current token's type as operator,
 * then steps past the operator.
 *
 * @param left the operand.
 * @return the EXPR_POST_UNARY node.
 */
static Expr *parse_post_unary(Expr *left)
{
	assert(expr_ok(left));
	Expr *post = EXPR_NEW_TOKEN(EXPR_POST_UNARY, tok);
	post->post_expr.operator = tok.type;
	post->post_expr.expr = left;
	advance();
	return post;
}
/**
 * grouping_expr
 *		: '(' expression ')'
 *		;
 *
 * Prefix rule; the inner expression is returned directly, no wrapper node is created.
 *
 * @param left must be NULL.
 * @return the inner expression, or &poisoned_expr after an error.
 */
static Expr *parse_grouping_expr(Expr *left)
{
	assert(!left && "Unexpected left hand side");
	advance_and_verify(TOKEN_LPAREN);
	Expr *inner = TRY_EXPR_OR(parse_expr(), &poisoned_expr);
	CONSUME_OR(TOKEN_RPAREN, &poisoned_expr);
	return inner;
}
/**
 * Infix rule for binary operators. The right hand side is parsed one
 * precedence level above the operator's own, except that '=' followed
 * by '{' takes an initializer list instead.
 *
 * @param left_side the already parsed left operand.
 * @return the EXPR_BINARY node, or &poisoned_expr if the right side failed.
 */
static Expr *parse_binary(Expr *left_side)
{
	assert(left_side && expr_ok(left_side));

	// The operator is the current token.
	TokenType op = tok.type;
	advance();

	Expr *rhs;
	if (op != TOKEN_EQ || tok.type != TOKEN_LBRACE)
	{
		rhs = TRY_EXPR_OR(parse_precedence(rules[op].precedence + 1), &poisoned_expr);
	}
	else
	{
		rhs = TRY_EXPR_OR(parse_initializer_list(), &poisoned_expr);
	}

	Expr *binary = EXPR_NEW_EXPR(EXPR_BINARY, left_side);
	binary->binary_expr.operator = op;
	binary->binary_expr.left = left_side;
	binary->binary_expr.right = rhs;
	return binary;
}
/**
 * Infix rule for a call: '(' followed by comma separated argument
 * expressions and ')'.
 *
 * @param left the expression being called.
 * @return the EXPR_CALL node, or &poisoned_expr after an error.
 */
static Expr *parse_call_expr(Expr *left)
{
	assert(left && expr_ok(left));

	advance_and_verify(TOKEN_LPAREN);

	Expr **arguments = NULL;
	while (!try_consume(TOKEN_RPAREN))
	{
		Expr *argument = TRY_EXPR_OR(parse_expr(), &poisoned_expr);
		arguments = VECADD(arguments, argument);
		COMMA_RPAREN_OR(&poisoned_expr);
	}

	Expr *call = EXPR_NEW_EXPR(EXPR_CALL, left);
	call->call_expr.arguments = arguments;
	call->call_expr.function = left;
	return call;
}
/**
 * Infix rule for subscripting: '[' expression ']'.
 *
 * @param left the expression being indexed.
 * @return the EXPR_SUBSCRIPT node, or &poisoned_expr after an error.
 */
static Expr *parse_subscript_expr(Expr *left)
{
	assert(left && expr_ok(left));

	advance_and_verify(TOKEN_LBRACKET);
	Expr *index_expr = TRY_EXPR_OR(parse_expr(), &poisoned_expr);
	CONSUME_OR(TOKEN_RBRACKET, &poisoned_expr);

	Expr *subscript = EXPR_NEW_EXPR(EXPR_SUBSCRIPT, left);
	subscript->subscript_expr.index = index_expr;
	subscript->subscript_expr.expr = left;
	return subscript;
}
/**
 * Infix rule for member access: '.' followed by an identifier.
 *
 * @param left the expression whose member is accessed.
 * @return the EXPR_ACCESS node, or &poisoned_expr if no identifier follows the '.'.
 */
static Expr *parse_access_expr(Expr *left)
{
	assert(left && expr_ok(left));

	advance_and_verify(TOKEN_DOT);

	Expr *access = EXPR_NEW_EXPR(EXPR_ACCESS, left);
	access->access_expr.parent = left;
	// Record the token after the '.' first; it is verified to be an identifier next.
	access->access_expr.sub_element = tok;
	TRY_CONSUME_OR(TOKEN_IDENT, "Expected identifier", &poisoned_expr);
	return access;
}
/**
 * Prefix rule for string literals. Adjacent string tokens are concatenated
 * into a single constant. The bytes copied for each token are tok.start up to
 * tok.span.length, and the joined buffer is NUL terminated.
 *
 * @param left must be NULL.
 * @return a resolved EXPR_CONST node of type type_string.
 */
static Expr *parse_string_literal(Expr *left)
{
	assert(!left && "Had left hand side");

	Expr *literal = EXPR_NEW_TOKEN(EXPR_CONST, tok);
	literal->resolve_status = RESOLVE_DONE;
	literal->type = type_string;

	// Copy the first part.
	size_t total = tok.span.length;
	char *buffer = malloc_arena(total + 1);
	memcpy(buffer, tok.start, total);
	advance_and_verify(TOKEN_STRING);

	// Append each following part into a freshly allocated, larger buffer.
	while (tok.type == TOKEN_STRING)
	{
		size_t part = tok.span.length;
		char *joined = malloc_arena(total + part + 1);
		memcpy(joined, buffer, total);
		memcpy(joined + total, tok.start, part);
		buffer = joined;
		total += part;
		advance();
	}
	buffer[total] = '\0';

	literal->const_expr.string.chars = buffer;
	literal->const_expr.string.len = total;
	literal->const_expr.type = CONST_STRING;
	return literal;
}
/**
 * Map a single hexadecimal digit character to its numeric value (0-15).
 *
 * No validation is performed: the caller must only pass '0'-'9',
 * 'A'-'F' or 'a'-'f'.
 */
static inline uint64_t parse_integer_hex_digit(char c)
{
	if (c < 'A') return (uint64_t)(c - '0');
	if (c < 'a') return (uint64_t)(c - 'A' + 10);
	return (uint64_t)(c - 'a' + 10);
}

/**
 * Decode a character literal such as 'a', 'ab' or '\x41' into an integer.
 *
 * Each character (or `\xNN` escape) contributes one byte. The bytes are
 * stored in source order and then reinterpreted as an unsigned integer of
 * the same width through a union, so the resulting value follows the host
 * byte order.
 *
 * NOTE(review): a backslash followed by anything other than 'x' stores the
 * following character unchanged (e.g. '\n' yields 'n'), exactly as before.
 * Confirm whether other escapes are meant to be translated here.
 *
 * @param start points at the opening quote of the literal.
 * @param end points one past the closing quote of the literal.
 * @param value receives the decoded value on success.
 * @return true on success, false if the literal is not 1, 2, 4 or 8 bytes wide.
 */
static bool parse_integer_char_literal(const char *start, const char *end, uint64_t *value)
{
	union
	{
		uint8_t u8;
		uint16_t u16;
		uint32_t u32;
		uint64_t u64;
		uint8_t b[8];
	} bytes = { .u64 = 0 };
	size_t width = 0;
	const char *cursor = start;
	// Walk the characters strictly between the two quotes.
	while (++cursor < end - 1)
	{
		// Refuse to write past the 8 byte buffer.
		if (width >= sizeof(bytes.b)) return false;
		uint8_t byte;
		if (*cursor == '\\' && *(++cursor) == 'x')
		{
			uint64_t high = parse_integer_hex_digit(*(++cursor));
			uint64_t low = parse_integer_hex_digit(*(++cursor));
			byte = (uint8_t)((high << 4u) + low);
		}
		else
		{
			byte = (uint8_t)*cursor;
		}
		bytes.b[width++] = byte;
	}
	switch (width)
	{
		case 1:
			*value = bytes.u8;
			return true;
		case 2:
			*value = bytes.u16;
			return true;
		case 4:
			*value = bytes.u32;
			return true;
		case 8:
			*value = bytes.u64;
			return true;
		default:
			return false;
	}
}

/**
 * Parse an integer token into a constant expression.
 *
 * Handles character literals (token text starting with a single quote),
 * and `0x` hexadecimal, `0o` octal, `0b` binary and decimal numbers.
 * Underscores inside numbers are skipped. Digits are not validated here;
 * the token text is assumed to contain only digits valid for its base.
 *
 * The result is typed type_compuint when the value exceeds INT64_MAX and
 * type_compint otherwise.
 *
 * @param left must be null.
 * @return an EXPR_CONST expression of CONST_INT kind, or &poisoned_expr if
 *         the value does not fit in 64 bits or a character literal has an
 *         unsupported width.
 */
static Expr *parse_integer(Expr *left)
{
	assert(!left && "Had left hand side");
	Expr *expr_int = EXPR_NEW_TOKEN(EXPR_CONST, tok);
	const char *string = tok.start;
	const char *end = string + tok.span.length;
	uint64_t i = 0;
	if (string[0] == '\'')
	{
		if (!parse_integer_char_literal(string, end, &i))
		{
			SEMA_ERROR(tok, "Character literals may only be 1, 2, 4 or 8 characters wide.");
			return &poisoned_expr;
		}
	}
	else
	{
		// The second character selects the base; tokens of one or two
		// characters are always treated as decimal.
		switch (tok.span.length > 2 ? string[1] : '0')
		{
			case 'x':
				string += 2;
				while (string < end)
				{
					char c = *(string++);
					if (c == '_') continue;
					// After the shift the low bits are zero, so adding
					// the digit cannot overflow.
					if (i > (UINT64_MAX >> 4u))
					{
						SEMA_ERROR(tok, "Number is larger than an unsigned 64 bit number.");
						return &poisoned_expr;
					}
					i <<= 4u;
					i += parse_integer_hex_digit(c);
				}
				break;
			case 'o':
				string += 2;
				while (string < end)
				{
					char c = *(string++);
					if (c == '_') continue;
					if (i > (UINT64_MAX >> 3u))
					{
						SEMA_ERROR(tok, "Number is larger than an unsigned 64 bit number.");
						return &poisoned_expr;
					}
					i <<= 3u;
					i += (uint64_t)(c - '0');
				}
				break;
			case 'b':
				string += 2;
				while (string < end)
				{
					char c = *(string++);
					if (c == '_') continue;
					if (i > (UINT64_MAX >> 1u))
					{
						SEMA_ERROR(tok, "Number is larger than an unsigned 64 bit number.");
						return &poisoned_expr;
					}
					i <<= 1u;
					i += (uint64_t)(c - '0');
				}
				break;
			default:
				while (string < end)
				{
					char c = *(string++);
					if (c == '_') continue;
					uint64_t digit = (uint64_t)(c - '0');
					// Check the multiply and the add together:
					// i * 10 + digit fits iff i <= (UINT64_MAX - digit) / 10.
					if (i > (UINT64_MAX - digit) / 10)
					{
						SEMA_ERROR(tok, "Number is larger than an unsigned 64 bit number.");
						return &poisoned_expr;
					}
					i = i * 10 + digit;
				}
				break;
		}
	}
	expr_int->const_expr.i = i;
	expr_int->const_expr.type = CONST_INT;
	expr_int->type = i > INT64_MAX ? type_compuint : type_compint;
	expr_int->resolve_status = RESOLVE_DONE;
	advance();
	return expr_int;
}
/**
 * Parse a floating point token into a constant expression.
 *
 * The token text is converted with strtold; the conversion must stop
 * exactly at the end of the token or the literal is rejected.
 *
 * @param left must be null.
 * @return an EXPR_CONST expression of CONST_FLOAT kind, or &poisoned_expr
 *         if the token text is not fully consumed by the conversion.
 */
static Expr *parse_double(Expr *left)
{
	assert(!left && "Had left hand side");
	Expr *float_expr = EXPR_NEW_TOKEN(EXPR_CONST, tok);

	// IMPROVE
	char *parsed_end = NULL;
	long double value = strtold(tok.start, &parsed_end);
	if (parsed_end != tok.start + tok.span.length)
	{
		SEMA_ERROR(tok, "Invalid float value");
		return &poisoned_expr;
	}
	advance();

	float_expr->resolve_status = RESOLVE_DONE;
	float_expr->type = type_compfloat;
	float_expr->const_expr.type = CONST_FLOAT;
	float_expr->const_expr.f = value;
	return float_expr;
}
/**
 * Parse a `true` or `false` token into a boolean constant.
 *
 * @param left must be null.
 * @return an EXPR_CONST expression of CONST_BOOL kind; the value is true
 *         exactly when the current token is TOKEN_TRUE.
 */
static Expr *parse_bool(Expr *left)
{
	assert(!left && "Had left hand side");
	Expr *bool_expr = EXPR_NEW_TOKEN(EXPR_CONST, tok);
	bool_expr->resolve_status = RESOLVE_DONE;
	bool_expr->type = type_bool;
	// Must read tok.type before advancing past the literal.
	bool_expr->const_expr = (ExprConst) { .type = CONST_BOOL, .b = tok.type == TOKEN_TRUE };
	advance();
	return bool_expr;
}
/**
 * Parse the nil literal into a constant expression typed as a void pointer.
 *
 * @param left must be null.
 * @return an EXPR_CONST expression of CONST_NIL kind.
 */
static Expr *parse_nil(Expr *left)
{
	assert(!left && "Had left hand side");
	Expr *nil_expr = EXPR_NEW_TOKEN(EXPR_CONST, tok);
	nil_expr->resolve_status = RESOLVE_DONE;
	nil_expr->type = type_voidptr;
	nil_expr->const_expr.type = CONST_NIL;
	advance();
	return nil_expr;
}
/**
 * initializer_list
 *  : '{' initializer_values '}'
 *  | '{' initializer_values ',' '}'
 *  ;
 *
 * initializer_values
 *  : initializer
 *  | initializer_values ',' initializer
 *  ;
 *
 * Note that the implementation also accepts an empty list `{}`.
 *
 * @return an EXPR_INITIALIZER_LIST expression holding the parsed elements,
 *         or &poisoned_expr if the opening brace, an element, or the
 *         separator after an element is invalid.
 */
static Expr *parse_initializer_list(void)
{
	Expr *list = EXPR_NEW_TOKEN(EXPR_INITIALIZER_LIST, tok);
	CONSUME_OR(TOKEN_LBRACE, &poisoned_expr);
	while (!try_consume(TOKEN_RBRACE))
	{
		Expr *element = TRY_EXPR_OR(parse_initializer(), &poisoned_expr);
		list->initializer_expr = VECADD(list->initializer_expr, element);

		// An element is followed either by ',' (possibly trailing) ...
		if (try_consume(TOKEN_COMMA)) continue;
		// ... or directly by the closing brace, consumed by the loop test.
		if (tok.type == TOKEN_RBRACE) continue;

		SEMA_ERROR(tok, "Expected ',' or '}'");
		return &poisoned_expr;
	}
	return list;
}
/**
 * Parse a single initializer: a nested `{ ... }` initializer list when the
 * current token is '{', otherwise an ordinary expression.
 *
 * @return the parsed expression as returned by parse_initializer_list or
 *         parse_expr.
 */
static Expr *parse_initializer(void)
{
	return tok.type == TOKEN_LBRACE ? parse_initializer_list() : parse_expr();
}
/**
 * method_ref
 *  : '.' IDENT
 *  ;
 *
 * Called with the current token at the '.' that follows a type.
 *
 * @param type the type whose member is being referenced.
 * @return an EXPR_TYPE_ACCESS expression, or &poisoned_expr if the token
 *         after the dot is not a TOKEN_IDENT.
 */
static Expr *parse_type_access(Type *type)
{
	Expr *access = EXPR_NEW_TOKEN(EXPR_TYPE_ACCESS, tok);
	access->type_access.type = type;

	advance_and_verify(TOKEN_DOT);

	// Store the name token first; the consume below verifies its kind.
	access->type_access.name = tok;
	TRY_CONSUME_OR(TOKEN_IDENT, "Expected a function name or value", &poisoned_expr);

	return access;
}
/**
 * Turn the current token into an identifier expression and advance.
 *
 * The token kind is not checked here; callers decide which tokens count
 * as identifiers.
 *
 * @param path the path preceding the identifier, or NULL if there is none.
 * @return an EXPR_IDENTIFIER expression for the current token.
 */
static Expr *parse_identifier_with_path(Path *path)
{
	Expr *ident = EXPR_NEW_TOKEN(EXPR_IDENTIFIER, tok);
	ident->identifier_expr.path = path;
	ident->identifier_expr.identifier = tok;
	advance();
	return ident;
}
/**
 * Prefix rule for identifiers that carry no path.
 *
 * @param left must be null.
 * @return the expression produced by parse_identifier_with_path(NULL).
 */
static Expr *parse_identifier(Expr *left)
{
	assert(!left && "Unexpected left hand side");
	Path *no_path = NULL;
	return parse_identifier_with_path(no_path);
}
/**
 * type_identifier
 *  : TYPE_IDENT initializer_list
 *  | TYPE_IDENT method_ref
 *  ;
 *
 * Called with the current token at the TYPE_IDENT.
 *
 * @param path the path preceding the type name, or NULL if there is none.
 * @return an EXPR_STRUCT_VALUE expression when the type is followed by
 *         '{', otherwise the result of parse_type_access; &poisoned_expr
 *         on a parse error.
 */
static Expr *parse_type_identifier_with_path(Path *path)
{
	Type *named_type = TYPE_UNRESOLVED(tok);
	named_type->unresolved.path = path;
	advance_and_verify(TOKEN_TYPE_IDENT);

	if (tok.type != TOKEN_LBRACE)
	{
		// Without an initializer list, only `.name` may follow the type.
		EXPECT_OR(TOKEN_DOT, &poisoned_expr);
		return parse_type_access(named_type);
	}

	Expr *struct_value = EXPR_NEW_TOKEN(EXPR_STRUCT_VALUE, tok);
	struct_value->struct_value_expr.type = named_type;
	struct_value->struct_value_expr.init_expr = TRY_EXPR_OR(parse_initializer_list(), &poisoned_expr);
	return struct_value;
}
/**
 * Prefix rule for a type identifier that carries no path.
 *
 * @param left must be null.
 * @return Expr* the expression produced by
 *         parse_type_identifier_with_path(NULL).
 */
static Expr *parse_type_identifier(Expr *left)
{
	assert(!left && "Unexpected left hand side");
	Path *no_path = NULL;
	return parse_type_identifier_with_path(no_path);
}
/**
 * Prefix rule for TOKEN_IDENT: parse a path via parse_path() and then the
 * identifier or type identifier that follows it.
 *
 * @param left must be null.
 * @return the identifier or type expression, or &poisoned_expr if the
 *         token after the path is neither kind of identifier.
 */
static Expr *parse_maybe_scope(Expr *left)
{
	assert(!left && "Unexpected left hand side");
	Path *scope = parse_path();
	switch (tok.type)
	{
		case TOKEN_TYPE_IDENT:
			return parse_type_identifier_with_path(scope);
		case TOKEN_CONST_IDENT:
		case TOKEN_AT_IDENT:
		case TOKEN_CT_IDENT:
		case TOKEN_IDENT:
			return parse_identifier_with_path(scope);
		default:
			break;
	}
	SEMA_ERROR(tok, "Expected a type, function or constant.");
	return &poisoned_expr;
}
/**
 * Parse a type expression of the form TOKEN_TYPE '(' type ')'.
 *
 * @param left must be null.
 * @return an EXPR_TYPE expression, or &poisoned_expr if a parenthesis is
 *         missing or the type fails to parse.
 */
static Expr *parse_type_expr(Expr *left)
{
	assert(!left && "Unexpected left hand side");
	Expr *type_expr = EXPR_NEW_TOKEN(EXPR_TYPE, tok);
	advance_and_verify(TOKEN_TYPE);

	CONSUME_OR(TOKEN_LPAREN, &poisoned_expr);
	type_expr->type_expr.type = TRY_TYPE_OR(parse_type_expression(), &poisoned_expr);
	CONSUME_OR(TOKEN_RPAREN, &poisoned_expr);

	return type_expr;
}
/**
 * Parse a cast of the form TOKEN_CAST '(' type ',' expr ')'.
 *
 * The target type is stored as the expression's own type and the operand
 * in expr_cast.expr.
 *
 * @param left must be null.
 * @return an EXPR_CAST expression, or &poisoned_expr if any part of the
 *         construct fails to parse.
 */
static Expr *parse_cast_expr(Expr *left)
{
	assert(!left && "Unexpected left hand side");
	Expr *cast = EXPR_NEW_TOKEN(EXPR_CAST, tok);
	advance_and_verify(TOKEN_CAST);

	// '(' target-type ','
	CONSUME_OR(TOKEN_LPAREN, &poisoned_expr);
	cast->type = TRY_TYPE_OR(parse_type_expression(), &poisoned_expr);
	CONSUME_OR(TOKEN_COMMA, &poisoned_expr);

	// operand ')'
	cast->expr_cast.expr = TRY_EXPR_OR(parse_expr(), &poisoned_expr);
	CONSUME_OR(TOKEN_RPAREN, &poisoned_expr);

	return cast;
}
ParseRule rules[TOKEN_EOF + 1] = {
[TOKEN_QUESTION] = { NULL, parse_conditional_expr, PREC_CONDITIONAL },
[TOKEN_ELVIS] = { NULL, parse_conditional_expr, PREC_CONDITIONAL },
[TOKEN_PLUSPLUS] = { parse_unary_expr, parse_post_unary, PREC_CALL },
[TOKEN_MINUSMINUS] = { parse_unary_expr, parse_post_unary, PREC_CALL },
[TOKEN_LPAREN] = { parse_grouping_expr, parse_call_expr, PREC_CALL },
[TOKEN_TYPE] = { parse_type_expr, NULL, PREC_NONE },
[TOKEN_CAST] = { parse_cast_expr, NULL, PREC_NONE },
//[TOKEN_SIZEOF] = { parse_sizeof, NULL, PREC_NONE },
[TOKEN_LBRACKET] = { NULL, parse_subscript_expr, PREC_CALL },
[TOKEN_MINUS] = { parse_unary_expr, parse_binary, PREC_ADDITIVE },
[TOKEN_PLUS] = { NULL, parse_binary, PREC_ADDITIVE },
[TOKEN_DIV] = { NULL, parse_binary, PREC_MULTIPLICATIVE },
[TOKEN_MOD] = { NULL, parse_binary, PREC_MULTIPLICATIVE },
[TOKEN_STAR] = { parse_unary_expr, parse_binary, PREC_MULTIPLICATIVE },
[TOKEN_DOT] = { NULL, parse_access_expr, PREC_CALL },
[TOKEN_NOT] = { parse_unary_expr, NULL, PREC_UNARY },
[TOKEN_BIT_NOT] = { parse_unary_expr, NULL, PREC_UNARY },
[TOKEN_BIT_XOR] = { NULL, parse_binary, PREC_BIT },
[TOKEN_BIT_OR] = { NULL, parse_binary, PREC_BIT },
[TOKEN_AMP] = { parse_unary_expr, parse_binary, PREC_BIT },
[TOKEN_EQEQ] = { NULL, parse_binary, PREC_RELATIONAL },
[TOKEN_NOT_EQUAL] = { NULL, parse_binary, PREC_RELATIONAL },
[TOKEN_GREATER] = { NULL, parse_binary, PREC_RELATIONAL },
[TOKEN_GREATER_EQ] = { NULL, parse_binary, PREC_RELATIONAL },
[TOKEN_LESS] = { NULL, parse_binary, PREC_RELATIONAL },
[TOKEN_LESS_EQ] = { NULL, parse_binary, PREC_RELATIONAL },
[TOKEN_SHL] = { NULL, parse_binary, PREC_SHIFT },
[TOKEN_SHR] = { NULL, parse_binary, PREC_SHIFT },
[TOKEN_TRUE] = { parse_bool, NULL, PREC_NONE },
[TOKEN_FALSE] = { parse_bool, NULL, PREC_NONE },
[TOKEN_NIL] = { parse_nil, NULL, PREC_NONE },
[TOKEN_INTEGER] = { parse_integer, NULL, PREC_NONE },
[TOKEN_IDENT] = { parse_maybe_scope, NULL, PREC_NONE },
[TOKEN_TYPE_IDENT] = { parse_type_identifier, NULL, PREC_NONE },
[TOKEN_CT_IDENT] = { parse_identifier, NULL, PREC_NONE },
[TOKEN_AT_IDENT] = { parse_identifier, NULL, PREC_NONE },
[TOKEN_CONST_IDENT] = { parse_identifier, NULL, PREC_NONE },
[TOKEN_STRING] = { parse_string_literal, NULL, PREC_NONE },
[TOKEN_FLOAT] = { parse_double, NULL, PREC_NONE },
[TOKEN_OR] = { NULL, parse_binary, PREC_LOGICAL },
[TOKEN_AND] = { NULL, parse_binary, PREC_LOGICAL },
[TOKEN_EQ] = { NULL, parse_binary, PREC_ASSIGNMENT },
[TOKEN_PLUS_ASSIGN] = { NULL, parse_binary, PREC_ASSIGNMENT },
[TOKEN_MINUS_ASSIGN] = { NULL, parse_binary, PREC_ASSIGNMENT },
[TOKEN_MULT_ASSIGN] = { NULL, parse_binary, PREC_ASSIGNMENT },
[TOKEN_MOD_ASSIGN] = { NULL, parse_binary, PREC_ASSIGNMENT },
[TOKEN_DIV_ASSIGN] = { NULL, parse_binary, PREC_ASSIGNMENT },
[TOKEN_AND_ASSIGN] = { NULL, parse_binary, PREC_ASSIGNMENT },
[TOKEN_OR_ASSIGN] = { NULL, parse_binary, PREC_ASSIGNMENT },
[TOKEN_BIT_XOR_ASSIGN] = { NULL, parse_binary, PREC_ASSIGNMENT },
[TOKEN_BIT_AND_ASSIGN] = { NULL, parse_binary, PREC_ASSIGNMENT },
[TOKEN_BIT_OR_ASSIGN] = { NULL, parse_binary, PREC_ASSIGNMENT },
[TOKEN_SHR_ASSIGN] = { NULL, parse_binary, PREC_ASSIGNMENT },
[TOKEN_SHL_ASSIGN] = { NULL, parse_binary, PREC_ASSIGNMENT },
};