Parses most of C3.

This commit is contained in:
Christoffer Lerno
2019-08-09 22:30:04 +02:00
parent 33770b905d
commit ebce81ad51
38 changed files with 9753 additions and 545 deletions

View File

@@ -1,7 +1,6 @@
cmake_minimum_required(VERSION 3.13)
project(c3c C)
# Only the C language is enabled by project(), so the release optimisation
# level has to be set on the C flags; the CXX variant is never consulted.
set(CMAKE_C_FLAGS_RELEASE "-O3")
set(CMAKE_C_STANDARD 11)
include_directories(
@@ -19,4 +18,14 @@ add_executable(c3c
src/compiler/symtab.c
src/compiler/parser.c
src/compiler_tests/tests.c
src/compiler_tests/benchmark.c src/utils/malloc.c src/utils/malloc.h src/compiler/compiler.c src/compiler/compiler.h src/compiler/semantic_analyser.c src/compiler/semantic_analyser.h src/utils/common.h src/compiler/source_file.c src/compiler/source_file.h src/compiler/diagnostics.c src/compiler/diagnostics.h)
src/compiler_tests/benchmark.c
src/utils/malloc.c
src/compiler/compiler.c
src/compiler/semantic_analyser.c
src/compiler/source_file.c
src/compiler/diagnostics.c
src/compiler/ast.c
src/compiler/module.c
src/compiler/value.c src/compiler/value.h src/compiler/bigint.c src/compiler/bigint.h src/compiler/context.c)
target_compile_options(c3c PRIVATE -Werror -Wall -Wextra -Wno-unused-function -Wno-unused-variable -Wno-unused-parameter)

View File

@@ -18,4 +18,6 @@ C3 tries to be an alternative in the C/C++ niche: fast and close to the meta
Most work is still being done in the design draft here: https://c3lang.github.io/c3docs/. If you have suggestions, send a mail to [christoffer@aegik.com](mailto:christoffer@aegik.com), [file an issue](https://github.com/c3lang/c3c/issues) or discuss C3 on the r/ProgrammingLanguages Discord server: https://discord.gg/cfu4wdk
Some small work is being done, in particular lifting some code from an earlier "work-in-progress" C2 compiler called [Titanos](https://github.com/lerno/titanos).
Some small work is being done on the parser here, but most of the structure is still missing.
If you wish to contribute with ideas, please file issues on the c3docs: https://github.com/c3lang/c3docs instead of the compiler.

179
resources/c3.l Normal file
View File

@@ -0,0 +1,179 @@
/* Named sub-patterns used by the rules section.
   D = decimal digit, L = identifier letter, AN = identifier letter or digit,
   H = hex digit, UA = upper-case letter, digit or underscore,
   E = exponent part, FS = float suffix, IS = integer suffix run.
   UN, DC and UC are defined here but not referenced by any rule in this file. */
D [0-9]
UN [_]
L [a-zA-Z_]
AN [a-zA-Z_0-9]
H [a-fA-F0-9]
UA [A-Z_0-9]
DC [a-z]
UC [A-Z]
E [Ee][+-]?{D}+
FS (f|F|l|L)
IS (u|U|l|L)*
%{
/* C prologue: token codes come from the parser header, and the two helpers
   declared here are defined in the user-code section at the end of the file. */
#include <stdio.h>
#include "y.tab.h"
/* Advances the global column counter over yytext and echoes the token. */
void count(void);
/* Consumes the remainder of a block comment after its opening delimiter. */
void comment(void);
%}
%%
"/*" { /* block comment: comment() consumes input up to the closing delimiter */ comment(); }
"break" { count(); return(BREAK); }
"case" { count(); return(CASE); }
"char" { count(); return(CHAR); }
"const" { count(); return(CONST); }
"continue" { count(); return(CONTINUE); }
"default" { count(); return(DEFAULT); }
"do" { count(); return(DO); }
"double" { count(); return(DOUBLE); }
"else" { count(); return(ELSE); }
"enum" { count(); return(ENUM); }
"float" { count(); return(FLOAT); }
"for" { count(); return(FOR); }
"goto" { count(); return(GOTO); }
"if" { count(); return(IF); }
"int" { count(); return(INT); }
"uint" { count(); return(UINT); }
"long" { count(); return(LONG); }
"ulong" { count(); return(ULONG); }
"return" { count(); return(RETURN); }
"short" { count(); return(SHORT); }
"ushort" { count(); return(USHORT); }
"sizeof" { count(); return(SIZEOF); }
"local" { count(); return(LOCAL); }
"type" { count(); return(TYPE); }
"error" { count(); return(ERROR); }
"module" { count(); return(MODULE); }
"as" { count(); return(AS); }
"import" { count(); return(IMPORT); }
"generic" { count(); return(GENERIC); }
"struct" { count(); return(STRUCT); }
"switch" { count(); return(SWITCH); }
"typedef" { count(); return(TYPEDEF); }
"union" { count(); return(UNION); }
"void" { count(); return(VOID); }
"volatile" { count(); return(VOLATILE); }
"while" { count(); return(WHILE); }
"throw" { count(); return(THROW); }
"throws" { count(); return(THROWS); }
"func" { count(); return(FUNC); }
"nil" { count(); return(NIL); }
"next" { /* the closing brace of this action was missing */ count(); return(NEXT); }
"var" { /* keywords below: tokens the grammar declares that previously had no rule */ count(); return(VAR); }
"bool" { count(); return(BOOL); }
"byte" { count(); return(BYTE); }
"macro" { count(); return(MACRO); }
"try" { count(); return(TRY); }
"catch" { count(); return(CATCH); }
"public" { count(); return(PUBLIC); }
"defer" { count(); return(DEFER); }
"attribute" { count(); return(ATTRIBUTE); }
"$if" { /* must precede the generic compile-time identifier rule: equal-length matches pick the first rule */ count(); return(CTIF); }
"$elif" { count(); return(CTELIF); }
"$else" { /* NOTE(review): CTENDIF, CTSWITCH, CTCASE, CTDEFAULT and CTEACH still have no rule; their spelling is not shown here */ count(); return(CTELSE); }
[_]*[A-Z]{UA}* { count(); return(CONST_IDENT); }
[_]*[A-Z]{UA}*[a-z]{AN}* { count(); return(TYPE_IDENT); }
[_]*[a-z]{AN}* { count(); return(IDENT); }
@{L}+[!]? { count(); return(AT_IDENT); }
"$"{L}+ { /* '$' is quoted so it can never be read as the end-of-line anchor */ count(); return(CT_IDENT); }
#{L}+ { count(); return(HASH_IDENT); }
0[xX]{H}+{IS}? { count(); return(CONSTANT); }
0{D}+{IS}? { count(); return(CONSTANT); }
{D}+{IS}? { count(); return(CONSTANT); }
L?'(\\.|[^\\'])+' { count(); return(CONSTANT); }
{D}+{E}{FS}? { count(); return(CONSTANT); }
{D}*"."{D}+({E})?{FS}? { count(); return(CONSTANT); }
{D}+"."{D}*({E})?{FS}? { count(); return(CONSTANT); }
L?\"(\\.|[^\\"])*\" { count(); return(STRING_LITERAL); }
"..." { count(); return(ELLIPSIS); }
">>=" { count(); return(RIGHT_ASSIGN); }
"<<=" { count(); return(LEFT_ASSIGN); }
"+=" { count(); return(ADD_ASSIGN); }
"-=" { count(); return(SUB_ASSIGN); }
"*=" { count(); return(MUL_ASSIGN); }
"/=" { count(); return(DIV_ASSIGN); }
"%=" { count(); return(MOD_ASSIGN); }
"&=" { count(); return(AND_ASSIGN); }
"^=" { count(); return(XOR_ASSIGN); }
"|=" { count(); return(OR_ASSIGN); }
">>" { count(); return(RIGHT_OP); }
"<<" { count(); return(LEFT_OP); }
"++" { count(); return(INC_OP); }
"--" { count(); return(DEC_OP); }
"&&" { count(); return(AND_OP); }
"||" { count(); return(OR_OP); }
"<=" { count(); return(LE_OP); }
">=" { count(); return(GE_OP); }
"==" { count(); return(EQ_OP); }
"!=" { count(); return(NE_OP); }
"::" { count(); return(SCOPE); }
"?:" { count(); return(ELVIS); }
";" { count(); return(';'); }
("{") { count(); return('{'); }
("}") { count(); return('}'); }
"," { count(); return(','); }
":" { count(); return(':'); }
"=" { count(); return('='); }
"(" { count(); return('('); }
")" { count(); return(')'); }
("[") { count(); return('['); }
("]") { count(); return(']'); }
"." { count(); return('.'); }
"&" { count(); return('&'); }
"!" { count(); return('!'); }
"~" { count(); return('~'); }
"-" { count(); return('-'); }
"+" { count(); return('+'); }
"*" { count(); return('*'); }
"/" { count(); return('/'); }
"%" { count(); return('%'); }
"<" { count(); return('<'); }
">" { count(); return('>'); }
"^" { count(); return('^'); }
"|" { count(); return('|'); }
"?" { count(); return('?'); }
[ \t\v\n\f] { count(); }
. { /* ignore bad characters */ }
%%
/* End-of-input hook called by the generated scanner; always returns 1. */
int yywrap(void)
{
	enum { NO_MORE_INPUT = 1 };
	return NO_MORE_INPUT;
}
/*
 * Consume a block comment whose opening delimiter was matched by the rule
 * that calls this function.
 *
 * Every character other than '*' is echoed with putchar(); the closing '/'
 * is echoed as well. Scanning stops at the first "*" followed by "/", or at
 * end of input if the comment is unterminated.
 *
 * The character is held in an int and end of input is recognised as either
 * 0 or EOF, so the loop terminates whichever value input() reports. A char
 * compared only with 0 would miss an EOF return.
 */
void comment(void)
{
	int c;
	for (;;)
	{
		/* Echo up to (but not including) the next '*'. */
		while ((c = input()) != '*' && c != 0 && c != EOF)
		{
			putchar(c);
		}
		/* Ran out of input inside the comment: nothing more to read. */
		if (c != '*') return;

		c = input();
		if (c == '/')
		{
			putchar(c);
			return;
		}
		if (c == 0 || c == EOF) return;
		/* Not the closing delimiter: push it back so a run such as
		   "**" followed by '/' is still recognised. */
		unput(c);
	}
}
/* Current output column; read by the parser's error reporter. */
int column = 0;

/*
 * Walk the text of the token just matched and update `column`:
 * a newline resets it to 0, a tab advances it to the next multiple of 8,
 * and any other character advances it by one. The token is then echoed.
 */
void count(void)
{
	for (const char *cursor = yytext; *cursor != '\0'; cursor++)
	{
		switch (*cursor)
		{
			case '\n':
				column = 0;
				break;
			case '\t':
				column += 8 - (column % 8);
				break;
			default:
				column++;
				break;
		}
	}
	ECHO;
}

697
resources/grammar.y Normal file
View File

@@ -0,0 +1,697 @@
%{
/* C prologue copied into the generated parser. */
#include <stdio.h>
#define YYERROR_VERBOSE
/* Scanner-side symbols: `column` is maintained by count() in c3.l. */
extern char yytext[];
extern int column;
int yylex(void);
void yyerror(char *s);
%}
/* Identifier classes, literals and sizeof. */
%token IDENT AT_IDENT CT_IDENT CONSTANT CONST_IDENT TYPE_IDENT STRING_LITERAL SIZEOF
/* Multi-character operators. */
%token INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
%token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
%token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
%token XOR_ASSIGN OR_ASSIGN VAR NIL ELVIS HASH_IDENT NEXT
%token TYPEDEF MODULE IMPORT
/* Built-in type keywords and qualifiers. */
%token CHAR SHORT INT LONG FLOAT DOUBLE CONST VOLATILE VOID
%token BYTE USHORT UINT ULONG BOOL
%token STRUCT UNION ENUM ELLIPSIS AS LOCAL
/* Statement keywords. */
%token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN
/* Declaration keywords and the compile-time (CT*) control tokens. */
%token TYPE FUNC ERROR MACRO GENERIC CTIF CTELIF CTENDIF CTELSE CTSWITCH CTCASE CTDEFAULT CTEACH
%token THROWS THROW TRY CATCH SCOPE PUBLIC DEFER ATTRIBUTE
/* Parsing begins at a whole source file. */
%start translation_unit
%%
/* Qualifier prefix of a scoped name: an identifier followed by the scope operator. */
ident_scope
: IDENT SCOPE
;
/* The identifier kinds that may be used as a value. */
ident_expression
: CONST_IDENT
| IDENT
| CT_IDENT
| AT_IDENT
;
/* Atoms of the expression grammar: literals, names, typed initializers,
   member access on a type, type(...) expressions and parenthesised expressions. */
primary_expression
: STRING_LITERAL
| CONSTANT
| NIL
| ident_scope ident_expression
| ident_expression
| base_type initializer_list
| base_type '.' IDENT
| TYPE '(' type_expression ')'
| '(' expression ')'
;
/* Left-recursive suffixes: subscript, call, member access, post-increment/decrement. */
postfix_expression
: primary_expression
| postfix_expression '[' expression ']'
| postfix_expression '(' ')'
| postfix_expression '(' argument_expression_list ')'
| postfix_expression '.' IDENT
| postfix_expression INC_OP
| postfix_expression DEC_OP
;
/* One or more comma-separated call arguments. */
argument_expression_list
: expression
| argument_expression_list ',' expression
;
/* Prefix operators and sizeof applied to a type. */
unary_expression
: postfix_expression
| INC_OP unary_expression
| DEC_OP unary_expression
| unary_operator unary_expression
| SIZEOF '(' type_expression ')'
;
unary_operator
: '&'
| '*'
| '+'
| '-'
| '~'
| '!'
;
/* Binary operator levels, tightest first as encoded by the rule nesting:
   multiplicative, shift, bit, additive, relational, logical.
   Shift and bitwise operators therefore bind tighter than '+' and '-'. */
multiplicative_expression
: unary_expression
| multiplicative_expression '*' unary_expression
| multiplicative_expression '/' unary_expression
| multiplicative_expression '%' unary_expression
;
shift_expression
: multiplicative_expression
| shift_expression LEFT_OP multiplicative_expression
| shift_expression RIGHT_OP multiplicative_expression
;
bit_expression
: shift_expression
| bit_expression '&' shift_expression
| bit_expression '^' shift_expression
| bit_expression '|' shift_expression
;
additive_expression
: bit_expression
| additive_expression '+' bit_expression
| additive_expression '-' bit_expression
;
/* Equality and ordering comparisons share one level. */
relational_expression
: additive_expression
| relational_expression '<' additive_expression
| relational_expression '>' additive_expression
| relational_expression LE_OP additive_expression
| relational_expression GE_OP additive_expression
| relational_expression EQ_OP additive_expression
| relational_expression NE_OP additive_expression
;
/* Logical AND and OR also share one level. */
logical_expression
: relational_expression
| logical_expression AND_OP relational_expression
| logical_expression OR_OP relational_expression
;
/* Ternary conditional and the two-operand elvis form; both recurse on the right. */
conditional_expression
: logical_expression
| logical_expression '?' expression ':' conditional_expression
| logical_expression ELVIS conditional_expression
;
/* Assignment recurses on the right; the target may also receive a brace initializer. */
assignment_expression
: conditional_expression
| unary_expression assignment_operator assignment_expression
| unary_expression '=' initializer_list
;
/* A full expression, optionally wrapped in try with an optional else fallback. */
expression
: assignment_expression
| TRY assignment_expression
| TRY assignment_expression ELSE assignment_expression
;
assignment_operator
: '='
| MUL_ASSIGN
| DIV_ASSIGN
| MOD_ASSIGN
| ADD_ASSIGN
| SUB_ASSIGN
| LEFT_ASSIGN
| RIGHT_ASSIGN
| AND_ASSIGN
| XOR_ASSIGN
| OR_ASSIGN
;
/* Expressions used where a constant is expected exclude assignment and try. */
constant_expression
: conditional_expression
;
/* One or more comma-separated enumerators. */
enumerators
: enumerator
| enumerators ',' enumerator
;
/* An enumerator list may end with a trailing comma. */
enumerator_list
: enumerators
| enumerators ','
;
/* An enum constant, optionally with an explicit value. */
enumerator
: CONST_IDENT
| CONST_IDENT '=' constant_expression
;
identifier_list
: IDENT
| identifier_list ',' IDENT
;
/* Macro and generic parameters are bare names, either compile-time or ordinary. */
macro_argument
: CT_IDENT
| IDENT
;
macro_argument_list
: macro_argument
| macro_argument_list ',' macro_argument
;
/* A declarator without its own type, with an optional initializer. */
implicit_decl
: IDENT
| IDENT '=' initializer
;
/* A declarator that spells out its type, with an optional initializer. */
explicit_decl
: type_expression IDENT '=' initializer
| type_expression IDENT
;
/*
 * A declaration starts with one fully typed declarator and may continue with
 * any number of comma-separated declarators, each either untyped or typed,
 * e.g. `float a = 0, b = 3, double c = 1, d`.
 * The rule is left-recursive so the list is not capped at two declarators.
 */
declaration
: explicit_decl
| declaration ',' implicit_decl
| declaration ',' explicit_decl
;
/* Currently a single declaration; kept as its own non-terminal for its users. */
declaration_list
: declaration
;
/* A parameter: a type, optionally named, optionally with a default initializer. */
param_declaration
: type_expression
| type_expression IDENT
| type_expression IDENT '=' initializer
;
/* Parameters optionally followed by an untyped or typed variadic tail. */
parameter_type_list
: parameter_list
| parameter_list ',' ELLIPSIS
| parameter_list ',' type_expression ELLIPSIS
;
/* The parenthesised parameter section; the parentheses are required, the contents are not. */
opt_parameter_type_list
: '(' ')'
| '(' parameter_type_list ')'
;
parameter_list
: param_declaration
| parameter_list ',' param_declaration
;
/* A type without suffixes: a built-in keyword, a (possibly scoped) type name,
   or type(...) applied to a constant expression. */
base_type
: VOID
| BOOL
| CHAR
| BYTE
| SHORT
| USHORT
| INT
| UINT
| LONG
| ULONG
| FLOAT
| DOUBLE
| TYPE_IDENT
| ident_scope TYPE_IDENT
| TYPE '(' constant_expression ')'
;
/* Type suffixes applied left to right: '*', '&', and the bracket forms
   [constant], [] and [+]. */
type_expression
: base_type
| type_expression '*'
| type_expression '&'
| type_expression '[' constant_expression ']'
| type_expression '[' ']'
| type_expression '[' '+' ']'
;
/* An initializer is either an expression or a nested brace list. */
initializer
: expression
| initializer_list
;
initializer_values
: initializer
| initializer_values ',' initializer
;
/* Brace-enclosed, non-empty list of initializers; a trailing comma is allowed. */
initializer_list
: '{' initializer_values '}'
| '{' initializer_values ',' '}'
;
/* Compile-time constructs that appear inside function bodies. */
/* A compile-time switch arm: matches a list of types, or is the default arm. */
ct_case_statement
: CTCASE type_list ':' statement
| CTDEFAULT ':' statement
;
/* One or more compile-time elif branches, each with a braced body. */
ct_elif_body
: ct_elif compound_statement
| ct_elif_body ct_elif compound_statement
;
/* What may follow a compile-time if body: elif branches, a lone else, or both. */
ct_else_body
: ct_elif_body
| CTELSE compound_statement
| ct_elif_body CTELSE compound_statement
;
ct_switch_body
: ct_case_statement
| ct_switch_body ct_case_statement
;
/* Compile-time if, switch and each statements. */
ct_statement
: ct_if compound_statement
| ct_if compound_statement ct_else_body
| ct_switch '{' ct_switch_body '}'
| CTEACH '(' expression AS CT_IDENT ')' statement
;
/* throw <expression>; -- terminated with ';' like every other rule in the file. */
throw_statement
: THROW expression ';'
;
/* Every statement form that may appear in a function body. */
statement
: compound_statement
| labeled_statement
| expression_statement
| selection_statement
| iteration_statement
| jump_statement
| declaration_statement
| volatile_statement
| catch_statement
| try_statement
| defer_statement
| ct_statement
| throw_statement
;
/* The statement forms allowed as the body of defer and catch. */
defer_catch_body
: compound_statement
| expression_statement
| jump_statement
| iteration_statement
| selection_statement
;
/* defer followed by a body, or by a whole catch statement. */
defer_statement
: DEFER defer_catch_body
| DEFER catch_statement
;
/* catch binds a name of a given type, or of the generic error kind, for its body. */
catch_statement
: CATCH '(' type_expression IDENT ')' defer_catch_body
| CATCH '(' ERROR IDENT ')' defer_catch_body
;
/* Statement-level try applies to selection, iteration and jump statements. */
try_statement
: TRY selection_statement
| TRY iteration_statement
| TRY jump_statement
;
volatile_statement
: VOLATILE compound_statement
;
/* A named label in front of a statement; terminated with ';' like the other rules. */
label_statement
: IDENT ':' statement
;
/* Labels: a named label, or a case/default marker (which carry no statement here). */
labeled_statement
: label_statement
| CASE constant_expression ':'
| DEFAULT ':'
;
/* A braced block, possibly empty. */
compound_statement
: '{' '}'
| '{' statement_list '}'
;
statement_list
: statement
| statement_list statement
;
declaration_statement
: declaration ';'
;
/* An expression followed by ';', or the empty statement. */
expression_statement
: ';'
| expression ';'
;
/* The parenthesised head of if/switch/while: an optional leading declaration
   followed by ';', then declarations or expressions. */
control_expression
: decl_or_expr_list
| declaration_list ';' decl_or_expr_list
;
/* An if may take any statement when there is no else, but the then-branch
   must be a braced block when an else follows. */
selection_statement
: IF '(' control_expression ')' statement
| IF '(' control_expression ')' compound_statement ELSE statement
| SWITCH '(' control_expression ')' compound_statement
;
expression_list
: expression
| expression_list ',' expression
;
decl_or_expr_list
: expression_list
| declaration_list
;
/* for (init; condition; [update]) -- the condition reuses expression_statement,
   which supplies the second ';'. */
for_statement
: FOR '(' decl_or_expr_list ';' expression_statement ')' statement
| FOR '(' decl_or_expr_list ';' expression_statement expression_list ')' statement
;
iteration_statement
: WHILE '(' control_expression ')' statement
| DO statement WHILE '(' expression ')' ';'
| for_statement
;
/* NOTE(review): the goto target is the CONSTANT token as written; confirm that is intended. */
jump_statement
: GOTO CONSTANT ';'
| CONTINUE ';'
| BREAK ';'
| RETURN ';'
| RETURN expression ';'
;
/* An attribute use: an @-identifier, optionally scoped, optionally with one
   parenthesised constant argument. */
attribute
: AT_IDENT
| IDENT SCOPE AT_IDENT
| AT_IDENT '(' constant_expression ')'
| IDENT SCOPE AT_IDENT '(' constant_expression ')'
;
/* Attributes are written one after another with no separator. */
attribute_list
: attribute
| attribute_list attribute
;
/* Zero or more attributes (the second alternative is empty). */
opt_attributes
: attribute_list
|
;
/* An error type named in a throws clause: a (possibly scoped) type name,
   or error(...) applied to an expression. */
error_type
: IDENT SCOPE TYPE_IDENT
| TYPE_IDENT
| ERROR '(' expression ')'
;
/*
 * The error types listed after `throws`, written one after another.
 * Named throw_error_list so it does not share a name with the error_list
 * rule used by error_declaration; yacc would merge two rules with the same
 * name into a single non-terminal.
 */
throw_error_list
: error_type
| throw_error_list error_type
;
/* `throws` alone, or `throws` followed by specific error types. */
throw_declaration
: THROWS
| THROWS throw_error_list
;
/* An optional throws clause (the second alternative is empty). */
opt_throw_declaration
: throw_declaration
|
;
/* A function name: plain, or qualified by a (possibly scoped) type name. */
func_name
: IDENT SCOPE TYPE_IDENT '.' IDENT
| TYPE_IDENT '.' IDENT
| IDENT
;
/* func <return type> <name> ( parameters ) [attributes] [throws clause] */
func_declaration
: FUNC type_expression func_name opt_parameter_type_list opt_attributes opt_throw_declaration
;
/* A declaration followed by a body, or by ';' for a prototype. */
func_definition
: func_declaration compound_statement
| func_declaration ';'
;
/* macro @name ( untyped arguments ) { body } */
macro_declaration
: MACRO AT_IDENT '(' macro_argument_list ')' compound_statement
;
struct_or_union
: STRUCT
| UNION
;
/* A named struct or union with optional attributes and a braced body. */
struct_declaration
: struct_or_union TYPE_IDENT opt_attributes struct_body
;
/* The body must contain at least one member. */
struct_body
: '{' struct_declaration_list '}'
;
struct_declaration_list
: struct_member_declaration
| struct_declaration_list struct_member_declaration
;
/* A member is a typed list of names, or a nested struct/union that is either
   named or anonymous. */
struct_member_declaration
: type_expression identifier_list opt_attributes ';'
| struct_or_union IDENT opt_attributes struct_body
| struct_or_union opt_attributes struct_body
;
/* An enum, optionally with an explicit underlying type after ':'. */
enum_declaration
: ENUM TYPE_IDENT ':' type_expression opt_attributes '{' enumerator_list '}'
| ENUM TYPE_IDENT opt_attributes '{' enumerator_list '}'
;
/* One or more comma-separated error constant names. */
errors
: CONST_IDENT
| errors ',' CONST_IDENT
;
/* The constants of an error declaration; a trailing comma is allowed. */
error_list
: errors
| errors ','
;
/* error TypeName { CONSTANT, ... } */
error_declaration
: ERROR TYPE_IDENT '{' error_list '}'
;
/* One or more comma-separated types. */
type_list
: type_expression
| type_list ',' type_expression
;
/* One arm of a generic: a list of types and the statement selected for them.
   Terminated with ';' like every other rule in the file. */
generics_case
: CASE type_list ':' statement
;
generics_body
: generics_case
| generics_body generics_case
;
/* generic [return type] name ( untyped arguments ) { arms } */
generics_declaration
: GENERIC IDENT '(' macro_argument_list ')' '{' generics_body '}'
| GENERIC type_expression IDENT '(' macro_argument_list ')' '{' generics_body '}'
;
/* A constant: either an untyped compile-time identifier or a typed name;
   both forms require an initializer. */
const_declaration
: CONST CT_IDENT '=' initializer ';'
| CONST type_expression IDENT '=' initializer ';'
;
/* A function type as written in a typedef: no name and no attributes. */
func_typedef
: FUNC type_expression opt_parameter_type_list opt_throw_declaration
;
/* typedef <type or function type> as NewName; */
typedef_declaration
: TYPEDEF type_expression AS TYPE_IDENT ';'
| TYPEDEF func_typedef AS TYPE_IDENT ';'
;
/* The kinds of declaration an attribute can be declared for. */
attribute_domain
: FUNC
| VAR
| ENUM
| STRUCT
| UNION
| TYPEDEF
| CONST
;
attribute_domains
: attribute_domain
| attribute_domains ',' attribute_domain
;
/* attribute @name <domains> [ ( parameters ) ] */
attribute_declaration
: ATTRIBUTE AT_IDENT attribute_domains
| ATTRIBUTE AT_IDENT attribute_domains '(' parameter_type_list ')'
;
/* A file-level variable with an optional initializer. */
global_declaration
: type_expression IDENT ';'
| type_expression IDENT '=' initializer ';'
;
/* Heads of the compile-time constructs: keyword plus a parenthesised expression.
   They are shared by the statement-level and the top-level forms. */
ct_if
: CTIF '(' expression ')'
;
ct_elif
: CTELIF '(' expression ')'
;
ct_switch
: CTSWITCH '(' expression ')'
;
/* A braced group of top-level declarations. */
top_level_block
: '{' top_level_statements '}'
;
/* One or more top-level compile-time elif branches. */
tl_ct_elif_body
: ct_elif top_level_block
| tl_ct_elif_body ct_elif top_level_block
;
/*
 * What may follow a top-level compile-time if block: elif branches, a lone
 * else, or elif branches followed by one else. This mirrors ct_else_body.
 * The earlier left-recursive form could not derive a bare else and accepted
 * any number of else blocks in a row.
 */
tl_ct_else_body
: tl_ct_elif_body
| CTELSE top_level_block
| tl_ct_elif_body CTELSE top_level_block
;
/* A top-level compile-time switch arm: a list of types, or the default arm. */
tl_ct_case
: CTCASE type_list ':' top_level_statements
| CTDEFAULT ':' top_level_statements
;
tl_ct_switch_body
: tl_ct_case
| tl_ct_switch_body tl_ct_case
;
/* Compile-time if and switch at file level. */
conditional_compilation
: ct_if top_level_block
| ct_if top_level_block tl_ct_else_body
| ct_switch '{' tl_ct_switch_body '}'
;
/* A module parameter may be any of these identifier kinds. */
module_param
: CT_IDENT
| HASH_IDENT
| TYPE_IDENT
| AT_IDENT
;
module_params
: module_param
| module_params ',' module_param
;
/* module name; or module name ( parameters ); */
module
: MODULE IDENT ';'
| MODULE IDENT '(' module_params ')' ';'
;
/* import name [as alias] [local]; */
import_decl
: IMPORT IDENT ';'
| IMPORT IDENT AS IDENT ';'
| IMPORT IDENT AS IDENT LOCAL ';'
| IMPORT IDENT LOCAL ';'
;
/* One or more import declarations. */
imports
: import_decl
| imports import_decl
;
/*
 * A source file: the module line, optional imports, then the top-level
 * declarations. The first alternative lets a file with no import at all
 * parse, since `imports` itself requires at least one import_decl.
 */
translation_unit
: module top_level_statements
| module imports top_level_statements
;
/* One or more top-level declarations, each preceded by a visibility marker. */
top_level_statements
: visibility top_level
| top_level_statements visibility top_level
;
/* local and public in either order, either one alone, or nothing
   (the last alternative is empty). */
visibility
: LOCAL
| PUBLIC
| LOCAL PUBLIC
| PUBLIC LOCAL
|
;
/* Every kind of declaration allowed at file level. */
top_level
: func_definition
| conditional_compilation
| struct_declaration
| attribute_declaration
| enum_declaration
| error_declaration
| const_declaration
| global_declaration
| macro_declaration
| generics_declaration
| typedef_declaration
;
%%
/*
 * Parser error callback. Flushes stdout, then prints a caret and the message,
 * each right-aligned in a field `column` characters wide.
 */
void yyerror(char *s)
{
	const int width = column;
	fflush(stdout);
	fprintf(stdout, "\n%*s\n%*s\n", width, "^", width, s);
}
/*
 * Entry point of the stand-alone grammar checker: parse the input and
 * use the parser's result as the exit status, so a failed parse is visible
 * to the caller instead of always exiting with 0.
 */
int main(int argc, char *argv[])
{
	/* Command-line arguments are not used. */
	(void)argc;
	(void)argv;
	return yyparse();
}

View File

@@ -0,0 +1,11 @@
// Minimal fixture: one module with two functions that both use a bare `return;`.
module foo;
// Void function with a bare return.
func void test()
{
return;
}
// Declared to return int but returns no value.
// NOTE(review): presumably an intentional negative case for the analyser - confirm.
func int test2()
{
return;
}

View File

@@ -0,0 +1,209 @@
// Broad parser fixture: exercises module parameters, imports, macros, type
// declarations, functions, generics, compile-time conditionals and statements.
module foo ($foo, #bar, Integer);
import bar as eok local;
import bar2 as eok2;
import bar3 local;
// Macros: with a return type and typed argument, untyped, and with a '!' suffix.
macro void @foo(int i, $e)
{
$e = 1;
printf("Helo");
}
macro @goo(i, $e)
{
}
macro @soom!(i, $e)
{}
// Struct, enum, union and error declarations.
local struct Foom
{
int i;
Foom *test;
int*** j;
int*[][]* k;
}
struct Hej
{
int x;
}
enum FEok : int {
IFEJ
}
enum Test
{
FOO = 1 + 2,
BAR,
}
enum Test2 : int
{
FOO = 1,
BAR,
}
union Foomt
{
int i;
double d;
}
error Errors
{
BADERROR,
OTHER_ERROR
}
// Functions, including a scoped method-style name with a throws clause.
func Foom test(int a)
{
return 1 + 2;
}
func boo::Bar zab::Baz.sd(die::Eij i) throws Zab // , sij:Zig
{
float a = 0, b = 3, double c = 1, d;
int i = 0;
}
// Generics with and without a return type.
generic int boor(i)
{
case int:
return 1;
case double:
return 100;
default:
return 1000;
}
generic boor2(i)
{
case int:
return "Helo";
default:
return 1000;
}
// Top-level compile-time conditionals: if/elif/else, a lone if, and if/else.
$if ($e > 0)
{
func void foo() {}
}
$elif ($e < 0)
{
func void foo() { printf("HELO"); }
}
$else
{
func void foo() { printf("OLEH"); }
}
$if ($e > 0)
{
func void foo() {}
}
$if ($b > 0)
{
}
$else
{
generic test(i) { }
}
generic boofer2(i, g, eok)
{
case int, char[], type($eoo):
return "Helo";
default:
return 1000;
}
// One large function body covering the statement and expression forms.
func void hello() throws Errors
{
int i, j;
throw FOO;
throw awesome::FOO;
defer close(b);
foo::Bar x = 3;
try foo();
try foo() else 1;
foo(try 1);
type($error) fk;
type(int).size + fk;
Errors {};
Ferrors{a = 1, b = 20, b = { token }};
Ferrors{1, 3, 1+4};
$erro = 1;
FOO:
goto FOO;
type($error) fk;
foo::@macrof();
int i = foo ? 2 : 4;
@macros();
type(foo::y) z;
type(int) * 2;
$error = type(int);
int[4] a;
foo[1 + 2] * b;
type((i > 0) ? type(int) : type(double)) doek;
$e = type(type(type(Bar)));
$e = foo ? type(int) : type(Bar);
$e = type(type(foo::$eofk));
if (a == 0 && 1 == b)
{
i = 0;
}
while (bpb >= 3)
{
a();
}
do
{
} while (0);
for (i = 0;;)
{}
for (i = 0, j = 3; i < 0; i++, j++) {}
for (int i = 0; i < 100; i++)
{
i++;
}
int i = 1;
i + 1 * 100;
&i;
int j = i;
2;
i++;
switch (int foo = 1; bar)
{
case 1:
next;
continue;
default:
break;
}
do {
i++;
} while (a < 0);
while (a > 0)
{
a--;
}
while (int a = 4; int b = 20)
{
a + 1;
}
return;
}
// Typedefs of a pointer type and of a function type.
typedef Foo* as Bar;
typedef func void(int, Foo*) as Zoo;
func void test2()
{
return;
}

View File

@@ -50,6 +50,8 @@ static void usage(void)
OUTPUT(" --template <template> - Use a different template: \"lib\", \"staticlib\" or a path.");
OUTPUT(" --about - Prints a short description of C3.");
OUTPUT(" --symtab <value> - Sets the preferred symtab size.");
OUTPUT(" -E - Lex only.");
OUTPUT(" -P - Only parse and output the AST as S-expressions.");
}
@@ -262,6 +264,7 @@ void parse_arguments(int argc, const char *argv[])
exit(EXIT_SUCCESS);
}
build_options.pointer_size = sizeof(void *);
build_options.path = ".";
build_options.command = COMMAND_MISSING;
build_options.symtab_size = DEFAULT_SYMTAB_SIZE;

View File

@@ -81,6 +81,7 @@ typedef struct
uint32_t symtab_size;
CompileOption compile_option;
DiagnosticsSeverity severity[DIAG_END_SENTINEL];
int pointer_size;
} BuildOptions;

856
src/compiler/ast.c Normal file
View File

@@ -0,0 +1,856 @@
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <utils/malloc.h>
#include <build/build_options.h>
#include <utils/lib.h>
#include "ast.h"
/**
 * Allocate a zeroed Decl from the arena and fill in its identity fields.
 *
 * @param module     module recorded on the declaration
 * @param decl_kind  kind stored in decl_kind
 * @param name       name token; its string must be non-NULL (asserted)
 * @param visibility visibility stored on the declaration
 * @return the new declaration; all other fields are zero
 */
Decl *decl_new_in_module(Module *module, DeclKind decl_kind, Token name, Visibility visibility)
{
	assert(name.string);
	Decl *result = malloc_arena(sizeof(Decl));
	memset(result, 0, sizeof(Decl));
	result->module = module;
	result->visibility = visibility;
	result->name = name;
	result->decl_kind = decl_kind;
	return result;
}
// File-level Type object whose kind is TYPE_POISONED; all other fields are zero.
Type poisoned_type = { .type_kind = TYPE_POISONED };
/**
 * Return a fixed name for the kinds that have one: "void", "string", "nil",
 * and the placeholder "TODO" for built-in types.
 * Every other kind reaches the project's TODO macro (defined outside this file).
 */
char *type_to_string(Type *type)
{
	TypeKind kind = type->type_kind;
	if (kind == TYPE_VOID) return "void";
	if (kind == TYPE_STRING) return "string";
	if (kind == TYPE_NIL) return "nil";
	if (kind == TYPE_BUILTIN) return "TODO";
	// Unresolved kinds and everything else are not implemented yet.
	TODO
}
Type *type_poisoned()
{
static Type poison = { .type_kind = TYPE_POISONED };
return &poison;
}
/**
 * Allocate a zeroed Type from the arena and set its kind.
 *
 * @param type_kind kind stored in the new type
 * @return the new type; every field other than type_kind is zero
 */
Type *type_new(TypeKind type_kind)
{
	Type *fresh = malloc_arena(sizeof(Type));
	memset(fresh, 0, sizeof(Type));
	fresh->type_kind = type_kind;
	return fresh;
}
/**
 * Create a declaration together with the type it introduces.
 * The new type is its own canonical type and points back at the declaration,
 * and the declaration points at the type.
 */
Decl *decl_new_self_type(struct _Module *module, Token name, DeclKind decl_type, TypeKind type_kind, Visibility visibility)
{
	Decl *owner = decl_new_in_module(module, decl_type, name, visibility);
	Type *self_type = type_new(type_kind);
	// Link both directions before handing the pair out.
	self_type->decl = owner;
	self_type->canonical_type = self_type;
	owner->type = self_type;
	return owner;
}
// File-level Decl object of kind DECL_POISONED, already marked RESOLVE_DONE.
Decl poisoned_decl = { .decl_kind = DECL_POISONED, .resolve_status = RESOLVE_DONE };
/**
 * Create a DECL_VAR declaration and record its variable kind and type.
 */
Decl *decl_new_var(struct _Module *module, Token name, Type *type, VarDeclKind kind, Visibility visibility)
{
	Decl *variable = decl_new_in_module(module, DECL_VAR, name, visibility);
	variable->var.type = type;
	variable->var.kind = kind;
	return variable;
}
/**
 * Create a constant belonging to `parent`: it takes the parent's module,
 * visibility and type, and has the given declaration kind.
 * The parent must already have a type (asserted).
 */
Decl *decl_new_enum_const(Decl *parent, Token name, DeclKind kind)
{
	// decl_new_in_module only stores the kind, so it can be passed directly.
	Decl *constant = decl_new_in_module(parent->module, kind, name, parent->visibility);
	assert(parent->type);
	constant->type = parent->type;
	return constant;
}
/**
 * Search the members of `decl` for one named `name`.
 * A member whose name token is TOKEN_INVALID_TOKEN is searched recursively.
 * Names are compared by pointer, not by content.
 * NOTE(review): presumably both strings are interned - confirm with the symbol table.
 *
 * @return the matching member, or NULL when none is found
 */
Decl *struct_find_name(Decl *decl, const char* name)
{
	Decl **members = decl->strukt.members;
	VECEACH(members, idx)
	{
		Decl *candidate = members[idx];
		if (candidate->name.type != TOKEN_INVALID_TOKEN)
		{
			if (candidate->name.string == name) return candidate;
		}
		else
		{
			Decl *nested = struct_find_name(candidate, name);
			if (nested) return nested;
		}
	}
	return NULL;
}
/**
 * Allocate an expression node from the arena.
 *
 * The node is zeroed first, as decl_new_in_module and type_new do, so that
 * kind-specific fields start out as NULL/0 instead of holding whatever the
 * arena memory contained. The printer, for example, tests
 * conditional_expr.then_expr against NULL.
 *
 * @param kind  expression kind stored in expr_kind
 * @param start token stored as the node's location
 * @return the new node with type == NULL
 */
Expr *expr_new(ExprKind kind, Token start)
{
	Expr *expr = malloc_arena(sizeof(Expr));
	memset(expr, 0, sizeof(Expr));
	expr->expr_kind = kind;
	expr->loc = start;
	expr->type = NULL;
	return expr;
}
// File-level Expr object of kind EXPR_POISONED.
Expr poisoned_expr = { .expr_kind = EXPR_POISONED };
// Storage for the built-in types; these start zeroed and are filled in by type_setup().
Type type_bool;
Type type_void, type_nil, type_string;
Type type_half, type_float, type_double, type_quad;
Type type_char, type_short, type_int, type_long, type_isize;
Type type_byte, type_ushort, type_uint, type_ulong, type_usize;
Type type_compint, type_compfloat;
/**
 * Initialise the global built-in types. Each one is its own canonical type.
 *
 * DEF_TYPE takes the size in bytes first and the size in bits second, which
 * is the order every call below uses (for example 4, 32 for int). The macro
 * parameters were previously named the other way round, which stored the bit
 * count in `.bytes` and the byte count in `.bitsize`, so the switches on
 * `bytes` in type_get_signed/type_get_unsigned could never match.
 *
 * @param pointer_size size used for isize/usize.
 *        NOTE(review): the calls below divide it by 8 to get bytes, i.e. they
 *        treat it as a bit count - confirm the caller passes bits, not bytes.
 */
void type_setup(int pointer_size)
{
	type_void = (Type) { .type_kind = TYPE_VOID, .canonical_type = &type_void };
	type_nil = (Type) { .type_kind = TYPE_NIL, .canonical_type = &type_nil };
	type_string = (Type) { .type_kind = TYPE_STRING, .canonical_type = &type_string };
#define DEF_TYPE(name, _bytes, bits, type) name = (Type) { .type_kind = TYPE_BUILTIN, .bitsize = bits, .bytes = _bytes, .num_type = type, .canonical_type = &name }
	// Compile-time numbers have no fixed size.
	DEF_TYPE(type_compint, 0, 0, NUMBER_TYPE_SIGNED_INT);
	DEF_TYPE(type_compfloat, 0, 0, NUMBER_TYPE_FLOAT);
	DEF_TYPE(type_bool, 1, 8, NUMBER_TYPE_BOOL);
	DEF_TYPE(type_half, 2, 16, NUMBER_TYPE_FLOAT);
	DEF_TYPE(type_float, 4, 32, NUMBER_TYPE_FLOAT);
	DEF_TYPE(type_double, 8, 64, NUMBER_TYPE_FLOAT);
	DEF_TYPE(type_quad, 16, 128, NUMBER_TYPE_FLOAT);
	DEF_TYPE(type_char, 1, 8, NUMBER_TYPE_SIGNED_INT);
	DEF_TYPE(type_short, 2, 16, NUMBER_TYPE_SIGNED_INT);
	DEF_TYPE(type_int, 4, 32, NUMBER_TYPE_SIGNED_INT);
	DEF_TYPE(type_long, 8, 64, NUMBER_TYPE_SIGNED_INT);
	DEF_TYPE(type_isize, pointer_size / 8, pointer_size, NUMBER_TYPE_SIGNED_INT);
	DEF_TYPE(type_byte, 1, 8, NUMBER_TYPE_UNSIGNED_INT);
	DEF_TYPE(type_ushort, 2, 16, NUMBER_TYPE_UNSIGNED_INT);
	DEF_TYPE(type_uint, 4, 32, NUMBER_TYPE_UNSIGNED_INT);
	DEF_TYPE(type_ulong, 8, 64, NUMBER_TYPE_UNSIGNED_INT);
	DEF_TYPE(type_usize, pointer_size / 8, pointer_size, NUMBER_TYPE_UNSIGNED_INT);
#undef DEF_TYPE
}
/**
 * Return the long type for signed requests and the ulong type otherwise.
 */
Type* type_int_max_type(bool is_signed)
{
	if (is_signed)
	{
		return &type_long;
	}
	return &type_ulong;
}
/**
 * Map a built-in integer type to the signed type with the same `bytes` value.
 * A signed type is returned unchanged; any other input must be an unsigned
 * built-in (asserted). Sizes other than 1, 2, 4 and 8 hit UNREACHABLE.
 */
Type *type_get_signed(Type *type)
{
	assert(type->type_kind == TYPE_BUILTIN);
	if (type->num_type == NUMBER_TYPE_SIGNED_INT) return type;
	assert(type->num_type == NUMBER_TYPE_UNSIGNED_INT);
	switch (type->bytes)
	{
		case 1: return &type_char;
		case 2: return &type_short;
		case 4: return &type_int;
		case 8: return &type_long;
		default:
			UNREACHABLE
	}
}
/**
 * Map a built-in integer type to the unsigned type with the same `bytes` value.
 * An unsigned type is returned unchanged; any other input must be a signed
 * built-in (asserted). Sizes other than 1, 2, 4 and 8 hit UNREACHABLE.
 */
Type* type_get_unsigned(Type *type)
{
	assert(type->type_kind == TYPE_BUILTIN);
	if (type->num_type == NUMBER_TYPE_UNSIGNED_INT) return type;
	assert(type->num_type == NUMBER_TYPE_SIGNED_INT);
	switch (type->bytes)
	{
		case 1: return &type_byte;
		case 2: return &type_ushort;
		case 4: return &type_uint;
		case 8: return &type_ulong;
		default:
			UNREACHABLE
	}
}
// Token -> operator lookup tables, indexed by token type. Each has 256 slots;
// slots not listed in an initializer are zero.

// Binary operator for each binary-operator token.
BinOp bin_op[256] = {
[TOKEN_EQ] = BINOP_ASSIGN,
[TOKEN_STAR] = BINOP_MULT,
[TOKEN_PLUS] = BINOP_ADD,
[TOKEN_MINUS] = BINOP_SUB,
[TOKEN_DIV] = BINOP_DIV,
[TOKEN_MOD] = BINOP_MOD,
[TOKEN_NOT_EQUAL] = BINOP_NE,
[TOKEN_AND] = BINOP_AND,
[TOKEN_OR] = BINOP_OR,
[TOKEN_AMP] = BINOP_BIT_AND,
[TOKEN_BIT_OR] = BINOP_BIT_OR,
[TOKEN_BIT_XOR] = BINOP_BIT_XOR,
[TOKEN_EQEQ] = BINOP_EQ,
[TOKEN_GREATER] = BINOP_GT,
[TOKEN_GREATER_EQ] = BINOP_GE,
[TOKEN_LESS] = BINOP_LT,
[TOKEN_LESS_EQ] = BINOP_LE,
[TOKEN_SHR] = BINOP_SHR,
[TOKEN_SHL] = BINOP_SHL,
[TOKEN_ELVIS] = BINOP_ELVIS
};
// Assignment operator for each assignment token.
AssignOp assign_op[256] = {
[TOKEN_EQ] = ASSIGNOP_ASSIGN,
[TOKEN_MULT_ASSIGN] = ASSIGNOP_MULT_ASSIGN,
[TOKEN_PLUS_ASSIGN] = ASSIGNOP_ADD_ASSIGN,
[TOKEN_MINUS_ASSIGN] = ASSIGNOP_SUB_ASSIGN,
[TOKEN_DIV_ASSIGN] = ASSIGNOP_DIV_ASSIGN,
[TOKEN_MOD_ASSIGN] = ASSIGNOP_MOD_ASSIGN,
[TOKEN_AND_ASSIGN] = ASSIGNOP_AND_ASSIGN,
[TOKEN_OR_ASSIGN] = ASSIGNOP_OR_ASSIGN,
[TOKEN_BIT_AND_ASSIGN] = ASSIGNOP_BIT_AND_ASSIGN,
[TOKEN_BIT_OR_ASSIGN] = ASSIGNOP_BIT_OR_ASSIGN,
[TOKEN_BIT_XOR_ASSIGN] = ASSIGNOP_BIT_XOR_ASSIGN,
[TOKEN_SHR_ASSIGN] = ASSIGNOP_SHR_ASSIGN,
[TOKEN_SHL_ASSIGN] = ASSIGNOP_SHL_ASSIGN,
};
// Unary operator for each unary-operator token.
UnaryOp unary_op[256] = {
[TOKEN_STAR] = UNARYOP_DEREF,
[TOKEN_AMP] = UNARYOP_ADDR,
[TOKEN_BIT_NOT] = UNARYOP_BITNEG,
[TOKEN_NOT] = UNARYOP_NOT,
[TOKEN_MINUS] = UNARYOP_NEG,
[TOKEN_PLUSPLUS] = UNARYOP_INC,
[TOKEN_MINUSMINUS] = UNARYOP_DEC,
};
/** Look up the binary operator stored for `type` in bin_op (no range check). */
BinOp binop_from_token(TokenType type)
{
	const BinOp *entry = &bin_op[type];
	return *entry;
}
/** Look up the assignment operator stored for `type` in assign_op (no range check). */
AssignOp assignop_from_token(TokenType type)
{
	const AssignOp *entry = &assign_op[type];
	return *entry;
}
/**
 * Reverse lookup in assign_op: return the lowest token index whose entry
 * equals `type`, or TOKEN_INVALID_TOKEN when no entry matches.
 */
TokenType assignop_to_token(AssignOp type)
{
	const unsigned slots = sizeof(assign_op) / sizeof(assign_op[0]);
	unsigned slot = 0;
	while (slot < slots)
	{
		if (assign_op[slot] == type) return (TokenType)slot;
		slot++;
	}
	return TOKEN_INVALID_TOKEN;
}
/**
 * Reverse lookup in bin_op: return the lowest token index whose entry
 * equals `type`, or TOKEN_INVALID_TOKEN when no entry matches.
 */
TokenType binop_to_token(BinOp type)
{
	const unsigned slots = sizeof(bin_op) / sizeof(bin_op[0]);
	unsigned slot = 0;
	while (slot < slots)
	{
		if (bin_op[slot] == type) return (TokenType)slot;
		slot++;
	}
	return TOKEN_INVALID_TOKEN;
}
/** Look up the unary operator stored for `type` in unary_op (no range check). */
UnaryOp unaryop_from_token(TokenType type)
{
	const UnaryOp *entry = &unary_op[type];
	return *entry;
}
/**
 * Reverse lookup in unary_op: return the lowest token index whose entry
 * equals `type`, or TOKEN_INVALID_TOKEN when no entry matches.
 */
TokenType unaryop_to_token(UnaryOp type)
{
	const unsigned slots = sizeof(unary_op) / sizeof(unary_op[0]);
	unsigned slot = 0;
	while (slot < slots)
	{
		if (unary_op[slot] == type) return (TokenType)slot;
		slot++;
	}
	return TOKEN_INVALID_TOKEN;
}
// File-level Ast object of kind AST_POISONED.
Ast poisoned_ast = { .ast_kind = AST_POISONED };
/**
 * Write `indent * 2` space characters to `file`.
 * Nothing is written when `indent` is zero or negative.
 */
void fprint_indent(FILE *file, int indent)
{
	int remaining = indent * 2;
	while (remaining > 0)
	{
		fputc(' ', file);
		remaining--;
	}
}
/** Write the indentation for `indent`, then a closing parenthesis and a newline. */
void fprint_endparen(FILE *file, int indent)
{
	fprint_indent(file, indent);
	fputs(")\n", file);
}
/**
 * Print `type` as an indented S-expression.
 *
 * A NULL type prints "(none)". Kinds with nested content (pointer, array,
 * unresolved expression, unresolved array) print their children one level
 * deeper and then a closing parenthesis at the current level. Kinds that have
 * no dedicated output yet print "(TYPETODO)".
 */
void fprint_type_recursive(FILE *file, Type *type, int indent)
{
	fprint_indent(file, indent);
	if (type == NULL)
	{
		fputs("(none)\n", file);
		return;
	}
	switch (type->type_kind)
	{
		// Leaf kinds: a single fixed line.
		case TYPE_POISONED:
			fputs("(POISON)\n", file);
			return;
		case TYPE_VOID:
			fputs("(void)\n", file);
			return;
		case TYPE_BUILTIN:
			fputs("(builtin)\n", file);
			return;
		case TYPE_NIL:
			fputs("(nil)\n", file);
			return;
		case TYPE_STRING:
			fputs("(string)\n", file);
			return;
		// Unresolved name, with the module prefix only when one is present.
		case TYPE_UNRESOLVED:
			if (!type->unresolved.module.string)
			{
				fprintf(file, "(unresolved %s)\n", type->unresolved.name.string);
				return;
			}
			fprintf(file, "(unresolved %s::%s)\n", type->unresolved.module.string, type->unresolved.name.string);
			return;
		// Kinds with children.
		case TYPE_UNRESOLVED_EXPR:
			fputs("(unresolved\n", file);
			fprint_expr_recursive(file, type->unresolved_type_expr, indent + 1);
			fprint_endparen(file, indent);
			return;
		case TYPE_POINTER:
			fputs("(pointer\n", file);
			fprint_type_recursive(file, type->base, indent + 1);
			fprint_endparen(file, indent);
			return;
		case TYPE_ARRAY:
			fprintf(file, "(array [%zu]\n", type->len);
			fprint_type_recursive(file, type->base, indent + 1);
			fprint_endparen(file, indent);
			return;
		case TYPE_UNRESOLVED_ARRAY:
			fputs("(array\n", file);
			fprint_type_recursive(file, type->base, indent + 1);
			fprint_expr_recursive(file, type->unresolved_len, indent + 1);
			fprint_endparen(file, indent);
			return;
		// Kinds without dedicated output; listed explicitly so every kind is named.
		case TYPE_OPAQUE:
		case TYPE_INC_ARRAY:
		case TYPE_TYPEDEF:
		case TYPE_MACRO:
		case TYPE_FUNC_TYPE:
		case TYPE_ENUM:
		case TYPE_ERROR:
		case TYPE_FUNC:
		case TYPE_STRUCT:
		case TYPE_UNION:
		case TYPE_GENERIC:
			break;
	}
	fputs("(TYPETODO)\n", file);
}
/**
 * Prints `expr` as an indented s-expression, recursing into sub-expressions
 * and referenced types.
 *
 * A NULL `expr` is printed as "(none)", mirroring how fprint_type_recursive
 * treats a NULL type, instead of being dereferenced. Expression kinds without
 * a dedicated printer are printed as "(TODOEXPR)".
 *
 * @param file   stream to write to
 * @param expr   expression to print, may be NULL
 * @param indent nesting depth of this node
 */
void fprint_expr_recursive(FILE *file, Expr *expr, int indent)
{
	fprint_indent(file, indent);
	if (!expr)
	{
		fprintf(file, "(none)\n");
		return;
	}
	switch (expr->expr_kind)
	{
		// Leaf nodes print a complete "( ... )" line and return directly.
		case EXPR_IDENTIFIER:
			fprintf(file, "(ident %s)\n", expr->identifier_expr.identifier.string);
			return;
		case EXPR_CONST:
			fprintf(file, "(const ");
			value_fprint(file, expr->const_expr);
			fprintf(file, ")\n");
			return;
		// Every case below opens a node and leaves the closing ")" to the
		// shared fprint_endparen call after the switch.
		case EXPR_BINARY:
			fprintf(file, "(binary %s\n", token_type_to_string(binop_to_token(expr->binary_expr.operator)));
			fprint_expr_recursive(file, expr->binary_expr.left, indent + 1);
			fprint_expr_recursive(file, expr->binary_expr.right, indent + 1);
			break;
		case EXPR_UNARY:
			fprintf(file, "(unary %s\n", token_type_to_string(unaryop_to_token(expr->unary_expr.operator)));
			fprint_expr_recursive(file, expr->unary_expr.expr, indent + 1);
			break;
		case EXPR_POST_UNARY:
			fprintf(file, "(postunary %s\n", token_type_to_string(unaryop_to_token(expr->post_expr.operator)));
			fprint_expr_recursive(file, expr->post_expr.expr, indent + 1);
			break;
		case EXPR_METHOD_REF:
			fprintf(file, "(methodref .%s\n", expr->method_ref_expr.method.string);
			fprint_type_recursive(file, expr->method_ref_expr.type, indent + 1);
			break;
		case EXPR_STRUCT_VALUE:
			fprintf(file, "(structvalue\n");
			fprint_type_recursive(file, expr->struct_value_expr.type, indent + 1);
			fprint_expr_recursive(file, expr->struct_value_expr.init_expr, indent + 1);
			break;
		case EXPR_ACCESS:
			fprintf(file, "(access .%s\n", expr->access_expr.sub_element.string);
			fprint_expr_recursive(file, expr->access_expr.parent, indent + 1);
			break;
		case EXPR_TYPE:
			fprintf(file, "(type\n");
			fprint_type_recursive(file, expr->type_expr.type, indent + 1);
			break;
		case EXPR_CALL:
			fprintf(file, "(call\n");
			fprint_expr_recursive(file, expr->call_expr.function, indent + 1);
			{
				VECEACH(expr->call_expr.parameters, i)
				{
					fprint_expr_recursive(file, expr->call_expr.parameters[i], indent + 1);
				}
			}
			break;
		case EXPR_CONDITIONAL:
			// A missing then-branch marks the elvis form "cond ?: else".
			if (!expr->conditional_expr.then_expr)
			{
				fprintf(file, "(elvis\n");
				fprint_expr_recursive(file, expr->conditional_expr.cond, indent + 1);
			}
			else
			{
				fprintf(file, "(cond\n");
				fprint_expr_recursive(file, expr->conditional_expr.cond, indent + 1);
				fprint_expr_recursive(file, expr->conditional_expr.then_expr, indent + 1);
			}
			fprint_expr_recursive(file, expr->conditional_expr.else_expr, indent + 1);
			break;
		case EXPR_INITIALIZER_LIST:
			fprintf(file, "(initializerlist\n");
			{
				VECEACH(expr->initializer_expr, i)
				{
					fprint_expr_recursive(file, expr->initializer_expr[i], indent + 1);
				}
			}
			break;
		case EXPR_SUBSCRIPT:
			fprintf(file, "(subscript\n");
			fprint_expr_recursive(file, expr->subscript_expr.expr, indent + 1);
			fprint_expr_recursive(file, expr->subscript_expr.index, indent + 1);
			break;
		case EXPR_EXPRESSION_LIST:
			fprintf(file, "(expressionlist\n");
			{
				VECEACH(expr->expression_list, i)
				{
					fprint_expr_recursive(file, expr->expression_list[i], indent + 1);
				}
			}
			break;
		case EXPR_TRY:
			if (!expr->try_expr.else_expr)
			{
				fprintf(file, "(try\n");
				fprint_expr_recursive(file, expr->try_expr.expr, indent + 1);
			}
			else
			{
				fprintf(file, "(try-else\n");
				fprint_expr_recursive(file, expr->try_expr.expr, indent + 1);
				fprint_expr_recursive(file, expr->try_expr.else_expr, indent + 1);
			}
			break;
		default:
			fprintf(file, "(TODOEXPR)\n");
			return;
	}
	fprint_endparen(file, indent);
}
static void fprint_decl_list(FILE *file, Decl **decls, int indent);
static void fprint_ast_recursive(FILE *file, Ast *ast, int indent);
/**
 * Prints a function signature at depth `indent`: the return type first,
 * followed by a "(params ...)" node holding one entry per parameter.
 * The `throws` and `variadic` fields are not printed yet.
 */
void fprint_func_signature(FILE *file, FunctionSignature *signature, int indent)
{
	fprint_type_recursive(file, signature->rtype, indent);

	fprint_indent(file, indent);
	fputs("(params\n", file);
	fprint_decl_list(file, signature->params, indent + 1);
	fprint_endparen(file, indent);
	// TODO throws, variable
}
/**
 * Prints `decl` and everything below it as an indented s-expression.
 *
 * Cases that print a complete single-line node ("(name ...)\n") return
 * directly; cases that open a node ("(name ...\n") break out of the switch so
 * that the shared fprint_endparen call at the end writes the closing ")".
 *
 * @param file   stream to write to
 * @param decl   declaration to print, must not be NULL
 * @param indent nesting depth of this node
 */
void fprint_decl_recursive(FILE *file, Decl *decl, int indent)
{
	fprint_indent(file, indent);
	switch (decl->decl_kind)
	{
		case DECL_MULTI_DECL:
			fprintf(file, "(multi-decl\n");
			fprint_decl_list(file, decl->multi_decl, indent + 1);
			break;
		case DECL_VAR:
			if (!decl->var.init_expr)
			{
				fprintf(file, "(var %s)\n", decl->name.string);
				return;
			}
			fprintf(file, "(var %s\n", decl->name.string);
			fprint_expr_recursive(file, decl->var.init_expr, indent + 1);
			break;
		case DECL_MACRO:
			fprintf(file, "(macro %s\n", decl->name.string);
			fprint_type_recursive(file, decl->macro_decl.rtype, indent + 1);
			fprint_indent(file, indent + 1);
			fprintf(file, "(params\n");
			fprint_decl_list(file, decl->macro_decl.parameters, indent + 2);
			fprint_endparen(file, indent + 1);
			fprint_ast_recursive(file, decl->macro_decl.body, indent + 1);
			break;
		case DECL_FUNC:
			fprintf(file, "(func %s\n", decl->name.string);
			fprint_type_recursive(file, decl->func.struct_parent, indent + 1);
			fprint_func_signature(file, &decl->func.function_signature, indent + 1);
			fprint_ast_recursive(file, decl->func.body, indent + 1);
			break;
		case DECL_STRUCT:
			fprintf(file, "(struct %s\n", decl->name.string);
			fprint_decl_list(file, decl->strukt.members, indent + 1);
			break;
		case DECL_UNION:
			fprintf(file, "(union %s\n", decl->name.string);
			fprint_decl_list(file, decl->strukt.members, indent + 1);
			break;
		case DECL_ENUM:
			fprintf(file, "(enum %s\n", decl->name.string);
			fprint_type_recursive(file, decl->enums.type, indent + 1);
			fprint_decl_list(file, decl->enums.values, indent + 1);
			break;
		case DECL_ERROR:
			fprintf(file, "(error %s\n", decl->name.string);
			fprint_decl_list(file, decl->error.error_constants, indent + 1);
			break;
		case DECL_ENUM_CONSTANT:
			if (!decl->enum_constant.expr)
			{
				fprintf(file, "(enum-constant %s)\n", decl->name.string);
				return;
			}
			fprintf(file, "(enum-constant %s\n", decl->name.string);
			fprint_expr_recursive(file, decl->enum_constant.expr, indent + 1);
			break;
		case DECL_ERROR_CONSTANT:
			fprintf(file, "(error-constant %s)\n", decl->name.string);
			return;
		case DECL_GENERIC:
			fprintf(file, "(generic %s\n", decl->name.string);
			fprint_indent(file, indent + 1);
			fprintf(file, "(params\n");
			{
				VECEACH(decl->generic_decl.parameters, i)
				{
					fprint_indent(file, indent + 2);
					fprintf(file, "%s\n", decl->generic_decl.parameters[i].string);
				}
			}
			fprint_endparen(file, indent + 1);
			fprint_indent(file, indent + 1);
			fprintf(file, "(cases\n");
			{
				VECEACH(decl->generic_decl.cases, i)
				{
					fprint_ast_recursive(file, decl->generic_decl.cases[i], indent + 2);
				}
			}
			// "(cases" was opened at indent + 1, so it is closed at the same depth.
			fprint_endparen(file, indent + 1);
			break;
		case DECL_TYPEDEF:
			fprintf(file, "(typedef %s\n", decl->name.string);
			if (decl->typedef_decl.is_func)
			{
				fprint_func_signature(file, &decl->typedef_decl.function_signature, indent + 1);
			}
			else
			{
				fprint_type_recursive(file, decl->typedef_decl.type, indent + 1);
			}
			break;
		case DECL_CT_IF:
			fprintf(file, "(ct-if\n");
			fprint_expr_recursive(file, decl->ct_if_decl.expr, indent + 1);
			fprint_decl_list(file, decl->ct_if_decl.then, indent + 1);
			if (decl->ct_if_decl.elif)
			{
				fprint_decl_recursive(file, decl->ct_if_decl.elif, indent + 1);
			}
			break;
		case DECL_CT_ELIF:
			fprintf(file, "(ct-elif\n");
			fprint_expr_recursive(file, decl->ct_elif_decl.expr, indent + 1);
			fprint_decl_list(file, decl->ct_elif_decl.then, indent + 1);
			if (decl->ct_elif_decl.elif)
			{
				fprint_decl_recursive(file, decl->ct_elif_decl.elif, indent + 1);
			}
			break;
		case DECL_CT_ELSE:
			fprintf(file, "(ct-else\n");
			fprint_decl_list(file, decl->ct_else_decl, indent + 1);
			break;
		case DECL_POISONED:
			fprintf(file, "(poisoned-decl)\n");
			return;
		case DECL_BUILTIN:
			// The node is already closed on this line; returning avoids a
			// second, unbalanced ")" from the shared fprint_endparen below.
			fprintf(file, "(builtin %s)\n", decl->name.string);
			return;
		case DECL_FUNC_TYPE:
			TODO
			break;
		case DECL_ARRAY_VALUE:
			TODO
			break;
		case DECL_IMPORT:
			fprintf(file, "(import %s", decl->name.string);
			TODO
			break;
	}
	fprint_endparen(file, indent);
}
// Prints every declaration in the vector `decls`, in order, each at depth `indent`.
// Iteration is delegated to the project macro VECEACH.
static void fprint_decl_list(FILE *file, Decl **decls, int indent)
{
VECEACH(decls, i)
{
fprint_decl_recursive(file, decls[i], indent);
}
}
/**
 * Prints the statement node `ast` and everything below it as an indented
 * s-expression.
 *
 * Cases that print a complete single-line node return directly; cases that
 * open a node break out of the switch so that the shared fprint_endparen call
 * at the end writes the closing ")".
 *
 * @param file   stream to write to
 * @param ast    statement to print, must not be NULL
 * @param indent nesting depth of this node
 */
static void fprint_ast_recursive(FILE *file, Ast *ast, int indent)
{
	fprint_indent(file, indent);
	switch (ast->ast_kind)
	{
		case AST_COMPOUND_STMT:
			if (!ast->compound_stmt.stmts)
			{
				fprintf(file, "(compound)\n");
				return;
			}
			fprintf(file, "(compound\n");
			{
				VECEACH(ast->compound_stmt.stmts, i)
				{
					fprint_ast_recursive(file, ast->compound_stmt.stmts[i], indent + 1);
				}
			}
			break;
		case AST_DECLARE_STMT:
			fprintf(file, "(declare\n");
			fprint_decl_recursive(file, ast->declare_stmt, indent + 1);
			break;
		case AST_EXPR_STMT:
			fprintf(file, "(exprstmt\n");
			fprint_expr_recursive(file, ast->expr_stmt, indent + 1);
			break;
		case AST_WHILE_STMT:
			fprintf(file, "(while\n");
			fprint_ast_recursive(file, ast->while_stmt.cond, indent + 1);
			fprint_ast_recursive(file, ast->while_stmt.body, indent + 1);
			break;
		case AST_DO_STMT:
			fprintf(file, "(do\n");
			fprint_ast_recursive(file, ast->do_stmt.body, indent + 1);
			fprint_expr_recursive(file, ast->do_stmt.expr, indent + 1);
			break;
		case AST_RETURN_STMT:
			if (!ast->return_stmt.expr)
			{
				fprintf(file, "(return)\n");
				return;
			}
			fprintf(file, "(return\n");
			// Read the return statement's own member rather than relying on
			// expr_stmt sharing the same union offset.
			fprint_expr_recursive(file, ast->return_stmt.expr, indent + 1);
			break;
		case AST_BREAK_STMT:
			fprintf(file, "(break)\n");
			return;
		case AST_NEXT_STMT:
			fprintf(file, "(next)\n");
			return;
		case AST_CONTINUE_STMT:
			fprintf(file, "(continue)\n");
			return;
		case AST_DEFAULT_STMT:
			fprintf(file, "(default)\n");
			return;
		case AST_FOR_STMT:
			// Missing init/cond/incr clauses are printed as explicit placeholders.
			fprintf(file, "(for\n");
			if (ast->for_stmt.init)
			{
				fprint_ast_recursive(file, ast->for_stmt.init, indent + 1);
			}
			else
			{
				fprint_indent(file, indent + 1);
				fprintf(file, "(noinit)\n");
			}
			if (ast->for_stmt.cond)
			{
				fprint_expr_recursive(file, ast->for_stmt.cond, indent + 1);
			}
			else
			{
				fprint_indent(file, indent + 1);
				fprintf(file, "(nocond)\n");
			}
			if (ast->for_stmt.incr)
			{
				fprint_expr_recursive(file, ast->for_stmt.incr, indent + 1);
			}
			else
			{
				fprint_indent(file, indent + 1);
				fprintf(file, "(noincr)\n");
			}
			fprint_ast_recursive(file, ast->for_stmt.body, indent + 1);
			break;
		case AST_IF_STMT:
			fprintf(file, "(if\n");
			fprint_ast_recursive(file, ast->if_stmt.cond, indent + 1);
			fprint_ast_recursive(file, ast->if_stmt.then_body, indent + 1);
			if (ast->if_stmt.else_body)
			{
				fprint_ast_recursive(file, ast->if_stmt.else_body, indent + 1);
			}
			break;
		case AST_DECL_EXPR_LIST:
			fprintf(file, "(declexprlist\n");
			if (ast->decl_expr_list.list_type == DECLEXPR_EXPR)
			{
				fprint_expr_recursive(file, ast->decl_expr_list.expr, indent + 1);
			}
			else
			{
				fprint_decl_recursive(file, ast->decl_expr_list.decl, indent + 1);
			}
			break;
		case AST_COND_STMT:
			fprintf(file, "(condstmt\n");
			fprint_decl_recursive(file, ast->cond_stmt.decl, indent + 1);
			fprint_ast_recursive(file, ast->cond_stmt.decl_expr, indent + 1);
			break;
		case AST_SWITCH_STMT:
			// Labelled "switch" so the dump can be told apart from AST_COND_STMT.
			fprintf(file, "(switch\n");
			fprint_ast_recursive(file, ast->switch_stmt.cond, indent + 1);
			fprint_ast_recursive(file, ast->switch_stmt.body, indent + 1);
			break;
		case AST_CASE_STMT:
			fprintf(file, "(case\n");
			fprint_expr_recursive(file, ast->case_stmt.expr, indent + 1);
			break;
		case AST_DEFER_STMT:
			fprintf(file, "(defer\n");
			fprint_ast_recursive(file, ast->defer_stmt.body, indent + 1);
			break;
		case AST_GENERIC_CASE_STMT:
			fprintf(file, "(generic-case\n");
			fprint_indent(file, indent + 1);
			fprintf(file, "(match\n");
			{
				VECEACH(ast->generic_case_stmt.types, i)
				{
					fprint_type_recursive(file, ast->generic_case_stmt.types[i], indent + 2);
				}
			}
			fprint_endparen(file, indent + 1);
			fprint_ast_recursive(file, ast->generic_case_stmt.body, indent + 1);
			break;
		case AST_GENERIC_DEFAULT_STMT:
			fprintf(file, "(generic-default\n");
			fprint_ast_recursive(file, ast->generic_default_stmt, indent + 1);
			break;
		case AST_POISONED:
			fprintf(file, "(ast-poisoned)\n");
			return;
		case AST_ASM_STMT:
			TODO
			break;
		case AST_ATTRIBUTE:
			TODO
			break;
		case AST_CATCH_STMT:
			TODO
			break;
		case AST_CT_IF_STMT:
			TODO
			break;
		case AST_CT_ELIF_STMT:
			TODO
			break;
		case AST_CT_ELSE_STMT:
			TODO
			break;
		case AST_GOTO_STMT:
			fprintf(file, "(goto %s)\n", ast->token.string);
			return;
		case AST_LABEL:
			fprintf(file, "(label %s)\n", ast->token.string);
			return;
		case AST_NOP_STMT:
			TODO
			break;
		case AST_THROW_STMT:
			fprintf(file, "(throw\n");
			fprint_expr_recursive(file, ast->throw_stmt, indent + 1);
			break;
		case AST_TRY_STMT:
			TODO
			break;
		case AST_VOLATILE_STMT:
			TODO
			break;
	}
	fprint_endparen(file, indent);
}
// Public entry point: prints the statement tree rooted at `ast` to `file`, starting at depth 0.
void fprint_ast(FILE *file, Ast *ast)
{
fprint_ast_recursive(file, ast, 0);
}
// Public entry point: prints the declaration `dec` and its children to `file`, starting at depth 0.
void fprint_decl(FILE *file, Decl *dec)
{
fprint_decl_recursive(file, dec, 0);
}
// Sentinel module named "INVALID"; every other field is zero-initialized.
Module module_poisoned = { .name = "INVALID" };

812
src/compiler/ast.h Normal file
View File

@@ -0,0 +1,812 @@
#pragma once
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "compiler_common.h"
#include "symtab.h"
#include "value.h"
// Number classification; stored in the `num_type` bit-field of struct _Type.
typedef enum _NumberType
{
NUMBER_TYPE_BOOL,
NUMBER_TYPE_FLOAT,
NUMBER_TYPE_SIGNED_INT,
NUMBER_TYPE_UNSIGNED_INT,
} NumberType;
// Discriminator for struct _Type (field `type_kind`).
// IF ORDER IS CHANGED, rewrite type_implicit_convert_ordered
typedef enum _TypeKind
{
TYPE_POISONED,
TYPE_UNRESOLVED,
TYPE_UNRESOLVED_EXPR,
TYPE_VOID,
TYPE_OPAQUE,
TYPE_BUILTIN,
TYPE_NIL,
TYPE_POINTER,
TYPE_STRING,
TYPE_ARRAY,
TYPE_INC_ARRAY,
TYPE_UNRESOLVED_ARRAY,
TYPE_TYPEDEF,
TYPE_MACRO,
TYPE_FUNC_TYPE,
TYPE_ENUM,
TYPE_ERROR,
TYPE_FUNC,
TYPE_STRUCT,
TYPE_UNION,
TYPE_GENERIC,
} TypeKind;
// A type node. `type_kind` selects which member of the anonymous union is meaningful.
struct _Type
{
TypeKind type_kind;
union
{
// Size and number classification.
// NOTE(review): presumably used for builtin number types — confirm.
struct
{
unsigned char bytes;
unsigned char bitsize;
NumberType num_type : 8;
};
// NOTE(review): presumably the declaration backing a user-defined type — confirm.
Decl *decl;
// TYPE_UNRESOLVED: the name as written; `module.string` is NULL when no module qualifier was given.
struct
{
Token module;
Token name;
} unresolved;
// TYPE_UNRESOLVED_EXPR: the expression that still has to be resolved to a type.
Expr *unresolved_type_expr;
// Pointer and array kinds: `base` is the pointee / element type.
struct
{
union
{
// TYPE_UNRESOLVED_ARRAY: length expression not yet evaluated.
Expr *unresolved_len;
// TYPE_ARRAY: element count.
size_t len;
};
Type *base;
bool nullable : 1;
};
};
Type *canonical_type;
};
void type_setup(int pointer_size);
Type *type_new(TypeKind type_kind);
Type *type_poisoned();
char *type_to_string(Type *type);
/**
 * Returns true when `type` has kind TYPE_POISONED.
 *
 * `type` must not be NULL. Declared `static inline`, like the other helpers
 * in this header, so that including translation units which never call it do
 * not carry an unused static function.
 */
static inline bool type_is_poison(Type *type)
{
	return type->type_kind == TYPE_POISONED;
}
/**
 * Returns true when `type` is NULL or is not of kind TYPE_POISONED.
 *
 * Declared `static inline`, like the other helpers in this header, so that
 * including translation units which never call it do not carry an unused
 * static function.
 */
static inline bool type_ok(Type *type)
{
	return !type || type->type_kind != TYPE_POISONED;
}
extern Type poisoned_type;
extern Type type_bool, type_void, type_nil, type_string;
extern Type type_half, type_float, type_double, type_quad;
extern Type type_char, type_short, type_int, type_long, type_isize;
extern Type type_byte, type_ushort, type_uint, type_ulong, type_usize;
extern Type type_compint, type_compfloat;
// Attribute states; only "invalid" and "unresolved" are declared here.
typedef enum _AttrKind
{
ATTR_INVALID,
ATTR_UNRESOLVED,
} AttrKind;
// An attribute: a (module, name) token pair plus an expression.
// NOTE(review): no AttrKind field is stored in this struct — confirm whether one is intended.
typedef struct _Attr
{
Token module;
Token name;
union
{
Expr *expr;
};
} Attr;
// Payload of a DECL_ERROR declaration (Decl.error): the vector of its error constants.
typedef struct _ErrorDecl
{
Decl **error_constants;
} ErrorDecl;
// Payload of a DECL_IMPORT declaration (Decl.import).
typedef struct _ImportDecl
{
ImportType type : 3;
Token alias;
Expr** generic_parameters;
struct _Module *module;
} ImportDecl;
// Payload shared by DECL_STRUCT and DECL_UNION declarations (Decl.strukt).
typedef struct _StructDecl
{
Decl **members;
Decl **method_functions;
} StructDecl;
// Kinds of variable declaration; stored in VarDecl.kind.
typedef enum _VarDeclKind {
VARDECL_CONST = 0,
VARDECL_GLOBAL = 1,
VARDECL_LOCAL = 2,
VARDECL_PARAM = 3,
VARDECL_MEMBER = 4,
VARDECL_MULTI = 5,
} VarDeclKind;
// Payload of a DECL_VAR declaration (Decl.var).
typedef struct _VarDecl
{
// NOTE(review): values up to 5 fit in 3 bits only if the compiler treats this enum bit-field as unsigned — confirm.
VarDeclKind kind : 3;
Type *type;
Expr *init_expr; // NULL when the variable has no initializer.
} VarDecl;
// Payload shared by DECL_CT_IF and DECL_CT_ELIF declarations.
typedef struct _CtIfDecl
{
Expr *expr;
Decl **then;
Decl *elif; // Next branch in the chain, or NULL.
} CtIfDecl;
// Discriminator for struct _Decl (field `decl_kind`).
// DECL_POISONED is 0, so a zero-filled Decl reads as poisoned.
typedef enum _DeclKind
{
DECL_POISONED = 0,
DECL_BUILTIN,
DECL_FUNC,
DECL_VAR,
DECL_ENUM_CONSTANT,
DECL_TYPEDEF,
DECL_STRUCT,
DECL_UNION,
DECL_ENUM,
DECL_ERROR,
DECL_ERROR_CONSTANT,
DECL_FUNC_TYPE,
DECL_ARRAY_VALUE,
DECL_IMPORT,
DECL_MACRO,
DECL_MULTI_DECL,
DECL_GENERIC,
DECL_CT_IF,
DECL_CT_ELSE,
DECL_CT_ELIF,
} DeclKind;
/**
 * Returns true for the declaration kinds that can name a type: typedef,
 * struct, union, enum, error, function type and builtin. Every other kind
 * yields false.
 */
static inline bool decl_may_be_type(DeclKind kind)
{
	return kind == DECL_TYPEDEF
	       || kind == DECL_STRUCT
	       || kind == DECL_UNION
	       || kind == DECL_ENUM
	       || kind == DECL_ERROR
	       || kind == DECL_FUNC_TYPE
	       || kind == DECL_BUILTIN;
}
// Payload of a DECL_ENUM_CONSTANT declaration (Decl.enum_constant).
typedef struct _EnumConstantDecl
{
Expr *expr; // Explicit value expression, or NULL when none was written.
} EnumConstantDecl;
// Payload of a DECL_ERROR_CONSTANT declaration (Decl.error_constant).
typedef struct _ErrorConstantDecl
{
uint32_t value;
} ErrorConstantDecl;
// Payload of a DECL_ENUM declaration (Decl.enums).
typedef struct _EnumDecl
{
Decl** values;
Type *type;
} EnumDecl;
// Return type, parameters and throws list of a function or function typedef.
typedef struct _FunctionSignature
{
bool variadic : 1;
Type *rtype;
Decl** params;
Token *throws;
} FunctionSignature;
// Payload of a DECL_FUNC declaration (Decl.func).
typedef struct _FuncDecl
{
const char *full_name;
Type *struct_parent;
FunctionSignature function_signature;
Ast *body;
} FuncDecl;
// Payload of a DECL_TYPEDEF declaration; `is_func` selects the union member.
typedef struct _TypedefDecl
{
bool is_func : 1;
union
{
FunctionSignature function_signature; // Valid when is_func is set.
Type *type; // Valid when is_func is clear.
};
} TypedefDecl;
// Payload of a DECL_MACRO declaration (Decl.macro_decl).
typedef struct _MacroDecl
{
Decl **parameters;
Type *rtype; // May be null!
Ast *body;
} MacroDecl;
// Payload of a DECL_GENERIC declaration (Decl.generic_decl).
typedef struct _GenericDecl
{
Ast **cases;
Token *parameters;
Type *rtype; // May be null!
} GenericDecl;
// A declaration node. `decl_kind` selects which member of the trailing union is meaningful.
struct _Decl
{
DeclKind decl_kind : 6;
bool is_exported : 1;
Visibility visibility : 2;
ResolveStatus resolve_status : 2;
bool is_used : 1;
bool is_used_public : 1;
bool has_cname : 1;
uint32_t alignment : 5;
union
{
uint32_t offset;
uint32_t counter;
};
uint32_t size;
Token name;
struct _Module *module;
Attr** attributes;
Type *type;
// NOTE(review): the comment below says three stars but the array holds two entries — confirm which is intended.
Type *pointer_types[2]; // Up to three stars
union
{
ErrorDecl error;
ErrorConstantDecl error_constant;
ImportDecl import;
StructDecl strukt; // DECL_STRUCT and DECL_UNION
VarDecl var;
EnumDecl enums;
EnumConstantDecl enum_constant;
FuncDecl func;
TypedefDecl typedef_decl;
Decl** multi_decl;
MacroDecl macro_decl;
GenericDecl generic_decl;
CtIfDecl ct_if_decl;
CtIfDecl ct_elif_decl;
Decl** ct_else_decl;
/*
QualifiedType alias;
SourceRange unparsed_alias;
EnumConstantDecl enum_constant;
MacroDecl macro_decl;
FuncTypeDecl func_type;
ArrayDecl array_decl;
MacroParmDecl macro_param;*/
};
};
Decl *decl_new_in_module(struct _Module *module, DeclKind decl_kind, Token name, Visibility visibility);
Decl *decl_new_self_type(struct _Module *module, Token name, DeclKind decl_type, TypeKind type_kind, Visibility visibility);
Decl *decl_new_var(struct _Module *module, Token name, Type *type, VarDeclKind kind, Visibility visibility);
Decl *decl_new_enum_const(Decl *parent, Token name, DeclKind kind);
Decl *struct_find_name(Decl *decl, const char* name);
// Marks `decl` as poisoned in place (kind becomes DECL_POISONED) and returns the same pointer.
// `decl` must not be NULL.
static inline Decl *decl_poison(Decl *decl)
{
decl->decl_kind = DECL_POISONED;
return decl;
}
/**
 * Maps the `struct` / `union` keyword tokens to their declaration kinds.
 * Any other token type reaches UNREACHABLE.
 */
static inline DeclKind decl_from_token(TokenType type)
{
	switch (type)
	{
		case TOKEN_STRUCT:
			return DECL_STRUCT;
		case TOKEN_UNION:
			return DECL_UNION;
		default:
			break;
	}
	UNREACHABLE
}
/**
 * Returns true unless `decl` is poisoned. Unlike expr_ok and ast_ok, this
 * helper does not accept NULL.
 */
static inline bool decl_ok(Decl *decl)
{
	bool poisoned = decl->decl_kind == DECL_POISONED;
	return !poisoned;
}
extern Decl poisoned_decl;
// Binary operators; stored in ExprBinary.operator. BINOP_ERROR is the zero value.
typedef enum _BinOp
{
BINOP_ERROR,
BINOP_ASSIGN,
BINOP_MULT,
BINOP_MULT_ASSIGN,
BINOP_ADD,
BINOP_ADD_ASSIGN,
BINOP_SUB,
BINOP_SUB_ASSIGN,
BINOP_DIV,
BINOP_DIV_ASSIGN,
BINOP_MOD,
BINOP_MOD_ASSIGN,
BINOP_AND,
BINOP_AND_ASSIGN,
BINOP_OR,
BINOP_OR_ASSIGN,
BINOP_BIT_AND,
BINOP_BIT_AND_ASSIGN,
BINOP_BIT_OR,
BINOP_BIT_OR_ASSIGN,
BINOP_BIT_XOR,
BINOP_BIT_XOR_ASSIGN,
BINOP_NE,
BINOP_EQ,
BINOP_GE,
BINOP_GT,
BINOP_LE,
BINOP_LT,
BINOP_SHR,
BINOP_SHR_ASSIGN,
BINOP_SHL,
BINOP_SHL_ASSIGN,
BINOP_ELVIS
} BinOp;
// Assignment operators; stored in ExprAssign.operator. ASSIGNOP_ERROR is the zero value.
typedef enum _AssignOp
{
ASSIGNOP_ERROR,
ASSIGNOP_ASSIGN,
ASSIGNOP_MULT_ASSIGN,
ASSIGNOP_ADD_ASSIGN,
ASSIGNOP_SUB_ASSIGN,
ASSIGNOP_DIV_ASSIGN,
ASSIGNOP_MOD_ASSIGN,
ASSIGNOP_AND_ASSIGN,
ASSIGNOP_OR_ASSIGN,
ASSIGNOP_BIT_AND_ASSIGN,
ASSIGNOP_BIT_OR_ASSIGN,
ASSIGNOP_BIT_XOR_ASSIGN,
ASSIGNOP_SHR_ASSIGN,
ASSIGNOP_SHL_ASSIGN,
} AssignOp;
// Unary operators; stored in ExprUnary.operator for both prefix and postfix expressions.
typedef enum _UnaryOp
{
UNARYOP_ERROR,
UNARYOP_DEREF,
UNARYOP_ADDR,
UNARYOP_NEG,
UNARYOP_BITNEG,
UNARYOP_NOT,
UNARYOP_INC,
UNARYOP_DEC,
} UnaryOp;
// Discriminator for struct _Expr (field `expr_kind`).
// EXPR_POISONED is 0, so a zero-filled Expr reads as poisoned.
typedef enum _ExprKind
{
EXPR_POISONED,
EXPR_TRY,
EXPR_CONST,
EXPR_BINARY,
EXPR_CONDITIONAL,
EXPR_UNARY,
EXPR_POST_UNARY,
EXPR_TYPE,
EXPR_IDENTIFIER,
EXPR_METHOD_REF,
EXPR_CALL,
EXPR_SIZEOF,
EXPR_SUBSCRIPT,
EXPR_ACCESS,
EXPR_STRUCT_VALUE,
EXPR_STRUCT_INIT_VALUES,
EXPR_INITIALIZER_LIST,
EXPR_EXPRESSION_LIST,
EXPR_DEFERRED_TOKENS,
} ExprKind;
// EXPR_TRY payload.
typedef struct _ExprTry
{
Expr *expr;
Expr *else_expr; // NULL for a plain "try" without an else part.
} ExprTry;
// EXPR_METHOD_REF payload: a method name looked up on a type.
typedef struct _ExprMethodRef
{
Type *type;
Token method;
} ExprMethodRef;
// EXPR_STRUCT_VALUE payload: a type together with its initializer expression.
typedef struct _ExprStructValue
{
Type *type;
Expr *init_expr;
} ExprStructValue;
// EXPR_CONDITIONAL payload (ternary and elvis forms).
typedef struct _ExprTernary
{
Expr *cond;
Expr *then_expr; // May be null for elvis!
Expr *else_expr;
} ExprTernary;
// EXPR_BINARY payload.
typedef struct _ExprBinary
{
Expr *left;
Expr *right;
BinOp operator;
} ExprBinary;
// Assignment payload (Expr.assign_expr); same layout as ExprBinary but with an AssignOp.
typedef struct _ExprAssign
{
Expr *left;
Expr *right;
AssignOp operator;
} ExprAssign;
// Payload shared by EXPR_UNARY (Expr.unary_expr) and EXPR_POST_UNARY (Expr.post_expr).
typedef struct _ExprUnary
{
Expr* expr;
UnaryOp operator;
} ExprUnary;
// EXPR_CALL payload: the callee expression and the argument vector.
typedef struct _ExprCall
{
bool is_struct_function;
Expr *function;
Expr **parameters;
} ExprCall;
// EXPR_SUBSCRIPT payload: the indexed expression and the index.
typedef struct _ExprSubscript
{
Expr *expr;
Expr *index;
} ExprSubscript;
// EXPR_ACCESS payload: member `sub_element` accessed on `parent`.
typedef struct _ExprAccess
{
Expr *parent;
Token sub_element;
} ExprAccess;
// EXPR_IDENTIFIER payload.
typedef struct _ExprIdentifier
{
Token module;
Token identifier;
bool is_ref;
Decl *decl;
} ExprIdentifier;
// EXPR_TYPE payload.
typedef struct _ExprType
{
Type *type;
} ExprType;
// An expression node. `expr_kind` selects which member of the union is meaningful.
struct _Expr
{
ExprKind expr_kind : 8;
Token loc; // Token passed to expr_new; EXPR_NEW_EXPR reuses it for derived expressions.
Type *type;
union {
Token* deferred_tokens;
Token deferred_token;
Value const_expr;
ExprStructValue struct_value_expr;
ExprMethodRef method_ref_expr;
ExprTry try_expr;
ExprBinary binary_expr;
ExprAssign assign_expr;
ExprTernary conditional_expr;
ExprUnary unary_expr;
ExprUnary post_expr;
ExprCall call_expr;
ExprSubscript subscript_expr;
ExprAccess access_expr;
ExprIdentifier identifier_expr;
ExprType type_expr;
Expr** initializer_expr;
Expr** expression_list;
/*
Value const_expr;
ExprPost post_expr;
ExprStructInitValues struct_init_values_expr;
ExprDesignatedInitializer designated_initializer_expr;
ExprSizeof sizeof_expr;
ExprCast cast_expr;*/
};
};
Expr *expr_new(ExprKind kind, Token start);
// Creates an expression of `_kind` positioned at the location of the existing expression `_expr`.
// The argument is parenthesized so that pointer expressions such as `p + 1` or `*pp` expand correctly.
#define EXPR_NEW_EXPR(_kind, _expr) expr_new(_kind, (_expr)->loc)
// Creates an expression of `_kind` positioned at the token `_tok`.
#define EXPR_NEW_TOKEN(_kind, _tok) expr_new(_kind, _tok)
AssignOp assignop_from_token(TokenType type);
BinOp binop_from_token(TokenType type);
UnaryOp unaryop_from_token(TokenType type);
extern Expr poisoned_expr;
/**
 * Returns true when `expr` is NULL or is not poisoned.
 */
static inline bool expr_ok(Expr *expr)
{
	if (expr == NULL) return true;
	return expr->expr_kind != EXPR_POISONED;
}
// Placeholder payload for AST_ATTRIBUTE; it has no members yet.
// NOTE(review): an empty struct is a compiler extension rather than standard C — confirm the supported compilers accept it.
typedef struct _AstAttribute
{
} AstAttribute;
// Discriminator for struct _Ast (field `ast_kind`).
// AST_POISONED is 0, so a zero-filled Ast reads as poisoned until its kind is set.
typedef enum _AstKind
{
AST_POISONED,
AST_ASM_STMT,
AST_ATTRIBUTE,
AST_BREAK_STMT,
AST_CASE_STMT,
AST_CATCH_STMT,
AST_COMPOUND_STMT,
AST_COND_STMT,
AST_CONTINUE_STMT,
AST_CT_IF_STMT,
AST_CT_ELIF_STMT,
AST_CT_ELSE_STMT,
AST_DECLARE_STMT,
AST_DECL_EXPR_LIST,
AST_DEFAULT_STMT,
AST_DEFER_STMT,
AST_DO_STMT,
AST_EXPR_STMT,
AST_FOR_STMT,
AST_GOTO_STMT,
AST_IF_STMT,
AST_LABEL,
AST_NOP_STMT,
AST_RETURN_STMT,
AST_SWITCH_STMT,
AST_THROW_STMT,
AST_TRY_STMT,
AST_NEXT_STMT,
AST_VOLATILE_STMT,
AST_WHILE_STMT,
AST_GENERIC_CASE_STMT,
AST_GENERIC_DEFAULT_STMT,
} AstKind;
// How control leaves a statement; stored in Ast.exit (new_ast initializes it to EXIT_NONE).
// Ordering here is in priority if two branches should have the same exit.
typedef enum _ExitType
{
EXIT_NONE,
EXIT_BREAK,
EXIT_GOTO,
EXIT_CONTINUE,
EXIT_RETURN,
} ExitType;
// AST_COMPOUND_STMT payload: the vector of contained statements (NULL when the block is empty).
typedef struct _AstCompoundStmt
{
Ast **stmts;
// DeferList defer_list; TODO
} AstCompoundStmt;
// Label payload (Ast.label_stmt). Note that the struct tag is _AstLabel while the typedef is AstLabelStmt.
typedef struct _AstLabel
{
uint16_t last_goto;
bool is_used : 1;
Ast *defer;
Ast *in_defer;
} AstLabelStmt;
// AST_RETURN_STMT payload.
typedef struct _AstReturnStmt
{
Expr *expr; // May be NULL
Ast *defer;
} AstReturnStmt;
// AST_WHILE_STMT payload; the condition is itself an Ast node.
typedef struct _AstWhileStmt
{
Ast *cond;
Ast *body;
} AstWhileStmt;
// AST_DO_STMT payload; here the condition is a plain expression.
typedef struct _AstDoStmt
{
Expr *expr;
Ast *body;
} AstDoStmt;
// AST_IF_STMT payload.
typedef struct _AstIfStmt
{
Ast *cond;
Ast *then_body;
Ast *else_body; // NULL when there is no else branch.
} AstIfStmt;
// AST_CASE_STMT payload: the case value expression.
typedef struct _AstCaseStmt
{
Expr *expr;
} AstCaseStmt;
// AST_SWITCH_STMT payload.
typedef struct _AstSwitchStmt
{
Ast *cond;
Ast *body;
} AstSwitchStmt;
// AST_FOR_STMT payload; init, cond and incr may each be NULL when the clause is omitted.
typedef struct _AstForStmt
{
Ast *init;
Expr *cond;
Expr *incr;
Ast *body;
} AstForStmt;
// Selects which member of AstDeclExprList's union is valid.
typedef enum DeclExprType
{
DECLEXPR_DECL,
DECLEXPR_EXPR,
} DeclExprType;
// AST_COND_STMT payload: a declaration followed by a decl/expr list node.
typedef struct _AstCondStmt
{
Decl *decl;
Ast *decl_expr;
} AstCondStmt;
// AST_DECL_EXPR_LIST payload: either a declaration or an expression, chosen by `list_type`.
typedef struct _AstDeclExprList
{
DeclExprType list_type : 2;
union
{
Decl *decl; // Valid when list_type is DECLEXPR_DECL.
Expr *expr; // Valid when list_type is DECLEXPR_EXPR.
};
} AstDeclExprList;
// Analysis state of a goto statement; stored in AstGotoStmt.type.
typedef enum _GotoType
{
GOTO_NOT_ANALYSED,
GOTO_JUMP_FORWARD,
GOTO_JUMP_BACK
} GotoType;
// Goto payload (Ast.goto_stmt).
typedef struct _AstGotoStmt
{
GotoType type : 2;
Ast *defer;
union
{
Ast *in_defer;
Ast *defer_end;
};
} AstGotoStmt;
// AST_DEFER_STMT payload.
typedef struct _AstDeferStmt
{
bool emit_boolean : 1;
Ast *body; // Compound statement
Ast *prev_defer;
} AstDeferStmt;
// Catch payload (Ast.catch_stmt).
typedef struct _AstCatchStmt
{
Decl *error_param;
Ast *body;
} AstCatchStmt;
// Payload shared by the compile-time if and elif statements (Ast.ct_if_stmt / Ast.ct_elif_stmt).
typedef struct _AstCtIfStmt
{
Expr *expr;
Ast *then;
Ast *elif;
} AstCtIfStmt;
// AST_GENERIC_CASE_STMT payload: the types matched by this case and the body to use.
typedef struct _AstGenericCaseStmt
{
Type **types;
Ast *body;
} AstGenericCaseStmt;
// A statement node. `ast_kind` selects which member of the union is meaningful.
// NOTE(review): expr_stmt, throw_stmt and return_stmt.expr all sit at the start of the union,
// so reading the wrong member can go unnoticed — use the member that matches ast_kind.
typedef struct _Ast
{
AstKind ast_kind : 8;
ExitType exit : 3;
Token token; // Token the node was created from; its string is printed for goto and label nodes.
union
{
AstAttribute attribute;
AstCompoundStmt compound_stmt;
Decl *declare_stmt;
Expr *expr_stmt;
Expr *throw_stmt;
Ast *volatile_stmt;
Ast *try_stmt;
AstLabelStmt label_stmt;
AstReturnStmt return_stmt;
AstWhileStmt while_stmt;
AstDoStmt do_stmt;
AstIfStmt if_stmt;
AstDeferStmt defer_stmt;
AstSwitchStmt switch_stmt;
AstCaseStmt case_stmt;
AstCatchStmt catch_stmt;
AstGotoStmt goto_stmt;
AstForStmt for_stmt;
AstCondStmt cond_stmt;
AstCtIfStmt ct_if_stmt;
AstCtIfStmt ct_elif_stmt;
Ast* ct_else_stmt;
AstDeclExprList decl_expr_list;
AstGenericCaseStmt generic_case_stmt;
Ast* generic_default_stmt;
};
} Ast;
#define NEW_AST(_kind, _token) new_ast(_kind, _token)
void *malloc_arena(size_t mem);
/**
 * Allocates a zero-filled Ast node from the arena and stamps it with `kind`
 * and the `token` it originates from. The exit type starts as EXIT_NONE.
 *
 * @return the new node
 */
static inline Ast *new_ast(AstKind kind, Token token)
{
	Ast *node = malloc_arena(sizeof *node);
	memset(node, 0, sizeof *node);
	node->ast_kind = kind;
	node->token = token;
	node->exit = EXIT_NONE;
	return node;
}
extern Ast poisoned_ast;
/**
 * Returns true when `ast` is NULL or is not poisoned.
 */
static inline bool ast_ok(Ast *ast)
{
	if (ast == NULL) return true;
	return ast->ast_kind != AST_POISONED;
}
// Marks `ast` as poisoned in place by setting its kind to AST_POISONED. `ast` must not be NULL.
static inline void ast_poison(Ast *ast)
{
ast->ast_kind = AST_POISONED;
}
// A module: its name, a few flags, and its symbol tables.
typedef struct _Module
{
const char *name; // Compared by pointer in context.c. NOTE(review): presumably an interned string — confirm.
bool is_external;
bool is_c_library;
bool is_exported;
Ast **files; // Asts
Decl** functions;
STable struct_functions;
STable symbols;
STable public_symbols;
} Module;
extern Module module_poisoned;
void fprint_ast(FILE *file, Ast *ast);
void fprint_decl(FILE *file, Decl *dec);
void fprint_type_recursive(FILE *file, Type *type, int indent);
void fprint_expr_recursive(FILE *file, Expr *expr, int indent);

2096
src/compiler/bigint.c Normal file

File diff suppressed because it is too large Load Diff

65
src/compiler/bigint.h Normal file
View File

@@ -0,0 +1,65 @@
#pragma once
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "compiler_common.h"
// Sign-and-magnitude big integer made of 64-bit digits.
// NOTE(review): the union suggests `digit` is used inline for a single digit and `digits`
// points to storage when there are more — confirm against bigint.c.
typedef struct _BigInt
{
unsigned digit_count;
bool is_negative;
union {
uint64_t digit;
uint64_t *digits;
};
} BigInt;
// Three-way comparison result returned by bigint_cmp and bigint_cmp_zero.
// Note the declaration order: LT = 0, GT = 1, EQ = 2.
typedef enum _CmpRes
{
CMP_LT,
CMP_GT,
CMP_EQ,
} CmpRes;
void bigint_init_unsigned(BigInt *big_int, uint64_t value);
void bigint_init_signed(BigInt *big_int, int64_t value);
void bigint_init_bigint(BigInt *dest, const BigInt *src);
void bigint_init_data(BigInt *dest, const uint64_t *digits, unsigned int digit_count, bool is_negative);
void bigint_negate(BigInt *dest, const BigInt *source);
size_t bigint_clz(const BigInt *big_int, size_t bit_count);
size_t bigint_ctz(const BigInt *big_int, size_t bit_count);
bool bigint_fits_in_bits(const BigInt *big_int, size_t bit_count, bool is_signed);
void bigint_write_twos_complement(const BigInt *big_int, uint8_t *buf, size_t bit_count, bool is_big_endian);
void bigint_read_twos_complement(BigInt *dest, const uint8_t *buf, size_t bit_count, bool is_big_endian, bool is_signed);
void bigint_add(BigInt *dest, const BigInt *op1, const BigInt *op2);
void bigint_add_wrap(BigInt *dest, const BigInt *op1, const BigInt *op2, size_t bit_count, bool is_signed);
void bigint_sub(BigInt *dest, const BigInt *op1, const BigInt *op2);
void bigint_mul(BigInt *dest, const BigInt *op1, const BigInt *op2);
void bigint_mul_wrap(BigInt *dest, const BigInt *op1, const BigInt *op2, size_t bit_count, bool is_signed);
void bigint_rem(BigInt *dest, const BigInt *op1, const BigInt *op2);
void bigint_mod(BigInt *dest, const BigInt *op1, const BigInt *op2);
void bigint_shl(BigInt *dest, const BigInt *op1, const BigInt *op2);
void bigint_shl_int(BigInt *dest, const BigInt *op1, uint64_t shift);
void bigint_shl_trunc(BigInt *dest, const BigInt *op1, const BigInt *op2, size_t bit_count, bool is_signed);
void bigint_shr(BigInt *dest, const BigInt *op1, const BigInt *op2);
void bigint_div_floor(BigInt *dest, const BigInt *op1, const BigInt *op2);
void bigint_or(BigInt *dest, const BigInt *op1, const BigInt *op2);
void bigint_and(BigInt *dest, const BigInt *op1, const BigInt *op2);
void bigint_xor(BigInt *dest, const BigInt *op1, const BigInt *op2);
void bigint_negate_wrap(BigInt *dest, const BigInt *op, size_t bit_count);
void bigint_not(BigInt *dest, const BigInt *op, size_t bit_count, bool is_signed);
bool bigint_eql(BigInt a, BigInt b);
CmpRes bigint_cmp(const BigInt *op1, const BigInt *op2);
CmpRes bigint_cmp_zero(const BigInt *op);
uint32_t bigint_hash(BigInt x);
void bigint_print(BigInt *bigint, uint64_t base);
void bigint_fprint(FILE *file, BigInt *bigint, uint64_t base);
uint64_t bigint_as_unsigned(const BigInt *bigint);
int64_t bigint_as_signed(const BigInt *bigint);
long double bigint_as_float(const BigInt *bigint);
void bigint_truncate(BigInt *dst, const BigInt *op, size_t bit_count, bool is_signed);
void bigint_incr(BigInt *x);
size_t bigint_popcount_signed(const BigInt *bi, size_t bit_count);
size_t bigint_popcount_unsigned(const BigInt *big_int);

View File

@@ -12,6 +12,7 @@
#include "source_file.h"
#include "parser.h"
#include "diagnostics.h"
#include "semantic_analyser.h"
void compiler_init(void)
{
@@ -39,6 +40,7 @@ static void compiler_lex()
void compiler_parse()
{
type_setup(build_options.pointer_size);
VECEACH(build_options.files, i)
{
bool loaded = false;
@@ -46,7 +48,22 @@ void compiler_parse()
if (loaded) continue;
diag_reset();
parse_file(file);
printf("\n");
context_print_ast(current_context, stdout);
}
exit(EXIT_SUCCESS);
}
// Runs type setup, then for each input file: load, reset diagnostics, parse, and run semantic
// analysis on current_context. Does not return: the process exits when the loop is done.
void compiler_compile()
{
type_setup(build_options.pointer_size);
VECEACH(build_options.files, i)
{
bool loaded = false;
File *file = source_file_load(build_options.files[i], &loaded);
// Skip files that source_file_load flags through `loaded`.
// NOTE(review): presumably this means the file was already loaded earlier — confirm.
if (loaded) continue;
diag_reset();
parse_file(file);
sema_analysis(current_context);
}
// NOTE(review): exits with EXIT_SUCCESS unconditionally; no error count is checked here — confirm that is intended.
exit(EXIT_SUCCESS);
}
@@ -63,7 +80,10 @@ void compile_file()
compiler_parse();
break;
default:
compiler_compile();
break;
}
TODO
}
}

View File

@@ -6,9 +6,37 @@
#include "tokens.h"
#include "utils/common.h"
#include "symtab.h"
// Source location stored as a 32-bit value.
typedef uint32_t SourceLoc;
// Sentinel location: the largest SourceLoc value.
#define INVALID_LOC UINT32_MAX
// Sentinel range whose `loc` is the invalid location; the remaining fields are zero.
#define INVALID_RANGE ((SourceRange){ .loc = UINT32_MAX })
// Token with a NULL string; every other field is zero.
#define EMPTY_TOKEN ((Token) { .string = NULL })
typedef struct _Decl Decl;
typedef struct _Type Type;
typedef struct _Expr Expr;
typedef struct _Ast Ast;
// Form of an import; stored in ImportDecl.type.
typedef enum {
IMPORT_TYPE_FULL,
IMPORT_TYPE_ALIAS,
IMPORT_TYPE_ALIAS_LOCAL,
IMPORT_TYPE_LOCAL
} ImportType;
// Visibility of a declaration; stored in Decl.visibility.
typedef enum
{
VISIBLE_MODULE,
VISIBLE_LOCAL,
VISIBLE_PUBLIC,
} Visibility;
// Progress of resolving a declaration; stored in Decl.resolve_status.
typedef enum
{
RESOLVE_NOT_DONE,
RESOLVE_RUNNING,
RESOLVE_DONE
} ResolveStatus;
typedef struct
{
@@ -19,15 +47,22 @@ typedef struct
typedef struct
{
const char* start;
SourceRange span;
TokenType type : 8;
union
{
const char *string;
const char* start;
};
} Token;
// Expands to two comma-separated arguments: the token's span length and its start pointer.
// NOTE(review): presumably intended for a printf "%.*s" conversion — confirm at the call sites.
// The argument is parenthesized so that expressions such as `*tok_ptr` expand correctly.
#define TOK2VARSTR(_token) (_token).span.length, (_token).start
static inline Token wrap(const char *string)
{
return (Token) { .span = INVALID_RANGE, .type = TOKEN_IDENT, .string = string };
}
typedef struct
{
const char *contents;
@@ -37,5 +72,14 @@ typedef struct
SourceLoc end_id;
} File;
// Lexer modes.
typedef enum
{
LEXER_STATE_NORMAL,
LEXER_STATE_DEFERED_PARSING,
LEXER_STATE_DOCS_PARSE,
LEXER_STATE_DOCS_PARSE_DIRECTIVE,
} LexerState;
#define TOKEN_MAX_LENGTH 0xFFFF

182
src/compiler/context.c Normal file
View File

@@ -0,0 +1,182 @@
//
// Created by Christoffer Lerno on 2019-08-24.
//
#include <utils/file_utils.h>
#include <utils/lib.h>
#include "context.h"
#include "diagnostics.h"
Context *current_context;
/**
 * Allocates a zero-filled Context from the arena for `file` and initializes
 * its local symbol table with an initial size of 256.
 *
 * @return the new context
 */
Context *context_create(File *file)
{
	Context *created = malloc_arena(sizeof *created);
	memset(created, 0, sizeof *created);
	created->file = file;
	stable_init(&created->local_symbols, 256);
	return created;
}
// Makes `context` the global current_context.
// Despite the name, nothing is stacked: the previous value is simply overwritten.
void context_push(Context *context)
{
current_context = context;
}
/**
 * Records `module_name` on the context. When the context has no module yet,
 * a zero-filled one is created with that name and a symbol table of initial
 * size 0x10000. Otherwise the name must be the same string pointer as the
 * existing module's name; a mismatch is reported through SEMA_ERROR.
 *
 * @return false only when the name does not match the existing module
 */
static inline bool create_module_or_check_name(Context *context, Token module_name)
{
	context->module_name = module_name;

	if (context->module != NULL)
	{
		// Names are compared by pointer, not by content.
		if (context->module->name == module_name.string) return true;
		SEMA_ERROR(module_name, "Module name here '%s' did not match actual module '%s'.", module_name.string, context->module->name);
		return false;
	}

	context->module = malloc_arena(sizeof(Module));
	memset(context->module, 0, sizeof(Module));
	context->module->name = module_name.string;
	stable_init(&(context->module)->symbols, 0x10000);
	return true;
}
/**
 * Derives a module name from the context's file path and installs it.
 *
 * The path is converted with filename_to_module, the result is registered in
 * the symbol table, and the name is rejected if the symbol table reports a
 * token type other than TOKEN_IDENT for it.
 *
 * Both error messages contain a '%s' conversion; the file path is passed for
 * it so the format string has a matching argument.
 *
 * @return true when the module was created or matched, false after an error
 */
bool context_set_module_from_filename(Context *context)
{
	LOG_FUNC

	char buffer[MAX_IDENTIFIER_LENGTH + 1];
	int len = filename_to_module(context->file->full_path, buffer);
	if (!len)
	{
		sema_error("The filename '%s' could not be converted to a valid module name, try using an explicit module name.",
		           context->file->full_path);
		return false;
	}

	// symtab_add updates `type` in place; anything other than TOKEN_IDENT is rejected below.
	TokenType type = TOKEN_IDENT;
	const char *module_name = symtab_add(buffer, (uint32_t) len, fnv1a(buffer, (uint32_t) len), &type);
	if (type != TOKEN_IDENT)
	{
		sema_error("Generating a filename from the file '%s' resulted in a name that is a reserved keyword, "
		           "try using an explicit module name.", context->file->full_path);
		return false;
	}
	return create_module_or_check_name(context, wrap(module_name));
}
/**
 * Sets the module of the context from an explicit module name.
 *
 * The name is stored on the context before it is validated, so that it is
 * available even when validation fails. A name that is not all lower case is
 * reported as an error; in that case the generic parameters are not stored
 * and no module is created.
 *
 * @param context the context to update
 * @param module_name the token holding the module name
 * @param generic_parameters stored as the context's module parameters on success
 * @return true if the module was created or matched, false if an error was reported.
 */
bool context_set_module(Context *context, Token module_name, Token *generic_parameters)
{
LOG_FUNC
DEBUG_LOG("CONTEXT: Setting module to '%s'.", module_name.string);
// Note that we allow the illegal name for now, to be able to parse further.
context->module_name = module_name;
if (!is_all_lower(module_name.string))
{
sema_error_range(module_name.span, "A module name may not have any upper case characters.");
return false;
}
context->module_parameters = generic_parameters;
return create_module_or_check_name(context, module_name);
}
/**
 * Appends a top level declaration to the context.
 *
 * Declarations of kind DECL_CT_IF are collected in `ct_ifs`; everything else
 * is logged and appended to `declarations`.
 */
void context_register_global_decl(Context *context, Decl *decl)
{
	if (decl->decl_kind == DECL_CT_IF)
	{
		context->ct_ifs = VECADD(context->ct_ifs, decl);
		return;
	}
	DEBUG_LOG("Registering %s.", decl->name.string);
	context->declarations = VECADD(context->declarations, decl);
}
/**
 * Validates an import and appends it to the context's import list.
 *
 * Checks performed, in order:
 *  - the module name must be all lower case;
 *  - for aliased imports, the alias must be all lower case, differ from the
 *    imported module's name and differ from the current module's name;
 *  - the import must not repeat an earlier non-generic import of the same module;
 *  - module names and aliases must not collide with those of earlier imports.
 *
 * Names are compared by pointer.
 * NOTE(review): this presumably relies on the strings being interned - confirm.
 *
 * @param context the context receiving the import
 * @param module_name the imported module
 * @param alias the alias token, only used for the two alias import types
 * @param import_type the kind of import
 * @param generic_parameters generic arguments of the import, may be NULL
 * @return true if the import was added, false if an error was reported.
 */
bool context_add_import(Context *context, Token module_name, Token alias, ImportType import_type, Expr** generic_parameters)
{
	LOG_FUNC
	DEBUG_LOG("SEMA: Add import of '%s'.", module_name.string);

	if (!is_all_lower(module_name.string))
	{
		sema_error_range(module_name.span, "A module is not expected to have any upper case characters, please change it.");
		return false;
	}

	Decl *decl = decl_new_in_module(context->module, DECL_IMPORT, module_name, VISIBLE_LOCAL);
	decl->import.type = import_type;
	decl->import.generic_parameters = generic_parameters;

	if (import_type == IMPORT_TYPE_ALIAS_LOCAL || import_type == IMPORT_TYPE_ALIAS)
	{
		decl->import.alias = alias;
		if (!is_all_lower(alias.string))
		{
			sema_error_range(alias.span, "A module alias is not expected to have any upper case characters, please change it.");
			return false;
		}
		if (alias.string == module_name.string)
		{
			sema_error_range(alias.span, "If a module alias would be the same as the module name, it wouldn't have any effect.");
			return false;
		}
		if (alias.string == context->module_name.string)
		{
			sema_error_range(alias.span, "An alias should not be the same as the name of the current module.");
			return false;
		}
	}
	else
	{
		// No alias: a NULL string keeps the alias comparisons below from matching.
		decl->import.alias.string = NULL;
	}

	VECEACH(context->imports, i)
	{
		Decl *other_import = context->imports[i];
		if (other_import->name.string == module_name.string
		    && !other_import->import.generic_parameters
		    && !generic_parameters)
		{
			sema_error_range(module_name.span, "This module was imported earlier in the file.");
			// Fail like every other reported error instead of registering the duplicate.
			return false;
		}
		if (other_import->import.alias.string == module_name.string)
		{
			sema_error_range(other_import->import.alias.span,
			                 "An alias should not be the same as the name of another imported module.");
			return false;
		}
		if (decl->import.alias.string == other_import->name.string)
		{
			sema_error_range(decl->import.alias.span,
			                 "An alias should not be the same as the name of another imported module.");
			return false;
		}
		if (decl->import.alias.string && decl->import.alias.string == other_import->import.alias.string)
		{
			sema_error_range(decl->import.alias.span,
			                 "This alias has already been used by an earlier import statement.");
			return false;
		}
	}

	context->imports = VECADD(context->imports, decl);
	return true;
}
/**
 * Prints every declaration of the context to `file` using fprint_decl:
 * first the entries of `declarations`, then the entries of `ct_ifs`.
 */
void context_print_ast(Context *context, FILE *file)
{
// Each loop sits in its own block so that both can use the index name `i`.
{
VECEACH(context->declarations, i)
{
fprint_decl(file, context->declarations[i]);
}
}
{
VECEACH(context->ct_ifs, i)
{
fprint_decl(file, context->ct_ifs[i]);
}
}
}

32
src/compiler/context.h Normal file
View File

@@ -0,0 +1,32 @@
#pragma once
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "compiler_common.h"
#include "ast.h"
// Per-file compilation state, created by context_create.
typedef struct _Context
{
// Name token of the module; written whenever a module name is set or checked.
Token module_name;
// Generic parameters passed to context_set_module.
Token* module_parameters;
// The source file this context was created for.
File * file;
// Import declarations appended by context_add_import.
Decl** imports;
// The module of this file; created on first use when a module name is set.
Module *module;
// Symbol table initialised in context_create.
STable local_symbols;
// Top level declarations other than DECL_CT_IF.
Decl **declarations;
// Top level declarations of kind DECL_CT_IF.
Decl **ct_ifs;
// The function whose body is being analysed; read when checking return statements.
Decl *active_function_for_analysis;
} Context;
extern Context *current_context;
Context *context_create(File *file);
void context_push(Context *context);
void context_register_global_decl(Context *context, Decl *decl);
bool context_add_import(Context *context, Token module_name, Token alias, ImportType import_type, Expr** generic_parameters);
bool context_set_module_from_filename(Context *context);
bool context_set_module(Context *context, Token module_name, Token *generic_parameters);
void context_print_ast(Context *context, FILE *file);

View File

@@ -4,6 +4,7 @@
#include "diagnostics.h"
#include "source_file.h"
#include "lexer.h"
#include <math.h>
#include <stdarg.h>
@@ -12,7 +13,6 @@ typedef struct _Diagnostics
bool panic_mode;
unsigned errors;
unsigned warnings;
bool use_color;
} Diagnostics;
Diagnostics diagnostics;
@@ -38,7 +38,7 @@ typedef enum
static void print_error(SourceRange source_range, const char *message, PrintType print_type)
{
File *file = source_file_from_position(source_range.loc);
File *file = source_file_from_position(source_range.loc);
const char *content = file->contents;
const char *error_start = file->contents + source_range.loc - file->start_id;
@@ -78,25 +78,37 @@ static void print_error(SourceRange source_range, const char *message, PrintType
}
}
int max_line_length = (int)round(log10(line)) + 1;
unsigned max_line_length = (int)round(log10(line)) + 1;
char number_buffer[20];
snprintf(number_buffer, 20, "%%%dd: %%.*s\n", max_line_length);
for (unsigned i = 3; i > 0; i--)
{
int line_number = line - i;
int line_number = (int)line - i;
const char *start = linestarts[i];
if (start == NULL) continue;
const char *line_end = linestarts[i - 1];
eprintf(number_buffer, line_number, line_end - start - 1, start);
}
eprintf(number_buffer, line, end - linestarts[0], linestarts[0]);
for (unsigned i = 0; i < max_line_length + 2 + error_start - linestarts[0]; i++)
eprintf(" ");
for (unsigned i = 0; i < max_line_length; i++)
{
eprintf(" ");
}
for (int i = 0; i < source_range.length; i++)
for (unsigned i = 0; i < error_start - linestarts[0]; i++)
{
if (linestarts[0][i] == '\t')
{
eprintf("\t");
}
else
{
eprintf(" ");
}
}
for (uint32_t i = 0; i < source_range.length; i++)
{
eprintf("^");
}
@@ -127,7 +139,7 @@ static void vprint_error(SourceRange span, const char *message, va_list args)
print_error(span, buffer, PRINT_TYPE_ERROR);
}
void diag_error_at(SourceRange span, const char *message, ...)
void diag_error_range(SourceRange span, const char *message, ...)
{
if (diagnostics.panic_mode) return;
diagnostics.panic_mode = true;
@@ -138,7 +150,7 @@ void diag_error_at(SourceRange span, const char *message, ...)
diagnostics.errors++;
}
void diag_verror_at(SourceRange span, const char *message, va_list args)
void diag_verror_range(SourceRange span, const char *message, va_list args)
{
if (diagnostics.panic_mode) return;
diagnostics.panic_mode = true;
@@ -146,6 +158,65 @@ void diag_verror_at(SourceRange span, const char *message, va_list args)
diagnostics.errors++;
}
// Reports a semantic error at a single source location.
// `message` is a printf-style format string; the variadic arguments are
// forwarded to sema_verror_at.
void sema_error_at(SourceLoc loc, const char *message, ...)
{
va_list list;
va_start(list, message);
sema_verror_at(loc, message, list);
va_end(list);
}
// Reports a semantic error covering a source range.
// `message` is a printf-style format string; the variadic arguments are
// forwarded to sema_verror_range.
void sema_error_range(SourceRange range, const char *message, ...)
{
va_list list;
va_start(list, message);
sema_verror_range(range, message, list);
va_end(list);
}
// va_list variant of sema_error_at: prints the error for a range of length 1
// starting at `loc` and increments the error count.
// Unlike diag_verror_range this neither checks nor sets panic_mode.
void sema_verror_at(SourceLoc loc, const char *message, va_list args)
{
vprint_error((SourceRange) { loc, 1 }, message, args);
diagnostics.errors++;
}
// va_list variant of sema_error_range: prints the error for `range` and
// increments the error count.
// Unlike diag_verror_range this neither checks nor sets panic_mode.
void sema_verror_range(SourceRange range, const char *message, va_list args)
{
vprint_error(range, message, args);
diagnostics.errors++;
}
// Reports an error that has no source position: prints
// "(<file name>:0) Error: " for the lexer's current file, followed by the
// formatted message and a newline.
// `message` is used as a printf-style format string, so every conversion in it
// needs a matching argument.
// NOTE(review): unlike sema_verror_at / sema_verror_range this does not
// increment diagnostics.errors - confirm whether that is intended.
void sema_error(const char *message, ...)
{
File *file = lexer_current_file();
va_list list;
va_start(list, message);
eprintf("(%s:0) Error: ", file->name);
evprintf(message, list);
eprintf("\n");
va_end(list);
}
// Prints a PRINT_TYPE_PREV note for `span`, e.g. to point at an earlier
// declaration. The message is formatted printf-style into a 256 byte buffer,
// so longer messages are truncated. The error count is not changed.
void sema_prev_at_range(SourceRange span, const char *message, ...)
{
	char formatted[256];
	va_list list;
	va_start(list, message);
	vsnprintf(formatted, sizeof(formatted), message, list);
	va_end(list);
	print_error(span, formatted, PRINT_TYPE_PREV);
}
// Prints a PRINT_TYPE_PREV note for the range of length 1 starting at `loc`.
// The message is formatted printf-style into a 256 byte buffer, so longer
// messages are truncated. The error count is not changed.
void sema_prev_at(SourceLoc loc, const char *message, ...)
{
	char formatted[256];
	va_list list;
	va_start(list, message);
	vsnprintf(formatted, sizeof(formatted), message, list);
	va_end(list);
	SourceRange single = { loc, 1 };
	print_error(single, formatted, PRINT_TYPE_PREV);
}
/*

View File

@@ -7,8 +7,17 @@
#include "compiler_common.h"
void diag_reset(void);
void diag_error_at(SourceRange span, const char *message, ...);
void diag_verror_at(SourceRange span, const char *message, va_list args);
void diag_error_range(SourceRange span, const char *message, ...);
void diag_verror_range(SourceRange span, const char *message, va_list args);
void sema_error_at(SourceLoc loc, const char *message, ...);
void sema_error_range(SourceRange range, const char *message, ...);
void sema_verror_at(SourceLoc loc, const char *message, va_list args);
void sema_verror_range(SourceRange range, const char *message, va_list args);
void sema_error(const char *message, ...);
void sema_prev_at_range(SourceRange span, const char *message, ...);
void sema_prev_at(SourceLoc loc, const char *message, ...);
#define SEMA_ERROR(_tok, ...) sema_error_range(_tok.span, __VA_ARGS__)
/*

View File

@@ -11,19 +11,11 @@
#include "diagnostics.h"
#include <stdarg.h>
typedef enum
{
LEXER_STATE_NORMAL,
LEXER_STATE_DEFERED_PARSING,
LEXER_STATE_DOCS_PARSE,
LEXER_STATE_DOCS_PARSE_DIRECTIVE,
} LexerState;
typedef struct
{
bool lexer_init_complete;
const char *begin;
const char *start;
const char *file_begin;
const char *lexing_start;
const char *current;
uint16_t source_file;
LexerState lexer_state;
@@ -31,10 +23,18 @@ typedef struct
//Token saved_tok; Will be used later if doing deferred parsing.
//Token saved_prev_tok; Will be used later if doing deferred parsing.
SourceLoc last_in_range;
struct
{
const char *start;
const char *current;
Token tok;
Token prev_tok;
} stored;
} Lexer;
Lexer lexer;
Token next_tok;
Token tok;
// --- Lexing general methods.
@@ -53,14 +53,31 @@ static inline void backtrack()
lexer.current--;
}
static inline char lookahead(int steps)
void lexer_store_state(void)
{
return lexer.current[steps];
lexer.stored.current = lexer.current;
lexer.stored.start = lexer.lexing_start;
lexer.stored.tok = next_tok;
lexer.stored.prev_tok = tok;
}
void lexer_restore_state(void)
{
lexer.current = lexer.stored.current;
lexer.lexing_start = lexer.stored.start;
next_tok = lexer.stored.tok;
tok = lexer.stored.prev_tok;
}
static inline char peek_next()
{
return lookahead(1);
return lexer.current[1];
}
static inline char peek_next_next()
{
return lexer.current[2];
}
static inline char next()
@@ -68,7 +85,7 @@ static inline char next()
return *(lexer.current++);
}
static inline void advance(int steps)
static inline void skip(int steps)
{
lexer.current += steps;
}
@@ -80,39 +97,38 @@ static inline bool reached_end(void)
static Token error_token(const char *message, ...)
{
Token token;
token.type = INVALID_TOKEN;
token.start = lexer.start;
token.span.length = 1;
token.span.loc = lexer.current_file->start_id + (lexer.begin - lexer.start);
Token token = {
.type = TOKEN_INVALID_TOKEN,
.span = { (SourceLoc) (lexer.current_file->start_id + (lexer.lexing_start - lexer.file_begin)), 1 },
.start = lexer.lexing_start
};
va_list list;
va_start(list, message);
diag_verror_at(token.span, message, list);
diag_verror_range(token.span, message, list);
va_end(list);
return token;
}
static Token make_token(TokenType type)
{
size_t token_size = lexer.current - lexer.start;
size_t token_size = lexer.current - lexer.lexing_start;
if (token_size > TOKEN_MAX_LENGTH) return error_token("Token exceeding max length");
return (Token)
{
.type = type,
.start = lexer.start,
.span = { .loc = lexer.current_file->start_id + (lexer.start - lexer.begin), .length = token_size }
.span = { .loc = (SourceLoc) (lexer.current_file->start_id + (lexer.lexing_start - lexer.file_begin)), .length = token_size },
.start = lexer.lexing_start
};
}
static Token make_string_token(TokenType type, const char* string)
{
size_t token_size = lexer.current - lexer.start;
size_t token_size = lexer.current - lexer.lexing_start;
if (token_size > TOKEN_MAX_LENGTH) return error_token("Token exceeding max length");
return (Token)
{
.type = type,
.start = lexer.start,
.span = { .loc = lexer.current_file->start_id + (lexer.start - lexer.begin), .length = token_size },
.span = { .loc = (SourceLoc) (lexer.current_file->start_id + (lexer.lexing_start - lexer.file_begin)), .length = token_size },
.string = string,
};
}
@@ -175,7 +191,7 @@ SkipWhitespaceResult skip_whitespace()
if (peek_next() == '*')
{
// Enter docs parsing on /**
if (lookahead(2) == '*' && lexer.lexer_state == LEXER_STATE_NORMAL)
if (peek_next_next() == '*' && lexer.lexer_state == LEXER_STATE_NORMAL)
{
return WHITESPACE_FOUND_DOCS_START;
}
@@ -222,16 +238,20 @@ SkipWhitespaceResult skip_whitespace()
// --- Normal scanning methods start here
static inline Token scan_prefixed_ident(TokenType type, TokenType no_ident_type)
static inline Token scan_prefixed_ident(TokenType type, TokenType no_ident_type, bool ends_with_bang)
{
uint32_t hash = FNV1a(prev(), FNV1_SEED);
while (is_alphanum_(peek()))
{
hash = FNV1a(next(), hash);
}
int len = lexer.current - lexer.start;
if (ends_with_bang && peek() == '!')
{
hash = FNV1a(next(), hash);
}
uint32_t len = (uint32_t)(lexer.current - lexer.lexing_start);
if (len == 1) return make_token(no_ident_type);
const char* interned = symtab_add(lexer.start, len, hash, &type);
const char* interned = symtab_add(lexer.lexing_start, len, hash, &type);
return make_string_token(type, interned);
}
@@ -241,15 +261,16 @@ static inline void scan_skipped_ident()
}
// Parses identifiers. Note that this is a bit complicated here since
// we split identifiers into 3 types + find keywords.
// we split identifiers into 2 types + find keywords.
static inline Token scan_ident(void)
{
// If we're in ignore keywords state, simply skip stuff.
if (lexer.lexer_state == LEXER_STATE_DEFERED_PARSING)
{
scan_skipped_ident();
return make_token(TOKEN_VAR_IDENT);
return make_token(TOKEN_IDENT);
}
TokenType type = 0;
@@ -270,9 +291,9 @@ static inline Token scan_ident(void)
case 'z':
if (!type)
{
type = TOKEN_VAR_IDENT;
type = TOKEN_IDENT;
}
else if (type == TOKEN_CAPS_IDENT)
else if (type == TOKEN_CONST_IDENT)
{
type = TOKEN_TYPE_IDENT;
}
@@ -283,7 +304,7 @@ static inline Token scan_ident(void)
case 'P': case 'Q': case 'R': case 'S': case 'T':
case 'U': case 'V': case 'W': case 'X': case 'Y':
case 'Z':
if (!type) type = TOKEN_CAPS_IDENT;
if (!type) type = TOKEN_CONST_IDENT;
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
@@ -296,9 +317,8 @@ static inline Token scan_ident(void)
hash = FNV1a(next(), hash);
}
EXIT:;
if (type == INVALID_TOKEN) error_token("An identifier may not only consist of '_'");
uint32_t len = lexer.current - lexer.start;
const char* interned_string = symtab_add(lexer.start, len, hash, &type);
uint32_t len = lexer.current - lexer.lexing_start;
const char* interned_string = symtab_add(lexer.lexing_start, len, hash, &type);
return make_string_token(type, interned_string);
}
@@ -377,15 +397,15 @@ static inline Token scan_digit(void)
{
case 'x':
case 'X':
advance(2);
skip(2);
return scan_hex();
case 'o':
case 'O':
advance(2);
skip(2);
return scan_oct();
case 'b':
case 'B':
advance(2);
skip(2);
return scan_binary();
default:
break;
@@ -462,7 +482,7 @@ static inline void skip_docs_whitespace()
static inline Token scan_docs_directive(void)
{
match_assert('@');
Token token = scan_prefixed_ident(TOKEN_AT_IDENT, TOKEN_AT);
Token token = scan_prefixed_ident(TOKEN_AT_IDENT, TOKEN_AT, false);
assert(token.type != TOKEN_AT);
lexer.lexer_state = LEXER_STATE_DOCS_PARSE_DIRECTIVE;
return token;
@@ -482,10 +502,10 @@ static inline Token scan_docs(void)
if (peek_next() == '/')
{
// Reset start
lexer.start = lexer.current;
lexer.lexing_start = lexer.current;
// Consume the '*/'
advance(2);
skip(2);
// Return end
lexer.lexer_state = LEXER_STATE_NORMAL;
@@ -500,7 +520,7 @@ static inline Token scan_docs(void)
skip_docs_whitespace();
// Reset start
lexer.start = lexer.current;
lexer.lexing_start = lexer.current;
// Now we passed through all of the whitespace. Here we might possibly see a "@",
// if so, we found a directive:
@@ -550,14 +570,14 @@ Token lexer_scan_token(void)
SkipWhitespaceResult result = skip_whitespace();
// Point start to the first non-whitespace character.
lexer.start = lexer.current;
lexer.lexing_start = lexer.current;
switch (result)
{
case WHITESPACE_FOUND_DOCS_START:
// Here we found '/**', so we skip past that
// and switch state.
advance(3);
skip(3);
lexer.lexer_state = LEXER_STATE_DOCS_PARSE;
return make_token(TOKEN_DOCS_START);
case WHITESPACE_COMMENT_REACHED_EOF:
@@ -565,7 +585,7 @@ Token lexer_scan_token(void)
case WHITESPACE_FOUND_EOF:
return make_token(TOKEN_EOF);
case WHITESPACE_FOUND_DOCS_EOL:
advance(1);
skip(1);
lexer.lexer_state = LEXER_STATE_DOCS_PARSE;
return make_token(TOKEN_DOCS_EOL);
case WHITESPACE_SKIPPED_OK:
@@ -576,15 +596,15 @@ Token lexer_scan_token(void)
switch (c)
{
case '@':
return scan_prefixed_ident(TOKEN_AT_IDENT, TOKEN_AT);
return scan_prefixed_ident(TOKEN_AT_IDENT, TOKEN_AT, true);
case '\'':
return scan_char();
case '"':
return scan_string();
case '#':
return scan_prefixed_ident(TOKEN_HASH_IDENT, TOKEN_HASH);
return scan_prefixed_ident(TOKEN_HASH_IDENT, TOKEN_HASH, false);
case '$':
return scan_prefixed_ident(TOKEN_DOLLAR_IDENT, TOKEN_DOLLAR);
return scan_prefixed_ident(TOKEN_CT_IDENT, TOKEN_DOLLAR, false);
case ',':
return make_token(TOKEN_COMMA);
case ';':
@@ -607,7 +627,7 @@ Token lexer_scan_token(void)
case '~':
return make_token(TOKEN_BIT_NOT);
case ':':
return make_token(match(':') ? TOKEN_COLCOLON : TOKEN_COLON);
return make_token(match(':') ? TOKEN_SCOPE : TOKEN_COLON);
case '!':
return make_token(match('=') ? TOKEN_NOT_EQUAL : TOKEN_NOT);
case '/':
@@ -645,7 +665,7 @@ Token lexer_scan_token(void)
return make_token(TOKEN_PLUS);
case '-':
if (match('>')) return make_token(TOKEN_ARROW);
if (match('-')) make_token(TOKEN_MINUSMINUS);
if (match('-')) return make_token(TOKEN_MINUSMINUS);
if (match('=')) return make_token(TOKEN_MINUS_ASSIGN);
return make_token(TOKEN_MINUS);
default:
@@ -675,10 +695,9 @@ void lexer_add_file_for_lexing(File *file)
LOG_FUNC
lexer_check_init();
lexer.current_file = file;
lexer.last_in_range = 0;
lexer.begin = lexer.current_file->contents;
lexer.start = lexer.begin;
lexer.current = lexer.start;
lexer.file_begin = lexer.current_file->contents;
lexer.lexing_start = lexer.file_begin;
lexer.current = lexer.lexing_start;
lexer.lexer_state = LEXER_STATE_NORMAL;
}
@@ -687,9 +706,9 @@ void lexer_test_setup(const char *text, size_t len)
lexer_check_init();
static File helper;
lexer.lexer_state = LEXER_STATE_NORMAL;
lexer.start = text;
lexer.lexing_start = text;
lexer.current = text;
lexer.begin = text;
lexer.file_begin = text;
lexer.current_file = &helper;
lexer.current_file->start_id = 0;
lexer.current_file->contents = text;
@@ -703,9 +722,9 @@ Token lexer_scan_ident_test(const char *scan)
{
static File helper;
lexer.lexer_state = LEXER_STATE_NORMAL;
lexer.start = scan;
lexer.lexing_start = scan;
lexer.current = scan;
lexer.begin = scan;
lexer.file_begin = scan;
lexer.current_file = &helper;
lexer.current_file->start_id = 0;
lexer.current_file->contents = scan;
@@ -717,6 +736,5 @@ Token lexer_scan_ident_test(const char *scan)
lexer.lexer_state = LEXER_STATE_DOCS_PARSE;
return scan_docs();
}
return lexer_scan_token();
return scan_ident();
}

View File

@@ -7,11 +7,33 @@
#include "compiler_common.h"
extern Token next_tok;
extern Token tok;
Token lexer_scan_token(void);
Token lexer_scan_ident_test(const char *scan);
void lexer_test_setup(const char *text, size_t len);
void lexer_add_file_for_lexing(File *file);
File* lexer_current_file(void);
void lexer_check_init(void);
void lexer_check_init(void);
void lexer_store_state(void);
void lexer_restore_state(void);
// Steps the token stream forward by one: the lookahead `next_tok` becomes the
// current `tok`, and a new lookahead is scanned. Tokens of type
// TOKEN_INVALID_TOKEN are skipped, so `next_tok` never holds one afterwards.
static inline void advance(void)
{
	tok = next_tok;
	do
	{
		next_tok = lexer_scan_token();
	}
	while (next_tok.type == TOKEN_INVALID_TOKEN);
}
// Advances past the current token, asserting first that it has the expected
// type. The check is an assert only, so it disappears when asserts are
// compiled out.
static inline void advance_and_verify(TokenType token_type)
{
assert(tok.type == token_type);
advance();
}

8
src/compiler/module.c Normal file
View File

@@ -0,0 +1,8 @@
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "../utils/lib.h"
#include "compiler_common.h"
#include "ast.h"

File diff suppressed because it is too large Load Diff

View File

@@ -7,4 +7,19 @@
#include "compiler_common.h"
#include "tokens.h"
// Operator precedence levels used by the expression parser.
// NOTE(review): the enumerators appear to be listed from weakest (PREC_NONE)
// to strongest (PREC_CALL) binding - confirm against the parser. If so, the bit
// and shift operators bind tighter than the additive ones here, unlike in C.
typedef enum _Precedence
{
PREC_NONE,
PREC_ASSIGNMENT, // =, *=, /=, %=, ...
PREC_CONDITIONAL, // ?:
PREC_LOGICAL, // && ||
PREC_RELATIONAL, // < > <= >= == !=
PREC_ADDITIVE, // + -
PREC_BIT, // ^ | &
PREC_SHIFT, // << >> >>>
PREC_MULTIPLICATIVE, // * / %
PREC_UNARY, // ! - + ~ * & prefix ++/--
PREC_CALL, // . () [] postfix ++/--
} Precedence;
void parse_file(File *file);

View File

@@ -2,67 +2,429 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <utils/errors.h>
#include <utils/lib.h>
#include "../utils/errors.h"
#include "../utils/lib.h"
#include "semantic_analyser.h"
#include "../utils/file_utils.h"
#include "symtab.h"
#include "ast.h"
#include "diagnostics.h"
#include "context.h"
CompilationUnit current_unit;
void sema_init(File *file)
{
LOG_FUNC
current_unit.file = file;
current_unit.module_name.type = INVALID_TOKEN;
}
void sema_add_module(Token module_name)
static inline Decl *module_find_symbol(Module *module, const char *symbol)
{
LOG_FUNC
current_unit.module_name = module_name;
return stable_get(&module->symbols, symbol);
}
void sema_add_module_from_filename(void)
static inline bool sema_resolve_unresolved_array(Type *type)
{
LOG_FUNC
char buffer[MAX_IDENTIFIER_LENGTH + 1];
int len = filename_to_module(current_unit.file->full_path, buffer);
if (!len)
TODO
}
static inline bool sema_resolve_unresolved_type_expr(Type *type)
{
TODO
}
static inline bool sema_resolve_unresolved_type(Context *context, Type *type)
{
assert(type->type_kind == TYPE_UNRESOLVED);
if (type->unresolved.module.string)
{
TODO
}
TokenType type = TOKEN_VAR_IDENT;
const char *module_name = symtab_add(buffer, len, fnv1a(buffer, len), &type);
if (type != TOKEN_VAR_IDENT)
Decl *decl = stable_get(&context->local_symbols, type->unresolved.name.string);
if (!decl)
{
decl = module_find_symbol(context->module, type->unresolved.name.string);
}
if (!decl)
{
TODO
}
current_unit.module_name.string = module_name;
TODO
if (!decl)
{
SEMA_ERROR(type->unresolved.name, "Unknown type '%s'.", type->unresolved.name);
type->type_kind = TYPE_POISONED;
return false;
}
switch (decl->decl_kind)
{
case DECL_STRUCT:
type->type_kind = TYPE_STRUCT;
break;
case DECL_UNION:
type->type_kind = TYPE_UNION;
break;
case DECL_TYPEDEF:
type->type_kind = TYPE_TYPEDEF;
break;
case DECL_FUNC_TYPE:
type->type_kind = TYPE_FUNC_TYPE;
break;
case DECL_ENUM:
type->type_kind = TYPE_ENUM;
break;
case DECL_ERROR:
type->type_kind = TYPE_ERROR;
break;
default:
SEMA_ERROR(type->unresolved.name, "Unknown type '%s'.", type->unresolved.name);
type->type_kind = TYPE_POISONED;
return false;
}
type->decl = decl;
type->canonical_type = decl->type;
return true;
}
void sema_add_import(Token module_name, Token alias, ImportType import_type)
// Resolves `type` if it is still in one of the unresolved states.
// Returns false for a poisoned type, dispatches the three unresolved kinds to
// their resolver, and returns true for every other kind without doing anything.
static bool sema_resolve_type(Context *context, Type *type)
{
LOG_FUNC
switch (type->type_kind)
{
case TYPE_POISONED:
return false;
case TYPE_UNRESOLVED:
return sema_resolve_unresolved_type(context, type);
case TYPE_UNRESOLVED_EXPR:
return sema_resolve_unresolved_type_expr(type);
case TYPE_UNRESOLVED_ARRAY:
return sema_resolve_unresolved_array(type);
default:
return true;
}
}
// Analyses one struct member, which must be a member variable without an
// initializer. The member's type is resolved; if that fails the member is
// poisoned and false is returned.
static inline bool sema_analyse_struct_member(Context *context, Decl *decl)
{
	LOG_FUNC
	assert(decl->decl_kind == DECL_VAR);
	assert(decl->var.kind == VARDECL_MEMBER);
	assert(!decl->var.init_expr);
	if (sema_resolve_type(context, decl->type)) return true;
	decl_poison(decl);
	return false;
}
// Analyses all members of a struct declaration. The struct itself is poisoned
// whenever a member is already poisoned or fails its own analysis; the
// remaining members are still visited.
static inline void sema_analyse_struct(Context *context, Decl *decl)
{
	LOG_FUNC
	DEBUG_LOG("Beginning analysis of %s.", decl->name.string);
	assert(decl->decl_kind == DECL_STRUCT);
	VECEACH(decl->strukt.members, i)
	{
		Decl *member = decl->strukt.members[i];
		// Members that are already poisoned are not analysed again.
		if (!decl_ok(member) || !sema_analyse_struct_member(context, member))
		{
			decl_poison(decl);
		}
	}
	DEBUG_LOG("Analysing complete.");
}
// Placeholder for expression analysis: currently ignores its arguments and
// always returns false.
static inline bool sema_analyse_expression(Context *context, Expr *expr)
{
return false;
}
/**
* Convert an expression to a given type using implicit conversion.
* Not implemented yet: the body is a TODO followed by a NULL return.
*
* @param expr the expression to be implicitly converted
* @param type the type to convert to
* @return an expression with cast if needed, or NULL if an error has been sent and the conversion failed.
*/
Expr *expr_implicit_conversion(Expr *expr, Type *type)
{
TODO
return NULL;
}
/**
 * Analyses a single parameter declaration of a function or function type.
 *
 * The parameter type is resolved first. A default value is only accepted when
 * `is_function` is true; it must analyse without being poisoned, be a constant
 * expression and be implicitly convertible to the parameter's variable type.
 * On success the default value is replaced by the converted expression.
 *
 * @return true if the parameter is valid, false otherwise.
 */
static inline bool sema_analyse_function_param(Context *context, Decl *param, bool is_function)
{
	if (!decl_ok(param)) return false;
	assert(param->decl_kind == DECL_VAR);
	assert(param->var.kind == VARDECL_PARAM);
	if (!sema_resolve_type(context, param->type)) return false;

	Expr *expr = param->var.init_expr;
	// Nothing more to check without a default value.
	if (!expr) return true;

	if (!is_function)
	{
		SEMA_ERROR(expr->loc, "Function types may not have default arguments.");
		return false;
	}

	// The result of the analysis is read back through expr_ok.
	sema_analyse_expression(context, expr);
	if (!expr_ok(expr)) return false;
	if (expr->expr_kind != EXPR_CONST)
	{
		SEMA_ERROR(expr->loc, "Only constant expressions may be used as default values.");
		return false;
	}

	Expr *converted_expr = expr_implicit_conversion(expr, param->var.type);
	if (!converted_expr) return false;
	param->var.init_expr = converted_expr;
	return true;
}
/**
 * Analyses a function signature: resolves the return type and analyses every
 * parameter. All parameters are visited even after a failure; parameters that
 * fail are poisoned. Handling of the throws list is not implemented yet.
 *
 * @param is_function passed on to the parameter analysis
 * @return true only if the return type and all parameters were valid.
 */
static inline bool sema_analyse_function_signature(Context *context, FunctionSignature *signature, bool is_function)
{
	bool all_ok = sema_resolve_type(context, signature->rtype);
	// TODO check parameter name appearing more than once.
	VECEACH(signature->params, i)
	{
		Decl *param = signature->params[i];
		if (sema_analyse_function_param(context, param, is_function)) continue;
		decl_poison(param);
		all_ok = false;
	}
	VECEACH(signature->throws, i)
	{
		TODO
	}
	return all_ok;
}
static bool sema_analyse_statement(Context *context, Ast *parent, Ast *statement);
/**
 * Analyses every statement of a compound statement.
 *
 * Statements that fail are poisoned, and analysis continues with the next one.
 * Afterwards the parent's `exit` is raised to the compound statement's `exit`
 * if the latter is greater.
 *
 * @return false if the compound statement was not ok on entry or any statement failed.
 */
static inline bool sema_analyse_compound_statement(Context *context, Ast *parent, Ast *compound_statement)
{
LOG_FUNC
bool all_ok = ast_ok(compound_statement);
VECEACH(compound_statement->compound_stmt.stmts, i)
{
if (!sema_analyse_statement(context, compound_statement, compound_statement->compound_stmt.stmts[i]))
{
ast_poison(compound_statement->compound_stmt.stmts[i]);
all_ok = false;
}
}
// Propagate the exit kind upwards; a no-op when parent and statement are the same node.
if (parent->exit < compound_statement->exit)
{
parent->exit = compound_statement->exit;
}
return all_ok;
}
/**
 * Analyses a return statement against the return type of the function that is
 * currently being analysed.
 *
 * The parent's exit is set to EXIT_RETURN in all cases. A bare `return` is only
 * valid for a void return type; a returned expression is analysed and then
 * replaced by its implicit conversion to the expected return type.
 *
 * @return true if the statement is valid, false otherwise.
 */
static inline bool sema_analyse_return_stmt(Context *context, Ast *parent, Ast *statement)
{
	LOG_FUNC
	parent->exit = EXIT_RETURN;
	Type *expected_rtype = context->active_function_for_analysis->func.function_signature.rtype;
	Expr *returned = statement->return_stmt.expr;

	if (returned == NULL)
	{
		if (expected_rtype->type_kind == TYPE_VOID) return true;
		SEMA_ERROR(statement->token, "Expected to return a result of type %s.", type_to_string(expected_rtype));
		return false;
	}

	if (!sema_analyse_expression(context, returned)) return false;
	Expr *conversion = expr_implicit_conversion(returned, expected_rtype);
	if (!conversion) return false;
	statement->return_stmt.expr = conversion;
	return true;
}
/**
 * Dispatches analysis of a single statement on its AST kind.
 *
 * Only return statements are handled so far. Every other kind breaks out of
 * the switch and reaches the TODO at the end of the function. All kinds are
 * listed explicitly and there is no default case.
 */
static bool sema_analyse_statement(Context *context, Ast *parent, Ast *statement)
{
LOG_FUNC
switch (statement->ast_kind)
{
case AST_POISONED:
break;
case AST_ASM_STMT:
break;
case AST_ATTRIBUTE:
break;
case AST_BREAK_STMT:
break;
case AST_CASE_STMT:
break;
case AST_CATCH_STMT:
break;
case AST_COMPOUND_STMT:
break;
case AST_COND_STMT:
break;
case AST_CONTINUE_STMT:
break;
case AST_CT_IF_STMT:
break;
case AST_CT_ELIF_STMT:
break;
case AST_CT_ELSE_STMT:
break;
case AST_DECLARE_STMT:
break;
case AST_DECL_EXPR_LIST:
break;
case AST_DEFAULT_STMT:
break;
case AST_DEFER_STMT:
break;
case AST_DO_STMT:
break;
case AST_EXPR_STMT:
break;
case AST_FOR_STMT:
break;
case AST_GOTO_STMT:
break;
case AST_IF_STMT:
break;
case AST_LABEL:
break;
case AST_NOP_STMT:
break;
case AST_RETURN_STMT:
return sema_analyse_return_stmt(context, parent, statement);
case AST_SWITCH_STMT:
break;
case AST_THROW_STMT:
break;
case AST_TRY_STMT:
break;
case AST_NEXT_STMT:
break;
case AST_VOLATILE_STMT:
break;
case AST_WHILE_STMT:
break;
case AST_GENERIC_CASE_STMT:
break;
case AST_GENERIC_DEFAULT_STMT:
break;
}
// Reached by every statement kind that is not implemented yet.
TODO
}
void sema_add_errors(Token error_type_name /* todo values */)
static inline bool sema_analyse_function_body(Context *context, Decl *func)
{
TODO
context->active_function_for_analysis = func;
if (!sema_analyse_compound_statement(context, func->func.body, func->func.body)) return false;
if (func->func.body->exit != EXIT_RETURN && func->func.function_signature.rtype->type_kind != TYPE_VOID)
{
SEMA_ERROR(func->name, "Missing return statement at the end of the function.");
return false;
}
return true;
}
void sema_add_macro_var(Token macro_var_name /* , expr **/ )
static inline bool sema_analyse_func(Context *context, Decl *decl)
{
TODO
bool all_ok = sema_analyse_function_signature(context, &decl->func.function_signature, true);
if (decl->func.struct_parent)
{
all_ok = sema_resolve_type(context, decl->func.struct_parent) && all_ok;
}
all_ok = all_ok && sema_analyse_function_body(context, decl);
if (!all_ok) decl_poison(decl);
return all_ok;
}
// Analyses one top level declaration by kind. Structs and functions are
// analysed, imports are skipped for now, and every other kind hits a TODO.
// The boolean result of sema_analyse_func is not used here.
static inline void sema_analyse_decl(Context *context, Decl *decl)
{
LOG_FUNC
DEBUG_LOG("Analyse %s", decl->name.string);
switch (decl->decl_kind)
{
case DECL_IMPORT:
// TODO
break;
case DECL_STRUCT:
sema_analyse_struct(context, decl);
break;
case DECL_FUNC:
sema_analyse_func(context, decl);
break;
default:
TODO
}
}
// If we have a public parameter, then the next one will be the real one.
void sema_mark_next_public(void)
static void show_shadow_error(Decl *decl, Decl *old)
{
TODO
sema_error_range(decl->name.span, "The '%s' would shadow a previous declaration.", decl->name.string);
sema_prev_at_range(old->name.span, "The previous use of '%s' was here.", decl->name.string);
}
void sema_verror_at(SourceRange range, const char *message, va_list args)
bool context_register_global(Context *context, Decl *decl)
{
TODO
Decl *old = stable_set(&context->local_symbols, decl->name.string, decl);
if (!old && decl->visibility != VISIBLE_LOCAL)
{
old = stable_set(&context->module->symbols, decl->name.string, decl);
}
if (!old && decl->visibility == VISIBLE_PUBLIC)
{
old = stable_set(&context->module->public_symbols, decl->name.string, decl);
}
if (old != NULL)
{
show_shadow_error(decl, old);
decl_poison(decl);
return false;
}
context->declarations = VECADD(context->declarations, decl);
return true;
}
// Registers every declaration of the context in the symbol tables through
// context_register_global. The return value of each registration is ignored.
// NOTE(review): context_register_global appends a successfully registered
// declaration to context->declarations, the vector being iterated here.
// Depending on how VECEACH evaluates its bound this looks like it could
// duplicate entries or extend the loop - confirm.
static inline void sema_register_declarations(Context *context)
{
VECEACH(context->declarations, i)
{
context_register_global(context, context->declarations[i]);
}
}
// Runs sema_analyse_decl on every entry of the context's declaration list, in order.
static inline void sema_analyse_declarations(Context *context)
{
VECEACH(context->declarations, i)
{
sema_analyse_decl(context, context->declarations[i]);
}
}
// Placeholder for import processing; currently does nothing.
static inline void sema_process_imports(Context *context)
{
// TODO
}
// Entry point of semantic analysis for one context. Runs the passes in order:
// import processing, registration of declarations, then analysis of the
// declarations. The ct_ifs list is not looked at yet.
void sema_analysis(Context *context)
{
sema_process_imports(context);
sema_register_declarations(context);
// Skip the ct_if for now -> assume they passed.
sema_analyse_declarations(context);
}

View File

@@ -5,28 +5,13 @@
// license that can be found in the LICENSE file.
#include "compiler_common.h"
#include "ast.h"
#include "context.h"
typedef struct
{
Token module_name;
File *file;
} CompilationUnit;
typedef enum {
IMPORT_TYPE_FULL,
IMPORT_TYPE_ALIAS,
IMPORT_TYPE_ALIAS_LOCAL,
IMPORT_TYPE_LOCAL
} ImportType;
void sema_init(File *file);
void sema_add_module(Token module_name);
void sema_add_module_from_filename(void);
void sema_add_import(Token module_name, Token alias, ImportType import_type);
void sema_add_errors(Token error_type_name /* todo values */);
void sema_add_macro_var(Token macro_var_name /* , expr **/ );
void sema_analysis(Context *context);
// If we have a public parameter, then the next one will be the real one.
void sema_mark_next_public(void);
void sema_verror_at(SourceRange range, const char *message, va_list args);

View File

@@ -52,7 +52,7 @@ File *source_file_load(const char *filename, bool *already_loaded)
file->start_id = vec_size(source_files.files) ? VECLAST(source_files.files)->end_id : 0;
file->contents = source_text;
ASSERT(file->start_id + size < UINT32_MAX, "Total files loaded exceeded %d bytes", UINT32_MAX);
file->end_id = file->start_id + size;
file->end_id = (SourceLoc) (file->start_id + size);
file->name = filename;
source_files.files = VECADD(source_files.files, file);
return file;
@@ -87,3 +87,5 @@ File *source_file_from_position(SourceLoc loc)
return file;
}
}

View File

@@ -61,13 +61,13 @@ void symtab_init(uint32_t capacity)
// Skip non-keywords
if (!is_lower(name[0]))
{
if (name[0] != '@' || !is_lower(name[1])) continue;
if ((name[0] != '@' && name[0] != '$') || !is_lower(name[1])) continue;
}
int len = strlen(name);
uint32_t len = (uint32_t)strlen(name);
TokenType type = (TokenType)i;
const char* interned = symtab_add(name, strlen(name), fnv1a(name, len), &type);
const char* interned = symtab_add(name, (uint32_t)strlen(name), fnv1a(name, len), &type);
assert(type == i);
assert(symtab_add(name, strlen(name), fnv1a(name, len), &type) == interned);
assert(symtab_add(name, (uint32_t)strlen(name), fnv1a(name, len), &type) == interned);
}
}

View File

@@ -10,7 +10,7 @@ const char *token_type_to_string(TokenType type)
switch (type)
{
case INVALID_TOKEN:
case TOKEN_INVALID_TOKEN:
return "INVALID_TOKEN";
// One character tokens
@@ -80,8 +80,6 @@ const char *token_type_to_string(TokenType type)
return "|=";
case TOKEN_BIT_XOR_ASSIGN:
return "^=";
case TOKEN_COLCOLON:
return "::";
case TOKEN_DIV_ASSIGN:
return "/=";
case TOKEN_DOTDOT:
@@ -110,6 +108,8 @@ const char *token_type_to_string(TokenType type)
return "+=";
case TOKEN_PLUSPLUS:
return "++";
case TOKEN_SCOPE:
return "::";
case TOKEN_SHL:
return "<<";
case TOKEN_SHR:
@@ -128,18 +128,18 @@ const char *token_type_to_string(TokenType type)
return "||=";
// Identifiers
case TOKEN_VAR_IDENT:
return "VAR_IDENT";
case TOKEN_TYPE_IDENT:
return "TYPE_IDENT";
case TOKEN_CAPS_IDENT:
return "CAPS_IDENT";
case TOKEN_IDENT:
return "IDENT";
case TOKEN_AT_IDENT:
return "AT_IDENT";
case TOKEN_HASH_IDENT:
return "HASH_IDENT";
case TOKEN_DOLLAR_IDENT:
return "DOLLAR_IDENT";
case TOKEN_CT_IDENT:
return "CT_IDENT";
case TOKEN_CONST_IDENT:
return "CONST_IDENT";
case TOKEN_TYPE_IDENT:
return "TYPE_IDENT";
// Values
case TOKEN_STRING:
@@ -156,6 +156,8 @@ const char *token_type_to_string(TokenType type)
return "as";
case TOKEN_ASM:
return "asm";
case TOKEN_ATTRIBUTE:
return "attribute";
case TOKEN_BREAK:
return "break";
case TOKEN_CASE:
@@ -168,8 +170,6 @@ const char *token_type_to_string(TokenType type)
return "const";
case TOKEN_CONTINUE:
return "continue";
case TOKEN_DECORATOR:
return "decorator";
case TOKEN_DEFAULT:
return "default";
case TOKEN_DEFER:
@@ -180,7 +180,7 @@ const char *token_type_to_string(TokenType type)
return "else";
case TOKEN_ENUM:
return "enum";
case TOKEN_ERROR:
case TOKEN_ERROR_TYPE:
return "error";
case TOKEN_FALSE:
return "false";
@@ -202,6 +202,8 @@ const char *token_type_to_string(TokenType type)
return "macro";
case TOKEN_MODULE:
return "module";
case TOKEN_NEXT:
return "next";
case TOKEN_NIL:
return "nil";
case TOKEN_PUBLIC:
@@ -235,64 +237,6 @@ const char *token_type_to_string(TokenType type)
case TOKEN_VOLATILE:
return "volatile";
case TOKEN_C_LONGDOUBLE:
return "c_longdouble";
case TOKEN_C_USHORT:
return "c_ushort";
case TOKEN_C_UINT:
return "c_uint";
case TOKEN_C_ULONG:
return "c_ulong";
case TOKEN_C_ULONGLONG:
return "c_ulonglong";
case TOKEN_C_SHORT:
return "c_short";
case TOKEN_C_INT:
return "c_int";
case TOKEN_C_LONG:
return "c_long";
case TOKEN_C_LONGLONG:
return "c_longlong";
// Bit types
case TOKEN_F256:
return "f256";
case TOKEN_I256:
return "i256";
case TOKEN_U256:
return "u256";
case TOKEN_F128:
return "f128";
case TOKEN_I128:
return "i128";
case TOKEN_U128:
return "u128";
case TOKEN_F64:
return "f64";
case TOKEN_I64:
return "i64";
case TOKEN_U64:
return "u64";
case TOKEN_F32:
return "f32";
case TOKEN_I32:
return "i32";
case TOKEN_U32:
return "u32";
case TOKEN_F16:
return "f16";
case TOKEN_I16:
return "i16";
case TOKEN_U16:
return "u16";
case TOKEN_I8:
return "i8";
case TOKEN_U8:
return "u8";
case TOKEN_U1:
return "u1";
// Named types
case TOKEN_VOID:
return "void";
@@ -356,6 +300,20 @@ const char *token_type_to_string(TokenType type)
case TOKEN_AT_REQPARSE:
return "@reqparse";
case TOKEN_CT_CASE:
return "$case";
case TOKEN_CT_DEFAULT:
return "$default";
case TOKEN_CT_EACH:
return "$each";
case TOKEN_CT_ELSE:
return "$else";
case TOKEN_CT_ELIF:
return "$elif";
case TOKEN_CT_IF:
return "$if";
case TOKEN_CT_SWITCH:
return "$switch";
case TOKEN_EOF:
return "EOF";
@@ -363,3 +321,27 @@ const char *token_type_to_string(TokenType type)
UNREACHABLE
}
/**
 * Tell whether a token is one of the built-in type keywords listed below.
 *
 * @param type the token type to classify.
 * @return true for the listed type tokens, false for every other token.
 */
bool token_is_type(TokenType type)
{
	bool is_type_token = false;
	switch (type)
	{
		case TOKEN_VOID:
		case TOKEN_BOOL:
		case TOKEN_BYTE:
		case TOKEN_CHAR:
		case TOKEN_SHORT:
		case TOKEN_USHORT:
		case TOKEN_INT:
		case TOKEN_UINT:
		case TOKEN_LONG:
		case TOKEN_ULONG:
		case TOKEN_ISIZE:
		case TOKEN_USIZE:
		case TOKEN_FLOAT:
		case TOKEN_DOUBLE:
			is_type_token = true;
			break;
		default:
			break;
	}
	return is_type_token;
}

View File

@@ -5,9 +5,11 @@
// license that can be found in the LICENSE file.
#include <stdbool.h>
typedef enum _TokenType
{
INVALID_TOKEN = 0,
TOKEN_INVALID_TOKEN = 0,
// Single-character tokens.
TOKEN_AMP, // &
@@ -42,23 +44,23 @@ typedef enum _TokenType
// two character tokens.
TOKEN_AND, // &&
TOKEN_ARROW, // -> // Not used but reserved
TOKEN_BIT_AND_ASSIGN, // &=
TOKEN_BIT_OR_ASSIGN, // |=
TOKEN_BIT_XOR_ASSIGN, // ^=
TOKEN_COLCOLON, // :: Not used but reserved
TOKEN_DIV_ASSIGN, // /=
TOKEN_DOTDOT, // ..
TOKEN_ELVIS, // ?:
TOKEN_EQEQ, // ==
TOKEN_GREATER_EQ, // >=
TOKEN_LESS_EQ, // <=
TOKEN_NOT_EQUAL, // !=
TOKEN_MINUS_ASSIGN, // -=
TOKEN_MINUSMINUS, // --
TOKEN_GREATER_EQ, // >=
TOKEN_MOD_ASSIGN, // %=
TOKEN_MULT_ASSIGN, // *=
TOKEN_NOT_EQUAL, // !=
TOKEN_PLUS_ASSIGN, // +=
TOKEN_PLUSPLUS, // ++
TOKEN_BIT_AND_ASSIGN, // &=
TOKEN_SCOPE, // ::
TOKEN_SHR, // >>
TOKEN_SHL, // <<
@@ -69,25 +71,6 @@ typedef enum _TokenType
TOKEN_SHR_ASSIGN, // >>=
TOKEN_SHL_ASSIGN, // <<=
// Basic types bit
TOKEN_F256, // f256
TOKEN_I256, // i256
TOKEN_U256, // u256
TOKEN_F128, // f128
TOKEN_I128, // i128
TOKEN_U128, // u128
TOKEN_F64, // f64
TOKEN_I64, // i64
TOKEN_U64, // u64
TOKEN_F32, // f32
TOKEN_I32, // i32
TOKEN_U32, // u32
TOKEN_F16, // f16
TOKEN_I16, // i16
TOKEN_U16, // u16
TOKEN_I8, // i8
TOKEN_U8, // u8
TOKEN_U1, // u1
// Basic types names
TOKEN_VOID,
@@ -107,31 +90,18 @@ typedef enum _TokenType
TOKEN_USIZE,
TOKEN_QUAD,
// C compatibility types
TOKEN_C_USHORT,
TOKEN_C_SHORT,
TOKEN_C_INT,
TOKEN_C_UINT,
TOKEN_C_LONG,
TOKEN_C_ULONG,
TOKEN_C_LONGLONG,
TOKEN_C_ULONGLONG,
TOKEN_C_LONGDOUBLE,
// Literals.
// In order to make the grammar
// non ambiguous, we split tokens at the
// lexer level
TOKEN_TYPE_IDENT, // FooBarBaz
TOKEN_CAPS_IDENT, // FOO_BAR_BAZ
TOKEN_VAR_IDENT, // fooBarBaz
TOKEN_IDENT, // Any normal ident.
TOKEN_CONST_IDENT, // Any purely upper case ident,
TOKEN_TYPE_IDENT, // Any ident on the format FooBar or __FooBar
// We want to parse @foo / #foo / $foo separately.
// Otherwise we allow things like "@ foo" which would be pretty bad.
TOKEN_AT_IDENT, // @foobar
TOKEN_HASH_IDENT, // #foobar
TOKEN_DOLLAR_IDENT, // $foobar
TOKEN_CT_IDENT, // $foobar
TOKEN_STRING, // "Teststring"
TOKEN_INTEGER, // 123 0x23 0b10010 0o327
@@ -141,19 +111,19 @@ typedef enum _TokenType
TOKEN_ALIAS, // Reserved
TOKEN_AS,
TOKEN_ASM,
TOKEN_ATTRIBUTE,
TOKEN_BREAK,
TOKEN_CASE,
TOKEN_CAST,
TOKEN_CATCH,
TOKEN_CONST,
TOKEN_CONTINUE,
TOKEN_DECORATOR,
TOKEN_DEFAULT,
TOKEN_DEFER,
TOKEN_DO,
TOKEN_ELSE,
TOKEN_ENUM,
TOKEN_ERROR,
TOKEN_ERROR_TYPE,
TOKEN_FALSE,
TOKEN_FOR,
TOKEN_FUNC,
@@ -164,6 +134,7 @@ typedef enum _TokenType
TOKEN_LOCAL,
TOKEN_MACRO,
TOKEN_MODULE,
TOKEN_NEXT,
TOKEN_NIL,
TOKEN_PUBLIC,
TOKEN_RETURN,
@@ -191,7 +162,15 @@ typedef enum _TokenType
TOKEN_AT_REQPARSE, // @reqparse
TOKEN_AT_DEPRECATED, // @deprecated
TOKEN_DOCS_START, // /** (will consume an arbitrary number of `*` after this.
TOKEN_CT_CASE, // $case
TOKEN_CT_DEFAULT, // $default
TOKEN_CT_EACH, // $each
TOKEN_CT_ELIF, // $elif
TOKEN_CT_ELSE, // $else
TOKEN_CT_IF, // $if
TOKEN_CT_SWITCH, // $switch
TOKEN_DOCS_START, // /**
TOKEN_DOCS_END, // */ (may start with an arbitrary number of `*`
TOKEN_DOCS_EOL, // "\n" only seen in docs.
TOKEN_DOCS_LINE, // Any line within /** **/
@@ -200,4 +179,10 @@ typedef enum _TokenType
} TokenType;
bool token_is_type(TokenType type);
const char *token_type_to_string(TokenType type);
/**
 * Map a token to the keyword text used in messages.
 *
 * @return "struct" for TOKEN_STRUCT, "union" for any other token.
 */
static inline const char* struct_union_name_from_token(TokenType type)
{
	if (type == TOKEN_STRUCT) return "struct";
	return "union";
}

902
src/compiler/value.c Normal file
View File

@@ -0,0 +1,902 @@
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "value.h"
#include "ast.h"
#define ERROR_VALUE (Value) { .type = VALUE_TYPE_ERROR }
/**
 * Build an integer Value holding a copy of the given big integer.
 * All other fields are zero-initialised.
 */
Value value_new_int_with_bigint(BigInt big_int)
{
	Value result = { .type = VALUE_TYPE_INT };
	result.big_int = big_int;
	return result;
}
Value value_new_float(long double f)
{
return (Value) { .f = f, .type = VALUE_TYPE_FLOAT };
}
Value value_new_int_with_int(int64_t val)
{
Value value = { .type = VALUE_TYPE_INT };
bigint_init_signed(&value.big_int, val);
return value;
}
Value value_nil()
{
return (Value) { .b = false, .type = VALUE_TYPE_NIL };
}
/**
 * Logical negation of a value.
 *
 * Floats and ints are true when equal to zero, bools are inverted and nil
 * becomes true.
 *
 * @return a bool Value, or an error Value for strings and errors.
 */
Value value_not(Value value)
{
	bool negated;
	switch (value.type)
	{
		case VALUE_TYPE_BOOL:
			negated = !value.b;
			break;
		case VALUE_TYPE_INT:
			negated = bigint_cmp_zero(&value.big_int) == CMP_EQ;
			break;
		case VALUE_TYPE_FLOAT:
			negated = value.f == 0.0;
			break;
		case VALUE_TYPE_NIL:
			negated = true;
			break;
		default:
			return ERROR_VALUE;
	}
	return (Value) { .b = negated, .type = VALUE_TYPE_BOOL };
}
/**
 * Generate `Value value_<_x>(Value lhs, Value rhs)` for a binary arithmetic
 * operation.
 *
 * Both operands must have the same value type (asserted).
 *  - floats: applies the C operator `_floatop` to the two long doubles.
 *  - ints: calls `_intop(&result, &lhs, &rhs)` on the big integers.
 *  - everything else: yields an error Value.
 *
 * The error return sits after the switch so that every path through the
 * generated function returns a value.
 */
#define BIN_OP(_x, _intop, _floatop) \
Value value_## _x(Value lhs, Value rhs) { \
	assert(lhs.type == rhs.type); \
	switch (lhs.type) \
	{ \
		case VALUE_TYPE_FLOAT: \
			return (Value) { .f = lhs.f _floatop rhs.f, .type = VALUE_TYPE_FLOAT }; \
		case VALUE_TYPE_INT: { \
			Value value = value_new_int_with_int(0); \
			_intop(&value.big_int, &lhs.big_int, &rhs.big_int); \
			return value; \
		} \
		case VALUE_TYPE_BOOL: \
		case VALUE_TYPE_STRING: \
		case VALUE_TYPE_NIL: \
		case VALUE_TYPE_ERROR: \
			break; \
	} \
	return ERROR_VALUE; \
}
/**
 * Generate `Value value_<_x>(Value lhs, Value rhs)` for a binary arithmetic
 * operation that wraps on sized integers.
 *
 * Both operands must have the same value type (asserted).
 *  - floats: applies the C operator `_floatop`.
 *  - ints with a bit width (int_bits > 0): the result inherits lhs' width
 *    and signedness and is computed with `_intopwrap`.
 *  - ints without a bit width: computed with `_intop`.
 *  - everything else: yields an error Value.
 *
 * The error return sits after the switch so that every path through the
 * generated function returns a value.
 */
#define BIN_OP_W(_x, _intop, _intopwrap, _floatop) \
Value value_## _x(Value lhs, Value rhs) { \
	assert(lhs.type == rhs.type); \
	switch (lhs.type) \
	{ \
		case VALUE_TYPE_FLOAT: \
			return (Value) { .f = lhs.f _floatop rhs.f, .type = VALUE_TYPE_FLOAT }; \
		case VALUE_TYPE_INT: { \
			Value value = value_new_int_with_int(0); \
			if (lhs.int_bits > 0) { \
				value.int_bits = lhs.int_bits; value.is_unsigned = lhs.is_unsigned; \
				_intopwrap(&value.big_int, &lhs.big_int, &rhs.big_int, lhs.int_bits, !lhs.is_unsigned); \
			} else { \
				_intop(&value.big_int, &lhs.big_int, &rhs.big_int); \
			} \
			return value; \
		} \
		case VALUE_TYPE_BOOL: \
		case VALUE_TYPE_STRING: \
		case VALUE_TYPE_NIL: \
		case VALUE_TYPE_ERROR: \
			break; \
	} \
	return ERROR_VALUE; \
}
// Instantiate the arithmetic entry points value_add, value_mult, value_div
// and value_mod from the BIN_OP / BIN_OP_W templates.
BIN_OP_W(add, bigint_add, bigint_add_wrap, +)
BIN_OP_W(mult, bigint_mul, bigint_mul_wrap, *)
BIN_OP(div, bigint_div_floor, /)
// NOTE(review): the float operator passed for mod is '/', so value_mod on two
// floats expands to lhs.f / rhs.f (a division, not a remainder) - confirm and
// replace with a real remainder operation.
BIN_OP(mod, bigint_mod, /)
/**
 * Subtract value2 from value1, expressed as value1 + (-value2).
 */
Value value_sub(Value value1, Value value2)
{
	Value negated_rhs = value_negate(value2);
	return value_add(value1, negated_rhs);
}
/**
 * AND of two values: logical for two bools, bitwise for two integers of
 * equal bit width (asserted). The result reuses value1's remaining fields.
 */
Value value_and(Value value1, Value value2)
{
	bool both_bool = value1.type == VALUE_TYPE_BOOL && value2.type == VALUE_TYPE_BOOL;
	if (both_bool)
	{
		value1.b = value1.b && value2.b;
	}
	else
	{
		assert(value1.type == VALUE_TYPE_INT && value2.type == VALUE_TYPE_INT);
		assert(value1.int_bits == value2.int_bits);
		BigInt combined;
		bigint_and(&combined, &value1.big_int, &value2.big_int);
		value1.big_int = combined;
	}
	return value1;
}
/**
 * OR of two values: logical for two bools, bitwise for two integers of
 * equal bit width (asserted). The result reuses value1's remaining fields.
 */
Value value_or(Value value1, Value value2)
{
	bool both_bool = value1.type == VALUE_TYPE_BOOL && value2.type == VALUE_TYPE_BOOL;
	if (both_bool)
	{
		value1.b = value1.b || value2.b;
	}
	else
	{
		assert(value1.type == VALUE_TYPE_INT && value2.type == VALUE_TYPE_INT);
		assert(value1.int_bits == value2.int_bits);
		BigInt combined;
		bigint_or(&combined, &value1.big_int, &value2.big_int);
		value1.big_int = combined;
	}
	return value1;
}
/**
 * Three-way comparison of two values of the same kind.
 *
 * The kind is taken from value1; value2 is read through the same union
 * member. Only bool, int and float are supported - any other kind hits
 * UNREACHABLE.
 *
 * For floats, operands that are neither less nor greater (including NaN)
 * compare as CMP_EQ.
 *
 * @return CMP_LT, CMP_EQ or CMP_GT.
 */
static inline CmpRes cmp(Value value1, Value value2)
{
	switch (value1.type)
	{
		case VALUE_TYPE_BOOL:
			if (value1.b < value2.b) return CMP_LT;
			return value1.b == value2.b ? CMP_EQ : CMP_GT;
		case VALUE_TYPE_INT:
			return bigint_cmp(&value1.big_int, &value2.big_int);
		case VALUE_TYPE_FLOAT:
			// Compare the float payloads of both operands.
			if (value1.f < value2.f) return CMP_LT;
			return value1.f > value2.f ? CMP_GT : CMP_EQ;
		default:
			UNREACHABLE;
	}
	UNREACHABLE
}
/** @return true when value1 <= value2 according to cmp(). */
bool value_le(Value value1, Value value2)
{
	return cmp(value1, value2) != CMP_GT;
}
/** @return true when value1 >= value2 according to cmp(). */
bool value_ge(Value value1, Value value2)
{
	return cmp(value1, value2) != CMP_LT;
}
/** @return true when value1 > value2 according to cmp(). */
bool value_gt(Value value1, Value value2)
{
	return cmp(value1, value2) == CMP_GT;
}
/** @return true when value1 < value2 according to cmp(). */
bool value_lt(Value value1, Value value2)
{
	return cmp(value1, value2) == CMP_LT;
}
/** @return true when cmp() reports the two values as equal. */
bool value_eq(Value value1, Value value2)
{
	return cmp(value1, value2) == CMP_EQ;
}
/** @return true when cmp() reports the two values as different. */
bool value_ne(Value value1, Value value2)
{
	return cmp(value1, value2) != CMP_EQ;
}
/**
 * XOR of two values: logical for two bools, bitwise for two integers of
 * equal bit width (asserted). The result reuses value1's remaining fields.
 */
Value value_xor(Value value1, Value value2)
{
	bool both_bool = value1.type == VALUE_TYPE_BOOL && value2.type == VALUE_TYPE_BOOL;
	if (both_bool)
	{
		// For two bools, "different" is the same as exclusive or.
		value1.b = value1.b != value2.b;
	}
	else
	{
		assert(value1.type == VALUE_TYPE_INT && value2.type == VALUE_TYPE_INT);
		assert(value1.int_bits == value2.int_bits);
		BigInt combined;
		bigint_xor(&combined, &value1.big_int, &value2.big_int);
		value1.big_int = combined;
	}
	return value1;
}
/**
 * Arithmetic negation.
 *
 *  - float: a new float Value holding -f.
 *  - bool: the integer -1 for true, 0 for false.
 *  - int: keeps width and signedness; uses the wrapping negate when the
 *    value has a bit width, the plain negate otherwise.
 *  - nil, string, error: an error Value.
 */
Value value_negate(Value value)
{
	if (value.type == VALUE_TYPE_FLOAT) return value_new_float(-value.f);
	if (value.type == VALUE_TYPE_BOOL) return value_new_int_with_int(value.b ? -1 : 0);
	if (value.type != VALUE_TYPE_INT) return ERROR_VALUE;

	Value negated = value_new_int_with_int(0);
	negated.int_bits = value.int_bits;
	negated.is_unsigned = value.is_unsigned;
	if (value.int_bits == 0)
	{
		bigint_negate(&negated.big_int, &value.big_int);
	}
	else
	{
		bigint_negate_wrap(&negated.big_int, &value.big_int, value.int_bits);
	}
	return negated;
}
/**
 * Bitwise complement.
 *
 *  - int: requires a bit width (int_bits != 0), otherwise aborts with a
 *    fatal error. The result keeps the width and signedness of the input.
 *  - bool: the integer 0 for true, 1 for false.
 *  - float, nil, string, error: an error Value.
 *
 * This function produces no output; it only computes the result.
 */
Value value_bit_not(Value value)
{
	switch (value.type)
	{
		case VALUE_TYPE_INT:
		{
			if (!value.int_bits)
			{
				FATAL_ERROR("Not supported");
			}
			Value result = value_new_int_with_int(0);
			result.is_unsigned = value.is_unsigned;
			result.int_bits = value.int_bits;
			bigint_not(&result.big_int, &value.big_int, value.int_bits, !value.is_unsigned);
			return result;
		}
		case VALUE_TYPE_BOOL:
			return value_new_int_with_int(value.b ? 0 : 1);
		case VALUE_TYPE_FLOAT:
		case VALUE_TYPE_NIL:
		case VALUE_TYPE_STRING:
		case VALUE_TYPE_ERROR:
			break;
	}
	return ERROR_VALUE;
}
inline Value value_new_bool(bool value)
{
return (Value) { .b = value, .type = VALUE_TYPE_BOOL };
}
Value value_new_string(const char *string, uint32_t len)
{
return (Value) { .str = string, .str_len = len, .type = VALUE_TYPE_STRING };
}
/**
 * Truthiness of a value as a C bool.
 *
 * Floats and ints are true when non-zero, bools yield their own value,
 * strings are always true, nil and error are false.
 *
 * The fallback return after the switch guarantees that every path returns
 * a value, even if the type tag holds an unexpected value.
 */
bool value_as_bool(Value *value)
{
	switch (value->type)
	{
		case VALUE_TYPE_FLOAT:
			return value->f != 0.0;
		case VALUE_TYPE_INT:
			return bigint_cmp_zero(&value->big_int) != CMP_EQ;
		case VALUE_TYPE_BOOL:
			return value->b;
		case VALUE_TYPE_STRING:
			return true;
		case VALUE_TYPE_NIL:
		case VALUE_TYPE_ERROR:
			break;
	}
	return false;
}
/**
 * Convert a value to a bool Value.
 *
 * Bools are returned unchanged; floats and ints become "non-zero"; nil
 * becomes false; strings and errors yield an error Value.
 */
Value value_to_bool(Value value)
{
	bool truthy;
	switch (value.type)
	{
		case VALUE_TYPE_BOOL:
			return value;
		case VALUE_TYPE_INT:
			truthy = bigint_cmp_zero(&value.big_int) != CMP_EQ;
			break;
		case VALUE_TYPE_FLOAT:
			truthy = value.f != 0.0;
			break;
		case VALUE_TYPE_NIL:
			truthy = false;
			break;
		default:
			return ERROR_VALUE;
	}
	return value_new_bool(truthy);
}
/**
 * Despite its name this currently produces a bool Value: floats and ints
 * become "non-zero", bools are returned unchanged, nil becomes false, and
 * strings and errors yield an error Value.
 *
 * NOTE(review): the logic is the same as value_to_bool; presumably a
 * float conversion was intended here - confirm before relying on it.
 */
Value value_float(Value value)
{
	bool truthy;
	switch (value.type)
	{
		case VALUE_TYPE_BOOL:
			return value;
		case VALUE_TYPE_INT:
			truthy = bigint_cmp_zero(&value.big_int) != CMP_EQ;
			break;
		case VALUE_TYPE_FLOAT:
			truthy = value.f != 0.0;
			break;
		case VALUE_TYPE_NIL:
			truthy = false;
			break;
		default:
			return ERROR_VALUE;
	}
	return value_new_bool(truthy);
}
/**
 * Human-readable name for the type of a value.
 *
 * A bit width of 0 yields the unsized names "float" / "int"; sized values
 * yield names such as "f32", "i16" or "u64". Unsupported widths hit
 * UNREACHABLE.
 */
const char *value_type_name(const Value *value)
{
	switch (value->type)
	{
		case VALUE_TYPE_ERROR:
			return "<error>";
		case VALUE_TYPE_STRING:
			return "string";
		case VALUE_TYPE_NIL:
			return "nil";
		case VALUE_TYPE_BOOL:
			return "bool";
		case VALUE_TYPE_FLOAT:
			if (value->float_bits == 0) return "float";
			if (value->float_bits == 16) return "f16";
			if (value->float_bits == 32) return "f32";
			if (value->float_bits == 64) return "f64";
			if (value->float_bits == 128) return "f128";
			UNREACHABLE;
		case VALUE_TYPE_INT:
		{
			bool is_unsigned = value->is_unsigned;
			switch (value->int_bits)
			{
				case 0:
					return "int";
				case 8:
					return is_unsigned ? "u8" : "i8";
				case 16:
					return is_unsigned ? "u16" : "i16";
				case 32:
					return is_unsigned ? "u32" : "i32";
				case 64:
					return is_unsigned ? "u64" : "i64";
				default:
					UNREACHABLE;
			}
		}
	}
	FATAL_ERROR("Can't happen");
}
/** @return true when the value is an int or a float. */
bool value_is_number(const Value *value)
{
	switch (value->type)
	{
		case VALUE_TYPE_INT:
		case VALUE_TYPE_FLOAT:
			return true;
		default:
			return false;
	}
}
/**
 * Look up the built-in Type matching a value.
 *
 * A bit width of 0 maps to the compile-time float/int types; 128-bit
 * floats are not mapped yet. Unsupported widths hit UNREACHABLE.
 * Error values map to the poisoned type, which is also the fallback
 * return so that every path through the function returns a value.
 */
Type *value_find_type(const Value *value)
{
	switch (value->type)
	{
		case VALUE_TYPE_FLOAT:
			switch (value->float_bits)
			{
				case 0: return &type_compfloat;
				case 16: return &type_half;
				case 32: return &type_float;
				case 64: return &type_double;
				// case 128: return type_builtin_f128();
				default: break;
			}
			UNREACHABLE
		case VALUE_TYPE_INT:
			switch (value->int_bits)
			{
				case 0: return &type_compint;
				case 8: return value->is_unsigned ? &type_byte : &type_char;
				case 16: return value->is_unsigned ? &type_ushort : &type_short;
				case 32: return value->is_unsigned ? &type_uint : &type_int;
				case 64: return value->is_unsigned ? &type_ulong : &type_long;
				default: break;
			}
			UNREACHABLE
		case VALUE_TYPE_BOOL:
			return &type_bool;
		case VALUE_TYPE_NIL:
			return &type_nil;
		case VALUE_TYPE_STRING:
			return &type_string;
		case VALUE_TYPE_ERROR:
			break;
	}
	return &poisoned_type;
}
/**
 * Parse hexadecimal digits (without the "0x" prefix) into an int Value.
 * Underscores are skipped. The input is assumed to be well-formed hex;
 * no validation is done.
 */
static inline Value parse_hex(const char *string, int len)
{
	Value value = { .type = VALUE_TYPE_INT };
	BigInt *acc = &value.big_int;
	bigint_init_signed(acc, 0);
	BigInt shifted = { .digit_count = 0 };
	BigInt nibble = { .digit_count = 0 };
	for (int i = 0; i < len; i++)
	{
		char c = string[i];
		if (c == '_') continue;
		int digit;
		if (c >= 'a')
		{
			digit = c - 'a' + 10;
		}
		else if (c >= 'A')
		{
			digit = c - 'A' + 10;
		}
		else
		{
			digit = c - '0';
		}
		// acc = acc * 16 + digit
		bigint_shl_int(&shifted, acc, 4);
		bigint_init_signed(&nibble, digit);
		bigint_add(acc, &shifted, &nibble);
	}
	return value;
}
/**
 * Parse decimal digits into an int Value. Underscores are skipped and the
 * digits are not validated.
 */
static inline Value parse_dec(const char *string, int len)
{
	Value value = { .type = VALUE_TYPE_INT };
	BigInt *acc = &value.big_int;
	bigint_init_signed(acc, 0);
	BigInt ten;
	bigint_init_signed(&ten, 10);
	BigInt scaled = { .digit_count = 0 };
	BigInt digit = { .digit_count = 0 };
	for (int i = 0; i < len; i++)
	{
		char c = string[i];
		if (c == '_') continue;
		// acc = acc * 10 + digit
		bigint_mul(&scaled, acc, &ten);
		bigint_init_signed(&digit, (c - '0'));
		bigint_add(acc, &scaled, &digit);
	}
	return value;
}
/**
 * Parse octal digits (without the "0o" prefix) into an int Value.
 * Underscores are skipped and the digits are not validated.
 */
static inline Value parse_oct(const char *string, int len)
{
	Value value = { .type = VALUE_TYPE_INT };
	BigInt *acc = &value.big_int;
	bigint_init_signed(acc, 0);
	BigInt shifted = { .digit_count = 0 };
	BigInt digit = { .digit_count = 0 };
	for (int i = 0; i < len; i++)
	{
		char c = string[i];
		if (c == '_') continue;
		// acc = acc * 8 + digit
		bigint_shl_int(&shifted, acc, 3);
		bigint_init_signed(&digit, (c - '0'));
		bigint_add(acc, &shifted, &digit);
	}
	return value;
}
/**
 * Parse binary digits (without the "0b" prefix) into an int Value.
 * Underscores are skipped and the digits are not validated.
 */
static inline Value parse_bin(const char *string, int len)
{
	Value value = { .type = VALUE_TYPE_INT };
	BigInt *acc = &value.big_int;
	bigint_init_signed(acc, 0);
	BigInt shifted = { .digit_count = 0 };
	BigInt digit = { .digit_count = 0 };
	for (int i = 0; i < len; i++)
	{
		char c = string[i];
		if (c == '_') continue;
		// acc = acc * 2 + digit
		bigint_shl_int(&shifted, acc, 1);
		bigint_init_signed(&digit, (c - '0'));
		bigint_add(acc, &shifted, &digit);
	}
	return value;
}
/**
 * Parse an integer literal into an int Value. Positive numbers only.
 *
 * Supported forms: plain decimal, 0x... (hex), 0o... (octal) and
 * 0b... (binary). Underscores inside the digits are skipped by the
 * per-base parsers. The text is assumed to be a well-formed literal.
 *
 * @param string start of the literal (does not need to be NUL-terminated).
 * @param len number of characters in the literal.
 * @return the parsed value.
 */
Value parse_int(const char *string, int len)
{
	// A base prefix needs a leading '0', a base letter and at least one digit.
	if (len > 2 && string[0] == '0')
	{
		switch (string[1])
		{
			case 'x':
				return parse_hex(string + 2, len - 2);
			case 'o':
				return parse_oct(string + 2, len - 2);
			case 'b':
				return parse_bin(string + 2, len - 2);
			default:
				break;
		}
	}
	// The full length is passed on as an int, so long literals are not cut short.
	return parse_dec(string, len);
}
/**
 * Print a value to stdout, without a trailing newline.
 *
 * Bools print as "true"/"false", strings print str_len characters,
 * ints print in base 10, floats use %Lf, nil prints "nil" and errors
 * print "ERROR".
 */
void value_print(Value value)
{
	switch (value.type)
	{
		case VALUE_TYPE_BOOL:
			// Use a literal format so the text is never interpreted as a format string.
			printf("%s", value.b ? "true" : "false");
			break;
		case VALUE_TYPE_STRING:
			printf("%.*s", value.str_len, value.str);
			break;
		case VALUE_TYPE_INT:
			bigint_print(&value.big_int, 10);
			break;
		case VALUE_TYPE_ERROR:
			printf("ERROR");
			break;
		case VALUE_TYPE_FLOAT:
			printf("%Lf", value.f);
			break;
		case VALUE_TYPE_NIL:
			printf("nil");
			break;
	}
}
/**
 * Print a value to the given stream, without a trailing newline.
 *
 * Bools print as "true"/"false", strings print str_len characters,
 * ints print in base 10, floats use %Lf, nil prints "nil" and errors
 * print "ERROR".
 */
void value_fprint(FILE *file, Value value)
{
	switch (value.type)
	{
		case VALUE_TYPE_BOOL:
			// Use a literal format so the text is never interpreted as a format string.
			fprintf(file, "%s", value.b ? "true" : "false");
			break;
		case VALUE_TYPE_STRING:
			fprintf(file, "%.*s", value.str_len, value.str);
			break;
		case VALUE_TYPE_INT:
			bigint_fprint(file, &value.big_int, 10);
			break;
		case VALUE_TYPE_ERROR:
			fprintf(file, "ERROR");
			break;
		case VALUE_TYPE_FLOAT:
			fprintf(file, "%Lf", value.f);
			break;
		case VALUE_TYPE_NIL:
			fprintf(file, "nil");
			break;
	}
}
/**
 * Turn an existing Value into a float in place.
 *
 * @param value the value to overwrite.
 * @param f the new float payload.
 * @param bits the float bit width to record.
 */
void value_update_to_float(Value *value, long double f, uint16_t bits)
{
	value->type = VALUE_TYPE_FLOAT;
	value->f = f;
	value->float_bits = bits;
}
/**
 * Convert value2 to the type of value1. The caller has already put the two
 * values in conversion order, so value1 is the target and only value2 is
 * modified.
 *
 * @param value1 the value whose type is the conversion target.
 * @param value2 the value to convert in place.
 * @return true if conversion worked.
 */
static bool value_convert_to_type_ordered(Value *value1, Value *value2)
{
	switch (value1->type)
	{
		case VALUE_TYPE_FLOAT:
			switch (value2->type)
			{
				case VALUE_TYPE_FLOAT:
					// Promote value2 to the width of the target, like every other branch.
					value2->float_bits = value1->float_bits;
					return true;
				case VALUE_TYPE_INT:
					value_update_to_float(value2, bigint_as_float(&value2->big_int), value1->float_bits);
					return true;
				case VALUE_TYPE_BOOL:
					value_update_to_float(value2, value2->b ? 1.0 : 0.0, value1->float_bits);
					return true;
				case VALUE_TYPE_NIL:
					value_update_to_float(value2, 0.0, value1->float_bits);
					return true;
				case VALUE_TYPE_STRING:
				case VALUE_TYPE_ERROR:
					return false;
			}
			UNREACHABLE
		case VALUE_TYPE_INT:
			switch (value2->type)
			{
				case VALUE_TYPE_INT:
					// First check if we have a comptime int. If so, check that it fits.
					if (value2->int_bits == 0)
					{
						if (value1->int_bits == 0) return true;
						if (!bigint_fits_in_bits(&value2->big_int, value1->int_bits, !value1->is_unsigned)) return false;
						BigInt res;
						bigint_truncate(&res, &value2->big_int, value1->int_bits, !value1->is_unsigned);
						value2->big_int = res;
						return true;
					}
					if (!value1->is_unsigned && value2->is_unsigned)
					{
						// If unsigned value is same or larger, disallow!
						if (value1->int_bits <= value2->int_bits) return false;
						value2->is_unsigned = false;
						value2->int_bits = value1->int_bits;
						return true;
					}
					// Final case, both have the same sign, promote to largest.
					value2->int_bits = value1->int_bits;
					return true;
				case VALUE_TYPE_BOOL:
					bigint_init_unsigned(&value2->big_int, value2->b ? 1 : 0);
					value2->int_bits = value1->int_bits;
					value2->is_unsigned = value1->is_unsigned;
					value2->type = VALUE_TYPE_INT;
					return true;
				case VALUE_TYPE_NIL:
					bigint_init_unsigned(&value2->big_int, 0);
					value2->int_bits = value1->int_bits;
					value2->is_unsigned = value1->is_unsigned;
					value2->type = VALUE_TYPE_INT;
					return true;
				case VALUE_TYPE_STRING:
				case VALUE_TYPE_ERROR:
					return false;
				case VALUE_TYPE_FLOAT:
					UNREACHABLE;
			}
			UNREACHABLE;
		case VALUE_TYPE_BOOL:
			switch (value2->type)
			{
				case VALUE_TYPE_BOOL:
					return true;
				case VALUE_TYPE_NIL:
					value2->b = false;
					value2->type = VALUE_TYPE_BOOL;
					return true;
				case VALUE_TYPE_STRING:
				case VALUE_TYPE_ERROR:
					return false;
				case VALUE_TYPE_FLOAT:
				case VALUE_TYPE_INT:
					UNREACHABLE;
			}
			UNREACHABLE;
		case VALUE_TYPE_NIL:
			switch (value2->type)
			{
				case VALUE_TYPE_NIL:
					return true;
				case VALUE_TYPE_STRING:
				case VALUE_TYPE_ERROR:
					return false;
				case VALUE_TYPE_FLOAT:
				case VALUE_TYPE_BOOL:
				case VALUE_TYPE_INT:
					UNREACHABLE;
			}
			UNREACHABLE;
		case VALUE_TYPE_STRING:
			return value2->type == VALUE_TYPE_STRING;
		case VALUE_TYPE_ERROR:
			return false;
	}
	UNREACHABLE;
}
/**
 * Bring two values to a common type by converting one of them in place.
 *
 * The conversion target is chosen as follows:
 *  - different kinds: the one whose ValueType enumerator comes first;
 *  - two floats: the one with more bits;
 *  - two ints of different signedness: the signed one;
 *  - two ints of the same signedness: the one with more bits.
 *
 * @return true if the conversion worked.
 */
bool value_convert_to_type(Value *value1, Value *value2)
{
	bool swap;
	if (value1->type != value2->type)
	{
		swap = value2->type < value1->type;
	}
	else if (value1->type == VALUE_TYPE_FLOAT)
	{
		swap = value2->float_bits > value1->float_bits;
	}
	else if (value1->type == VALUE_TYPE_INT)
	{
		bool sign_differs = value1->is_unsigned != value2->is_unsigned;
		swap = sign_differs ? value1->is_unsigned : value2->int_bits > value1->int_bits;
	}
	else
	{
		swap = false;
	}
	if (swap) return value_convert_to_type_ordered(value2, value1);
	return value_convert_to_type_ordered(value1, value2);
}
/**
 * Record a new bit width on an int value and make the stored number fit.
 *
 * The width is always written. A width of 0 needs no check. Otherwise
 * the number must fit, or truncation must be allowed.
 *
 * @return false only when the number does not fit and allow_trunc is false.
 */
static inline bool set_bits_and_truncate_int_value_if_needed(Value *value, uint16_t bits, bool allow_trunc)
{
	value->int_bits = bits;
	bool is_signed = !value->is_unsigned;
	if (bits == 0 || bigint_fits_in_bits(&value->big_int, bits, is_signed)) return true;
	if (!allow_trunc) return false;
	BigInt truncated;
	bigint_truncate(&truncated, &value->big_int, bits, is_signed);
	value->big_int = truncated;
	return true;
}
/**
 * Change the signedness of an int value in place.
 *
 * The flag is flipped before any check, so it stays flipped even when the
 * function returns false. Values without a bit width always succeed.
 * Unsigned to signed succeeds when the number fits in the signed width;
 * signed to unsigned succeeds when the number is not negative. In the
 * remaining cases the number is truncated if allow_trunc is set.
 *
 * @return false when the number is not representable and truncation is not allowed.
 */
bool value_int_change_sign(Value *value, bool is_unsigned, bool allow_trunc)
{
	bool was_unsigned = value->is_unsigned;
	if (was_unsigned == is_unsigned) return true;

	value->is_unsigned = !was_unsigned;
	if (value->int_bits == 0) return true;

	bool representable = was_unsigned
	                     ? bigint_fits_in_bits(&value->big_int, value->int_bits, true)
	                     : !value->big_int.is_negative;
	if (representable) return true;
	if (!allow_trunc) return false;

	// The new representation is signed exactly when the old one was unsigned.
	BigInt wrapped;
	bigint_truncate(&wrapped, &value->big_int, value->int_bits, was_unsigned);
	value->big_int = wrapped;
	// TODO verify that this actually works!
	return true;
}
// Convert a value in place to the requested kind.
//
// value:       the value to convert; its payload and type tag are rewritten.
// type:        the target kind.
// bits:        bit width recorded for float and int targets.
// is_unsigned: signedness requested for int targets.
// allow_trunc: permits lossy conversions (negative float to unsigned int,
//              int narrowing / sign change, float to bool).
// Returns true when the conversion succeeded. Strings and errors cannot be
// converted to float, int or bool; nil and string targets only accept a
// value that already has that kind.
//
// The payload members share storage, so each case reads the old member
// before writing the new one.
bool value_convert(Value *value, ValueType type, uint16_t bits, bool is_unsigned, bool allow_trunc)
{
switch (type)
{
case VALUE_TYPE_FLOAT:
switch (value->type)
{
case VALUE_TYPE_FLOAT:
// TODO actual truncation
value->float_bits = bits;
break;
case VALUE_TYPE_INT:
value->f = bigint_as_float(&value->big_int);
break;
case VALUE_TYPE_BOOL:
value->f = value->b ? 1.0 : 0.0;
break;
case VALUE_TYPE_NIL:
value->f = 0.0;
break;
case VALUE_TYPE_STRING:
return false;
case VALUE_TYPE_ERROR:
return false;
}
// Common tail for every successful conversion to float.
value->float_bits = bits;
value->type = VALUE_TYPE_FLOAT;
return true;
case VALUE_TYPE_INT:
switch (value->type)
{
case VALUE_TYPE_FLOAT:
if (value->f < 0 && is_unsigned)
{
if (!allow_trunc) return false;
// First convert to signed, then convert to unsigned.
bool success = value_convert(value, type, bits, false, true);
assert(success && "Unexpected failure");
return value_convert(value, type, bits, true, true);
}
// TODO actual expansion
bigint_init_signed(&value->big_int, (int64_t)value->f);
value->is_unsigned = is_unsigned;
value->type = VALUE_TYPE_INT;
return set_bits_and_truncate_int_value_if_needed(value, bits, allow_trunc);
case VALUE_TYPE_INT:
// Adjust signedness first, then the width.
if (!value_int_change_sign(value, is_unsigned, allow_trunc)) return false;
return set_bits_and_truncate_int_value_if_needed(value, bits, allow_trunc);
case VALUE_TYPE_BOOL:
value->type = VALUE_TYPE_INT;
value->int_bits = bits;
value->is_unsigned = is_unsigned;
bigint_init_unsigned(&value->big_int, value->b ? 1 : 0);
return true;
case VALUE_TYPE_NIL:
value->type = VALUE_TYPE_INT;
value->int_bits = bits;
value->is_unsigned = is_unsigned;
bigint_init_unsigned(&value->big_int, 0);
return true;
case VALUE_TYPE_STRING:
return false;
case VALUE_TYPE_ERROR:
return false;
}
UNREACHABLE
case VALUE_TYPE_BOOL:
switch (value->type)
{
case VALUE_TYPE_FLOAT:
if (!allow_trunc) return false;
value->b = value->f != 0.0;
break;
case VALUE_TYPE_INT:
// True when the big integer has at least one digit.
value->b = value->big_int.digit_count != 0;
break;
case VALUE_TYPE_BOOL:
return true;
case VALUE_TYPE_NIL:
value->b = false;
break;
case VALUE_TYPE_STRING:
return false;
case VALUE_TYPE_ERROR:
return false;
}
value->type = VALUE_TYPE_BOOL;
return true;
case VALUE_TYPE_NIL:
return value->type == VALUE_TYPE_NIL;
case VALUE_TYPE_STRING:
return value->type == VALUE_TYPE_STRING;
case VALUE_TYPE_ERROR:
return false;
}
UNREACHABLE
}

87
src/compiler/value.h Normal file
View File

@@ -0,0 +1,87 @@
#pragma once
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "compiler_common.h"
#include "bigint.h"
// Kind tag for a compile-time Value.
// DO NOT CHANGE ORDER! value_convert_to_type compares these enumerators
// numerically to decide which of two values is the conversion target
// (the kind listed first wins).
typedef enum _ValueType
{
VALUE_TYPE_FLOAT,
VALUE_TYPE_INT,
VALUE_TYPE_BOOL,
VALUE_TYPE_NIL,
VALUE_TYPE_STRING,
VALUE_TYPE_ERROR,
} ValueType;
typedef struct _BigInt BigInt;
// A compile-time value: a kind tag plus a payload whose active union member
// is selected by `type`.
typedef struct _Value
{
ValueType type;
union
{
// VALUE_TYPE_INT payload. An int_bits of 0 is the unsized ("int") form.
struct
{
BigInt big_int;
bool is_unsigned;
uint16_t int_bits;
};
// VALUE_TYPE_FLOAT payload. A float_bits of 0 is the unsized ("float") form.
struct
{
long double f;
uint16_t float_bits;
};
// VALUE_TYPE_BOOL payload (also cleared for nil values).
bool b;
// VALUE_TYPE_STRING payload: str_len characters starting at str.
struct {
const char *str;
uint32_t str_len;
};
};
// NOTE(review): separate from str_len; its purpose is not visible here - confirm.
uint32_t len;
} Value;
bool value_is_number(const Value *value);
Value value_new_int_with_bigint(BigInt big_int);
Value value_new_int_with_int(int64_t val);
Value value_new_float(long double f);
Value value_new_string(const char *string, uint32_t len);
Value value_new_bool(bool value);
Value value_to_bool(Value value);
Value value_negate(Value value);
Value value_bit_not(Value value);
Value value_mult(Value lhs, Value rhs);
Value value_sub(Value lhs, Value rhs);
Value value_xor(Value lhs, Value rhs);
bool value_ne(Value value1, Value value2);
bool value_eq(Value value1, Value value2);
bool value_gt(Value value1, Value value2);
bool value_lt(Value value1, Value value2);
bool value_ge(Value value1, Value value2);
bool value_le(Value value1, Value value2);
Value value_and(Value lhs, Value rhs);
Value value_or(Value lhs, Value rhs);
Value value_add(Value lhs, Value rhs);
Value value_mod(Value lhs, Value rhs);
Value value_div(Value lhs, Value rhs);
Value value_not(Value value);
Value value_nil();
void value_update_to_float(Value *value, long double f, uint16_t bits);
const char *value_type_name(const Value *value);
Type *value_find_type(const Value *value);
void value_print(Value value);
void value_fprint(FILE *file, Value value);
bool value_as_bool(Value *value);
bool value_convert_to_type(Value *value1, Value *value2);
bool value_convert(Value *value, ValueType type, uint16_t bits, bool is_unsigned, bool allow_trunc);
// Parse normal integers, parse 0xBEEF, parse 0o1337, parse 0b1010101 positive numbers only
Value parse_int(const char *string, int len);

View File

@@ -32,9 +32,9 @@ static void test_lexer(void)
const char* token = token_type_to_string((TokenType)i);
tokens[i] = token;
len[i] = strlen(token);
TokenType lookup = TOKEN_VAR_IDENT;
TokenType lookup = TOKEN_IDENT;
const char* interned = symtab_add(token, len[i], fnv1a(token, len[i]), &lookup);
if (lookup != TOKEN_VAR_IDENT)
if (lookup != TOKEN_IDENT)
{
Token scanned = lexer_scan_ident_test(token);
TEST_ASSERT(scanned.type == i, "Mismatch scanning: was '%s', expected '%s' - lookup: %s - interned: %s.",
@@ -88,7 +88,8 @@ static void test_lexer(void)
{
token = lexer_scan_token();
if (token.type == TOKEN_EOF) break;
TEST_ASSERT(token.type != INVALID_TOKEN, "Got invalid token");
TEST_ASSERT(token.type != TOKEN_INVALID_TOKEN, "Got invalid token");
tokens_parsed++;
}
}

View File

@@ -1,4 +1,5 @@
#include <stdio.h>
#include <compiler/compiler.h>
#include "build/build_options.h"
#include "build/project_creation.h"
#include "utils/errors.h"
@@ -15,6 +16,7 @@ int main(int argc, const char *argv[])
break;
case COMMAND_UNIT_TEST:
compiler_tests();
break;
case COMMAND_COMPILE:
compile_file();
break;

View File

@@ -5,6 +5,11 @@
#include "errors.h"
#include <stdarg.h>
/**
 * Write a printf-style formatted message to stderr.
 *
 * @param format printf-style format string.
 * @param list   variadic argument list supplying the values for format.
 */
void evprintf(const char *format, va_list list)
{
	// The character count returned by vfprintf is intentionally discarded.
	(void)vfprintf(stderr, format, list);
}
void eprintf(const char *format, ...)
{
va_list arglist;
@@ -13,7 +18,7 @@ void eprintf(const char *format, ...)
va_end(arglist);
}
void error_exit(const char *format, ...)
void __attribute__((noreturn)) error_exit(const char *format, ...)
{
va_list arglist;
va_start(arglist, format);

View File

@@ -7,8 +7,9 @@
#include <stdio.h>
#include <stdlib.h>
void evprintf(const char *format, va_list list);
void eprintf(const char *format, ...);
void error_exit(const char *format, ...);
void error_exit(const char *format, ...) __attribute__((noreturn));
#define FATAL_ERROR(_string, ...) do { error_exit("FATAL ERROR at %s:%d: " _string, __func__, __LINE__, ##__VA_ARGS__); } while(0)
@@ -30,5 +31,5 @@ void error_exit(const char *format, ...);
#else
#define DEBUG_LOG(_string, ...)
#endif
#define LOG_FUNC DEBUG_LOG("%s entered", __func__);
#define LOG_FUNC DEBUG_LOG("ENTER %s.", __func__);

View File

@@ -24,7 +24,7 @@ const char* expand_path(const char* path)
int filename_to_module(const char *path, char buffer[MAX_IDENTIFIER_LENGTH + 1])
{
size_t len = strlen(path);
int len = (int)strlen(path);
int last_slash = 0;
int last_dot = -1;
for (int i = 0; i < len; i++)
@@ -40,7 +40,7 @@ int filename_to_module(const char *path, char buffer[MAX_IDENTIFIER_LENGTH + 1])
char c = path[i];
if (is_letter(c))
{
c = is_upper(c) ? c + 'a' - 'A' : c;
c = (char)(is_upper(c) ? c + 'a' - 'A' : c);
}
else
{

View File

@@ -176,7 +176,7 @@ static inline bool is_letter(char c)
static inline uint32_t fnv1a(const char *key, uint32_t len)
{
uint32_t hash = FNV1_SEED;
for (int i = 0; i < len; i++)
for (uint32_t i = 0; i < len; i++)
{
hash = FNV1a(key[i], hash);
}
@@ -213,17 +213,17 @@ static inline void* _expand(void *vec, size_t element_size)
header->size++;
if (header->size == header->capacity)
{
_VHeader *new_array = _vec_new(element_size, header->capacity >> 1u);
_VHeader *new_array = _vec_new(element_size, header->capacity << 1u);
memcpy(new_array, header, element_size * header->capacity + sizeof(_VHeader));
header = new_array;
new_array->capacity = header->capacity << 1u;
vec = header + 1;
}
return vec;
}
#define VECEACH(_vec, _index) \
unsigned __vecsize = vec_size(_vec); \
for (unsigned _index = 0; _index < __vecsize; _index++)
for (unsigned _index = 0, __vecsize = vec_size(_vec); _index < __vecsize; _index++)
#define VECNEW(_type, _capacity) ((_type *)(_vec_new(sizeof(_type), _capacity) + 1))
#define VECADD(_vec, _value) \
@@ -232,3 +232,23 @@ static inline void* _expand(void *vec, size_t element_size)
__temp[vec_size(__temp) - 1] = _value; \
__temp; })
#define VECLAST(_vec) ( (_vec) ? (_vec)[vec_size(_vec) - 1] : NULL)
/**
 * Check that a NUL-terminated string contains no lower-case characters.
 * Characters that is_lower() does not accept are ignored, so the empty
 * string passes the check.
 *
 * @param string NUL-terminated string to scan; dereferenced without a NULL check.
 * @return false as soon as is_lower() accepts a character, otherwise true.
 */
static inline bool is_all_upper(const char* string)
{
	for (const char *cursor = string; *cursor != '\0'; cursor++)
	{
		if (is_lower(*cursor)) return false;
	}
	return true;
}
/**
 * Check that a NUL-terminated string contains no upper-case characters.
 * Characters that is_upper() does not accept are ignored, so the empty
 * string passes the check.
 *
 * @param string NUL-terminated string to scan; dereferenced without a NULL check.
 * @return false as soon as is_upper() accepts a character, otherwise true.
 */
static inline bool is_all_lower(const char* string)
{
	for (const char *cursor = string; *cursor != '\0'; cursor++)
	{
		if (is_upper(*cursor)) return false;
	}
	return true;
}

View File

@@ -11,7 +11,7 @@ static const size_t BUCKET_SIZE = MB;
static const size_t STARTING_ARENA_BUCKETS = 16;
static uint8_t **arena_buckets;
static int arena_buckets_used;
static size_t arena_buckets_used;
static size_t arena_buckets_array_size;
static size_t current_use;
static void *current_arena;
@@ -69,13 +69,13 @@ void print_arena_status(void)
{
printf("-- ARENA INFO -- \n");
// The operands are all size_t, so the result must be printed with %zu rather than %ld.
printf(" * Memory used: %zu Kb\n", ((arena_buckets_used - 1) * BUCKET_SIZE + current_use) / 1024);
printf(" * Buckets used: %d\n", arena_buckets_used);
printf(" * Buckets used: %d\n", (int)arena_buckets_used);
printf(" * Allocations: %d\n", allocations_done);
}
void free_arena(void)
{
for (int i = 0; i < arena_buckets_used; i++)
for (uint32_t i = 0; i < arena_buckets_used; i++)
{
free(arena_buckets[i]);
}
@@ -113,7 +113,7 @@ void run_arena_allocator_tests(void)
EXPECT("buckets in use", arena_buckets_used, 2);
ASSERT(malloc_arena(BUCKET_SIZE / 8), "Expected alloc to pass");
EXPECT("buckets in use", arena_buckets_used, 3);
for (int i = 0; i < 8 * STARTING_ARENA_BUCKETS; i++)
for (size_t i = 0; i < 8 * STARTING_ARENA_BUCKETS; i++)
{
ASSERT(malloc_arena(BUCKET_SIZE / 8), "Should be possible to allocate this");
}