diff --git a/CMakeLists.txt b/CMakeLists.txt index 8443aad82..a621b431c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,5 +17,6 @@ add_executable(c3c src/compiler/lexer.c src/compiler/tokens.c src/compiler/symtab.c + src/compiler/parser.c src/compiler_tests/tests.c - src/compiler_tests/benchmark.c src/compiler/malloc.c src/compiler/malloc.h src/compiler/compiler.c src/compiler/compiler.h) \ No newline at end of file + src/compiler_tests/benchmark.c src/utils/malloc.c src/utils/malloc.h src/compiler/compiler.c src/compiler/compiler.h src/compiler/semantic_analyser.c src/compiler/semantic_analyser.h src/utils/common.h src/compiler/source_file.c src/compiler/source_file.h src/compiler/diagnostics.c src/compiler/diagnostics.h) \ No newline at end of file diff --git a/resources/testfragments/lexertest.c3 b/resources/testfragments/lexertest.c3 new file mode 100644 index 000000000..bb402ac10 --- /dev/null +++ b/resources/testfragments/lexertest.c3 @@ -0,0 +1,24 @@ +module foo; + +const int GLOBAL = 0; + +struct Boo +{ + int i; + union + { + int death; + }; +} + +func void test() +{ + int i = 0; + i++; + if (i < 100) + { + int j = 0; + j += i; + return; + } +} \ No newline at end of file diff --git a/src/build/build_options.c b/src/build/build_options.c index 8e7e56765..b50239b2f 100644 --- a/src/build/build_options.c +++ b/src/build/build_options.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "../utils/errors.h" @@ -97,12 +98,12 @@ static inline bool match_shortopt(const char* name) void append_file() { - if (build_options.file_count == MAX_FILES) + if (vec_size(build_options.files) == MAX_FILES) { fprintf(stderr, "Max %d files may be specified\n", MAX_FILES); exit(EXIT_FAILURE); } - build_options.files[build_options.file_count++] = current_arg; + build_options.files = VECADD(build_options.files, current_arg); } static bool arg_match(const char *candidate) @@ -196,6 +197,20 @@ static void parse_option() { case 'h': break; + case 'E': + if (build_options.compile_option != COMPILE_NORMAL) + { + FAIL_WITH_ERR("Illegal combination of compile options."); + } + build_options.compile_option = COMPILE_LEX_ONLY; + return; + case 'P': + if (build_options.compile_option != COMPILE_NORMAL) + { + FAIL_WITH_ERR("Illegal combination of compile options."); + } + build_options.compile_option = COMPILE_LEX_PARSE_ONLY; + return; case '-': if (match_longopt("about")) { @@ -220,12 +235,11 @@ static void parse_option() { if (at_end() || next_is_opt()) error_exit("error: --symtab needs a number."); const char *number = next_arg(); - int size = atoi(number); + int size = atoi(number); // NOLINT(cert-err34-c) if (size < 1024) error_exit("error: --symtab valid size > 1024."); if (size > MAX_SYMTAB_SIZE) error_exit("error: --symptab size cannot exceed %d", MAX_SYMTAB_SIZE); build_options.symtab_size = size; return; - } if (match_longopt("help")) { @@ -251,6 +265,20 @@ void parse_arguments(int argc, const char *argv[]) build_options.path = "."; build_options.command = COMMAND_MISSING; build_options.symtab_size = DEFAULT_SYMTAB_SIZE; + build_options.files = VECNEW(const char *, MAX_FILES); + for (int i = DIAG_NONE; i < DIAG_WARNING_TYPE; i++) + { + build_options.severity[i] = DIAG_IGNORE; + } + for (int i = DIAG_WARNING_TYPE; i < DIAG_ERROR_TYPE; i++) + { + build_options.severity[i] = DIAG_WARN; + } + for (int i = DIAG_ERROR_TYPE; i < DIAG_END_SENTINEL; i++) + { + build_options.severity[i] = DIAG_ERROR; + } + arg_count = argc; args = argv; for (arg_index = 1; arg_index < arg_count; arg_index++) diff --git a/src/build/build_options.h b/src/build/build_options.h index 292c05fa9..62df3f43a 100644 --- a/src/build/build_options.h +++ b/src/build/build_options.h @@ -4,7 +4,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include +#include "../utils/common.h" #define MAX_LIB_DIRS 1024 #define MAX_FILES 2048 @@ -25,19 +25,67 @@ typedef enum COMMAND_UNIT_TEST, } CompilerCommand; +typedef enum +{ + DIAG_NONE = 0, // Don't use! + DIAG_WARNING_TYPE, // Don't use! + DIAG_UNUSED, + DIAG_UNUSED_PARAMETER, + DIAG_UNUSED_FUNCTION, + DIAG_UNUSED_VARIABLE, + DIAG_UNUSED_IMPORT, + DIAG_UNUSED_MODULE, + DIAG_UNUSED_LABEL, + DIAG_UNUSED_PUBLIC, + DIAG_UNUSED_TYPE, + DIAG_CONVERSION, + DIAG_COVERED_SWITCH_DEFAULT, + DIAG_UNREACHABLE_DEFAULT, + DIAG_ERROR_TYPE, // Don't use this! + DIAG_FALLOFF_NONVOID, + DIAG_DUPLICATE_ATTRIBUTE, + DIAG_NOT_IN_ENUM, + DIAG_MISSING_CASE, + DIAG_REMAINDER_DIV_BY_ZERO, + DIAG_INT_TO_POINTER_CAST, + DIAG_SHIFT_LHS_NEGATIVE, + DIAG_SHIFT_NEGATIVE, + DIAG_SHIFT_GT_TYPEWIDTH, + DIAG_END_SENTINEL +} DiagnosticsType; + +typedef enum +{ + DIAG_IGNORE = 0, + DIAG_WARN, + DIAG_ERROR, +} DiagnosticsSeverity; + +typedef enum +{ + COMPILE_NORMAL, + COMPILE_LEX_ONLY, + COMPILE_LEX_PARSE_ONLY, + COMPILE_OUTPUT_AST, +} CompileOption; + typedef struct { const char* lib_dir[MAX_LIB_DIRS]; int lib_count; - const char* files[MAX_FILES]; - int file_count; + const char** files; const char* project_name; const char* target; const char* path; CompilerCommand command; uint32_t symtab_size; + CompileOption compile_option; + DiagnosticsSeverity severity[DIAG_END_SENTINEL]; } BuildOptions; + + + extern BuildOptions build_options; void parse_arguments(int argc, const char *argv[]); diff --git a/src/compiler/compiler.c b/src/compiler/compiler.c index b3abf200d..940c4bfdb 100644 --- a/src/compiler/compiler.c +++ b/src/compiler/compiler.c @@ -2,14 +2,68 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +#include +#include #include "compiler.h" #include "symtab.h" #include "../build/build_options.h" #include "../utils/lib.h" +#include "lexer.h" +#include "source_file.h" +#include "parser.h" +#include "diagnostics.h" void compiler_init(void) { - uint32_t symtab_size = nextHighestPowerOf2(build_options.symtab_size); - symtab_init(symtab_size); +} +static void compiler_lex() +{ + VECEACH(build_options.files, i) + { + bool loaded = false; + File *file = source_file_load(build_options.files[i], &loaded); + if (loaded) continue; + lexer_add_file_for_lexing(file); + printf("# %s\n", file->full_path); + while (1) + { + Token token = lexer_scan_token(); + printf("%s ", token_type_to_string(token.type)); + if (token.type == TOKEN_EOF) break; + } + printf("\n"); + } + exit(EXIT_SUCCESS); +} + +void compiler_parse() +{ + VECEACH(build_options.files, i) + { + bool loaded = false; + File *file = source_file_load(build_options.files[i], &loaded); + if (loaded) continue; + diag_reset(); + parse_file(file); + printf("\n"); + } + exit(EXIT_SUCCESS); +} + +void compile_file() +{ + if (!vec_size(build_options.files)) error_exit("No files to compile."); + switch (build_options.compile_option) + { + case COMPILE_LEX_ONLY: + compiler_lex(); + break; + case COMPILE_LEX_PARSE_ONLY: + compiler_parse(); + break; + default: + break; + } + TODO } \ No newline at end of file diff --git a/src/compiler/compiler.h b/src/compiler/compiler.h index 4457e1ef5..2c1fdfd2b 100644 --- a/src/compiler/compiler.h +++ b/src/compiler/compiler.h @@ -5,4 +5,5 @@ // license that can be found in the LICENSE file. -void compiler_init(); \ No newline at end of file +void compiler_init(); +void compile_file(); diff --git a/src/compiler/compiler_common.h b/src/compiler/compiler_common.h index 70b17d9d3..a9b6dacb2 100644 --- a/src/compiler/compiler_common.h +++ b/src/compiler/compiler_common.h @@ -4,9 +4,11 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include +#include "tokens.h" +#include "utils/common.h" typedef uint32_t SourceLoc; +#define INVALID_LOC UINT32_MAX typedef struct { @@ -30,9 +32,10 @@ typedef struct { const char *contents; const char *name; - SourceLoc start; - SourceLoc end; + const char *full_path; + SourceLoc start_id; + SourceLoc end_id; } File; #define TOKEN_MAX_LENGTH 0xFFFF -#define MAX_IDENTIFIER_LENGTH 31 + diff --git a/src/compiler/diagnostics.c b/src/compiler/diagnostics.c new file mode 100644 index 000000000..46319e1eb --- /dev/null +++ b/src/compiler/diagnostics.c @@ -0,0 +1,317 @@ +// Copyright (c) 2019 Christoffer Lerno. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "diagnostics.h" +#include "source_file.h" +#include +#include + +typedef struct _Diagnostics +{ + bool panic_mode; + unsigned errors; + unsigned warnings; + bool use_color; +} Diagnostics; + +Diagnostics diagnostics; + +void diag_reset(void) +{ + diagnostics.panic_mode = false; + diagnostics.errors = 0; + diagnostics.warnings = 0; +} + +void reset_panic_mode(void) +{ + diagnostics.panic_mode = false; +} + +typedef enum +{ + PRINT_TYPE_ERROR, + PRINT_TYPE_PREV, + PRINT_TYPE_WARN +} PrintType; + +static void print_error(SourceRange source_range, const char *message, PrintType print_type) +{ + File *file = source_file_from_position(source_range.loc); + + const char *content = file->contents; + const char *error_start = file->contents + source_range.loc - file->start_id; + + const static int LINES_SHOWN = 4; + + const char *linestarts[LINES_SHOWN]; + for (int i = 0; i < LINES_SHOWN; i++) linestarts[i] = NULL; + const char *current = content; + linestarts[0] = content; + unsigned line = 1; + while (current < error_start) + { + if (current[0] == '\n') + { + line++; + linestarts[3] = linestarts[2]; + linestarts[2] = linestarts[1]; + linestarts[1] = linestarts[0]; + linestarts[0] = current + 1; + } + current++; + } + + const char *end = NULL; + while (!end) + { + switch (current[0]) + { + case '\n': + case '\0': + end = current; + break; + default: + current++; + break; + } + } + + int max_line_length = (int)round(log10(line)) + 1; + + char number_buffer[20]; + snprintf(number_buffer, 20, "%%%dd: %%.*s\n", max_line_length); + + for (unsigned i = 3; i > 0; i--) + { + int line_number = line - i; + const char *start = linestarts[i]; + if (start == NULL) continue; + const char *line_end = linestarts[i - 1]; + eprintf(number_buffer, line_number, line_end - start - 1, start); + } + eprintf(number_buffer, line, end - linestarts[0], linestarts[0]); + for (unsigned i = 0; i < max_line_length + 2 + error_start - linestarts[0]; i++) + { + eprintf(" "); + } + for (int i = 0; i < source_range.length; i++) + { + eprintf("^"); + } + eprintf("\n"); + + switch (print_type) + { + case PRINT_TYPE_ERROR: + eprintf("(%s:%d) Error: %s\n", file->name, line, message); + break; + case PRINT_TYPE_PREV: + eprintf("(%s:%d) %s\n", file->name, line, message); + break; + case PRINT_TYPE_WARN: + eprintf("(%s:%d) Warning: %s\n", file->name, line, message); + break; + default: + UNREACHABLE + } + +} + + +static void vprint_error(SourceRange span, const char *message, va_list args) +{ + char buffer[256]; + vsnprintf(buffer, 256, message, args); + print_error(span, buffer, PRINT_TYPE_ERROR); +} + +void diag_error_at(SourceRange span, const char *message, ...) +{ + if (diagnostics.panic_mode) return; + diagnostics.panic_mode = true; + va_list args; + va_start(args, message); + vprint_error(span, message, args); + va_end(args); + diagnostics.errors++; +} + +void diag_verror_at(SourceRange span, const char *message, va_list args) +{ + if (diagnostics.panic_mode) return; + diagnostics.panic_mode = true; + vprint_error(span, message, args); + diagnostics.errors++; +} + +/* + + + +bool diagnostics_silence_warnings(Array *warnings) +{ + for (unsigned i = 0; i < warnings->count; i++) + { + const char *warning = warnings->entries[i]; + if (strcmp("no-unused", warning) == 0) + { + diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED); + diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_PARAMETER); + continue; + } + if (strcmp("no-unused-variable", warning) == 0) + { + diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_VARIABLE); + continue; + } + if (strcmp("no-unused-function", warning) == 0) + { + diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_FUNCTION); + continue; + } + if (strcmp("no-unused-type", warning) == 0) + { + diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_TYPE); + continue; + } + if (strcmp("no-unused-module", warning) == 0) + { + diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_MODULE); + continue; + } + if (strcmp("no-unused-public", warning) == 0) + { + diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_PUBLIC); + continue; + } + if (strcmp("no-unused-import", warning) == 0) + { + diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_IMPORT); + continue; + } + if (strcmp("no-unused-label", warning) == 0) + { + diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_LABEL); + continue; + } + PRINT_ERROR("recipe has unknown warning: '%s'\n", warning); + return false; + } + return true; +} + + + + + + +void prev_at_range(SourceRange span, const char *message, ...) +{ + va_list args; + va_start(args, message); + char buffer[256]; + vsnprintf(buffer, 256, message, args); + print_error(span, buffer, PRINT_TYPE_PREV); + va_end(args); +} + +void prev_at(SourceLoc loc, const char *message, ...) +{ + va_list args; + va_start(args, message); + char buffer[256]; + vsnprintf(buffer, 256, message, args); + print_error((SourceRange){ loc, 1 }, buffer, PRINT_TYPE_PREV); + va_end(args); +} + +void sema_error_range(SourceRange token, const char *message, ...) +{ + va_list args; + va_start(args, message); + vprint_error(token, message, args); + va_end(args); + diagnostics.errors++; +} + +void sema_error_at(SourceLoc loc, const char *message, ...) +{ + va_list args; + va_start(args, message); + vprint_error((SourceRange) { loc, 1 }, message, args); + va_end(args); + diagnostics.errors++; +} + +void sema_warn_at(DiagnosticsType type, SourceLoc loc, const char *message, ...) +{ + // TODO ENABLE + return; + SourceRange span = {.loc = loc, .length = 1}; + switch (diagnostics.severity[type]) + { + case DIAG_IGNORE: + return; + case DIAG_WARN: + break; + case DIAG_ERROR: + { + va_list args; + va_start(args, message); + vprint_error(span, message, args); + va_end(args); + diagnostics.errors++; + return; + } + } + va_list args; + va_start(args, message); + char buffer[256]; + vsnprintf(buffer, 256, message, args); + if (diagnostics.severity[type]) + print_error(span, buffer, PRINT_TYPE_WARN); + va_end(args); + diagnostics.warnings++; +} + +void sema_warn_range(DiagnosticsType type, SourceRange span, const char *message, ...) +{ + // TODO ENABLE + return; + switch (diagnostics.severity[type]) + { + case DIAG_IGNORE: + return; + case DIAG_WARN: + break; + case DIAG_ERROR: + { + va_list args; + va_start(args, message); + vprint_error(span, message, args); + va_end(args); + diagnostics.errors++; + return; + } + } + va_list args; + va_start(args, message); + char buffer[256]; + vsnprintf(buffer, 256, message, args); + if (diagnostics.severity[type]) print_error(span, buffer, PRINT_TYPE_WARN); + va_end(args); + diagnostics.warnings++; +} + +unsigned errors() +{ + return diagnostics.errors; +} + +bool error_found() +{ + return diagnostics.errors > 0; +} +*/ \ No newline at end of file diff --git a/src/compiler/diagnostics.h b/src/compiler/diagnostics.h new file mode 100644 index 000000000..91a0df1a2 --- /dev/null +++ b/src/compiler/diagnostics.h @@ -0,0 +1,34 @@ +#pragma once + +// Copyright (c) 2019 Christoffer Lerno. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "compiler_common.h" + +void diag_reset(void); +void diag_error_at(SourceRange span, const char *message, ...); +void diag_verror_at(SourceRange span, const char *message, va_list args); + +/* + + +typedef struct _Array Array; + +void diagnostics_init(void); +void diagnostics_reset(void); +void diagnostics_update_severity(DiagnosticsSeverity severity, DiagnosticsType type); +bool diagnostics_silence_warnings(Array *warnings); +void diagnostics_use_color(bool use_color); +void verror_at(SourceRange span, const char *message, va_list args); +void sema_error_range(SourceRange token, const char *message, ...); +void sema_error_at(SourceLoc loc, const char *message, ...); +void prev_at_range(SourceRange span, const char *message, ...); +void prev_at(SourceLoc loc, const char *message, ...); +void sema_warn_at(DiagnosticsType type, SourceLoc loc, const char *message, ...); +void sema_warn_range(DiagnosticsType type, SourceRange span, const char *message, ...); +bool in_panic_mode(void); +unsigned errors(); +void reset_panic_mode(void); +bool error_found(void); +*/ \ No newline at end of file diff --git a/src/compiler/lexer.c b/src/compiler/lexer.c index c83570934..608a3e0c0 100644 --- a/src/compiler/lexer.c +++ b/src/compiler/lexer.c @@ -2,15 +2,14 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include #include "lexer.h" -#include -#include +#include #include "../utils/errors.h" #include "../utils/lib.h" #include "symtab.h" - - +#include "source_file.h" +#include "diagnostics.h" +#include typedef enum { @@ -22,6 +21,7 @@ typedef enum typedef struct { + bool lexer_init_complete; const char *begin; const char *start; const char *current; @@ -78,14 +78,17 @@ static inline bool reached_end(void) return *lexer.current == '\0'; } -static Token error_token(const char *message) +static Token error_token(const char *message, ...) { Token token; - token.type = TOKEN_ERROR; + token.type = INVALID_TOKEN; token.start = lexer.start; token.span.length = 1; - token.span.loc = lexer.current_file->start + (lexer.begin - lexer.start); - // TODO error_at(token.span, message); + token.span.loc = lexer.current_file->start_id + (lexer.begin - lexer.start); + va_list list; + va_start(list, message); + diag_verror_at(token.span, message, list); + va_end(list); return token; } @@ -97,7 +100,7 @@ static Token make_token(TokenType type) { .type = type, .start = lexer.start, - .span = { .loc = lexer.current_file->start + (lexer.start - lexer.begin), .length = token_size } + .span = { .loc = lexer.current_file->start_id + (lexer.start - lexer.begin), .length = token_size } }; } @@ -109,7 +112,7 @@ static Token make_string_token(TokenType type, const char* string) { .type = type, .start = lexer.start, - .span = { .loc = lexer.current_file->start + (lexer.start - lexer.begin), .length = token_size }, + .span = { .loc = lexer.current_file->start_id + (lexer.start - lexer.begin), .length = token_size }, .string = string, }; } @@ -293,6 +296,7 @@ static inline Token scan_ident(void) hash = FNV1a(next(), hash); } EXIT:; + if (type == INVALID_TOKEN) error_token("An identifier may not only consist of '_'"); uint32_t len = lexer.current - lexer.start; const char* interned_string = symtab_add(lexer.start, len, hash, &type); return make_string_token(type, interned_string); @@ -303,8 +307,8 @@ static inline Token scan_ident(void) static Token scan_oct(void) { - next(); // Skip the o - if (!is_oct(next())) return error_token("Invalid octal sequence"); + char o = next(); // Skip the o + if (!is_oct(next())) return error_token("An expression starting with '0%c' would expect to be followed by octal numbers (0-7).", o); while (is_oct_or_(peek())) next(); return make_token(TOKEN_INTEGER); } @@ -312,41 +316,49 @@ static Token scan_oct(void) Token scan_binary(void) { - next(); // Skip the b - if (!is_binary(next())) return error_token("Invalid binary sequence"); + char b = next(); // Skip the b + if (!is_binary(next())) + { + return error_token("An expression starting with '0%c' would expect a sequence of zeroes and ones, " + "did you try to write a hex value but forgot the '0x'?", b); + } while (is_binary_or_(peek())) next(); return make_token(TOKEN_INTEGER); } #define PARSE_SPECIAL_NUMBER(is_num, is_num_with_underscore, exp, EXP) \ - while (is_num_with_underscore(peek())) next(); \ - bool is_float = false; \ - if (peek() == '.') \ - { \ - is_float = true; \ - next(); \ - char c = peek(); \ - if (c == '_') return error_token("Underscore may only appear between digits."); \ - if (is_num(c)) next(); \ - while (is_num_with_underscore(peek())) next(); \ - } \ +while (is_num_with_underscore(peek())) next(); \ +bool is_float = false; \ +if (peek() == '.') \ +{ \ + is_float = true; \ + next(); \ char c = peek(); \ - if (c == (exp) || c == (EXP)) \ - { \ - is_float = true; \ - next(); \ - char c2 = next(); \ - if (c2 == '+' || c2 == '-') c2 = next(); \ - if (!is_num(c2)) return error_token("Invalid exponential expression"); \ - while (is_num(peek())) next(); \ - } \ - if (prev() == '_') return error_token("Underscore may only appear between digits."); \ - return make_token(is_float ? TOKEN_FLOAT : TOKEN_INTEGER) + if (c == '_') return error_token("Can't parse this as a floating point value due to the '_' directly after decimal point."); \ + if (is_num(c)) next(); \ + while (is_num_with_underscore(peek())) next(); \ +} \ +char c = peek(); \ +if (c == (exp) || c == (EXP)) \ +{ \ + is_float = true; \ + next(); \ + char c2 = next(); \ + if (c2 == '+' || c2 == '-') c2 = next(); \ + if (!is_num(c2)) return error_token("Parsing the floating point exponent failed, because '%c' is not a number.", c2); \ + while (is_num(peek())) next(); \ +} \ +if (prev() == '_') return error_token("The number ended with '_', but that character needs to be between, not after, digits."); \ +return make_token(is_float ? TOKEN_FLOAT : TOKEN_INTEGER) static inline Token scan_hex(void) { - next(); // skip the x - if (!is_hex(next())) return error_token("Invalid hex sequence"); + char x = next(); // skip the x + if (!is_hex(next())) + { + return error_token("'0%c' starts a hexadecimal number, " + "but it was followed by '%c' which is not part of a hexadecimal number.", x, prev()); + } PARSE_SPECIAL_NUMBER(is_hex, is_hex_or_, 'p', 'P'); } @@ -363,7 +375,6 @@ static inline Token scan_digit(void) { switch (peek_next()) { - // case 'X': Let's not support this? REVISIT case 'x': case 'X': advance(2); @@ -397,11 +408,17 @@ static inline Token scan_char() { for (int i = 0; i < 2; i++) { - if (!is_hex(next())) return error_token("Invalid escape sequence"); + if (!is_hex(next())) + { + return error_token( + "An escape sequence starting with " + "'\\x' needs to be followed by " + "a two digit hexadecimal number."); + } } } } - if (next() != '\'') return error_token("Invalid character value"); + if (next() != '\'') return error_token("The character only consist of a single character, did you want to use \"\" instead?"); return make_token(TOKEN_INTEGER); } @@ -417,7 +434,7 @@ static inline Token scan_string() } if (reached_end()) { - return error_token("Unterminated string."); + return error_token("Reached the end looking for '\"'. Did you forget it?"); } } return make_token(TOKEN_STRING); @@ -513,15 +530,15 @@ static inline Token scan_docs(void) next(); return make_token(TOKEN_DOCS_LINE); case '\0': - return error_token("Docs reached end of the file"); + return error_token("The document ended without finding the end of the doc comment. " + "Did you forget a '*/' somewhere?"); default: break; } } } - -Token scan_token(void) +Token lexer_scan_token(void) { // First we handle our "in docs" state. if (lexer.lexer_state == LEXER_STATE_DOCS_PARSE) @@ -544,7 +561,7 @@ Token scan_token(void) lexer.lexer_state = LEXER_STATE_DOCS_PARSE; return make_token(TOKEN_DOCS_START); case WHITESPACE_COMMENT_REACHED_EOF: - return error_token("Comment was not terminated"); + return error_token("Reached the end looking for '*/'. Did you forget it somewhere?"); case WHITESPACE_FOUND_EOF: return make_token(TOKEN_EOF); case WHITESPACE_FOUND_DOCS_EOL: @@ -637,25 +654,52 @@ Token scan_token(void) backtrack(); return is_digit(c) ? scan_digit() : scan_ident(); } - return error_token("Unexpected character."); + return error_token("'%c' may not be placed outside of a string or comment, did you perhaps forget a \" somewhere?", c); } } -void lexer_test_setup(const char* text) +File* lexer_current_file(void) { + return lexer.current_file; +} + +void lexer_check_init(void) +{ + if (lexer.lexer_init_complete) return; + lexer.lexer_init_complete = true; + symtab_init(build_options.symtab_size); +} + +void lexer_add_file_for_lexing(File *file) +{ + LOG_FUNC + lexer_check_init(); + lexer.current_file = file; + lexer.last_in_range = 0; + lexer.begin = lexer.current_file->contents; + lexer.start = lexer.begin; + lexer.current = lexer.start; + lexer.lexer_state = LEXER_STATE_NORMAL; +} + +void lexer_test_setup(const char *text, size_t len) +{ + lexer_check_init(); static File helper; lexer.lexer_state = LEXER_STATE_NORMAL; lexer.start = text; lexer.current = text; lexer.begin = text; lexer.current_file = &helper; - lexer.current_file->start = 0; + lexer.current_file->start_id = 0; lexer.current_file->contents = text; - lexer.current_file->end = 100000; - lexer.current_file->name = "Foo"; + lexer.current_file->end_id = len; + lexer.current_file->name = "Test"; } -Token scan_ident_test(const char* scan) + + +Token lexer_scan_ident_test(const char *scan) { static File helper; lexer.lexer_state = LEXER_STATE_NORMAL; @@ -663,9 +707,9 @@ Token scan_ident_test(const char* scan) lexer.current = scan; lexer.begin = scan; lexer.current_file = &helper; - lexer.current_file->start = 0; + lexer.current_file->start_id = 0; lexer.current_file->contents = scan; - lexer.current_file->end = 1000; + lexer.current_file->end_id = 1000; lexer.current_file->name = "Foo"; if (scan[0] == '@' && is_letter(scan[1])) @@ -674,5 +718,5 @@ Token scan_ident_test(const char* scan) return scan_docs(); } - return scan_token(); + return lexer_scan_token(); } diff --git a/src/compiler/lexer.h b/src/compiler/lexer.h index 86993d80d..b543da947 100644 --- a/src/compiler/lexer.h +++ b/src/compiler/lexer.h @@ -4,14 +4,14 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include "tokens.h" + #include "compiler_common.h" -Token scan_token(void); +Token lexer_scan_token(void); -TokenType identifier_type(const char* restrict start, int len); -TokenType ident_type_fnv1(const char *restrict start, int len); +Token lexer_scan_ident_test(const char *scan); -Token scan_ident_test(const char* scan); - -void lexer_test_setup(const char* text); +void lexer_test_setup(const char *text, size_t len); +void lexer_add_file_for_lexing(File *file); +File* lexer_current_file(void); +void lexer_check_init(void); \ No newline at end of file diff --git a/src/compiler/parser.c b/src/compiler/parser.c new file mode 100644 index 000000000..68155184b --- /dev/null +++ b/src/compiler/parser.c @@ -0,0 +1,493 @@ +// Copyright (c) 2019 Christoffer Lerno. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include +#include +#include "../utils/errors.h" +#include "parser.h" +#include "semantic_analyser.h" +#include "lexer.h" + +const int MAX_DOCS_ROWS = 1024; + +Token tok; +Token prev_tok; +Token poisoned = { + .type = INVALID_TOKEN, +}; + +// --- Parser base methods + +Token token_wrap(const char* name) +{ + TODO +} +void advance(void) +{ + prev_tok = tok; + while (1) + { + tok = lexer_scan_token(); + // printf(">>> %.*s => %s\n", tok.length, tok.start, token_type_to_string(tok.type)); + if (tok.type != INVALID_TOKEN) break; + } +} + +void advance_and_verify(TokenType token_type) +{ + assert(tok.type == token_type); + advance(); +} + +bool try_consume(TokenType type) +{ + if (tok.type == type) + { + advance(); + return true; + } + return false; +} + +bool consume(TokenType type, const char *message, ...) +{ + if (try_consume(type)) + { + return true; + } + + va_list args; + va_start(args, message); + sema_verror_at(tok.span, message, args); + va_end(args); + return false; +} + +bool was_ident() +{ + switch (tok.type) + { + case TOKEN_VAR_IDENT: + case TOKEN_TYPE_IDENT: + case TOKEN_CAPS_IDENT: + return true; + default: + return false; + } +} + +static void recover_to(TokenType type) +{ + TODO +} + +static void recover_and_consume(TokenType type) +{ + TODO +} + +static void recover_stmt(void) +{ + recover_and_consume(TOKEN_EOS); +} + +/** + * Walk until we find the first top level construct. + * (Note that this is the slow path, so no need to inline) + */ +static void recover_top_level(void) +{ + while (tok.type != TOKEN_EOF) + { + switch (tok.type) + { + case TOKEN_FUNC: + case TOKEN_CONST: + case TOKEN_TYPEDEF: + case TOKEN_ERROR: + case TOKEN_STRUCT: + case TOKEN_IMPORT: + case TOKEN_UNION: + case TOKEN_ENUM: + case TOKEN_MACRO: + return; + default: + break; + } + } +} + +static inline bool consume_stmt_end(void) +{ + if (consume(TOKEN_EOS, "Expected ';'")) return true; + recover_and_consume(TOKEN_EOS); + return false; +} + +void error_at_current(const char* message, ...) +{ + va_list args; + va_start(args, message); + sema_verror_at(tok.span, message, args); + va_end(args); +} + +// --- Parsing + +/** + * Optionally parse docs + * + * docs ::= TOKEN_DOCS_START docs_body TOKEN_DOCS_END + * + * docs_body ::= docs_line + * | docs_body TOKEN_DOCS_EOL docs_line + * + * docs_line ::= TOKEN_DOCS_LINE + * | TOKEN_AT_IDENT [ignored] + * | TOKEN_AT_RETURN string + * | TOKEN_AT_PARAM VAR_IDENT string + * | TOKEN_AT_THROWS string + * | TOKEN_AT_REQUIRE [expr]+ + * | TOKEN_AT_ENSURE [expr]+ + * | TOKEN_AT_CONST VAR_IDENT + * | TOKEN_AT_PURE + * | TOKEN_AT_REQPARSE [expr | stmt] + * | TOKEN_AT_DEPRECATED + */ +static inline void parse_docs() +{ + LOG_FUNC + if (!try_consume(TOKEN_DOCS_START)) return;; + static Token docs[MAX_DOCS_ROWS]; + int lines = 0; + while (tok.type != TOKEN_DOCS_END) + { + switch (tok.type) + { + case TOKEN_DOCS_LINE: + if (lines == MAX_DOCS_ROWS) error_at_current("Exceeded max number of lines in docs: %d.", MAX_DOCS_ROWS); + docs[lines++] = tok; + break; + case TOKEN_AT_DEPRECATED: + break; + case TOKEN_AT_PURE: + break; + case TOKEN_AT_IDENT: + case TOKEN_AT_THROWS: + case TOKEN_AT_CONST: + case TOKEN_AT_REQPARSE: + case TOKEN_AT_PARAM: + case TOKEN_AT_RETURN: + default: + TODO + } + } +} + +/** + * module ::= [docs]? MODULE IDENTIFIER EOS + */ +static inline void parse_module() +{ + LOG_FUNC + parse_docs(); + if (!try_consume(TOKEN_MODULE)) + { + sema_add_module_from_filename(); + return; + } + + // Expect the module name + if (!consume(TOKEN_VAR_IDENT, "Expected a valid module name")) + { + sema_add_module(poisoned); + recover_top_level(); + return;; + } + + sema_add_module(prev_tok); + consume_stmt_end(); +} + + +/** + * import ::= IMPORT VAR_IDENT EOS + * | IMPORT VAR_IDENT AS VAR_IDENT EOS + * | IMPORT VAR_IDENT LOCAL EOS + */ +static inline void parse_import() +{ + advance_and_verify(TOKEN_IMPORT); + + if (!consume(TOKEN_VAR_IDENT, "Expected a module name")) + { + recover_top_level(); + return; + } + + Token module_name = prev_tok; + Token alias = {}; + ImportType import_type = IMPORT_TYPE_FULL; + if (try_consume(TOKEN_AS)) + { + if (!consume(TOKEN_VAR_IDENT, "Expected a valid alias name")) + { + recover_and_consume(TOKEN_EOS); + return; + } + alias = prev_tok; + import_type = IMPORT_TYPE_ALIAS; + } + if (try_consume(TOKEN_LOCAL)) + { + import_type = import_type == IMPORT_TYPE_ALIAS ? IMPORT_TYPE_ALIAS_LOCAL : IMPORT_TYPE_LOCAL; + } + + sema_add_import(module_name, alias, import_type); + + consume_stmt_end(); + +} + +/** + * imports ::= import + * | imports import + */ +static inline void parse_imports(void) +{ + while (tok.type == TOKEN_IMPORT) + { + parse_import(); + } +} + +static inline void parse_func(void) +{ + TODO +} + +static inline void *parse_type(void) +{ + TODO +} + +static inline void *parse_deferred_expression(void) +{ + TODO +} +static inline void parse_const(void) +{ + advance_and_verify(TOKEN_CONST); + // parse_type(); + if (!consume(TOKEN_CAPS_IDENT, "Expected an upper case identifier")) + { + recover_top_level(); + return; + } + if (!consume(TOKEN_EQEQ, "Expected '=' here")) + { + recover_top_level(); + return; + } + parse_deferred_expression(); + consume_stmt_end(); +} + +static inline void parse_union(void) +{ + TODO; +} + +static inline void parse_struct(void) +{ + TODO; +} + +static inline void parse_macro(void) +{ + TODO; +} + +/** + * error ::= ERROR TYPE_IDENT '{' CAPS_IDENT (',' CAPS_IDENT)* ','? '}' + */ +static inline void parse_error(void) +{ + advance_and_verify(TOKEN_ERROR); + if (!consume(TOKEN_TYPE_IDENT, "Expected a valid error type name here")) + { + recover_top_level(); + return; + } + Token name = prev_tok; + + if (!consume(TOKEN_LBRACE, "Expected ’{' after error type name")) + { + recover_top_level(); + return; + } + + while (tok.type == TOKEN_CAPS_IDENT) + { + // TODO store + advance(); + if (!try_consume(TOKEN_COMMA)) break; + } + + if (!consume(TOKEN_RBRACE, "Expected '}' here")) + { + recover_top_level(); + } + + sema_add_errors(name /* todo values */); + +} + +/** + * enum ::= ENUM TYPE_NAME (':' type)? '{' enum_def (',' enum_def)* ','? '}' + * + * enum_def ::= CAPS_IDENT ('=' const_expr)? + * + * TODO enum extra data? + */ +static inline void parse_enum(void) +{ + advance_and_verify(TOKEN_ENUM); + if (!consume(TOKEN_TYPE_IDENT, "Expected a valid enum type name here")) + { + recover_top_level(); + return; + } + Token name = prev_tok; + + void *type = NULL; + if (try_consume(TOKEN_COLON)) + { + type = parse_type(); + } + + if (!consume(TOKEN_LBRACE, type ? "Expected '{' after enum type" : "Expected ’{' after enum type name")) + { + recover_top_level(); + return; + } + + while (tok.type == TOKEN_CAPS_IDENT) + { + // TODO store + advance(); + if (try_consume(TOKEN_EQ)) + { + // Store + parse_deferred_expression(); + } + if (!try_consume(TOKEN_COMMA)) break; + } + + if (!consume(TOKEN_RBRACE, "Expected '}' here")) + { + recover_top_level(); + return; + } + + sema_add_errors(name /* todo values */); + +} + +static inline void parse_global_var(void) +{ + + TODO; +} + +static inline void parse_macro_var(void) +{ + advance_and_verify(TOKEN_DOLLAR_IDENT); + Token var_name = prev_tok; + + if (!consume(TOKEN_EQ, "Expected assignment here")) + { + recover_top_level(); + return; + } + + // TODO use the result + parse_deferred_expression(); + + sema_add_macro_var(var_name /* , expr **/ ); + consume_stmt_end(); +} + +static inline void parse_macro_expansion(void) +{ + TODO +} + +static inline void parse_top_level() +{ + LOG_FUNC + while (tok.type != TOKEN_EOF) + { + switch (tok.type) + { + case TOKEN_FUNC: + parse_func(); + break; + case TOKEN_CONST: + parse_const(); + break; + case TOKEN_STRUCT: + parse_struct(); + break; + case TOKEN_UNION: + parse_union(); + break; + case TOKEN_MACRO: + parse_macro(); + break; + case TOKEN_ENUM: + parse_enum(); + break; + case TOKEN_ERROR: + parse_error(); + break; + case TOKEN_PUBLIC: + sema_mark_next_public(); + break; + case TOKEN_TYPE_IDENT: + parse_global_var(); + break; + case TOKEN_AT_IDENT: + parse_macro_expansion(); + break; + case TOKEN_DOLLAR_IDENT: + parse_macro_var(); + break; + case TOKEN_DOCS_START: + parse_docs(); + break; + default: + error_at_current("Unexpected token found"); + recover_top_level(); + break; + } + } +} + +void parse_current(void) +{ + LOG_FUNC + advance(); + parse_module(); + parse_imports(); + parse_top_level(); +} + +void parse_file(File *file) +{ + LOG_FUNC + lexer_add_file_for_lexing(file); + sema_init(file); + parse_current(); +} \ No newline at end of file diff --git a/src/compiler/parser.h b/src/compiler/parser.h new file mode 100644 index 000000000..fd4095928 --- /dev/null +++ b/src/compiler/parser.h @@ -0,0 +1,10 @@ +#pragma once + +// Copyright (c) 2019 Christoffer Lerno. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "compiler_common.h" +#include "tokens.h" + +void parse_file(File *file); \ No newline at end of file diff --git a/src/compiler/semantic_analyser.c b/src/compiler/semantic_analyser.c new file mode 100644 index 000000000..00b4d5606 --- /dev/null +++ b/src/compiler/semantic_analyser.c @@ -0,0 +1,68 @@ +// Copyright (c) 2019 Christoffer Lerno. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include +#include +#include "semantic_analyser.h" +#include "../utils/file_utils.h" +#include "symtab.h" + +CompilationUnit current_unit; + +void sema_init(File *file) +{ + LOG_FUNC + current_unit.file = file; + current_unit.module_name.type = INVALID_TOKEN; +} + +void sema_add_module(Token module_name) +{ + LOG_FUNC + current_unit.module_name = module_name; +} + +void sema_add_module_from_filename(void) +{ + LOG_FUNC + char buffer[MAX_IDENTIFIER_LENGTH + 1]; + int len = filename_to_module(current_unit.file->full_path, buffer); + if (!len) + { + TODO + } + + TokenType type = TOKEN_VAR_IDENT; + const char *module_name = symtab_add(buffer, len, fnv1a(buffer, len), &type); + if (type != TOKEN_VAR_IDENT) + { + TODO + } + current_unit.module_name.string = module_name; + TODO +} +void sema_add_import(Token module_name, Token alias, ImportType import_type) +{ + TODO +} + +void sema_add_errors(Token error_type_name /* todo values */) +{ + TODO +} +void sema_add_macro_var(Token macro_var_name /* , expr **/ ) +{ + TODO +} + +// If we have a public parameter, then the next one will be the real one. +void sema_mark_next_public(void) +{ + TODO +} + +void sema_verror_at(SourceRange range, const char *message, va_list args) +{ + TODO +} diff --git a/src/compiler/semantic_analyser.h b/src/compiler/semantic_analyser.h new file mode 100644 index 000000000..1cee042d5 --- /dev/null +++ b/src/compiler/semantic_analyser.h @@ -0,0 +1,32 @@ +#pragma once + +// Copyright (c) 2019 Christoffer Lerno. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "compiler_common.h" + +typedef struct +{ + Token module_name; + File *file; +} CompilationUnit; + +typedef enum { + IMPORT_TYPE_FULL, + IMPORT_TYPE_ALIAS, + IMPORT_TYPE_ALIAS_LOCAL, + IMPORT_TYPE_LOCAL +} ImportType; + +void sema_init(File *file); +void sema_add_module(Token module_name); +void sema_add_module_from_filename(void); +void sema_add_import(Token module_name, Token alias, ImportType import_type); +void sema_add_errors(Token error_type_name /* todo values */); +void sema_add_macro_var(Token macro_var_name /* , expr **/ ); + +// If we have a public parameter, then the next one will be the real one. +void sema_mark_next_public(void); + +void sema_verror_at(SourceRange range, const char *message, va_list args); diff --git a/src/compiler/source_file.c b/src/compiler/source_file.c new file mode 100644 index 000000000..9439673e5 --- /dev/null +++ b/src/compiler/source_file.c @@ -0,0 +1,89 @@ +// Copyright (c) 2019 Christoffer Lerno. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include +#include +#include "../build/build_options.h" +#include "source_file.h" +#include "../utils/lib.h" +#include "../utils/file_utils.h" +#include "lexer.h" + +static const size_t LEXER_FILES_START_CAPACITY = 128; + +File pseudo_file; + +typedef struct +{ + File **files; +} SourceFiles; + +SourceFiles source_files; + +File *source_file_load(const char *filename, bool *already_loaded) +{ + if (already_loaded) *already_loaded = false; + if (!source_files.files) source_files.files = VECNEW(File *, LEXER_FILES_START_CAPACITY); + + char *full_path = malloc_arena(PATH_MAX + 1); + if (!realpath(filename, full_path)) + { + error_exit("Failed to resolve %s", filename); + } + + VECEACH(source_files.files, index) + { + if (strcmp(source_files.files[index]->full_path, full_path) == 0) + { + *already_loaded = true; + return source_files.files[index]; + } + } + if (vec_size(source_files.files) == MAX_FILES) + { + error_exit("Exceeded max number of files %d", MAX_FILES); + } + + size_t size; + const char* source_text = read_file(filename, &size); + File *file = malloc(sizeof(File)); + file->full_path = full_path; + file->start_id = vec_size(source_files.files) ? VECLAST(source_files.files)->end_id : 0; + file->contents = source_text; + ASSERT(file->start_id + size < UINT32_MAX, "Total files loaded exceeded %d bytes", UINT32_MAX); + file->end_id = file->start_id + size; + file->name = filename; + source_files.files = VECADD(source_files.files, file); + return file; +} + +File *source_file_from_position(SourceLoc loc) +{ + if (loc == INVALID_LOC) + { + pseudo_file.contents = "---"; + return &pseudo_file; + } + if (lexer_current_file()->start_id <= loc) return lexer_current_file(); + unsigned low = 0; + unsigned high = vec_size(source_files.files) - 2; + assert(vec_size(source_files.files) > 1); + while (1) + { + // Binary search + unsigned mid = (high + low) / 2; + File *file = source_files.files[mid]; + if (file->start_id > loc) + { + high = mid - 1; + continue; + } + if (file->end_id < loc) + { + low = mid + 1; + continue; + } + return file; + } +} diff --git a/src/compiler/source_file.h b/src/compiler/source_file.h new file mode 100644 index 000000000..e023c68ea --- /dev/null +++ b/src/compiler/source_file.h @@ -0,0 +1,11 @@ +#pragma once + +// Copyright (c) 2019 Christoffer Lerno. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + + +#include "compiler_common.h" + +File *source_file_load(const char *filename, bool *already_loaded); +File *source_file_from_position(SourceLoc loc); diff --git a/src/compiler/symtab.c b/src/compiler/symtab.c index bd9488ef1..7cfc4ab4f 100644 --- a/src/compiler/symtab.c +++ b/src/compiler/symtab.c @@ -8,7 +8,7 @@ #include "../utils/errors.h" #include #include "../utils/lib.h" -#include "malloc.h" +#include "utils/malloc.h" #include "tokens.h" #define TABLE_MAX_LOAD 0.75 diff --git a/src/compiler/tokens.c b/src/compiler/tokens.c index 30fe37cd3..aed3eda02 100644 --- a/src/compiler/tokens.c +++ b/src/compiler/tokens.c @@ -112,15 +112,15 @@ const char *token_type_to_string(TokenType type) case TOKEN_BIT_XOR_ASSIGN: return "^="; case TOKEN_VAR_IDENT: - return ""; + return "VAR_IDENT"; case TOKEN_TYPE_IDENT: - return ""; + return "TYPE_IDENT"; case TOKEN_STRING: - return ""; + return "STRING"; case TOKEN_INTEGER: - return ""; + return "INTEGER"; case TOKEN_REAL: - return ""; + return "FLOAT"; case TOKEN_QUESTION: return "?"; case TOKEN_ELVIS: @@ -192,9 +192,9 @@ const char *token_type_to_string(TokenType type) case TOKEN_AT: return "@"; case TOKEN_ERROR: - return ""; + return "error"; case TOKEN_EOF: - return ""; + return "EOF"; case TOKEN_CAST: return "cast"; case TOKEN_C_LONGDOUBLE: @@ -208,7 +208,7 @@ const char *token_type_to_string(TokenType type) case TOKEN_C_ULONGLONG: return "c_ulonglong"; case TOKEN_C_SHORT: - return "c_ishort"; + return "c_short"; case TOKEN_C_INT: return "c_int"; case TOKEN_C_LONG: @@ -251,6 +251,8 @@ const char *token_type_to_string(TokenType type) return "i8"; case TOKEN_U8: return "u8"; + case TOKEN_U1: + return "u1"; case TOKEN_BOOL: return "bool"; case TOKEN_QUAD: @@ -294,25 +296,25 @@ const char *token_type_to_string(TokenType type) case TOKEN_HALF: return "half"; case TOKEN_CAPS_IDENT: - return ""; + return "CAPS_IDENT"; case TOKEN_AT_IDENT: - return "<@ident>"; + return "AT_IDENT"; case TOKEN_HASH_IDENT: - return "<#ident>"; + return "HASH_IDENT"; case TOKEN_DOLLAR_IDENT: - return "<$ident>"; + return "DOLLAR_IDENT"; case TOKEN_CATCH: return "catch"; case INVALID_TOKEN: - return "<\?\?\?>"; + return "INVALID_TOKEN"; case TOKEN_DOCS_EOL: - return ""; + return "EOL"; case TOKEN_DOCS_START: return "/**"; case TOKEN_DOCS_END: return "*/"; case TOKEN_DOCS_LINE: - return ""; + return "DOCS_LINE"; case TOKEN_AT_REQUIRE: return "@require"; case TOKEN_AT_ENSURE: @@ -333,4 +335,5 @@ const char *token_type_to_string(TokenType type) return "@deprecated"; } UNREACHABLE -} \ No newline at end of file +} + diff --git a/src/compiler/tokens.h b/src/compiler/tokens.h index 1f21fc337..d76c39e79 100644 --- a/src/compiler/tokens.h +++ b/src/compiler/tokens.h @@ -90,6 +90,7 @@ typedef enum _TokenType TOKEN_U16, // u16 TOKEN_I8, // i8 TOKEN_U8, // u8 + TOKEN_U1, // u1 // Basic types names TOKEN_BYTE, diff --git a/src/compiler_tests/shorttest.c b/src/compiler_tests/shorttest.c index 68c5152fc..61357986d 100644 --- a/src/compiler_tests/shorttest.c +++ b/src/compiler_tests/shorttest.c @@ -34,7 +34,7 @@ static const char* test_parse = "struct Node\n" " */\n" "void Heap.init(Heap& heap, usize start) \n" "{\n" -" Node& init_region = @cast(Node&, start);\n" +" Node& init_region = cast(Node&, start);\n" " init_region.hole = 1;\n" " init_region.size = HEAP_INIT_SIZE - @sizeof(Node) - @sizeof(Footer);\n" "\n" @@ -42,14 +42,14 @@ static const char* test_parse = "struct Node\n" "\n" " heap.bins[get_bin_index(init_region.size)].add(init_region);\n" "\n" -" heap.start = @cast(void*, start);\n" -" heap.end = @cast(void*, start + HEAP_INIT_SIZE);\n" +" heap.start = cast(void*, start);\n" +" heap.end = cast(void*, start + HEAP_INIT_SIZE);\n" "}\n" "\n" "void* Heap.alloc(Heap& heap, usize size) \n" "{\n" " uint index = get_bin_index(size);\n" -" Bin& temp = @cast(Bin&, heap.bins[index]);\n" +" Bin& temp = cast(Bin&, heap.bins[index]);\n" " Node* found = temp.getBestFit(size);\n" "\n" " while (!found) \n" @@ -60,7 +60,7 @@ static const char* test_parse = "struct Node\n" "\n" " if ((found.size - size) > (overhead + MIN_ALLOC_SZ)) \n" " {\n" -" Node& split = @cast(Node*, @cast(char&, found) + sizeof(Node) + sizeof(Footer)) + size);\n" +" Node& split = cast(Node*, cast(char&, found) + sizeof(Node) + sizeof(Footer)) + size);\n" " split.size = found.size - size - sizeof(Node) - sizeof(Footer);\n" " split.hole = 1;\n" " \n" diff --git a/src/compiler_tests/tests.c b/src/compiler_tests/tests.c index 493a8d003..47bb7c5ff 100644 --- a/src/compiler_tests/tests.c +++ b/src/compiler_tests/tests.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "benchmark.h" #include "../compiler/symtab.h" @@ -18,13 +19,14 @@ static void test_lexer(void) #ifdef __OPTIMIZE__ printf("--- RUNNING OPTIMIZED ---\n"); #endif + run_arena_allocator_tests(); printf("Begin lexer testing.\n"); printf("-- Check number of keywords...\n"); int tokens_found = 0; - const int EXPECTED_TOKENS = 91; + const int EXPECTED_TOKENS = 12 + 72 + 9; const char* tokens[TOKEN_EOF]; int len[TOKEN_EOF]; - compiler_init(); + lexer_check_init(); for (int i = 1; i < TOKEN_EOF; i++) { const char* token = token_type_to_string((TokenType)i); @@ -34,7 +36,7 @@ static void test_lexer(void) const char* interned = symtab_add(token, len[i], fnv1a(token, len[i]), &lookup); if (lookup != TOKEN_VAR_IDENT) { - Token scanned = scan_ident_test(token); + Token scanned = lexer_scan_ident_test(token); TEST_ASSERT(scanned.type == i, "Mismatch scanning: was '%s', expected '%s' - lookup: %s - interned: %s.", token_type_to_string(scanned.type), token_type_to_string(i), @@ -59,7 +61,7 @@ static void test_lexer(void) { for (int i = 1; i < TOKEN_EOF; i++) { - volatile TokenType t = scan_ident_test(tokens[i]).type; + volatile TokenType t = lexer_scan_ident_test(tokens[i]).type; } } @@ -77,13 +79,14 @@ static void test_lexer(void) bench_begin(); int tokens_parsed = 0; + size_t test_len = strlen(test_parse); for (int b = 0; b < BENCH_REPEATS; b++) { - lexer_test_setup(test_parse); + lexer_test_setup(test_parse, test_len); Token token; while (1) { - token = scan_token(); + token = lexer_scan_token(); if (token.type == TOKEN_EOF) break; TEST_ASSERT(token.type != INVALID_TOKEN, "Got invalid token"); tokens_parsed++; @@ -92,7 +95,6 @@ static void test_lexer(void) printf("-> Test complete in %fs, %.0f kloc/s, %.0f ktokens/s\n", bench_mark(), loc * BENCH_REPEATS / (1000 * bench_mark()), tokens_parsed / (1000 * bench_mark())); - } void test_compiler(void) diff --git a/src/main.c b/src/main.c index ead3c153f..114e2040d 100644 --- a/src/main.c +++ b/src/main.c @@ -3,7 +3,7 @@ #include "build/project_creation.h" #include "utils/errors.h" #include "compiler_tests/tests.h" -#include "compiler/malloc.h" +#include "utils/malloc.h" int main(int argc, const char *argv[]) { init_arena(); @@ -16,6 +16,8 @@ int main(int argc, const char *argv[]) case COMMAND_UNIT_TEST: compiler_tests(); case COMMAND_COMPILE: + compile_file(); + break; case COMMAND_COMPILE_RUN: case COMMAND_MISSING: case COMMAND_BUILD: diff --git a/src/utils/common.h b/src/utils/common.h new file mode 100644 index 000000000..ba850c2b5 --- /dev/null +++ b/src/utils/common.h @@ -0,0 +1,14 @@ +#pragma once + +// Copyright (c) 2019 Christoffer Lerno. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + + +#include +#include +#include +#include +#include "errors.h" + +#define MAX_IDENTIFIER_LENGTH 31 \ No newline at end of file diff --git a/src/utils/errors.c b/src/utils/errors.c index 510abde73..19efa4dc6 100644 --- a/src/utils/errors.c +++ b/src/utils/errors.c @@ -3,3 +3,22 @@ // license that can be found in the LICENSE file. #include "errors.h" +#include + +void eprintf(const char *format, ...) +{ + va_list arglist; + va_start(arglist, format); + vfprintf(stderr, format, arglist); + va_end(arglist); +} + +void error_exit(const char *format, ...) +{ + va_list arglist; + va_start(arglist, format); + vfprintf(stderr, format, arglist); + fprintf(stderr, "\n"); + va_end(arglist); + exit(EXIT_FAILURE); +} diff --git a/src/utils/errors.h b/src/utils/errors.h index 6b40240b9..c060b56e0 100644 --- a/src/utils/errors.h +++ b/src/utils/errors.h @@ -7,17 +7,28 @@ #include #include -#define error_exit(...) do { fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); exit(EXIT_FAILURE); } while(0) +void eprintf(const char *format, ...); +void error_exit(const char *format, ...); -#define FATAL_ERROR(_string, ...) do { printf("FATAL ERROR at %s:%d: " _string, __func__, __LINE__, ##__VA_ARGS__); printf("\n"); exit(-1); } while(0) +#define FATAL_ERROR(_string, ...) do { error_exit("FATAL ERROR at %s:%d: " _string, __func__, __LINE__, ##__VA_ARGS__); } while(0) #define ASSERT(_condition, _string, ...) while (!(_condition)) { FATAL_ERROR(_string, ##__VA_ARGS__); } #define UNREACHABLE FATAL_ERROR("Cannot reach %s:%d", __func__, __LINE__); -#define TODO FATAL_ERROR("Not done yet %s:%d", __func__, __LINE__); + +#define TODO FATAL_ERROR("TODO reached", __func__, __LINE__); #define TEST_ASSERT(_condition, _string, ...) while (!(_condition)) { FATAL_ERROR(_string, ##__VA_ARGS__); } #define EXPECT(_string, _value, _expected) \ do { long long __tempval1 = _value; long long __tempval2 = _expected; \ TEST_ASSERT(__tempval1 == __tempval2, "Checking " _string ": expected %lld but was %lld.", __tempval2, __tempval1); } while(0) + + +#ifndef NDEBUG +#define DEBUG_LOG(_string, ...) eprintf("-- DEBUG: "); eprintf(_string, ##__VA_ARGS__); eprintf("\n"); +#else +#define DEBUG_LOG(_string, ...) +#endif +#define LOG_FUNC DEBUG_LOG("%s entered", __func__); + diff --git a/src/utils/file_utils.c b/src/utils/file_utils.c index bd57da698..be0ef4f62 100644 --- a/src/utils/file_utils.c +++ b/src/utils/file_utils.c @@ -3,6 +3,9 @@ // license that can be found in the LICENSE file. #include "file_utils.h" +#include "errors.h" +#include "malloc.h" +#include "lib.h" #include #include @@ -17,4 +20,67 @@ const char* expand_path(const char* path) return ret; } return path; -} \ No newline at end of file +} + +int filename_to_module(const char *path, char buffer[MAX_IDENTIFIER_LENGTH + 1]) +{ + size_t len = strlen(path); + int last_slash = 0; + int last_dot = -1; + for (int i = 0; i < len; i++) + { + if (path[i] == '/') last_slash = i; + if (path[i] == '.') last_dot = i; + } + int namelen = last_dot - last_slash - 1; + if (namelen < 2) return 0; + if (namelen > MAX_IDENTIFIER_LENGTH) namelen = MAX_IDENTIFIER_LENGTH; + for (int i = last_slash + 1; i < last_dot; i++) + { + char c = path[i]; + if (is_letter(c)) + { + c = is_upper(c) ? c + 'a' - 'A' : c; + } + else + { + c = '_'; + } + buffer[i - last_slash - 1] = c; + } + buffer[namelen] = '\0'; + return namelen; +} + +char *read_file(const char *path, size_t *return_size) +{ + FILE *file = fopen(path, "rb"); + + if (file == NULL) + { + error_exit("Could not open file \"%s\".\n", path); + exit(74); + } + + fseek(file, 0L, SEEK_END); + size_t file_size = (size_t)ftell(file); + *return_size = file_size; + rewind(file); + + char *buffer = (char *)malloc((size_t)file_size + 1); + if (buffer == NULL) + { + error_exit("Not enough memory to read \"%s\".\n", path); + } + + size_t bytesRead = fread(buffer, sizeof(char), (size_t)file_size, file); + if (bytesRead < file_size) + { + error_exit("Failed to read file \"%s\".\n", path); + } + + buffer[bytesRead] = '\0'; + + fclose(file); + return buffer; +} diff --git a/src/utils/file_utils.h b/src/utils/file_utils.h index cd2143979..f747691bc 100644 --- a/src/utils/file_utils.h +++ b/src/utils/file_utils.h @@ -4,5 +4,10 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +#include "common.h" const char* expand_path(const char* path); + +char *read_file(const char *path, size_t *return_size); + +int filename_to_module(const char *path, char buffer[MAX_IDENTIFIER_LENGTH + 1]); diff --git a/src/utils/lib.h b/src/utils/lib.h index fb81ff37c..3fdb46d36 100644 --- a/src/utils/lib.h +++ b/src/utils/lib.h @@ -6,6 +6,7 @@ #include #include +#include "malloc.h" static inline bool is_power_of_two(uint64_t x) { @@ -180,4 +181,54 @@ static inline uint32_t fnv1a(const char *key, uint32_t len) hash = FNV1a(key[i], hash); } return hash; -} \ No newline at end of file +} + +typedef struct +{ + unsigned size; + unsigned capacity; +} _VHeader; + +static inline _VHeader* _vec_new(size_t element_size, size_t capacity) +{ + _VHeader *header = malloc_arena(element_size * capacity + sizeof(_VHeader)); + header->size = 0; + header->capacity = capacity; + return header; +} + +static inline unsigned vec_size(const void*vec) +{ + return vec ? (((_VHeader *)vec) - 1)->size : 0; +} + + +static inline void* _expand(void *vec, size_t element_size) +{ + if (vec == NULL) + { + vec = _vec_new(element_size, 16) + 1; + } + _VHeader *header = ((_VHeader *)vec) - 1; + header->size++; + if (header->size == header->capacity) + { + _VHeader *new_array = _vec_new(element_size, header->capacity >> 1u); + memcpy(new_array, header, element_size * header->capacity + sizeof(_VHeader)); + header = new_array; + vec = header + 1; + } + return vec; +} + +#define VECEACH(_vec, _index) \ + unsigned __vecsize = vec_size(_vec); \ + for (unsigned _index = 0; _index < __vecsize; _index++) + +#define VECNEW(_type, _capacity) ((_type *)(_vec_new(sizeof(_type), _capacity) + 1)) +#define VECADD(_vec, _value) \ + ({ \ + typeof(_vec) __temp = (typeof(_vec))_expand((_vec), sizeof((_vec)[0])); \ + __temp[vec_size(__temp) - 1] = _value; \ + __temp; }) +#define VECLAST(_vec) ( (_vec) ? (_vec)[vec_size(_vec) - 1] : NULL) diff --git a/src/compiler/malloc.c b/src/utils/malloc.c similarity index 61% rename from src/compiler/malloc.c rename to src/utils/malloc.c index 4b03577e8..35be3339a 100644 --- a/src/compiler/malloc.c +++ b/src/utils/malloc.c @@ -4,14 +4,11 @@ #include "malloc.h" -#include -#include -#include "../utils/errors.h" - -#define KB 1024L +static const size_t KB = 1024ul; // Use 1MB at a time. -#define BUCKET_SIZE (1024 * KB) -#define ARENA_BUCKET_START_SIZE 16 +static const size_t MB = KB * 1024ul; +static const size_t BUCKET_SIZE = MB; +static const size_t STARTING_ARENA_BUCKETS = 16; static uint8_t **arena_buckets; static int arena_buckets_used; @@ -21,10 +18,9 @@ static void *current_arena; static int allocations_done; void init_arena(void) { - printf("---- ARENA ALLOCATED ----\n"); - arena_buckets = malloc(ARENA_BUCKET_START_SIZE * sizeof(void *)); + arena_buckets = malloc(STARTING_ARENA_BUCKETS * sizeof(void *)); arena_buckets_used = 1; - arena_buckets_array_size = ARENA_BUCKET_START_SIZE; + arena_buckets_array_size = STARTING_ARENA_BUCKETS; arena_buckets[0] = malloc(BUCKET_SIZE); allocations_done = 0; current_use = 0; @@ -34,13 +30,15 @@ void init_arena(void) // Simple bump allocator with buckets. void *malloc_arena(size_t mem) { - if (mem == 0) return NULL; + assert(mem > 0); // Round to multiple of 16 size_t oldmem = mem; mem = (mem + 15u) & ~15ull; assert(mem >= oldmem); if (mem >= BUCKET_SIZE / 4) { + void *ret = malloc(mem); + ASSERT(ret, "Out of memory."); return malloc(mem); } if (current_use + mem > BUCKET_SIZE) @@ -61,20 +59,22 @@ void *malloc_arena(size_t mem) allocations_done++; if (mem > 4096) { - printf("Allocated large chunk %llu\n", (unsigned long long)mem); + // printf("Allocated large chunk %llu\n", (unsigned long long)mem); } return (void *)ptr; } - -void free_arena(void) +void print_arena_status(void) { - printf("-- FREEING ARENA -- \n"); + printf("-- ARENA INFO -- \n"); printf(" * Memory used: %ld Kb\n", ((arena_buckets_used - 1) * BUCKET_SIZE + current_use) / 1024); printf(" * Buckets used: %d\n", arena_buckets_used); printf(" * Allocations: %d\n", allocations_done); +} +void free_arena(void) +{ for (int i = 0; i < arena_buckets_used; i++) { free(arena_buckets[i]); @@ -84,23 +84,43 @@ void free_arena(void) arena_buckets = NULL; arena_buckets_array_size = 0; current_use = 0; - printf("-- FREE DONE -- \n"); } void run_arena_allocator_tests(void) { - init_arena(); + printf("Begin arena allocator testing.\n"); + bool was_init = arena_buckets != NULL; + if (!was_init) init_arena(); free_arena(); init_arena(); ASSERT(malloc_arena(10) != malloc_arena(10), "Expected different values..."); - ASSERT(current_use == 32, "Expected allocations rounded to next 8 bytes"); + printf("-- Tested basic allocation - OK.\n"); + ASSERT(current_use == 32, "Expected allocations rounded to next 16 bytes"); + malloc_arena(1); + ASSERT(current_use == 48, "Expected allocations rounded to next 16 bytes"); + printf("-- Tested allocation alignment - OK.\n"); EXPECT("buckets in use", arena_buckets_used, 1); - ASSERT(malloc_arena(BUCKET_SIZE), "Should be possible to allocate this"); + for (int i = 0; i < 8; i++) + { + ASSERT(malloc_arena(BUCKET_SIZE / 8), "Should be possible to allocate this"); + } EXPECT("buckets in use", arena_buckets_used, 2); - ASSERT(malloc_arena(1), "Expected alloc to pass"); + for (int i = 0; i < 7; i++) + { + ASSERT(malloc_arena(BUCKET_SIZE / 8), "Should be possible to allocate this"); + } + EXPECT("buckets in use", arena_buckets_used, 2); + ASSERT(malloc_arena(BUCKET_SIZE / 8), "Expected alloc to pass"); EXPECT("buckets in use", arena_buckets_used, 3); + for (int i = 0; i < 8 * STARTING_ARENA_BUCKETS; i++) + { + ASSERT(malloc_arena(BUCKET_SIZE / 8), "Should be possible to allocate this"); + } + EXPECT("buckets in use", arena_buckets_used, STARTING_ARENA_BUCKETS + 3); + printf("-- Test switching buckets - OK.\n"); free_arena(); ASSERT(arena_buckets_array_size == 0, "Arena not freed?"); - printf("Passed all arena tests\n"); + printf("-- Test freeing arena - OK.\n"); + if (was_init) init_arena(); } diff --git a/src/compiler/malloc.h b/src/utils/malloc.h similarity index 95% rename from src/compiler/malloc.h rename to src/utils/malloc.h index ddc971a31..75fcfccd2 100644 --- a/src/compiler/malloc.h +++ b/src/utils/malloc.h @@ -4,7 +4,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. - +#include "common.h" void init_arena(void); void *malloc_arena(unsigned long mem);