Started work on parsing: picking pieces from Titanos and trying to improve on them.

This commit is contained in:
Christoffer Lerno
2019-07-31 23:19:45 +02:00
parent 8f611a400d
commit 386ef35f36
31 changed files with 1586 additions and 135 deletions

View File

@@ -17,5 +17,6 @@ add_executable(c3c
src/compiler/lexer.c
src/compiler/tokens.c
src/compiler/symtab.c
src/compiler/parser.c
src/compiler_tests/tests.c
src/compiler_tests/benchmark.c src/compiler/malloc.c src/compiler/malloc.h src/compiler/compiler.c src/compiler/compiler.h)
src/compiler_tests/benchmark.c src/utils/malloc.c src/utils/malloc.h src/compiler/compiler.c src/compiler/compiler.h src/compiler/semantic_analyser.c src/compiler/semantic_analyser.h src/utils/common.h src/compiler/source_file.c src/compiler/source_file.h src/compiler/diagnostics.c src/compiler/diagnostics.h)

View File

@@ -0,0 +1,24 @@
module foo;
const int GLOBAL = 0;
struct Boo
{
int i;
union
{
int death;
};
}
func void test()
{
int i = 0;
i++;
if (i < 100)
{
int j = 0;
j += i;
return;
}
}

View File

@@ -9,6 +9,7 @@
#include <unistd.h>
#include <stdbool.h>
#include <string.h>
#include <utils/lib.h>
#include "../utils/errors.h"
@@ -97,12 +98,12 @@ static inline bool match_shortopt(const char* name)
void append_file()
{
if (build_options.file_count == MAX_FILES)
if (vec_size(build_options.files) == MAX_FILES)
{
fprintf(stderr, "Max %d files may be specified\n", MAX_FILES);
exit(EXIT_FAILURE);
}
build_options.files[build_options.file_count++] = current_arg;
build_options.files = VECADD(build_options.files, current_arg);
}
static bool arg_match(const char *candidate)
@@ -196,6 +197,20 @@ static void parse_option()
{
case 'h':
break;
case 'E':
if (build_options.compile_option != COMPILE_NORMAL)
{
FAIL_WITH_ERR("Illegal combination of compile options.");
}
build_options.compile_option = COMPILE_LEX_ONLY;
return;
case 'P':
if (build_options.compile_option != COMPILE_NORMAL)
{
FAIL_WITH_ERR("Illegal combination of compile options.");
}
build_options.compile_option = COMPILE_LEX_PARSE_ONLY;
return;
case '-':
if (match_longopt("about"))
{
@@ -220,12 +235,11 @@ static void parse_option()
{
if (at_end() || next_is_opt()) error_exit("error: --symtab needs a number.");
const char *number = next_arg();
int size = atoi(number);
int size = atoi(number); // NOLINT(cert-err34-c)
if (size < 1024) error_exit("error: --symtab valid size > 1024.");
if (size > MAX_SYMTAB_SIZE) error_exit("error: --symptab size cannot exceed %d", MAX_SYMTAB_SIZE);
build_options.symtab_size = size;
return;
}
if (match_longopt("help"))
{
@@ -251,6 +265,20 @@ void parse_arguments(int argc, const char *argv[])
build_options.path = ".";
build_options.command = COMMAND_MISSING;
build_options.symtab_size = DEFAULT_SYMTAB_SIZE;
build_options.files = VECNEW(const char *, MAX_FILES);
for (int i = DIAG_NONE; i < DIAG_WARNING_TYPE; i++)
{
build_options.severity[i] = DIAG_IGNORE;
}
for (int i = DIAG_WARNING_TYPE; i < DIAG_ERROR_TYPE; i++)
{
build_options.severity[i] = DIAG_WARN;
}
for (int i = DIAG_ERROR_TYPE; i < DIAG_END_SENTINEL; i++)
{
build_options.severity[i] = DIAG_ERROR;
}
arg_count = argc;
args = argv;
for (arg_index = 1; arg_index < arg_count; arg_index++)

View File

@@ -4,7 +4,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <stdint.h>
#include "../utils/common.h"
#define MAX_LIB_DIRS 1024
#define MAX_FILES 2048
@@ -25,19 +25,67 @@ typedef enum
COMMAND_UNIT_TEST,
} CompilerCommand;
typedef enum
{
DIAG_NONE = 0, // Don't use!
DIAG_WARNING_TYPE, // Don't use!
DIAG_UNUSED,
DIAG_UNUSED_PARAMETER,
DIAG_UNUSED_FUNCTION,
DIAG_UNUSED_VARIABLE,
DIAG_UNUSED_IMPORT,
DIAG_UNUSED_MODULE,
DIAG_UNUSED_LABEL,
DIAG_UNUSED_PUBLIC,
DIAG_UNUSED_TYPE,
DIAG_CONVERSION,
DIAG_COVERED_SWITCH_DEFAULT,
DIAG_UNREACHABLE_DEFAULT,
DIAG_ERROR_TYPE, // Don't use this!
DIAG_FALLOFF_NONVOID,
DIAG_DUPLICATE_ATTRIBUTE,
DIAG_NOT_IN_ENUM,
DIAG_MISSING_CASE,
DIAG_REMAINDER_DIV_BY_ZERO,
DIAG_INT_TO_POINTER_CAST,
DIAG_SHIFT_LHS_NEGATIVE,
DIAG_SHIFT_NEGATIVE,
DIAG_SHIFT_GT_TYPEWIDTH,
DIAG_END_SENTINEL
} DiagnosticsType;
typedef enum
{
DIAG_IGNORE = 0,
DIAG_WARN,
DIAG_ERROR,
} DiagnosticsSeverity;
typedef enum
{
COMPILE_NORMAL,
COMPILE_LEX_ONLY,
COMPILE_LEX_PARSE_ONLY,
COMPILE_OUTPUT_AST,
} CompileOption;
typedef struct
{
const char* lib_dir[MAX_LIB_DIRS];
int lib_count;
const char* files[MAX_FILES];
int file_count;
const char** files;
const char* project_name;
const char* target;
const char* path;
CompilerCommand command;
uint32_t symtab_size;
CompileOption compile_option;
DiagnosticsSeverity severity[DIAG_END_SENTINEL];
} BuildOptions;
extern BuildOptions build_options;
void parse_arguments(int argc, const char *argv[]);

View File

@@ -2,14 +2,68 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <utils/errors.h>
#include <utils/file_utils.h>
#include "compiler.h"
#include "symtab.h"
#include "../build/build_options.h"
#include "../utils/lib.h"
#include "lexer.h"
#include "source_file.h"
#include "parser.h"
#include "diagnostics.h"
// One-time compiler setup: size the symbol table and initialize it.
// The requested size is rounded up to a power of two — presumably the
// hash table implementation requires it (TODO confirm in symtab.c).
void compiler_init(void)
{
	uint32_t symtab_size = nextHighestPowerOf2(build_options.symtab_size);
	symtab_init(symtab_size);
}
static void compiler_lex()
{
VECEACH(build_options.files, i)
{
bool loaded = false;
File *file = source_file_load(build_options.files[i], &loaded);
if (loaded) continue;
lexer_add_file_for_lexing(file);
printf("# %s\n", file->full_path);
while (1)
{
Token token = lexer_scan_token();
printf("%s ", token_type_to_string(token.type));
if (token.type == TOKEN_EOF) break;
}
printf("\n");
}
exit(EXIT_SUCCESS);
}
void compiler_parse()
{
VECEACH(build_options.files, i)
{
bool loaded = false;
File *file = source_file_load(build_options.files[i], &loaded);
if (loaded) continue;
diag_reset();
parse_file(file);
printf("\n");
}
exit(EXIT_SUCCESS);
}
// Entry point for compiling the files collected in build_options.
// Note: compiler_lex() and compiler_parse() both call exit(), so the
// break statements after them are unreachable; the trailing TODO marks
// the not-yet-implemented full compilation path.
void compile_file()
{
	if (!vec_size(build_options.files)) error_exit("No files to compile.");
	switch (build_options.compile_option)
	{
		case COMPILE_LEX_ONLY:
			compiler_lex();
			break;
		case COMPILE_LEX_PARSE_ONLY:
			compiler_parse();
			break;
		default:
			break;
	}
	TODO
}

View File

@@ -6,3 +6,4 @@
void compiler_init();
void compile_file();

View File

@@ -4,9 +4,11 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <stdint.h>
#include "tokens.h"
#include "utils/common.h"
typedef uint32_t SourceLoc;
#define INVALID_LOC UINT32_MAX
typedef struct
{
@@ -30,9 +32,10 @@ typedef struct
{
const char *contents;
const char *name;
SourceLoc start;
SourceLoc end;
const char *full_path;
SourceLoc start_id;
SourceLoc end_id;
} File;
#define TOKEN_MAX_LENGTH 0xFFFF
#define MAX_IDENTIFIER_LENGTH 31

317
src/compiler/diagnostics.c Normal file
View File

@@ -0,0 +1,317 @@
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "diagnostics.h"
#include "source_file.h"
#include <math.h>
#include <stdarg.h>
// Global state for error/warning reporting.
// Fix: the struct tag was '_Diagnostics' — identifiers starting with an
// underscore followed by an uppercase letter are reserved for the
// implementation (CERT DCL37-C). The tag is unused, so it is dropped.
typedef struct
{
	bool panic_mode;   // When set, further diagnostics are suppressed until reset.
	unsigned errors;   // Errors reported so far.
	unsigned warnings; // Warnings reported so far.
	bool use_color;    // Output colorization flag (configuration, not per-run state).
} Diagnostics;

Diagnostics diagnostics;
void diag_reset(void)
{
diagnostics.panic_mode = false;
diagnostics.errors = 0;
diagnostics.warnings = 0;
}
// Clear panic mode so subsequent diagnostics are reported again.
void reset_panic_mode(void)
{
	diagnostics.panic_mode = false;
}

// Selects the label printed after the source excerpt in print_error().
typedef enum
{
	PRINT_TYPE_ERROR, // "Error:" prefix
	PRINT_TYPE_PREV,  // no prefix — presumably for "previously defined here" notes
	PRINT_TYPE_WARN   // "Warning:" prefix
} PrintType;
// Print a diagnostic: up to LINES_SHOWN lines of source context ending at
// the error line, a caret underline of the offending range, and a final
// "(file:line) ..." message whose label depends on print_type.
//
// Fixes relative to the original:
//  - line-number width used (int)round(log10(line)) + 1, which overcounts
//    for e.g. line 9 or 95 (round() rounds up from .5); replaced with an
//    exact digit count.
//  - the '%.*s' precision argument was a ptrdiff_t; variadic printf
//    requires an int there (UB otherwise) — explicit casts added.
//  - LINES_SHOWN was a 'const int' (not a constant expression in C, so the
//    array was a VLA) and the shift loop hardcoded 3; now an enum.
static void print_error(SourceRange source_range, const char *message, PrintType print_type)
{
	File *file = source_file_from_position(source_range.loc);
	const char *content = file->contents;
	const char *error_start = file->contents + source_range.loc - file->start_id;

	// Number of source lines shown, including the error line itself.
	enum { LINES_SHOWN = 4 };

	// linestarts[0] = start of current line, [1] = previous line, etc.
	const char *linestarts[LINES_SHOWN];
	for (int i = 0; i < LINES_SHOWN; i++) linestarts[i] = NULL;
	linestarts[0] = content;

	const char *current = content;
	unsigned line = 1;
	while (current < error_start)
	{
		if (current[0] == '\n')
		{
			line++;
			// Shift the line-start history window one step back.
			for (int i = LINES_SHOWN - 1; i > 0; i--) linestarts[i] = linestarts[i - 1];
			linestarts[0] = current + 1;
		}
		current++;
	}

	// Find the end of the line containing the error.
	const char *end = current;
	while (*end != '\n' && *end != '\0') end++;

	// Exact number of digits in the widest (= last) line number printed.
	int max_line_length = 1;
	for (unsigned l = line; l >= 10; l /= 10) max_line_length++;

	// Build a format string of the form "%<width>d: %.*s\n".
	char number_buffer[20];
	snprintf(number_buffer, sizeof(number_buffer), "%%%dd: %%.*s\n", max_line_length);

	// Print the context lines (oldest first); missing history entries are NULL.
	for (int i = LINES_SHOWN - 1; i > 0; i--)
	{
		const char *start = linestarts[i];
		if (start == NULL) continue;
		const char *line_end = linestarts[i - 1];
		// -1 drops the '\n' that terminates the context line.
		eprintf(number_buffer, (int)(line - i), (int)(line_end - start - 1), start);
	}
	// The error line itself (no trailing '\n' included in [linestarts[0], end)).
	eprintf(number_buffer, (int)line, (int)(end - linestarts[0]), linestarts[0]);

	// Pad to the error column ("<width>: " prefix + offset into the line),
	// then underline the range with carets.
	for (int i = 0; i < max_line_length + 2 + (int)(error_start - linestarts[0]); i++)
	{
		eprintf(" ");
	}
	for (int i = 0; i < source_range.length; i++)
	{
		eprintf("^");
	}
	eprintf("\n");

	switch (print_type)
	{
		case PRINT_TYPE_ERROR:
			eprintf("(%s:%d) Error: %s\n", file->name, line, message);
			break;
		case PRINT_TYPE_PREV:
			eprintf("(%s:%d) %s\n", file->name, line, message);
			break;
		case PRINT_TYPE_WARN:
			eprintf("(%s:%d) Warning: %s\n", file->name, line, message);
			break;
		default:
			UNREACHABLE
	}
}
// Format the printf-style message into a fixed buffer and emit it
// through print_error() with the error label.
static void vprint_error(SourceRange span, const char *message, va_list args)
{
	char buffer[256];
	vsnprintf(buffer, sizeof(buffer), message, args);
	print_error(span, buffer, PRINT_TYPE_ERROR);
}
// Report a printf-style error at the given range.
// Fix: the panic-mode check, error printing and counter bump were
// duplicated between diag_error_at() and diag_verror_at(); the variadic
// entry point now delegates to the va_list one (declared in diagnostics.h).
void diag_error_at(SourceRange span, const char *message, ...)
{
	va_list args;
	va_start(args, message);
	diag_verror_at(span, message, args);
	va_end(args);
}

// va_list variant: in panic mode further errors are suppressed; otherwise
// enter panic mode, print the error and count it.
void diag_verror_at(SourceRange span, const char *message, va_list args)
{
	if (diagnostics.panic_mode) return;
	diagnostics.panic_mode = true;
	vprint_error(span, message, args);
	diagnostics.errors++;
}
/*
bool diagnostics_silence_warnings(Array *warnings)
{
for (unsigned i = 0; i < warnings->count; i++)
{
const char *warning = warnings->entries[i];
if (strcmp("no-unused", warning) == 0)
{
diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED);
diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_PARAMETER);
continue;
}
if (strcmp("no-unused-variable", warning) == 0)
{
diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_VARIABLE);
continue;
}
if (strcmp("no-unused-function", warning) == 0)
{
diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_FUNCTION);
continue;
}
if (strcmp("no-unused-type", warning) == 0)
{
diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_TYPE);
continue;
}
if (strcmp("no-unused-module", warning) == 0)
{
diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_MODULE);
continue;
}
if (strcmp("no-unused-public", warning) == 0)
{
diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_PUBLIC);
continue;
}
if (strcmp("no-unused-import", warning) == 0)
{
diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_IMPORT);
continue;
}
if (strcmp("no-unused-label", warning) == 0)
{
diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_LABEL);
continue;
}
PRINT_ERROR("recipe has unknown warning: '%s'\n", warning);
return false;
}
return true;
}
void prev_at_range(SourceRange span, const char *message, ...)
{
va_list args;
va_start(args, message);
char buffer[256];
vsnprintf(buffer, 256, message, args);
print_error(span, buffer, PRINT_TYPE_PREV);
va_end(args);
}
void prev_at(SourceLoc loc, const char *message, ...)
{
va_list args;
va_start(args, message);
char buffer[256];
vsnprintf(buffer, 256, message, args);
print_error((SourceRange){ loc, 1 }, buffer, PRINT_TYPE_PREV);
va_end(args);
}
void sema_error_range(SourceRange token, const char *message, ...)
{
va_list args;
va_start(args, message);
vprint_error(token, message, args);
va_end(args);
diagnostics.errors++;
}
void sema_error_at(SourceLoc loc, const char *message, ...)
{
va_list args;
va_start(args, message);
vprint_error((SourceRange) { loc, 1 }, message, args);
va_end(args);
diagnostics.errors++;
}
void sema_warn_at(DiagnosticsType type, SourceLoc loc, const char *message, ...)
{
// TODO ENABLE
return;
SourceRange span = {.loc = loc, .length = 1};
switch (diagnostics.severity[type])
{
case DIAG_IGNORE:
return;
case DIAG_WARN:
break;
case DIAG_ERROR:
{
va_list args;
va_start(args, message);
vprint_error(span, message, args);
va_end(args);
diagnostics.errors++;
return;
}
}
va_list args;
va_start(args, message);
char buffer[256];
vsnprintf(buffer, 256, message, args);
if (diagnostics.severity[type])
print_error(span, buffer, PRINT_TYPE_WARN);
va_end(args);
diagnostics.warnings++;
}
void sema_warn_range(DiagnosticsType type, SourceRange span, const char *message, ...)
{
// TODO ENABLE
return;
switch (diagnostics.severity[type])
{
case DIAG_IGNORE:
return;
case DIAG_WARN:
break;
case DIAG_ERROR:
{
va_list args;
va_start(args, message);
vprint_error(span, message, args);
va_end(args);
diagnostics.errors++;
return;
}
}
va_list args;
va_start(args, message);
char buffer[256];
vsnprintf(buffer, 256, message, args);
if (diagnostics.severity[type]) print_error(span, buffer, PRINT_TYPE_WARN);
va_end(args);
diagnostics.warnings++;
}
unsigned errors()
{
return diagnostics.errors;
}
bool error_found()
{
return diagnostics.errors > 0;
}
*/

View File

@@ -0,0 +1,34 @@
#pragma once
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "compiler_common.h"
void diag_reset(void);
void diag_error_at(SourceRange span, const char *message, ...);
void diag_verror_at(SourceRange span, const char *message, va_list args);
/*
typedef struct _Array Array;
void diagnostics_init(void);
void diagnostics_reset(void);
void diagnostics_update_severity(DiagnosticsSeverity severity, DiagnosticsType type);
bool diagnostics_silence_warnings(Array *warnings);
void diagnostics_use_color(bool use_color);
void verror_at(SourceRange span, const char *message, va_list args);
void sema_error_range(SourceRange token, const char *message, ...);
void sema_error_at(SourceLoc loc, const char *message, ...);
void prev_at_range(SourceRange span, const char *message, ...);
void prev_at(SourceLoc loc, const char *message, ...);
void sema_warn_at(DiagnosticsType type, SourceLoc loc, const char *message, ...);
void sema_warn_range(DiagnosticsType type, SourceRange span, const char *message, ...);
bool in_panic_mode(void);
unsigned errors();
void reset_panic_mode(void);
bool error_found(void);
*/

View File

@@ -2,15 +2,14 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <stdint.h>
#include "lexer.h"
#include <string.h>
#include <assert.h>
#include <build/build_options.h>
#include "../utils/errors.h"
#include "../utils/lib.h"
#include "symtab.h"
#include "source_file.h"
#include "diagnostics.h"
#include <stdarg.h>
typedef enum
{
@@ -22,6 +21,7 @@ typedef enum
typedef struct
{
bool lexer_init_complete;
const char *begin;
const char *start;
const char *current;
@@ -78,14 +78,17 @@ static inline bool reached_end(void)
return *lexer.current == '\0';
}
static Token error_token(const char *message)
static Token error_token(const char *message, ...)
{
Token token;
token.type = TOKEN_ERROR;
token.type = INVALID_TOKEN;
token.start = lexer.start;
token.span.length = 1;
token.span.loc = lexer.current_file->start + (lexer.begin - lexer.start);
// TODO error_at(token.span, message);
token.span.loc = lexer.current_file->start_id + (lexer.begin - lexer.start);
va_list list;
va_start(list, message);
diag_verror_at(token.span, message, list);
va_end(list);
return token;
}
@@ -97,7 +100,7 @@ static Token make_token(TokenType type)
{
.type = type,
.start = lexer.start,
.span = { .loc = lexer.current_file->start + (lexer.start - lexer.begin), .length = token_size }
.span = { .loc = lexer.current_file->start_id + (lexer.start - lexer.begin), .length = token_size }
};
}
@@ -109,7 +112,7 @@ static Token make_string_token(TokenType type, const char* string)
{
.type = type,
.start = lexer.start,
.span = { .loc = lexer.current_file->start + (lexer.start - lexer.begin), .length = token_size },
.span = { .loc = lexer.current_file->start_id + (lexer.start - lexer.begin), .length = token_size },
.string = string,
};
}
@@ -293,6 +296,7 @@ static inline Token scan_ident(void)
hash = FNV1a(next(), hash);
}
EXIT:;
if (type == INVALID_TOKEN) error_token("An identifier may not only consist of '_'");
uint32_t len = lexer.current - lexer.start;
const char* interned_string = symtab_add(lexer.start, len, hash, &type);
return make_string_token(type, interned_string);
@@ -303,8 +307,8 @@ static inline Token scan_ident(void)
static Token scan_oct(void)
{
next(); // Skip the o
if (!is_oct(next())) return error_token("Invalid octal sequence");
char o = next(); // Skip the o
if (!is_oct(next())) return error_token("An expression starting with '0%c' would expect to be followed by octal numbers (0-7).", o);
while (is_oct_or_(peek())) next();
return make_token(TOKEN_INTEGER);
}
@@ -312,41 +316,49 @@ static Token scan_oct(void)
Token scan_binary(void)
{
next(); // Skip the b
if (!is_binary(next())) return error_token("Invalid binary sequence");
char b = next(); // Skip the b
if (!is_binary(next()))
{
return error_token("An expression starting with '0%c' would expect a sequence of zeroes and ones, "
"did you try to write a hex value but forgot the '0x'?", b);
}
while (is_binary_or_(peek())) next();
return make_token(TOKEN_INTEGER);
}
#define PARSE_SPECIAL_NUMBER(is_num, is_num_with_underscore, exp, EXP) \
while (is_num_with_underscore(peek())) next(); \
bool is_float = false; \
if (peek() == '.') \
{ \
while (is_num_with_underscore(peek())) next(); \
bool is_float = false; \
if (peek() == '.') \
{ \
is_float = true; \
next(); \
char c = peek(); \
if (c == '_') return error_token("Underscore may only appear between digits."); \
if (c == '_') return error_token("Can't parse this as a floating point value due to the '_' directly after decimal point."); \
if (is_num(c)) next(); \
while (is_num_with_underscore(peek())) next(); \
} \
char c = peek(); \
if (c == (exp) || c == (EXP)) \
{ \
} \
char c = peek(); \
if (c == (exp) || c == (EXP)) \
{ \
is_float = true; \
next(); \
char c2 = next(); \
if (c2 == '+' || c2 == '-') c2 = next(); \
if (!is_num(c2)) return error_token("Invalid exponential expression"); \
if (!is_num(c2)) return error_token("Parsing the floating point exponent failed, because '%c' is not a number.", c2); \
while (is_num(peek())) next(); \
} \
if (prev() == '_') return error_token("Underscore may only appear between digits."); \
return make_token(is_float ? TOKEN_FLOAT : TOKEN_INTEGER)
} \
if (prev() == '_') return error_token("The number ended with '_', but that character needs to be between, not after, digits."); \
return make_token(is_float ? TOKEN_FLOAT : TOKEN_INTEGER)
static inline Token scan_hex(void)
{
next(); // skip the x
if (!is_hex(next())) return error_token("Invalid hex sequence");
char x = next(); // skip the x
if (!is_hex(next()))
{
return error_token("'0%c' starts a hexadecimal number, "
"but it was followed by '%c' which is not part of a hexadecimal number.", x, prev());
}
PARSE_SPECIAL_NUMBER(is_hex, is_hex_or_, 'p', 'P');
}
@@ -363,7 +375,6 @@ static inline Token scan_digit(void)
{
switch (peek_next())
{
// case 'X': Let's not support this? REVISIT
case 'x':
case 'X':
advance(2);
@@ -397,11 +408,17 @@ static inline Token scan_char()
{
for (int i = 0; i < 2; i++)
{
if (!is_hex(next())) return error_token("Invalid escape sequence");
if (!is_hex(next()))
{
return error_token(
"An escape sequence starting with "
"'\\x' needs to be followed by "
"a two digit hexadecimal number.");
}
}
}
if (next() != '\'') return error_token("Invalid character value");
}
if (next() != '\'') return error_token("The character only consist of a single character, did you want to use \"\" instead?");
return make_token(TOKEN_INTEGER);
}
@@ -417,7 +434,7 @@ static inline Token scan_string()
}
if (reached_end())
{
return error_token("Unterminated string.");
return error_token("Reached the end looking for '\"'. Did you forget it?");
}
}
return make_token(TOKEN_STRING);
@@ -513,15 +530,15 @@ static inline Token scan_docs(void)
next();
return make_token(TOKEN_DOCS_LINE);
case '\0':
return error_token("Docs reached end of the file");
return error_token("The document ended without finding the end of the doc comment. "
"Did you forget a '*/' somewhere?");
default:
break;
}
}
}
Token scan_token(void)
Token lexer_scan_token(void)
{
// First we handle our "in docs" state.
if (lexer.lexer_state == LEXER_STATE_DOCS_PARSE)
@@ -544,7 +561,7 @@ Token scan_token(void)
lexer.lexer_state = LEXER_STATE_DOCS_PARSE;
return make_token(TOKEN_DOCS_START);
case WHITESPACE_COMMENT_REACHED_EOF:
return error_token("Comment was not terminated");
return error_token("Reached the end looking for '*/'. Did you forget it somewhere?");
case WHITESPACE_FOUND_EOF:
return make_token(TOKEN_EOF);
case WHITESPACE_FOUND_DOCS_EOL:
@@ -637,25 +654,52 @@ Token scan_token(void)
backtrack();
return is_digit(c) ? scan_digit() : scan_ident();
}
return error_token("Unexpected character.");
return error_token("'%c' may not be placed outside of a string or comment, did you perhaps forget a \" somewhere?", c);
}
}
void lexer_test_setup(const char* text)
// Accessor for the file currently registered with the lexer.
File* lexer_current_file(void)
{
	return lexer.current_file;
}

// Lazy one-time lexer initialization.
// NOTE(review): this passes the raw build_options.symtab_size to
// symtab_init(), while compiler_init() rounds it up to a power of two
// first — confirm which is intended, and whether double initialization
// (compiler_init + this) is safe.
void lexer_check_init(void)
{
	if (lexer.lexer_init_complete) return;
	lexer.lexer_init_complete = true;
	symtab_init(build_options.symtab_size);
}
// Point the lexer at a new file: all scanning pointers are reset to the
// start of the file's contents and the state machine returns to NORMAL.
void lexer_add_file_for_lexing(File *file)
{
	LOG_FUNC
	lexer_check_init();
	lexer.current_file = file;
	lexer.last_in_range = 0;
	lexer.begin = lexer.current_file->contents;
	lexer.start = lexer.begin;
	lexer.current = lexer.start;
	lexer.lexer_state = LEXER_STATE_NORMAL;
}
void lexer_test_setup(const char *text, size_t len)
{
lexer_check_init();
static File helper;
lexer.lexer_state = LEXER_STATE_NORMAL;
lexer.start = text;
lexer.current = text;
lexer.begin = text;
lexer.current_file = &helper;
lexer.current_file->start = 0;
lexer.current_file->start_id = 0;
lexer.current_file->contents = text;
lexer.current_file->end = 100000;
lexer.current_file->name = "Foo";
lexer.current_file->end_id = len;
lexer.current_file->name = "Test";
}
Token scan_ident_test(const char* scan)
Token lexer_scan_ident_test(const char *scan)
{
static File helper;
lexer.lexer_state = LEXER_STATE_NORMAL;
@@ -663,9 +707,9 @@ Token scan_ident_test(const char* scan)
lexer.current = scan;
lexer.begin = scan;
lexer.current_file = &helper;
lexer.current_file->start = 0;
lexer.current_file->start_id = 0;
lexer.current_file->contents = scan;
lexer.current_file->end = 1000;
lexer.current_file->end_id = 1000;
lexer.current_file->name = "Foo";
if (scan[0] == '@' && is_letter(scan[1]))
@@ -674,5 +718,5 @@ Token scan_ident_test(const char* scan)
return scan_docs();
}
return scan_token();
return lexer_scan_token();
}

View File

@@ -4,14 +4,14 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "tokens.h"
#include "compiler_common.h"
Token scan_token(void);
Token lexer_scan_token(void);
TokenType identifier_type(const char* restrict start, int len);
TokenType ident_type_fnv1(const char *restrict start, int len);
Token lexer_scan_ident_test(const char *scan);
Token scan_ident_test(const char* scan);
void lexer_test_setup(const char* text);
void lexer_test_setup(const char *text, size_t len);
void lexer_add_file_for_lexing(File *file);
File* lexer_current_file(void);
void lexer_check_init(void);

493
src/compiler/parser.c Normal file
View File

@@ -0,0 +1,493 @@
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <stdbool.h>
#include <stdarg.h>
#include "../utils/errors.h"
#include "parser.h"
#include "semantic_analyser.h"
#include "lexer.h"
// Maximum number of doc-comment lines buffered by parse_docs().
// Fix: this was 'const int', which is not a constant expression in C, so
// 'static Token docs[MAX_DOCS_ROWS]' did not compile (a static array
// cannot have a variable length). An enum constant is valid everywhere.
enum { MAX_DOCS_ROWS = 1024 };

// Current and previous token of the parse stream.
Token tok;
Token prev_tok;

// Sentinel token used where a rule failed but a placeholder is needed.
Token poisoned = {
	.type = INVALID_TOKEN,
};
// --- Parser base methods
// Stub: wrap a raw interned name into a Token. Not implemented (TODO aborts).
Token token_wrap(const char* name)
{
	TODO
}
// Move to the next token, remembering the previous one. Invalid (error)
// tokens are skipped here — the lexer has already reported them through
// the diagnostics module.
void advance(void)
{
	prev_tok = tok;
	do
	{
		tok = lexer_scan_token();
	} while (tok.type == INVALID_TOKEN);
}
// Advance past a token the caller has already matched; the assert
// documents (and checks in debug builds) that contract.
void advance_and_verify(TokenType token_type)
{
	assert(tok.type == token_type);
	advance();
}

// Consume the current token only if it has the given type.
// Returns true when a token was consumed.
bool try_consume(TokenType type)
{
	if (tok.type != type) return false;
	advance();
	return true;
}
// Consume a token of the given type, or report the printf-style error at
// the current token's range. Returns true when the token was consumed.
bool consume(TokenType type, const char *message, ...)
{
	if (try_consume(type)) return true;

	va_list args;
	va_start(args, message);
	sema_verror_at(tok.span, message, args);
	va_end(args);
	return false;
}
// True when the current token is any kind of identifier
// (variable, type or all-caps constant style).
bool was_ident()
{
	return tok.type == TOKEN_VAR_IDENT
	       || tok.type == TOKEN_TYPE_IDENT
	       || tok.type == TOKEN_CAPS_IDENT;
}
// Stub: skip tokens until one of the given type is found. TODO aborts.
static void recover_to(TokenType type)
{
	TODO
}

// Stub: skip to a token of the given type and consume it. TODO aborts.
static void recover_and_consume(TokenType type)
{
	TODO
}

// Statement-level recovery: resynchronize past the next ';'.
static void recover_stmt(void)
{
	recover_and_consume(TOKEN_EOS);
}
/**
 * Walk until we find the first top level construct.
 * (Note that this is the slow path, so no need to inline)
 */
static void recover_top_level(void)
{
	while (tok.type != TOKEN_EOF)
	{
		switch (tok.type)
		{
			case TOKEN_FUNC:
			case TOKEN_CONST:
			case TOKEN_TYPEDEF:
			case TOKEN_ERROR:
			case TOKEN_STRUCT:
			case TOKEN_IMPORT:
			case TOKEN_UNION:
			case TOKEN_ENUM:
			case TOKEN_MACRO:
				return;
			default:
				// Fix: the original never advanced here, so the loop spun
				// forever on the same non-top-level token.
				advance();
				break;
		}
	}
}
// Require a statement-terminating ';'. On failure the error has already
// been reported by consume(); attempt to resynchronize past the next ';'.
// Returns true when the ';' was present.
static inline bool consume_stmt_end(void)
{
	if (consume(TOKEN_EOS, "Expected ';'")) return true;
	recover_and_consume(TOKEN_EOS);
	return false;
}
// Report a printf-style error at the current token's source range.
void error_at_current(const char* message, ...)
{
	va_list args;
	va_start(args, message);
	sema_verror_at(tok.span, message, args);
	va_end(args);
}
// --- Parsing
/**
* Optionally parse docs
*
* docs ::= TOKEN_DOCS_START docs_body TOKEN_DOCS_END
*
* docs_body ::= docs_line
* | docs_body TOKEN_DOCS_EOL docs_line
*
* docs_line ::= TOKEN_DOCS_LINE
* | TOKEN_AT_IDENT [ignored]
* | TOKEN_AT_RETURN string
* | TOKEN_AT_PARAM VAR_IDENT string
* | TOKEN_AT_THROWS string
* | TOKEN_AT_REQUIRE [expr]+
* | TOKEN_AT_ENSURE [expr]+
* | TOKEN_AT_CONST VAR_IDENT
* | TOKEN_AT_PURE
* | TOKEN_AT_REQPARSE [expr | stmt]
* | TOKEN_AT_DEPRECATED
*/
static inline void parse_docs()
{
LOG_FUNC
if (!try_consume(TOKEN_DOCS_START)) return;;
static Token docs[MAX_DOCS_ROWS];
int lines = 0;
while (tok.type != TOKEN_DOCS_END)
{
switch (tok.type)
{
case TOKEN_DOCS_LINE:
if (lines == MAX_DOCS_ROWS) error_at_current("Exceeded max number of lines in docs: %d.", MAX_DOCS_ROWS);
docs[lines++] = tok;
break;
case TOKEN_AT_DEPRECATED:
break;
case TOKEN_AT_PURE:
break;
case TOKEN_AT_IDENT:
case TOKEN_AT_THROWS:
case TOKEN_AT_CONST:
case TOKEN_AT_REQPARSE:
case TOKEN_AT_PARAM:
case TOKEN_AT_RETURN:
default:
TODO
}
}
}
/**
 * module ::= [docs]? MODULE IDENTIFIER EOS
 */
static inline void parse_module()
{
	LOG_FUNC
	parse_docs();
	// No explicit module declaration: derive the module from the file name.
	if (!try_consume(TOKEN_MODULE))
	{
		sema_add_module_from_filename();
		return;
	}
	// 'module' must be followed by a valid (lower-case) identifier.
	if (consume(TOKEN_VAR_IDENT, "Expected a valid module name"))
	{
		sema_add_module(prev_tok);
		consume_stmt_end();
		return;
	}
	// Bad module name: register the poisoned sentinel and resynchronize.
	sema_add_module(poisoned);
	recover_top_level();
}
/**
 * import ::= IMPORT VAR_IDENT EOS
 * | IMPORT VAR_IDENT AS VAR_IDENT EOS
 * | IMPORT VAR_IDENT LOCAL EOS
 */
static inline void parse_import()
{
	advance_and_verify(TOKEN_IMPORT);
	if (!consume(TOKEN_VAR_IDENT, "Expected a module name"))
	{
		recover_top_level();
		return;
	}
	Token module_name = prev_tok;
	// Fix: 'Token alias = {};' — an empty brace initializer is only valid
	// from C23; {0} zero-initializes identically in C99/C11.
	Token alias = {0};
	ImportType import_type = IMPORT_TYPE_FULL;
	if (try_consume(TOKEN_AS))
	{
		if (!consume(TOKEN_VAR_IDENT, "Expected a valid alias name"))
		{
			recover_and_consume(TOKEN_EOS);
			return;
		}
		alias = prev_tok;
		import_type = IMPORT_TYPE_ALIAS;
	}
	// 'local' may follow either form and narrows the import's visibility.
	if (try_consume(TOKEN_LOCAL))
	{
		import_type = import_type == IMPORT_TYPE_ALIAS ? IMPORT_TYPE_ALIAS_LOCAL : IMPORT_TYPE_LOCAL;
	}
	sema_add_import(module_name, alias, import_type);
	consume_stmt_end();
}
/**
 * imports ::= import
 * | imports import
 */
static inline void parse_imports(void)
{
	// Imports form an uninterrupted run after the module declaration.
	for (;;)
	{
		if (tok.type != TOKEN_IMPORT) return;
		parse_import();
	}
}
// Stub: parse a function declaration. TODO aborts.
static inline void parse_func(void)
{
	TODO
}

// Stub: parse a type. TODO aborts. The void* return is a placeholder
// for a future type AST node.
static inline void *parse_type(void)
{
	TODO
}

// Stub: parse an expression whose evaluation is deferred. TODO aborts.
static inline void *parse_deferred_expression(void)
{
	TODO
}
static inline void parse_const(void)
{
advance_and_verify(TOKEN_CONST);
// parse_type();
if (!consume(TOKEN_CAPS_IDENT, "Expected an upper case identifier"))
{
recover_top_level();
return;
}
if (!consume(TOKEN_EQEQ, "Expected '=' here"))
{
recover_top_level();
return;
}
parse_deferred_expression();
consume_stmt_end();
}
// Stub: parse a union declaration. TODO aborts.
static inline void parse_union(void)
{
	TODO;
}

// Stub: parse a struct declaration. TODO aborts.
static inline void parse_struct(void)
{
	TODO;
}

// Stub: parse a macro declaration. TODO aborts.
static inline void parse_macro(void)
{
	TODO;
}
/**
* error ::= ERROR TYPE_IDENT '{' CAPS_IDENT (',' CAPS_IDENT)* ','? '}'
*/
static inline void parse_error(void)
{
advance_and_verify(TOKEN_ERROR);
if (!consume(TOKEN_TYPE_IDENT, "Expected a valid error type name here"))
{
recover_top_level();
return;
}
Token name = prev_tok;
if (!consume(TOKEN_LBRACE, "Expected {' after error type name"))
{
recover_top_level();
return;
}
while (tok.type == TOKEN_CAPS_IDENT)
{
// TODO store
advance();
if (!try_consume(TOKEN_COMMA)) break;
}
if (!consume(TOKEN_RBRACE, "Expected '}' here"))
{
recover_top_level();
}
sema_add_errors(name /* todo values */);
}
/**
 * enum ::= ENUM TYPE_NAME (':' type)? '{' enum_def (',' enum_def)* ','? '}'
 *
 * enum_def ::= CAPS_IDENT ('=' const_expr)?
 *
 * TODO enum extra data?
 */
static inline void parse_enum(void)
{
	advance_and_verify(TOKEN_ENUM);
	if (!consume(TOKEN_TYPE_IDENT, "Expected a valid enum type name here"))
	{
		recover_top_level();
		return;
	}
	Token name = prev_tok;
	void *type = NULL;
	// Optional underlying type: ': type'.
	if (try_consume(TOKEN_COLON))
	{
		type = parse_type();
	}
	// Fix: the second message was missing the opening quote around '{'.
	if (!consume(TOKEN_LBRACE, type ? "Expected '{' after enum type" : "Expected '{' after enum type name"))
	{
		recover_top_level();
		return;
	}
	while (tok.type == TOKEN_CAPS_IDENT)
	{
		// TODO store
		advance();
		if (try_consume(TOKEN_EQ))
		{
			// Store
			parse_deferred_expression();
		}
		if (!try_consume(TOKEN_COMMA)) break;
	}
	if (!consume(TOKEN_RBRACE, "Expected '}' here"))
	{
		recover_top_level();
		return;
	}
	// NOTE(review): this registers the enum through sema_add_errors(),
	// which looks copy-pasted from parse_error() — confirm whether a
	// dedicated sema_add_enum() is intended.
	sema_add_errors(name /* todo values */);
}
// Stub: parse a global variable declaration. TODO aborts.
static inline void parse_global_var(void)
{
	TODO;
}
// macro_var ::= DOLLAR_IDENT '=' deferred_expr ';'
static inline void parse_macro_var(void)
{
	advance_and_verify(TOKEN_DOLLAR_IDENT);
	Token var_name = prev_tok;
	if (consume(TOKEN_EQ, "Expected assignment here"))
	{
		// TODO use the result
		parse_deferred_expression();
		sema_add_macro_var(var_name /* , expr **/ );
		consume_stmt_end();
		return;
	}
	recover_top_level();
}
// Stub: parse a top-level macro expansion (@ident ...). TODO aborts.
static inline void parse_macro_expansion(void)
{
	TODO
}
// Dispatch loop for all top level constructs; runs until end of file.
// Unknown tokens are reported once and skipped via recover_top_level().
static inline void parse_top_level()
{
	LOG_FUNC
	while (tok.type != TOKEN_EOF)
	{
		switch (tok.type)
		{
			case TOKEN_FUNC:
				parse_func();
				break;
			case TOKEN_CONST:
				parse_const();
				break;
			case TOKEN_STRUCT:
				parse_struct();
				break;
			case TOKEN_UNION:
				parse_union();
				break;
			case TOKEN_MACRO:
				parse_macro();
				break;
			case TOKEN_ENUM:
				parse_enum();
				break;
			case TOKEN_ERROR:
				parse_error();
				break;
			case TOKEN_PUBLIC:
				sema_mark_next_public();
				// Fix: consume the 'public' token — without this advance()
				// the loop would process the same token forever.
				advance();
				break;
			case TOKEN_TYPE_IDENT:
				parse_global_var();
				break;
			case TOKEN_AT_IDENT:
				parse_macro_expansion();
				break;
			case TOKEN_DOLLAR_IDENT:
				parse_macro_var();
				break;
			case TOKEN_DOCS_START:
				parse_docs();
				break;
			default:
				error_at_current("Unexpected token found");
				recover_top_level();
				break;
		}
	}
}
// Parse the file currently registered with the lexer:
// module declaration, then the import run, then top level constructs.
void parse_current(void)
{
	LOG_FUNC
	advance();
	parse_module();
	parse_imports();
	parse_top_level();
}

// Parse a single source file: hand it to the lexer, set up the semantic
// analyser's compilation unit, then run the parser over it.
void parse_file(File *file)
{
	LOG_FUNC
	lexer_add_file_for_lexing(file);
	sema_init(file);
	parse_current();
}

10
src/compiler/parser.h Normal file
View File

@@ -0,0 +1,10 @@
#pragma once
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "compiler_common.h"
#include "tokens.h"
void parse_file(File *file);

View File

@@ -0,0 +1,68 @@
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "semantic_analyser.h"
#include <utils/errors.h>
#include <utils/lib.h>
#include "../utils/file_utils.h"
#include "diagnostics.h"
#include "symtab.h"
CompilationUnit current_unit;
// Reset the per-file analysis state before parsing 'file'.
void sema_init(File *file)
{
	LOG_FUNC
	current_unit.file = file;
	// INVALID_TOKEN marks "no module declaration seen yet".
	current_unit.module_name.type = INVALID_TOKEN;
}
// Record the module declared by the current file's `module` statement.
void sema_add_module(Token module_name)
{
	LOG_FUNC
	current_unit.module_name = module_name;
}
// Fall back to deriving the module name from the file name when the source
// has no module declaration. NOTE(review): unfinished — even the success
// path ends in TODO, so this currently always aborts.
void sema_add_module_from_filename(void)
{
	LOG_FUNC
	char buffer[MAX_IDENTIFIER_LENGTH + 1];
	int len = filename_to_module(current_unit.file->full_path, buffer);
	if (!len)
	{
		// File name could not be turned into a usable module name.
		TODO
	}
	TokenType type = TOKEN_VAR_IDENT;
	// Intern the derived name; symtab_add may reclassify it (e.g. keyword clash).
	const char *module_name = symtab_add(buffer, len, fnv1a(buffer, len), &type);
	if (type != TOKEN_VAR_IDENT)
	{
		// Derived name collides with a keyword or other identifier class.
		TODO
	}
	current_unit.module_name.string = module_name;
	TODO
}
// Record an import declaration for the current unit (unimplemented).
void sema_add_import(Token module_name, Token alias, ImportType import_type)
{
	TODO
}
// Record an `error` type declaration (unimplemented).
void sema_add_errors(Token error_type_name /* todo values */)
{
	TODO
}
// Record a top-level macro variable ($ident = expr) (unimplemented).
void sema_add_macro_var(Token macro_var_name /* , expr **/ )
{
	TODO
}
// If we have a public parameter, then the next one will be the real one.
void sema_mark_next_public(void)
{
	TODO
}
// Report a diagnostic at 'range' with printf-style args (unimplemented).
void sema_verror_at(SourceRange range, const char *message, va_list args)
{
	TODO
}

View File

@@ -0,0 +1,32 @@
#pragma once
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "compiler_common.h"
// Per-file state collected while analysing a single translation unit.
typedef struct
{
	Token module_name;  // module declared in the file; type INVALID_TOKEN until seen
	File *file;         // source file under analysis
} CompilationUnit;
// How an import brings a module's symbols into scope.
typedef enum {
	IMPORT_TYPE_FULL,
	IMPORT_TYPE_ALIAS,
	IMPORT_TYPE_ALIAS_LOCAL,
	IMPORT_TYPE_LOCAL
} ImportType;
// Reset per-file state before parsing 'file'.
void sema_init(File *file);
void sema_add_module(Token module_name);
void sema_add_module_from_filename(void);
void sema_add_import(Token module_name, Token alias, ImportType import_type);
void sema_add_errors(Token error_type_name /* todo values */);
void sema_add_macro_var(Token macro_var_name /* , expr **/ );
// If we have a public parameter, then the next one will be the real one.
void sema_mark_next_public(void);
void sema_verror_at(SourceRange range, const char *message, va_list args);

View File

@@ -0,0 +1,89 @@
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <sys/stat.h>
#include <limits.h>
#include "../build/build_options.h"
#include "source_file.h"
#include "../utils/lib.h"
#include "../utils/file_utils.h"
#include "lexer.h"
// Initial capacity of the global loaded-file vector.
static const size_t LEXER_FILES_START_CAPACITY = 128;
// Placeholder file returned for INVALID_LOC positions.
File pseudo_file;
typedef struct
{
	File **files;  // vector (lib.h VECNEW/VECADD) of every loaded file, in load order
} SourceFiles;
SourceFiles source_files;
// Load 'filename' (resolved to a canonical path) and register it in the
// global file list. If the same file was loaded before, the existing entry
// is returned and *already_loaded (when non-NULL) is set to true.
// Exits with an error on unresolvable paths or when MAX_FILES is exceeded.
File *source_file_load(const char *filename, bool *already_loaded)
{
	if (already_loaded) *already_loaded = false;
	if (!source_files.files) source_files.files = VECNEW(File *, LEXER_FILES_START_CAPACITY);
	char *full_path = malloc_arena(PATH_MAX + 1);
	if (!realpath(filename, full_path))
	{
		error_exit("Failed to resolve %s", filename);
	}
	VECEACH(source_files.files, index)
	{
		if (strcmp(source_files.files[index]->full_path, full_path) == 0)
		{
			// BUG FIX: guard the out-parameter here too — the original
			// dereferenced it unconditionally, crashing for callers that
			// pass NULL (which the reset at the top explicitly allows).
			if (already_loaded) *already_loaded = true;
			return source_files.files[index];
		}
	}
	if (vec_size(source_files.files) == MAX_FILES)
	{
		error_exit("Exceeded max number of files %d", MAX_FILES);
	}
	size_t size;
	const char* source_text = read_file(filename, &size);
	File *file = malloc(sizeof *file);
	// start_id/end_id form one contiguous global position range across all
	// files, so a SourceLoc can be mapped back to its file by binary search.
	file->full_path = full_path;
	file->start_id = vec_size(source_files.files) ? VECLAST(source_files.files)->end_id : 0;
	file->contents = source_text;
	ASSERT(file->start_id + size < UINT32_MAX, "Total files loaded exceeded %d bytes", UINT32_MAX);
	file->end_id = file->start_id + size;
	file->name = filename;
	source_files.files = VECADD(source_files.files, file);
	return file;
}
// Map a global source position back to the File whose [start_id, end_id]
// range contains it.
File *source_file_from_position(SourceLoc loc)
{
	if (loc == INVALID_LOC)
	{
		// Synthetic/unknown positions get a placeholder file.
		pseudo_file.contents = "---";
		return &pseudo_file;
	}
	// Fast path: most lookups hit the file currently being lexed
	// (it owns the highest position range).
	if (lexer_current_file()->start_id <= loc) return lexer_current_file();
	unsigned low = 0;
	unsigned high = vec_size(source_files.files) - 2;
	// NOTE(review): assumes at least two files are loaded here (the current
	// file was excluded above) and that files[] is sorted by start_id —
	// source_file_load assigns ranges in load order, so this holds.
	assert(vec_size(source_files.files) > 1);
	while (1)
	{
		// Binary search
		unsigned mid = (high + low) / 2;
		File *file = source_files.files[mid];
		if (file->start_id > loc)
		{
			// NOTE(review): if mid == 0 this underflows (unsigned wrap).
			// Cannot trigger while files[0].start_id == 0 — confirm.
			high = mid - 1;
			continue;
		}
		if (file->end_id < loc)
		{
			low = mid + 1;
			continue;
		}
		return file;
	}
}

View File

@@ -0,0 +1,11 @@
#pragma once
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "compiler_common.h"
File *source_file_load(const char *filename, bool *already_loaded);
File *source_file_from_position(SourceLoc loc);

View File

@@ -8,7 +8,7 @@
#include "../utils/errors.h"
#include <assert.h>
#include "../utils/lib.h"
#include "malloc.h"
#include "utils/malloc.h"
#include "tokens.h"
#define TABLE_MAX_LOAD 0.75

View File

@@ -112,15 +112,15 @@ const char *token_type_to_string(TokenType type)
case TOKEN_BIT_XOR_ASSIGN:
return "^=";
case TOKEN_VAR_IDENT:
return "<varIdent>";
return "VAR_IDENT";
case TOKEN_TYPE_IDENT:
return "<TypeIdent>";
return "TYPE_IDENT";
case TOKEN_STRING:
return "<string>";
return "STRING";
case TOKEN_INTEGER:
return "<int>";
return "INTEGER";
case TOKEN_REAL:
return "<float>";
return "FLOAT";
case TOKEN_QUESTION:
return "?";
case TOKEN_ELVIS:
@@ -192,9 +192,9 @@ const char *token_type_to_string(TokenType type)
case TOKEN_AT:
return "@";
case TOKEN_ERROR:
return "<error>";
return "error";
case TOKEN_EOF:
return "<eof>";
return "EOF";
case TOKEN_CAST:
return "cast";
case TOKEN_C_LONGDOUBLE:
@@ -208,7 +208,7 @@ const char *token_type_to_string(TokenType type)
case TOKEN_C_ULONGLONG:
return "c_ulonglong";
case TOKEN_C_SHORT:
return "c_ishort";
return "c_short";
case TOKEN_C_INT:
return "c_int";
case TOKEN_C_LONG:
@@ -251,6 +251,8 @@ const char *token_type_to_string(TokenType type)
return "i8";
case TOKEN_U8:
return "u8";
case TOKEN_U1:
return "u1";
case TOKEN_BOOL:
return "bool";
case TOKEN_QUAD:
@@ -294,25 +296,25 @@ const char *token_type_to_string(TokenType type)
case TOKEN_HALF:
return "half";
case TOKEN_CAPS_IDENT:
return "<CAPS_IDENT>";
return "CAPS_IDENT";
case TOKEN_AT_IDENT:
return "<@ident>";
return "AT_IDENT";
case TOKEN_HASH_IDENT:
return "<#ident>";
return "HASH_IDENT";
case TOKEN_DOLLAR_IDENT:
return "<$ident>";
return "DOLLAR_IDENT";
case TOKEN_CATCH:
return "catch";
case INVALID_TOKEN:
return "<\?\?\?>";
return "INVALID_TOKEN";
case TOKEN_DOCS_EOL:
return "<EOL>";
return "EOL";
case TOKEN_DOCS_START:
return "/**";
case TOKEN_DOCS_END:
return "*/";
case TOKEN_DOCS_LINE:
return "<docs line>";
return "DOCS_LINE";
case TOKEN_AT_REQUIRE:
return "@require";
case TOKEN_AT_ENSURE:
@@ -334,3 +336,4 @@ const char *token_type_to_string(TokenType type)
}
UNREACHABLE
}

View File

@@ -90,6 +90,7 @@ typedef enum _TokenType
TOKEN_U16, // u16
TOKEN_I8, // i8
TOKEN_U8, // u8
TOKEN_U1, // u1
// Basic types names
TOKEN_BYTE,

View File

@@ -34,7 +34,7 @@ static const char* test_parse = "struct Node\n"
" */\n"
"void Heap.init(Heap& heap, usize start) \n"
"{\n"
" Node& init_region = @cast(Node&, start);\n"
" Node& init_region = cast(Node&, start);\n"
" init_region.hole = 1;\n"
" init_region.size = HEAP_INIT_SIZE - @sizeof(Node) - @sizeof(Footer);\n"
"\n"
@@ -42,14 +42,14 @@ static const char* test_parse = "struct Node\n"
"\n"
" heap.bins[get_bin_index(init_region.size)].add(init_region);\n"
"\n"
" heap.start = @cast(void*, start);\n"
" heap.end = @cast(void*, start + HEAP_INIT_SIZE);\n"
" heap.start = cast(void*, start);\n"
" heap.end = cast(void*, start + HEAP_INIT_SIZE);\n"
"}\n"
"\n"
"void* Heap.alloc(Heap& heap, usize size) \n"
"{\n"
" uint index = get_bin_index(size);\n"
" Bin& temp = @cast(Bin&, heap.bins[index]);\n"
" Bin& temp = cast(Bin&, heap.bins[index]);\n"
" Node* found = temp.getBestFit(size);\n"
"\n"
" while (!found) \n"
@@ -60,7 +60,7 @@ static const char* test_parse = "struct Node\n"
"\n"
" if ((found.size - size) > (overhead + MIN_ALLOC_SZ)) \n"
" {\n"
" Node& split = @cast(Node*, @cast(char&, found) + sizeof(Node) + sizeof(Footer)) + size);\n"
" Node& split = cast(Node*, cast(char&, found) + sizeof(Node) + sizeof(Footer)) + size);\n"
" split.size = found.size - size - sizeof(Node) - sizeof(Footer);\n"
" split.hole = 1;\n"
" \n"

View File

@@ -10,6 +10,7 @@
#include <compiler/compiler.h>
#include <utils/lib.h>
#include <utils/errors.h>
#include <utils/malloc.h>
#include "benchmark.h"
#include "../compiler/symtab.h"
@@ -18,13 +19,14 @@ static void test_lexer(void)
#ifdef __OPTIMIZE__
printf("--- RUNNING OPTIMIZED ---\n");
#endif
run_arena_allocator_tests();
printf("Begin lexer testing.\n");
printf("-- Check number of keywords...\n");
int tokens_found = 0;
const int EXPECTED_TOKENS = 91;
const int EXPECTED_TOKENS = 12 + 72 + 9;
const char* tokens[TOKEN_EOF];
int len[TOKEN_EOF];
compiler_init();
lexer_check_init();
for (int i = 1; i < TOKEN_EOF; i++)
{
const char* token = token_type_to_string((TokenType)i);
@@ -34,7 +36,7 @@ static void test_lexer(void)
const char* interned = symtab_add(token, len[i], fnv1a(token, len[i]), &lookup);
if (lookup != TOKEN_VAR_IDENT)
{
Token scanned = scan_ident_test(token);
Token scanned = lexer_scan_ident_test(token);
TEST_ASSERT(scanned.type == i, "Mismatch scanning: was '%s', expected '%s' - lookup: %s - interned: %s.",
token_type_to_string(scanned.type),
token_type_to_string(i),
@@ -59,7 +61,7 @@ static void test_lexer(void)
{
for (int i = 1; i < TOKEN_EOF; i++)
{
volatile TokenType t = scan_ident_test(tokens[i]).type;
volatile TokenType t = lexer_scan_ident_test(tokens[i]).type;
}
}
@@ -77,13 +79,14 @@ static void test_lexer(void)
bench_begin();
int tokens_parsed = 0;
size_t test_len = strlen(test_parse);
for (int b = 0; b < BENCH_REPEATS; b++)
{
lexer_test_setup(test_parse);
lexer_test_setup(test_parse, test_len);
Token token;
while (1)
{
token = scan_token();
token = lexer_scan_token();
if (token.type == TOKEN_EOF) break;
TEST_ASSERT(token.type != INVALID_TOKEN, "Got invalid token");
tokens_parsed++;
@@ -92,7 +95,6 @@ static void test_lexer(void)
printf("-> Test complete in %fs, %.0f kloc/s, %.0f ktokens/s\n", bench_mark(),
loc * BENCH_REPEATS / (1000 * bench_mark()), tokens_parsed / (1000 * bench_mark()));
}
void test_compiler(void)

View File

@@ -3,7 +3,7 @@
#include "build/project_creation.h"
#include "utils/errors.h"
#include "compiler_tests/tests.h"
#include "compiler/malloc.h"
#include "utils/malloc.h"
int main(int argc, const char *argv[])
{
init_arena();
@@ -16,6 +16,8 @@ int main(int argc, const char *argv[])
case COMMAND_UNIT_TEST:
compiler_tests();
case COMMAND_COMPILE:
compile_file();
break;
case COMMAND_COMPILE_RUN:
case COMMAND_MISSING:
case COMMAND_BUILD:

14
src/utils/common.h Normal file
View File

@@ -0,0 +1,14 @@
#pragma once
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <assert.h>
#include "errors.h"
#define MAX_IDENTIFIER_LENGTH 31

View File

@@ -3,3 +3,22 @@
// license that can be found in the LICENSE file.
#include "errors.h"
#include <stdarg.h>
// printf-style logging helper that targets stderr.
void eprintf(const char *format, ...)
{
	va_list args;
	va_start(args, format);
	vfprintf(stderr, format, args);
	va_end(args);
}
// Print a formatted message followed by a newline to stderr, then
// terminate the process with EXIT_FAILURE. Never returns.
void error_exit(const char *format, ...)
{
	va_list args;
	va_start(args, format);
	vfprintf(stderr, format, args);
	va_end(args);
	fputs("\n", stderr);
	exit(EXIT_FAILURE);
}

View File

@@ -7,17 +7,28 @@
#include <stdio.h>
#include <stdlib.h>
#define error_exit(...) do { fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); exit(EXIT_FAILURE); } while(0)
void eprintf(const char *format, ...);
void error_exit(const char *format, ...);
#define FATAL_ERROR(_string, ...) do { printf("FATAL ERROR at %s:%d: " _string, __func__, __LINE__, ##__VA_ARGS__); printf("\n"); exit(-1); } while(0)
#define FATAL_ERROR(_string, ...) do { error_exit("FATAL ERROR at %s:%d: " _string, __func__, __LINE__, ##__VA_ARGS__); } while(0)
#define ASSERT(_condition, _string, ...) while (!(_condition)) { FATAL_ERROR(_string, ##__VA_ARGS__); }
#define UNREACHABLE FATAL_ERROR("Cannot reach %s:%d", __func__, __LINE__);
#define TODO FATAL_ERROR("Not done yet %s:%d", __func__, __LINE__);
#define TODO FATAL_ERROR("TODO reached", __func__, __LINE__);
#define TEST_ASSERT(_condition, _string, ...) while (!(_condition)) { FATAL_ERROR(_string, ##__VA_ARGS__); }
#define EXPECT(_string, _value, _expected) \
do { long long __tempval1 = _value; long long __tempval2 = _expected; \
TEST_ASSERT(__tempval1 == __tempval2, "Checking " _string ": expected %lld but was %lld.", __tempval2, __tempval1); } while(0)
#ifndef NDEBUG
#define DEBUG_LOG(_string, ...) eprintf("-- DEBUG: "); eprintf(_string, ##__VA_ARGS__); eprintf("\n");
#else
#define DEBUG_LOG(_string, ...)
#endif
#define LOG_FUNC DEBUG_LOG("%s entered", __func__);

View File

@@ -3,6 +3,9 @@
// license that can be found in the LICENSE file.
#include "file_utils.h"
#include "errors.h"
#include "malloc.h"
#include "lib.h"
#include <stdio.h>
#include <stdlib.h>
@@ -18,3 +21,66 @@ const char* expand_path(const char* path)
}
return path;
}
// Derive a module name from a file path: the basename without its extension,
// lower-cased, with non-letter characters replaced by '_'. Writes at most
// MAX_IDENTIFIER_LENGTH characters plus a NUL into 'buffer' and returns the
// resulting length, or 0 if no usable name (fewer than 2 chars) was found.
int filename_to_module(const char *path, char buffer[MAX_IDENTIFIER_LENGTH + 1])
{
	size_t len = strlen(path);
	// BUG FIX: last_slash must start at -1, not 0 — otherwise a path with
	// no '/' silently dropped its first character.
	int last_slash = -1;
	int last_dot = -1;
	for (size_t i = 0; i < len; i++)
	{
		if (path[i] == '/') last_slash = (int)i;
		if (path[i] == '.') last_dot = (int)i;
	}
	int namelen = last_dot - last_slash - 1;
	if (namelen < 2) return 0;
	if (namelen > MAX_IDENTIFIER_LENGTH) namelen = MAX_IDENTIFIER_LENGTH;
	// BUG FIX: iterate over the *clamped* length — the original walked all
	// the way to last_dot, overflowing 'buffer' for names longer than
	// MAX_IDENTIFIER_LENGTH characters.
	for (int i = 0; i < namelen; i++)
	{
		char c = path[last_slash + 1 + i];
		if (is_letter(c))
		{
			c = is_upper(c) ? c + 'a' - 'A' : c;
		}
		else
		{
			c = '_';
		}
		buffer[i] = c;
	}
	buffer[namelen] = '\0';
	return namelen;
}
// Read the whole file at 'path' into a freshly malloc'ed, NUL-terminated
// buffer. The number of bytes read (excluding the terminator) is stored in
// *return_size. Exits with a message on any failure; the caller owns and
// must free the returned buffer.
char *read_file(const char *path, size_t *return_size)
{
	FILE *file = fopen(path, "rb");
	if (file == NULL)
	{
		// error_exit terminates, so the original's trailing exit(74)
		// was unreachable and has been dropped.
		error_exit("Could not open file \"%s\".\n", path);
	}
	if (fseek(file, 0L, SEEK_END) != 0)
	{
		error_exit("Could not seek in file \"%s\".\n", path);
	}
	long file_len = ftell(file);
	// ftell reports failure as -1; the original cast it straight to size_t,
	// turning an error into a gigantic allocation request.
	if (file_len < 0)
	{
		error_exit("Could not get size of file \"%s\".\n", path);
	}
	size_t file_size = (size_t)file_len;
	*return_size = file_size;
	rewind(file);
	char *buffer = malloc(file_size + 1);
	if (buffer == NULL)
	{
		error_exit("Not enough memory to read \"%s\".\n", path);
	}
	size_t bytes_read = fread(buffer, sizeof(char), file_size, file);
	if (bytes_read < file_size)
	{
		error_exit("Failed to read file \"%s\".\n", path);
	}
	buffer[bytes_read] = '\0';
	fclose(file);
	return buffer;
}

View File

@@ -4,5 +4,10 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "common.h"
const char* expand_path(const char* path);
char *read_file(const char *path, size_t *return_size);
int filename_to_module(const char *path, char buffer[MAX_IDENTIFIER_LENGTH + 1]);

View File

@@ -6,6 +6,7 @@
#include <stdint.h>
#include <stdbool.h>
#include "malloc.h"
static inline bool is_power_of_two(uint64_t x)
{
@@ -181,3 +182,53 @@ static inline uint32_t fnv1a(const char *key, uint32_t len)
}
return hash;
}
// ---- Minimal arena-backed growable vector ---------------------------------
// A vector is a plain T* pointing just past a _VHeader; the header stores the
// element count and capacity. All storage comes from malloc_arena, so old
// buffers are never freed individually — the arena reclaims them wholesale.
typedef struct
{
	unsigned size;
	unsigned capacity;
} _VHeader;
// Allocate a vector with room for 'capacity' elements; returns the header.
static inline _VHeader* _vec_new(size_t element_size, size_t capacity)
{
	_VHeader *header = malloc_arena(element_size * capacity + sizeof(_VHeader));
	header->size = 0;
	header->capacity = capacity;
	return header;
}
// Number of elements; a NULL vector counts as empty.
static inline unsigned vec_size(const void*vec)
{
	return vec ? (((_VHeader *)vec) - 1)->size : 0;
}
// Reserve space for one more element, growing (by doubling) when full.
// Returns the (possibly relocated) vector payload pointer.
static inline void* _expand(void *vec, size_t element_size)
{
	if (vec == NULL)
	{
		vec = _vec_new(element_size, 16) + 1;
	}
	_VHeader *header = ((_VHeader *)vec) - 1;
	header->size++;
	if (header->size == header->capacity)
	{
		// BUG FIX: the original allocated capacity >> 1 — i.e. *halved*
		// the vector — then memcpy'ed the full old block into it, a heap
		// buffer overflow as soon as any vector outgrew its capacity.
		// Grow by doubling instead, and restore the correct capacity after
		// the copy (memcpy clobbers the new header with the old one).
		size_t old_capacity = header->capacity;
		_VHeader *new_header = _vec_new(element_size, old_capacity << 1u);
		memcpy(new_header, header, element_size * old_capacity + sizeof(_VHeader));
		new_header->capacity = old_capacity << 1u;
		header = new_header;
		vec = header + 1;
	}
	return vec;
}
#define VECEACH(_vec, _index) \
	unsigned __vecsize = vec_size(_vec); \
	for (unsigned _index = 0; _index < __vecsize; _index++)
#define VECNEW(_type, _capacity) ((_type *)(_vec_new(sizeof(_type), _capacity) + 1))
#define VECADD(_vec, _value) \
	({ \
		typeof(_vec) __temp = (typeof(_vec))_expand((_vec), sizeof((_vec)[0])); \
		__temp[vec_size(__temp) - 1] = _value; \
		__temp; })
#define VECLAST(_vec) ( (_vec) ? (_vec)[vec_size(_vec) - 1] : NULL)

View File

@@ -4,14 +4,11 @@
#include "malloc.h"
#include <stdlib.h>
#include <assert.h>
#include "../utils/errors.h"
#define KB 1024L
static const size_t KB = 1024ul;
// Use 1MB at a time.
#define BUCKET_SIZE (1024 * KB)
#define ARENA_BUCKET_START_SIZE 16
static const size_t MB = KB * 1024ul;
static const size_t BUCKET_SIZE = MB;
static const size_t STARTING_ARENA_BUCKETS = 16;
static uint8_t **arena_buckets;
static int arena_buckets_used;
@@ -21,10 +18,9 @@ static void *current_arena;
static int allocations_done;
void init_arena(void)
{
printf("---- ARENA ALLOCATED ----\n");
arena_buckets = malloc(ARENA_BUCKET_START_SIZE * sizeof(void *));
arena_buckets = malloc(STARTING_ARENA_BUCKETS * sizeof(void *));
arena_buckets_used = 1;
arena_buckets_array_size = ARENA_BUCKET_START_SIZE;
arena_buckets_array_size = STARTING_ARENA_BUCKETS;
arena_buckets[0] = malloc(BUCKET_SIZE);
allocations_done = 0;
current_use = 0;
@@ -34,13 +30,15 @@ void init_arena(void)
// Simple bump allocator with buckets.
void *malloc_arena(size_t mem)
{
if (mem == 0) return NULL;
assert(mem > 0);
// Round to multiple of 16
size_t oldmem = mem;
mem = (mem + 15u) & ~15ull;
assert(mem >= oldmem);
if (mem >= BUCKET_SIZE / 4)
{
void *ret = malloc(mem);
ASSERT(ret, "Out of memory.");
return malloc(mem);
}
if (current_use + mem > BUCKET_SIZE)
@@ -61,20 +59,22 @@ void *malloc_arena(size_t mem)
allocations_done++;
if (mem > 4096)
{
printf("Allocated large chunk %llu\n", (unsigned long long)mem);
// printf("Allocated large chunk %llu\n", (unsigned long long)mem);
}
return (void *)ptr;
}
void free_arena(void)
void print_arena_status(void)
{
printf("-- FREEING ARENA -- \n");
printf("-- ARENA INFO -- \n");
printf(" * Memory used: %ld Kb\n", ((arena_buckets_used - 1) * BUCKET_SIZE + current_use) / 1024);
printf(" * Buckets used: %d\n", arena_buckets_used);
printf(" * Allocations: %d\n", allocations_done);
}
void free_arena(void)
{
for (int i = 0; i < arena_buckets_used; i++)
{
free(arena_buckets[i]);
@@ -84,23 +84,43 @@ void free_arena(void)
arena_buckets = NULL;
arena_buckets_array_size = 0;
current_use = 0;
printf("-- FREE DONE -- \n");
}
void run_arena_allocator_tests(void)
{
init_arena();
printf("Begin arena allocator testing.\n");
bool was_init = arena_buckets != NULL;
if (!was_init) init_arena();
free_arena();
init_arena();
ASSERT(malloc_arena(10) != malloc_arena(10), "Expected different values...");
ASSERT(current_use == 32, "Expected allocations rounded to next 8 bytes");
printf("-- Tested basic allocation - OK.\n");
ASSERT(current_use == 32, "Expected allocations rounded to next 16 bytes");
malloc_arena(1);
ASSERT(current_use == 48, "Expected allocations rounded to next 16 bytes");
printf("-- Tested allocation alignment - OK.\n");
EXPECT("buckets in use", arena_buckets_used, 1);
ASSERT(malloc_arena(BUCKET_SIZE), "Should be possible to allocate this");
for (int i = 0; i < 8; i++)
{
ASSERT(malloc_arena(BUCKET_SIZE / 8), "Should be possible to allocate this");
}
EXPECT("buckets in use", arena_buckets_used, 2);
ASSERT(malloc_arena(1), "Expected alloc to pass");
for (int i = 0; i < 7; i++)
{
ASSERT(malloc_arena(BUCKET_SIZE / 8), "Should be possible to allocate this");
}
EXPECT("buckets in use", arena_buckets_used, 2);
ASSERT(malloc_arena(BUCKET_SIZE / 8), "Expected alloc to pass");
EXPECT("buckets in use", arena_buckets_used, 3);
for (int i = 0; i < 8 * STARTING_ARENA_BUCKETS; i++)
{
ASSERT(malloc_arena(BUCKET_SIZE / 8), "Should be possible to allocate this");
}
EXPECT("buckets in use", arena_buckets_used, STARTING_ARENA_BUCKETS + 3);
printf("-- Test switching buckets - OK.\n");
free_arena();
ASSERT(arena_buckets_array_size == 0, "Arena not freed?");
printf("Passed all arena tests\n");
printf("-- Test freeing arena - OK.\n");
if (was_init) init_arena();
}

View File

@@ -4,7 +4,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "common.h"
void init_arena(void);
void *malloc_arena(unsigned long mem);