Lexer cleanup

This commit is contained in:
Christoffer Lerno
2020-03-13 17:11:31 +01:00
parent ebbea2ac42
commit 2e3bbf119c
10 changed files with 94 additions and 123 deletions

View File

@@ -18,8 +18,8 @@ Checks: >
-google-runtime-references,
-misc-non-private-member-variables-in-classes,
-readability-named-parameter,
-readability-magic-numbers,
-readability-braces-around-statements,
-readability-magic-numbers
# Turn all the warnings from the checks above into errors.
WarningsAsErrors: "*"

View File

@@ -21,7 +21,7 @@ static void compiler_lex(BuildTarget *target)
File *file = source_file_load(target->sources[i], &loaded);
if (loaded) continue;
Lexer lexer;
lexer_add_file_for_lexing(&lexer, file);
lexer_init_with_file(&lexer, file);
printf("# %s\n", file->full_path);
while (1)
{

View File

@@ -9,4 +9,5 @@
void compiler_init();
void compile_files(BuildTarget *target);
void build();
void symtab_init(uint32_t max_size);

View File

@@ -1003,10 +1003,9 @@ static inline bool func_has_error_return(FunctionSignature *func_sig)
Token lexer_scan_token(Lexer *lexer);
Token lexer_scan_ident_test(Lexer *lexer, const char *scan);
void lexer_test_setup(Lexer *lexer, const char *text, size_t len);
void lexer_add_file_for_lexing(Lexer *lexer, File *file);
void lexer_init_for_test(Lexer *lexer, const char *text, size_t len);
void lexer_init_with_file(Lexer *lexer, File *file);
File* lexer_current_file(Lexer *lexer);
void lexer_check_init(void);
typedef enum
@@ -1059,7 +1058,6 @@ void *stable_get(STable *table, const char *key);
void *stable_delete(STable *table, const char *key);
void stable_clear(STable *table);
void symtab_init(uint32_t max_size);
const char *symtab_add(const char *symbol, uint32_t len, uint32_t fnv1hash, TokenType *type);
void target_setup();

View File

@@ -4,11 +4,7 @@
#include "compiler_internal.h"
Token next_tok;
Token tok;
// --- Lexing general methods.
#pragma mark --- Lexing general methods.
static inline char peek(Lexer *lexer)
{
@@ -30,17 +26,11 @@ void lexer_store_line_end(Lexer *lexer)
source_file_append_line_end(lexer->current_file, lexer->current_file->start_id + lexer->current - lexer->file_begin);
}
// Look one character ahead of the current position without consuming it.
static inline char peek_next(Lexer *lexer)
{
	return *(lexer->current + 1);
}
// Look two characters ahead of the current position without consuming anything.
static inline char peek_next_next(Lexer *lexer)
{
	return *(lexer->current + 2);
}
static inline char next(Lexer *lexer)
{
return *(lexer->current++);
@@ -56,9 +46,12 @@ static inline bool reached_end(Lexer *lexer)
return *lexer->current == '\0';
}
static inline SourceLoc loc_from_ptr(Lexer *lexer, const char *start)
static inline bool match(Lexer *lexer, char expected)
{
return (SourceLoc) (lexer->current_file->start_id + (start - lexer->file_begin));
if (reached_end(lexer)) return false;
if (*lexer->current != expected) return false;
lexer->current++;
return true;
}
static inline SourceRange range_from_ptr(Lexer *lexer, const char *start, const char *end)
@@ -69,6 +62,8 @@ static inline SourceRange range_from_ptr(Lexer *lexer, const char *start, const
};
}
#pragma mark --- Token creation
static Token error_token(Lexer *lexer, const char *message, ...)
{
Token token = {
@@ -108,27 +103,16 @@ static Token make_string_token(Lexer *lexer, TokenType type, const char* string)
};
}
// Conditionally consume the next character: advances and returns true only
// when the current character equals 'expected' and the end of input has not
// been reached; otherwise leaves the position unchanged and returns false.
static inline bool match(Lexer *lexer, char expected)
{
	if (reached_end(lexer) || *lexer->current != expected) return false;
	++lexer->current;
	return true;
}
// Unconditionally consume the next character, asserting (in debug builds)
// that it is exactly 'expected' and that input has not ended.
static inline void match_assert(Lexer *lexer, char expected)
{
	assert(!reached_end(lexer));
	assert(*lexer->current == expected);
	++lexer->current;
}
#pragma mark --- Comment parsing
static inline Token parse_line_comment(Lexer *lexer)
{
// // style comment
// Skip forward to the end.
/// is a doc line comment.
TokenType comment_type = match(lexer, '/') ? TOKEN_DOC_COMMENT : TOKEN_COMMENT;
while (!reached_end(lexer) && peek(lexer) != '\n')
{
next(lexer);
@@ -207,12 +191,11 @@ static inline Token parse_multiline_comment(Lexer *lexer)
next(lexer);
}
}
/**
* Skip regular comments.
*
* @return the result of the skip (did we enter docs? did we have any errors?)
* Skip regular whitespace.
*/
void skip_whitespace(Lexer *lexer)
static void skip_whitespace(Lexer *lexer)
{
while (1)
{
@@ -235,7 +218,7 @@ void skip_whitespace(Lexer *lexer)
}
// --- Normal scanning methods start here
#pragma mark --- Identifier scanning
static inline Token scan_prefixed_ident(Lexer *lexer, TokenType type, TokenType no_ident_type, bool ends_with_bang, const char *start)
{
@@ -254,12 +237,6 @@ static inline Token scan_prefixed_ident(Lexer *lexer, TokenType type, TokenType
return make_string_token(lexer, type, interned);
}
// Advance past a run of characters accepted by is_alphanum_()
// (by its name, alphanumerics and '_' — defined elsewhere in this file).
static inline void scan_skipped_ident(Lexer *lexer)
{
	for (;;)
	{
		if (!is_alphanum_(peek(lexer))) return;
		next(lexer);
	}
}
// Parses identifiers. Note that this is a bit complicated here since
// we split identifiers into 2 types + find keywords.
@@ -315,8 +292,7 @@ static inline Token scan_ident(Lexer *lexer)
return make_string_token(lexer, type, interned_string);
}
#pragma mark ----- Number scanning
#pragma mark --- Number scanning
static Token scan_oct(Lexer *lexer)
{
@@ -327,42 +303,18 @@ static Token scan_oct(Lexer *lexer)
}
Token scan_binary(Lexer *lexer)
static Token scan_binary(Lexer *lexer)
{
char b = next(lexer); // Skip the b
next(lexer); // Skip the b
if (!is_binary(next(lexer)))
{
return error_token(lexer, "An expression starting with '0%c' would expect a sequence of zeroes and ones, "
"did you try to write a hex value but forgot the '0x'?", b);
return error_token(lexer, "An expression starting with '0b' would expect a sequence of zeroes and ones, "
"did you try to write a hex value but forgot the '0x'?");
}
while (is_binary_or_(peek(lexer))) next(lexer);
return make_token(lexer, TOKEN_INTEGER, lexer->lexing_start);
}
/*
 * Shared scanner body for numeric literals, expanded into scan_dec / scan_hex.
 * Parameters:
 *   is_num                 - predicate for a bare digit in this base
 *   is_num_with_underscore - predicate accepting digits and '_' separators
 *   exp / EXP              - lower/upper-case exponent marker ('e'/'E' or 'p'/'P')
 * The expansion consumes: the integer digits (with '_' allowed between them),
 * an optional fractional part after '.', and an optional signed exponent.
 * It ends with a return statement, producing a TOKEN_FLOAT or TOKEN_INTEGER
 * token starting at lexer->lexing_start, or an error token for:
 *   - '_' immediately after the decimal point,
 *   - a non-digit where the exponent value should start,
 *   - a trailing '_' after the last digit (checked via prev(lexer), which
 *     presumably yields the last consumed character — defined elsewhere).
 */
#define PARSE_SPECIAL_NUMBER(is_num, is_num_with_underscore, exp, EXP) \
while (is_num_with_underscore(peek(lexer))) next(lexer); \
bool is_float = false; \
if (peek(lexer) == '.') \
{ \
is_float = true; \
next(lexer); \
char c = peek(lexer); \
if (c == '_') return error_token(lexer, "Can't parse this as a floating point value due to the '_' directly after decimal point."); \
if (is_num(c)) next(lexer); \
while (is_num_with_underscore(peek(lexer))) next(lexer); \
} \
char c = peek(lexer); \
if (c == (exp) || c == (EXP)) \
{ \
is_float = true; \
next(lexer); \
char c2 = next(lexer); \
if (c2 == '+' || c2 == '-') c2 = next(lexer); \
if (!is_num(c2)) return error_token(lexer, "Parsing the floating point exponent failed, because '%c' is not a number.", c2); \
while (is_num(peek(lexer))) next(lexer); \
} \
if (prev(lexer) == '_') return error_token(lexer, "The number ended with '_', but that character needs to be between, not after, digits."); \
return make_token(lexer, is_float ? TOKEN_FLOAT : TOKEN_INTEGER, lexer->lexing_start)
static inline Token scan_hex(Lexer *lexer)
{
@@ -372,15 +324,58 @@ static inline Token scan_hex(Lexer *lexer)
return error_token(lexer, "'0%c' starts a hexadecimal number, "
"but it was followed by '%c' which is not part of a hexadecimal number.", x, prev(lexer));
}
PARSE_SPECIAL_NUMBER(is_hex, is_hex_or_, 'p', 'P');
while (is_hex_or_(peek(lexer))) next(lexer);
bool is_float = false;
if (peek(lexer) == '.')
{
is_float = true;
next(lexer);
char c = peek(lexer);
if (c == '_') return error_token(lexer, "Can't parse this as a floating point value due to the '_' directly after decimal point.");
if (is_hex(c)) next(lexer);
while (is_hex_or_(peek(lexer))) next(lexer);
}
char c = peek(lexer);
if (c == 'p' || c == 'P')
{
is_float = true;
next(lexer);
char c2 = next(lexer);
if (c2 == '+' || c2 == '-') c2 = next(lexer);
if (!is_hex(c2)) return error_token(lexer, "Parsing the floating point exponent failed, because '%c' is not a number.", c2);
while (is_hex(peek(lexer))) next(lexer);
}
if (prev(lexer) == '_') return error_token(lexer, "The number ended with '_', but that character needs to be between, not after, digits.");
return make_token(lexer, is_float ? TOKEN_FLOAT : TOKEN_INTEGER, lexer->lexing_start);
}
static inline Token scan_dec(Lexer *lexer)
{
PARSE_SPECIAL_NUMBER(is_digit, is_digit_or_, 'e', 'E');
while (is_digit_or_(peek(lexer))) next(lexer);
bool is_float = false;
if (peek(lexer) == '.')
{
is_float = true;
next(lexer);
char c = peek(lexer);
if (c == '_') return error_token(lexer, "Can't parse this as a floating point value due to the '_' directly after decimal point.");
if (is_digit(c)) next(lexer);
while (is_digit_or_(peek(lexer))) next(lexer);
}
char c = peek(lexer);
if (c == 'e' || c == 'E')
{
is_float = true;
next(lexer);
char c2 = next(lexer);
if (c2 == '+' || c2 == '-') c2 = next(lexer);
if (!is_digit(c2)) return error_token(lexer, "Parsing the floating point exponent failed, because '%c' is not a number.", c2);
while (is_digit(peek(lexer))) next(lexer);
}
if (prev(lexer) == '_') return error_token(lexer, "The number ended with '_', but that character needs to be between, not after, digits.");
return make_token(lexer, is_float ? TOKEN_FLOAT : TOKEN_INTEGER, lexer->lexing_start);
}
#undef PARSE_SPECIAL_NUMBER
static inline Token scan_digit(Lexer *lexer)
{
@@ -407,8 +402,7 @@ static inline Token scan_digit(Lexer *lexer)
return scan_dec(lexer);
}
#pragma mark -----
#pragma mark --- Character & string scan
static inline Token scan_char(Lexer *lexer)
{
@@ -467,25 +461,7 @@ static inline Token scan_string(Lexer *lexer)
return make_token(lexer, TOKEN_STRING, lexer->lexing_start);
}
// Skip horizontal whitespace (space, tab, CR, form feed) without consuming
// newlines or any other character — stops at the first non-listed character.
static inline void skip_docs_whitespace(Lexer *lexer)
{
	for (;;)
	{
		char c = peek(lexer);
		if (c != ' ' && c != '\t' && c != '\r' && c != '\f') return;
		next(lexer);
	}
}
#pragma mark --- Lexer public functions
Token lexer_scan_token(Lexer *lexer)
{
@@ -597,27 +573,18 @@ File* lexer_current_file(Lexer *lexer)
return lexer->current_file;
}
void lexer_check_init()
void lexer_init_with_file(Lexer *lexer, File *file)
{
static bool symtab_has_init = false;
if (symtab_has_init) return;
symtab_has_init = true;
symtab_init(build_options.symtab_size);
}
void lexer_add_file_for_lexing(Lexer *lexer, File *file)
{
lexer_check_init();
lexer->current_file = file;
lexer->file_begin = lexer->current_file->contents;
lexer->lexing_start = lexer->file_begin;
lexer->current = lexer->lexing_start;
}
void lexer_test_setup(Lexer *lexer, const char *text, size_t len)
#pragma mark --- Test methods
void lexer_init_for_test(Lexer *lexer, const char *text, size_t len)
{
lexer_check_init();
static File helper;
lexer->lexing_start = text;
lexer->current = text;
@@ -629,8 +596,6 @@ void lexer_test_setup(Lexer *lexer, const char *text, size_t len)
lexer->current_file->name = "Test";
}
Token lexer_scan_ident_test(Lexer *lexer, const char *scan)
{
static File helper;

View File

@@ -2847,7 +2847,7 @@ void parse_current(Context *context)
void parse_file(Context *context)
{
lexer_add_file_for_lexing(&context->lexer, context->file);
lexer_init_with_file(&context->lexer, context->file);
parse_current(context);
}

View File

@@ -22,7 +22,6 @@ static void test_lexer(void)
const char* tokens[TOKEN_EOF];
int len[TOKEN_EOF];
Lexer lexer;
lexer_check_init();
for (int i = 1; i < TOKEN_EOF; i++)
{
const char* token = token_type_to_string((TokenType)i);
@@ -78,7 +77,7 @@ static void test_lexer(void)
size_t test_len = strlen(test_parse);
for (int b = 0; b < BENCH_REPEATS; b++)
{
lexer_test_setup(&lexer, test_parse, test_len);
lexer_init_for_test(&lexer, test_parse, test_len);
Token token;
while (1)
{

View File

@@ -7,8 +7,15 @@
int main(int argc, const char *argv[])
{
init_arena();
// First setup memory
memory_init();
// Parse arguments.
parse_arguments(argc, argv);
// Now we set up the symtab.
symtab_init(build_options.symtab_size);
switch (build_options.command)
{
case COMMAND_INIT:

View File

@@ -13,7 +13,7 @@ void path_get_dir_and_filename_from_full(const char *full_path, char **filename,
void file_find_top_dir();
void file_add_wildcard_files(const char ***files, const char *path, bool recursive);
void init_arena(void);
void memory_init(void);
void *malloc_arena(unsigned long mem);
void free_arena(void);
@@ -74,6 +74,7 @@ static inline bool is_binary(char c)
return c == '0' || c == '1';
}
static inline bool is_binary_or_(char c)
{
switch (c)

View File

@@ -16,7 +16,7 @@ static size_t arena_buckets_array_size;
static size_t current_use;
static void *current_arena;
static int allocations_done;
void init_arena(void)
void memory_init(void)
{
arena_buckets = malloc(STARTING_ARENA_BUCKETS * sizeof(void *));
arena_buckets_used = 1;
@@ -91,9 +91,9 @@ void run_arena_allocator_tests(void)
{
printf("Begin arena allocator testing.\n");
bool was_init = arena_buckets != NULL;
if (!was_init) init_arena();
if (!was_init) memory_init();
free_arena();
init_arena();
memory_init();
ASSERT(malloc_arena(10) != malloc_arena(10), "Expected different values...");
printf("-- Tested basic allocation - OK.\n");
ASSERT(current_use == 32, "Expected allocations rounded to next 16 bytes");
@@ -122,5 +122,5 @@ void run_arena_allocator_tests(void)
free_arena();
ASSERT(arena_buckets_array_size == 0, "Arena not freed?");
printf("-- Test freeing arena - OK.\n");
if (was_init) init_arena();
if (was_init) memory_init();
}