mirror of
https://github.com/c3lang/c3c.git
synced 2026-02-27 12:01:16 +00:00
Lexer cleanup
This commit is contained in:
@@ -18,8 +18,8 @@ Checks: >
|
||||
-google-runtime-references,
|
||||
-misc-non-private-member-variables-in-classes,
|
||||
-readability-named-parameter,
|
||||
-readability-magic-numbers,
|
||||
-readability-braces-around-statements,
|
||||
-readability-magic-numbers
|
||||
|
||||
# Turn all the warnings from the checks above into errors.
|
||||
WarningsAsErrors: "*"
|
||||
|
||||
@@ -21,7 +21,7 @@ static void compiler_lex(BuildTarget *target)
|
||||
File *file = source_file_load(target->sources[i], &loaded);
|
||||
if (loaded) continue;
|
||||
Lexer lexer;
|
||||
lexer_add_file_for_lexing(&lexer, file);
|
||||
lexer_init_with_file(&lexer, file);
|
||||
printf("# %s\n", file->full_path);
|
||||
while (1)
|
||||
{
|
||||
|
||||
@@ -9,4 +9,5 @@
|
||||
void compiler_init();
|
||||
void compile_files(BuildTarget *target);
|
||||
void build();
|
||||
void symtab_init(uint32_t max_size);
|
||||
|
||||
|
||||
@@ -1003,10 +1003,9 @@ static inline bool func_has_error_return(FunctionSignature *func_sig)
|
||||
|
||||
Token lexer_scan_token(Lexer *lexer);
|
||||
Token lexer_scan_ident_test(Lexer *lexer, const char *scan);
|
||||
void lexer_test_setup(Lexer *lexer, const char *text, size_t len);
|
||||
void lexer_add_file_for_lexing(Lexer *lexer, File *file);
|
||||
void lexer_init_for_test(Lexer *lexer, const char *text, size_t len);
|
||||
void lexer_init_with_file(Lexer *lexer, File *file);
|
||||
File* lexer_current_file(Lexer *lexer);
|
||||
void lexer_check_init(void);
|
||||
|
||||
|
||||
typedef enum
|
||||
@@ -1059,7 +1058,6 @@ void *stable_get(STable *table, const char *key);
|
||||
void *stable_delete(STable *table, const char *key);
|
||||
void stable_clear(STable *table);
|
||||
|
||||
void symtab_init(uint32_t max_size);
|
||||
const char *symtab_add(const char *symbol, uint32_t len, uint32_t fnv1hash, TokenType *type);
|
||||
|
||||
void target_setup();
|
||||
|
||||
@@ -4,11 +4,7 @@
|
||||
|
||||
#include "compiler_internal.h"
|
||||
|
||||
|
||||
Token next_tok;
|
||||
Token tok;
|
||||
|
||||
// --- Lexing general methods.
|
||||
#pragma mark --- Lexing general methods.
|
||||
|
||||
static inline char peek(Lexer *lexer)
|
||||
{
|
||||
@@ -30,17 +26,11 @@ void lexer_store_line_end(Lexer *lexer)
|
||||
source_file_append_line_end(lexer->current_file, lexer->current_file->start_id + lexer->current - lexer->file_begin);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Look one character past the current read position without consuming input.
 *
 * @param lexer the lexer whose `current` cursor is inspected; it is not modified.
 * @return the character stored at `current + 1`.
 */
static inline char peek_next(Lexer *lexer)
{
	const char *lookahead = lexer->current + 1;
	return *lookahead;
}
|
||||
|
||||
/**
 * Look two characters past the current read position without consuming input.
 *
 * @param lexer the lexer whose `current` cursor is inspected; it is not modified.
 * @return the character stored at `current + 2`.
 */
static inline char peek_next_next(Lexer *lexer)
{
	const char *lookahead = lexer->current + 2;
	return *lookahead;
}
|
||||
|
||||
static inline char next(Lexer *lexer)
|
||||
{
|
||||
return *(lexer->current++);
|
||||
@@ -56,9 +46,12 @@ static inline bool reached_end(Lexer *lexer)
|
||||
return *lexer->current == '\0';
|
||||
}
|
||||
|
||||
static inline SourceLoc loc_from_ptr(Lexer *lexer, const char *start)
|
||||
static inline bool match(Lexer *lexer, char expected)
|
||||
{
|
||||
return (SourceLoc) (lexer->current_file->start_id + (start - lexer->file_begin));
|
||||
if (reached_end(lexer)) return false;
|
||||
if (*lexer->current != expected) return false;
|
||||
lexer->current++;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline SourceRange range_from_ptr(Lexer *lexer, const char *start, const char *end)
|
||||
@@ -69,6 +62,8 @@ static inline SourceRange range_from_ptr(Lexer *lexer, const char *start, const
|
||||
};
|
||||
}
|
||||
|
||||
#pragma mark --- Token creation
|
||||
|
||||
static Token error_token(Lexer *lexer, const char *message, ...)
|
||||
{
|
||||
Token token = {
|
||||
@@ -108,27 +103,16 @@ static Token make_string_token(Lexer *lexer, TokenType type, const char* string)
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Consume the current character only when it equals `expected`.
 *
 * @param lexer the lexer to advance on a successful match.
 * @param expected the character required at the current position.
 * @return true if the character matched and the cursor moved forward by one;
 *         false if `reached_end` reports the end of input or the character
 *         differs, in which case the cursor is left where it was.
 */
static inline bool match(Lexer *lexer, char expected)
{
	bool matched = !reached_end(lexer) && *lexer->current == expected;
	if (matched)
	{
		lexer->current += 1;
	}
	return matched;
}
|
||||
|
||||
/**
 * Step over a character that the caller already knows is `expected`.
 *
 * Both preconditions are checked with `assert` only, so when assertions are
 * compiled out the cursor is advanced unconditionally.
 *
 * @param lexer the lexer whose cursor is advanced by one.
 * @param expected the character that must be at the current position.
 */
static inline void match_assert(Lexer *lexer, char expected)
{
	// Precondition: not at the end of the input.
	assert(!reached_end(lexer));
	// Precondition: the character being skipped is the one the caller named.
	assert(lexer->current[0] == expected);
	++lexer->current;
}
|
||||
|
||||
#pragma mark --- Comment parsing
|
||||
|
||||
static inline Token parse_line_comment(Lexer *lexer)
|
||||
{
|
||||
// // style comment
|
||||
// Skip forward to the end.
|
||||
|
||||
/// is a doc line comment.
|
||||
TokenType comment_type = match(lexer, '/') ? TOKEN_DOC_COMMENT : TOKEN_COMMENT;
|
||||
|
||||
while (!reached_end(lexer) && peek(lexer) != '\n')
|
||||
{
|
||||
next(lexer);
|
||||
@@ -207,12 +191,11 @@ static inline Token parse_multiline_comment(Lexer *lexer)
|
||||
next(lexer);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Skip regular comments.
|
||||
*
|
||||
* @return the result of the skip (did we enter docs? did we have any errors?)
|
||||
* Skip regular whitespace.
|
||||
*/
|
||||
void skip_whitespace(Lexer *lexer)
|
||||
static void skip_whitespace(Lexer *lexer)
|
||||
{
|
||||
while (1)
|
||||
{
|
||||
@@ -235,7 +218,7 @@ void skip_whitespace(Lexer *lexer)
|
||||
}
|
||||
|
||||
|
||||
// --- Normal scanning methods start here
|
||||
#pragma mark --- Identifier scanning
|
||||
|
||||
static inline Token scan_prefixed_ident(Lexer *lexer, TokenType type, TokenType no_ident_type, bool ends_with_bang, const char *start)
|
||||
{
|
||||
@@ -254,12 +237,6 @@ static inline Token scan_prefixed_ident(Lexer *lexer, TokenType type, TokenType
|
||||
return make_string_token(lexer, type, interned);
|
||||
}
|
||||
|
||||
/**
 * Advance the lexer past a run of characters accepted by `is_alphanum_`.
 *
 * Stops at the first character returned by `peek` that `is_alphanum_` rejects;
 * that character is left unconsumed. Nothing is returned or recorded.
 *
 * @param lexer the lexer to advance.
 */
static inline void scan_skipped_ident(Lexer *lexer)
{
	for (;;)
	{
		if (!is_alphanum_(peek(lexer))) return;
		next(lexer);
	}
}
|
||||
|
||||
|
||||
|
||||
// Parses identifiers. Note that this is a bit complicated here since
|
||||
// we split identifiers into 2 types + find keywords.
|
||||
@@ -315,8 +292,7 @@ static inline Token scan_ident(Lexer *lexer)
|
||||
return make_string_token(lexer, type, interned_string);
|
||||
}
|
||||
|
||||
|
||||
#pragma mark ----- Number scanning
|
||||
#pragma mark --- Number scanning
|
||||
|
||||
static Token scan_oct(Lexer *lexer)
|
||||
{
|
||||
@@ -327,42 +303,18 @@ static Token scan_oct(Lexer *lexer)
|
||||
}
|
||||
|
||||
|
||||
Token scan_binary(Lexer *lexer)
|
||||
static Token scan_binary(Lexer *lexer)
|
||||
{
|
||||
char b = next(lexer); // Skip the b
|
||||
next(lexer); // Skip the b
|
||||
if (!is_binary(next(lexer)))
|
||||
{
|
||||
return error_token(lexer, "An expression starting with '0%c' would expect a sequence of zeroes and ones, "
|
||||
"did you try to write a hex value but forgot the '0x'?", b);
|
||||
return error_token(lexer, "An expression starting with '0b' would expect a sequence of zeroes and ones, "
|
||||
"did you try to write a hex value but forgot the '0x'?");
|
||||
}
|
||||
while (is_binary_or_(peek(lexer))) next(lexer);
|
||||
return make_token(lexer, TOKEN_INTEGER, lexer->lexing_start);
|
||||
}
|
||||
|
||||
/**
 * Shared tail of a number-scanning function: consumes the remainder of a
 * numeric literal and returns the resulting token.
 *
 * The expansion is a sequence of statements that contains `return`s, so it is
 * only usable as the final part of a function returning `Token`. It refers to
 * a variable named `lexer` that must be in scope at the expansion site, and it
 * declares the locals `is_float`, `c` and `c2`. The last statement has no
 * trailing semicolon; the caller supplies it.
 *
 * Steps performed, in order:
 *  1. Consume every character accepted by `is_num_with_underscore`.
 *  2. If the next character is '.', mark the literal as a float and consume
 *     the '.'. A '_' directly after it yields an error token. Otherwise one
 *     optional `is_num` character and then any `is_num_with_underscore`
 *     characters are consumed.
 *  3. If the next character equals `exp` or `EXP`, mark the literal as a
 *     float, consume it and an optional '+' or '-', and require the following
 *     character to satisfy `is_num` (error token otherwise). Further `is_num`
 *     characters are consumed; underscores are not accepted here.
 *  4. If the last consumed character (`prev(lexer)`) is '_', return an error
 *     token.
 *  5. Otherwise return a TOKEN_FLOAT or TOKEN_INTEGER token built with
 *     `make_token` from `lexer->lexing_start`.
 *
 * @param is_num predicate for a single digit of the number's base.
 * @param is_num_with_underscore predicate used for runs of digits in steps 1 and 2.
 * @param exp lower-case exponent marker character.
 * @param EXP upper-case exponent marker character.
 */
#define PARSE_SPECIAL_NUMBER(is_num, is_num_with_underscore, exp, EXP) \
|
||||
while (is_num_with_underscore(peek(lexer))) next(lexer); \
|
||||
bool is_float = false; \
|
||||
if (peek(lexer) == '.') \
|
||||
{ \
|
||||
is_float = true; \
|
||||
next(lexer); \
|
||||
char c = peek(lexer); \
|
||||
if (c == '_') return error_token(lexer, "Can't parse this as a floating point value due to the '_' directly after decimal point."); \
|
||||
if (is_num(c)) next(lexer); \
|
||||
while (is_num_with_underscore(peek(lexer))) next(lexer); \
|
||||
} \
|
||||
char c = peek(lexer); \
|
||||
if (c == (exp) || c == (EXP)) \
|
||||
{ \
|
||||
is_float = true; \
|
||||
next(lexer); \
|
||||
char c2 = next(lexer); \
|
||||
if (c2 == '+' || c2 == '-') c2 = next(lexer); \
|
||||
if (!is_num(c2)) return error_token(lexer, "Parsing the floating point exponent failed, because '%c' is not a number.", c2); \
|
||||
while (is_num(peek(lexer))) next(lexer); \
|
||||
} \
|
||||
if (prev(lexer) == '_') return error_token(lexer, "The number ended with '_', but that character needs to be between, not after, digits."); \
|
||||
return make_token(lexer, is_float ? TOKEN_FLOAT : TOKEN_INTEGER, lexer->lexing_start)
|
||||
|
||||
static inline Token scan_hex(Lexer *lexer)
|
||||
{
|
||||
@@ -372,15 +324,58 @@ static inline Token scan_hex(Lexer *lexer)
|
||||
return error_token(lexer, "'0%c' starts a hexadecimal number, "
|
||||
"but it was followed by '%c' which is not part of a hexadecimal number.", x, prev(lexer));
|
||||
}
|
||||
PARSE_SPECIAL_NUMBER(is_hex, is_hex_or_, 'p', 'P');
|
||||
while (is_hex_or_(peek(lexer))) next(lexer);
|
||||
bool is_float = false;
|
||||
if (peek(lexer) == '.')
|
||||
{
|
||||
is_float = true;
|
||||
next(lexer);
|
||||
char c = peek(lexer);
|
||||
if (c == '_') return error_token(lexer, "Can't parse this as a floating point value due to the '_' directly after decimal point.");
|
||||
if (is_hex(c)) next(lexer);
|
||||
while (is_hex_or_(peek(lexer))) next(lexer);
|
||||
}
|
||||
char c = peek(lexer);
|
||||
if (c == 'p' || c == 'P')
|
||||
{
|
||||
is_float = true;
|
||||
next(lexer);
|
||||
char c2 = next(lexer);
|
||||
if (c2 == '+' || c2 == '-') c2 = next(lexer);
|
||||
if (!is_hex(c2)) return error_token(lexer, "Parsing the floating point exponent failed, because '%c' is not a number.", c2);
|
||||
while (is_hex(peek(lexer))) next(lexer);
|
||||
}
|
||||
if (prev(lexer) == '_') return error_token(lexer, "The number ended with '_', but that character needs to be between, not after, digits.");
|
||||
return make_token(lexer, is_float ? TOKEN_FLOAT : TOKEN_INTEGER, lexer->lexing_start);
|
||||
}
|
||||
|
||||
static inline Token scan_dec(Lexer *lexer)
|
||||
{
|
||||
PARSE_SPECIAL_NUMBER(is_digit, is_digit_or_, 'e', 'E');
|
||||
while (is_digit_or_(peek(lexer))) next(lexer);
|
||||
bool is_float = false;
|
||||
if (peek(lexer) == '.')
|
||||
{
|
||||
is_float = true;
|
||||
next(lexer);
|
||||
char c = peek(lexer);
|
||||
if (c == '_') return error_token(lexer, "Can't parse this as a floating point value due to the '_' directly after decimal point.");
|
||||
if (is_digit(c)) next(lexer);
|
||||
while (is_digit_or_(peek(lexer))) next(lexer);
|
||||
}
|
||||
char c = peek(lexer);
|
||||
if (c == 'e' || c == 'E')
|
||||
{
|
||||
is_float = true;
|
||||
next(lexer);
|
||||
char c2 = next(lexer);
|
||||
if (c2 == '+' || c2 == '-') c2 = next(lexer);
|
||||
if (!is_digit(c2)) return error_token(lexer, "Parsing the floating point exponent failed, because '%c' is not a number.", c2);
|
||||
while (is_digit(peek(lexer))) next(lexer);
|
||||
}
|
||||
if (prev(lexer) == '_') return error_token(lexer, "The number ended with '_', but that character needs to be between, not after, digits.");
|
||||
return make_token(lexer, is_float ? TOKEN_FLOAT : TOKEN_INTEGER, lexer->lexing_start);
|
||||
}
|
||||
|
||||
#undef PARSE_SPECIAL_NUMBER
|
||||
|
||||
static inline Token scan_digit(Lexer *lexer)
|
||||
{
|
||||
@@ -407,8 +402,7 @@ static inline Token scan_digit(Lexer *lexer)
|
||||
return scan_dec(lexer);
|
||||
}
|
||||
|
||||
#pragma mark -----
|
||||
|
||||
#pragma mark --- Character & string scan
|
||||
|
||||
static inline Token scan_char(Lexer *lexer)
|
||||
{
|
||||
@@ -467,25 +461,7 @@ static inline Token scan_string(Lexer *lexer)
|
||||
return make_token(lexer, TOKEN_STRING, lexer->lexing_start);
|
||||
}
|
||||
|
||||
/**
 * Consume horizontal whitespace at the current position.
 *
 * Skips any run of space, tab, carriage return and form feed characters.
 * Every other character, including '\n', stops the scan and is left
 * unconsumed.
 *
 * @param lexer the lexer to advance.
 */
static inline void skip_docs_whitespace(Lexer *lexer)
{
	for (char ch = peek(lexer);
	     ch == ' ' || ch == '\t' || ch == '\r' || ch == '\f';
	     ch = peek(lexer))
	{
		next(lexer);
	}
}
|
||||
|
||||
#pragma mark --- Lexer public functions
|
||||
|
||||
Token lexer_scan_token(Lexer *lexer)
|
||||
{
|
||||
@@ -597,27 +573,18 @@ File* lexer_current_file(Lexer *lexer)
|
||||
return lexer->current_file;
|
||||
}
|
||||
|
||||
void lexer_check_init()
|
||||
void lexer_init_with_file(Lexer *lexer, File *file)
|
||||
{
|
||||
static bool symtab_has_init = false;
|
||||
if (symtab_has_init) return;
|
||||
symtab_has_init = true;
|
||||
symtab_init(build_options.symtab_size);
|
||||
}
|
||||
|
||||
|
||||
void lexer_add_file_for_lexing(Lexer *lexer, File *file)
|
||||
{
|
||||
lexer_check_init();
|
||||
lexer->current_file = file;
|
||||
lexer->file_begin = lexer->current_file->contents;
|
||||
lexer->lexing_start = lexer->file_begin;
|
||||
lexer->current = lexer->lexing_start;
|
||||
}
|
||||
|
||||
void lexer_test_setup(Lexer *lexer, const char *text, size_t len)
|
||||
#pragma mark --- Test methods
|
||||
|
||||
void lexer_init_for_test(Lexer *lexer, const char *text, size_t len)
|
||||
{
|
||||
lexer_check_init();
|
||||
static File helper;
|
||||
lexer->lexing_start = text;
|
||||
lexer->current = text;
|
||||
@@ -629,8 +596,6 @@ void lexer_test_setup(Lexer *lexer, const char *text, size_t len)
|
||||
lexer->current_file->name = "Test";
|
||||
}
|
||||
|
||||
|
||||
|
||||
Token lexer_scan_ident_test(Lexer *lexer, const char *scan)
|
||||
{
|
||||
static File helper;
|
||||
|
||||
@@ -2847,7 +2847,7 @@ void parse_current(Context *context)
|
||||
|
||||
void parse_file(Context *context)
|
||||
{
|
||||
lexer_add_file_for_lexing(&context->lexer, context->file);
|
||||
lexer_init_with_file(&context->lexer, context->file);
|
||||
parse_current(context);
|
||||
}
|
||||
|
||||
|
||||
@@ -22,7 +22,6 @@ static void test_lexer(void)
|
||||
const char* tokens[TOKEN_EOF];
|
||||
int len[TOKEN_EOF];
|
||||
Lexer lexer;
|
||||
lexer_check_init();
|
||||
for (int i = 1; i < TOKEN_EOF; i++)
|
||||
{
|
||||
const char* token = token_type_to_string((TokenType)i);
|
||||
@@ -78,7 +77,7 @@ static void test_lexer(void)
|
||||
size_t test_len = strlen(test_parse);
|
||||
for (int b = 0; b < BENCH_REPEATS; b++)
|
||||
{
|
||||
lexer_test_setup(&lexer, test_parse, test_len);
|
||||
lexer_init_for_test(&lexer, test_parse, test_len);
|
||||
Token token;
|
||||
while (1)
|
||||
{
|
||||
|
||||
@@ -7,8 +7,15 @@
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
init_arena();
|
||||
// First setup memory
|
||||
memory_init();
|
||||
|
||||
// Parse arguments.
|
||||
parse_arguments(argc, argv);
|
||||
|
||||
// Now we set up the symtab.
|
||||
symtab_init(build_options.symtab_size);
|
||||
|
||||
switch (build_options.command)
|
||||
{
|
||||
case COMMAND_INIT:
|
||||
|
||||
@@ -13,7 +13,7 @@ void path_get_dir_and_filename_from_full(const char *full_path, char **filename,
|
||||
void file_find_top_dir();
|
||||
void file_add_wildcard_files(const char ***files, const char *path, bool recursive);
|
||||
|
||||
void init_arena(void);
|
||||
void memory_init(void);
|
||||
void *malloc_arena(unsigned long mem);
|
||||
void free_arena(void);
|
||||
|
||||
@@ -74,6 +74,7 @@ static inline bool is_binary(char c)
|
||||
return c == '0' || c == '1';
|
||||
}
|
||||
|
||||
|
||||
static inline bool is_binary_or_(char c)
|
||||
{
|
||||
switch (c)
|
||||
|
||||
@@ -16,7 +16,7 @@ static size_t arena_buckets_array_size;
|
||||
static size_t current_use;
|
||||
static void *current_arena;
|
||||
static int allocations_done;
|
||||
void init_arena(void)
|
||||
void memory_init(void)
|
||||
{
|
||||
arena_buckets = malloc(STARTING_ARENA_BUCKETS * sizeof(void *));
|
||||
arena_buckets_used = 1;
|
||||
@@ -91,9 +91,9 @@ void run_arena_allocator_tests(void)
|
||||
{
|
||||
printf("Begin arena allocator testing.\n");
|
||||
bool was_init = arena_buckets != NULL;
|
||||
if (!was_init) init_arena();
|
||||
if (!was_init) memory_init();
|
||||
free_arena();
|
||||
init_arena();
|
||||
memory_init();
|
||||
ASSERT(malloc_arena(10) != malloc_arena(10), "Expected different values...");
|
||||
printf("-- Tested basic allocation - OK.\n");
|
||||
ASSERT(current_use == 32, "Expected allocations rounded to next 16 bytes");
|
||||
@@ -122,5 +122,5 @@ void run_arena_allocator_tests(void)
|
||||
free_arena();
|
||||
ASSERT(arena_buckets_array_size == 0, "Arena not freed?");
|
||||
printf("-- Test freeing arena - OK.\n");
|
||||
if (was_init) init_arena();
|
||||
if (was_init) memory_init();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user