mirror of
https://github.com/c3lang/c3c.git
synced 2026-02-27 03:51:18 +00:00
Lexing up and running.
This commit is contained in:
@@ -14,5 +14,8 @@ add_executable(c3c
|
||||
src/build/project_creation.c
|
||||
src/utils/errors.c
|
||||
src/utils/file_utils.c
|
||||
src/utils/string_utils.c
|
||||
src/compiler/lexer.c src/compiler/lexer.h src/compiler_tests/tests.c src/compiler_tests/tests.h src/compiler_tests/benchmark.c src/compiler_tests/benchmark.h)
|
||||
src/compiler/lexer.c
|
||||
src/compiler/tokens.c
|
||||
src/compiler/symtab.c
|
||||
src/compiler_tests/tests.c
|
||||
src/compiler_tests/benchmark.c src/compiler/malloc.c src/compiler/malloc.h src/compiler/compiler.c src/compiler/compiler.h)
|
||||
@@ -13,6 +13,8 @@
|
||||
#include "../utils/errors.h"
|
||||
|
||||
static const char* DEFAULT_TARGET = "default";
|
||||
static const int DEFAULT_SYMTAB_SIZE = 64 * 1024;
|
||||
static const int MAX_SYMTAB_SIZE = 1024 * 1024;
|
||||
|
||||
BuildOptions build_options;
|
||||
static int arg_index;
|
||||
@@ -46,6 +48,7 @@ static void usage(void)
|
||||
OUTPUT(" --path <dir> - Use this as the base directory for the current command.");
|
||||
OUTPUT(" --template <template> - Use a different template: \"lib\", \"staticlib\" or a path.");
|
||||
OUTPUT(" --about - Prints a short description of C3.");
|
||||
OUTPUT(" --symtab <value> - Sets the preferred symtab size.");
|
||||
}
|
||||
|
||||
|
||||
@@ -213,6 +216,17 @@ static void parse_option()
|
||||
build_options.path = check_dir(next_arg());
|
||||
return;
|
||||
}
|
||||
if (match_longopt("symtab"))
|
||||
{
|
||||
if (at_end() || next_is_opt()) error_exit("error: --symtab needs a number.");
|
||||
const char *number = next_arg();
|
||||
int size = atoi(number);
|
||||
if (size < 1024) error_exit("error: --symtab valid size > 1024.");
|
||||
if (size > MAX_SYMTAB_SIZE) error_exit("error: --symptab size cannot exceed %d", MAX_SYMTAB_SIZE);
|
||||
build_options.symtab_size = size;
|
||||
return;
|
||||
|
||||
}
|
||||
if (match_longopt("help"))
|
||||
{
|
||||
break;
|
||||
@@ -236,7 +250,7 @@ void parse_arguments(int argc, const char *argv[])
|
||||
|
||||
build_options.path = ".";
|
||||
build_options.command = COMMAND_MISSING;
|
||||
|
||||
build_options.symtab_size = DEFAULT_SYMTAB_SIZE;
|
||||
arg_count = argc;
|
||||
args = argv;
|
||||
for (arg_index = 1; arg_index < arg_count; arg_index++)
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define MAX_LIB_DIRS 1024
|
||||
#define MAX_FILES 2048
|
||||
|
||||
@@ -33,6 +35,7 @@ typedef struct
|
||||
const char* target;
|
||||
const char* path;
|
||||
CompilerCommand command;
|
||||
uint32_t symtab_size;
|
||||
} BuildOptions;
|
||||
|
||||
extern BuildOptions build_options;
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
#include <string.h>
|
||||
#include "project_creation.h"
|
||||
#include "build_options.h"
|
||||
#include "../utils/string_utils.h"
|
||||
#include "../utils/lib.h"
|
||||
|
||||
const char* TOML =
|
||||
"[[executable]]\n"
|
||||
|
||||
15
src/compiler/compiler.c
Normal file
15
src/compiler/compiler.c
Normal file
@@ -0,0 +1,15 @@
|
||||
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "compiler.h"
|
||||
#include "symtab.h"
|
||||
#include "../build/build_options.h"
|
||||
#include "../utils/lib.h"
|
||||
|
||||
void compiler_init(void)
|
||||
{
|
||||
uint32_t symtab_size = nextHighestPowerOf2(build_options.symtab_size);
|
||||
symtab_init(symtab_size);
|
||||
|
||||
}
|
||||
@@ -1,6 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "string_utils.h"
|
||||
|
||||
// Initializes compiler-wide state. Declared with (void) so this is a
// real prototype and calls with arguments are rejected at compile time.
void compiler_init(void);
|
||||
38
src/compiler/compiler_common.h
Normal file
38
src/compiler/compiler_common.h
Normal file
@@ -0,0 +1,38 @@
|
||||
#pragma once
|
||||
|
||||
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef uint32_t SourceLoc;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
SourceLoc loc;
|
||||
uint32_t length;
|
||||
} SourceRange;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
const char* start;
|
||||
SourceRange span;
|
||||
TokenType type : 8;
|
||||
union
|
||||
{
|
||||
const char *string;
|
||||
};
|
||||
} Token;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
const char *contents;
|
||||
const char *name;
|
||||
SourceLoc start;
|
||||
SourceLoc end;
|
||||
} File;
|
||||
|
||||
#define TOKEN_MAX_LENGTH 0xFFFF
|
||||
#define MAX_IDENTIFIER_LENGTH 31
|
||||
1249
src/compiler/lexer.c
1249
src/compiler/lexer.c
File diff suppressed because it is too large
Load Diff
@@ -4,179 +4,14 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "tokens.h"
|
||||
#include "compiler_common.h"
|
||||
|
||||
typedef enum _TokenType
|
||||
{
|
||||
// Single-character tokens.
|
||||
TOKEN_LPAREN,
|
||||
TOKEN_RPAREN,
|
||||
TOKEN_LBRACE,
|
||||
TOKEN_RBRACE,
|
||||
TOKEN_LBRACKET,
|
||||
TOKEN_RBRACKET,
|
||||
TOKEN_COMMA,
|
||||
TOKEN_DOT,
|
||||
TOKEN_EOS,
|
||||
TOKEN_AT,
|
||||
Token scan_token(void);
|
||||
|
||||
// One or two character tokens.
|
||||
TOKEN_PLUS,
|
||||
TOKEN_PLUSPLUS,
|
||||
TOKEN_PLUS_ASSIGN,
|
||||
TOKEN_BIT_NOT,
|
||||
TOKEN_NOT,
|
||||
TOKEN_MINUS,
|
||||
TOKEN_MINUSMINUS,
|
||||
TOKEN_MINUS_ASSIGN,
|
||||
TOKEN_STAR,
|
||||
TOKEN_MULT_ASSIGN,
|
||||
TOKEN_DIV,
|
||||
TOKEN_DIV_ASSIGN,
|
||||
TOKEN_MOD,
|
||||
TOKEN_MOD_ASSIGN,
|
||||
TOKEN_NOT_EQUAL,
|
||||
TOKEN_EQ,
|
||||
TOKEN_EQEQ,
|
||||
TOKEN_COLON,
|
||||
TOKEN_COLCOLON, // Not used but reserved
|
||||
TOKEN_DOTDOT,
|
||||
TOKEN_QUESTION,
|
||||
|
||||
// Three or more
|
||||
TOKEN_ELIPSIS,
|
||||
TOKEN_GREATER,
|
||||
TOKEN_GREATER_EQ,
|
||||
TOKEN_RIGHT_SHIFT,
|
||||
TOKEN_RIGHT_SHIFT_ASSIGN,
|
||||
TOKEN_LESS,
|
||||
TOKEN_LESS_EQ,
|
||||
TOKEN_LEFT_SHIFT,
|
||||
TOKEN_LEFT_SHIFT_ASSIGN,
|
||||
TOKEN_ARROW, // Not used but reserved
|
||||
TOKEN_AND,
|
||||
TOKEN_AND_ASSIGN,
|
||||
TOKEN_AMP,
|
||||
TOKEN_BIT_AND_ASSIGN,
|
||||
TOKEN_OR,
|
||||
TOKEN_OR_ASSIGN,
|
||||
TOKEN_BIT_OR,
|
||||
TOKEN_BIT_OR_ASSIGN,
|
||||
TOKEN_BIT_XOR,
|
||||
TOKEN_BIT_XOR_ASSIGN,
|
||||
TOKEN_ELVIS,
|
||||
|
||||
TOKEN_F256,
|
||||
TOKEN_I256,
|
||||
TOKEN_U256,
|
||||
TOKEN_F128,
|
||||
TOKEN_I128,
|
||||
TOKEN_U128,
|
||||
TOKEN_F64,
|
||||
TOKEN_I64,
|
||||
TOKEN_U64,
|
||||
TOKEN_F32,
|
||||
TOKEN_I32,
|
||||
TOKEN_U32,
|
||||
TOKEN_F16,
|
||||
TOKEN_I16,
|
||||
TOKEN_U16,
|
||||
TOKEN_I8,
|
||||
TOKEN_U8,
|
||||
TOKEN_QUAD,
|
||||
TOKEN_DOUBLE,
|
||||
TOKEN_FLOAT,
|
||||
TOKEN_HALF,
|
||||
TOKEN_LONG,
|
||||
TOKEN_ULONG,
|
||||
TOKEN_INT,
|
||||
TOKEN_UINT,
|
||||
TOKEN_SHORT,
|
||||
TOKEN_USHORT,
|
||||
TOKEN_BYTE,
|
||||
TOKEN_CHAR,
|
||||
TOKEN_BOOL,
|
||||
TOKEN_ISIZE,
|
||||
TOKEN_USIZE,
|
||||
|
||||
// Literals.
|
||||
|
||||
// In order to make the grammar
|
||||
// non ambiguous, we split tokens at the
|
||||
// lexer level
|
||||
TOKEN_TYPE_IDENT,
|
||||
TOKEN_CAPS_IDENT,
|
||||
TOKEN_VAR_IDENT,
|
||||
|
||||
// We want to parse @foo / #foo / $foo separately.
|
||||
// Otherwise we allow things like "@ foo" which would be pretty bad.
|
||||
TOKEN_AT_IDENT,
|
||||
TOKEN_HASH_IDENT,
|
||||
TOKEN_DOLLAR_IDENT,
|
||||
|
||||
TOKEN_STRING,
|
||||
TOKEN_INTEGER,
|
||||
TOKEN_REAL,
|
||||
|
||||
// Keywords.
|
||||
TOKEN_ALIAS, // Reserved
|
||||
TOKEN_AS,
|
||||
TOKEN_ASM,
|
||||
TOKEN_BREAK,
|
||||
TOKEN_CASE,
|
||||
TOKEN_CAST,
|
||||
TOKEN_CATCH,
|
||||
TOKEN_CONST,
|
||||
TOKEN_CONTINUE,
|
||||
TOKEN_DEFAULT,
|
||||
TOKEN_DEFER,
|
||||
TOKEN_DO,
|
||||
TOKEN_ELSE,
|
||||
TOKEN_ENUM,
|
||||
TOKEN_ERROR,
|
||||
TOKEN_FALSE,
|
||||
TOKEN_FOR,
|
||||
TOKEN_FUNC,
|
||||
TOKEN_GENERIC,
|
||||
TOKEN_GOTO,
|
||||
TOKEN_IF,
|
||||
TOKEN_IMPORT,
|
||||
TOKEN_LOCAL,
|
||||
TOKEN_MACRO,
|
||||
TOKEN_MODULE,
|
||||
TOKEN_NIL,
|
||||
TOKEN_PUBLIC,
|
||||
TOKEN_RETURN,
|
||||
TOKEN_STRUCT,
|
||||
TOKEN_SWITCH,
|
||||
TOKEN_THROW,
|
||||
TOKEN_THROWS,
|
||||
TOKEN_TRUE,
|
||||
TOKEN_TRY,
|
||||
TOKEN_TYPE, // Reserved
|
||||
TOKEN_TYPEDEF,
|
||||
TOKEN_UNION,
|
||||
TOKEN_UNTIL,
|
||||
TOKEN_VAR, // Reserved
|
||||
TOKEN_VOID,
|
||||
TOKEN_VOLATILE,
|
||||
TOKEN_WHILE,
|
||||
|
||||
|
||||
TOKEN_C_USHORT,
|
||||
TOKEN_C_SHORT,
|
||||
TOKEN_C_INT,
|
||||
TOKEN_C_UINT,
|
||||
TOKEN_C_LONG,
|
||||
TOKEN_C_ULONG,
|
||||
TOKEN_C_LONGLONG,
|
||||
TOKEN_C_ULONGLONG,
|
||||
TOKEN_C_LONGDOUBLE,
|
||||
|
||||
INVALID_TOKEN,
|
||||
TOKEN_EOF,
|
||||
|
||||
} TokenType;
|
||||
|
||||
const char *token_type_to_string(TokenType type);
|
||||
TokenType identifier_type(const char* restrict start, int len);
|
||||
TokenType ident_type_fnv1(const char *restrict start, int len);
|
||||
|
||||
Token scan_ident_test(const char* scan);
|
||||
|
||||
void lexer_test_setup(const char* text);
|
||||
|
||||
106
src/compiler/malloc.c
Normal file
106
src/compiler/malloc.c
Normal file
@@ -0,0 +1,106 @@
|
||||
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "malloc.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include "../utils/errors.h"
|
||||
|
||||
#define KB 1024L
|
||||
// Use 1MB at a time.
|
||||
#define BUCKET_SIZE (1024 * KB)
|
||||
#define ARENA_BUCKET_START_SIZE 16
|
||||
|
||||
// State of the bump allocator. All of it is (re)set by init_arena().
static uint8_t **arena_buckets;         // table of bucket base pointers
static int arena_buckets_used;          // number of buckets currently allocated
static size_t arena_buckets_array_size; // number of slots in arena_buckets
static size_t current_use;              // bytes already handed out from the current bucket
static void *current_arena;             // bucket that allocations are currently carved from
static int allocations_done;            // number of bucket-served allocations so far
|
||||
void init_arena(void)
|
||||
{
|
||||
printf("---- ARENA ALLOCATED ----\n");
|
||||
arena_buckets = malloc(ARENA_BUCKET_START_SIZE * sizeof(void *));
|
||||
arena_buckets_used = 1;
|
||||
arena_buckets_array_size = ARENA_BUCKET_START_SIZE;
|
||||
arena_buckets[0] = malloc(BUCKET_SIZE);
|
||||
allocations_done = 0;
|
||||
current_use = 0;
|
||||
current_arena = arena_buckets[0];
|
||||
}
|
||||
|
||||
// Simple bump allocator with buckets.
|
||||
void *malloc_arena(size_t mem)
|
||||
{
|
||||
if (mem == 0) return NULL;
|
||||
// Round to multiple of 16
|
||||
size_t oldmem = mem;
|
||||
mem = (mem + 15u) & ~15ull;
|
||||
assert(mem >= oldmem);
|
||||
if (mem >= BUCKET_SIZE / 4)
|
||||
{
|
||||
return malloc(mem);
|
||||
}
|
||||
if (current_use + mem > BUCKET_SIZE)
|
||||
{
|
||||
if (arena_buckets_used == arena_buckets_array_size)
|
||||
{
|
||||
arena_buckets_array_size *= 2;
|
||||
arena_buckets = realloc(arena_buckets, arena_buckets_array_size * sizeof(void *));
|
||||
ASSERT(arena_buckets, "Ran out of memory after allocating %ld KB", BUCKET_SIZE * arena_buckets_used / KB);
|
||||
}
|
||||
current_arena = malloc(BUCKET_SIZE);
|
||||
ASSERT(current_arena, "Ran out of memory after allocating %ld KB", BUCKET_SIZE * arena_buckets_used / KB);
|
||||
arena_buckets[arena_buckets_used++] = current_arena;
|
||||
current_use = 0;
|
||||
}
|
||||
uint8_t *ptr = current_arena + current_use;
|
||||
current_use += mem;
|
||||
allocations_done++;
|
||||
if (mem > 4096)
|
||||
{
|
||||
printf("Allocated large chunk %llu\n", (unsigned long long)mem);
|
||||
}
|
||||
return (void *)ptr;
|
||||
|
||||
}
|
||||
|
||||
|
||||
void free_arena(void)
|
||||
{
|
||||
printf("-- FREEING ARENA -- \n");
|
||||
printf(" * Memory used: %ld Kb\n", ((arena_buckets_used - 1) * BUCKET_SIZE + current_use) / 1024);
|
||||
printf(" * Buckets used: %d\n", arena_buckets_used);
|
||||
printf(" * Allocations: %d\n", allocations_done);
|
||||
|
||||
for (int i = 0; i < arena_buckets_used; i++)
|
||||
{
|
||||
free(arena_buckets[i]);
|
||||
}
|
||||
current_arena = NULL;
|
||||
arena_buckets_used = 0;
|
||||
arena_buckets = NULL;
|
||||
arena_buckets_array_size = 0;
|
||||
current_use = 0;
|
||||
printf("-- FREE DONE -- \n");
|
||||
}
|
||||
|
||||
|
||||
void run_arena_allocator_tests(void)
|
||||
{
|
||||
init_arena();
|
||||
free_arena();
|
||||
init_arena();
|
||||
ASSERT(malloc_arena(10) != malloc_arena(10), "Expected different values...");
|
||||
ASSERT(current_use == 32, "Expected allocations rounded to next 8 bytes");
|
||||
EXPECT("buckets in use", arena_buckets_used, 1);
|
||||
ASSERT(malloc_arena(BUCKET_SIZE), "Should be possible to allocate this");
|
||||
EXPECT("buckets in use", arena_buckets_used, 2);
|
||||
ASSERT(malloc_arena(1), "Expected alloc to pass");
|
||||
EXPECT("buckets in use", arena_buckets_used, 3);
|
||||
free_arena();
|
||||
ASSERT(arena_buckets_array_size == 0, "Arena not freed?");
|
||||
printf("Passed all arena tests\n");
|
||||
}
|
||||
16
src/compiler/malloc.h
Normal file
16
src/compiler/malloc.h
Normal file
@@ -0,0 +1,16 @@
|
||||
#pragma once
|
||||
|
||||
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
||||
|
||||
#include <stddef.h>

// Allocates the bucket table and the first bucket.
void init_arena(void);

// Returns at least `mem` bytes, or NULL when mem is 0. The parameter is
// size_t so the declaration matches the definition on every data model.
void *malloc_arena(size_t mem);

// Releases the buckets and resets the arena bookkeeping.
void free_arena(void);

// Runs the allocator's built-in self-test.
void run_arena_allocator_tests(void);
|
||||
|
||||
#define MALLOC(mem) malloc_arena(mem)
|
||||
#define MALLOCS(type) malloc_arena(sizeof(type))
|
||||
213
src/compiler/symtab.c
Normal file
213
src/compiler/symtab.c
Normal file
@@ -0,0 +1,213 @@
|
||||
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "symtab.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "../utils/errors.h"
|
||||
#include <assert.h>
|
||||
#include "../utils/lib.h"
|
||||
#include "malloc.h"
|
||||
#include "tokens.h"
|
||||
|
||||
#define TABLE_MAX_LOAD 0.75
|
||||
#define MAX_HASH_SIZE (1024 * 1024)
|
||||
|
||||
|
||||
typedef struct _SymEntry
|
||||
{
|
||||
const char *value;
|
||||
TokenType type;
|
||||
uint32_t key_len;
|
||||
uint32_t hash;
|
||||
} SymEntry;
|
||||
|
||||
typedef struct _SymTab
|
||||
{
|
||||
uint32_t count;
|
||||
uint32_t capacity;
|
||||
SymEntry *entries;
|
||||
} SymTab;
|
||||
|
||||
// Generic keyed entry carrying an explicit key length and hash.
// NOTE(review): this layout differs from SEntry, the element type of STable.
typedef struct _Entry
{
    const char *key;  // key text
    uint32_t key_len; // length of the key
    uint32_t hash;    // hash of the key
    void *value;      // payload stored under the key
} Entry;
|
||||
|
||||
|
||||
static SymTab symtab;
|
||||
|
||||
void symtab_init(uint32_t capacity)
|
||||
{
|
||||
assert (is_power_of_two(capacity) && "Must be a power of two");
|
||||
if (symtab.capacity != 0)
|
||||
{
|
||||
free(symtab.entries);
|
||||
}
|
||||
size_t size = capacity * sizeof(SymEntry);
|
||||
symtab.entries = MALLOC(size);
|
||||
memset(symtab.entries, 0, size);
|
||||
symtab.count = 0;
|
||||
symtab.capacity = capacity;
|
||||
|
||||
// Add keywords.
|
||||
for (int i = 0; i < TOKEN_EOF; i++)
|
||||
{
|
||||
const char* name = token_type_to_string(i);
|
||||
// Skip non-keywords
|
||||
if (!is_lower(name[0]))
|
||||
{
|
||||
if (name[0] != '@' || !is_lower(name[1])) continue;
|
||||
}
|
||||
int len = strlen(name);
|
||||
TokenType type = (TokenType)i;
|
||||
const char* interned = symtab_add(name, strlen(name), fnv1a(name, len), &type);
|
||||
assert(type == i);
|
||||
assert(symtab_add(name, strlen(name), fnv1a(name, len), &type) == interned);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
static inline SymEntry *entry_find(const char *key, uint32_t key_len, uint32_t hash)
|
||||
{
|
||||
uint32_t index = hash & (symtab.capacity - 1);
|
||||
while (1)
|
||||
{
|
||||
SymEntry *entry = &symtab.entries[index];
|
||||
if (entry->key_len == key_len && (entry->value == key || memcmp(key, entry->value, key_len) == 0)) return entry;
|
||||
if (entry->value == NULL)
|
||||
{
|
||||
return entry;
|
||||
}
|
||||
index = (index + 1) % (symtab.capacity - 1);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Intern a symbol in the global symbol table.
 *
 * @param symbol   symbol text (need not be NUL-terminated)
 * @param len      length of the symbol
 * @param fnv1hash hash of the symbol
 * @param type     in: token type to record for a new symbol;
 *                 out: token type stored for an existing symbol
 * @return the interned, NUL-terminated copy of the symbol.
 *
 * The load limit is only enforced when a new entry is inserted, so
 * looking up an existing symbol in a table at its limit still succeeds.
 */
const char *symtab_add(const char *symbol, uint32_t len, uint32_t fnv1hash, TokenType *type)
{
    // An uninitialized table (capacity 0) has no slots to probe.
    SymEntry *entry = symtab.capacity ? entry_find(symbol, len, fnv1hash) : NULL;
    if (entry && entry->value)
    {
        *type = entry->type;
        return entry->value;
    }

    if (!entry || symtab.count + 1 > symtab.capacity * TABLE_MAX_LOAD)
    {
        FATAL_ERROR("Symtab exceeded capacity, please increase --symtab.");
    }

    // Widen before adding 1 so the size cannot wrap in 32 bits.
    char *copy = MALLOC((size_t)len + 1);
    if (!copy)
    {
        FATAL_ERROR("Out of memory while interning a symbol.");
    }
    memcpy(copy, symbol, len);
    copy[len] = '\0';
    entry->value = copy;
    entry->key_len = len;
    entry->hash = fnv1hash;
    entry->type = *type;
    symtab.count++;
    return entry->value;
}
|
||||
|
||||
/**
 * Initialize a pointer-keyed table with `initial_size` empty slots.
 *
 * @param table        table to initialize
 * @param initial_size number of slots; must be a non-zero power of two
 *
 * The slot array is sized from the element type actually stored
 * (SEntry); the original sized it with the unrelated Entry struct.
 */
void stable_init(STable *table, uint32_t initial_size)
{
    assert(initial_size && "Size must be larger than 0");
    assert(is_power_of_two(initial_size) && "Must be a power of two");

    SEntry *entries = MALLOC((size_t)initial_size * sizeof *entries);
    if (!entries)
    {
        FATAL_ERROR("Out of memory while creating a table.");
    }
    for (uint32_t i = 0; i < initial_size; i++)
    {
        entries[i].key = NULL;
        entries[i].value = NULL;
    }
    table->count = 0;
    table->capacity = initial_size;
    table->entries = entries;
}
|
||||
|
||||
/**
 * Remove every entry from the table while keeping its capacity.
 *
 * The cleared span is computed from the stored element type (SEntry).
 * The original used sizeof(Entry), a larger struct, and therefore wrote
 * past the end of slot arrays allocated by stable_set.
 */
void stable_clear(STable *table)
{
    // A table without storage has nothing to clear.
    if (table->entries)
    {
        memset(table->entries, 0, (size_t)table->capacity * sizeof *table->entries);
    }
    table->count = 0;
}
|
||||
|
||||
#define TOMBSTONE ((void *)0x01)
|
||||
/**
 * Find the slot for `key` in a pointer-keyed table.
 *
 * Keys are compared by pointer identity, and the starting slot is
 * derived from the pointer value. Returns the slot holding the key if
 * there is one; otherwise the first tombstone passed on the way, or,
 * if none was passed, the empty slot that ended the probe.
 */
static SEntry *sentry_find(SEntry *entries, uint32_t capacity, const char *key)
{
    const uint32_t mask = capacity - 1;
    SEntry *first_tombstone = NULL;
    for (uint32_t slot = (uint32_t)((((uintptr_t)key) >> 2u) & mask);; slot = (slot + 1) & mask)
    {
        SEntry *candidate = entries + slot;
        if (candidate->key == key) return candidate;
        if (candidate->key != NULL) continue;
        if (candidate->value == TOMBSTONE)
        {
            // Remember only the earliest deleted slot and keep probing.
            if (first_tombstone == NULL) first_tombstone = candidate;
            continue;
        }
        // A genuinely empty slot ends the probe.
        return first_tombstone != NULL ? first_tombstone : candidate;
    }
}
|
||||
|
||||
|
||||
/**
 * Store `value` under `key`, growing the table first when the insert
 * would push it past TABLE_MAX_LOAD.
 *
 * @param value must not be NULL
 * @return the value previously stored under `key`, or NULL if there
 *         was none.
 */
void *stable_set(STable *table, const char *key, void *value)
{
    assert(value && "Cannot insert NULL");

    const int needs_growth = table->count + 1 > table->capacity * TABLE_MAX_LOAD;
    if (needs_growth)
    {
        ASSERT(table->capacity < MAX_HASH_SIZE, "Table size too large, exceeded %d", MAX_HASH_SIZE);

        // Double the capacity, or start at 16 slots for an empty table.
        uint32_t bigger = 16u;
        if (table->capacity) bigger = table->capacity << 1u;

        SEntry *fresh = MALLOC(bigger * sizeof(SEntry));
        for (SEntry *slot = fresh; slot != fresh + bigger; slot++)
        {
            slot->key = NULL;
            slot->value = NULL;
        }

        // Re-insert the keyed entries; tombstones are left behind.
        uint32_t live = 0;
        uint32_t i = 0;
        while (i < table->capacity)
        {
            SEntry *src = table->entries + i++;
            if (src->key == NULL) continue;
            *sentry_find(fresh, bigger, src->key) = *src;
            live++;
        }
        table->count = live;
        table->entries = fresh;
        table->capacity = bigger;
    }

    SEntry *target = sentry_find(table->entries, table->capacity, key);
    void *previous = NULL;
    if (target->value != NULL && target->value != TOMBSTONE) previous = target->value;
    target->key = key;
    target->value = value;
    if (previous == NULL) table->count++;
    return previous;
}
|
||||
|
||||
|
||||
/**
 * Look up `key` in the table.
 *
 * @return the stored value, or NULL when the table has no storage or
 *         the key is not present.
 */
void *stable_get(STable *table, const char *key)
{
    if (table->entries == NULL) return NULL;

    SEntry *slot = sentry_find(table->entries, table->capacity, key);
    if (slot->key != NULL) return slot->value;
    return NULL;
}
|
||||
|
||||
/**
 * Remove `key` from the table, leaving a tombstone in its slot.
 *
 * @return the value that was stored under `key`, or NULL when the
 *         table's count is zero or the key is not present.
 */
void *stable_delete(STable *table, const char *key)
{
    if (table->count == 0) return NULL;

    SEntry *slot = sentry_find(table->entries, table->capacity, key);
    if (slot->key == NULL) return NULL;

    void *removed = slot->value;
    // A NULL key with the TOMBSTONE value marks a deleted slot for sentry_find.
    slot->value = TOMBSTONE;
    slot->key = NULL;
    return removed;
}
|
||||
30
src/compiler/symtab.h
Normal file
30
src/compiler/symtab.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include <stdint.h>
|
||||
#include "tokens.h"
|
||||
|
||||
void symtab_init(uint32_t max_size);
|
||||
const char *symtab_add(const char *symbol, uint32_t len, uint32_t fnv1hash, TokenType *type);
|
||||
|
||||
// One slot of an STable: a key pointer and the value stored under it.
typedef struct _VoidEntry
{
    const char *key; // key pointer; NULL when the slot holds no entry
    void *value;     // value stored under the key
} SEntry;
|
||||
|
||||
typedef struct _STable
|
||||
{
|
||||
uint32_t count;
|
||||
uint32_t capacity;
|
||||
SEntry *entries;
|
||||
} STable;
|
||||
|
||||
// Prepares `table` with `initial_size` empty slots.
void stable_init(STable *table, uint32_t initial_size);

// Stores `value` under `key`; returns the value it replaced, or NULL.
void *stable_set(STable *table, const char *key, void *value);

// Returns the value stored under `key`, or NULL if there is none.
void *stable_get(STable *table, const char *key);

// Removes `key`; returns the value it held, or NULL if it was absent.
void *stable_delete(STable *table, const char *key);

// Empties the table without changing its capacity.
void stable_clear(STable *table);
|
||||
336
src/compiler/tokens.c
Normal file
336
src/compiler/tokens.c
Normal file
@@ -0,0 +1,336 @@
|
||||
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "tokens.h"
|
||||
#include "../utils/errors.h"
|
||||
|
||||
/**
 * Return the textual form of a token type.
 *
 * Punctuation, keywords and built-in type names map to their source
 * spelling; tokens with variable content map to an angle-bracket
 * placeholder such as "<string>". A value with no case below reaches
 * UNREACHABLE.
 *
 * symtab_init registers keywords from these strings, so each keyword
 * must be spelled exactly as it appears in source. TOKEN_C_SHORT was
 * previously "c_ishort", unlike its siblings c_int / c_long.
 */
const char *token_type_to_string(TokenType type)
{
    switch (type)
    {
        // Punctuation and operators
        case TOKEN_HASH: return "#";
        case TOKEN_DOLLAR: return "$";
        case TOKEN_LPAREN: return "(";
        case TOKEN_RPAREN: return ")";
        case TOKEN_LBRACE: return "{";
        case TOKEN_RBRACE: return "}";
        case TOKEN_LBRACKET: return "[";
        case TOKEN_RBRACKET: return "]";
        case TOKEN_COMMA: return ",";
        case TOKEN_DOT: return ".";
        case TOKEN_EOS: return ";";
        case TOKEN_PLUS: return "+";
        case TOKEN_PLUSPLUS: return "++";
        case TOKEN_PLUS_ASSIGN: return "+=";
        case TOKEN_BIT_NOT: return "~";
        case TOKEN_NOT: return "!";
        case TOKEN_MINUS: return "-";
        case TOKEN_MINUSMINUS: return "--";
        case TOKEN_MINUS_ASSIGN: return "-=";
        case TOKEN_STAR: return "*";
        case TOKEN_MULT_ASSIGN: return "*=";
        case TOKEN_MOD: return "%";
        case TOKEN_MOD_ASSIGN: return "%=";
        case TOKEN_DIV: return "/";
        case TOKEN_DIV_ASSIGN: return "/=";
        case TOKEN_NOT_EQUAL: return "!=";
        case TOKEN_EQ: return "=";
        case TOKEN_EQEQ: return "==";
        case TOKEN_COLON: return ":";
        case TOKEN_COLCOLON: return "::";
        case TOKEN_DOTDOT: return "..";
        case TOKEN_ELIPSIS: return "...";
        case TOKEN_GREATER: return ">";
        case TOKEN_GREATER_EQ: return ">=";
        case TOKEN_SHR: return ">>";
        case TOKEN_SHR_ASSIGN: return ">>=";
        case TOKEN_LESS: return "<";
        case TOKEN_LESS_EQ: return "<=";
        case TOKEN_SHL: return "<<";
        case TOKEN_SHL_ASSIGN: return "<<=";
        case TOKEN_ARROW: return "->";
        case TOKEN_AND: return "&&";
        case TOKEN_AND_ASSIGN: return "&&=";
        case TOKEN_AMP: return "&";
        case TOKEN_BIT_AND_ASSIGN: return "&=";
        case TOKEN_OR: return "||";
        case TOKEN_OR_ASSIGN: return "||=";
        case TOKEN_BIT_OR: return "|";
        case TOKEN_BIT_OR_ASSIGN: return "|=";
        case TOKEN_BIT_XOR: return "^";
        case TOKEN_BIT_XOR_ASSIGN: return "^=";

        // Identifiers and literals
        case TOKEN_VAR_IDENT: return "<varIdent>";
        case TOKEN_TYPE_IDENT: return "<TypeIdent>";
        case TOKEN_STRING: return "<string>";
        case TOKEN_INTEGER: return "<int>";
        case TOKEN_REAL: return "<float>";
        case TOKEN_QUESTION: return "?";
        case TOKEN_ELVIS: return "?:";

        // Keywords
        case TOKEN_VOID: return "void";
        case TOKEN_ALIAS: return "alias";
        case TOKEN_CONST: return "const";
        case TOKEN_VOLATILE: return "volatile";
        case TOKEN_ELSE: return "else";
        case TOKEN_FALSE: return "false";
        case TOKEN_CONTINUE: return "continue";
        case TOKEN_FUNC: return "func";
        case TOKEN_FOR: return "for";
        case TOKEN_IMPORT: return "import";
        case TOKEN_MODULE: return "module";
        case TOKEN_IF: return "if";
        case TOKEN_NIL: return "nil";
        case TOKEN_RETURN: return "return";
        case TOKEN_GOTO: return "goto";
        case TOKEN_DEFER: return "defer";
        case TOKEN_TRUE: return "true";
        case TOKEN_WHILE: return "while";
        case TOKEN_CASE: return "case";
        case TOKEN_ASM: return "asm";
        case TOKEN_DEFAULT: return "default";
        case TOKEN_SWITCH: return "switch";
        case TOKEN_UNTIL: return "until";
        case TOKEN_BREAK: return "break";
        case TOKEN_TYPE: return "type";
        case TOKEN_DO: return "do";
        case TOKEN_PUBLIC: return "public";
        case TOKEN_LOCAL: return "local";
        case TOKEN_STRUCT: return "struct";
        case TOKEN_UNION: return "union";
        case TOKEN_ENUM: return "enum";
        case TOKEN_AS: return "as";
        case TOKEN_AT: return "@";
        case TOKEN_ERROR: return "<error>";
        case TOKEN_EOF: return "<eof>";
        case TOKEN_CAST: return "cast";

        // C compatibility types
        case TOKEN_C_LONGDOUBLE: return "c_longdouble";
        case TOKEN_C_USHORT: return "c_ushort";
        case TOKEN_C_UINT: return "c_uint";
        case TOKEN_C_ULONG: return "c_ulong";
        case TOKEN_C_ULONGLONG: return "c_ulonglong";
        case TOKEN_C_SHORT: return "c_short";
        case TOKEN_C_INT: return "c_int";
        case TOKEN_C_LONG: return "c_long";
        case TOKEN_C_LONGLONG: return "c_longlong";

        case TOKEN_MACRO: return "macro";

        // Sized basic types
        case TOKEN_F256: return "f256";
        case TOKEN_I256: return "i256";
        case TOKEN_U256: return "u256";
        case TOKEN_F128: return "f128";
        case TOKEN_I128: return "i128";
        case TOKEN_U128: return "u128";
        case TOKEN_F64: return "f64";
        case TOKEN_I64: return "i64";
        case TOKEN_U64: return "u64";
        case TOKEN_F32: return "f32";
        case TOKEN_I32: return "i32";
        case TOKEN_U32: return "u32";
        case TOKEN_F16: return "f16";
        case TOKEN_I16: return "i16";
        case TOKEN_U16: return "u16";
        case TOKEN_I8: return "i8";
        case TOKEN_U8: return "u8";

        // Named basic types
        case TOKEN_BOOL: return "bool";
        case TOKEN_QUAD: return "quad";
        case TOKEN_DOUBLE: return "double";
        case TOKEN_FLOAT: return "float";
        case TOKEN_LONG: return "long";
        case TOKEN_ULONG: return "ulong";
        case TOKEN_INT: return "int";
        case TOKEN_UINT: return "uint";
        case TOKEN_SHORT: return "short";
        case TOKEN_USHORT: return "ushort";
        case TOKEN_BYTE: return "byte";
        case TOKEN_CHAR: return "char";
        case TOKEN_ISIZE: return "isize";
        case TOKEN_USIZE: return "usize";

        case TOKEN_GENERIC: return "generic";
        case TOKEN_THROW: return "throw";
        case TOKEN_THROWS: return "throws";
        case TOKEN_TRY: return "try";
        case TOKEN_TYPEDEF: return "typedef";
        case TOKEN_VAR: return "var";
        case TOKEN_HALF: return "half";
        case TOKEN_CAPS_IDENT: return "<CAPS_IDENT>";
        case TOKEN_AT_IDENT: return "<@ident>";
        case TOKEN_HASH_IDENT: return "<#ident>";
        case TOKEN_DOLLAR_IDENT: return "<$ident>";
        case TOKEN_CATCH: return "catch";
        case INVALID_TOKEN: return "<\?\?\?>";

        // Doc comment tokens
        case TOKEN_DOCS_EOL: return "<EOL>";
        case TOKEN_DOCS_START: return "/**";
        case TOKEN_DOCS_END: return "*/";
        case TOKEN_DOCS_LINE: return "<docs line>";
        case TOKEN_AT_REQUIRE: return "@require";
        case TOKEN_AT_ENSURE: return "@ensure";
        case TOKEN_AT_PARAM: return "@param";
        case TOKEN_AT_CONST: return "@const";
        case TOKEN_AT_PURE: return "@pure";
        case TOKEN_AT_RETURN: return "@return";
        case TOKEN_AT_THROWS: return "@throws";
        case TOKEN_AT_REQPARSE: return "@reqparse";
        case TOKEN_AT_DEPRECATED: return "@deprecated";
    }
    UNREACHABLE
}
|
||||
200
src/compiler/tokens.h
Normal file
200
src/compiler/tokens.h
Normal file
@@ -0,0 +1,200 @@
|
||||
#pragma once
|
||||
|
||||
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
||||
typedef enum _TokenType
|
||||
{
|
||||
INVALID_TOKEN = 0,
|
||||
|
||||
// Single-character tokens.
|
||||
TOKEN_AT, // @
|
||||
TOKEN_COMMA, // ,
|
||||
TOKEN_EOS, // ;
|
||||
TOKEN_DOLLAR, // $
|
||||
TOKEN_DOT, // .
|
||||
TOKEN_HASH, // #
|
||||
TOKEN_LBRACE, // {
|
||||
TOKEN_LBRACKET, // [
|
||||
TOKEN_LPAREN, // (
|
||||
TOKEN_RBRACE, // }
|
||||
TOKEN_RBRACKET, // ]
|
||||
TOKEN_RPAREN, // )
|
||||
|
||||
// One or two character tokens.
|
||||
TOKEN_BIT_NOT, // ~
|
||||
TOKEN_COLON, // :
|
||||
TOKEN_COLCOLON, // :: Not used but reserved
|
||||
TOKEN_DIV, // /
|
||||
TOKEN_DIV_ASSIGN, // /=
|
||||
TOKEN_DOCS_START, // /** (will consume an arbitrary number of `*` after this.
|
||||
TOKEN_DOCS_END, // */ (may start with an arbitrary number of `*`
|
||||
TOKEN_DOCS_EOL, // "\n" only seen in docs.
|
||||
TOKEN_DOTDOT, // ..
|
||||
TOKEN_EQ, // =
|
||||
TOKEN_EQEQ, // ==
|
||||
TOKEN_NOT, // !
|
||||
TOKEN_NOT_EQUAL, // !=
|
||||
TOKEN_MINUS, // -
|
||||
TOKEN_MINUSMINUS, // --
|
||||
TOKEN_MINUS_ASSIGN, // -=
|
||||
TOKEN_MOD, // %
|
||||
TOKEN_MOD_ASSIGN, // %=
|
||||
TOKEN_MULT_ASSIGN, // *=
|
||||
TOKEN_PLUS, // +
|
||||
TOKEN_PLUSPLUS, // ++
|
||||
TOKEN_PLUS_ASSIGN, // +=
|
||||
TOKEN_QUESTION, // ?
|
||||
TOKEN_STAR, // *
|
||||
|
||||
// Three or more
|
||||
TOKEN_BIT_AND_ASSIGN, // &=
|
||||
TOKEN_BIT_OR, // |
|
||||
TOKEN_BIT_OR_ASSIGN, // |=
|
||||
TOKEN_BIT_XOR, // ^
|
||||
TOKEN_BIT_XOR_ASSIGN, // ^=
|
||||
TOKEN_AMP, // &
|
||||
TOKEN_AND, // &&
|
||||
TOKEN_AND_ASSIGN, // &&=
|
||||
TOKEN_ARROW, // -> // Not used but reserved
|
||||
TOKEN_ELIPSIS, // ...
|
||||
TOKEN_ELVIS, // ?:
|
||||
TOKEN_GREATER, // >
|
||||
TOKEN_GREATER_EQ, // >=
|
||||
TOKEN_OR, // ||
|
||||
TOKEN_OR_ASSIGN, // ||=
|
||||
TOKEN_SHR, // >>
|
||||
TOKEN_SHR_ASSIGN, // >>=
|
||||
TOKEN_LESS, // <
|
||||
TOKEN_LESS_EQ, // <=
|
||||
TOKEN_SHL, // <<
|
||||
TOKEN_SHL_ASSIGN, // <<=
|
||||
|
||||
// Basic types bit
|
||||
TOKEN_F256, // f256
|
||||
TOKEN_I256, // i256
|
||||
TOKEN_U256, // u256
|
||||
TOKEN_F128, // f128
|
||||
TOKEN_I128, // i128
|
||||
TOKEN_U128, // u128
|
||||
TOKEN_F64, // f64
|
||||
TOKEN_I64, // i64
|
||||
TOKEN_U64, // u64
|
||||
TOKEN_F32, // f32
|
||||
TOKEN_I32, // i32
|
||||
TOKEN_U32, // u32
|
||||
TOKEN_F16, // f16
|
||||
TOKEN_I16, // i16
|
||||
TOKEN_U16, // u16
|
||||
TOKEN_I8, // i8
|
||||
TOKEN_U8, // u8
|
||||
|
||||
// Basic types names
|
||||
TOKEN_BYTE,
|
||||
TOKEN_BOOL,
|
||||
TOKEN_CHAR,
|
||||
TOKEN_DOUBLE,
|
||||
TOKEN_FLOAT,
|
||||
TOKEN_HALF,
|
||||
TOKEN_INT,
|
||||
TOKEN_ISIZE,
|
||||
TOKEN_LONG,
|
||||
TOKEN_SHORT,
|
||||
TOKEN_UINT,
|
||||
TOKEN_ULONG,
|
||||
TOKEN_USHORT,
|
||||
TOKEN_USIZE,
|
||||
TOKEN_QUAD,
|
||||
|
||||
// C compatibility types
|
||||
TOKEN_C_USHORT,
|
||||
TOKEN_C_SHORT,
|
||||
TOKEN_C_INT,
|
||||
TOKEN_C_UINT,
|
||||
TOKEN_C_LONG,
|
||||
TOKEN_C_ULONG,
|
||||
TOKEN_C_LONGLONG,
|
||||
TOKEN_C_ULONGLONG,
|
||||
TOKEN_C_LONGDOUBLE,
|
||||
|
||||
// Literals.
|
||||
|
||||
// In order to make the grammar
|
||||
// non ambiguous, we split tokens at the
|
||||
// lexer level
|
||||
TOKEN_TYPE_IDENT, // FooBarBaz
|
||||
TOKEN_CAPS_IDENT, // FOO_BAR_BAZ
|
||||
TOKEN_VAR_IDENT, // fooBarBaz
|
||||
|
||||
// We want to parse @foo / #foo / $foo separately.
|
||||
// Otherwise we allow things like "@ foo" which would be pretty bad.
|
||||
TOKEN_AT_IDENT, // @foobar
|
||||
TOKEN_HASH_IDENT, // #foobar
|
||||
TOKEN_DOLLAR_IDENT, // $foobar
|
||||
|
||||
TOKEN_STRING, // "Teststring"
|
||||
TOKEN_INTEGER, // 123 0x23 0b10010 0o327
|
||||
TOKEN_REAL, // 0x23.2p-2a 43.23e23
|
||||
TOKEN_DOCS_LINE, // Any line within /** **/
|
||||
|
||||
// Keywords
|
||||
TOKEN_ALIAS, // Reserved
|
||||
TOKEN_AS,
|
||||
TOKEN_ASM,
|
||||
TOKEN_BREAK,
|
||||
TOKEN_CASE,
|
||||
TOKEN_CAST,
|
||||
TOKEN_CATCH,
|
||||
TOKEN_CONST,
|
||||
TOKEN_CONTINUE,
|
||||
TOKEN_DEFAULT,
|
||||
TOKEN_DEFER,
|
||||
TOKEN_DO,
|
||||
TOKEN_ELSE,
|
||||
TOKEN_ENUM,
|
||||
TOKEN_ERROR,
|
||||
TOKEN_FALSE,
|
||||
TOKEN_FOR,
|
||||
TOKEN_FUNC,
|
||||
TOKEN_GENERIC,
|
||||
TOKEN_GOTO,
|
||||
TOKEN_IF,
|
||||
TOKEN_IMPORT,
|
||||
TOKEN_LOCAL,
|
||||
TOKEN_MACRO,
|
||||
TOKEN_MODULE,
|
||||
TOKEN_NIL,
|
||||
TOKEN_PUBLIC,
|
||||
TOKEN_RETURN,
|
||||
TOKEN_STRUCT,
|
||||
TOKEN_SWITCH,
|
||||
TOKEN_THROW,
|
||||
TOKEN_THROWS,
|
||||
TOKEN_TRUE,
|
||||
TOKEN_TRY,
|
||||
TOKEN_TYPE, // Reserved
|
||||
TOKEN_TYPEDEF,
|
||||
TOKEN_UNION,
|
||||
TOKEN_UNTIL,
|
||||
TOKEN_VAR, // Reserved
|
||||
TOKEN_VOID,
|
||||
TOKEN_VOLATILE,
|
||||
TOKEN_WHILE,
|
||||
|
||||
TOKEN_AT_PARAM, // @param
|
||||
TOKEN_AT_THROWS, // @throws
|
||||
TOKEN_AT_RETURN, // @return
|
||||
TOKEN_AT_ENSURE, // @ensure
|
||||
TOKEN_AT_REQUIRE, // @require
|
||||
TOKEN_AT_PURE, // @pure
|
||||
TOKEN_AT_CONST, // @const
|
||||
TOKEN_AT_REQPARSE, // @reqparse
|
||||
TOKEN_AT_DEPRECATED, // @deprecated
|
||||
|
||||
TOKEN_EOF, // \n - SHOULD ALWAYS BE THE LAST TOKEN.
|
||||
|
||||
} TokenType;
|
||||
|
||||
// Maps a TokenType to a C string.
// NOTE(review): the definition is not visible here; the lexer test passes the
// result to strlen() and re-scans it as source text, so it is presumably the
// NUL-terminated spelling of the token - confirm against the implementation.
const char *token_type_to_string(TokenType type);
|
||||
97
src/compiler_tests/shorttest.c
Normal file
97
src/compiler_tests/shorttest.c
Normal file
@@ -0,0 +1,97 @@
|
||||
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Sample C3 source text used as lexer benchmark input.
// It is only ever tokenized, never parsed, so it does not have to be a valid program.
static const char* test_parse =
	"struct Node\n"
	"{\n"
	" uint hole;\n"
	" uint size;\n"
	" Node* next;\n"
	" Node* prev;\n"
	"}\n"
	"\n"
	"struct Footer\n"
	"{ \n"
	" Node &header;\n"
	"}\n"
	"\n"
	"struct Bin \n"
	"{\n"
	" Node& head;\n"
	"}\n"
	"\n"
	"struct Heap \n"
	"{\n"
	" size start;\n"
	" size end;\n"
	" Bin* bins[BIN_COUNT];\n"
	"}\n"
	"\n"
	"const uint OFFSET = 8;\n"
	"\n"
	"/**\n"
	" * @require start > 0\n"
	" */\n"
	"void Heap.init(Heap& heap, usize start) \n"
	"{\n"
	" Node& init_region = @cast(Node&, start);\n"
	" init_region.hole = 1;\n"
	" init_region.size = HEAP_INIT_SIZE - @sizeof(Node) - @sizeof(Footer);\n"
	"\n"
	" init_region.createFoot();\n"
	"\n"
	" heap.bins[get_bin_index(init_region.size)].add(init_region);\n"
	"\n"
	" heap.start = @cast(void*, start);\n"
	" heap.end = @cast(void*, start + HEAP_INIT_SIZE);\n"
	"}\n"
	"\n"
	"void* Heap.alloc(Heap& heap, usize size) \n"
	"{\n"
	" uint index = get_bin_index(size);\n"
	" Bin& temp = @cast(Bin&, heap.bins[index]);\n"
	" Node* found = temp.getBestFit(size);\n"
	"\n"
	" while (!found) \n"
	" {\n"
	" temp = heap.bins[++index];\n"
	" found = temp.getBestFit(size);\n"
	" }\n"
	"\n"
	" if ((found.size - size) > (overhead + MIN_ALLOC_SZ)) \n"
	" {\n"
	" Node& split = @cast(Node*, @cast(char&, found) + sizeof(Node) + sizeof(Footer)) + size);\n"
	" split.size = found.size - size - sizeof(Node) - sizeof(Footer);\n"
	" split.hole = 1;\n"
	" \n"
	" split.createFoot();\n"
	"\n"
	" uint new_idx = get_bin_index(split.size);\n"
	"\n"
	" heap.bins[new_idx].addNode(split); \n"
	"\n"
	" found.size = size; \n"
	" found.createFoot(found); \n"
	" }\n"
	"\n"
	" found.hole = 0; \n"
	" heap.bins[index].removeNode(found);\n"
	" \n"
	" Node& wild = heap.getWilderness(heap);\n"
	" if (wild.size < MIN_WILDERNESS) \n"
	" {\n"
	" uint success = heap.expand(0x1000);\n"
	" if (success == 0) \n"
	" {\n"
	" return nil;\n"
	" }\n"
	" }\n"
	" else if (wild.size > MAX_WILDERNESS) \n"
	" {\n"
	" heap.contract(0x1000);\n"
	" }\n"
	"\n"
	" found.prev = nil;\n"
	" found.next = nil;\n"
	" return &found.next; \n"
	"}";
|
||||
@@ -7,76 +7,103 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <compiler/compiler.h>
|
||||
#include <utils/lib.h>
|
||||
#include <utils/errors.h>
|
||||
#include "benchmark.h"
|
||||
#include "../compiler/symtab.h"
|
||||
|
||||
// Test assertion: when `cond` is false, prints "TEST FAILED: " followed by the
// printf-style message `text` (plus any extra arguments) and exits with status -1.
// `##__VA_ARGS__` is the GNU comma-swallowing extension, so the extra arguments may be omitted.
#define TEST_ASSERT(cond, text, ...) do { if (!(cond)) { printf("\nTEST FAILED: " text "\n", ##__VA_ARGS__); exit(-1); } } while (0)
|
||||
static void test_lexer(void)
|
||||
{
|
||||
#ifdef __OPTIMIZE__
|
||||
printf("--- RUNNING OPTIMIZED ---\n");
|
||||
#endif
|
||||
printf("Begin lexer testing.\n");
|
||||
printf("1. Check number of keywords...");
|
||||
printf("-- Check number of keywords...\n");
|
||||
int tokens_found = 0;
|
||||
const int EXPECTED_TOKENS = 81;
|
||||
const char* tokens[INVALID_TOKEN];
|
||||
int len[INVALID_TOKEN];
|
||||
for (int i = 0; i < INVALID_TOKEN; i++)
|
||||
const int EXPECTED_TOKENS = 91;
|
||||
const char* tokens[TOKEN_EOF];
|
||||
int len[TOKEN_EOF];
|
||||
compiler_init();
|
||||
for (int i = 1; i < TOKEN_EOF; i++)
|
||||
{
|
||||
const char* token = token_type_to_string((TokenType)i);
|
||||
tokens[i] = token;
|
||||
len[i] = strlen(token);
|
||||
TokenType type = identifier_type(token, len[i]);
|
||||
TokenType type2 = ident_type_fnv1(token, len[i]);
|
||||
|
||||
if (type != TOKEN_VAR_IDENT)
|
||||
TokenType lookup = TOKEN_VAR_IDENT;
|
||||
const char* interned = symtab_add(token, len[i], fnv1a(token, len[i]), &lookup);
|
||||
if (lookup != TOKEN_VAR_IDENT)
|
||||
{
|
||||
Token scanned = scan_ident_test(token);
|
||||
TEST_ASSERT(scanned.type == i, "Mismatch scanning: was '%s', expected '%s' - lookup: %s - interned: %s.",
|
||||
token_type_to_string(scanned.type),
|
||||
token_type_to_string(i),
|
||||
token_type_to_string(lookup),
|
||||
interned);
|
||||
tokens_found++;
|
||||
TEST_ASSERT(type == i, "Mismatch on token %s", token);
|
||||
if (type2 != type)
|
||||
{
|
||||
printf("\n(fnv1) Test mismatch on token %s, generated %s\n", token, token_type_to_string(type2));
|
||||
}
|
||||
}
|
||||
tokens[i] = "byte";
|
||||
len[i] = 4;
|
||||
else
|
||||
{
|
||||
tokens[i] = "casi";
|
||||
len[i] = 4;
|
||||
}
|
||||
}
|
||||
printf(" %d found.\n", tokens_found);
|
||||
TEST_ASSERT(ident_type_fnv1("alias ", 6) == TOKEN_VAR_IDENT, "Error in fnv1 ident");
|
||||
TEST_ASSERT(identifier_type("alias ", 6) == TOKEN_VAR_IDENT, "Error in switch ident");
|
||||
TEST_ASSERT(ident_type_fnv1("alias ", 5) != TOKEN_VAR_IDENT, "Error in fnv1 ident2");
|
||||
TEST_ASSERT(identifier_type("alias ", 5) != TOKEN_VAR_IDENT, "Error in switch ident2");
|
||||
TEST_ASSERT(tokens_found == EXPECTED_TOKENS, "Unexpected number of identifiers! Expected %d.", EXPECTED_TOKENS);
|
||||
printf("-> %d keywords found.\n", tokens_found);
|
||||
EXPECT("Keywords", tokens_found, EXPECTED_TOKENS);
|
||||
|
||||
const int BENCH_REPEATS = 10000000;
|
||||
const int BENCH_REPEATS = 100000;
|
||||
|
||||
printf("2. Test keyword lexing speed (switch)... ");
|
||||
printf("-- Test keyword lexing speed...\n");
|
||||
bench_begin();
|
||||
for (int b = 0; b < BENCH_REPEATS; b++)
|
||||
{
|
||||
for (int i = 0; i < INVALID_TOKEN; i++)
|
||||
for (int i = 1; i < TOKEN_EOF; i++)
|
||||
{
|
||||
identifier_type(tokens[i], len[i]);
|
||||
volatile TokenType t = scan_ident_test(tokens[i]).type;
|
||||
}
|
||||
}
|
||||
printf("complete in %fs\n", bench_mark());
|
||||
|
||||
printf("3. Test keyword lexing speed (fnv1)... ");
|
||||
printf("-> Test complete in %fs, %.0f kkeywords/s\n", bench_mark(), (BENCH_REPEATS * (TOKEN_EOF - 1)) / (1000 * bench_mark()));
|
||||
|
||||
#include "shorttest.c"
|
||||
|
||||
printf("-- Test token lexing speed...\n");
|
||||
const char *pointer = test_parse;
|
||||
int loc = 0;
|
||||
while (*pointer != '\0')
|
||||
{
|
||||
if (*(pointer++) == '\n') loc++;
|
||||
}
|
||||
|
||||
bench_begin();
|
||||
int tokens_parsed = 0;
|
||||
for (int b = 0; b < BENCH_REPEATS; b++)
|
||||
{
|
||||
for (int i = 0; i < INVALID_TOKEN; i++)
|
||||
lexer_test_setup(test_parse);
|
||||
Token token;
|
||||
while (1)
|
||||
{
|
||||
ident_type_fnv1(tokens[i], len[i]);
|
||||
token = scan_token();
|
||||
if (token.type == TOKEN_EOF) break;
|
||||
TEST_ASSERT(token.type != INVALID_TOKEN, "Got invalid token");
|
||||
tokens_parsed++;
|
||||
}
|
||||
}
|
||||
printf("complete in %fs\n", bench_mark());
|
||||
|
||||
printf("-> Test complete in %fs, %.0f kloc/s, %.0f ktokens/s\n", bench_mark(),
|
||||
loc * BENCH_REPEATS / (1000 * bench_mark()), tokens_parsed / (1000 * bench_mark()));
|
||||
|
||||
exit(0);
|
||||
}
|
||||
|
||||
// Compiler-level test entry point; at present it only runs compiler_init().
void test_compiler(void)
{
	compiler_init();
}
|
||||
|
||||
// Runs every compiler test in order (lexer first, then compiler) and then
// terminates the process with exit status 0; this function does not return.
void compiler_tests(void)
{
	test_lexer();
	test_compiler();

	exit(0);
}
|
||||
@@ -3,9 +3,10 @@
|
||||
#include "build/project_creation.h"
|
||||
#include "utils/errors.h"
|
||||
#include "compiler_tests/tests.h"
|
||||
|
||||
#include "compiler/malloc.h"
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
init_arena();
|
||||
parse_arguments(argc, argv);
|
||||
switch (build_options.command)
|
||||
{
|
||||
@@ -26,7 +27,7 @@ int main(int argc, const char *argv[])
|
||||
case COMMAND_BENCH:
|
||||
printf("TODO\n");
|
||||
}
|
||||
|
||||
free_arena();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
|
||||
// Prints "FATAL ERROR at <function>:<line>: " followed by the printf-style message
// `_string` (plus any extra arguments) and a newline, then exits with status -1.
#define FATAL_ERROR(_string, ...) do { printf("FATAL ERROR at %s:%d: " _string, __func__, __LINE__, ##__VA_ARGS__); printf("\n"); exit(-1); } while(0)
|
||||
|
||||
// Raises FATAL_ERROR with the given printf-style message when `_condition` is false.
// NOTE(review): this expands to a braced `while` statement rather than `do { } while (0)`,
// so `ASSERT(...);` leaves an extra empty statement - take care in unbraced if/else bodies.
#define ASSERT(_condition, _string, ...) while (!(_condition)) { FATAL_ERROR(_string, ##__VA_ARGS__); }
|
||||
|
||||
// Marks code that must never execute: raises FATAL_ERROR naming the current function and line.
// The expansion already ends in a semicolon.
#define UNREACHABLE FATAL_ERROR("Cannot reach %s:%d", __func__, __LINE__);
|
||||
// Marks unfinished code: raises FATAL_ERROR naming the current function and line.
// The expansion already ends in a semicolon.
#define TODO FATAL_ERROR("Not done yet %s:%d", __func__, __LINE__);
|
||||
|
||||
@@ -18,4 +20,4 @@
|
||||
|
||||
#define EXPECT(_string, _value, _expected) \
|
||||
do { long long __tempval1 = _value; long long __tempval2 = _expected; \
|
||||
TEST_ASSERT(__tempval1 == __tempval2, "Checking " _string ": expected %lld but was %lld.", __tempval2, __tempval1); } while(0);
|
||||
TEST_ASSERT(__tempval1 == __tempval2, "Checking " _string ": expected %lld but was %lld.", __tempval2, __tempval1); } while(0)
|
||||
|
||||
183
src/utils/lib.h
Normal file
183
src/utils/lib.h
Normal file
@@ -0,0 +1,183 @@
|
||||
#pragma once
|
||||
|
||||
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
// Returns true when exactly one bit of x is set (1, 2, 4, 8, ...).
// Zero is not a power of two.
static inline bool is_power_of_two(uint64_t x)
{
	if (x == 0) return false;
	// Clearing the lowest set bit leaves zero only if it was the sole bit.
	uint64_t without_lowest_bit = x & (x - 1);
	return without_lowest_bit == 0;
}
|
||||
|
||||
// Rounds v up to the nearest power of two; a value that already is a power of
// two is returned unchanged.
// The result wraps to 0 for v == 0 and for v > 0x80000000.
static inline uint32_t nextHighestPowerOf2(uint32_t v)
{
	uint32_t smeared = v - 1u;
	// Copy the highest set bit into every lower position (shifts of 1, 2, 4, 8, 16).
	for (unsigned shift = 1u; shift <= 16u; shift <<= 1u)
	{
		smeared |= smeared >> shift;
	}
	return smeared + 1u;
}
|
||||
|
||||
|
||||
|
||||
// True for 'a'..'z'.
static inline bool is_lower(char c)
{
	return 'a' <= c && c <= 'z';
}
|
||||
|
||||
// True for 'A'..'Z'.
static inline bool is_upper(char c)
{
	return 'A' <= c && c <= 'Z';
}
|
||||
|
||||
// True for the octal digits '0'..'7'.
static inline bool is_oct(char c)
{
	return '0' <= c && c <= '7';
}
|
||||
|
||||
// True for an octal digit '0'..'7' or the digit separator '_'.
static inline bool is_oct_or_(char c)
{
	if (c == '_') return true;
	return c >= '0' && c <= '7';
}
|
||||
|
||||
// True for the binary digits '0' and '1'.
// The parameter is declared as `char` like the sibling predicates; the original
// left it untyped, which relies on implicit int and is not valid C99 or later.
static inline bool is_binary(char c)
{
	return c == '0' || c == '1';
}
|
||||
|
||||
// True for a binary digit ('0' or '1') or the digit separator '_'.
// The parameter is declared as `char` like the sibling predicates; the original
// left it untyped, which relies on implicit int and is not valid C99 or later.
static inline bool is_binary_or_(char c)
{
	switch (c)
	{
		case '0': case '1': case '_':
			return true;
		default:
			return false;
	}
}
|
||||
|
||||
// True for a decimal digit '0'..'9' or the digit separator '_'.
static inline bool is_digit_or_(char c)
{
	if (c == '_') return true;
	return c >= '0' && c <= '9';
}
|
||||
|
||||
// True for the decimal digits '0'..'9'.
static inline bool is_digit(char c)
{
	return '0' <= c && c <= '9';
}
|
||||
|
||||
// True for a hexadecimal digit (0-9, a-f, A-F) or the digit separator '_'.
static inline bool is_hex_or_(char c)
{
	if (c == '_') return true;
	if (c >= '0' && c <= '9') return true;
	if (c >= 'a' && c <= 'f') return true;
	return c >= 'A' && c <= 'F';
}
|
||||
|
||||
// True for a hexadecimal digit: 0-9, a-f or A-F.
static inline bool is_hex(char c)
{
	if (c >= '0' && c <= '9') return true;
	if (c >= 'a' && c <= 'f') return true;
	return c >= 'A' && c <= 'F';
}
|
||||
|
||||
// True for an ASCII letter, a decimal digit or '_' - the characters allowed inside an identifier.
static inline bool is_alphanum_(char c)
{
	bool lower = c >= 'a' && c <= 'z';
	bool upper = c >= 'A' && c <= 'Z';
	bool digit = c >= '0' && c <= '9';
	return lower || upper || digit || c == '_';
}
|
||||
|
||||
// True for an ASCII letter, a-z or A-Z.
static inline bool is_letter(char c)
{
	if (c >= 'a' && c <= 'z') return true;
	return c >= 'A' && c <= 'Z';
}
|
||||
|
||||
|
||||
// 32-bit FNV-1a hash parameters: the FNV prime and the offset basis used as the initial hash.
#define FNV1_PRIME 0x01000193u
#define FNV1_SEED 0x811C9DC5u
// One FNV-1a step: XOR the next character into the running hash, then multiply by the prime.
// NOTE(review): `c` is converted with (unsigned), so a negative `char` is sign-extended
// before the XOR; results for bytes >= 0x80 therefore depend on the signedness of `char`.
#define FNV1a(c, seed) ((uint32_t)((((unsigned)(c)) ^ (seed)) * FNV1_PRIME))

// Computes the 32-bit FNV-1a hash of the first `len` characters of `key`.
// `key` need not be NUL-terminated; a length of 0 yields FNV1_SEED.
static inline uint32_t fnv1a(const char *key, uint32_t len)
{
	uint32_t hash = FNV1_SEED;
	// The index has the same unsigned type as `len`: a signed int index would be
	// compared as unsigned and overflow (undefined behavior) for len > INT_MAX.
	for (uint32_t i = 0; i < len; i++)
	{
		hash = FNV1a(key[i], hash);
	}
	return hash;
}
|
||||
@@ -1,26 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
// True for 'a'..'z'.
static inline bool is_lower(char c)
{
	return !(c < 'a' || c > 'z');
}
|
||||
|
||||
// True for 'A'..'Z'.
static inline bool is_upper(char c)
{
	return !(c < 'A' || c > 'Z');
}
|
||||
|
||||
// True for an ASCII letter, a decimal digit or '_'.
static inline bool is_alphanum_(char c)
{
	if (c == '_') return true;
	if (c >= '0' && c <= '9') return true;
	if (c >= 'A' && c <= 'Z') return true;
	return c >= 'a' && c <= 'z';
}
|
||||
Reference in New Issue
Block a user