Lexing up and running.

2026-02-27 03:51:18 +00:00 · 2019-07-27 17:20:31 +02:00
parent a9c0d2505c
commit 8f611a400d
21 changed files with 1983 additions and 839 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -14,5 +14,8 @@ add_executable(c3c
        src/build/project_creation.c
        src/utils/errors.c
        src/utils/file_utils.c
-        src/utils/string_utils.c
-        src/compiler/lexer.c src/compiler/lexer.h src/compiler_tests/tests.c src/compiler_tests/tests.h src/compiler_tests/benchmark.c src/compiler_tests/benchmark.h)
+        src/compiler/lexer.c
+        src/compiler/tokens.c
+        src/compiler/symtab.c
+        src/compiler_tests/tests.c
+        src/compiler_tests/benchmark.c src/compiler/malloc.c src/compiler/malloc.h src/compiler/compiler.c src/compiler/compiler.h)
--- a/src/build/build_options.c
+++ b/src/build/build_options.c
@@ -13,6 +13,8 @@
 #include "../utils/errors.h"

 static const char* DEFAULT_TARGET = "default";
+static const int DEFAULT_SYMTAB_SIZE = 64 * 1024;
+static const int MAX_SYMTAB_SIZE = 1024 * 1024;

 BuildOptions build_options;
 static int arg_index;
@@ -46,6 +48,7 @@ static void usage(void)
 	OUTPUT("  --path <dir>          - Use this as the base directory for the current command.");
 	OUTPUT("  --template <template> - Use a different template: \"lib\", \"staticlib\" or a path.");
 	OUTPUT("  --about               - Prints a short description of C3.");
+	OUTPUT("  --symtab <value>      - Sets the preferred symtab size.");
 }


@@ -213,6 +216,17 @@ static void parse_option()
 				build_options.path = check_dir(next_arg());
 				return;
 			}
+			// --symtab <value>: preferred symbol table size, stored in build_options.symtab_size.
+			if (match_longopt("symtab"))
+			{
+				if (at_end() || next_is_opt()) error_exit("error: --symtab needs a number.");
+				const char *number = next_arg();
+				// strtol instead of atoi: trailing garbage is rejected, and an out-of-range
+				// value saturates (and is caught by the range checks) instead of being undefined.
+				char *end = NULL;
+				long size = strtol(number, &end, 10);
+				if (end == number || *end != '\0') error_exit("error: --symtab needs a number.");
+				if (size < 1024) error_exit("error: --symtab size must be at least 1024.");
+				if (size > MAX_SYMTAB_SIZE) error_exit("error: --symtab size cannot exceed %d", MAX_SYMTAB_SIZE);
+				build_options.symtab_size = (uint32_t)size;
+				return;
+			}
 			if (match_longopt("help"))
 			{
 				break;
@@ -236,7 +250,7 @@ void parse_arguments(int argc, const char *argv[])

 	build_options.path = ".";
 	build_options.command = COMMAND_MISSING;
-
+	build_options.symtab_size = DEFAULT_SYMTAB_SIZE;
 	arg_count = argc;
 	args = argv;
 	for (arg_index = 1; arg_index < arg_count; arg_index++)
--- a/src/build/build_options.h
+++ b/src/build/build_options.h
@@ -4,6 +4,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

+#include <stdint.h>
+
 #define MAX_LIB_DIRS 1024
 #define MAX_FILES 2048

@@ -33,6 +35,7 @@ typedef struct
 	const char* target;
 	const char* path;
 	CompilerCommand command;
+	uint32_t symtab_size;
 } BuildOptions;

 extern BuildOptions build_options;
--- a/src/build/project_creation.c
+++ b/src/build/project_creation.c
@@ -10,7 +10,7 @@
 #include <string.h>
 #include "project_creation.h"
 #include "build_options.h"
-#include "../utils/string_utils.h"
+#include "../utils/lib.h"

 const char* TOML =
 	"[[executable]]\n"
--- a/src/compiler/compiler.c
+++ b/src/compiler/compiler.c
@@ -0,0 +1,15 @@
+// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "compiler.h"
+#include "symtab.h"
+#include "../build/build_options.h"
+#include "../utils/lib.h"
+
+// Prepares global compiler state; at the moment that is only the symbol table.
+void compiler_init(void)
+{
+	// symtab_init() asserts a power-of-two capacity, so the configured size goes
+	// through nextHighestPowerOf2() (utils/lib.h) before being handed over.
+	symtab_init(nextHighestPowerOf2(build_options.symtab_size));
+}
--- a/src/utils/string_utils.c
+++ b/src/utils/string_utils.c
@@ -1,6 +1,8 @@
+#pragma once
+
 // Copyright (c) 2019 Christoffer Lerno. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-#include "string_utils.h"

+void compiler_init();
--- a/src/compiler/compiler_common.h
+++ b/src/compiler/compiler_common.h
@@ -0,0 +1,38 @@
+#pragma once
+
+// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+
+typedef uint32_t SourceLoc;
+
+typedef struct // A span of source: a SourceLoc plus a length.
+{
+	SourceLoc loc; // NOTE(review): presumably the position where the span starts — confirm
+	uint32_t length; // NOTE(review): presumably measured in bytes — confirm against the lexer
+} SourceRange;
+
+
+typedef struct // A lexed token; this is the type returned by scan_token() in lexer.h.
+{
+	const char* start; // NOTE(review): presumably points at the token's first character in the source text — confirm
+	SourceRange span; // location and length of the token
+	TokenType type : 8; // token kind in an 8-bit bit-field; TokenType is not declared by this header, so tokens.h must be included first (lexer.h does)
+	union // anonymous union for the token payload; only a string member so far
+	{
+		const char *string;
+	};
+} Token;
+
+typedef struct // Describes one source file.
+{
+	const char *contents; // NOTE(review): presumably the full file text — confirm
+	const char *name; // NOTE(review): presumably the file name or path — confirm
+	SourceLoc start; // NOTE(review): presumably the first SourceLoc belonging to this file — confirm
+	SourceLoc end; // NOTE(review): presumably the last (or one-past-last) SourceLoc of this file — confirm
+} File;
+
+#define TOKEN_MAX_LENGTH 0xFFFF
+#define MAX_IDENTIFIER_LENGTH 31
--- a/src/compiler/lexer.c
+++ b/src/compiler/lexer.c
--- a/src/compiler/lexer.h
+++ b/src/compiler/lexer.h
@@ -4,179 +4,14 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

+#include "tokens.h"
+#include "compiler_common.h"

-typedef enum _TokenType
-{
-	// Single-character tokens.
-	TOKEN_LPAREN,
-	TOKEN_RPAREN,
-	TOKEN_LBRACE,
-	TOKEN_RBRACE,
-	TOKEN_LBRACKET,
-	TOKEN_RBRACKET,
-	TOKEN_COMMA,
-	TOKEN_DOT,
-	TOKEN_EOS,
-	TOKEN_AT,
+Token scan_token(void);

-	// One or two character tokens.
-	TOKEN_PLUS,
-	TOKEN_PLUSPLUS,
-	TOKEN_PLUS_ASSIGN,
-	TOKEN_BIT_NOT,
-	TOKEN_NOT,
-	TOKEN_MINUS,
-	TOKEN_MINUSMINUS,
-	TOKEN_MINUS_ASSIGN,
-	TOKEN_STAR,
-	TOKEN_MULT_ASSIGN,
-	TOKEN_DIV,
-	TOKEN_DIV_ASSIGN,
-	TOKEN_MOD,
-	TOKEN_MOD_ASSIGN,
-	TOKEN_NOT_EQUAL,
-	TOKEN_EQ,
-	TOKEN_EQEQ,
-	TOKEN_COLON,
-	TOKEN_COLCOLON, // Not used but reserved
-	TOKEN_DOTDOT,
-	TOKEN_QUESTION,
-
-	// Three or more
-	TOKEN_ELIPSIS,
-	TOKEN_GREATER,
-	TOKEN_GREATER_EQ,
-	TOKEN_RIGHT_SHIFT,
-	TOKEN_RIGHT_SHIFT_ASSIGN,
-	TOKEN_LESS,
-	TOKEN_LESS_EQ,
-	TOKEN_LEFT_SHIFT,
-	TOKEN_LEFT_SHIFT_ASSIGN,
-	TOKEN_ARROW, // Not used but reserved
-	TOKEN_AND,
-	TOKEN_AND_ASSIGN,
-	TOKEN_AMP,
-	TOKEN_BIT_AND_ASSIGN,
-	TOKEN_OR,
-	TOKEN_OR_ASSIGN,
-	TOKEN_BIT_OR,
-	TOKEN_BIT_OR_ASSIGN,
-	TOKEN_BIT_XOR,
-	TOKEN_BIT_XOR_ASSIGN,
-	TOKEN_ELVIS,
-
-	TOKEN_F256,
-	TOKEN_I256,
-	TOKEN_U256,
-	TOKEN_F128,
-	TOKEN_I128,
-	TOKEN_U128,
-	TOKEN_F64,
-	TOKEN_I64,
-	TOKEN_U64,
-	TOKEN_F32,
-	TOKEN_I32,
-	TOKEN_U32,
-	TOKEN_F16,
-	TOKEN_I16,
-	TOKEN_U16,
-	TOKEN_I8,
-	TOKEN_U8,
-	TOKEN_QUAD,
-	TOKEN_DOUBLE,
-	TOKEN_FLOAT,
-	TOKEN_HALF,
-	TOKEN_LONG,
-	TOKEN_ULONG,
-	TOKEN_INT,
-	TOKEN_UINT,
-	TOKEN_SHORT,
-	TOKEN_USHORT,
-	TOKEN_BYTE,
-	TOKEN_CHAR,
-	TOKEN_BOOL,
-	TOKEN_ISIZE,
-	TOKEN_USIZE,
-
-	// Literals.
-
-	// In order to make the grammar
-	// non ambiguous, we split tokens at the
-	// lexer level
-	TOKEN_TYPE_IDENT,
-	TOKEN_CAPS_IDENT,
-	TOKEN_VAR_IDENT,
-
-	// We want to parse @foo / #foo / $foo separately.
-	// Otherwise we allow things like "@ foo" which would be pretty bad.
-	TOKEN_AT_IDENT,
-	TOKEN_HASH_IDENT,
-	TOKEN_DOLLAR_IDENT,
-
-	TOKEN_STRING,
-	TOKEN_INTEGER,
-	TOKEN_REAL,
-
-	// Keywords.
-	TOKEN_ALIAS, // Reserved
-	TOKEN_AS,
-	TOKEN_ASM,
-	TOKEN_BREAK,
-	TOKEN_CASE,
-	TOKEN_CAST,
-	TOKEN_CATCH,
-	TOKEN_CONST,
-	TOKEN_CONTINUE,
-	TOKEN_DEFAULT,
-	TOKEN_DEFER,
-	TOKEN_DO,
-	TOKEN_ELSE,
-	TOKEN_ENUM,
-	TOKEN_ERROR,
-	TOKEN_FALSE,
-	TOKEN_FOR,
-	TOKEN_FUNC,
-	TOKEN_GENERIC,
-	TOKEN_GOTO,
-	TOKEN_IF,
-	TOKEN_IMPORT,
-	TOKEN_LOCAL,
-	TOKEN_MACRO,
-	TOKEN_MODULE,
-	TOKEN_NIL,
-	TOKEN_PUBLIC,
-	TOKEN_RETURN,
-	TOKEN_STRUCT,
-	TOKEN_SWITCH,
-	TOKEN_THROW,
-	TOKEN_THROWS,
-	TOKEN_TRUE,
-	TOKEN_TRY,
-	TOKEN_TYPE, // Reserved
-	TOKEN_TYPEDEF,
-	TOKEN_UNION,
-	TOKEN_UNTIL,
-	TOKEN_VAR, // Reserved
-	TOKEN_VOID,
-	TOKEN_VOLATILE,
-	TOKEN_WHILE,
-
-
-	TOKEN_C_USHORT,
-	TOKEN_C_SHORT,
-	TOKEN_C_INT,
-	TOKEN_C_UINT,
-	TOKEN_C_LONG,
-	TOKEN_C_ULONG,
-	TOKEN_C_LONGLONG,
-	TOKEN_C_ULONGLONG,
-	TOKEN_C_LONGDOUBLE,
-
-	INVALID_TOKEN,
-	TOKEN_EOF,
-
-} TokenType;
-
-const char *token_type_to_string(TokenType type);
 TokenType identifier_type(const char* restrict start, int len);
 TokenType ident_type_fnv1(const char *restrict start, int len);
+
+Token scan_ident_test(const char* scan);
+
+void lexer_test_setup(const char* text);
--- a/src/compiler/malloc.c
+++ b/src/compiler/malloc.c
@@ -0,0 +1,106 @@
+// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "malloc.h"
+
+#include <stdlib.h>
+#include <assert.h>
+#include "../utils/errors.h"
+
+#define KB 1024L
+// Use 1MB at a time.
+#define BUCKET_SIZE (1024 * KB)
+#define ARENA_BUCKET_START_SIZE 16
+
+static uint8_t **arena_buckets;
+static int arena_buckets_used;
+static size_t arena_buckets_array_size;
+static size_t current_use;
+static void *current_arena;
+static int allocations_done;
+// Sets up the arena: allocates the bucket pointer array and the first bucket,
+// then resets all bookkeeping. Allocation failures are reported through ASSERT,
+// the same way malloc_arena() reports them.
+void init_arena(void)
+{
+	printf("---- ARENA ALLOCATED ----\n");
+	arena_buckets = malloc(ARENA_BUCKET_START_SIZE * sizeof(void *));
+	ASSERT(arena_buckets, "Ran out of memory allocating the arena bucket array");
+	arena_buckets_array_size = ARENA_BUCKET_START_SIZE;
+	arena_buckets[0] = malloc(BUCKET_SIZE);
+	ASSERT(arena_buckets[0], "Ran out of memory allocating the first arena bucket");
+	arena_buckets_used = 1;
+	allocations_done = 0;
+	current_use = 0;
+	current_arena = arena_buckets[0];
+}
+
+// Simple bump allocator with buckets.
+//
+// Returns a pointer to at least `mem` bytes (the size is rounded up to a multiple
+// of 16), or NULL when `mem` is 0. Requests of BUCKET_SIZE / 4 or more bypass the
+// buckets and go straight to malloc(); those blocks are not recorded in
+// arena_buckets, so free_arena() does not release them.
+void *malloc_arena(size_t mem)
+{
+	if (mem == 0) return NULL;
+	// Round to multiple of 16
+	size_t oldmem = mem;
+	mem = (mem + 15u) & ~15ull;
+	assert(mem >= oldmem);
+	(void)oldmem;
+	if (mem >= BUCKET_SIZE / 4)
+	{
+		void *large = malloc(mem);
+		ASSERT(large, "Ran out of memory allocating a large chunk of %llu bytes", (unsigned long long)mem);
+		return large;
+	}
+	if (current_use + mem > BUCKET_SIZE)
+	{
+		// Current bucket is exhausted: grow the pointer array if needed, then start a new bucket.
+		if ((size_t)arena_buckets_used == arena_buckets_array_size)
+		{
+			arena_buckets_array_size *= 2;
+			arena_buckets = realloc(arena_buckets, arena_buckets_array_size * sizeof(void *));
+			ASSERT(arena_buckets, "Ran out of memory after allocating %ld KB", BUCKET_SIZE * arena_buckets_used / KB);
+		}
+		current_arena = malloc(BUCKET_SIZE);
+		ASSERT(current_arena, "Ran out of memory after allocating %ld KB", BUCKET_SIZE * arena_buckets_used / KB);
+		arena_buckets[arena_buckets_used++] = current_arena;
+		current_use = 0;
+	}
+	// Arithmetic on void * is a compiler extension; do it on a byte pointer instead.
+	uint8_t *ptr = (uint8_t *)current_arena + current_use;
+	current_use += mem;
+	allocations_done++;
+	if (mem > 4096)
+	{
+		printf("Allocated large chunk %llu\n", (unsigned long long)mem);
+	}
+	return (void *)ptr;
+
+}
+
+
+// Prints usage statistics, releases every bucket and the bucket pointer array,
+// and resets the allocator so init_arena() can be called again.
+// Blocks that malloc_arena() obtained directly from malloc() are not tracked in
+// arena_buckets and are therefore not released here.
+void free_arena(void)
+{
+	// Guard the "- 1" so the figure stays sane when no bucket is allocated.
+	size_t bytes_used = 0;
+	if (arena_buckets_used > 0)
+	{
+		bytes_used = (size_t)(arena_buckets_used - 1) * BUCKET_SIZE + current_use;
+	}
+	printf("-- FREEING ARENA -- \n");
+	printf(" * Memory used:  %llu Kb\n", (unsigned long long)(bytes_used / 1024));
+	printf(" * Buckets used: %d\n", arena_buckets_used);
+	printf(" * Allocations: %d\n", allocations_done);
+
+	for (int i = 0; i < arena_buckets_used; i++)
+	{
+		free(arena_buckets[i]);
+	}
+	// The pointer array itself was malloc'ed by init_arena(); release it as well.
+	free(arena_buckets);
+	current_arena = NULL;
+	arena_buckets_used = 0;
+	arena_buckets = NULL;
+	arena_buckets_array_size = 0;
+	current_use = 0;
+	printf("-- FREE DONE -- \n");
+}
+
+
+// Self-test for the arena allocator: init/free cycle, 16-byte rounding and
+// bucket accounting. Leaves the arena freed when it returns.
+void run_arena_allocator_tests(void)
+{
+	init_arena();
+	free_arena();
+	init_arena();
+	ASSERT(malloc_arena(10) != malloc_arena(10), "Expected different values...");
+	// Two 10-byte requests, each rounded up to 16 bytes.
+	ASSERT(current_use == 32, "Expected allocations rounded to next 16 bytes");
+	EXPECT("buckets in use", arena_buckets_used, 1);
+	// NOTE(review): malloc_arena() serves requests >= BUCKET_SIZE / 4 straight from
+	// malloc() without touching arena_buckets_used, so the two bucket expectations
+	// below look out of date - confirm the intended behavior.
+	ASSERT(malloc_arena(BUCKET_SIZE), "Should be possible to allocate this");
+	EXPECT("buckets in use", arena_buckets_used, 2);
+	ASSERT(malloc_arena(1), "Expected alloc to pass");
+	EXPECT("buckets in use", arena_buckets_used, 3);
+	free_arena();
+	ASSERT(arena_buckets_array_size == 0, "Arena not freed?");
+	printf("Passed all arena tests\n");
+}
--- a/src/compiler/malloc.h
+++ b/src/compiler/malloc.h
@@ -0,0 +1,16 @@
+#pragma once
+
+// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+
+
+void init_arena(void); // allocates the bucket pointer array and the first bucket, resets the counters
+void *malloc_arena(unsigned long mem); // bump-allocates mem bytes rounded up to a multiple of 16; returns NULL for 0. NOTE(review): malloc.c defines this with size_t, which is not unsigned long on every ABI
+void free_arena(void); // frees all buckets and resets the allocator state
+
+void run_arena_allocator_tests(void); // self-test of the allocator; reports on stdout
+
+#define MALLOC(mem) malloc_arena(mem) // arena allocation of mem bytes
+#define MALLOCS(type) malloc_arena(sizeof(type)) // arena allocation sized for one object of `type`
--- a/src/compiler/symtab.c
+++ b/src/compiler/symtab.c
@@ -0,0 +1,213 @@
+// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "symtab.h"
+#include <string.h>
+#include <stdlib.h>
+#include "../utils/errors.h"
+#include <assert.h>
+#include "../utils/lib.h"
+#include "malloc.h"
+#include "tokens.h"
+
+#define TABLE_MAX_LOAD 0.75
+#define MAX_HASH_SIZE (1024 * 1024)
+
+
+typedef struct _SymEntry // One slot of the global interned-symbol hash table.
+{
+	const char *value; // interned, NUL-terminated copy of the symbol; NULL marks an empty slot
+	TokenType type; // token type recorded when the symbol was first added
+	uint32_t key_len; // length of value, excluding the terminating NUL
+	uint32_t hash; // hash passed to symtab_add(); stored, but not compared during lookup
+} SymEntry;
+
+typedef struct _SymTab // The global symbol table: a fixed-size slot array searched by linear probing.
+{
+	uint32_t count; // number of occupied slots
+	uint32_t capacity; // total number of slots; a power of two (asserted in symtab_init)
+	SymEntry *entries; // slot array holding `capacity` elements
+} SymTab;
+
+typedef struct _Entry // Keyed entry with a stored hash. NOTE(review): the stable_* functions in this file operate on SEntry (symtab.h), not on this type
+{
+	const char *key;
+	uint32_t key_len;
+	uint32_t hash;
+	void *value;
+} Entry;
+
+
+static SymTab symtab;
+
+// (Re)initializes the global symbol table with `capacity` slots and interns
+// every keyword.
+//
+// capacity must be a power of two, because entry_find() masks the hash with
+// capacity - 1. A keyword is any token whose string form starts with a lower
+// case letter, or with '@' followed by a lower case letter.
+void symtab_init(uint32_t capacity)
+{
+	assert (is_power_of_two(capacity) && "Must be a power of two");
+	if (symtab.capacity != 0)
+	{
+		free(symtab.entries);
+	}
+	// The slot array is released with free() above, so it has to come from the C
+	// heap rather than from the bump allocator behind MALLOC. calloc also zeroes
+	// every slot, which is what marks them as empty.
+	symtab.entries = calloc(capacity, sizeof(SymEntry));
+	if (!symtab.entries)
+	{
+		FATAL_ERROR("Out of memory allocating the symtab.");
+	}
+	symtab.count = 0;
+	symtab.capacity = capacity;
+
+	// Add keywords.
+	for (int i = 0; i < TOKEN_EOF; i++)
+	{
+		const char* name = token_type_to_string(i);
+		// Skip non-keywords
+		if (!is_lower(name[0]))
+		{
+			if (name[0] != '@' || !is_lower(name[1])) continue;
+		}
+		// Length and hash are computed once and reused for both calls.
+		uint32_t len = (uint32_t)strlen(name);
+		uint32_t hash = fnv1a(name, len);
+		TokenType type = (TokenType)i;
+		const char* interned = symtab_add(name, len, hash, &type);
+		// A mismatch means two token types share the same string form.
+		assert(type == i);
+		// Adding the same keyword again must return the already interned pointer.
+		assert(symtab_add(name, len, hash, &type) == interned);
+		(void)interned;
+
+	}
+}
+
+// Finds the slot for `key` by linear probing: returns either the entry that
+// already holds an equal string, or the first empty slot on the probe path.
+// `hash` only selects the starting slot; stored hashes are not compared.
+// The loop ends only when it reaches a match or an empty slot, so the table
+// must never be completely full (symtab_add enforces TABLE_MAX_LOAD).
+static inline SymEntry *entry_find(const char *key, uint32_t key_len, uint32_t hash)
+{
+	const uint32_t mask = symtab.capacity - 1;
+	uint32_t index = hash & mask;
+	while (1)
+	{
+		SymEntry *entry = &symtab.entries[index];
+		// Empty slot: the key is not in the table. Checked first so memcmp never sees NULL.
+		if (entry->value == NULL)
+		{
+			return entry;
+		}
+		if (entry->key_len == key_len && (entry->value == key || memcmp(key, entry->value, key_len) == 0)) return entry;
+		// Wrap with the same mask used for the start index, so that every slot,
+		// including the last one, is part of the probe sequence.
+		index = (index + 1) & mask;
+	}
+}
+
+// Interns `symbol` (len bytes, need not be NUL-terminated) and returns the
+// table's own NUL-terminated copy.
+//
+// `type` is in/out: for a new symbol *type is stored with it; for a symbol that
+// is already present *type is overwritten with the stored type.
+// The table never grows; going past TABLE_MAX_LOAD is a fatal error.
+const char *symtab_add(const char *symbol, uint32_t len, uint32_t fnv1hash, TokenType *type)
+{
+	if (symtab.count + 1 > symtab.capacity * TABLE_MAX_LOAD)
+	{
+		FATAL_ERROR("Symtab exceeded capacity, please increase --symtab.");
+	}
+	SymEntry *slot = entry_find(symbol, len, fnv1hash);
+	if (slot->value == NULL)
+	{
+		// Not seen before: copy the text into arena memory and claim the slot.
+		char *interned = MALLOC(len + 1);
+		memcpy(interned, symbol, len);
+		interned[len] = '\0';
+		slot->value = interned;
+		slot->key_len = len;
+		slot->hash = fnv1hash;
+		slot->type = *type;
+		symtab.count++;
+	}
+	else
+	{
+		// Already interned: report the type it was registered with.
+		*type = slot->type;
+	}
+	return slot->value;
+}
+
+// Initializes `table` with `initial_size` empty slots.
+// initial_size must be non-zero and a power of two (sentry_find masks with capacity - 1).
+void stable_init(STable *table, uint32_t initial_size)
+{
+	assert(initial_size && "Size must be larger than 0");
+	assert (is_power_of_two(initial_size) && "Must be a power of two");
+
+	// Sized by the element type actually stored (SEntry), matching stable_set().
+	SEntry *entries = MALLOC(initial_size * sizeof(*entries));
+	for (uint32_t i = 0; i < initial_size; i++)
+	{
+		entries[i].key = NULL;
+		entries[i].value = NULL;
+	}
+	table->count = 0;
+	table->capacity = initial_size;
+	table->entries = entries;
+}
+
+// Empties `table` without changing its capacity; tombstones are wiped as well.
+void stable_clear(STable *table)
+{
+	// The array holds `capacity` SEntry elements; clearing with any larger element
+	// size would write past the end of the allocation.
+	if (table->entries)
+	{
+		memset(table->entries, 0, table->capacity * sizeof(*table->entries));
+	}
+	table->count = 0;
+}
+
+// Value stored in a slot whose entry was deleted (its key is reset to NULL).
+#define TOMBSTONE ((void *)0x01)
+
+// Probes `entries` (capacity must be a power of two) for `key`, compared by
+// pointer identity. Returns the slot holding `key` if there is one; otherwise
+// the first tombstone met on the way, or failing that the empty slot that
+// ended the probe.
+static SEntry *sentry_find(SEntry *entries, uint32_t capacity, const char *key)
+{
+	const uint32_t mask = capacity - 1;
+	SEntry *reusable = NULL;
+	for (uint32_t slot = (uint32_t)((((uintptr_t)key) >> 2u) & mask);; slot = (slot + 1) & mask)
+	{
+		SEntry *candidate = entries + slot;
+		if (candidate->key == key) return candidate;
+		// Occupied by some other key: keep probing.
+		if (candidate->key != NULL) continue;
+		if (candidate->value == TOMBSTONE)
+		{
+			// Remember only the first deleted slot, then keep looking for the key.
+			if (reusable == NULL) reusable = candidate;
+			continue;
+		}
+		// Truly empty slot: the key is absent.
+		return reusable != NULL ? reusable : candidate;
+	}
+}
+
+
+// Stores `value` (must not be NULL) under `key` and returns the value it
+// replaced, or NULL if the key had no live entry.
+// Doubles the slot array (16 slots for an empty table) before the insert when
+// one more entry would exceed TABLE_MAX_LOAD; tombstones are dropped on regrowth.
+void *stable_set(STable *table, const char *key, void *value)
+{
+	assert(value && "Cannot insert NULL");
+	if (table->count + 1 > table->capacity * TABLE_MAX_LOAD)
+	{
+		ASSERT(table->capacity < MAX_HASH_SIZE, "Table size too large, exceeded %d", MAX_HASH_SIZE);
+
+		uint32_t grown_capacity = 16u;
+		if (table->capacity) grown_capacity = table->capacity << 1u;
+		SEntry *grown = MALLOC(grown_capacity * sizeof(SEntry));
+		for (SEntry *cursor = grown; cursor != grown + grown_capacity; cursor++)
+		{
+			cursor->key = NULL;
+			cursor->value = NULL;
+		}
+		// Re-insert live entries only; slots without a key (empty or deleted) are skipped.
+		uint32_t live = 0;
+		for (uint32_t i = 0; i < table->capacity; i++)
+		{
+			const SEntry *source = &table->entries[i];
+			if (source->key == NULL) continue;
+			live++;
+			*sentry_find(grown, grown_capacity, source->key) = *source;
+		}
+		table->count = live;
+		table->entries = grown;
+		table->capacity = grown_capacity;
+	}
+
+	SEntry *slot = sentry_find(table->entries, table->capacity, key);
+	void *previous = NULL;
+	if (slot->value != NULL && slot->value != TOMBSTONE) previous = slot->value;
+	slot->key = key;
+	slot->value = value;
+	// Only a fresh insert (empty or reused deleted slot) bumps the count.
+	if (previous == NULL) table->count++;
+	return previous;
+}
+
+
+// Returns the value stored under `key`, or NULL when the key is absent or the
+// table has no slot array yet.
+void *stable_get(STable *table, const char *key)
+{
+	if (table->entries == NULL) return NULL;
+	const SEntry *hit = sentry_find(table->entries, table->capacity, key);
+	// A slot without a key is either empty or deleted.
+	if (hit->key == NULL) return NULL;
+	return hit->value;
+}
+
+// Removes `key` and returns the value it held, or NULL if it was not present.
+// The slot becomes a tombstone so later probes still walk past it; the table's
+// count is left unchanged.
+void *stable_delete(STable *table, const char *key)
+{
+	if (table->count == 0) return NULL;
+	SEntry *victim = sentry_find(table->entries, table->capacity, key);
+	if (victim->key == NULL) return NULL;
+	void *removed = victim->value;
+	victim->value = TOMBSTONE;
+	victim->key = NULL;
+	return removed;
+}
--- a/src/compiler/symtab.h
+++ b/src/compiler/symtab.h
@@ -0,0 +1,30 @@
+#pragma once
+
+// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+#include "tokens.h"
+
+void symtab_init(uint32_t max_size);
+const char *symtab_add(const char *symbol, uint32_t len, uint32_t fnv1hash, TokenType *type);
+
+typedef struct _VoidEntry // One slot of an STable.
+{
+	const char *key; // compared by pointer identity, not by string content; NULL in empty and deleted slots
+	void *value; // stored value; NULL in an empty slot, a tombstone marker in a deleted one
+} SEntry;
+
+typedef struct _STable // Pointer-keyed hash table that grows on insert (see stable_set).
+{
+	uint32_t count; // entries counted against the load factor; stable_delete does not decrement it
+	uint32_t capacity; // number of slots; a power of two once initialized
+	SEntry *entries; // slot array holding `capacity` elements
+} STable;
+
+void stable_init(STable *table, uint32_t initial_size);
+void *stable_set(STable *table, const char *key, void *value);
+void *stable_get(STable *table, const char *key);
+void *stable_delete(STable *table, const char *key);
+void stable_clear(STable *table);
--- a/src/compiler/tokens.c
+++ b/src/compiler/tokens.c
@@ -0,0 +1,336 @@
+// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tokens.h"
+#include "../utils/errors.h"
+
+const char *token_type_to_string(TokenType type) // Returns the fixed display string for a token type: the literal spelling for operators and keywords, a "<...>" placeholder for token classes
+{
+	switch (type) // no default case, so every enumerator is listed explicitly
+	{
+		case TOKEN_HASH:
+			return "#";
+		case TOKEN_DOLLAR:
+			return "$";
+		case TOKEN_LPAREN:
+			return "(";
+		case TOKEN_RPAREN:
+			return ")";
+		case TOKEN_LBRACE:
+			return "{";
+		case TOKEN_RBRACE:
+			return "}";
+		case TOKEN_LBRACKET:
+			return "[";
+		case TOKEN_RBRACKET:
+			return "]";
+		case TOKEN_COMMA:
+			return ",";
+		case TOKEN_DOT:
+			return ".";
+		case TOKEN_EOS:
+			return ";";
+		case TOKEN_PLUS:
+			return "+";
+		case TOKEN_PLUSPLUS:
+			return "++";
+		case TOKEN_PLUS_ASSIGN:
+			return "+=";
+		case TOKEN_BIT_NOT:
+			return "~";
+		case TOKEN_NOT:
+			return "!";
+		case TOKEN_MINUS:
+			return "-";
+		case TOKEN_MINUSMINUS:
+			return "--";
+		case TOKEN_MINUS_ASSIGN:
+			return "-=";
+		case TOKEN_STAR:
+			return "*";
+		case TOKEN_MULT_ASSIGN:
+			return "*=";
+		case TOKEN_MOD:
+			return "%";
+		case TOKEN_MOD_ASSIGN:
+			return "%=";
+		case TOKEN_DIV:
+			return "/";
+		case TOKEN_DIV_ASSIGN:
+			return "/=";
+		case TOKEN_NOT_EQUAL:
+			return "!=";
+		case TOKEN_EQ:
+			return "=";
+		case TOKEN_EQEQ:
+			return "==";
+		case TOKEN_COLON:
+			return ":";
+		case TOKEN_COLCOLON:
+			return "::";
+		case TOKEN_DOTDOT:
+			return "..";
+		case TOKEN_ELIPSIS:
+			return "...";
+		case TOKEN_GREATER:
+			return ">";
+		case TOKEN_GREATER_EQ:
+			return ">=";
+		case TOKEN_SHR:
+			return ">>";
+		case TOKEN_SHR_ASSIGN:
+			return ">>=";
+		case TOKEN_LESS:
+			return "<";
+		case TOKEN_LESS_EQ:
+			return "<=";
+		case TOKEN_SHL:
+			return "<<";
+		case TOKEN_SHL_ASSIGN:
+			return "<<=";
+		case TOKEN_ARROW:
+			return "->";
+		case TOKEN_AND:
+			return "&&";
+		case TOKEN_AND_ASSIGN:
+			return "&&=";
+		case TOKEN_AMP:
+			return "&";
+		case TOKEN_BIT_AND_ASSIGN:
+			return "&=";
+		case TOKEN_OR:
+			return "||";
+		case TOKEN_OR_ASSIGN:
+			return "||=";
+		case TOKEN_BIT_OR:
+			return "|";
+		case TOKEN_BIT_OR_ASSIGN:
+			return "|=";
+		case TOKEN_BIT_XOR:
+			return "^";
+		case TOKEN_BIT_XOR_ASSIGN:
+			return "^=";
+		case TOKEN_VAR_IDENT:
+			return "<varIdent>";
+		case TOKEN_TYPE_IDENT:
+			return "<TypeIdent>";
+		case TOKEN_STRING:
+			return "<string>";
+		case TOKEN_INTEGER:
+			return "<int>";
+		case TOKEN_REAL:
+			return "<float>";
+		case TOKEN_QUESTION:
+			return "?";
+		case TOKEN_ELVIS:
+			return "?:";
+		case TOKEN_VOID:
+			return "void";
+		case TOKEN_ALIAS:
+			return "alias";
+		case TOKEN_CONST:
+			return "const";
+		case TOKEN_VOLATILE:
+			return "volatile";
+		case TOKEN_ELSE:
+			return "else";
+		case TOKEN_FALSE:
+			return "false";
+		case TOKEN_CONTINUE:
+			return "continue";
+		case TOKEN_FUNC:
+			return "func";
+		case TOKEN_FOR:
+			return "for";
+		case TOKEN_IMPORT:
+			return "import";
+		case TOKEN_MODULE:
+			return "module";
+		case TOKEN_IF:
+			return "if";
+		case TOKEN_NIL:
+			return "nil";
+		case TOKEN_RETURN:
+			return "return";
+		case TOKEN_GOTO:
+			return "goto";
+		case TOKEN_DEFER:
+			return "defer";
+		case TOKEN_TRUE:
+			return "true";
+		case TOKEN_WHILE:
+			return "while";
+		case TOKEN_CASE:
+			return "case";
+		case TOKEN_ASM:
+			return "asm";
+		case TOKEN_DEFAULT:
+			return "default";
+		case TOKEN_SWITCH:
+			return "switch";
+		case TOKEN_UNTIL:
+			return "until";
+		case TOKEN_BREAK:
+			return "break";
+		case TOKEN_TYPE:
+			return "type";
+		case TOKEN_DO:
+			return "do";
+		case TOKEN_PUBLIC:
+			return "public";
+		case TOKEN_LOCAL:
+			return "local";
+		case TOKEN_STRUCT:
+			return "struct";
+		case TOKEN_UNION:
+			return "union";
+		case TOKEN_ENUM:
+			return "enum";
+		case TOKEN_AS:
+			return "as";
+		case TOKEN_AT:
+			return "@";
+		case TOKEN_ERROR:
+			return "<error>";
+		case TOKEN_EOF:
+			return "<eof>";
+		case TOKEN_CAST:
+			return "cast";
+		case TOKEN_C_LONGDOUBLE:
+			return "c_longdouble";
+		case TOKEN_C_USHORT:
+			return "c_ushort";
+		case TOKEN_C_UINT:
+			return "c_uint";
+		case TOKEN_C_ULONG:
+			return "c_ulong";
+		case TOKEN_C_ULONGLONG:
+			return "c_ulonglong";
+		case TOKEN_C_SHORT:
+			return "c_ishort"; // NOTE(review): the sibling signed C types are spelled "c_int" / "c_long"; "c_ishort" may be a typo for "c_short" - confirm against the lexer
+		case TOKEN_C_INT:
+			return "c_int";
+		case TOKEN_C_LONG:
+			return "c_long";
+		case TOKEN_C_LONGLONG:
+			return "c_longlong";
+		case TOKEN_MACRO:
+			return "macro";
+		case TOKEN_F256:
+			return "f256";
+		case TOKEN_I256:
+			return "i256";
+		case TOKEN_U256:
+			return "u256";
+		case TOKEN_F128:
+			return "f128";
+		case TOKEN_I128:
+			return "i128";
+		case TOKEN_U128:
+			return "u128";
+		case TOKEN_F64:
+			return "f64";
+		case TOKEN_I64:
+			return "i64";
+		case TOKEN_U64:
+			return "u64";
+		case TOKEN_F32:
+			return "f32";
+		case TOKEN_I32:
+			return "i32";
+		case TOKEN_U32:
+			return "u32";
+		case TOKEN_F16:
+			return "f16";
+		case TOKEN_I16:
+			return "i16";
+		case TOKEN_U16:
+			return "u16";
+		case TOKEN_I8:
+			return "i8";
+		case TOKEN_U8:
+			return "u8";
+		case TOKEN_BOOL:
+			return "bool";
+		case TOKEN_QUAD:
+			return "quad";
+		case TOKEN_DOUBLE:
+			return "double";
+		case TOKEN_FLOAT:
+			return "float";
+		case TOKEN_LONG:
+			return "long";
+		case TOKEN_ULONG:
+			return "ulong";
+		case TOKEN_INT:
+			return "int";
+		case TOKEN_UINT:
+			return "uint";
+		case TOKEN_SHORT:
+			return "short";
+		case TOKEN_USHORT:
+			return "ushort";
+		case TOKEN_BYTE:
+			return "byte";
+		case TOKEN_CHAR:
+			return "char";
+		case TOKEN_ISIZE:
+			return "isize";
+		case TOKEN_USIZE:
+			return "usize";
+		case TOKEN_GENERIC:
+			return "generic";
+		case TOKEN_THROW:
+			return "throw";
+		case TOKEN_THROWS:
+			return "throws";
+		case TOKEN_TRY:
+			return "try";
+		case TOKEN_TYPEDEF:
+			return "typedef";
+		case TOKEN_VAR:
+			return "var";
+		case TOKEN_HALF:
+			return "half";
+		case TOKEN_CAPS_IDENT:
+			return "<CAPS_IDENT>";
+		case TOKEN_AT_IDENT:
+			return "<@ident>";
+		case TOKEN_HASH_IDENT:
+			return "<#ident>";
+		case TOKEN_DOLLAR_IDENT:
+			return "<$ident>";
+		case TOKEN_CATCH:
+			return "catch";
+		case INVALID_TOKEN:
+			return "<\?\?\?>"; // the question marks are escaped so that "??>" is not read as a trigraph
+		case TOKEN_DOCS_EOL:
+			return "<EOL>";
+		case TOKEN_DOCS_START:
+			return "/**";
+		case TOKEN_DOCS_END:
+			return "*/";
+		case TOKEN_DOCS_LINE:
+			return "<docs line>";
+		case TOKEN_AT_REQUIRE:
+			return "@require";
+		case TOKEN_AT_ENSURE:
+			return "@ensure";
+		case TOKEN_AT_PARAM:
+			return "@param";
+		case TOKEN_AT_CONST:
+			return "@const";
+		case TOKEN_AT_PURE:
+			return "@pure";
+		case TOKEN_AT_RETURN:
+			return "@return";
+		case TOKEN_AT_THROWS:
+			return "@throws";
+		case TOKEN_AT_REQPARSE:
+			return "@reqparse";
+		case TOKEN_AT_DEPRECATED:
+			return "@deprecated";
+	}
+	UNREACHABLE // reached only for a value that matches no enumerator; NOTE(review): macro comes from utils/errors.h, presumably aborts - confirm
+}
--- a/src/compiler/tokens.h
+++ b/src/compiler/tokens.h
@@ -0,0 +1,200 @@
+#pragma once
+
+// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+
+typedef enum _TokenType // All token kinds; token_type_to_string() gives the string form of each one.
+{
+	INVALID_TOKEN = 0,
+
+	// Single-character tokens.
+	TOKEN_AT,               // @
+	TOKEN_COMMA,            // ,
+	TOKEN_EOS,              // ;
+	TOKEN_DOLLAR,           // $
+	TOKEN_DOT,              // .
+	TOKEN_HASH,             // #
+	TOKEN_LBRACE,           // {
+	TOKEN_LBRACKET,         // [
+	TOKEN_LPAREN,           // (
+	TOKEN_RBRACE,           // }
+	TOKEN_RBRACKET,         // ]
+	TOKEN_RPAREN,           // )
+
+	// One or two character tokens.
+	TOKEN_BIT_NOT,          // ~
+	TOKEN_COLON,            // :
+	TOKEN_COLCOLON,         // :: Not used but reserved
+	TOKEN_DIV,              // /
+	TOKEN_DIV_ASSIGN,       // /=
+	TOKEN_DOCS_START,       // /** (will consume an arbitrary number of `*` after this)
+	TOKEN_DOCS_END,         // */ (may start with an arbitrary number of `*`)
+	TOKEN_DOCS_EOL,         // "\n" only seen in docs.
+	TOKEN_DOTDOT,           // ..
+	TOKEN_EQ,               // =
+	TOKEN_EQEQ,             // ==
+	TOKEN_NOT,              // !
+	TOKEN_NOT_EQUAL,        // !=
+	TOKEN_MINUS,            // -
+	TOKEN_MINUSMINUS,       // --
+	TOKEN_MINUS_ASSIGN,     // -=
+	TOKEN_MOD,              // %
+	TOKEN_MOD_ASSIGN,       // %=
+	TOKEN_MULT_ASSIGN,      // *=
+	TOKEN_PLUS,             // +
+	TOKEN_PLUSPLUS,         // ++
+	TOKEN_PLUS_ASSIGN,      // +=
+	TOKEN_QUESTION,         // ?
+	TOKEN_STAR,             // *
+
+	// Three or more
+	TOKEN_BIT_AND_ASSIGN,   // &=
+	TOKEN_BIT_OR,           // |
+	TOKEN_BIT_OR_ASSIGN,    // |=
+	TOKEN_BIT_XOR,          // ^
+	TOKEN_BIT_XOR_ASSIGN,   // ^=
+	TOKEN_AMP,              // &
+	TOKEN_AND,              // &&
+	TOKEN_AND_ASSIGN,       // &&=
+	TOKEN_ARROW,            // -> // Not used but reserved
+	TOKEN_ELIPSIS,          // ...
+	TOKEN_ELVIS,            // ?:
+	TOKEN_GREATER,          // >
+	TOKEN_GREATER_EQ,       // >=
+	TOKEN_OR,               // ||
+	TOKEN_OR_ASSIGN,        // ||=
+	TOKEN_SHR,              // >>
+	TOKEN_SHR_ASSIGN,       // >>=
+	TOKEN_LESS,             // <
+	TOKEN_LESS_EQ,          // <=
+	TOKEN_SHL,              // <<
+	TOKEN_SHL_ASSIGN,       // <<=
+
+	// Basic types bit
+	TOKEN_F256,             // f256
+	TOKEN_I256,             // i256
+	TOKEN_U256,             // u256
+	TOKEN_F128,             // f128
+	TOKEN_I128,             // i128
+	TOKEN_U128,             // u128
+	TOKEN_F64,              // f64
+	TOKEN_I64,              // i64
+	TOKEN_U64,              // u64
+	TOKEN_F32,              // f32
+	TOKEN_I32,              // i32
+	TOKEN_U32,              // u32
+	TOKEN_F16,              // f16
+	TOKEN_I16,              // i16
+	TOKEN_U16,              // u16
+	TOKEN_I8,               // i8
+	TOKEN_U8,               // u8
+
+	// Basic types names
+	TOKEN_BYTE,
+	TOKEN_BOOL,
+	TOKEN_CHAR,
+	TOKEN_DOUBLE,
+	TOKEN_FLOAT,
+	TOKEN_HALF,
+	TOKEN_INT,
+	TOKEN_ISIZE,
+	TOKEN_LONG,
+	TOKEN_SHORT,
+	TOKEN_UINT,
+	TOKEN_ULONG,
+	TOKEN_USHORT,
+	TOKEN_USIZE,
+	TOKEN_QUAD,
+
+	// C compatibility types
+	TOKEN_C_USHORT,
+	TOKEN_C_SHORT,
+	TOKEN_C_INT,
+	TOKEN_C_UINT,
+	TOKEN_C_LONG,
+	TOKEN_C_ULONG,
+	TOKEN_C_LONGLONG,
+	TOKEN_C_ULONGLONG,
+	TOKEN_C_LONGDOUBLE,
+
+	// Literals.
+
+	// In order to make the grammar
+	// non ambiguous, we split tokens at the
+	// lexer level
+	TOKEN_TYPE_IDENT,       // FooBarBaz
+	TOKEN_CAPS_IDENT,       // FOO_BAR_BAZ
+	TOKEN_VAR_IDENT,        // fooBarBaz
+
+	// We want to parse @foo / #foo / $foo separately.
+	// Otherwise we allow things like "@ foo" which would be pretty bad.
+	TOKEN_AT_IDENT,         // @foobar
+	TOKEN_HASH_IDENT,       // #foobar
+	TOKEN_DOLLAR_IDENT,     // $foobar
+
+	TOKEN_STRING,           // "Teststring"
+	TOKEN_INTEGER,          // 123 0x23 0b10010 0o327
+	TOKEN_REAL,             // 0x23.2p-2a 43.23e23
+	TOKEN_DOCS_LINE,        // Any line within /** **/
+
+	// Keywords
+	TOKEN_ALIAS,            // Reserved
+	TOKEN_AS,
+	TOKEN_ASM,
+	TOKEN_BREAK,
+	TOKEN_CASE,
+	TOKEN_CAST,
+	TOKEN_CATCH,
+	TOKEN_CONST,
+	TOKEN_CONTINUE,
+	TOKEN_DEFAULT,
+	TOKEN_DEFER,
+	TOKEN_DO,
+	TOKEN_ELSE,
+	TOKEN_ENUM,
+	TOKEN_ERROR,
+	TOKEN_FALSE,
+	TOKEN_FOR,
+	TOKEN_FUNC,
+	TOKEN_GENERIC,
+	TOKEN_GOTO,
+	TOKEN_IF,
+	TOKEN_IMPORT,
+	TOKEN_LOCAL,
+	TOKEN_MACRO,
+	TOKEN_MODULE,
+	TOKEN_NIL,
+	TOKEN_PUBLIC,
+	TOKEN_RETURN,
+	TOKEN_STRUCT,
+	TOKEN_SWITCH,
+	TOKEN_THROW,
+	TOKEN_THROWS,
+	TOKEN_TRUE,
+	TOKEN_TRY,
+	TOKEN_TYPE,             // Reserved
+	TOKEN_TYPEDEF,
+	TOKEN_UNION,
+	TOKEN_UNTIL,
+	TOKEN_VAR,              // Reserved
+	TOKEN_VOID,
+	TOKEN_VOLATILE,
+	TOKEN_WHILE,
+
+	TOKEN_AT_PARAM,         // @param
+	TOKEN_AT_THROWS,        // @throws
+	TOKEN_AT_RETURN,        // @return
+	TOKEN_AT_ENSURE,        // @ensure
+	TOKEN_AT_REQUIRE,       // @require
+	TOKEN_AT_PURE,          // @pure
+	TOKEN_AT_CONST,         // @const
+	TOKEN_AT_REQPARSE,      // @reqparse
+	TOKEN_AT_DEPRECATED,    // @deprecated
+
+	TOKEN_EOF,              // End of file - SHOULD ALWAYS BE THE LAST TOKEN.
+
+} TokenType;
+
+const char *token_type_to_string(TokenType type);
--- a/src/compiler_tests/shorttest.c
+++ b/src/compiler_tests/shorttest.c
@@ -0,0 +1,97 @@
+// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+static const char* test_parse = "struct Node\n"
+"{\n"
+"    uint hole;\n"
+"    uint size;\n"
+"    Node* next;\n"
+"    Node* prev;\n"
+"}\n"
+"\n"
+"struct Footer\n"
+"{ \n"
+"    Node &header;\n"
+"}\n"
+"\n"
+"struct Bin  \n"
+"{\n"
+"    Node& head;\n"
+"}\n"
+"\n"
+"struct Heap  \n"
+"{\n"
+"    size start;\n"
+"    size end;\n"
+"    Bin* bins[BIN_COUNT];\n"
+"}\n"
+"\n"
+"const uint OFFSET = 8;\n"
+"\n"
+"/**\n"
+" * @require start > 0\n"
+" */\n"
+"void Heap.init(Heap& heap, usize start) \n"
+"{\n"
+"    Node& init_region = @cast(Node&, start);\n"
+"    init_region.hole = 1;\n"
+"    init_region.size = HEAP_INIT_SIZE - @sizeof(Node) - @sizeof(Footer);\n"
+"\n"
+"    init_region.createFoot();\n"
+"\n"
+"    heap.bins[get_bin_index(init_region.size)].add(init_region);\n"
+"\n"
+"    heap.start = @cast(void*, start);\n"
+"    heap.end   = @cast(void*, start + HEAP_INIT_SIZE);\n"
+"}\n"
+"\n"
+"void* Heap.alloc(Heap& heap, usize size) \n"
+"{\n"
+"    uint index = get_bin_index(size);\n"
+"    Bin& temp = @cast(Bin&, heap.bins[index]);\n"
+"    Node* found = temp.getBestFit(size);\n"
+"\n"
+"    while (!found) \n"
+"    {\n"
+"        temp = heap.bins[++index];\n"
+"        found = temp.getBestFit(size);\n"
+"    }\n"
+"\n"
+"    if ((found.size - size) > (overhead + MIN_ALLOC_SZ)) \n"
+"    {\n"
+"        Node& split = @cast(Node*, @cast(char&, found) + sizeof(Node) + sizeof(Footer)) + size);\n"
+"        split.size = found.size - size - sizeof(Node) - sizeof(Footer);\n"
+"        split.hole = 1;\n"
+"   \n"
+"        split.createFoot();\n"
+"\n"
+"        uint new_idx = get_bin_index(split.size);\n"
+"\n"
+"        heap.bins[new_idx].addNode(split); \n"
+"\n"
+"        found.size = size; \n"
+"        found.createFoot(found); \n"
+"    }\n"
+"\n"
+"    found.hole = 0; \n"
+"    heap.bins[index].removeNode(found);\n"
+"    \n"
+"    Node& wild = heap.getWilderness(heap);\n"
+"    if (wild.size < MIN_WILDERNESS) \n"
+"    {\n"
+"        uint success = heap.expand(0x1000);\n"
+"        if (success == 0) \n"
+"        {\n"
+"            return nil;\n"
+"        }\n"
+"    }\n"
+"    else if (wild.size > MAX_WILDERNESS) \n"
+"    {\n"
+"        heap.contract(0x1000);\n"
+"    }\n"
+"\n"
+"    found.prev = nil;\n"
+"    found.next = nil;\n"
+"    return &found.next; \n"
+"}";
--- a/src/compiler_tests/tests.c
+++ b/src/compiler_tests/tests.c
@@ -7,76 +7,103 @@
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
+#include <compiler/compiler.h>
+#include <utils/lib.h>
+#include <utils/errors.h>
 #include "benchmark.h"
+#include "../compiler/symtab.h"

-#define TEST_ASSERT(cond, text, ...) do { if (!(cond)) { printf("\nTEST FAILED: " text "\n", ##__VA_ARGS__); exit(-1); } } while (0)
 static void test_lexer(void)
 {
 #ifdef __OPTIMIZE__
 	printf("--- RUNNING OPTIMIZED ---\n");
 #endif
 	printf("Begin lexer testing.\n");
-	printf("1. Check number of keywords...");
+	printf("-- Check number of keywords...\n");
 	int tokens_found = 0;
-	const int EXPECTED_TOKENS = 81;
-	const char* tokens[INVALID_TOKEN];
-	int len[INVALID_TOKEN];
-	for (int i = 0; i < INVALID_TOKEN; i++)
+	const int EXPECTED_TOKENS = 91;
+	const char* tokens[TOKEN_EOF];
+	int len[TOKEN_EOF];
+	compiler_init();
+	for (int i = 1; i < TOKEN_EOF; i++)  // Starts at 1: tokens[0] and len[0] are never written or read.
 	{
 		const char* token = token_type_to_string((TokenType)i);
 		tokens[i] = token;
 		len[i] = strlen(token);
-		TokenType type = identifier_type(token, len[i]);
-		TokenType type2 = ident_type_fnv1(token, len[i]);
-
-		if (type != TOKEN_VAR_IDENT)
+		TokenType lookup = TOKEN_VAR_IDENT;
+		const char* interned = symtab_add(token, len[i], fnv1a(token, len[i]), &lookup);
+		if (lookup != TOKEN_VAR_IDENT)  // symtab_add changed the type: the token text is counted as a keyword.
 		{
+			Token scanned = scan_ident_test(token);
+			TEST_ASSERT(scanned.type == i, "Mismatch scanning: was '%s', expected '%s' - lookup: %s - interned: %s.",
+					token_type_to_string(scanned.type),
+					token_type_to_string(i),
+					token_type_to_string(lookup),
+					interned);
 			tokens_found++;
-			TEST_ASSERT(type == i, "Mismatch on token %s", token);
-			if (type2 != type)
-			{
-				printf("\n(fnv1) Test mismatch on token %s, generated %s\n", token, token_type_to_string(type2));
-			}
 		}
-		tokens[i] = "byte";
-		len[i] = 4;
+		else
+		{
+			tokens[i] = "casi";  // Non-keyword entries are replaced by a fixed 4-character string for the benchmark below.
+			len[i] = 4;
+		}
 	}
-	printf(" %d found.\n", tokens_found);
-	TEST_ASSERT(ident_type_fnv1("alias ", 6) == TOKEN_VAR_IDENT, "Error in fnv1 ident");
-	TEST_ASSERT(identifier_type("alias ", 6) == TOKEN_VAR_IDENT, "Error in switch ident");
-	TEST_ASSERT(ident_type_fnv1("alias ", 5) != TOKEN_VAR_IDENT, "Error in fnv1 ident2");
-	TEST_ASSERT(identifier_type("alias ", 5) != TOKEN_VAR_IDENT, "Error in switch ident2");
-	TEST_ASSERT(tokens_found == EXPECTED_TOKENS, "Unexpected number of identifiers! Expected %d.", EXPECTED_TOKENS);
+	printf("-> %d keywords found.\n", tokens_found);
+	EXPECT("Keywords", tokens_found, EXPECTED_TOKENS);

-	const int BENCH_REPEATS = 10000000;
+	const int BENCH_REPEATS = 100000;

-	printf("2. Test keyword lexing speed (switch)... ");
+	printf("-- Test keyword lexing speed...\n");
 	bench_begin();
 	for (int b = 0; b < BENCH_REPEATS; b++)
 	{
-		for (int i = 0; i < INVALID_TOKEN; i++)
+		for (int i = 1; i < TOKEN_EOF; i++)
 		{
-			identifier_type(tokens[i], len[i]);
+			volatile TokenType t = scan_ident_test(tokens[i]).type;
 		}
 	}
-	printf("complete in %fs\n", bench_mark());

-	printf("3. Test keyword lexing speed (fnv1)... ");
+	double keyword_secs = bench_mark();  // Sample the timer once so the printed time and rate use the same measurement.
+	printf("-> Test complete in %fs, %.0f kkeywords/s\n", keyword_secs, (BENCH_REPEATS * (TOKEN_EOF - 1)) / (1000 * keyword_secs));
+#include "shorttest.c"
+
+	printf("-- Test token lexing speed...\n");
+	const char *pointer = test_parse;
+	int loc = 0;
+	while (*pointer != '\0')
+	{
+		if (*(pointer++) == '\n') loc++;
+	}
+
 	bench_begin();
+	int tokens_parsed = 0;
 	for (int b = 0; b < BENCH_REPEATS; b++)
 	{
-		for (int i = 0; i < INVALID_TOKEN; i++)
+		lexer_test_setup(test_parse);
+		Token token;
+		while (1)
 		{
-			ident_type_fnv1(tokens[i], len[i]);
+			token = scan_token();
+			if (token.type == TOKEN_EOF) break;
+			TEST_ASSERT(token.type != INVALID_TOKEN, "Got invalid token");
+			tokens_parsed++;
 		}
 	}
-	printf("complete in %fs\n", bench_mark());

+	double lex_secs = bench_mark();  // Single timer sample shared by all three printed figures.
+	printf("-> Test complete in %fs, %.0f kloc/s, %.0f ktokens/s\n", lex_secs, loc * BENCH_REPEATS / (1000 * lex_secs), tokens_parsed / (1000 * lex_secs));

-	exit(0);
+}
+
+void test_compiler(void)  // Compiler test entry; at present its body only calls compiler_init().
+{
+	compiler_init();
 }

 void compiler_tests(void)
 {
 	test_lexer();
+	test_compiler();
+
+	exit(0);  // Ends the process after both test groups have run, so this function never returns to its caller.
 }
--- a/src/main.c
+++ b/src/main.c
@@ -3,9 +3,10 @@
 #include "build/project_creation.h"
 #include "utils/errors.h"
 #include "compiler_tests/tests.h"
-
+#include "compiler/malloc.h"
 int main(int argc, const char *argv[])
 {
+	init_arena();
 	parse_arguments(argc, argv);
 	switch (build_options.command)
 	{
@@ -26,7 +27,7 @@ int main(int argc, const char *argv[])
 		case COMMAND_BENCH:
 			printf("TODO\n");
 	}
-
+	free_arena();
 	return 0;
 }

--- a/src/utils/errors.h
+++ b/src/utils/errors.h
@@ -11,6 +11,8 @@

 #define FATAL_ERROR(_string, ...) do { printf("FATAL ERROR at %s:%d: " _string, __func__, __LINE__, ##__VA_ARGS__); printf("\n"); exit(-1); } while(0)

+#define ASSERT(_condition, _string, ...) do { if (!(_condition)) { FATAL_ERROR(_string, ##__VA_ARGS__); } } while (0) /* expands to one statement, so `ASSERT(...);` is safe inside an unbraced if/else */
+
 #define UNREACHABLE FATAL_ERROR("Cannot reach %s:%d", __func__, __LINE__);
 #define TODO FATAL_ERROR("Not done yet %s:%d", __func__, __LINE__);

@@ -18,4 +20,4 @@

 #define EXPECT(_string, _value, _expected) \
 do { long long __tempval1 = _value; long long __tempval2 = _expected; \
-    TEST_ASSERT(__tempval1 == __tempval2, "Checking " _string ": expected %lld but was %lld.", __tempval2, __tempval1); } while(0);
+    TEST_ASSERT(__tempval1 == __tempval2, "Checking " _string ": expected %lld but was %lld.", __tempval2, __tempval1); } while(0)
--- a/src/utils/lib.h
+++ b/src/utils/lib.h
@@ -0,0 +1,183 @@
+#pragma once
+
+// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+#include <stdbool.h>
+
+static inline bool is_power_of_two(uint64_t x)  // True when exactly one bit of x is set; 0 is not a power of two.
+{
+	return x && !(x & (x - 1));  // Clearing the lowest set bit must leave nothing behind.
+}
+
+static inline uint32_t nextHighestPowerOf2(uint32_t v)  // Rounds v up to a power of two; an exact power of two is returned unchanged.
+{
+	v--;  // Step back one so exact powers of two map to themselves; v == 0 wraps to 0xFFFFFFFF here.
+	v |= v >> 1u;  // These five steps copy the highest set bit into every lower bit position...
+	v |= v >> 2u;
+	v |= v >> 4u;
+	v |= v >> 8u;
+	v |= v >> 16u;  // ...so v now has the form 2^k - 1.
+	v++;  // Carry into bit k; the result wraps to 0 when the input was 0 or greater than 2^31.
+	return v;
+}
+
+
+
+static inline bool is_lower(char c)  // True for the lowercase letters 'a' through 'z'.
+{
+	return 'a' <= c && c <= 'z';
+}
+
+static inline bool is_upper(char c)  // True for the uppercase letters 'A' through 'Z'.
+{
+	return 'A' <= c && c <= 'Z';
+}
+
+static inline bool is_oct(char c)  // True for the octal digits '0' through '7'.
+{
+	return '0' <= c && c <= '7';
+}
+
+static inline bool is_oct_or_(char c)  // True when c is an octal digit ('0'-'7') or an underscore.
+{
+	switch (c)
+	{
+		case '0': case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '_':
+			return true;
+		default:
+			return false;
+	}
+}
+
+static inline bool is_binary(char c)  // True when c is the binary digit '0' or '1'; the parameter is typed explicitly like the sibling helpers.
+{
+	return c  == '0' || c == '1';
+}
+
+static inline bool is_binary_or_(char c)  // True when c is '0', '1' or an underscore; the parameter is typed explicitly like the sibling helpers.
+{
+	switch (c)
+	{
+		case '0': case '1': case '_':
+			return true;
+		default:
+			return false;
+	}
+}
+
+static inline bool is_digit_or_(char c)  // True when c is a decimal digit ('0'-'9') or an underscore.
+{
+	switch (c)
+	{
+		case '0': case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9':
+		case '_':
+			return true;
+		default:
+			return false;
+	}
+}
+
+static inline bool is_digit(char c)  // True for the decimal digits '0' through '9'.
+{
+	return '0' <= c && c <= '9';
+}
+
+static inline bool is_hex_or_(char c)  // True when c is a hexadecimal digit (0-9, a-f, A-F) or an underscore.
+{
+	switch (c)
+	{
+		case 'a': case 'b': case 'c': case 'd': case 'e':
+		case 'f':
+		case 'A': case 'B': case 'C': case 'D': case 'E':
+		case 'F':
+		case '0': case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9':
+		case '_':
+			return true;
+		default:
+			return false;
+	}
+}
+
+static inline bool is_hex(char c)  // True when c is a hexadecimal digit (0-9, a-f, A-F); unlike is_hex_or_, an underscore is rejected.
+{
+	switch (c)
+	{
+		case 'a': case 'b': case 'c': case 'd': case 'e':
+		case 'f':
+		case 'A': case 'B': case 'C': case 'D': case 'E':
+		case 'F':
+		case '0': case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9':
+			return true;
+		default:
+			return false;
+	}
+}
+
+static inline bool is_alphanum_(char c)  // True when c is a letter (a-z, A-Z), a decimal digit, or an underscore.
+{
+	switch (c)
+	{
+		case 'a': case 'b': case 'c': case 'd': case 'e':
+		case 'f': case 'g': case 'h': case 'i': case 'j':
+		case 'k': case 'l': case 'm': case 'n': case 'o':
+		case 'p': case 'q': case 'r': case 's': case 't':
+		case 'u': case 'v': case 'w': case 'x': case 'y':
+		case 'z':
+		case 'A': case 'B': case 'C': case 'D': case 'E':
+		case 'F': case 'G': case 'H': case 'I': case 'J':
+		case 'K': case 'L': case 'M': case 'N': case 'O':
+		case 'P': case 'Q': case 'R': case 'S': case 'T':
+		case 'U': case 'V': case 'W': case 'X': case 'Y':
+		case 'Z':
+		case '0': case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9':
+		case '_':
+			return true;
+		default:
+			return false;
+	}
+}
+
+static inline bool is_letter(char c)  // True when c is a letter (a-z or A-Z); digits and underscore are rejected.
+{
+	switch (c)
+	{
+		case 'a': case 'b': case 'c': case 'd': case 'e':
+		case 'f': case 'g': case 'h': case 'i': case 'j':
+		case 'k': case 'l': case 'm': case 'n': case 'o':
+		case 'p': case 'q': case 'r': case 's': case 't':
+		case 'u': case 'v': case 'w': case 'x': case 'y':
+		case 'z':
+		case 'A': case 'B': case 'C': case 'D': case 'E':
+		case 'F': case 'G': case 'H': case 'I': case 'J':
+		case 'K': case 'L': case 'M': case 'N': case 'O':
+		case 'P': case 'Q': case 'R': case 'S': case 'T':
+		case 'U': case 'V': case 'W': case 'X': case 'Y':
+		case 'Z':
+			return true;
+		default:
+			return false;
+	}
+}
+
+
+#define FNV1_PRIME 0x01000193u
+#define FNV1_SEED 0x811C9DC5u
+#define FNV1a(c, seed) ((uint32_t)((((unsigned)(c)) ^ (seed)) * FNV1_PRIME))
+
+static inline uint32_t fnv1a(const char *key, uint32_t len)  // Folds the first len bytes of key into a 32-bit hash, starting from FNV1_SEED.
+{
+	uint32_t hash = FNV1_SEED;
+	for (uint32_t i = 0; i < len; i++)  // Index has the same unsigned type as len: no mixed-sign comparison and no int overflow for large len.
+	{
+		hash = FNV1a(key[i], hash);  // NOTE(review): where char is signed, bytes >= 0x80 sign-extend in the (unsigned) cast, so such input hashes differently from byte-wise FNV-1a.
+	}
+	return hash;
+}
--- a/src/utils/string_utils.h
+++ b/src/utils/string_utils.h
@@ -1,26 +0,0 @@
-#pragma once
-
-// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-
-#include <stdbool.h>
-
-static inline bool is_lower(char c)
-{
-	return c >= 'a' && c <= 'z';
-}
-
-static inline bool is_upper(char c)
-{
-	return c >= 'A' && c <= 'Z';
-}
-
-static inline bool is_alphanum_(char c)
-{
-	return (c >= 'a' && c <= 'z')
-		|| (c >= 'A' && c <= 'Z')
-		|| (c >= '0' && c <= '9')
-		|| c == '_';
-}