Started work on parsing: picking pieces from Titanos and trying to improve on them.

This commit is contained in:
Christoffer Lerno
2019-07-31 23:19:45 +02:00
parent 8f611a400d
commit 386ef35f36
31 changed files with 1586 additions and 135 deletions

View File

@@ -17,5 +17,6 @@ add_executable(c3c
src/compiler/lexer.c
src/compiler/tokens.c
src/compiler/symtab.c
src/compiler/parser.c
src/compiler_tests/tests.c
src/compiler_tests/benchmark.c src/compiler/malloc.c src/compiler/malloc.h src/compiler/compiler.c src/compiler/compiler.h)
src/compiler_tests/benchmark.c src/utils/malloc.c src/utils/malloc.h src/compiler/compiler.c src/compiler/compiler.h src/compiler/semantic_analyser.c src/compiler/semantic_analyser.h src/utils/common.h src/compiler/source_file.c src/compiler/source_file.h src/compiler/diagnostics.c src/compiler/diagnostics.h)

View File

@@ -0,0 +1,24 @@
module foo;
const int GLOBAL = 0;
struct Boo
{
int i;
union
{
int death;
};
}
func void test()
{
int i = 0;
i++;
if (i < 100)
{
int j = 0;
j += i;
return;
}
}

View File

@@ -9,6 +9,7 @@
#include <unistd.h>
#include <stdbool.h>
#include <string.h>
#include <utils/lib.h>
#include "../utils/errors.h"
@@ -97,12 +98,12 @@ static inline bool match_shortopt(const char* name)
void append_file()
{
if (build_options.file_count == MAX_FILES)
if (vec_size(build_options.files) == MAX_FILES)
{
fprintf(stderr, "Max %d files may be specified\n", MAX_FILES);
exit(EXIT_FAILURE);
}
build_options.files[build_options.file_count++] = current_arg;
build_options.files = VECADD(build_options.files, current_arg);
}
static bool arg_match(const char *candidate)
@@ -196,6 +197,20 @@ static void parse_option()
{
case 'h':
break;
case 'E':
if (build_options.compile_option != COMPILE_NORMAL)
{
FAIL_WITH_ERR("Illegal combination of compile options.");
}
build_options.compile_option = COMPILE_LEX_ONLY;
return;
case 'P':
if (build_options.compile_option != COMPILE_NORMAL)
{
FAIL_WITH_ERR("Illegal combination of compile options.");
}
build_options.compile_option = COMPILE_LEX_PARSE_ONLY;
return;
case '-':
if (match_longopt("about"))
{
@@ -220,12 +235,11 @@ static void parse_option()
{
if (at_end() || next_is_opt()) error_exit("error: --symtab needs a number.");
const char *number = next_arg();
int size = atoi(number);
int size = atoi(number); // NOLINT(cert-err34-c)
if (size < 1024) error_exit("error: --symtab valid size > 1024.");
if (size > MAX_SYMTAB_SIZE) error_exit("error: --symptab size cannot exceed %d", MAX_SYMTAB_SIZE);
build_options.symtab_size = size;
return;
}
if (match_longopt("help"))
{
@@ -251,6 +265,20 @@ void parse_arguments(int argc, const char *argv[])
build_options.path = ".";
build_options.command = COMMAND_MISSING;
build_options.symtab_size = DEFAULT_SYMTAB_SIZE;
build_options.files = VECNEW(const char *, MAX_FILES);
for (int i = DIAG_NONE; i < DIAG_WARNING_TYPE; i++)
{
build_options.severity[i] = DIAG_IGNORE;
}
for (int i = DIAG_WARNING_TYPE; i < DIAG_ERROR_TYPE; i++)
{
build_options.severity[i] = DIAG_WARN;
}
for (int i = DIAG_ERROR_TYPE; i < DIAG_END_SENTINEL; i++)
{
build_options.severity[i] = DIAG_ERROR;
}
arg_count = argc;
args = argv;
for (arg_index = 1; arg_index < arg_count; arg_index++)

View File

@@ -4,7 +4,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <stdint.h>
#include "../utils/common.h"
#define MAX_LIB_DIRS 1024
#define MAX_FILES 2048
@@ -25,19 +25,67 @@ typedef enum
COMMAND_UNIT_TEST,
} CompilerCommand;
typedef enum
{
DIAG_NONE = 0, // Don't use!
DIAG_WARNING_TYPE, // Don't use!
DIAG_UNUSED,
DIAG_UNUSED_PARAMETER,
DIAG_UNUSED_FUNCTION,
DIAG_UNUSED_VARIABLE,
DIAG_UNUSED_IMPORT,
DIAG_UNUSED_MODULE,
DIAG_UNUSED_LABEL,
DIAG_UNUSED_PUBLIC,
DIAG_UNUSED_TYPE,
DIAG_CONVERSION,
DIAG_COVERED_SWITCH_DEFAULT,
DIAG_UNREACHABLE_DEFAULT,
DIAG_ERROR_TYPE, // Don't use this!
DIAG_FALLOFF_NONVOID,
DIAG_DUPLICATE_ATTRIBUTE,
DIAG_NOT_IN_ENUM,
DIAG_MISSING_CASE,
DIAG_REMAINDER_DIV_BY_ZERO,
DIAG_INT_TO_POINTER_CAST,
DIAG_SHIFT_LHS_NEGATIVE,
DIAG_SHIFT_NEGATIVE,
DIAG_SHIFT_GT_TYPEWIDTH,
DIAG_END_SENTINEL
} DiagnosticsType;
typedef enum
{
DIAG_IGNORE = 0,
DIAG_WARN,
DIAG_ERROR,
} DiagnosticsSeverity;
typedef enum
{
COMPILE_NORMAL,
COMPILE_LEX_ONLY,
COMPILE_LEX_PARSE_ONLY,
COMPILE_OUTPUT_AST,
} CompileOption;
typedef struct
{
const char* lib_dir[MAX_LIB_DIRS];
int lib_count;
const char* files[MAX_FILES];
int file_count;
const char** files;
const char* project_name;
const char* target;
const char* path;
CompilerCommand command;
uint32_t symtab_size;
CompileOption compile_option;
DiagnosticsSeverity severity[DIAG_END_SENTINEL];
} BuildOptions;
extern BuildOptions build_options;
void parse_arguments(int argc, const char *argv[]);

View File

@@ -2,14 +2,68 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <utils/errors.h>
#include <utils/file_utils.h>
#include "compiler.h"
#include "symtab.h"
#include "../build/build_options.h"
#include "../utils/lib.h"
#include "lexer.h"
#include "source_file.h"
#include "parser.h"
#include "diagnostics.h"
// One-time compiler setup: size the symbol table and initialize it.
// The requested size is rounded up to a power of two — presumably the
// hash table implementation requires it (TODO confirm in symtab.c).
void compiler_init(void)
{
	uint32_t symtab_size = nextHighestPowerOf2(build_options.symtab_size);
	symtab_init(symtab_size);
}
static void compiler_lex()
{
VECEACH(build_options.files, i)
{
bool loaded = false;
File *file = source_file_load(build_options.files[i], &loaded);
if (loaded) continue;
lexer_add_file_for_lexing(file);
printf("# %s\n", file->full_path);
while (1)
{
Token token = lexer_scan_token();
printf("%s ", token_type_to_string(token.type));
if (token.type == TOKEN_EOF) break;
}
printf("\n");
}
exit(EXIT_SUCCESS);
}
void compiler_parse()
{
VECEACH(build_options.files, i)
{
bool loaded = false;
File *file = source_file_load(build_options.files[i], &loaded);
if (loaded) continue;
diag_reset();
parse_file(file);
printf("\n");
}
exit(EXIT_SUCCESS);
}
// Entry point for compiling the files collected in build_options.
// Note: compiler_lex() and compiler_parse() both call exit(), so the
// break statements after them are unreachable; the trailing TODO marks
// the not-yet-implemented full compilation path.
void compile_file()
{
	if (!vec_size(build_options.files)) error_exit("No files to compile.");
	switch (build_options.compile_option)
	{
		case COMPILE_LEX_ONLY:
			compiler_lex();
			break;
		case COMPILE_LEX_PARSE_ONLY:
			compiler_parse();
			break;
		default:
			break;
	}
	TODO
}

View File

@@ -6,3 +6,4 @@
void compiler_init();
void compile_file();

View File

@@ -4,9 +4,11 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <stdint.h>
#include "tokens.h"
#include "utils/common.h"
typedef uint32_t SourceLoc;
#define INVALID_LOC UINT32_MAX
typedef struct
{
@@ -30,9 +32,10 @@ typedef struct
{
const char *contents;
const char *name;
SourceLoc start;
SourceLoc end;
const char *full_path;
SourceLoc start_id;
SourceLoc end_id;
} File;
#define TOKEN_MAX_LENGTH 0xFFFF
#define MAX_IDENTIFIER_LENGTH 31

317
src/compiler/diagnostics.c Normal file
View File

@@ -0,0 +1,317 @@
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "diagnostics.h"
#include "source_file.h"
#include <math.h>
#include <stdarg.h>
// Global state for error/warning reporting.
// Fix: the struct tag was '_Diagnostics' — identifiers starting with an
// underscore followed by an uppercase letter are reserved for the
// implementation (CERT DCL37-C). The tag is unused, so it is dropped.
typedef struct
{
	bool panic_mode;   // When set, further diagnostics are suppressed until reset.
	unsigned errors;   // Errors reported so far.
	unsigned warnings; // Warnings reported so far.
	bool use_color;    // Output colorization flag (configuration, not per-run state).
} Diagnostics;

Diagnostics diagnostics;
void diag_reset(void)
{
diagnostics.panic_mode = false;
diagnostics.errors = 0;
diagnostics.warnings = 0;
}
// Clear panic mode so subsequent diagnostics are reported again.
void reset_panic_mode(void)
{
	diagnostics.panic_mode = false;
}

// Selects the label printed after the source excerpt in print_error().
typedef enum
{
	PRINT_TYPE_ERROR, // "Error:" prefix
	PRINT_TYPE_PREV,  // no prefix — presumably for "previously defined here" notes
	PRINT_TYPE_WARN   // "Warning:" prefix
} PrintType;
// Print a diagnostic: up to LINES_SHOWN lines of source context ending at
// the error line, a caret underline of the offending range, and a final
// "(file:line) ..." message whose label depends on print_type.
//
// Fixes relative to the original:
//  - line-number width used (int)round(log10(line)) + 1, which overcounts
//    for e.g. line 9 or 95 (round() rounds up from .5); replaced with an
//    exact digit count.
//  - the '%.*s' precision argument was a ptrdiff_t; variadic printf
//    requires an int there (UB otherwise) — explicit casts added.
//  - LINES_SHOWN was a 'const int' (not a constant expression in C, so the
//    array was a VLA) and the shift loop hardcoded 3; now an enum.
static void print_error(SourceRange source_range, const char *message, PrintType print_type)
{
	File *file = source_file_from_position(source_range.loc);
	const char *content = file->contents;
	const char *error_start = file->contents + source_range.loc - file->start_id;

	// Number of source lines shown, including the error line itself.
	enum { LINES_SHOWN = 4 };

	// linestarts[0] = start of current line, [1] = previous line, etc.
	const char *linestarts[LINES_SHOWN];
	for (int i = 0; i < LINES_SHOWN; i++) linestarts[i] = NULL;
	linestarts[0] = content;

	const char *current = content;
	unsigned line = 1;
	while (current < error_start)
	{
		if (current[0] == '\n')
		{
			line++;
			// Shift the line-start history window one step back.
			for (int i = LINES_SHOWN - 1; i > 0; i--) linestarts[i] = linestarts[i - 1];
			linestarts[0] = current + 1;
		}
		current++;
	}

	// Find the end of the line containing the error.
	const char *end = current;
	while (*end != '\n' && *end != '\0') end++;

	// Exact number of digits in the widest (= last) line number printed.
	int max_line_length = 1;
	for (unsigned l = line; l >= 10; l /= 10) max_line_length++;

	// Build a format string of the form "%<width>d: %.*s\n".
	char number_buffer[20];
	snprintf(number_buffer, sizeof(number_buffer), "%%%dd: %%.*s\n", max_line_length);

	// Print the context lines (oldest first); missing history entries are NULL.
	for (int i = LINES_SHOWN - 1; i > 0; i--)
	{
		const char *start = linestarts[i];
		if (start == NULL) continue;
		const char *line_end = linestarts[i - 1];
		// -1 drops the '\n' that terminates the context line.
		eprintf(number_buffer, (int)(line - i), (int)(line_end - start - 1), start);
	}
	// The error line itself (no trailing '\n' included in [linestarts[0], end)).
	eprintf(number_buffer, (int)line, (int)(end - linestarts[0]), linestarts[0]);

	// Pad to the error column ("<width>: " prefix + offset into the line),
	// then underline the range with carets.
	for (int i = 0; i < max_line_length + 2 + (int)(error_start - linestarts[0]); i++)
	{
		eprintf(" ");
	}
	for (int i = 0; i < source_range.length; i++)
	{
		eprintf("^");
	}
	eprintf("\n");

	switch (print_type)
	{
		case PRINT_TYPE_ERROR:
			eprintf("(%s:%d) Error: %s\n", file->name, line, message);
			break;
		case PRINT_TYPE_PREV:
			eprintf("(%s:%d) %s\n", file->name, line, message);
			break;
		case PRINT_TYPE_WARN:
			eprintf("(%s:%d) Warning: %s\n", file->name, line, message);
			break;
		default:
			UNREACHABLE
	}
}
// Format the printf-style message into a fixed buffer and emit it
// through print_error() with the error label.
static void vprint_error(SourceRange span, const char *message, va_list args)
{
	char buffer[256];
	vsnprintf(buffer, sizeof(buffer), message, args);
	print_error(span, buffer, PRINT_TYPE_ERROR);
}
// Report a printf-style error at the given range.
// Fix: the panic-mode check, error printing and counter bump were
// duplicated between diag_error_at() and diag_verror_at(); the variadic
// entry point now delegates to the va_list one (declared in diagnostics.h).
void diag_error_at(SourceRange span, const char *message, ...)
{
	va_list args;
	va_start(args, message);
	diag_verror_at(span, message, args);
	va_end(args);
}

// va_list variant: in panic mode further errors are suppressed; otherwise
// enter panic mode, print the error and count it.
void diag_verror_at(SourceRange span, const char *message, va_list args)
{
	if (diagnostics.panic_mode) return;
	diagnostics.panic_mode = true;
	vprint_error(span, message, args);
	diagnostics.errors++;
}
/*
bool diagnostics_silence_warnings(Array *warnings)
{
for (unsigned i = 0; i < warnings->count; i++)
{
const char *warning = warnings->entries[i];
if (strcmp("no-unused", warning) == 0)
{
diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED);
diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_PARAMETER);
continue;
}
if (strcmp("no-unused-variable", warning) == 0)
{
diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_VARIABLE);
continue;
}
if (strcmp("no-unused-function", warning) == 0)
{
diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_FUNCTION);
continue;
}
if (strcmp("no-unused-type", warning) == 0)
{
diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_TYPE);
continue;
}
if (strcmp("no-unused-module", warning) == 0)
{
diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_MODULE);
continue;
}
if (strcmp("no-unused-public", warning) == 0)
{
diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_PUBLIC);
continue;
}
if (strcmp("no-unused-import", warning) == 0)
{
diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_IMPORT);
continue;
}
if (strcmp("no-unused-label", warning) == 0)
{
diagnostics_update_severity(DIAG_IGNORE, DIAG_UNUSED_LABEL);
continue;
}
PRINT_ERROR("recipe has unknown warning: '%s'\n", warning);
return false;
}
return true;
}
void prev_at_range(SourceRange span, const char *message, ...)
{
va_list args;
va_start(args, message);
char buffer[256];
vsnprintf(buffer, 256, message, args);
print_error(span, buffer, PRINT_TYPE_PREV);
va_end(args);
}
void prev_at(SourceLoc loc, const char *message, ...)
{
va_list args;
va_start(args, message);
char buffer[256];
vsnprintf(buffer, 256, message, args);
print_error((SourceRange){ loc, 1 }, buffer, PRINT_TYPE_PREV);
va_end(args);
}
void sema_error_range(SourceRange token, const char *message, ...)
{
va_list args;
va_start(args, message);
vprint_error(token, message, args);
va_end(args);
diagnostics.errors++;
}
void sema_error_at(SourceLoc loc, const char *message, ...)
{
va_list args;
va_start(args, message);
vprint_error((SourceRange) { loc, 1 }, message, args);
va_end(args);
diagnostics.errors++;
}
void sema_warn_at(DiagnosticsType type, SourceLoc loc, const char *message, ...)
{
// TODO ENABLE
return;
SourceRange span = {.loc = loc, .length = 1};
switch (diagnostics.severity[type])
{
case DIAG_IGNORE:
return;
case DIAG_WARN:
break;
case DIAG_ERROR:
{
va_list args;
va_start(args, message);
vprint_error(span, message, args);
va_end(args);
diagnostics.errors++;
return;
}
}
va_list args;
va_start(args, message);
char buffer[256];
vsnprintf(buffer, 256, message, args);
if (diagnostics.severity[type])
print_error(span, buffer, PRINT_TYPE_WARN);
va_end(args);
diagnostics.warnings++;
}
void sema_warn_range(DiagnosticsType type, SourceRange span, const char *message, ...)
{
// TODO ENABLE
return;
switch (diagnostics.severity[type])
{
case DIAG_IGNORE:
return;
case DIAG_WARN:
break;
case DIAG_ERROR:
{
va_list args;
va_start(args, message);
vprint_error(span, message, args);
va_end(args);
diagnostics.errors++;
return;
}
}
va_list args;
va_start(args, message);
char buffer[256];
vsnprintf(buffer, 256, message, args);
if (diagnostics.severity[type]) print_error(span, buffer, PRINT_TYPE_WARN);
va_end(args);
diagnostics.warnings++;
}
unsigned errors()
{
return diagnostics.errors;
}
bool error_found()
{
return diagnostics.errors > 0;
}
*/

View File

@@ -0,0 +1,34 @@
#pragma once
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "compiler_common.h"
void diag_reset(void);
void diag_error_at(SourceRange span, const char *message, ...);
void diag_verror_at(SourceRange span, const char *message, va_list args);
/*
typedef struct _Array Array;
void diagnostics_init(void);
void diagnostics_reset(void);
void diagnostics_update_severity(DiagnosticsSeverity severity, DiagnosticsType type);
bool diagnostics_silence_warnings(Array *warnings);
void diagnostics_use_color(bool use_color);
void verror_at(SourceRange span, const char *message, va_list args);
void sema_error_range(SourceRange token, const char *message, ...);
void sema_error_at(SourceLoc loc, const char *message, ...);
void prev_at_range(SourceRange span, const char *message, ...);
void prev_at(SourceLoc loc, const char *message, ...);
void sema_warn_at(DiagnosticsType type, SourceLoc loc, const char *message, ...);
void sema_warn_range(DiagnosticsType type, SourceRange span, const char *message, ...);
bool in_panic_mode(void);
unsigned errors();
void reset_panic_mode(void);
bool error_found(void);
*/

View File

@@ -2,15 +2,14 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <stdint.h>
#include "lexer.h"
#include <string.h>
#include <assert.h>
#include <build/build_options.h>
#include "../utils/errors.h"
#include "../utils/lib.h"
#include "symtab.h"
#include "source_file.h"
#include "diagnostics.h"
#include <stdarg.h>
typedef enum
{
@@ -22,6 +21,7 @@ typedef enum
typedef struct
{
bool lexer_init_complete;
const char *begin;
const char *start;
const char *current;
@@ -78,14 +78,17 @@ static inline bool reached_end(void)
return *lexer.current == '\0';
}
static Token error_token(const char *message)
static Token error_token(const char *message, ...)
{
Token token;
token.type = TOKEN_ERROR;
token.type = INVALID_TOKEN;
token.start = lexer.start;
token.span.length = 1;
token.span.loc = lexer.current_file->start + (lexer.begin - lexer.start);
// TODO error_at(token.span, message);
token.span.loc = lexer.current_file->start_id + (lexer.begin - lexer.start);
va_list list;
va_start(list, message);
diag_verror_at(token.span, message, list);
va_end(list);
return token;
}
@@ -97,7 +100,7 @@ static Token make_token(TokenType type)
{
.type = type,
.start = lexer.start,
.span = { .loc = lexer.current_file->start + (lexer.start - lexer.begin), .length = token_size }
.span = { .loc = lexer.current_file->start_id + (lexer.start - lexer.begin), .length = token_size }
};
}
@@ -109,7 +112,7 @@ static Token make_string_token(TokenType type, const char* string)
{
.type = type,
.start = lexer.start,
.span = { .loc = lexer.current_file->start + (lexer.start - lexer.begin), .length = token_size },
.span = { .loc = lexer.current_file->start_id + (lexer.start - lexer.begin), .length = token_size },
.string = string,
};
}
@@ -293,6 +296,7 @@ static inline Token scan_ident(void)
hash = FNV1a(next(), hash);
}
EXIT:;
if (type == INVALID_TOKEN) error_token("An identifier may not only consist of '_'");
uint32_t len = lexer.current - lexer.start;
const char* interned_string = symtab_add(lexer.start, len, hash, &type);
return make_string_token(type, interned_string);
@@ -303,8 +307,8 @@ static inline Token scan_ident(void)
static Token scan_oct(void)
{
next(); // Skip the o
if (!is_oct(next())) return error_token("Invalid octal sequence");
char o = next(); // Skip the o
if (!is_oct(next())) return error_token("An expression starting with '0%c' would expect to be followed by octal numbers (0-7).", o);
while (is_oct_or_(peek())) next();
return make_token(TOKEN_INTEGER);
}
@@ -312,41 +316,49 @@ static Token scan_oct(void)
Token scan_binary(void)
{
next(); // Skip the b
if (!is_binary(next())) return error_token("Invalid binary sequence");
char b = next(); // Skip the b
if (!is_binary(next()))
{
return error_token("An expression starting with '0%c' would expect a sequence of zeroes and ones, "
"did you try to write a hex value but forgot the '0x'?", b);
}
while (is_binary_or_(peek())) next();
return make_token(TOKEN_INTEGER);
}
#define PARSE_SPECIAL_NUMBER(is_num, is_num_with_underscore, exp, EXP) \
while (is_num_with_underscore(peek())) next(); \
bool is_float = false; \
if (peek() == '.') \
{ \
while (is_num_with_underscore(peek())) next(); \
bool is_float = false; \
if (peek() == '.') \
{ \
is_float = true; \
next(); \
char c = peek(); \
if (c == '_') return error_token("Underscore may only appear between digits."); \
if (c == '_') return error_token("Can't parse this as a floating point value due to the '_' directly after decimal point."); \
if (is_num(c)) next(); \
while (is_num_with_underscore(peek())) next(); \
} \
char c = peek(); \
if (c == (exp) || c == (EXP)) \
{ \
} \
char c = peek(); \
if (c == (exp) || c == (EXP)) \
{ \
is_float = true; \
next(); \
char c2 = next(); \
if (c2 == '+' || c2 == '-') c2 = next(); \
if (!is_num(c2)) return error_token("Invalid exponential expression"); \
if (!is_num(c2)) return error_token("Parsing the floating point exponent failed, because '%c' is not a number.", c2); \
while (is_num(peek())) next(); \
} \
if (prev() == '_') return error_token("Underscore may only appear between digits."); \
return make_token(is_float ? TOKEN_FLOAT : TOKEN_INTEGER)
} \
if (prev() == '_') return error_token("The number ended with '_', but that character needs to be between, not after, digits."); \
return make_token(is_float ? TOKEN_FLOAT : TOKEN_INTEGER)
static inline Token scan_hex(void)
{
next(); // skip the x
if (!is_hex(next())) return error_token("Invalid hex sequence");
char x = next(); // skip the x
if (!is_hex(next()))
{
return error_token("'0%c' starts a hexadecimal number, "
"but it was followed by '%c' which is not part of a hexadecimal number.", x, prev());
}
PARSE_SPECIAL_NUMBER(is_hex, is_hex_or_, 'p', 'P');
}
@@ -363,7 +375,6 @@ static inline Token scan_digit(void)
{
switch (peek_next())
{
// case 'X': Let's not support this? REVISIT
case 'x':
case 'X':
advance(2);
@@ -397,11 +408,17 @@ static inline Token scan_char()
{
for (int i = 0; i < 2; i++)
{
if (!is_hex(next())) return error_token("Invalid escape sequence");
if (!is_hex(next()))
{
return error_token(
"An escape sequence starting with "
"'\\x' needs to be followed by "
"a two digit hexadecimal number.");
}
}
}
if (next() != '\'') return error_token("Invalid character value");
}
if (next() != '\'') return error_token("The character only consist of a single character, did you want to use \"\" instead?");
return make_token(TOKEN_INTEGER);
}
@@ -417,7 +434,7 @@ static inline Token scan_string()
}
if (reached_end())
{
return error_token("Unterminated string.");
return error_token("Reached the end looking for '\"'. Did you forget it?");
}
}
return make_token(TOKEN_STRING);
@@ -513,15 +530,15 @@ static inline Token scan_docs(void)
next();
return make_token(TOKEN_DOCS_LINE);
case '\0':
return error_token("Docs reached end of the file");
return error_token("The document ended without finding the end of the doc comment. "
"Did you forget a '*/' somewhere?");
default:
break;
}
}
}
Token scan_token(void)
Token lexer_scan_token(void)
{
// First we handle our "in docs" state.
if (lexer.lexer_state == LEXER_STATE_DOCS_PARSE)
@@ -544,7 +561,7 @@ Token scan_token(void)
lexer.lexer_state = LEXER_STATE_DOCS_PARSE;
return make_token(TOKEN_DOCS_START);
case WHITESPACE_COMMENT_REACHED_EOF:
return error_token("Comment was not terminated");
return error_token("Reached the end looking for '*/'. Did you forget it somewhere?");
case WHITESPACE_FOUND_EOF:
return make_token(TOKEN_EOF);
case WHITESPACE_FOUND_DOCS_EOL:
@@ -637,25 +654,52 @@ Token scan_token(void)
backtrack();
return is_digit(c) ? scan_digit() : scan_ident();
}
return error_token("Unexpected character.");
return error_token("'%c' may not be placed outside of a string or comment, did you perhaps forget a \" somewhere?", c);
}
}
void lexer_test_setup(const char* text)
// Accessor for the file currently registered with the lexer.
File* lexer_current_file(void)
{
	return lexer.current_file;
}

// Lazy one-time lexer initialization.
// NOTE(review): this passes the raw build_options.symtab_size to
// symtab_init(), while compiler_init() rounds it up to a power of two
// first — confirm which is intended, and whether double initialization
// (compiler_init + this) is safe.
void lexer_check_init(void)
{
	if (lexer.lexer_init_complete) return;
	lexer.lexer_init_complete = true;
	symtab_init(build_options.symtab_size);
}
// Point the lexer at a new file: all scanning pointers are reset to the
// start of the file's contents and the state machine returns to NORMAL.
void lexer_add_file_for_lexing(File *file)
{
	LOG_FUNC
	lexer_check_init();
	lexer.current_file = file;
	lexer.last_in_range = 0;
	lexer.begin = lexer.current_file->contents;
	lexer.start = lexer.begin;
	lexer.current = lexer.start;
	lexer.lexer_state = LEXER_STATE_NORMAL;
}
void lexer_test_setup(const char *text, size_t len)
{
lexer_check_init();
static File helper;
lexer.lexer_state = LEXER_STATE_NORMAL;
lexer.start = text;
lexer.current = text;
lexer.begin = text;
lexer.current_file = &helper;
lexer.current_file->start = 0;
lexer.current_file->start_id = 0;
lexer.current_file->contents = text;
lexer.current_file->end = 100000;
lexer.current_file->name = "Foo";
lexer.current_file->end_id = len;
lexer.current_file->name = "Test";
}
Token scan_ident_test(const char* scan)
Token lexer_scan_ident_test(const char *scan)
{
static File helper;
lexer.lexer_state = LEXER_STATE_NORMAL;
@@ -663,9 +707,9 @@ Token scan_ident_test(const char* scan)
lexer.current = scan;
lexer.begin = scan;
lexer.current_file = &helper;
lexer.current_file->start = 0;
lexer.current_file->start_id = 0;
lexer.current_file->contents = scan;
lexer.current_file->end = 1000;
lexer.current_file->end_id = 1000;
lexer.current_file->name = "Foo";
if (scan[0] == '@' && is_letter(scan[1]))
@@ -674,5 +718,5 @@ Token scan_ident_test(const char* scan)
return scan_docs();
}
return scan_token();
return lexer_scan_token();
}

View File

@@ -4,14 +4,14 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "tokens.h"
#include "compiler_common.h"
Token scan_token(void);
Token lexer_scan_token(void);
TokenType identifier_type(const char* restrict start, int len);
TokenType ident_type_fnv1(const char *restrict start, int len);
Token lexer_scan_ident_test(const char *scan);
Token scan_ident_test(const char* scan);
void lexer_test_setup(const char* text);
void lexer_test_setup(const char *text, size_t len);
void lexer_add_file_for_lexing(File *file);
File* lexer_current_file(void);
void lexer_check_init(void);

493
src/compiler/parser.c Normal file
View File

@@ -0,0 +1,493 @@
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <stdbool.h>
#include <stdarg.h>
#include "../utils/errors.h"
#include "parser.h"
#include "semantic_analyser.h"
#include "lexer.h"
// Maximum number of doc-comment lines buffered by parse_docs().
// Fix: this was 'const int', which is not a constant expression in C, so
// 'static Token docs[MAX_DOCS_ROWS]' did not compile (a static array
// cannot have a variable length). An enum constant is valid everywhere.
enum { MAX_DOCS_ROWS = 1024 };

// Current and previous token of the parse stream.
Token tok;
Token prev_tok;

// Sentinel token used where a rule failed but a placeholder is needed.
Token poisoned = {
	.type = INVALID_TOKEN,
};
// --- Parser base methods
// Stub: wrap a raw interned name into a Token. Not implemented (TODO aborts).
Token token_wrap(const char* name)
{
	TODO
}
// Move to the next token, remembering the previous one. Invalid (error)
// tokens are skipped here — the lexer has already reported them through
// the diagnostics module.
void advance(void)
{
	prev_tok = tok;
	do
	{
		tok = lexer_scan_token();
	} while (tok.type == INVALID_TOKEN);
}
// Advance past a token the caller has already matched; the assert
// documents (and checks in debug builds) that contract.
void advance_and_verify(TokenType token_type)
{
	assert(tok.type == token_type);
	advance();
}

// Consume the current token only if it has the given type.
// Returns true when a token was consumed.
bool try_consume(TokenType type)
{
	if (tok.type != type) return false;
	advance();
	return true;
}
// Consume a token of the given type, or report the printf-style error at
// the current token's range. Returns true when the token was consumed.
bool consume(TokenType type, const char *message, ...)
{
	if (try_consume(type)) return true;

	va_list args;
	va_start(args, message);
	sema_verror_at(tok.span, message, args);
	va_end(args);
	return false;
}
// True when the current token is any kind of identifier
// (variable, type or all-caps constant style).
bool was_ident()
{
	return tok.type == TOKEN_VAR_IDENT
	       || tok.type == TOKEN_TYPE_IDENT
	       || tok.type == TOKEN_CAPS_IDENT;
}
// Stub: skip tokens until one of the given type is found. TODO aborts.
static void recover_to(TokenType type)
{
	TODO
}

// Stub: skip to a token of the given type and consume it. TODO aborts.
static void recover_and_consume(TokenType type)
{
	TODO
}

// Statement-level recovery: resynchronize past the next ';'.
static void recover_stmt(void)
{
	recover_and_consume(TOKEN_EOS);
}
/**
 * Walk until we find the first top level construct.
 * (Note that this is the slow path, so no need to inline)
 */
static void recover_top_level(void)
{
	while (tok.type != TOKEN_EOF)
	{
		switch (tok.type)
		{
			case TOKEN_FUNC:
			case TOKEN_CONST:
			case TOKEN_TYPEDEF:
			case TOKEN_ERROR:
			case TOKEN_STRUCT:
			case TOKEN_IMPORT:
			case TOKEN_UNION:
			case TOKEN_ENUM:
			case TOKEN_MACRO:
				return;
			default:
				// Fix: the original never advanced here, so the loop spun
				// forever on the same non-top-level token.
				advance();
				break;
		}
	}
}
// Require a statement-terminating ';'. On failure the error has already
// been reported by consume(); attempt to resynchronize past the next ';'.
// Returns true when the ';' was present.
static inline bool consume_stmt_end(void)
{
	if (consume(TOKEN_EOS, "Expected ';'")) return true;
	recover_and_consume(TOKEN_EOS);
	return false;
}
// Report a printf-style error at the current token's source range.
void error_at_current(const char* message, ...)
{
	va_list args;
	va_start(args, message);
	sema_verror_at(tok.span, message, args);
	va_end(args);
}
// --- Parsing
/**
* Optionally parse docs
*
* docs ::= TOKEN_DOCS_START docs_body TOKEN_DOCS_END
*
* docs_body ::= docs_line
* | docs_body TOKEN_DOCS_EOL docs_line
*
* docs_line ::= TOKEN_DOCS_LINE
* | TOKEN_AT_IDENT [ignored]
* | TOKEN_AT_RETURN string
* | TOKEN_AT_PARAM VAR_IDENT string
* | TOKEN_AT_THROWS string
* | TOKEN_AT_REQUIRE [expr]+
* | TOKEN_AT_ENSURE [expr]+
* | TOKEN_AT_CONST VAR_IDENT
* | TOKEN_AT_PURE
* | TOKEN_AT_REQPARSE [expr | stmt]
* | TOKEN_AT_DEPRECATED
*/
static inline void parse_docs()
{
LOG_FUNC
if (!try_consume(TOKEN_DOCS_START)) return;;
static Token docs[MAX_DOCS_ROWS];
int lines = 0;
while (tok.type != TOKEN_DOCS_END)
{
switch (tok.type)
{
case TOKEN_DOCS_LINE:
if (lines == MAX_DOCS_ROWS) error_at_current("Exceeded max number of lines in docs: %d.", MAX_DOCS_ROWS);
docs[lines++] = tok;
break;
case TOKEN_AT_DEPRECATED:
break;
case TOKEN_AT_PURE:
break;
case TOKEN_AT_IDENT:
case TOKEN_AT_THROWS:
case TOKEN_AT_CONST:
case TOKEN_AT_REQPARSE:
case TOKEN_AT_PARAM:
case TOKEN_AT_RETURN:
default:
TODO
}
}
}
/**
 * module ::= [docs]? MODULE IDENTIFIER EOS
 */
static inline void parse_module()
{
	LOG_FUNC
	parse_docs();
	// No explicit module declaration: derive the module from the file name.
	if (!try_consume(TOKEN_MODULE))
	{
		sema_add_module_from_filename();
		return;
	}
	// 'module' must be followed by a valid (lower-case) identifier.
	if (consume(TOKEN_VAR_IDENT, "Expected a valid module name"))
	{
		sema_add_module(prev_tok);
		consume_stmt_end();
		return;
	}
	// Bad module name: register the poisoned sentinel and resynchronize.
	sema_add_module(poisoned);
	recover_top_level();
}
/**
 * import ::= IMPORT VAR_IDENT EOS
 * | IMPORT VAR_IDENT AS VAR_IDENT EOS
 * | IMPORT VAR_IDENT LOCAL EOS
 */
static inline void parse_import()
{
	advance_and_verify(TOKEN_IMPORT);
	if (!consume(TOKEN_VAR_IDENT, "Expected a module name"))
	{
		recover_top_level();
		return;
	}
	Token module_name = prev_tok;
	// Fix: 'Token alias = {};' — an empty brace initializer is only valid
	// from C23; {0} zero-initializes identically in C99/C11.
	Token alias = {0};
	ImportType import_type = IMPORT_TYPE_FULL;
	if (try_consume(TOKEN_AS))
	{
		if (!consume(TOKEN_VAR_IDENT, "Expected a valid alias name"))
		{
			recover_and_consume(TOKEN_EOS);
			return;
		}
		alias = prev_tok;
		import_type = IMPORT_TYPE_ALIAS;
	}
	// 'local' may follow either form and narrows the import's visibility.
	if (try_consume(TOKEN_LOCAL))
	{
		import_type = import_type == IMPORT_TYPE_ALIAS ? IMPORT_TYPE_ALIAS_LOCAL : IMPORT_TYPE_LOCAL;
	}
	sema_add_import(module_name, alias, import_type);
	consume_stmt_end();
}
/**
 * imports ::= import
 * | imports import
 */
static inline void parse_imports(void)
{
	// Imports form an uninterrupted run after the module declaration.
	for (;;)
	{
		if (tok.type != TOKEN_IMPORT) return;
		parse_import();
	}
}
// Stub: parse a function declaration. TODO aborts.
static inline void parse_func(void)
{
	TODO
}

// Stub: parse a type. TODO aborts. The void* return is a placeholder
// for a future type AST node.
static inline void *parse_type(void)
{
	TODO
}

// Stub: parse an expression whose evaluation is deferred. TODO aborts.
static inline void *parse_deferred_expression(void)
{
	TODO
}
static inline void parse_const(void)
{
advance_and_verify(TOKEN_CONST);
// parse_type();
if (!consume(TOKEN_CAPS_IDENT, "Expected an upper case identifier"))
{
recover_top_level();
return;
}
if (!consume(TOKEN_EQEQ, "Expected '=' here"))
{
recover_top_level();
return;
}
parse_deferred_expression();
consume_stmt_end();
}
// Stub: parse a union declaration. TODO aborts.
static inline void parse_union(void)
{
	TODO;
}

// Stub: parse a struct declaration. TODO aborts.
static inline void parse_struct(void)
{
	TODO;
}

// Stub: parse a macro declaration. TODO aborts.
static inline void parse_macro(void)
{
	TODO;
}
/**
* error ::= ERROR TYPE_IDENT '{' CAPS_IDENT (',' CAPS_IDENT)* ','? '}'
*/
static inline void parse_error(void)
{
advance_and_verify(TOKEN_ERROR);
if (!consume(TOKEN_TYPE_IDENT, "Expected a valid error type name here"))
{
recover_top_level();
return;
}
Token name = prev_tok;
if (!consume(TOKEN_LBRACE, "Expected {' after error type name"))
{
recover_top_level();
return;
}
while (tok.type == TOKEN_CAPS_IDENT)
{
// TODO store
advance();
if (!try_consume(TOKEN_COMMA)) break;
}
if (!consume(TOKEN_RBRACE, "Expected '}' here"))
{
recover_top_level();
}
sema_add_errors(name /* todo values */);
}
/**
 * enum ::= ENUM TYPE_NAME (':' type)? '{' enum_def (',' enum_def)* ','? '}'
 *
 * enum_def ::= CAPS_IDENT ('=' const_expr)?
 *
 * TODO enum extra data?
 */
static inline void parse_enum(void)
{
	advance_and_verify(TOKEN_ENUM);
	if (!consume(TOKEN_TYPE_IDENT, "Expected a valid enum type name here"))
	{
		recover_top_level();
		return;
	}
	Token name = prev_tok;
	void *type = NULL;
	// Optional underlying type: ': type'.
	if (try_consume(TOKEN_COLON))
	{
		type = parse_type();
	}
	// Fix: the second message was missing the opening quote around '{'.
	if (!consume(TOKEN_LBRACE, type ? "Expected '{' after enum type" : "Expected '{' after enum type name"))
	{
		recover_top_level();
		return;
	}
	while (tok.type == TOKEN_CAPS_IDENT)
	{
		// TODO store
		advance();
		if (try_consume(TOKEN_EQ))
		{
			// Store
			parse_deferred_expression();
		}
		if (!try_consume(TOKEN_COMMA)) break;
	}
	if (!consume(TOKEN_RBRACE, "Expected '}' here"))
	{
		recover_top_level();
		return;
	}
	// NOTE(review): this registers the enum through sema_add_errors(),
	// which looks copy-pasted from parse_error() — confirm whether a
	// dedicated sema_add_enum() is intended.
	sema_add_errors(name /* todo values */);
}
// Stub: parse a global variable declaration. TODO aborts.
static inline void parse_global_var(void)
{
	TODO;
}
// macro_var ::= DOLLAR_IDENT '=' deferred_expr ';'
static inline void parse_macro_var(void)
{
	advance_and_verify(TOKEN_DOLLAR_IDENT);
	Token var_name = prev_tok;
	if (consume(TOKEN_EQ, "Expected assignment here"))
	{
		// TODO use the result
		parse_deferred_expression();
		sema_add_macro_var(var_name /* , expr **/ );
		consume_stmt_end();
		return;
	}
	recover_top_level();
}
// Stub: parse a top-level macro expansion (@ident ...). TODO aborts.
static inline void parse_macro_expansion(void)
{
	TODO
}
// Dispatch loop for all top level constructs; runs until end of file.
// Unknown tokens are reported once and skipped via recover_top_level().
static inline void parse_top_level()
{
	LOG_FUNC
	while (tok.type != TOKEN_EOF)
	{
		switch (tok.type)
		{
			case TOKEN_FUNC:
				parse_func();
				break;
			case TOKEN_CONST:
				parse_const();
				break;
			case TOKEN_STRUCT:
				parse_struct();
				break;
			case TOKEN_UNION:
				parse_union();
				break;
			case TOKEN_MACRO:
				parse_macro();
				break;
			case TOKEN_ENUM:
				parse_enum();
				break;
			case TOKEN_ERROR:
				parse_error();
				break;
			case TOKEN_PUBLIC:
				sema_mark_next_public();
				// Fix: consume the 'public' token — without this advance()
				// the loop would process the same token forever.
				advance();
				break;
			case TOKEN_TYPE_IDENT:
				parse_global_var();
				break;
			case TOKEN_AT_IDENT:
				parse_macro_expansion();
				break;
			case TOKEN_DOLLAR_IDENT:
				parse_macro_var();
				break;
			case TOKEN_DOCS_START:
				parse_docs();
				break;
			default:
				error_at_current("Unexpected token found");
				recover_top_level();
				break;
		}
	}
}
// Parse the file currently registered with the lexer:
// module declaration, then the import run, then top level constructs.
void parse_current(void)
{
	LOG_FUNC
	advance();
	parse_module();
	parse_imports();
	parse_top_level();
}

// Parse a single source file: hand it to the lexer, set up the semantic
// analyser's compilation unit, then run the parser over it.
void parse_file(File *file)
{
	LOG_FUNC
	lexer_add_file_for_lexing(file);
	sema_init(file);
	parse_current();
}

10
src/compiler/parser.h Normal file
View File

@@ -0,0 +1,10 @@
#pragma once
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "compiler_common.h"
#include "tokens.h"
void parse_file(File *file);

View File

@@ -0,0 +1,68 @@
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "semantic_analyser.h"
#include <utils/errors.h>
#include <utils/lib.h>
#include "../utils/file_utils.h"
#include "diagnostics.h"
#include "symtab.h"
CompilationUnit current_unit;
// Reset the per-file analysis state before parsing 'file'.
void sema_init(File *file)
{
	LOG_FUNC
	current_unit.file = file;
	// INVALID_TOKEN marks "no module declaration seen yet".
	current_unit.module_name.type = INVALID_TOKEN;
}
// Record the module declared by the current file's `module` statement.
void sema_add_module(Token module_name)
{
	LOG_FUNC
	current_unit.module_name = module_name;
}
// Fall back to deriving the module name from the file name when the source
// has no module declaration. NOTE(review): unfinished — even the success
// path ends in TODO, so this currently always aborts.
void sema_add_module_from_filename(void)
{
	LOG_FUNC
	char buffer[MAX_IDENTIFIER_LENGTH + 1];
	int len = filename_to_module(current_unit.file->full_path, buffer);
	if (!len)
	{
		// File name could not be turned into a usable module name.
		TODO
	}
	TokenType type = TOKEN_VAR_IDENT;
	// Intern the derived name; symtab_add may reclassify it (e.g. keyword clash).
	const char *module_name = symtab_add(buffer, len, fnv1a(buffer, len), &type);
	if (type != TOKEN_VAR_IDENT)
	{
		// Derived name collides with a keyword or other identifier class.
		TODO
	}
	current_unit.module_name.string = module_name;
	TODO
}
// Record an import declaration for the current unit (unimplemented).
void sema_add_import(Token module_name, Token alias, ImportType import_type)
{
	TODO
}
// Record an `error` type declaration (unimplemented).
void sema_add_errors(Token error_type_name /* todo values */)
{
	TODO
}
// Record a top-level macro variable ($ident = expr) (unimplemented).
void sema_add_macro_var(Token macro_var_name /* , expr **/ )
{
	TODO
}
// If we have a public parameter, then the next one will be the real one.
void sema_mark_next_public(void)
{
	TODO
}
// Report a diagnostic at 'range' with printf-style args (unimplemented).
void sema_verror_at(SourceRange range, const char *message, va_list args)
{
	TODO
}

View File

@@ -0,0 +1,32 @@
#pragma once
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "compiler_common.h"
// Per-file state collected while analysing a single translation unit.
typedef struct
{
	Token module_name;  // module declared in the file; type INVALID_TOKEN until seen
	File *file;         // source file under analysis
} CompilationUnit;
// How an import brings a module's symbols into scope.
typedef enum {
	IMPORT_TYPE_FULL,
	IMPORT_TYPE_ALIAS,
	IMPORT_TYPE_ALIAS_LOCAL,
	IMPORT_TYPE_LOCAL
} ImportType;
// Reset per-file state before parsing 'file'.
void sema_init(File *file);
void sema_add_module(Token module_name);
void sema_add_module_from_filename(void);
void sema_add_import(Token module_name, Token alias, ImportType import_type);
void sema_add_errors(Token error_type_name /* todo values */);
void sema_add_macro_var(Token macro_var_name /* , expr **/ );
// If we have a public parameter, then the next one will be the real one.
void sema_mark_next_public(void);
void sema_verror_at(SourceRange range, const char *message, va_list args);

View File

@@ -0,0 +1,89 @@
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <sys/stat.h>
#include <limits.h>
#include "../build/build_options.h"
#include "source_file.h"
#include "../utils/lib.h"
#include "../utils/file_utils.h"
#include "lexer.h"
// Initial capacity of the global loaded-file vector.
static const size_t LEXER_FILES_START_CAPACITY = 128;
// Placeholder file returned for INVALID_LOC positions.
File pseudo_file;
typedef struct
{
	File **files;  // vector (lib.h VECNEW/VECADD) of every loaded file, in load order
} SourceFiles;
SourceFiles source_files;
// Load 'filename' (resolved to a canonical path) and register it in the
// global file list. If the same file was loaded before, the existing entry
// is returned and *already_loaded (when non-NULL) is set to true.
// Exits with an error on unresolvable paths or when MAX_FILES is exceeded.
File *source_file_load(const char *filename, bool *already_loaded)
{
	if (already_loaded) *already_loaded = false;
	if (!source_files.files) source_files.files = VECNEW(File *, LEXER_FILES_START_CAPACITY);
	char *full_path = malloc_arena(PATH_MAX + 1);
	if (!realpath(filename, full_path))
	{
		error_exit("Failed to resolve %s", filename);
	}
	VECEACH(source_files.files, index)
	{
		if (strcmp(source_files.files[index]->full_path, full_path) == 0)
		{
			// BUG FIX: guard the out-parameter here too — the original
			// dereferenced it unconditionally, crashing for callers that
			// pass NULL (which the reset at the top explicitly allows).
			if (already_loaded) *already_loaded = true;
			return source_files.files[index];
		}
	}
	if (vec_size(source_files.files) == MAX_FILES)
	{
		error_exit("Exceeded max number of files %d", MAX_FILES);
	}
	size_t size;
	const char* source_text = read_file(filename, &size);
	File *file = malloc(sizeof *file);
	// start_id/end_id form one contiguous global position range across all
	// files, so a SourceLoc can be mapped back to its file by binary search.
	file->full_path = full_path;
	file->start_id = vec_size(source_files.files) ? VECLAST(source_files.files)->end_id : 0;
	file->contents = source_text;
	ASSERT(file->start_id + size < UINT32_MAX, "Total files loaded exceeded %d bytes", UINT32_MAX);
	file->end_id = file->start_id + size;
	file->name = filename;
	source_files.files = VECADD(source_files.files, file);
	return file;
}
// Map a global source position back to the File whose [start_id, end_id]
// range contains it.
File *source_file_from_position(SourceLoc loc)
{
	if (loc == INVALID_LOC)
	{
		// Synthetic/unknown positions get a placeholder file.
		pseudo_file.contents = "---";
		return &pseudo_file;
	}
	// Fast path: most lookups hit the file currently being lexed
	// (it owns the highest position range).
	if (lexer_current_file()->start_id <= loc) return lexer_current_file();
	unsigned low = 0;
	unsigned high = vec_size(source_files.files) - 2;
	// NOTE(review): assumes at least two files are loaded here (the current
	// file was excluded above) and that files[] is sorted by start_id —
	// source_file_load assigns ranges in load order, so this holds.
	assert(vec_size(source_files.files) > 1);
	while (1)
	{
		// Binary search
		unsigned mid = (high + low) / 2;
		File *file = source_files.files[mid];
		if (file->start_id > loc)
		{
			// NOTE(review): if mid == 0 this underflows (unsigned wrap).
			// Cannot trigger while files[0].start_id == 0 — confirm.
			high = mid - 1;
			continue;
		}
		if (file->end_id < loc)
		{
			low = mid + 1;
			continue;
		}
		return file;
	}
}

View File

@@ -0,0 +1,11 @@
#pragma once
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "compiler_common.h"
File *source_file_load(const char *filename, bool *already_loaded);
File *source_file_from_position(SourceLoc loc);

View File

@@ -8,7 +8,7 @@
#include "../utils/errors.h"
#include <assert.h>
#include "../utils/lib.h"
#include "malloc.h"
#include "utils/malloc.h"
#include "tokens.h"
#define TABLE_MAX_LOAD 0.75

View File

@@ -112,15 +112,15 @@ const char *token_type_to_string(TokenType type)
case TOKEN_BIT_XOR_ASSIGN:
return "^=";
case TOKEN_VAR_IDENT:
return "<varIdent>";
return "VAR_IDENT";
case TOKEN_TYPE_IDENT:
return "<TypeIdent>";
return "TYPE_IDENT";
case TOKEN_STRING:
return "<string>";
return "STRING";
case TOKEN_INTEGER:
return "<int>";
return "INTEGER";
case TOKEN_REAL:
return "<float>";
return "FLOAT";
case TOKEN_QUESTION:
return "?";
case TOKEN_ELVIS:
@@ -192,9 +192,9 @@ const char *token_type_to_string(TokenType type)
case TOKEN_AT:
return "@";
case TOKEN_ERROR:
return "<error>";
return "error";
case TOKEN_EOF:
return "<eof>";
return "EOF";
case TOKEN_CAST:
return "cast";
case TOKEN_C_LONGDOUBLE:
@@ -208,7 +208,7 @@ const char *token_type_to_string(TokenType type)
case TOKEN_C_ULONGLONG:
return "c_ulonglong";
case TOKEN_C_SHORT:
return "c_ishort";
return "c_short";
case TOKEN_C_INT:
return "c_int";
case TOKEN_C_LONG:
@@ -251,6 +251,8 @@ const char *token_type_to_string(TokenType type)
return "i8";
case TOKEN_U8:
return "u8";
case TOKEN_U1:
return "u1";
case TOKEN_BOOL:
return "bool";
case TOKEN_QUAD:
@@ -294,25 +296,25 @@ const char *token_type_to_string(TokenType type)
case TOKEN_HALF:
return "half";
case TOKEN_CAPS_IDENT:
return "<CAPS_IDENT>";
return "CAPS_IDENT";
case TOKEN_AT_IDENT:
return "<@ident>";
return "AT_IDENT";
case TOKEN_HASH_IDENT:
return "<#ident>";
return "HASH_IDENT";
case TOKEN_DOLLAR_IDENT:
return "<$ident>";
return "DOLLAR_IDENT";
case TOKEN_CATCH:
return "catch";
case INVALID_TOKEN:
return "<\?\?\?>";
return "INVALID_TOKEN";
case TOKEN_DOCS_EOL:
return "<EOL>";
return "EOL";
case TOKEN_DOCS_START:
return "/**";
case TOKEN_DOCS_END:
return "*/";
case TOKEN_DOCS_LINE:
return "<docs line>";
return "DOCS_LINE";
case TOKEN_AT_REQUIRE:
return "@require";
case TOKEN_AT_ENSURE:
@@ -334,3 +336,4 @@ const char *token_type_to_string(TokenType type)
}
UNREACHABLE
}

View File

@@ -90,6 +90,7 @@ typedef enum _TokenType
TOKEN_U16, // u16
TOKEN_I8, // i8
TOKEN_U8, // u8
TOKEN_U1, // u1
// Basic types names
TOKEN_BYTE,

View File

@@ -34,7 +34,7 @@ static const char* test_parse = "struct Node\n"
" */\n"
"void Heap.init(Heap& heap, usize start) \n"
"{\n"
" Node& init_region = @cast(Node&, start);\n"
" Node& init_region = cast(Node&, start);\n"
" init_region.hole = 1;\n"
" init_region.size = HEAP_INIT_SIZE - @sizeof(Node) - @sizeof(Footer);\n"
"\n"
@@ -42,14 +42,14 @@ static const char* test_parse = "struct Node\n"
"\n"
" heap.bins[get_bin_index(init_region.size)].add(init_region);\n"
"\n"
" heap.start = @cast(void*, start);\n"
" heap.end = @cast(void*, start + HEAP_INIT_SIZE);\n"
" heap.start = cast(void*, start);\n"
" heap.end = cast(void*, start + HEAP_INIT_SIZE);\n"
"}\n"
"\n"
"void* Heap.alloc(Heap& heap, usize size) \n"
"{\n"
" uint index = get_bin_index(size);\n"
" Bin& temp = @cast(Bin&, heap.bins[index]);\n"
" Bin& temp = cast(Bin&, heap.bins[index]);\n"
" Node* found = temp.getBestFit(size);\n"
"\n"
" while (!found) \n"
@@ -60,7 +60,7 @@ static const char* test_parse = "struct Node\n"
"\n"
" if ((found.size - size) > (overhead + MIN_ALLOC_SZ)) \n"
" {\n"
" Node& split = @cast(Node*, @cast(char&, found) + sizeof(Node) + sizeof(Footer)) + size);\n"
" Node& split = cast(Node*, cast(char&, found) + sizeof(Node) + sizeof(Footer)) + size);\n"
" split.size = found.size - size - sizeof(Node) - sizeof(Footer);\n"
" split.hole = 1;\n"
" \n"

View File

@@ -10,6 +10,7 @@
#include <compiler/compiler.h>
#include <utils/lib.h>
#include <utils/errors.h>
#include <utils/malloc.h>
#include "benchmark.h"
#include "../compiler/symtab.h"
@@ -18,13 +19,14 @@ static void test_lexer(void)
#ifdef __OPTIMIZE__
printf("--- RUNNING OPTIMIZED ---\n");
#endif
run_arena_allocator_tests();
printf("Begin lexer testing.\n");
printf("-- Check number of keywords...\n");
int tokens_found = 0;
const int EXPECTED_TOKENS = 91;
const int EXPECTED_TOKENS = 12 + 72 + 9;
const char* tokens[TOKEN_EOF];
int len[TOKEN_EOF];
compiler_init();
lexer_check_init();
for (int i = 1; i < TOKEN_EOF; i++)
{
const char* token = token_type_to_string((TokenType)i);
@@ -34,7 +36,7 @@ static void test_lexer(void)
const char* interned = symtab_add(token, len[i], fnv1a(token, len[i]), &lookup);
if (lookup != TOKEN_VAR_IDENT)
{
Token scanned = scan_ident_test(token);
Token scanned = lexer_scan_ident_test(token);
TEST_ASSERT(scanned.type == i, "Mismatch scanning: was '%s', expected '%s' - lookup: %s - interned: %s.",
token_type_to_string(scanned.type),
token_type_to_string(i),
@@ -59,7 +61,7 @@ static void test_lexer(void)
{
for (int i = 1; i < TOKEN_EOF; i++)
{
volatile TokenType t = scan_ident_test(tokens[i]).type;
volatile TokenType t = lexer_scan_ident_test(tokens[i]).type;
}
}
@@ -77,13 +79,14 @@ static void test_lexer(void)
bench_begin();
int tokens_parsed = 0;
size_t test_len = strlen(test_parse);
for (int b = 0; b < BENCH_REPEATS; b++)
{
lexer_test_setup(test_parse);
lexer_test_setup(test_parse, test_len);
Token token;
while (1)
{
token = scan_token();
token = lexer_scan_token();
if (token.type == TOKEN_EOF) break;
TEST_ASSERT(token.type != INVALID_TOKEN, "Got invalid token");
tokens_parsed++;
@@ -92,7 +95,6 @@ static void test_lexer(void)
printf("-> Test complete in %fs, %.0f kloc/s, %.0f ktokens/s\n", bench_mark(),
loc * BENCH_REPEATS / (1000 * bench_mark()), tokens_parsed / (1000 * bench_mark()));
}
void test_compiler(void)

View File

@@ -3,7 +3,7 @@
#include "build/project_creation.h"
#include "utils/errors.h"
#include "compiler_tests/tests.h"
#include "compiler/malloc.h"
#include "utils/malloc.h"
int main(int argc, const char *argv[])
{
init_arena();
@@ -16,6 +16,8 @@ int main(int argc, const char *argv[])
case COMMAND_UNIT_TEST:
compiler_tests();
case COMMAND_COMPILE:
compile_file();
break;
case COMMAND_COMPILE_RUN:
case COMMAND_MISSING:
case COMMAND_BUILD:

14
src/utils/common.h Normal file
View File

@@ -0,0 +1,14 @@
#pragma once
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <assert.h>
#include "errors.h"
#define MAX_IDENTIFIER_LENGTH 31

View File

@@ -3,3 +3,22 @@
// license that can be found in the LICENSE file.
#include "errors.h"
#include <stdarg.h>
// printf-style logging helper that targets stderr.
void eprintf(const char *format, ...)
{
	va_list args;
	va_start(args, format);
	vfprintf(stderr, format, args);
	va_end(args);
}
// Print a formatted message followed by a newline to stderr, then
// terminate the process with EXIT_FAILURE. Never returns.
void error_exit(const char *format, ...)
{
	va_list args;
	va_start(args, format);
	vfprintf(stderr, format, args);
	va_end(args);
	fputs("\n", stderr);
	exit(EXIT_FAILURE);
}

View File

@@ -7,17 +7,28 @@
#include <stdio.h>
#include <stdlib.h>
#define error_exit(...) do { fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); exit(EXIT_FAILURE); } while(0)
void eprintf(const char *format, ...);
void error_exit(const char *format, ...);
#define FATAL_ERROR(_string, ...) do { printf("FATAL ERROR at %s:%d: " _string, __func__, __LINE__, ##__VA_ARGS__); printf("\n"); exit(-1); } while(0)
#define FATAL_ERROR(_string, ...) do { error_exit("FATAL ERROR at %s:%d: " _string, __func__, __LINE__, ##__VA_ARGS__); } while(0)
#define ASSERT(_condition, _string, ...) while (!(_condition)) { FATAL_ERROR(_string, ##__VA_ARGS__); }
#define UNREACHABLE FATAL_ERROR("Cannot reach %s:%d", __func__, __LINE__);
#define TODO FATAL_ERROR("Not done yet %s:%d", __func__, __LINE__);
#define TODO FATAL_ERROR("TODO reached", __func__, __LINE__);
#define TEST_ASSERT(_condition, _string, ...) while (!(_condition)) { FATAL_ERROR(_string, ##__VA_ARGS__); }
#define EXPECT(_string, _value, _expected) \
do { long long __tempval1 = _value; long long __tempval2 = _expected; \
TEST_ASSERT(__tempval1 == __tempval2, "Checking " _string ": expected %lld but was %lld.", __tempval2, __tempval1); } while(0)
#ifndef NDEBUG
#define DEBUG_LOG(_string, ...) eprintf("-- DEBUG: "); eprintf(_string, ##__VA_ARGS__); eprintf("\n");
#else
#define DEBUG_LOG(_string, ...)
#endif
#define LOG_FUNC DEBUG_LOG("%s entered", __func__);

View File

@@ -3,6 +3,9 @@
// license that can be found in the LICENSE file.
#include "file_utils.h"
#include "errors.h"
#include "malloc.h"
#include "lib.h"
#include <stdio.h>
#include <stdlib.h>
@@ -18,3 +21,66 @@ const char* expand_path(const char* path)
}
return path;
}
// Derive a module name from a file path: the basename without its extension,
// lower-cased, with non-letter characters replaced by '_'. Writes at most
// MAX_IDENTIFIER_LENGTH characters plus a NUL into 'buffer' and returns the
// resulting length, or 0 if no usable name (fewer than 2 chars) was found.
int filename_to_module(const char *path, char buffer[MAX_IDENTIFIER_LENGTH + 1])
{
	size_t len = strlen(path);
	// BUG FIX: last_slash must start at -1, not 0 — otherwise a path with
	// no '/' silently dropped its first character.
	int last_slash = -1;
	int last_dot = -1;
	for (size_t i = 0; i < len; i++)
	{
		if (path[i] == '/') last_slash = (int)i;
		if (path[i] == '.') last_dot = (int)i;
	}
	int namelen = last_dot - last_slash - 1;
	if (namelen < 2) return 0;
	if (namelen > MAX_IDENTIFIER_LENGTH) namelen = MAX_IDENTIFIER_LENGTH;
	// BUG FIX: iterate over the *clamped* length — the original walked all
	// the way to last_dot, overflowing 'buffer' for names longer than
	// MAX_IDENTIFIER_LENGTH characters.
	for (int i = 0; i < namelen; i++)
	{
		char c = path[last_slash + 1 + i];
		if (is_letter(c))
		{
			c = is_upper(c) ? c + 'a' - 'A' : c;
		}
		else
		{
			c = '_';
		}
		buffer[i] = c;
	}
	buffer[namelen] = '\0';
	return namelen;
}
// Read the whole file at 'path' into a freshly malloc'ed, NUL-terminated
// buffer. The number of bytes read (excluding the terminator) is stored in
// *return_size. Exits with a message on any failure; the caller owns and
// must free the returned buffer.
char *read_file(const char *path, size_t *return_size)
{
	FILE *file = fopen(path, "rb");
	if (file == NULL)
	{
		// error_exit terminates, so the original's trailing exit(74)
		// was unreachable and has been dropped.
		error_exit("Could not open file \"%s\".\n", path);
	}
	if (fseek(file, 0L, SEEK_END) != 0)
	{
		error_exit("Could not seek in file \"%s\".\n", path);
	}
	long file_len = ftell(file);
	// ftell reports failure as -1; the original cast it straight to size_t,
	// turning an error into a gigantic allocation request.
	if (file_len < 0)
	{
		error_exit("Could not get size of file \"%s\".\n", path);
	}
	size_t file_size = (size_t)file_len;
	*return_size = file_size;
	rewind(file);
	char *buffer = malloc(file_size + 1);
	if (buffer == NULL)
	{
		error_exit("Not enough memory to read \"%s\".\n", path);
	}
	size_t bytes_read = fread(buffer, sizeof(char), file_size, file);
	if (bytes_read < file_size)
	{
		error_exit("Failed to read file \"%s\".\n", path);
	}
	buffer[bytes_read] = '\0';
	fclose(file);
	return buffer;
}

View File

@@ -4,5 +4,10 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "common.h"
const char* expand_path(const char* path);
char *read_file(const char *path, size_t *return_size);
int filename_to_module(const char *path, char buffer[MAX_IDENTIFIER_LENGTH + 1]);

View File

@@ -6,6 +6,7 @@
#include <stdint.h>
#include <stdbool.h>
#include "malloc.h"
static inline bool is_power_of_two(uint64_t x)
{
@@ -181,3 +182,53 @@ static inline uint32_t fnv1a(const char *key, uint32_t len)
}
return hash;
}
// ---- Minimal arena-backed growable vector ---------------------------------
// A vector is a plain T* pointing just past a _VHeader; the header stores the
// element count and capacity. All storage comes from malloc_arena, so old
// buffers are never freed individually — the arena reclaims them wholesale.
typedef struct
{
	unsigned size;
	unsigned capacity;
} _VHeader;
// Allocate a vector with room for 'capacity' elements; returns the header.
static inline _VHeader* _vec_new(size_t element_size, size_t capacity)
{
	_VHeader *header = malloc_arena(element_size * capacity + sizeof(_VHeader));
	header->size = 0;
	header->capacity = capacity;
	return header;
}
// Number of elements; a NULL vector counts as empty.
static inline unsigned vec_size(const void*vec)
{
	return vec ? (((_VHeader *)vec) - 1)->size : 0;
}
// Reserve space for one more element, growing (by doubling) when full.
// Returns the (possibly relocated) vector payload pointer.
static inline void* _expand(void *vec, size_t element_size)
{
	if (vec == NULL)
	{
		vec = _vec_new(element_size, 16) + 1;
	}
	_VHeader *header = ((_VHeader *)vec) - 1;
	header->size++;
	if (header->size == header->capacity)
	{
		// BUG FIX: the original allocated capacity >> 1 — i.e. *halved*
		// the vector — then memcpy'ed the full old block into it, a heap
		// buffer overflow as soon as any vector outgrew its capacity.
		// Grow by doubling instead, and restore the correct capacity after
		// the copy (memcpy clobbers the new header with the old one).
		size_t old_capacity = header->capacity;
		_VHeader *new_header = _vec_new(element_size, old_capacity << 1u);
		memcpy(new_header, header, element_size * old_capacity + sizeof(_VHeader));
		new_header->capacity = old_capacity << 1u;
		header = new_header;
		vec = header + 1;
	}
	return vec;
}
#define VECEACH(_vec, _index) \
	unsigned __vecsize = vec_size(_vec); \
	for (unsigned _index = 0; _index < __vecsize; _index++)
#define VECNEW(_type, _capacity) ((_type *)(_vec_new(sizeof(_type), _capacity) + 1))
#define VECADD(_vec, _value) \
	({ \
		typeof(_vec) __temp = (typeof(_vec))_expand((_vec), sizeof((_vec)[0])); \
		__temp[vec_size(__temp) - 1] = _value; \
		__temp; })
#define VECLAST(_vec) ( (_vec) ? (_vec)[vec_size(_vec) - 1] : NULL)

View File

@@ -4,14 +4,11 @@
#include "malloc.h"
#include <stdlib.h>
#include <assert.h>
#include "../utils/errors.h"
#define KB 1024L
static const size_t KB = 1024ul;
// Use 1MB at a time.
#define BUCKET_SIZE (1024 * KB)
#define ARENA_BUCKET_START_SIZE 16
static const size_t MB = KB * 1024ul;
static const size_t BUCKET_SIZE = MB;
static const size_t STARTING_ARENA_BUCKETS = 16;
static uint8_t **arena_buckets;
static int arena_buckets_used;
@@ -21,10 +18,9 @@ static void *current_arena;
static int allocations_done;
void init_arena(void)
{
printf("---- ARENA ALLOCATED ----\n");
arena_buckets = malloc(ARENA_BUCKET_START_SIZE * sizeof(void *));
arena_buckets = malloc(STARTING_ARENA_BUCKETS * sizeof(void *));
arena_buckets_used = 1;
arena_buckets_array_size = ARENA_BUCKET_START_SIZE;
arena_buckets_array_size = STARTING_ARENA_BUCKETS;
arena_buckets[0] = malloc(BUCKET_SIZE);
allocations_done = 0;
current_use = 0;
@@ -34,13 +30,15 @@ void init_arena(void)
// Simple bump allocator with buckets.
void *malloc_arena(size_t mem)
{
if (mem == 0) return NULL;
assert(mem > 0);
// Round to multiple of 16
size_t oldmem = mem;
mem = (mem + 15u) & ~15ull;
assert(mem >= oldmem);
if (mem >= BUCKET_SIZE / 4)
{
void *ret = malloc(mem);
ASSERT(ret, "Out of memory.");
return malloc(mem);
}
if (current_use + mem > BUCKET_SIZE)
@@ -61,20 +59,22 @@ void *malloc_arena(size_t mem)
allocations_done++;
if (mem > 4096)
{
printf("Allocated large chunk %llu\n", (unsigned long long)mem);
// printf("Allocated large chunk %llu\n", (unsigned long long)mem);
}
return (void *)ptr;
}
void free_arena(void)
void print_arena_status(void)
{
printf("-- FREEING ARENA -- \n");
printf("-- ARENA INFO -- \n");
printf(" * Memory used: %ld Kb\n", ((arena_buckets_used - 1) * BUCKET_SIZE + current_use) / 1024);
printf(" * Buckets used: %d\n", arena_buckets_used);
printf(" * Allocations: %d\n", allocations_done);
}
void free_arena(void)
{
for (int i = 0; i < arena_buckets_used; i++)
{
free(arena_buckets[i]);
@@ -84,23 +84,43 @@ void free_arena(void)
arena_buckets = NULL;
arena_buckets_array_size = 0;
current_use = 0;
printf("-- FREE DONE -- \n");
}
void run_arena_allocator_tests(void)
{
init_arena();
printf("Begin arena allocator testing.\n");
bool was_init = arena_buckets != NULL;
if (!was_init) init_arena();
free_arena();
init_arena();
ASSERT(malloc_arena(10) != malloc_arena(10), "Expected different values...");
ASSERT(current_use == 32, "Expected allocations rounded to next 8 bytes");
printf("-- Tested basic allocation - OK.\n");
ASSERT(current_use == 32, "Expected allocations rounded to next 16 bytes");
malloc_arena(1);
ASSERT(current_use == 48, "Expected allocations rounded to next 16 bytes");
printf("-- Tested allocation alignment - OK.\n");
EXPECT("buckets in use", arena_buckets_used, 1);
ASSERT(malloc_arena(BUCKET_SIZE), "Should be possible to allocate this");
for (int i = 0; i < 8; i++)
{
ASSERT(malloc_arena(BUCKET_SIZE / 8), "Should be possible to allocate this");
}
EXPECT("buckets in use", arena_buckets_used, 2);
ASSERT(malloc_arena(1), "Expected alloc to pass");
for (int i = 0; i < 7; i++)
{
ASSERT(malloc_arena(BUCKET_SIZE / 8), "Should be possible to allocate this");
}
EXPECT("buckets in use", arena_buckets_used, 2);
ASSERT(malloc_arena(BUCKET_SIZE / 8), "Expected alloc to pass");
EXPECT("buckets in use", arena_buckets_used, 3);
for (int i = 0; i < 8 * STARTING_ARENA_BUCKETS; i++)
{
ASSERT(malloc_arena(BUCKET_SIZE / 8), "Should be possible to allocate this");
}
EXPECT("buckets in use", arena_buckets_used, STARTING_ARENA_BUCKETS + 3);
printf("-- Test switching buckets - OK.\n");
free_arena();
ASSERT(arena_buckets_array_size == 0, "Arena not freed?");
printf("Passed all arena tests\n");
printf("-- Test freeing arena - OK.\n");
if (was_init) init_arena();
}

View File

@@ -4,7 +4,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "common.h"
void init_arena(void);
void *malloc_arena(unsigned long mem);