Add a little testing and keyword parsing x2

This commit is contained in:
Christoffer Lerno
2019-07-25 18:57:35 +02:00
parent e229d19b7c
commit 7439dccc53
11 changed files with 1129 additions and 2 deletions

View File

@@ -1,5 +1,6 @@
cmake_minimum_required(VERSION 3.13) cmake_minimum_required(VERSION 3.13)
project(c3c C) project(c3c C)
set(CMAKE_CXX_FLAGS_RELEASE "-O3")
set(CMAKE_C_STANDARD 11) set(CMAKE_C_STANDARD 11)
@@ -14,4 +15,4 @@ add_executable(c3c
src/utils/errors.c src/utils/errors.c
src/utils/file_utils.c src/utils/file_utils.c
src/utils/string_utils.c src/utils/string_utils.c
) src/compiler/lexer.c src/compiler/lexer.h src/compiler_tests/tests.c src/compiler_tests/tests.h src/compiler_tests/benchmark.c src/compiler_tests/benchmark.h)

View File

@@ -128,6 +128,11 @@ static void parse_command(void)
build_options.project_name = next_arg(); build_options.project_name = next_arg();
return; return;
} }
if (arg_match("utest"))
{
build_options.command = COMMAND_UNIT_TEST;
return;
}
if (arg_match("compile")) if (arg_match("compile"))
{ {
build_options.command = COMMAND_COMPILE; build_options.command = COMMAND_COMPILE;

View File

@@ -20,6 +20,7 @@ typedef enum
COMMAND_DIST, COMMAND_DIST,
COMMAND_DOCS, COMMAND_DOCS,
COMMAND_BENCH, COMMAND_BENCH,
COMMAND_UNIT_TEST,
} CompilerCommand; } CompilerCommand;
typedef struct typedef struct

806
src/compiler/lexer.c Normal file
View File

@@ -0,0 +1,806 @@
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <stdint.h>
#include "lexer.h"
#include "../utils/string_utils.h"
#include <string.h>
#include "../utils/errors.h"
// Scanner state for the lexer. None of the functions in this translation
// unit use it yet; it is scaffolding for the upcoming scanning loop.
typedef struct
{
	const char *begin;    // NOTE(review): presumably the start of the whole source buffer -- confirm when the scanner lands.
	const char *start;    // NOTE(review): presumably the start of the token being scanned -- confirm.
	const char *current;  // NOTE(review): presumably the read cursor -- confirm.
	uint16_t source_file; // NOTE(review): presumably an id/index of the file being lexed -- confirm.
	// Fields parked for later lexer work; intentionally commented out.
	/* LexerState lexer_state;
	File *current_file;
	Token saved_tok;
	Token saved_prev_tok;
	SourceLoc last_in_range;*/
} Lexer;

// Single global lexer instance.
Lexer lexer;
// Keyword-matching helpers. Both expand in a context where `start` (the
// identifier text) and `len` (its length) are in scope -- they deliberately
// capture those names from the call site.
// MATCH_KEYWORD_LEN rejects on length first: sizeof(_keyword) counts the
// terminating NUL, hence the `len + 1` comparison.
#define MATCH_KEYWORD_LEN(_keyword, _type) \
	((sizeof(_keyword) != len + 1) ? TOKEN_VAR_IDENT : check_keyword(start, len, _keyword, _type))
// MATCH_KEYWORD skips the length check; callers use it only where the exact
// length has already been established.
#define MATCH_KEYWORD(_keyword, _type) check_keyword(start, len, _keyword, _type)
// Compare the scanned identifier against one candidate keyword. The first
// character was already matched by the caller's dispatch, so only the bytes
// in [1, len) are checked. Returns `type` on a full match, otherwise
// TOKEN_VAR_IDENT. (Hand-written matcher; still worth benchmarking against
// a table-based state machine.)
static inline TokenType check_keyword(const char * restrict start, size_t len, const char * restrict keyword, TokenType type)
{
	for (size_t i = 1; i < len; i++)
	{
		if (start[i] != keyword[i]) return TOKEN_VAR_IDENT;
	}
	return type;
}
// Classify identifiers beginning with "c_" (C interop type keywords).
// Dispatches on the 4th character, then lets MATCH_KEYWORD(_LEN) verify the
// full spelling. Returns TOKEN_VAR_IDENT when nothing matches.
static inline TokenType c_ident(const char *restrict start, const int len)
{
	// The shortest "c_" keyword is "c_int" (5 chars). Bail out before the
	// start[3] dispatch so a short identifier such as "c_" (len == 2) never
	// reads past its own characters. This cannot change any match result:
	// every leaf below is length-checked and requires len >= 5.
	if (len < 5) return TOKEN_VAR_IDENT;
	switch (start[3])
	{
		case 'n':
			return MATCH_KEYWORD_LEN("c_int", TOKEN_C_INT);
		case 'i':
			return MATCH_KEYWORD_LEN("c_uint", TOKEN_C_UINT);
		case 's':
			return MATCH_KEYWORD_LEN("c_ushort", TOKEN_C_USHORT);
		case 'h':
			return MATCH_KEYWORD_LEN("c_short", TOKEN_C_SHORT);
		case 'o':
			// Several keywords share "c_lo..."; the total length decides.
			switch (len)
			{
				case 10:
					return MATCH_KEYWORD("c_longlong", TOKEN_C_LONGLONG);
				case 6:
					return MATCH_KEYWORD("c_long", TOKEN_C_LONG);
				case 12:
					return MATCH_KEYWORD("c_longdouble", TOKEN_C_LONGDOUBLE);
				default:
					return TOKEN_VAR_IDENT;
			}
		case 'l':
			return len == 11
				? MATCH_KEYWORD("c_ulonglong", TOKEN_C_ULONGLONG)
				: MATCH_KEYWORD_LEN("c_ulong", TOKEN_C_ULONG);
		default:
			return TOKEN_VAR_IDENT;
	}
}
// Classify an identifier (`start`, `len` characters) as a keyword token, or
// TOKEN_VAR_IDENT when it is not a keyword. Strategy: dispatch on the first
// character, then on a discriminating later character (and sometimes the
// length), with MATCH_KEYWORD(_LEN) verifying the full spelling at the
// leaves.
static inline TokenType ident_type(const char *restrict start, const int len)
{
	char current_value = start[0];
	// One-character identifiers are never keywords.
	if (len < 2) return TOKEN_VAR_IDENT;
	// "c_..." interop keywords are matched separately; several exceed the
	// 8-character cutoff below (e.g. "c_longdouble").
	if (current_value == 'c' && start[1] == '_') return c_ident(start, len);
	// The longest ordinary keyword is 8 characters ("continue", "volatile"),
	// and all keywords start with a lowercase letter.
	if (len > 8 || !is_lower(current_value)) return TOKEN_VAR_IDENT;
	switch (current_value)
	{
		case 'a':
			if (len == 2) return MATCH_KEYWORD("as", TOKEN_AS);
			switch (start[1])
			{
				case 's':
					return MATCH_KEYWORD_LEN("asm", TOKEN_ASM);
				case 'l':
					return MATCH_KEYWORD_LEN("alias", TOKEN_ALIAS);
				default:
					return TOKEN_VAR_IDENT;
			}
		case 'b':
			switch (start[1])
			{
				case 'o':
					return MATCH_KEYWORD_LEN("bool", TOKEN_BOOL);
				case 'y':
					return MATCH_KEYWORD_LEN("byte", TOKEN_BYTE);
				case 'r':
					return MATCH_KEYWORD_LEN("break", TOKEN_BREAK);
				default:
					return TOKEN_VAR_IDENT;
			}
		case 'c':
			// "c_..." was already routed to c_ident above.
			if (len < 4) return TOKEN_VAR_IDENT;
			if (len == 8) return MATCH_KEYWORD_LEN("continue", TOKEN_CONTINUE);
			switch (start[3])
			{
				case 't':
					return MATCH_KEYWORD_LEN("cast", TOKEN_CAST);
				case 'e':
					return MATCH_KEYWORD_LEN("case", TOKEN_CASE);
				case 'r':
					return MATCH_KEYWORD_LEN("char", TOKEN_CHAR);
				case 's':
					return MATCH_KEYWORD_LEN("const", TOKEN_CONST);
				case 'c':
					return MATCH_KEYWORD_LEN("catch", TOKEN_CATCH);
				default:
					return TOKEN_VAR_IDENT;
			}
		case 'd':
			// For len 2..4 only "do" can match; MATCH_KEYWORD_LEN rejects 3-4.
			if (len < 5) return MATCH_KEYWORD_LEN("do", TOKEN_DO);
			switch (start[3])
			{
				case 'e':
					return MATCH_KEYWORD_LEN("defer", TOKEN_DEFER);
				case 'a':
					return MATCH_KEYWORD_LEN("default", TOKEN_DEFAULT);
				case 'b':
					return MATCH_KEYWORD_LEN("double", TOKEN_DOUBLE);
				default:
					return TOKEN_VAR_IDENT;
			}
		case 'e':
			switch (start[1])
			{
				case 'l':
					return MATCH_KEYWORD_LEN("else", TOKEN_ELSE);
				case 'n':
					return MATCH_KEYWORD_LEN("enum", TOKEN_ENUM);
				case 'r':
					return MATCH_KEYWORD_LEN("error", TOKEN_ERROR);
				default:
					return TOKEN_VAR_IDENT;
			}
		case 'f':
			// NOTE(review): reads start[2] even when len == 2; safe only if
			// identifiers always sit inside a larger NUL-terminated buffer --
			// confirm against the scanner once it lands.
			switch (start[2])
			{
				case '6':
					return MATCH_KEYWORD_LEN("f16", TOKEN_F16);
				case '2':
					// "f128" and "f32" share start[2] == '2'.
					if (len == 4) return MATCH_KEYWORD_LEN("f128", TOKEN_F128);
					return MATCH_KEYWORD_LEN("f32", TOKEN_F32);
				case '4':
					return MATCH_KEYWORD_LEN("f64", TOKEN_F64);
				case '5':
					return MATCH_KEYWORD_LEN("f256", TOKEN_F256);
				case 'r':
					return MATCH_KEYWORD_LEN("for", TOKEN_FOR);
				case 'l':
					return MATCH_KEYWORD_LEN("false", TOKEN_FALSE);
				case 'o':
					return MATCH_KEYWORD_LEN("float", TOKEN_FLOAT);
				case 'n':
					return MATCH_KEYWORD_LEN("func", TOKEN_FUNC);
				default:
					return TOKEN_VAR_IDENT;
			}
		case 'g':
			switch (start[1])
			{
				case 'o':
					return MATCH_KEYWORD_LEN("goto", TOKEN_GOTO);
				case 'e':
					return MATCH_KEYWORD_LEN("generic", TOKEN_GENERIC);
				default:
					return TOKEN_VAR_IDENT;
			}
		case 'h':
			return MATCH_KEYWORD_LEN("half", TOKEN_HALF);
		case 'i':
			switch (start[1])
			{
				case 'f':
					return MATCH_KEYWORD_LEN("if", TOKEN_IF);
				case '8':
					return MATCH_KEYWORD_LEN("i8", TOKEN_I8);
				case '6':
					return MATCH_KEYWORD_LEN("i64", TOKEN_I64);
				case '2':
					return MATCH_KEYWORD_LEN("i256", TOKEN_I256);
				case '3':
					return MATCH_KEYWORD_LEN("i32", TOKEN_I32);
				case '1':
					// "i128" and "i16" share start[1] == '1'.
					if (len == 4) return MATCH_KEYWORD_LEN("i128", TOKEN_I128);
					return MATCH_KEYWORD_LEN("i16", TOKEN_I16);
				case 'n':
					return MATCH_KEYWORD_LEN("int", TOKEN_INT);
				case 'm':
					return MATCH_KEYWORD_LEN("import", TOKEN_IMPORT);
				case 's':
					return MATCH_KEYWORD_LEN("isize", TOKEN_ISIZE);
				default:
					return TOKEN_VAR_IDENT;
			}
		case 'l':
			if (len < 4) return TOKEN_VAR_IDENT;
			switch (start[2])
			{
				case 'n':
					return MATCH_KEYWORD_LEN("long", TOKEN_LONG);
				case 'c':
					return MATCH_KEYWORD_LEN("local", TOKEN_LOCAL);
				default:
					return TOKEN_VAR_IDENT;
			}
		case 'm':
			switch (start[1])
			{
				case 'a':
					return MATCH_KEYWORD_LEN("macro", TOKEN_MACRO);
				case 'o':
					return MATCH_KEYWORD_LEN("module", TOKEN_MODULE);
				default:
					return TOKEN_VAR_IDENT;
			}
		case 'n':
			return MATCH_KEYWORD_LEN("nil", TOKEN_NIL);
		case 'p':
			return MATCH_KEYWORD_LEN("public", TOKEN_PUBLIC);
		case 'q':
			return MATCH_KEYWORD_LEN("quad", TOKEN_QUAD);
		case 'r':
			return MATCH_KEYWORD_LEN("return", TOKEN_RETURN);
		case 's':
			switch (start[1])
			{
				case 'h':
					return MATCH_KEYWORD_LEN("short", TOKEN_SHORT);
				case 't':
					return MATCH_KEYWORD_LEN("struct", TOKEN_STRUCT);
				case 'w':
					return MATCH_KEYWORD_LEN("switch", TOKEN_SWITCH);
				default:
					return TOKEN_VAR_IDENT;
			}
		case 't':
			if (len < 3) return TOKEN_VAR_IDENT;
			switch (start[2])
			{
				case 'p':
					// "typedef" vs "type": length decides.
					if (len == 7) return MATCH_KEYWORD_LEN("typedef", TOKEN_TYPEDEF);
					return MATCH_KEYWORD_LEN("type", TOKEN_TYPE);
				case 'u':
					return MATCH_KEYWORD_LEN("true", TOKEN_TRUE);
				case 'y':
					return MATCH_KEYWORD_LEN("try", TOKEN_TRY);
				case 'r':
					// "throws" vs "throw": length decides.
					if (len == 6) return MATCH_KEYWORD_LEN("throws", TOKEN_THROWS);
					return MATCH_KEYWORD_LEN("throw", TOKEN_THROW);
				default:
					return TOKEN_VAR_IDENT;
			}
		case 'u':
			if (len < 3) return MATCH_KEYWORD_LEN("u8", TOKEN_U8);
			switch (start[1])
			{
				case '1':
					// "u128" and "u16" share start[1] == '1'.
					if (len == 4) return MATCH_KEYWORD("u128", TOKEN_U128);
					return MATCH_KEYWORD_LEN("u16", TOKEN_U16);
				case '2':
					return MATCH_KEYWORD_LEN("u256", TOKEN_U256);
				case '3':
					return MATCH_KEYWORD_LEN("u32", TOKEN_U32);
				case '6':
					return MATCH_KEYWORD_LEN("u64", TOKEN_U64);
				case 'i':
					return MATCH_KEYWORD_LEN("uint", TOKEN_UINT);
				case 'n':
					// "until" and "union" share "un"; start[2] decides.
					if (start[2] == 't') return MATCH_KEYWORD_LEN("until", TOKEN_UNTIL);
					return MATCH_KEYWORD_LEN("union", TOKEN_UNION);
				case 'l':
					return MATCH_KEYWORD_LEN("ulong", TOKEN_ULONG);
				case 's':
					// "usize" vs "ushort": length decides.
					if (len == 5) return MATCH_KEYWORD("usize", TOKEN_USIZE);
					return MATCH_KEYWORD_LEN("ushort", TOKEN_USHORT);
				default:
					return TOKEN_VAR_IDENT;
			}
		case 'v':
			if (len < 3) return TOKEN_VAR_IDENT;
			switch (start[2])
			{
				case 'r':
					return MATCH_KEYWORD_LEN("var", TOKEN_VAR);
				case 'i':
					return MATCH_KEYWORD_LEN("void", TOKEN_VOID);
				case 'l':
					return MATCH_KEYWORD_LEN("volatile", TOKEN_VOLATILE);
				default:
					return TOKEN_VAR_IDENT;
			}
		case 'w':
			return MATCH_KEYWORD_LEN("while", TOKEN_WHILE);
		default:
			return TOKEN_VAR_IDENT;
	}
}
// FNV-style 32-bit mixing constants (FNV prime and offset basis).
#define PRIME 0x01000193
#define SEED 0x811C9DC5
// One FNV mixing round: xor in one byte, multiply by the prime.
#define FNV1(a, seed) ((uint32_t)((((unsigned int)(a)) ^ (seed)) * PRIME))
// Fold (length, first char, second char) into a 9-bit bucket (0..511).
#define HASH(a, b, c) (FNV1(c, FNV1((a), FNV1(b, SEED))) & 0x1FFu)
// Alternative keyword classifier kept alongside ident_type for
// benchmarking: hash (len, start[0], start[1]) and switch on the bucket.
// Each leaf re-verifies spelling with MATCH_KEYWORD_LEN, but note that
// check_keyword skips byte 0 -- an unlucky bucket collision between two
// identifiers differing only in their first character could misclassify;
// the unit test cross-checks this against ident_type.
// NOTE(review): the HASH(n, 'c', '_') cases below are unreachable --
// identifiers starting "c_" already returned via c_ident above. Left in
// place pending confirmation (removing them would shift collision
// behavior).
TokenType ident_type_fnv1(const char *restrict start, int len)
{
	char current_value = start[0];
	// One-character identifiers are never keywords.
	if (len < 2) return TOKEN_VAR_IDENT;
	char second = start[1];
	// "c_..." interop keywords are matched separately.
	if (current_value == 'c' && second == '_') return c_ident(start, len);
	// Longest ordinary keyword is 8 chars; all start with a lowercase letter.
	if (len > 8 || !is_lower(current_value)) return TOKEN_VAR_IDENT;
	switch (HASH(len, current_value, second))
	{
		case HASH(2, 'a', 's'):
			return MATCH_KEYWORD_LEN("as", TOKEN_AS);
		case HASH(3, 'a', 's'):
			return MATCH_KEYWORD_LEN("asm", TOKEN_ASM);
		case HASH(5, 'a', 'l'):
			return MATCH_KEYWORD_LEN("alias", TOKEN_ALIAS);
		case HASH(4, 'b', 'o'):
			return MATCH_KEYWORD_LEN("bool", TOKEN_BOOL);
		case HASH(4, 'b', 'y'):
			return MATCH_KEYWORD_LEN("byte", TOKEN_BYTE);
		case HASH(5, 'b', 'r'):
			return MATCH_KEYWORD_LEN("break", TOKEN_BREAK);
		case HASH(8, 'c', 'o'):
			return MATCH_KEYWORD_LEN("continue", TOKEN_CONTINUE);
		case HASH(4, 'c', 'a'):
			// "cast" and "case" share length and first two chars.
			return len > 3 && start[3] == 't' ? MATCH_KEYWORD_LEN("cast", TOKEN_CAST) : MATCH_KEYWORD_LEN("case", TOKEN_CASE);
		case HASH(5, 'c', '_'):
			return MATCH_KEYWORD_LEN("c_int", TOKEN_C_INT);
		case HASH(6, 'c', '_'):
			return len > 3 && start[2] == 'u' ? MATCH_KEYWORD_LEN("c_uint", TOKEN_C_UINT) : MATCH_KEYWORD_LEN("c_long", TOKEN_C_LONG);
		case HASH(7, 'c', '_'):
			return MATCH_KEYWORD_LEN("c_short", TOKEN_C_SHORT);
		case HASH(8, 'c', '_'):
			return MATCH_KEYWORD_LEN("c_ushort", TOKEN_C_USHORT);
		case HASH(4, 'c', 'h'):
			return MATCH_KEYWORD_LEN("char", TOKEN_CHAR);
		case HASH(5, 'c', 'o'):
			return MATCH_KEYWORD_LEN("const", TOKEN_CONST);
		case HASH(5, 'c', 'a'):
			return MATCH_KEYWORD_LEN("catch", TOKEN_CATCH);
		case HASH(2, 'd', 'o'):
			return MATCH_KEYWORD_LEN("do", TOKEN_DO);
		case HASH(5, 'd', 'e'):
			return MATCH_KEYWORD_LEN("defer", TOKEN_DEFER);
		case HASH(7, 'd', 'e'):
			return MATCH_KEYWORD_LEN("default", TOKEN_DEFAULT);
		case HASH(6, 'd', 'o'):
			return MATCH_KEYWORD_LEN("double", TOKEN_DOUBLE);
		case HASH(4, 'e', 'l'):
			return MATCH_KEYWORD_LEN("else", TOKEN_ELSE);
		case HASH(4, 'e', 'n'):
			return MATCH_KEYWORD_LEN("enum", TOKEN_ENUM);
		case HASH(5, 'e', 'r'):
			return MATCH_KEYWORD_LEN("error", TOKEN_ERROR);
		case HASH(3, 'f', '1'):
			return MATCH_KEYWORD_LEN("f16", TOKEN_F16);
		case HASH(4, 'f', '1'):
			return MATCH_KEYWORD_LEN("f128", TOKEN_F128);
		case HASH(3, 'f', '3'):
			return MATCH_KEYWORD_LEN("f32", TOKEN_F32);
		case HASH(3, 'f', '6'):
			return MATCH_KEYWORD_LEN("f64", TOKEN_F64);
		case HASH(4, 'f', '2'):
			return MATCH_KEYWORD_LEN("f256", TOKEN_F256);
		case HASH(3, 'f', 'o'):
			return MATCH_KEYWORD_LEN("for", TOKEN_FOR);
		case HASH(5, 'f', 'a'):
			return MATCH_KEYWORD_LEN("false", TOKEN_FALSE);
		case HASH(5, 'f', 'l'):
			return MATCH_KEYWORD_LEN("float", TOKEN_FLOAT);
		case HASH(4, 'f', 'u'):
			return MATCH_KEYWORD_LEN("func", TOKEN_FUNC);
		case HASH(4, 'g', 'o'):
			return MATCH_KEYWORD_LEN("goto", TOKEN_GOTO);
		case HASH(7, 'g', 'e'):
			return MATCH_KEYWORD_LEN("generic", TOKEN_GENERIC);
		case HASH(4, 'h', 'a'):
			return MATCH_KEYWORD_LEN("half", TOKEN_HALF);
		case HASH(2, 'i', 'f'):
			return MATCH_KEYWORD_LEN("if", TOKEN_IF);
		case HASH(2, 'i', '8'):
			return MATCH_KEYWORD_LEN("i8", TOKEN_I8);
		case HASH(3, 'i', '6'):
			return MATCH_KEYWORD_LEN("i64", TOKEN_I64);
		case HASH(4, 'i', '2'):
			return MATCH_KEYWORD_LEN("i256", TOKEN_I256);
		case HASH(3, 'i', '3'):
			return MATCH_KEYWORD_LEN("i32", TOKEN_I32);
		case HASH(4, 'i', '1'):
			return MATCH_KEYWORD_LEN("i128", TOKEN_I128);
		case HASH(3, 'i', '1'):
			return MATCH_KEYWORD_LEN("i16", TOKEN_I16);
		case HASH(3, 'i', 'n'):
			return MATCH_KEYWORD_LEN("int", TOKEN_INT);
		case HASH(6, 'i', 'm'):
			return MATCH_KEYWORD_LEN("import", TOKEN_IMPORT);
		case HASH(5, 'i', 's'):
			return MATCH_KEYWORD_LEN("isize", TOKEN_ISIZE);
		case HASH(4, 'l', 'o'):
			return MATCH_KEYWORD_LEN("long", TOKEN_LONG);
		case HASH(5, 'l', 'o'):
			return MATCH_KEYWORD_LEN("local", TOKEN_LOCAL);
		case HASH(5, 'm', 'a'):
			return MATCH_KEYWORD_LEN("macro", TOKEN_MACRO);
		case HASH(6, 'm', 'o'):
			return MATCH_KEYWORD_LEN("module", TOKEN_MODULE);
		case HASH(3, 'n', 'i'):
			return MATCH_KEYWORD_LEN("nil", TOKEN_NIL);
		case HASH(6, 'p', 'u'):
			return MATCH_KEYWORD_LEN("public", TOKEN_PUBLIC);
		case HASH(4, 'q', 'u'):
			return MATCH_KEYWORD_LEN("quad", TOKEN_QUAD);
		case HASH(6, 'r', 'e'):
			return MATCH_KEYWORD_LEN("return", TOKEN_RETURN);
		case HASH(5, 's', 'h'):
			return MATCH_KEYWORD_LEN("short", TOKEN_SHORT);
		case HASH(6, 's', 't'):
			return MATCH_KEYWORD_LEN("struct", TOKEN_STRUCT);
		case HASH(6, 's', 'w'):
			return MATCH_KEYWORD_LEN("switch", TOKEN_SWITCH);
		case HASH(7, 't', 'y'):
			return MATCH_KEYWORD_LEN("typedef", TOKEN_TYPEDEF);
		case HASH(4, 't', 'y'):
			return MATCH_KEYWORD_LEN("type", TOKEN_TYPE);
		case HASH(4, 't', 'r'):
			return MATCH_KEYWORD_LEN("true", TOKEN_TRUE);
		case HASH(3, 't', 'r'):
			return MATCH_KEYWORD_LEN("try", TOKEN_TRY);
		case HASH(6, 't', 'h'):
			return MATCH_KEYWORD_LEN("throws", TOKEN_THROWS);
		case HASH(5, 't', 'h'):
			return MATCH_KEYWORD_LEN("throw", TOKEN_THROW);
		case HASH(2, 'u', '8'):
			return MATCH_KEYWORD_LEN("u8", TOKEN_U8);
		case HASH(4, 'u', '1'):
			return MATCH_KEYWORD_LEN("u128", TOKEN_U128);
		case HASH(3, 'u', '1'):
			return MATCH_KEYWORD_LEN("u16", TOKEN_U16);
		case HASH(4, 'u', '2'):
			return MATCH_KEYWORD_LEN("u256", TOKEN_U256);
		case HASH(3, 'u', '3'):
			return MATCH_KEYWORD_LEN("u32", TOKEN_U32);
		case HASH(3, 'u', '6'):
			return MATCH_KEYWORD_LEN("u64", TOKEN_U64);
		case HASH(4, 'u', 'i'):
			return MATCH_KEYWORD_LEN("uint", TOKEN_UINT);
		case HASH(5, 'u', 'n'):
			// "until" and "union" share length and first two chars.
			if (start[2] == 't') return MATCH_KEYWORD_LEN("until", TOKEN_UNTIL);
			return MATCH_KEYWORD_LEN("union", TOKEN_UNION);
		case HASH(5, 'u', 'l'):
			return MATCH_KEYWORD_LEN("ulong", TOKEN_ULONG);
		case HASH(5, 'u', 's'):
			return MATCH_KEYWORD_LEN("usize", TOKEN_USIZE);
		case HASH(6, 'u', 's'):
			return MATCH_KEYWORD_LEN("ushort", TOKEN_USHORT);
		case HASH(3, 'v', 'a'):
			return MATCH_KEYWORD_LEN("var", TOKEN_VAR);
		case HASH(4, 'v', 'o'):
			return MATCH_KEYWORD_LEN("void", TOKEN_VOID);
		case HASH(8, 'v', 'o'):
			return MATCH_KEYWORD_LEN("volatile", TOKEN_VOLATILE);
		case HASH(5, 'w', 'h'):
			return MATCH_KEYWORD_LEN("while", TOKEN_WHILE);
		default:
			return TOKEN_VAR_IDENT;
	}
}
// Scope the matcher helper macros to the code above. The previous #undef
// list removed six macros (HASH4V, HASH4, ...) that were never defined,
// while leaving PRIME, SEED, FNV1 and MATCH_KEYWORD_LEN to leak past their
// last use; undef exactly what was defined instead.
#undef HASH
#undef FNV1
#undef SEED
#undef PRIME
#undef MATCH_KEYWORD
#undef MATCH_KEYWORD_LEN

// Public entry point for identifier classification. Currently backed by
// the switch-based matcher; ident_type_fnv1 is the benchmarked alternative.
TokenType identifier_type(const char* restrict start, int len)
{
	return ident_type(start, len);
}
// Return the canonical source spelling of a token type, or a
// "<placeholder>" string for token classes without a fixed spelling.
// Aborts via UNREACHABLE on a value outside the enum.
const char *token_type_to_string(TokenType type)
{
	// Token-name lookup table indexed by TokenType. Designated initializers
	// keep each mapping explicit; any enum value without an entry stays NULL
	// and is trapped below.
	static const char *token_strings[] = {
		[TOKEN_LPAREN] = "(",
		[TOKEN_RPAREN] = ")",
		[TOKEN_LBRACE] = "{",
		[TOKEN_RBRACE] = "}",
		[TOKEN_LBRACKET] = "[",
		[TOKEN_RBRACKET] = "]",
		[TOKEN_COMMA] = ",",
		[TOKEN_DOT] = ".",
		[TOKEN_EOS] = ";",
		[TOKEN_PLUS] = "+",
		[TOKEN_PLUSPLUS] = "++",
		[TOKEN_PLUS_ASSIGN] = "+=",
		[TOKEN_BIT_NOT] = "~",
		[TOKEN_NOT] = "!",
		[TOKEN_MINUS] = "-",
		[TOKEN_MINUSMINUS] = "--",
		[TOKEN_MINUS_ASSIGN] = "-=",
		[TOKEN_STAR] = "*",
		[TOKEN_MULT_ASSIGN] = "*=",
		[TOKEN_MOD] = "%",
		[TOKEN_MOD_ASSIGN] = "%=",
		[TOKEN_DIV] = "/",
		[TOKEN_DIV_ASSIGN] = "/=",
		[TOKEN_NOT_EQUAL] = "!=",
		[TOKEN_EQ] = "=",
		[TOKEN_EQEQ] = "==",
		[TOKEN_COLON] = ":",
		[TOKEN_COLCOLON] = "::",
		[TOKEN_DOTDOT] = "..",
		[TOKEN_ELIPSIS] = "...",
		[TOKEN_GREATER] = ">",
		[TOKEN_GREATER_EQ] = ">=",
		[TOKEN_RIGHT_SHIFT] = ">>",
		[TOKEN_RIGHT_SHIFT_ASSIGN] = ">>=",
		[TOKEN_LESS] = "<",
		[TOKEN_LESS_EQ] = "<=",
		[TOKEN_LEFT_SHIFT] = "<<",
		[TOKEN_LEFT_SHIFT_ASSIGN] = "<<=",
		[TOKEN_ARROW] = "->",
		[TOKEN_AND] = "&&",
		[TOKEN_AND_ASSIGN] = "&&=",
		[TOKEN_AMP] = "&",
		[TOKEN_BIT_AND_ASSIGN] = "&=",
		[TOKEN_OR] = "||",
		[TOKEN_OR_ASSIGN] = "||=",
		[TOKEN_BIT_OR] = "|",
		[TOKEN_BIT_OR_ASSIGN] = "|=",
		[TOKEN_BIT_XOR] = "^",
		[TOKEN_BIT_XOR_ASSIGN] = "^=",
		[TOKEN_VAR_IDENT] = "<varIdent>",
		[TOKEN_TYPE_IDENT] = "<TypeIdent>",
		[TOKEN_STRING] = "<string>",
		[TOKEN_INTEGER] = "<int>",
		[TOKEN_REAL] = "<float>",
		[TOKEN_QUESTION] = "?",
		[TOKEN_ELVIS] = "?:",
		[TOKEN_VOID] = "void",
		[TOKEN_ALIAS] = "alias",
		[TOKEN_CONST] = "const",
		[TOKEN_VOLATILE] = "volatile",
		[TOKEN_ELSE] = "else",
		[TOKEN_FALSE] = "false",
		[TOKEN_CONTINUE] = "continue",
		[TOKEN_FUNC] = "func",
		[TOKEN_FOR] = "for",
		[TOKEN_IMPORT] = "import",
		[TOKEN_MODULE] = "module",
		[TOKEN_IF] = "if",
		[TOKEN_NIL] = "nil",
		[TOKEN_RETURN] = "return",
		[TOKEN_GOTO] = "goto",
		[TOKEN_DEFER] = "defer",
		[TOKEN_TRUE] = "true",
		[TOKEN_WHILE] = "while",
		[TOKEN_CASE] = "case",
		[TOKEN_ASM] = "asm",
		[TOKEN_DEFAULT] = "default",
		[TOKEN_SWITCH] = "switch",
		[TOKEN_UNTIL] = "until",
		[TOKEN_BREAK] = "break",
		[TOKEN_TYPE] = "type",
		[TOKEN_DO] = "do",
		[TOKEN_PUBLIC] = "public",
		[TOKEN_LOCAL] = "local",
		[TOKEN_STRUCT] = "struct",
		[TOKEN_UNION] = "union",
		[TOKEN_ENUM] = "enum",
		[TOKEN_AT] = "@",
		[TOKEN_AS] = "as",
		[TOKEN_ERROR] = "<error>",
		[TOKEN_EOF] = "<eof>",
		[TOKEN_CAST] = "cast",
		[TOKEN_C_LONGDOUBLE] = "c_longdouble",
		[TOKEN_C_USHORT] = "c_ushort",
		[TOKEN_C_UINT] = "c_uint",
		[TOKEN_C_ULONG] = "c_ulong",
		[TOKEN_C_ULONGLONG] = "c_ulonglong",
		// NOTE(review): "c_ishort" looks like a typo for "c_short"; kept
		// byte-identical because the unit test's expected keyword count (81)
		// depends on it not lexing back as a keyword -- fix both together.
		[TOKEN_C_SHORT] = "c_ishort",
		[TOKEN_C_INT] = "c_int",
		[TOKEN_C_LONG] = "c_long",
		[TOKEN_C_LONGLONG] = "c_longlong",
		[TOKEN_MACRO] = "macro",
		[TOKEN_F256] = "f256",
		[TOKEN_I256] = "i256",
		[TOKEN_U256] = "u256",
		[TOKEN_F128] = "f128",
		[TOKEN_I128] = "i128",
		[TOKEN_U128] = "u128",
		[TOKEN_F64] = "f64",
		[TOKEN_I64] = "i64",
		[TOKEN_U64] = "u64",
		[TOKEN_F32] = "f32",
		[TOKEN_I32] = "i32",
		[TOKEN_U32] = "u32",
		[TOKEN_F16] = "f16",
		[TOKEN_I16] = "i16",
		[TOKEN_U16] = "u16",
		[TOKEN_I8] = "i8",
		[TOKEN_U8] = "u8",
		[TOKEN_BOOL] = "bool",
		[TOKEN_QUAD] = "quad",
		[TOKEN_DOUBLE] = "double",
		[TOKEN_FLOAT] = "float",
		[TOKEN_LONG] = "long",
		[TOKEN_ULONG] = "ulong",
		[TOKEN_INT] = "int",
		[TOKEN_UINT] = "uint",
		[TOKEN_SHORT] = "short",
		[TOKEN_USHORT] = "ushort",
		[TOKEN_BYTE] = "byte",
		[TOKEN_CHAR] = "char",
		[TOKEN_ISIZE] = "isize",
		[TOKEN_USIZE] = "usize",
		[TOKEN_CAPS_IDENT] = "<CAPS_IDENT>",
		[TOKEN_AT_IDENT] = "<@ident>",
		[TOKEN_HASH_IDENT] = "<#ident>",
		[TOKEN_DOLLAR_IDENT] = "<$ident>",
		[TOKEN_CATCH] = "catch",
		[TOKEN_GENERIC] = "generic",
		[TOKEN_THROW] = "throw",
		[TOKEN_THROWS] = "throws",
		[TOKEN_TRY] = "try",
		[TOKEN_TYPEDEF] = "typedef",
		[TOKEN_VAR] = "var",
		[TOKEN_HALF] = "half",
		[INVALID_TOKEN] = "<\?\?\?>",
	};
	if ((unsigned)type < sizeof(token_strings) / sizeof(token_strings[0]))
	{
		const char *string = token_strings[type];
		if (string) return string;
	}
	UNREACHABLE
}

182
src/compiler/lexer.h Normal file
View File

@@ -0,0 +1,182 @@
#pragma once
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Every token type the lexer can produce.
// NOTE(review): the tag `_TokenType` (leading underscore + capital letter)
// is an identifier reserved for the implementation in C -- consider
// renaming to e.g. `TokenType_`.
// NOTE(review): ordering is load-bearing -- the unit tests iterate the
// range [0, INVALID_TOKEN), so TOKEN_EOF is placed after INVALID_TOKEN;
// do not reorder casually.
typedef enum _TokenType
{
	// Single-character tokens.
	TOKEN_LPAREN,
	TOKEN_RPAREN,
	TOKEN_LBRACE,
	TOKEN_RBRACE,
	TOKEN_LBRACKET,
	TOKEN_RBRACKET,
	TOKEN_COMMA,
	TOKEN_DOT,
	TOKEN_EOS,
	TOKEN_AT,
	// One or two character tokens.
	TOKEN_PLUS,
	TOKEN_PLUSPLUS,
	TOKEN_PLUS_ASSIGN,
	TOKEN_BIT_NOT,
	TOKEN_NOT,
	TOKEN_MINUS,
	TOKEN_MINUSMINUS,
	TOKEN_MINUS_ASSIGN,
	TOKEN_STAR,
	TOKEN_MULT_ASSIGN,
	TOKEN_DIV,
	TOKEN_DIV_ASSIGN,
	TOKEN_MOD,
	TOKEN_MOD_ASSIGN,
	TOKEN_NOT_EQUAL,
	TOKEN_EQ,
	TOKEN_EQEQ,
	TOKEN_COLON,
	TOKEN_COLCOLON, // Not used but reserved
	TOKEN_DOTDOT,
	TOKEN_QUESTION,
	// Three or more
	TOKEN_ELIPSIS,
	TOKEN_GREATER,
	TOKEN_GREATER_EQ,
	TOKEN_RIGHT_SHIFT,
	TOKEN_RIGHT_SHIFT_ASSIGN,
	TOKEN_LESS,
	TOKEN_LESS_EQ,
	TOKEN_LEFT_SHIFT,
	TOKEN_LEFT_SHIFT_ASSIGN,
	TOKEN_ARROW, // Not used but reserved
	TOKEN_AND,
	TOKEN_AND_ASSIGN,
	TOKEN_AMP,
	TOKEN_BIT_AND_ASSIGN,
	TOKEN_OR,
	TOKEN_OR_ASSIGN,
	TOKEN_BIT_OR,
	TOKEN_BIT_OR_ASSIGN,
	TOKEN_BIT_XOR,
	TOKEN_BIT_XOR_ASSIGN,
	TOKEN_ELVIS,
	// Built-in type keywords (sized numeric types).
	TOKEN_F256,
	TOKEN_I256,
	TOKEN_U256,
	TOKEN_F128,
	TOKEN_I128,
	TOKEN_U128,
	TOKEN_F64,
	TOKEN_I64,
	TOKEN_U64,
	TOKEN_F32,
	TOKEN_I32,
	TOKEN_U32,
	TOKEN_F16,
	TOKEN_I16,
	TOKEN_U16,
	TOKEN_I8,
	TOKEN_U8,
	TOKEN_QUAD,
	TOKEN_DOUBLE,
	TOKEN_FLOAT,
	TOKEN_HALF,
	TOKEN_LONG,
	TOKEN_ULONG,
	TOKEN_INT,
	TOKEN_UINT,
	TOKEN_SHORT,
	TOKEN_USHORT,
	TOKEN_BYTE,
	TOKEN_CHAR,
	TOKEN_BOOL,
	TOKEN_ISIZE,
	TOKEN_USIZE,
	// Literals.
	// In order to make the grammar
	// non ambiguous, we split tokens at the
	// lexer level
	TOKEN_TYPE_IDENT,
	TOKEN_CAPS_IDENT,
	TOKEN_VAR_IDENT,
	// We want to parse @foo / #foo / $foo separately.
	// Otherwise we allow things like "@ foo" which would be pretty bad.
	TOKEN_AT_IDENT,
	TOKEN_HASH_IDENT,
	TOKEN_DOLLAR_IDENT,
	TOKEN_STRING,
	TOKEN_INTEGER,
	TOKEN_REAL,
	// Keywords.
	TOKEN_ALIAS, // Reserved
	TOKEN_AS,
	TOKEN_ASM,
	TOKEN_BREAK,
	TOKEN_CASE,
	TOKEN_CAST,
	TOKEN_CATCH,
	TOKEN_CONST,
	TOKEN_CONTINUE,
	TOKEN_DEFAULT,
	TOKEN_DEFER,
	TOKEN_DO,
	TOKEN_ELSE,
	TOKEN_ENUM,
	TOKEN_ERROR,
	TOKEN_FALSE,
	TOKEN_FOR,
	TOKEN_FUNC,
	TOKEN_GENERIC,
	TOKEN_GOTO,
	TOKEN_IF,
	TOKEN_IMPORT,
	TOKEN_LOCAL,
	TOKEN_MACRO,
	TOKEN_MODULE,
	TOKEN_NIL,
	TOKEN_PUBLIC,
	TOKEN_RETURN,
	TOKEN_STRUCT,
	TOKEN_SWITCH,
	TOKEN_THROW,
	TOKEN_THROWS,
	TOKEN_TRUE,
	TOKEN_TRY,
	TOKEN_TYPE, // Reserved
	TOKEN_TYPEDEF,
	TOKEN_UNION,
	TOKEN_UNTIL,
	TOKEN_VAR, // Reserved
	TOKEN_VOID,
	TOKEN_VOLATILE,
	TOKEN_WHILE,
	// C interop type keywords.
	TOKEN_C_USHORT,
	TOKEN_C_SHORT,
	TOKEN_C_INT,
	TOKEN_C_UINT,
	TOKEN_C_LONG,
	TOKEN_C_ULONG,
	TOKEN_C_LONGLONG,
	TOKEN_C_ULONGLONG,
	TOKEN_C_LONGDOUBLE,
	INVALID_TOKEN,
	TOKEN_EOF,
} TokenType;

// Canonical source spelling (or "<placeholder>") for a token type.
const char *token_type_to_string(TokenType type);
// Classify an identifier as a keyword token or TOKEN_VAR_IDENT
// (switch-based matcher).
TokenType identifier_type(const char* restrict start, int len);
// Alternative hash-based matcher, kept for benchmarking against
// identifier_type.
TokenType ident_type_fnv1(const char *restrict start, int len);

View File

@@ -0,0 +1,17 @@
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "benchmark.h"
#include <time.h>
// Timestamp captured by the most recent bench_begin() call.
// Was `static int`: clock() returns clock_t, and with CLOCKS_PER_SEC of
// 1,000,000 an int overflows/truncates after roughly 35 CPU-minutes.
static clock_t begin = 0;

// Start (or restart) the benchmark timer.
void bench_begin(void)
{
	begin = clock();
}

// Return seconds of processor time elapsed since the last bench_begin().
double bench_mark(void)
{
	return (clock() - begin) / (double)CLOCKS_PER_SEC;
}

View File

@@ -0,0 +1,9 @@
#pragma once
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Start (or restart) the benchmark timer.
void bench_begin(void);
// Seconds of processor time elapsed since the last bench_begin().
double bench_mark(void);

View File

@@ -0,0 +1,82 @@
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <compiler/lexer.h>
#include "tests.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "benchmark.h"
// Test assertion for this file: print a failure message and hard-exit.
#define TEST_ASSERT(cond, text, ...) do { if (!(cond)) { printf("\nTEST FAILED: " text "\n", ##__VA_ARGS__); exit(-1); } } while (0)

// Lexer self-test plus keyword-matcher benchmark. Terminates the process:
// exit(-1) on any failure, exit(0) after the benchmarks.
static void test_lexer(void)
{
#ifdef __OPTIMIZE__
	printf("--- RUNNING OPTIMIZED ---\n");
#endif
	printf("Begin lexer testing.\n");
	printf("1. Check number of keywords...");
	int tokens_found = 0;
	// NOTE(review): 81 rather than 82 because token_type_to_string returns
	// the misspelled "c_ishort" for TOKEN_C_SHORT, which does not lex back
	// as a keyword -- confirm intent; fixing the spelling requires bumping
	// this constant too.
	const int EXPECTED_TOKENS = 81;
	const char* tokens[INVALID_TOKEN];
	int len[INVALID_TOKEN];
	for (int i = 0; i < INVALID_TOKEN; i++)
	{
		const char* token = token_type_to_string((TokenType)i);
		tokens[i] = token;
		len[i] = strlen(token);
		// Round-trip check: a token's printed spelling must lex back to the
		// same token type, and both matcher implementations must agree.
		TokenType type = identifier_type(token, len[i]);
		TokenType type2 = ident_type_fnv1(token, len[i]);
		if (type != TOKEN_VAR_IDENT)
		{
			tokens_found++;
			TEST_ASSERT(type == i, "Mismatch on token %s", token);
			if (type2 != type)
			{
				printf("\n(fnv1) Test mismatch on token %s, generated %s\n", token, token_type_to_string(type2));
			}
		}
		// NOTE(review): every entry is then overwritten with "byte", so the
		// benchmarks below measure matching speed on uniform input only --
		// presumably intentional, confirm.
		tokens[i] = "byte";
		len[i] = 4;
	}
	printf(" %d found.\n", tokens_found);
	// A keyword followed by trailing characters must not match...
	TEST_ASSERT(ident_type_fnv1("alias ", 6) == TOKEN_VAR_IDENT, "Error in fnv1 ident");
	TEST_ASSERT(identifier_type("alias ", 6) == TOKEN_VAR_IDENT, "Error in switch ident");
	// ...but the first 5 characters of "alias " still form the keyword.
	TEST_ASSERT(ident_type_fnv1("alias ", 5) != TOKEN_VAR_IDENT, "Error in fnv1 ident2");
	TEST_ASSERT(identifier_type("alias ", 5) != TOKEN_VAR_IDENT, "Error in switch ident2");
	TEST_ASSERT(tokens_found == EXPECTED_TOKENS, "Unexpected number of identifiers! Expected %d.", EXPECTED_TOKENS);
	const int BENCH_REPEATS = 10000000;
	printf("2. Test keyword lexing speed (switch)... ");
	bench_begin();
	for (int b = 0; b < BENCH_REPEATS; b++)
	{
		for (int i = 0; i < INVALID_TOKEN; i++)
		{
			identifier_type(tokens[i], len[i]);
		}
	}
	printf("complete in %fs\n", bench_mark());
	printf("3. Test keyword lexing speed (fnv1)... ");
	bench_begin();
	for (int b = 0; b < BENCH_REPEATS; b++)
	{
		for (int i = 0; i < INVALID_TOKEN; i++)
		{
			ident_type_fnv1(tokens[i], len[i]);
		}
	}
	printf("complete in %fs\n", bench_mark());
	// The unit-test command terminates here; callers never regain control.
	exit(0);
}
// Entry point for the compiler's built-in unit tests (the "utest"
// command). test_lexer exits the process itself, so this never returns
// on the current path.
void compiler_tests(void)
{
	test_lexer();
}

View File

@@ -0,0 +1,8 @@
#pragma once
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
void compiler_tests(void);

View File

@@ -2,7 +2,7 @@
#include "build/build_options.h" #include "build/build_options.h"
#include "build/project_creation.h" #include "build/project_creation.h"
#include "utils/errors.h" #include "utils/errors.h"
#include "compiler_tests/tests.h"
int main(int argc, const char *argv[]) int main(int argc, const char *argv[])
{ {
@@ -12,6 +12,8 @@ int main(int argc, const char *argv[])
case COMMAND_INIT: case COMMAND_INIT:
create_project(); create_project();
break; break;
case COMMAND_UNIT_TEST:
compiler_tests();
case COMMAND_COMPILE: case COMMAND_COMPILE:
case COMMAND_COMPILE_RUN: case COMMAND_COMPILE_RUN:
case COMMAND_MISSING: case COMMAND_MISSING:

View File

@@ -4,4 +4,18 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
#include <stdio.h>
#include <stdlib.h>
#define error_exit(...) do { fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); exit(EXIT_FAILURE); } while(0) #define error_exit(...) do { fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); exit(EXIT_FAILURE); } while(0)
// Report a fatal internal error with the failing function/line, then abort.
// Writes to stderr for consistency with error_exit above (previously went
// to stdout via printf).
#define FATAL_ERROR(_string, ...) do { fprintf(stderr, "FATAL ERROR at %s:%d: " _string "\n", __func__, __LINE__, ##__VA_ARGS__); exit(-1); } while (0)
// Marks code that must never execute. (The trailing semicolon is part of
// the macro: existing call sites use UNREACHABLE as a bare statement.)
#define UNREACHABLE FATAL_ERROR("Cannot reach %s:%d", __func__, __LINE__);
#define TODO FATAL_ERROR("Not done yet %s:%d", __func__, __LINE__);
// Assert a condition, aborting with a formatted message on failure.
// Wrapped in do/while(0): the previous bare `while (!(cond)) { ... }` form
// nests unsafely inside if/else.
#define TEST_ASSERT(_condition, _string, ...) do { if (!(_condition)) FATAL_ERROR(_string, ##__VA_ARGS__); } while (0)
// Compare two integer expressions, reporting both values on mismatch.
// Temporaries renamed: double-underscore identifiers are reserved for the
// implementation. Arguments are parenthesized, and the stray trailing
// semicolon after while(0) was dropped so `EXPECT(...);` is one statement.
#define EXPECT(_string, _value, _expected) \
	do { long long expect_actual_ = (_value); long long expect_wanted_ = (_expected); \
	TEST_ASSERT(expect_actual_ == expect_wanted_, "Checking " _string ": expected %lld but was %lld.", expect_wanted_, expect_actual_); } while (0)