diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7b2ccf434..8443aad82 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -14,5 +14,8 @@ add_executable(c3c
src/build/project_creation.c
src/utils/errors.c
src/utils/file_utils.c
- src/utils/string_utils.c
- src/compiler/lexer.c src/compiler/lexer.h src/compiler_tests/tests.c src/compiler_tests/tests.h src/compiler_tests/benchmark.c src/compiler_tests/benchmark.h)
\ No newline at end of file
+ src/compiler/lexer.c
+ src/compiler/tokens.c
+ src/compiler/symtab.c
+ src/compiler_tests/tests.c
+ src/compiler_tests/benchmark.c src/compiler/malloc.c src/compiler/malloc.h src/compiler/compiler.c src/compiler/compiler.h)
\ No newline at end of file
diff --git a/src/build/build_options.c b/src/build/build_options.c
index bc0cab9bf..8e7e56765 100644
--- a/src/build/build_options.c
+++ b/src/build/build_options.c
@@ -13,6 +13,8 @@
#include "../utils/errors.h"
static const char* DEFAULT_TARGET = "default";
+static const int DEFAULT_SYMTAB_SIZE = 64 * 1024;
+static const int MAX_SYMTAB_SIZE = 1024 * 1024;
BuildOptions build_options;
static int arg_index;
@@ -46,6 +48,7 @@ static void usage(void)
 	OUTPUT("  --path <dir>      - Use this as the base directory for the current command.");
OUTPUT(" --template - Use a different template: \"lib\", \"staticlib\" or a path.");
OUTPUT(" --about - Prints a short description of C3.");
+ OUTPUT(" --symtab - Sets the preferred symtab size.");
}
@@ -213,6 +216,17 @@ static void parse_option()
build_options.path = check_dir(next_arg());
return;
}
+ if (match_longopt("symtab"))
+ {
+ if (at_end() || next_is_opt()) error_exit("error: --symtab needs a number.");
+ const char *number = next_arg();
+ int size = atoi(number);
+ if (size < 1024) error_exit("error: --symtab valid size > 1024.");
+ if (size > MAX_SYMTAB_SIZE) error_exit("error: --symtab size cannot exceed %d", MAX_SYMTAB_SIZE);
+ build_options.symtab_size = size;
+ return;
+
+ }
if (match_longopt("help"))
{
break;
@@ -236,7 +250,7 @@ void parse_arguments(int argc, const char *argv[])
build_options.path = ".";
build_options.command = COMMAND_MISSING;
-
+ build_options.symtab_size = DEFAULT_SYMTAB_SIZE;
arg_count = argc;
args = argv;
for (arg_index = 1; arg_index < arg_count; arg_index++)
diff --git a/src/build/build_options.h b/src/build/build_options.h
index bfbb07874..292c05fa9 100644
--- a/src/build/build_options.h
+++ b/src/build/build_options.h
@@ -4,6 +4,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+#include <stdint.h>
+
#define MAX_LIB_DIRS 1024
#define MAX_FILES 2048
@@ -33,6 +35,7 @@ typedef struct
const char* target;
const char* path;
CompilerCommand command;
+ uint32_t symtab_size;
} BuildOptions;
extern BuildOptions build_options;
diff --git a/src/build/project_creation.c b/src/build/project_creation.c
index 94d9ea5b0..f7d994ab0 100644
--- a/src/build/project_creation.c
+++ b/src/build/project_creation.c
@@ -10,7 +10,7 @@
#include
#include "project_creation.h"
#include "build_options.h"
-#include "../utils/string_utils.h"
+#include "../utils/lib.h"
const char* TOML =
"[[executable]]\n"
diff --git a/src/compiler/compiler.c b/src/compiler/compiler.c
new file mode 100644
index 000000000..b3abf200d
--- /dev/null
+++ b/src/compiler/compiler.c
@@ -0,0 +1,15 @@
+// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "compiler.h"
+#include "symtab.h"
+#include "../build/build_options.h"
+#include "../utils/lib.h"
+
+void compiler_init(void)
+{
+ uint32_t symtab_size = nextHighestPowerOf2(build_options.symtab_size);
+ symtab_init(symtab_size);
+
+}
\ No newline at end of file
diff --git a/src/utils/string_utils.c b/src/compiler/compiler.h
similarity index 82%
rename from src/utils/string_utils.c
rename to src/compiler/compiler.h
index 0996858f9..4457e1ef5 100644
--- a/src/utils/string_utils.c
+++ b/src/compiler/compiler.h
@@ -1,6 +1,8 @@
+#pragma once
+
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#include "string_utils.h"
+void compiler_init(void);
\ No newline at end of file
diff --git a/src/compiler/compiler_common.h b/src/compiler/compiler_common.h
new file mode 100644
index 000000000..70b17d9d3
--- /dev/null
+++ b/src/compiler/compiler_common.h
@@ -0,0 +1,38 @@
+#pragma once
+
+// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+
+typedef uint32_t SourceLoc;
+
+typedef struct
+{
+ SourceLoc loc;
+ uint32_t length;
+} SourceRange;
+
+
+typedef struct
+{
+ const char* start;
+ SourceRange span;
+ TokenType type : 8;
+ union
+ {
+ const char *string;
+ };
+} Token;
+
+typedef struct
+{
+ const char *contents;
+ const char *name;
+ SourceLoc start;
+ SourceLoc end;
+} File;
+
+#define TOKEN_MAX_LENGTH 0xFFFF
+#define MAX_IDENTIFIER_LENGTH 31
diff --git a/src/compiler/lexer.c b/src/compiler/lexer.c
index bba78f0cf..c83570934 100644
--- a/src/compiler/lexer.c
+++ b/src/compiler/lexer.c
@@ -4,626 +4,675 @@
#include
#include "lexer.h"
-#include "../utils/string_utils.h"
#include
+#include <assert.h>
#include "../utils/errors.h"
+#include "../utils/lib.h"
+#include "symtab.h"
-#define MATCH_KEYWORD_LEN(_keyword, _type) \
- ((sizeof(_keyword) != len + 1) ? TOKEN_VAR_IDENT : check_keyword(start, len, _keyword, _type))
-
-#define MATCH_KEYWORD(_keyword, _type) check_keyword(start, len, _keyword, _type)
-
-static inline TokenType check_keyword(const char * restrict start, size_t len, const char * restrict keyword, TokenType type)
+typedef enum
{
- if (memcmp(start + 1, keyword + 1, len - 1) == 0)
+ LEXER_STATE_NORMAL,
+ LEXER_STATE_DEFERED_PARSING,
+ LEXER_STATE_DOCS_PARSE,
+ LEXER_STATE_DOCS_PARSE_DIRECTIVE,
+} LexerState;
+
+typedef struct
+{
+ const char *begin;
+ const char *start;
+ const char *current;
+ uint16_t source_file;
+ LexerState lexer_state;
+ File *current_file;
+ //Token saved_tok; Will be used later if doing deferred parsing.
+ //Token saved_prev_tok; Will be used later is doing deferred parsing.
+ SourceLoc last_in_range;
+} Lexer;
+
+Lexer lexer;
+
+
+// --- Lexing general methods.
+
+static inline char peek()
+{
+ return *lexer.current;
+}
+
+static inline char prev()
+{
+ return lexer.current[-1];
+}
+
+static inline void backtrack()
+{
+ lexer.current--;
+}
+
+static inline char lookahead(int steps)
+{
+ return lexer.current[steps];
+}
+
+static inline char peek_next()
+{
+ return lookahead(1);
+}
+
+static inline char next()
+{
+ return *(lexer.current++);
+}
+
+static inline void advance(int steps)
+{
+ lexer.current += steps;
+}
+
+static inline bool reached_end(void)
+{
+ return *lexer.current == '\0';
+}
+
+static Token error_token(const char *message)
+{
+ Token token;
+ token.type = TOKEN_ERROR;
+ token.start = lexer.start;
+ token.span.length = 1;
+ token.span.loc = lexer.current_file->start + (lexer.start - lexer.begin);
+ // TODO error_at(token.span, message);
+ return token;
+}
+
+static Token make_token(TokenType type)
+{
+ size_t token_size = lexer.current - lexer.start;
+ if (token_size > TOKEN_MAX_LENGTH) return error_token("Token exceeding max length");
+ return (Token)
+ {
+ .type = type,
+ .start = lexer.start,
+ .span = { .loc = lexer.current_file->start + (lexer.start - lexer.begin), .length = token_size }
+ };
+}
+
+static Token make_string_token(TokenType type, const char* string)
+{
+ size_t token_size = lexer.current - lexer.start;
+ if (token_size > TOKEN_MAX_LENGTH) return error_token("Token exceeding max length");
+ return (Token)
+ {
+ .type = type,
+ .start = lexer.start,
+ .span = { .loc = lexer.current_file->start + (lexer.start - lexer.begin), .length = token_size },
+ .string = string,
+ };
+}
+
+static inline bool match(char expected)
+{
+ if (reached_end()) return false;
+ if (*lexer.current != expected) return false;
+ lexer.current++;
+ return true;
+}
+
+static inline void match_assert(char expected)
+{
+ assert(!reached_end());
+ assert(lexer.current[0] == expected);
+ lexer.current++;
+}
+
+// --- Whitespace handling.
+
+typedef enum
+{
+ WHITESPACE_SKIPPED_OK,
+ WHITESPACE_FOUND_DOCS_START,
+ WHITESPACE_COMMENT_REACHED_EOF,
+ WHITESPACE_FOUND_EOF,
+ WHITESPACE_FOUND_DOCS_EOL,
+} SkipWhitespaceResult;
+
+/**
+ * Skip regular comments.
+ *
+ * @return the result of the skip (did we enter docs? did we have any errors?)
+ */
+SkipWhitespaceResult skip_whitespace()
+{
+ while (1)
{
- return type;
+ char c = peek();
+ switch (c)
+ {
+ case '\0':
+ return WHITESPACE_FOUND_EOF;
+ case '\n':
+ // If we are currently parsing docs, then end of line is considered meaningful.
+ if (lexer.lexer_state == LEXER_STATE_DOCS_PARSE_DIRECTIVE) return WHITESPACE_FOUND_DOCS_EOL;
+ case ' ':
+ case '\t':
+ case '\r':
+ case '\f':
+ next();
+ break;
+ case '/':
+ if (peek_next() == '/')
+ {
+ while (!reached_end() && peek() != '\n') next();
+ break;
+ }
+ if (peek_next() == '*')
+ {
+ // Enter docs parsing on /**
+ if (lookahead(2) == '*' && lexer.lexer_state == LEXER_STATE_NORMAL)
+ {
+ return WHITESPACE_FOUND_DOCS_START;
+ }
+ while (1)
+ {
+ next();
+ if (reached_end()) return WHITESPACE_COMMENT_REACHED_EOF;
+ if (peek() == '*' && peek_next() == '/')
+ {
+ lexer.current += 2;
+ break;
+ }
+ }
+ break;
+ }
+ if (peek_next() == '+')
+ {
+ int nesting_depth = 1;
+ while (1)
+ {
+ next();
+ if (reached_end()) return WHITESPACE_COMMENT_REACHED_EOF;
+ if (peek() == '/' && peek_next() == '+')
+ {
+ lexer.current += 2;
+ nesting_depth++;
+ continue;
+ }
+ if (peek() == '+' && peek_next() == '/')
+ {
+ lexer.current += 2;
+ if (--nesting_depth == 0) break;
+ }
+ }
+ break;
+ }
+ return WHITESPACE_SKIPPED_OK;
+ default:
+ return WHITESPACE_SKIPPED_OK;
+ }
}
- return TOKEN_VAR_IDENT;
}
-// C idents should be rare, so just treat them uniformly.
-static inline TokenType c_ident(const char *restrict start, const int len)
+// --- Normal scanning methods start here
+
+static inline Token scan_prefixed_ident(TokenType type, TokenType no_ident_type)
{
- switch (start[3])
+ uint32_t hash = FNV1a(prev(), FNV1_SEED);
+ while (is_alphanum_(peek()))
{
- case 'n': return MATCH_KEYWORD_LEN("c_int", TOKEN_C_INT);
- case 'i': return MATCH_KEYWORD_LEN("c_uint", TOKEN_C_UINT);
- case 's': return MATCH_KEYWORD_LEN("c_ushort", TOKEN_C_USHORT);
- case 'h': return MATCH_KEYWORD_LEN("c_short", TOKEN_C_SHORT);
- case 'o':
- switch (len)
+ hash = FNV1a(next(), hash);
+ }
+ int len = lexer.current - lexer.start;
+ if (len == 1) return make_token(no_ident_type);
+ const char* interned = symtab_add(lexer.start, len, hash, &type);
+ return make_string_token(type, interned);
+}
+
+static inline void scan_skipped_ident()
+{
+ while (is_alphanum_(peek())) next();
+}
+
+
+// Parses identifiers. Note that this is a bit complicated here since
+// we split identifiers into 3 types + find keywords.
+static inline Token scan_ident(void)
+{
+ // If we're in ignore keywords state, simply skip stuff.
+ if (lexer.lexer_state == LEXER_STATE_DEFERED_PARSING)
+ {
+ scan_skipped_ident();
+ return make_token(TOKEN_VAR_IDENT);
+ }
+
+ TokenType type = 0;
+ uint32_t hash = FNV1_SEED;
+ while (peek() == '_')
+ {
+ hash = FNV1a(next(), hash);
+ }
+ while (1)
+ {
+ switch (peek())
+ {
+ case 'a': case 'b': case 'c': case 'd': case 'e':
+ case 'f': case 'g': case 'h': case 'i': case 'j':
+ case 'k': case 'l': case 'm': case 'n': case 'o':
+ case 'p': case 'q': case 'r': case 's': case 't':
+ case 'u': case 'v': case 'w': case 'x': case 'y':
+ case 'z':
+ if (!type)
+ {
+ type = TOKEN_VAR_IDENT;
+ }
+ else if (type == TOKEN_CAPS_IDENT)
+ {
+ type = TOKEN_TYPE_IDENT;
+ }
+ break;
+ case 'A': case 'B': case 'C': case 'D': case 'E':
+ case 'F': case 'G': case 'H': case 'I': case 'J':
+ case 'K': case 'L': case 'M': case 'N': case 'O':
+ case 'P': case 'Q': case 'R': case 'S': case 'T':
+ case 'U': case 'V': case 'W': case 'X': case 'Y':
+ case 'Z':
+ if (!type) type = TOKEN_CAPS_IDENT;
+ break;
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ if (!type) return error_token("A letter must precede any digit");
+ case '_':
+ break;
+ default:
+ goto EXIT;
+ }
+ hash = FNV1a(next(), hash);
+ }
+ EXIT:;
+ uint32_t len = lexer.current - lexer.start;
+ const char* interned_string = symtab_add(lexer.start, len, hash, &type);
+ return make_string_token(type, interned_string);
+}
+
+
+#pragma mark ----- Number scanning
+
+static Token scan_oct(void)
+{
+ next(); // Skip the o
+ if (!is_oct(next())) return error_token("Invalid octal sequence");
+ while (is_oct_or_(peek())) next();
+ return make_token(TOKEN_INTEGER);
+}
+
+
+Token scan_binary(void)
+{
+ next(); // Skip the b
+ if (!is_binary(next())) return error_token("Invalid binary sequence");
+ while (is_binary_or_(peek())) next();
+ return make_token(TOKEN_INTEGER);
+}
+
+#define PARSE_SPECIAL_NUMBER(is_num, is_num_with_underscore, exp, EXP) \
+ while (is_num_with_underscore(peek())) next(); \
+ bool is_float = false; \
+ if (peek() == '.') \
+ { \
+ is_float = true; \
+ next(); \
+ char c = peek(); \
+ if (c == '_') return error_token("Underscore may only appear between digits."); \
+ if (is_num(c)) next(); \
+ while (is_num_with_underscore(peek())) next(); \
+ } \
+ char c = peek(); \
+ if (c == (exp) || c == (EXP)) \
+ { \
+ is_float = true; \
+ next(); \
+ char c2 = next(); \
+ if (c2 == '+' || c2 == '-') c2 = next(); \
+ if (!is_num(c2)) return error_token("Invalid exponential expression"); \
+ while (is_num(peek())) next(); \
+ } \
+ if (prev() == '_') return error_token("Underscore may only appear between digits."); \
+ return make_token(is_float ? TOKEN_FLOAT : TOKEN_INTEGER)
+
+static inline Token scan_hex(void)
+{
+ next(); // skip the x
+ if (!is_hex(next())) return error_token("Invalid hex sequence");
+ PARSE_SPECIAL_NUMBER(is_hex, is_hex_or_, 'p', 'P');
+}
+
+static inline Token scan_dec(void)
+{
+ PARSE_SPECIAL_NUMBER(is_digit, is_digit_or_, 'e', 'E');
+}
+
+#undef PARSE_SPECIAL_NUMBER
+
+static inline Token scan_digit(void)
+{
+ if (peek() == '0')
+ {
+ switch (peek_next())
+ {
+ // case 'X': Let's not support this? REVISIT
+ case 'x':
+ case 'X':
+ advance(2);
+ return scan_hex();
+ case 'o':
+ case 'O':
+ advance(2);
+ return scan_oct();
+ case 'b':
+ case 'B':
+ advance(2);
+ return scan_binary();
+ default:
+ break;
+ }
+ }
+ return scan_dec();
+}
+
+#pragma mark -----
+
+
+static inline Token scan_char()
+{
+ next(); // Consume "'"
+ // Handle escaped char, also handle hex code.
+ if (next() == '\\')
+ {
+ // Escape seq? We don't support octal.
+ if (next() == 'x')
+ {
+ for (int i = 0; i < 2; i++)
{
- case 6: return MATCH_KEYWORD("c_long", TOKEN_C_LONG);
- case 10: return MATCH_KEYWORD("c_longlong", TOKEN_C_LONGLONG);
- case 12: return MATCH_KEYWORD("c_longdouble", TOKEN_C_LONGDOUBLE);
- default: return TOKEN_VAR_IDENT;
+ if (!is_hex(next())) return error_token("Invalid escape sequence");
}
- case 'l':
- return len == 11
- ? MATCH_KEYWORD("c_ulonglong", TOKEN_C_ULONGLONG)
- : MATCH_KEYWORD_LEN("c_ulong", TOKEN_C_ULONG);
+ }
+ }
+ if (next() != '\'') return error_token("Invalid character value");
+ return make_token(TOKEN_INTEGER);
+}
+
+static inline Token scan_string()
+{
+ char c;
+ while ((c = next()) != '"')
+ {
+ if (c == '\\' && peek() == '"')
+ {
+ next();
+ continue;
+ }
+ if (reached_end())
+ {
+ return error_token("Unterminated string.");
+ }
+ }
+ return make_token(TOKEN_STRING);
+}
+
+static inline void skip_docs_whitespace()
+{
+ while (1)
+ {
+ char c = peek();
+ switch (c)
+ {
+ case ' ':
+ case '\t':
+ case '\r':
+ case '\f':
+ next();
+ break;
+ default:
+ return;
+ }
+ }
+}
+
+static inline Token scan_docs_directive(void)
+{
+ match_assert('@');
+ Token token = scan_prefixed_ident(TOKEN_AT_IDENT, TOKEN_AT);
+ assert(token.type != TOKEN_AT);
+ lexer.lexer_state = LEXER_STATE_DOCS_PARSE_DIRECTIVE;
+ return token;
+}
+
+static inline Token scan_docs(void)
+{
+ assert(lexer.lexer_state == LEXER_STATE_DOCS_PARSE);
+ // We assume we stand at the start of a docs comment or after a new line.
+ // First, skip any whitespace:
+ skip_docs_whitespace();
+
+ // At this point we might encounter any number of '*', consume those, unless followed by '/'
+ while (peek() == '*')
+ {
+ // We found the docs end
+ if (peek_next() == '/')
+ {
+ // Reset start
+ lexer.start = lexer.current;
+
+ // Consume the '*/'
+ advance(2);
+
+ // Return end
+ lexer.lexer_state = LEXER_STATE_NORMAL;
+ return make_token(TOKEN_DOCS_END);
+ }
+
+ // Otherwise continue consuming
+ next();
+ }
+
+ // This might be followed again by whitespace, such whitespace is skipped.
+ skip_docs_whitespace();
+
+ // Reset start
+ lexer.start = lexer.current;
+
+ // Now we passed through all of the whitespace. Here we might possibly see a "@",
+ // if so, we found a directive:
+ if (peek() == '@' && is_letter(peek_next()))
+ {
+ return scan_docs_directive();
+ }
+
+ // Otherwise this is just plain row, and we scan to the end of line *or* to a '*/'
+
+ while (1)
+ {
+ switch (peek())
+ {
+ case '*':
+ // Eat all * at the beginning.
+ while (peek_next() == '*') next();
+
+ // We found the end, so just make a token out of the rest.
+ // Note that this line will not get a linebreak at the end.
+ if (peek_next() == '/') return make_token(TOKEN_DOCS_LINE);
+ // Otherwise it's just something in the text, so continue.
+ next();
+ break;
+ case '\n':
+ // Normal line of text.
+ next();
+ return make_token(TOKEN_DOCS_LINE);
+ case '\0':
+ return error_token("Docs reached end of the file");
+ default:
+ break;
+ }
+ }
+}
+
+
+Token scan_token(void)
+{
+ // First we handle our "in docs" state.
+ if (lexer.lexer_state == LEXER_STATE_DOCS_PARSE)
+ {
+ return scan_docs();
+ }
+
+ // Now skip the whitespace.
+ SkipWhitespaceResult result = skip_whitespace();
+
+ // Point start to the first non-whitespace character.
+ lexer.start = lexer.current;
+
+ switch (result)
+ {
+ case WHITESPACE_FOUND_DOCS_START:
+ // Here we found '/**', so we skip past that
+ // and switch state.
+ advance(3);
+ lexer.lexer_state = LEXER_STATE_DOCS_PARSE;
+ return make_token(TOKEN_DOCS_START);
+ case WHITESPACE_COMMENT_REACHED_EOF:
+ return error_token("Comment was not terminated");
+ case WHITESPACE_FOUND_EOF:
+ return make_token(TOKEN_EOF);
+ case WHITESPACE_FOUND_DOCS_EOL:
+ advance(1);
+ lexer.lexer_state = LEXER_STATE_DOCS_PARSE;
+ return make_token(TOKEN_DOCS_EOL);
+ case WHITESPACE_SKIPPED_OK:
+ break;
+ }
+
+ char c = next();
+ switch (c)
+ {
+ case '@':
+ return scan_prefixed_ident(TOKEN_AT_IDENT, TOKEN_AT);
+ case '\'':
+ return scan_char();
+ case '"':
+ return scan_string();
+ case '#':
+ return scan_prefixed_ident(TOKEN_HASH_IDENT, TOKEN_HASH);
+ case '$':
+ return scan_prefixed_ident(TOKEN_DOLLAR_IDENT, TOKEN_DOLLAR);
+ case ',':
+ return make_token(TOKEN_COMMA);
+ case ';':
+ return make_token(TOKEN_EOS);
+ case '{':
+ return make_token(TOKEN_LBRACE);
+ case '}':
+ return make_token(TOKEN_RBRACE);
+ case '(':
+ return make_token(TOKEN_LPAREN);
+ case ')':
+ return make_token(TOKEN_RPAREN);
+ case '[':
+ return make_token(TOKEN_LBRACKET);
+ case ']':
+ return make_token(TOKEN_RBRACKET);
+ case '.':
+ if (match('.')) return make_token(match('.') ? TOKEN_ELIPSIS : TOKEN_DOTDOT);
+ return make_token(TOKEN_DOT);
+ case '~':
+ return make_token(TOKEN_BIT_NOT);
+ case ':':
+ return make_token(match(':') ? TOKEN_COLCOLON : TOKEN_COLON);
+ case '!':
+ return make_token(match('=') ? TOKEN_NOT_EQUAL : TOKEN_NOT);
+ case '/':
+ return make_token(match('=') ? TOKEN_DIV_ASSIGN : TOKEN_DIV);
+ case '*':
+ if (lexer.lexer_state == LEXER_STATE_DOCS_PARSE_DIRECTIVE && match('/'))
+ {
+ lexer.lexer_state = LEXER_STATE_NORMAL;
+ return make_token(TOKEN_DOCS_END);
+ }
+ return make_token(match('=') ? TOKEN_MULT_ASSIGN : TOKEN_STAR);
+ case '=':
+ return make_token(match('=') ? TOKEN_EQEQ : TOKEN_EQ);
+ case '^':
+ return make_token(match('=') ? TOKEN_BIT_XOR_ASSIGN : TOKEN_BIT_XOR);
+ case '?':
+ return make_token(match(':') ? TOKEN_ELVIS : TOKEN_QUESTION);
+ case '<':
+ if (match('<')) return make_token(match('=') ? TOKEN_SHL_ASSIGN : TOKEN_SHL);
+ return make_token(match('=') ? TOKEN_LESS_EQ : TOKEN_LESS);
+ case '>':
+ if (match('>')) return make_token(match('=') ? TOKEN_SHR_ASSIGN : TOKEN_SHR);
+ return make_token(match('=') ? TOKEN_GREATER_EQ : TOKEN_GREATER);
+ case '%':
+ return make_token(match('=') ? TOKEN_MOD_ASSIGN : TOKEN_MOD);
+ case '&':
+ if (match('&')) return make_token(match('=') ? TOKEN_AND_ASSIGN : TOKEN_AND);
+ return make_token(match('=') ? TOKEN_BIT_AND_ASSIGN : TOKEN_AMP);
+ case '|':
+ if (match('|')) return make_token(match('=') ? TOKEN_OR_ASSIGN : TOKEN_OR);
+ return make_token(match('=') ? TOKEN_BIT_OR_ASSIGN : TOKEN_BIT_OR);
+ case '+':
+ if (match('+')) return make_token(TOKEN_PLUSPLUS);
+ if (match('=')) return make_token(TOKEN_PLUS_ASSIGN);
+ return make_token(TOKEN_PLUS);
+ case '-':
+ if (match('>')) return make_token(TOKEN_ARROW);
+ if (match('-')) return make_token(TOKEN_MINUSMINUS);
+ if (match('=')) return make_token(TOKEN_MINUS_ASSIGN);
+ return make_token(TOKEN_MINUS);
default:
- return TOKEN_VAR_IDENT;
+ if (is_alphanum_(c))
+ {
+ backtrack();
+ return is_digit(c) ? scan_digit() : scan_ident();
+ }
+ return error_token("Unexpected character.");
}
}
-// A simple switch based keyword identifier.
-// Some simple benchmarking reveals it's pretty fast compared to
-// Perfect hashing approaches.
-static inline TokenType ident_type(const char *restrict start, const int len)
+void lexer_test_setup(const char* text)
{
- char current_value = start[0];
- if (len < 2) return TOKEN_VAR_IDENT;
- if (current_value == 'c' && start[1] == '_') return c_ident(start, len);
- if (len > 8 || !is_lower(current_value)) return TOKEN_VAR_IDENT;
- switch (current_value)
+ static File helper;
+ lexer.lexer_state = LEXER_STATE_NORMAL;
+ lexer.start = text;
+ lexer.current = text;
+ lexer.begin = text;
+ lexer.current_file = &helper;
+ lexer.current_file->start = 0;
+ lexer.current_file->contents = text;
+ lexer.current_file->end = 100000;
+ lexer.current_file->name = "Foo";
+}
+
+Token scan_ident_test(const char* scan)
+{
+ static File helper;
+ lexer.lexer_state = LEXER_STATE_NORMAL;
+ lexer.start = scan;
+ lexer.current = scan;
+ lexer.begin = scan;
+ lexer.current_file = &helper;
+ lexer.current_file->start = 0;
+ lexer.current_file->contents = scan;
+ lexer.current_file->end = 1000;
+ lexer.current_file->name = "Foo";
+
+ if (scan[0] == '@' && is_letter(scan[1]))
{
- case 'a':
- if (len == 2) return MATCH_KEYWORD("as", TOKEN_AS);
- switch (start[1])
- {
- case 's': return MATCH_KEYWORD_LEN("asm", TOKEN_ASM);
- case 'l': return MATCH_KEYWORD_LEN("alias", TOKEN_ALIAS);
- default: return TOKEN_VAR_IDENT;
- }
- case 'b':
- switch (start[1])
- {
- case 'o': return MATCH_KEYWORD_LEN("bool", TOKEN_BOOL);
- case 'y': return MATCH_KEYWORD_LEN("byte", TOKEN_BYTE);
- case 'r': return MATCH_KEYWORD_LEN("break", TOKEN_BREAK);
- default: return TOKEN_VAR_IDENT;
- }
- case 'c':
- if (len < 4) return TOKEN_VAR_IDENT;
- if (len == 8) return MATCH_KEYWORD_LEN("continue", TOKEN_CONTINUE);
- switch (start[3])
- {
- case 't': return MATCH_KEYWORD_LEN("cast", TOKEN_CAST);
- case 'e': return MATCH_KEYWORD_LEN("case", TOKEN_CASE);
- case 'r': return MATCH_KEYWORD_LEN("char", TOKEN_CHAR);
- case 's': return MATCH_KEYWORD_LEN("const", TOKEN_CONST);
- case 'c': return MATCH_KEYWORD_LEN("catch", TOKEN_CATCH);
- default: return TOKEN_VAR_IDENT;
-
- }
- case 'd':
- if (len < 5) return MATCH_KEYWORD_LEN("do", TOKEN_DO);
- switch (start[3])
- {
- case 'e': return MATCH_KEYWORD_LEN("defer", TOKEN_DEFER);
- case 'a': return MATCH_KEYWORD_LEN("default", TOKEN_DEFAULT);
- case 'b': return MATCH_KEYWORD_LEN("double", TOKEN_DOUBLE);
- default: return TOKEN_VAR_IDENT;
- }
- case 'e':
- switch (start[1])
- {
- case 'l': return MATCH_KEYWORD_LEN("else", TOKEN_ELSE);
- case 'n': return MATCH_KEYWORD_LEN("enum", TOKEN_ENUM);
- case 'r': return MATCH_KEYWORD_LEN("error", TOKEN_ERROR);
- default: return TOKEN_VAR_IDENT;
- }
- case 'f':
- switch (start[2])
- {
- case '2':
- if (len == 4) return MATCH_KEYWORD("f128", TOKEN_F128);
- return MATCH_KEYWORD_LEN("f32", TOKEN_F32);
- case '6': return MATCH_KEYWORD_LEN("f16", TOKEN_F16);
- case '4': return MATCH_KEYWORD_LEN("f64", TOKEN_F64);
- case '5': return MATCH_KEYWORD_LEN("f256", TOKEN_F256);
- case 'r': return MATCH_KEYWORD_LEN("for", TOKEN_FOR);
- case 'l': return MATCH_KEYWORD_LEN("false", TOKEN_FALSE);
- case 'o': return MATCH_KEYWORD_LEN("float", TOKEN_FLOAT);
- case 'n': return MATCH_KEYWORD_LEN("func", TOKEN_FUNC);
- default: return TOKEN_VAR_IDENT;
- }
- case 'g':
- switch (start[1])
- {
- case 'o': return MATCH_KEYWORD_LEN("goto", TOKEN_GOTO);
- case 'e': return MATCH_KEYWORD_LEN("generic", TOKEN_GENERIC);
- default: return TOKEN_VAR_IDENT;
- }
- case 'h': return MATCH_KEYWORD_LEN("half", TOKEN_HALF);
- case 'i':
- switch (start[1])
- {
- case '1':
- if (len == 4) return MATCH_KEYWORD("i128", TOKEN_I128);
- return MATCH_KEYWORD_LEN("i16", TOKEN_I16);
- case 'f': return MATCH_KEYWORD_LEN("if", TOKEN_IF);
- case '8': return MATCH_KEYWORD_LEN("i8", TOKEN_I8);
- case '6': return MATCH_KEYWORD_LEN("i64", TOKEN_I64);
- case '2': return MATCH_KEYWORD_LEN("i256", TOKEN_I256);
- case '3': return MATCH_KEYWORD_LEN("i32", TOKEN_I32);
- case 'n': return MATCH_KEYWORD_LEN("int", TOKEN_INT);
- case 'm': return MATCH_KEYWORD_LEN("import", TOKEN_IMPORT);
- case 's': return MATCH_KEYWORD_LEN("isize", TOKEN_ISIZE);
- default: return TOKEN_VAR_IDENT;
- }
- case 'l':
- if (len < 4) return TOKEN_VAR_IDENT;
- switch (start[2])
- {
- case 'n': return MATCH_KEYWORD_LEN("long", TOKEN_LONG);
- case 'c': return MATCH_KEYWORD_LEN("local", TOKEN_LOCAL);
- default: return TOKEN_VAR_IDENT;
- }
- case 'm':
- switch (start[1])
- {
- case 'a': return MATCH_KEYWORD_LEN("macro", TOKEN_MACRO);
- case 'o': return MATCH_KEYWORD_LEN("module", TOKEN_MODULE);
- default: return TOKEN_VAR_IDENT;
- }
- case 'n': return MATCH_KEYWORD_LEN("nil", TOKEN_NIL);
- case 'p': return MATCH_KEYWORD_LEN("public", TOKEN_PUBLIC);
- case 'q': return MATCH_KEYWORD_LEN("quad", TOKEN_QUAD);
- case 'r': return MATCH_KEYWORD_LEN("return", TOKEN_RETURN);
- case 's':
- switch (start[1])
- {
- case 'h': return MATCH_KEYWORD_LEN("short", TOKEN_SHORT);
- case 't': return MATCH_KEYWORD_LEN("struct", TOKEN_STRUCT);
- case 'w': return MATCH_KEYWORD_LEN("switch", TOKEN_SWITCH);
- default: return TOKEN_VAR_IDENT;
- }
- case 't':
- if (len < 3) return TOKEN_VAR_IDENT;
- switch (start[2])
- {
- case 'p':
- if (len == 7) return MATCH_KEYWORD("typedef", TOKEN_TYPEDEF);
- return MATCH_KEYWORD_LEN("type", TOKEN_TYPE);
- case 'r':
- if (len == 6) return MATCH_KEYWORD("throws", TOKEN_THROWS);
- return MATCH_KEYWORD_LEN("throw", TOKEN_THROW);
- case 'u': return MATCH_KEYWORD_LEN("true", TOKEN_TRUE);
- case 'y': return MATCH_KEYWORD_LEN("try", TOKEN_TRY);
- default: return TOKEN_VAR_IDENT;
- }
- case 'u':
- if (len < 3) return MATCH_KEYWORD_LEN("u8", TOKEN_U8);
- switch (start[1])
- {
- case '1':
- if (len == 4) return MATCH_KEYWORD("u128", TOKEN_U128);
- return MATCH_KEYWORD_LEN("u16", TOKEN_U16);
- case 'n':
- if (start[2] == 't') return MATCH_KEYWORD_LEN("until", TOKEN_UNTIL);
- return MATCH_KEYWORD_LEN("union", TOKEN_UNION);
- case 's':
- if (len == 5) return MATCH_KEYWORD("usize", TOKEN_USIZE);
- return MATCH_KEYWORD_LEN("ushort", TOKEN_USHORT);
- case '2': return MATCH_KEYWORD_LEN("u256", TOKEN_U256);
- case '3': return MATCH_KEYWORD_LEN("u32", TOKEN_U32);
- case '6': return MATCH_KEYWORD_LEN("u64", TOKEN_U64);
- case 'i': return MATCH_KEYWORD_LEN("uint", TOKEN_UINT);
- case 'l': return MATCH_KEYWORD_LEN("ulong", TOKEN_ULONG);
- default: return TOKEN_VAR_IDENT;
- }
- case 'v':
- if (len < 3) return TOKEN_VAR_IDENT;
- switch (start[2])
- {
- case 'r': return MATCH_KEYWORD_LEN("var", TOKEN_VAR);
- case 'i': return MATCH_KEYWORD_LEN("void", TOKEN_VOID);
- case 'l': return MATCH_KEYWORD_LEN("volatile", TOKEN_VOLATILE);
- default: return TOKEN_VAR_IDENT;
- }
- case 'w': return MATCH_KEYWORD_LEN("while", TOKEN_WHILE);
- default: return TOKEN_VAR_IDENT;
+ lexer.lexer_state = LEXER_STATE_DOCS_PARSE;
+ return scan_docs();
}
+
+ return scan_token();
}
-
-#define PRIME 0x01000193
-#define SEED 0x811C9DC5
-
-#define FNV1(a, seed) ((uint32_t)((((unsigned int)(a)) ^ (seed)) * PRIME))
-#define HASH(a, b, c) (FNV1(c, FNV1((a), FNV1(b, SEED))) & 0x1FFu)
-
-// This method uses a light variant on FNV1, keeping 9 bits.
-// When keywords are added, make sure there are no collisions.
-TokenType ident_type_fnv1(const char *restrict start, int len)
-{
- char current_value = start[0];
- if (len < 2) return TOKEN_VAR_IDENT;
- char second = start[1];
- if (current_value == 'c' && second == '_') return c_ident(start, len);
- if (len > 8 || !is_lower(current_value)) return TOKEN_VAR_IDENT;
- switch (HASH(len, current_value, second))
- {
- case HASH(2, 'a', 's'): return MATCH_KEYWORD_LEN("as", TOKEN_AS);
- case HASH(3, 'a', 's'): return MATCH_KEYWORD_LEN("asm", TOKEN_ASM);
- case HASH(5, 'a', 'l'): return MATCH_KEYWORD_LEN("alias", TOKEN_ALIAS);
- case HASH(4, 'b', 'o'): return MATCH_KEYWORD_LEN("bool", TOKEN_BOOL);
- case HASH(4, 'b', 'y'): return MATCH_KEYWORD_LEN("byte", TOKEN_BYTE);
- case HASH(5, 'b', 'r'): return MATCH_KEYWORD_LEN("break", TOKEN_BREAK);
- case HASH(8, 'c', 'o'): return MATCH_KEYWORD_LEN("continue", TOKEN_CONTINUE);
- case HASH(4, 'c', 'a'):
- return len > 3 && start[3] == 't' ? MATCH_KEYWORD_LEN("cast", TOKEN_CAST) : MATCH_KEYWORD_LEN("case", TOKEN_CASE);
- case HASH(4, 'c', 'h'): return MATCH_KEYWORD_LEN("char", TOKEN_CHAR);
- case HASH(5, 'c', 'o'): return MATCH_KEYWORD_LEN("const", TOKEN_CONST);
- case HASH(5, 'c', 'a'): return MATCH_KEYWORD_LEN("catch", TOKEN_CATCH);
- case HASH(2, 'd', 'o'): return MATCH_KEYWORD_LEN("do", TOKEN_DO);
- case HASH(5, 'd', 'e'): return MATCH_KEYWORD_LEN("defer", TOKEN_DEFER);
- case HASH(7, 'd', 'e'): return MATCH_KEYWORD_LEN("default", TOKEN_DEFAULT);
- case HASH(6, 'd', 'o'): return MATCH_KEYWORD_LEN("double", TOKEN_DOUBLE);
- case HASH(4, 'e', 'l'): return MATCH_KEYWORD_LEN("else", TOKEN_ELSE);
- case HASH(4, 'e', 'n'): return MATCH_KEYWORD_LEN("enum", TOKEN_ENUM);
- case HASH(5, 'e', 'r'): return MATCH_KEYWORD_LEN("error", TOKEN_ERROR);
- case HASH(3, 'f', '1'): return MATCH_KEYWORD_LEN("f16", TOKEN_F16);
- case HASH(4, 'f', '1'): return MATCH_KEYWORD_LEN("f128", TOKEN_F128);
- case HASH(3, 'f', '3'): return MATCH_KEYWORD_LEN("f32", TOKEN_F32);
- case HASH(3, 'f', '6'): return MATCH_KEYWORD_LEN("f64", TOKEN_F64);
- case HASH(4, 'f', '2'): return MATCH_KEYWORD_LEN("f256", TOKEN_F256);
- case HASH(3, 'f', 'o'): return MATCH_KEYWORD_LEN("for", TOKEN_FOR);
- case HASH(5, 'f', 'a'): return MATCH_KEYWORD_LEN("false", TOKEN_FALSE);
- case HASH(5, 'f', 'l'): return MATCH_KEYWORD_LEN("float", TOKEN_FLOAT);
- case HASH(4, 'f', 'u'): return MATCH_KEYWORD_LEN("func", TOKEN_FUNC);
- case HASH(4, 'g', 'o'): return MATCH_KEYWORD_LEN("goto", TOKEN_GOTO);
- case HASH(7, 'g', 'e'): return MATCH_KEYWORD_LEN("generic", TOKEN_GENERIC);
- case HASH(4, 'h', 'a'): return MATCH_KEYWORD_LEN("half", TOKEN_HALF);
- case HASH(2, 'i', 'f'): return MATCH_KEYWORD_LEN("if", TOKEN_IF);
- case HASH(2, 'i', '8'): return MATCH_KEYWORD_LEN("i8", TOKEN_I8);
- case HASH(3, 'i', '6'): return MATCH_KEYWORD_LEN("i64", TOKEN_I64);
- case HASH(4, 'i', '2'): return MATCH_KEYWORD_LEN("i256", TOKEN_I256);
- case HASH(3, 'i', '3'): return MATCH_KEYWORD_LEN("i32", TOKEN_I32);
- case HASH(4, 'i', '1'): return MATCH_KEYWORD_LEN("i128", TOKEN_I128);
- case HASH(3, 'i', '1'): return MATCH_KEYWORD_LEN("i16", TOKEN_I16);
- case HASH(3, 'i', 'n'): return MATCH_KEYWORD_LEN("int", TOKEN_INT);
- case HASH(6, 'i', 'm'): return MATCH_KEYWORD_LEN("import", TOKEN_IMPORT);
- case HASH(5, 'i', 's'): return MATCH_KEYWORD_LEN("isize", TOKEN_ISIZE);
- case HASH(4, 'l', 'o'): return MATCH_KEYWORD_LEN("long", TOKEN_LONG);
- case HASH(5, 'l', 'o'): return MATCH_KEYWORD_LEN("local", TOKEN_LOCAL);
- case HASH(5, 'm', 'a'): return MATCH_KEYWORD_LEN("macro", TOKEN_MACRO);
- case HASH(6, 'm', 'o'): return MATCH_KEYWORD_LEN("module", TOKEN_MODULE);
- case HASH(3, 'n', 'i'): return MATCH_KEYWORD_LEN("nil", TOKEN_NIL);
- case HASH(6, 'p', 'u'): return MATCH_KEYWORD_LEN("public", TOKEN_PUBLIC);
- case HASH(4, 'q', 'u'): return MATCH_KEYWORD_LEN("quad", TOKEN_QUAD);
- case HASH(6, 'r', 'e'): return MATCH_KEYWORD_LEN("return", TOKEN_RETURN);
- case HASH(5, 's', 'h'): return MATCH_KEYWORD_LEN("short", TOKEN_SHORT);
- case HASH(6, 's', 't'): return MATCH_KEYWORD_LEN("struct", TOKEN_STRUCT);
- case HASH(6, 's', 'w'): return MATCH_KEYWORD_LEN("switch", TOKEN_SWITCH);
- case HASH(7, 't', 'y'): return MATCH_KEYWORD_LEN("typedef", TOKEN_TYPEDEF);
- case HASH(4, 't', 'y'): return MATCH_KEYWORD_LEN("type", TOKEN_TYPE);
- case HASH(4, 't', 'r'): return MATCH_KEYWORD_LEN("true", TOKEN_TRUE);
- case HASH(3, 't', 'r'): return MATCH_KEYWORD_LEN("try", TOKEN_TRY);
- case HASH(5, 't', 'h'): return MATCH_KEYWORD_LEN("throw", TOKEN_THROW);
- case HASH(6, 't', 'h'): return MATCH_KEYWORD_LEN("throws", TOKEN_THROWS);
- case HASH(2, 'u', '8'): return MATCH_KEYWORD_LEN("u8", TOKEN_U8);
- case HASH(4, 'u', '1'): return MATCH_KEYWORD_LEN("u128", TOKEN_U128);
- case HASH(3, 'u', '1'): return MATCH_KEYWORD_LEN("u16", TOKEN_U16);
- case HASH(4, 'u', '2'): return MATCH_KEYWORD_LEN("u256", TOKEN_U256);
- case HASH(3, 'u', '3'): return MATCH_KEYWORD_LEN("u32", TOKEN_U32);
- case HASH(3, 'u', '6'): return MATCH_KEYWORD_LEN("u64", TOKEN_U64);
- case HASH(4, 'u', 'i'): return MATCH_KEYWORD_LEN("uint", TOKEN_UINT);
- case HASH(5, 'u', 'n'):
- if (start[2] == 't') return MATCH_KEYWORD_LEN("until", TOKEN_UNTIL);
- return MATCH_KEYWORD_LEN("union", TOKEN_UNION);
- case HASH(5, 'u', 'l'): return MATCH_KEYWORD_LEN("ulong", TOKEN_ULONG);
- case HASH(5, 'u', 's'): return MATCH_KEYWORD_LEN("usize", TOKEN_USIZE);
- case HASH(6, 'u', 's'): return MATCH_KEYWORD_LEN("ushort", TOKEN_USHORT);
- case HASH(3, 'v', 'a'): return MATCH_KEYWORD_LEN("var", TOKEN_VAR);
- case HASH(4, 'v', 'o'): return MATCH_KEYWORD_LEN("void", TOKEN_VOID);
- case HASH(8, 'v', 'o'): return MATCH_KEYWORD_LEN("volatile", TOKEN_VOLATILE);
- case HASH(5, 'w', 'h'): return MATCH_KEYWORD_LEN("while", TOKEN_WHILE);
- default: return TOKEN_VAR_IDENT;
- }
-}
-
-
-#undef HASH
-#undef MATCH_KEYWORD
-#undef MATCH_KEYWORD_LEN
-
-TokenType identifier_type(const char* restrict start, int len)
-{
- return ident_type(start, len);
-}
-
-const char *token_type_to_string(TokenType type)
-{
- switch (type)
- {
- case TOKEN_LPAREN:
- return "(";
- case TOKEN_RPAREN:
- return ")";
- case TOKEN_LBRACE:
- return "{";
- case TOKEN_RBRACE:
- return "}";
- case TOKEN_LBRACKET:
- return "[";
- case TOKEN_RBRACKET:
- return "]";
- case TOKEN_COMMA:
- return ",";
- case TOKEN_DOT:
- return ".";
- case TOKEN_EOS:
- return ";";
- case TOKEN_PLUS:
- return "+";
- case TOKEN_PLUSPLUS:
- return "++";
- case TOKEN_PLUS_ASSIGN:
- return "+=";
- case TOKEN_BIT_NOT:
- return "~";
- case TOKEN_NOT:
- return "!";
- case TOKEN_MINUS:
- return "-";
- case TOKEN_MINUSMINUS:
- return "--";
- case TOKEN_MINUS_ASSIGN:
- return "-=";
- case TOKEN_STAR:
- return "*";
- case TOKEN_MULT_ASSIGN:
- return "*=";
- case TOKEN_MOD:
- return "%";
- case TOKEN_MOD_ASSIGN:
- return "%=";
- case TOKEN_DIV:
- return "/";
- case TOKEN_DIV_ASSIGN:
- return "/=";
- case TOKEN_NOT_EQUAL:
- return "!=";
- case TOKEN_EQ:
- return "=";
- case TOKEN_EQEQ:
- return "==";
- case TOKEN_COLON:
- return ":";
- case TOKEN_COLCOLON:
- return "::";
- case TOKEN_DOTDOT:
- return "..";
- case TOKEN_ELIPSIS:
- return "...";
- case TOKEN_GREATER:
- return ">";
- case TOKEN_GREATER_EQ:
- return ">=";
- case TOKEN_RIGHT_SHIFT:
- return ">>";
- case TOKEN_RIGHT_SHIFT_ASSIGN:
- return ">>=";
- case TOKEN_LESS:
- return "<";
- case TOKEN_LESS_EQ:
- return "<=";
- case TOKEN_LEFT_SHIFT:
- return "<<";
- case TOKEN_LEFT_SHIFT_ASSIGN:
- return "<<=";
- case TOKEN_ARROW:
- return "->";
- case TOKEN_AND:
- return "&&";
- case TOKEN_AND_ASSIGN:
- return "&&=";
- case TOKEN_AMP:
- return "&";
- case TOKEN_BIT_AND_ASSIGN:
- return "&=";
- case TOKEN_OR:
- return "||";
- case TOKEN_OR_ASSIGN:
- return "||=";
- case TOKEN_BIT_OR:
- return "|";
- case TOKEN_BIT_OR_ASSIGN:
- return "|=";
- case TOKEN_BIT_XOR:
- return "^";
- case TOKEN_BIT_XOR_ASSIGN:
- return "^=";
- case TOKEN_VAR_IDENT:
- return "";
- case TOKEN_TYPE_IDENT:
- return "";
- case TOKEN_STRING:
- return "";
- case TOKEN_INTEGER:
- return "";
- case TOKEN_REAL:
- return "";
- case TOKEN_QUESTION:
- return "?";
- case TOKEN_ELVIS:
- return "?:";
- case TOKEN_VOID:
- return "void";
- case TOKEN_ALIAS:
- return "alias";
- case TOKEN_CONST:
- return "const";
- case TOKEN_VOLATILE:
- return "volatile";
- case TOKEN_ELSE:
- return "else";
- case TOKEN_FALSE:
- return "false";
- case TOKEN_CONTINUE:
- return "continue";
- case TOKEN_FUNC:
- return "func";
- case TOKEN_FOR:
- return "for";
- case TOKEN_IMPORT:
- return "import";
- case TOKEN_MODULE:
- return "module";
- case TOKEN_IF:
- return "if";
- case TOKEN_NIL:
- return "nil";
- case TOKEN_RETURN:
- return "return";
- case TOKEN_GOTO:
- return "goto";
- case TOKEN_DEFER:
- return "defer";
- case TOKEN_TRUE:
- return "true";
- case TOKEN_WHILE:
- return "while";
- case TOKEN_CASE:
- return "case";
- case TOKEN_ASM:
- return "asm";
- case TOKEN_DEFAULT:
- return "default";
- case TOKEN_SWITCH:
- return "switch";
- case TOKEN_UNTIL:
- return "until";
- case TOKEN_BREAK:
- return "break";
- case TOKEN_TYPE:
- return "type";
- case TOKEN_DO:
- return "do";
- case TOKEN_PUBLIC:
- return "public";
- case TOKEN_LOCAL:
- return "local";
- case TOKEN_STRUCT:
- return "struct";
- case TOKEN_UNION:
- return "union";
- case TOKEN_ENUM:
- return "enum";
- case TOKEN_AT:
- return "@";
- case TOKEN_AS:
- return "as";
- case TOKEN_ERROR:
- return "";
- case TOKEN_EOF:
- return "";
- case TOKEN_CAST:
- return "cast";
- case TOKEN_C_LONGDOUBLE:
- return "c_longdouble";
- case TOKEN_C_USHORT:
- return "c_ushort";
- case TOKEN_C_UINT:
- return "c_uint";
- case TOKEN_C_ULONG:
- return "c_ulong";
- case TOKEN_C_ULONGLONG:
- return "c_ulonglong";
- case TOKEN_C_SHORT:
- return "c_ishort";
- case TOKEN_C_INT:
- return "c_int";
- case TOKEN_C_LONG:
- return "c_long";
- case TOKEN_C_LONGLONG:
- return "c_longlong";
- case TOKEN_MACRO:
- return "macro";
- case TOKEN_F256:
- return "f256";
- case TOKEN_I256:
- return "i256";
- case TOKEN_U256:
- return "u256";
- case TOKEN_F128:
- return "f128";
- case TOKEN_I128:
- return "i128";
- case TOKEN_U128:
- return "u128";
- case TOKEN_F64:
- return "f64";
- case TOKEN_I64:
- return "i64";
- case TOKEN_U64:
- return "u64";
- case TOKEN_F32:
- return "f32";
- case TOKEN_I32:
- return "i32";
- case TOKEN_U32:
- return "u32";
- case TOKEN_F16:
- return "f16";
- case TOKEN_I16:
- return "i16";
- case TOKEN_U16:
- return "u16";
- case TOKEN_I8:
- return "i8";
- case TOKEN_U8:
- return "u8";
- case TOKEN_BOOL:
- return "bool";
- case TOKEN_QUAD:
- return "quad";
- case TOKEN_DOUBLE:
- return "double";
- case TOKEN_FLOAT:
- return "float";
- case TOKEN_LONG:
- return "long";
- case TOKEN_ULONG:
- return "ulong";
- case TOKEN_INT:
- return "int";
- case TOKEN_UINT:
- return "uint";
- case TOKEN_SHORT:
- return "short";
- case TOKEN_USHORT:
- return "ushort";
- case TOKEN_BYTE:
- return "byte";
- case TOKEN_CHAR:
- return "char";
- case TOKEN_ISIZE:
- return "isize";
- case TOKEN_USIZE:
- return "usize";
- case TOKEN_CAPS_IDENT:
- return "";
- case TOKEN_AT_IDENT:
- return "<@ident>";
- case TOKEN_HASH_IDENT:
- return "<#ident>";
- case TOKEN_DOLLAR_IDENT:
- return "<$ident>";
- case TOKEN_CATCH:
- return "catch";
- case TOKEN_GENERIC:
- return "generic";
- case TOKEN_THROW:
- return "throw";
- case TOKEN_THROWS:
- return "throws";
- case TOKEN_TRY:
- return "try";
- case TOKEN_TYPEDEF:
- return "typedef";
- case TOKEN_VAR:
- return "var";
- case TOKEN_HALF:
- return "half";
- case INVALID_TOKEN:
- return "<\?\?\?>";
- }
- UNREACHABLE
-}
\ No newline at end of file
diff --git a/src/compiler/lexer.h b/src/compiler/lexer.h
index e243e89b5..86993d80d 100644
--- a/src/compiler/lexer.h
+++ b/src/compiler/lexer.h
@@ -4,179 +4,14 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+#include "tokens.h"
+#include "compiler_common.h"
-typedef enum _TokenType
-{
- // Single-character tokens.
- TOKEN_LPAREN,
- TOKEN_RPAREN,
- TOKEN_LBRACE,
- TOKEN_RBRACE,
- TOKEN_LBRACKET,
- TOKEN_RBRACKET,
- TOKEN_COMMA,
- TOKEN_DOT,
- TOKEN_EOS,
- TOKEN_AT,
+Token scan_token(void);
- // One or two character tokens.
- TOKEN_PLUS,
- TOKEN_PLUSPLUS,
- TOKEN_PLUS_ASSIGN,
- TOKEN_BIT_NOT,
- TOKEN_NOT,
- TOKEN_MINUS,
- TOKEN_MINUSMINUS,
- TOKEN_MINUS_ASSIGN,
- TOKEN_STAR,
- TOKEN_MULT_ASSIGN,
- TOKEN_DIV,
- TOKEN_DIV_ASSIGN,
- TOKEN_MOD,
- TOKEN_MOD_ASSIGN,
- TOKEN_NOT_EQUAL,
- TOKEN_EQ,
- TOKEN_EQEQ,
- TOKEN_COLON,
- TOKEN_COLCOLON, // Not used but reserved
- TOKEN_DOTDOT,
- TOKEN_QUESTION,
-
- // Three or more
- TOKEN_ELIPSIS,
- TOKEN_GREATER,
- TOKEN_GREATER_EQ,
- TOKEN_RIGHT_SHIFT,
- TOKEN_RIGHT_SHIFT_ASSIGN,
- TOKEN_LESS,
- TOKEN_LESS_EQ,
- TOKEN_LEFT_SHIFT,
- TOKEN_LEFT_SHIFT_ASSIGN,
- TOKEN_ARROW, // Not used but reserved
- TOKEN_AND,
- TOKEN_AND_ASSIGN,
- TOKEN_AMP,
- TOKEN_BIT_AND_ASSIGN,
- TOKEN_OR,
- TOKEN_OR_ASSIGN,
- TOKEN_BIT_OR,
- TOKEN_BIT_OR_ASSIGN,
- TOKEN_BIT_XOR,
- TOKEN_BIT_XOR_ASSIGN,
- TOKEN_ELVIS,
-
- TOKEN_F256,
- TOKEN_I256,
- TOKEN_U256,
- TOKEN_F128,
- TOKEN_I128,
- TOKEN_U128,
- TOKEN_F64,
- TOKEN_I64,
- TOKEN_U64,
- TOKEN_F32,
- TOKEN_I32,
- TOKEN_U32,
- TOKEN_F16,
- TOKEN_I16,
- TOKEN_U16,
- TOKEN_I8,
- TOKEN_U8,
- TOKEN_QUAD,
- TOKEN_DOUBLE,
- TOKEN_FLOAT,
- TOKEN_HALF,
- TOKEN_LONG,
- TOKEN_ULONG,
- TOKEN_INT,
- TOKEN_UINT,
- TOKEN_SHORT,
- TOKEN_USHORT,
- TOKEN_BYTE,
- TOKEN_CHAR,
- TOKEN_BOOL,
- TOKEN_ISIZE,
- TOKEN_USIZE,
-
- // Literals.
-
- // In order to make the grammar
- // non ambiguous, we split tokens at the
- // lexer level
- TOKEN_TYPE_IDENT,
- TOKEN_CAPS_IDENT,
- TOKEN_VAR_IDENT,
-
- // We want to parse @foo / #foo / $foo separately.
- // Otherwise we allow things like "@ foo" which would be pretty bad.
- TOKEN_AT_IDENT,
- TOKEN_HASH_IDENT,
- TOKEN_DOLLAR_IDENT,
-
- TOKEN_STRING,
- TOKEN_INTEGER,
- TOKEN_REAL,
-
- // Keywords.
- TOKEN_ALIAS, // Reserved
- TOKEN_AS,
- TOKEN_ASM,
- TOKEN_BREAK,
- TOKEN_CASE,
- TOKEN_CAST,
- TOKEN_CATCH,
- TOKEN_CONST,
- TOKEN_CONTINUE,
- TOKEN_DEFAULT,
- TOKEN_DEFER,
- TOKEN_DO,
- TOKEN_ELSE,
- TOKEN_ENUM,
- TOKEN_ERROR,
- TOKEN_FALSE,
- TOKEN_FOR,
- TOKEN_FUNC,
- TOKEN_GENERIC,
- TOKEN_GOTO,
- TOKEN_IF,
- TOKEN_IMPORT,
- TOKEN_LOCAL,
- TOKEN_MACRO,
- TOKEN_MODULE,
- TOKEN_NIL,
- TOKEN_PUBLIC,
- TOKEN_RETURN,
- TOKEN_STRUCT,
- TOKEN_SWITCH,
- TOKEN_THROW,
- TOKEN_THROWS,
- TOKEN_TRUE,
- TOKEN_TRY,
- TOKEN_TYPE, // Reserved
- TOKEN_TYPEDEF,
- TOKEN_UNION,
- TOKEN_UNTIL,
- TOKEN_VAR, // Reserved
- TOKEN_VOID,
- TOKEN_VOLATILE,
- TOKEN_WHILE,
-
-
- TOKEN_C_USHORT,
- TOKEN_C_SHORT,
- TOKEN_C_INT,
- TOKEN_C_UINT,
- TOKEN_C_LONG,
- TOKEN_C_ULONG,
- TOKEN_C_LONGLONG,
- TOKEN_C_ULONGLONG,
- TOKEN_C_LONGDOUBLE,
-
- INVALID_TOKEN,
- TOKEN_EOF,
-
-} TokenType;
-
-const char *token_type_to_string(TokenType type);
TokenType identifier_type(const char* restrict start, int len);
TokenType ident_type_fnv1(const char *restrict start, int len);
+
+Token scan_ident_test(const char* scan);
+
+void lexer_test_setup(const char* text);
diff --git a/src/compiler/malloc.c b/src/compiler/malloc.c
new file mode 100644
index 000000000..4b03577e8
--- /dev/null
+++ b/src/compiler/malloc.c
@@ -0,0 +1,106 @@
+// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "malloc.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "../utils/errors.h"
+
+#define KB 1024L
+// Use 1MB at a time.
+#define BUCKET_SIZE (1024 * KB)
+#define ARENA_BUCKET_START_SIZE 16
+
+static uint8_t **arena_buckets;
+static int arena_buckets_used;
+static size_t arena_buckets_array_size;
+static size_t current_use;
+static void *current_arena;
+static int allocations_done;
+void init_arena(void)
+{
+ printf("---- ARENA ALLOCATED ----\n");
+ arena_buckets = malloc(ARENA_BUCKET_START_SIZE * sizeof(void *));
+ arena_buckets_used = 1;
+ arena_buckets_array_size = ARENA_BUCKET_START_SIZE;
+ arena_buckets[0] = malloc(BUCKET_SIZE);
+ allocations_done = 0;
+ current_use = 0;
+ current_arena = arena_buckets[0];
+}
+
+// Simple bump allocator with buckets.
+void *malloc_arena(size_t mem)
+{
+ if (mem == 0) return NULL;
+ // Round to multiple of 16
+ size_t oldmem = mem;
+ mem = (mem + 15u) & ~15ull;
+ assert(mem >= oldmem);
+ if (mem >= BUCKET_SIZE / 4)
+ {
+ return malloc(mem); /* NOTE(review): this pointer is never tracked, so free_arena() leaks it — confirm intended */
+ }
+ if (current_use + mem > BUCKET_SIZE)
+ {
+ if (arena_buckets_used == arena_buckets_array_size)
+ {
+ arena_buckets_array_size *= 2;
+ arena_buckets = realloc(arena_buckets, arena_buckets_array_size * sizeof(void *));
+ ASSERT(arena_buckets, "Ran out of memory after allocating %ld KB", BUCKET_SIZE * arena_buckets_used / KB);
+ }
+ current_arena = malloc(BUCKET_SIZE);
+ ASSERT(current_arena, "Ran out of memory after allocating %ld KB", BUCKET_SIZE * arena_buckets_used / KB);
+ arena_buckets[arena_buckets_used++] = current_arena;
+ current_use = 0;
+ }
+ uint8_t *ptr = current_arena + current_use;
+ current_use += mem;
+ allocations_done++;
+ if (mem > 4096)
+ {
+ printf("Allocated large chunk %llu\n", (unsigned long long)mem);
+ }
+ return (void *)ptr;
+
+}
+
+
+void free_arena(void)
+{
+ printf("-- FREEING ARENA -- \n");
+ printf(" * Memory used: %ld Kb\n", ((arena_buckets_used - 1) * BUCKET_SIZE + current_use) / 1024);
+ printf(" * Buckets used: %d\n", arena_buckets_used);
+ printf(" * Allocations: %d\n", allocations_done);
+
+ for (int i = 0; i < arena_buckets_used; i++)
+ {
+ free(arena_buckets[i]);
+ }
+ current_arena = NULL;
+ arena_buckets_used = 0;
+ arena_buckets = NULL;
+ arena_buckets_array_size = 0;
+ current_use = 0;
+ printf("-- FREE DONE -- \n");
+}
+
+
+void run_arena_allocator_tests(void)
+{
+ init_arena();
+ free_arena();
+ init_arena();
+ ASSERT(malloc_arena(10) != malloc_arena(10), "Expected different values...");
+ ASSERT(current_use == 32, "Expected allocations rounded to next 16 bytes");
+ EXPECT("buckets in use", arena_buckets_used, 1);
+ ASSERT(malloc_arena(BUCKET_SIZE), "Should be possible to allocate this");
+ EXPECT("buckets in use", arena_buckets_used, 2); /* NOTE(review): allocs >= BUCKET_SIZE/4 bypass buckets entirely, so this may still be 1 — verify */
+ ASSERT(malloc_arena(1), "Expected alloc to pass");
+ EXPECT("buckets in use", arena_buckets_used, 3);
+ free_arena();
+ ASSERT(arena_buckets_array_size == 0, "Arena not freed?");
+ printf("Passed all arena tests\n");
+}
diff --git a/src/compiler/malloc.h b/src/compiler/malloc.h
new file mode 100644
index 000000000..ddc971a31
--- /dev/null
+++ b/src/compiler/malloc.h
@@ -0,0 +1,16 @@
+#pragma once
+
+// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+
+#include <stddef.h>
+void init_arena(void);
+void *malloc_arena(size_t mem);
+void free_arena(void);
+
+void run_arena_allocator_tests(void);
+
+#define MALLOC(mem) malloc_arena(mem)
+#define MALLOCS(type) malloc_arena(sizeof(type))
\ No newline at end of file
diff --git a/src/compiler/symtab.c b/src/compiler/symtab.c
new file mode 100644
index 000000000..bd9488ef1
--- /dev/null
+++ b/src/compiler/symtab.c
@@ -0,0 +1,213 @@
+// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "symtab.h"
+#include <string.h>
+#include <stdlib.h>
+#include "../utils/errors.h"
+#include <assert.h>
+#include "../utils/lib.h"
+#include "malloc.h"
+#include "tokens.h"
+
+#define TABLE_MAX_LOAD 0.75
+#define MAX_HASH_SIZE (1024 * 1024)
+
+
+typedef struct _SymEntry
+{
+ const char *value;
+ TokenType type;
+ uint32_t key_len;
+ uint32_t hash;
+} SymEntry;
+
+typedef struct _SymTab
+{
+ uint32_t count;
+ uint32_t capacity;
+ SymEntry *entries;
+} SymTab;
+
+typedef struct _Entry
+{
+ const char *key;
+ uint32_t key_len;
+ uint32_t hash;
+ void *value;
+} Entry;
+
+
+static SymTab symtab;
+
+void symtab_init(uint32_t capacity)
+{
+ assert (is_power_of_two(capacity) && "Must be a power of two");
+ if (symtab.capacity != 0)
+ {
+ free(symtab.entries); /* NOTE(review): entries come from malloc_arena — free() is only valid when the size was large enough to be plain-malloced; confirm */
+ }
+ size_t size = capacity * sizeof(SymEntry);
+ symtab.entries = MALLOC(size);
+ memset(symtab.entries, 0, size);
+ symtab.count = 0;
+ symtab.capacity = capacity;
+
+ // Add keywords.
+ for (int i = 0; i < TOKEN_EOF; i++)
+ {
+ const char* name = token_type_to_string(i);
+ // Skip non-keywords
+ if (!is_lower(name[0]))
+ {
+ if (name[0] != '@' || !is_lower(name[1])) continue;
+ }
+ int len = strlen(name);
+ TokenType type = (TokenType)i;
+ const char* interned = symtab_add(name, strlen(name), fnv1a(name, len), &type);
+ assert(type == i);
+ assert(symtab_add(name, strlen(name), fnv1a(name, len), &type) == interned);
+
+ }
+}
+
+static inline SymEntry *entry_find(const char *key, uint32_t key_len, uint32_t hash)
+{
+ uint32_t index = hash & (symtab.capacity - 1);
+ while (1)
+ {
+ SymEntry *entry = &symtab.entries[index];
+ if (entry->key_len == key_len && (entry->value == key || memcmp(key, entry->value, key_len) == 0)) return entry;
+ if (entry->value == NULL)
+ {
+ return entry;
+ }
+ index = (index + 1) & (symtab.capacity - 1);
+ }
+}
+
+const char *symtab_add(const char *symbol, uint32_t len, uint32_t fnv1hash, TokenType *type)
+{
+ if (symtab.count + 1 > symtab.capacity * TABLE_MAX_LOAD)
+ {
+ FATAL_ERROR("Symtab exceeded capacity, please increase --symtab.");
+ }
+ SymEntry *entry = entry_find(symbol, len, fnv1hash);
+ if (entry->value)
+ {
+ *type = entry->type;
+ return entry->value;
+ }
+
+ char *copy = MALLOC(len + 1);
+ memcpy(copy, symbol, len);
+ copy[len] = '\0';
+ entry->value = copy;
+ entry->key_len = len;
+ entry->hash = fnv1hash;
+ entry->type = *type;
+ symtab.count++;
+ return entry->value;
+}
+
+void stable_init(STable *table, uint32_t initial_size)
+{
+ assert(initial_size && "Size must be larger than 0");
+ assert (is_power_of_two(initial_size) && "Must be a power of two");
+
+ SEntry *entries = MALLOC(initial_size * sizeof(SEntry));
+ for (uint32_t i = 0; i < initial_size; i++)
+ {
+ entries[i].key = NULL;
+ entries[i].value = NULL;
+ }
+ table->count = 0;
+ table->capacity = initial_size;
+ table->entries = entries;
+}
+
+void stable_clear(STable *table)
+{
+ memset(table->entries, 0, table->capacity * sizeof(SEntry));
+ table->count = 0;
+}
+
+#define TOMBSTONE ((void *)0x01)
+static SEntry *sentry_find(SEntry *entries, uint32_t capacity, const char *key)
+{
+ uint32_t index = (uint32_t)((((uintptr_t)key) >> 2u) & (capacity - 1));
+ SEntry *tombstone = NULL;
+ while (1)
+ {
+ SEntry *entry = &entries[index];
+ if (entry->key == key) return entry;
+ if (entry->key == NULL)
+ {
+ if (entry->value != TOMBSTONE)
+ {
+ return tombstone ? tombstone : entry;
+ }
+ else
+ {
+ if (!tombstone) tombstone = entry;
+ }
+ }
+ index = (index + 1) & (capacity - 1);
+ }
+}
+
+
+void *stable_set(STable *table, const char *key, void *value)
+{
+ assert(value && "Cannot insert NULL");
+ if (table->count + 1 > table->capacity * TABLE_MAX_LOAD)
+ {
+ ASSERT(table->capacity < MAX_HASH_SIZE, "Table size too large, exceeded %d", MAX_HASH_SIZE);
+
+ uint32_t new_capacity = table->capacity ? (table->capacity << 1u) : 16u;
+ SEntry *new_data = MALLOC(new_capacity * sizeof(SEntry));
+ for (uint32_t i = 0; i < new_capacity; i++)
+ {
+ new_data[i].key = NULL;
+ new_data[i].value = NULL;
+ }
+ table->count = 0;
+ for (uint32_t i = 0; i < table->capacity; i++)
+ {
+ SEntry *entry = &table->entries[i];
+ if (!entry->key) continue;
+ table->count++;
+ SEntry *dest = sentry_find(new_data, new_capacity, entry->key);
+ *dest = *entry;
+ }
+ table->entries = new_data;
+ table->capacity = new_capacity;
+ }
+
+ SEntry *entry = sentry_find(table->entries, table->capacity, key);
+ void *old = entry->value && entry->value != TOMBSTONE ? entry->value : NULL;
+ entry->key = key;
+ entry->value = value;
+ if (!old) table->count++;
+ return old;
+}
+
+
+void *stable_get(STable *table, const char *key)
+{
+ if (!table->entries) return NULL;
+ SEntry *entry = sentry_find(table->entries, table->capacity, key);
+ return entry->key == NULL ? NULL : entry->value;
+}
+
+void *stable_delete(STable *table, const char *key)
+{
+ if (!table->count) return NULL;
+ SEntry *entry = sentry_find(table->entries, table->capacity, key);
+ if (!entry->key) return NULL;
+ void *value = entry->value;
+ entry->key = NULL;
+ entry->value = TOMBSTONE;
+ return value;
+}
diff --git a/src/compiler/symtab.h b/src/compiler/symtab.h
new file mode 100644
index 000000000..0824fd51c
--- /dev/null
+++ b/src/compiler/symtab.h
@@ -0,0 +1,30 @@
+#pragma once
+
+// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+#include "tokens.h"
+
+void symtab_init(uint32_t max_size);
+const char *symtab_add(const char *symbol, uint32_t len, uint32_t fnv1hash, TokenType *type);
+
+typedef struct _VoidEntry
+{
+ const char *key;
+ void *value;
+} SEntry;
+
+typedef struct _STable
+{
+ uint32_t count;
+ uint32_t capacity;
+ SEntry *entries;
+} STable;
+
+void stable_init(STable *table, uint32_t initial_size);
+void *stable_set(STable *table, const char *key, void *value);
+void *stable_get(STable *table, const char *key);
+void *stable_delete(STable *table, const char *key);
+void stable_clear(STable *table);
diff --git a/src/compiler/tokens.c b/src/compiler/tokens.c
new file mode 100644
index 000000000..30fe37cd3
--- /dev/null
+++ b/src/compiler/tokens.c
@@ -0,0 +1,336 @@
+// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tokens.h"
+#include "../utils/errors.h"
+
+const char *token_type_to_string(TokenType type)
+{
+ switch (type)
+ {
+ case TOKEN_HASH:
+ return "#";
+ case TOKEN_DOLLAR:
+ return "$";
+ case TOKEN_LPAREN:
+ return "(";
+ case TOKEN_RPAREN:
+ return ")";
+ case TOKEN_LBRACE:
+ return "{";
+ case TOKEN_RBRACE:
+ return "}";
+ case TOKEN_LBRACKET:
+ return "[";
+ case TOKEN_RBRACKET:
+ return "]";
+ case TOKEN_COMMA:
+ return ",";
+ case TOKEN_DOT:
+ return ".";
+ case TOKEN_EOS:
+ return ";";
+ case TOKEN_PLUS:
+ return "+";
+ case TOKEN_PLUSPLUS:
+ return "++";
+ case TOKEN_PLUS_ASSIGN:
+ return "+=";
+ case TOKEN_BIT_NOT:
+ return "~";
+ case TOKEN_NOT:
+ return "!";
+ case TOKEN_MINUS:
+ return "-";
+ case TOKEN_MINUSMINUS:
+ return "--";
+ case TOKEN_MINUS_ASSIGN:
+ return "-=";
+ case TOKEN_STAR:
+ return "*";
+ case TOKEN_MULT_ASSIGN:
+ return "*=";
+ case TOKEN_MOD:
+ return "%";
+ case TOKEN_MOD_ASSIGN:
+ return "%=";
+ case TOKEN_DIV:
+ return "/";
+ case TOKEN_DIV_ASSIGN:
+ return "/=";
+ case TOKEN_NOT_EQUAL:
+ return "!=";
+ case TOKEN_EQ:
+ return "=";
+ case TOKEN_EQEQ:
+ return "==";
+ case TOKEN_COLON:
+ return ":";
+ case TOKEN_COLCOLON:
+ return "::";
+ case TOKEN_DOTDOT:
+ return "..";
+ case TOKEN_ELIPSIS:
+ return "...";
+ case TOKEN_GREATER:
+ return ">";
+ case TOKEN_GREATER_EQ:
+ return ">=";
+ case TOKEN_SHR:
+ return ">>";
+ case TOKEN_SHR_ASSIGN:
+ return ">>=";
+ case TOKEN_LESS:
+ return "<";
+ case TOKEN_LESS_EQ:
+ return "<=";
+ case TOKEN_SHL:
+ return "<<";
+ case TOKEN_SHL_ASSIGN:
+ return "<<=";
+ case TOKEN_ARROW:
+ return "->";
+ case TOKEN_AND:
+ return "&&";
+ case TOKEN_AND_ASSIGN:
+ return "&&=";
+ case TOKEN_AMP:
+ return "&";
+ case TOKEN_BIT_AND_ASSIGN:
+ return "&=";
+ case TOKEN_OR:
+ return "||";
+ case TOKEN_OR_ASSIGN:
+ return "||=";
+ case TOKEN_BIT_OR:
+ return "|";
+ case TOKEN_BIT_OR_ASSIGN:
+ return "|=";
+ case TOKEN_BIT_XOR:
+ return "^";
+ case TOKEN_BIT_XOR_ASSIGN:
+ return "^=";
+ case TOKEN_VAR_IDENT:
+ return "";
+ case TOKEN_TYPE_IDENT:
+ return "";
+ case TOKEN_STRING:
+ return "";
+ case TOKEN_INTEGER:
+ return "";
+ case TOKEN_REAL:
+ return "";
+ case TOKEN_QUESTION:
+ return "?";
+ case TOKEN_ELVIS:
+ return "?:";
+ case TOKEN_VOID:
+ return "void";
+ case TOKEN_ALIAS:
+ return "alias";
+ case TOKEN_CONST:
+ return "const";
+ case TOKEN_VOLATILE:
+ return "volatile";
+ case TOKEN_ELSE:
+ return "else";
+ case TOKEN_FALSE:
+ return "false";
+ case TOKEN_CONTINUE:
+ return "continue";
+ case TOKEN_FUNC:
+ return "func";
+ case TOKEN_FOR:
+ return "for";
+ case TOKEN_IMPORT:
+ return "import";
+ case TOKEN_MODULE:
+ return "module";
+ case TOKEN_IF:
+ return "if";
+ case TOKEN_NIL:
+ return "nil";
+ case TOKEN_RETURN:
+ return "return";
+ case TOKEN_GOTO:
+ return "goto";
+ case TOKEN_DEFER:
+ return "defer";
+ case TOKEN_TRUE:
+ return "true";
+ case TOKEN_WHILE:
+ return "while";
+ case TOKEN_CASE:
+ return "case";
+ case TOKEN_ASM:
+ return "asm";
+ case TOKEN_DEFAULT:
+ return "default";
+ case TOKEN_SWITCH:
+ return "switch";
+ case TOKEN_UNTIL:
+ return "until";
+ case TOKEN_BREAK:
+ return "break";
+ case TOKEN_TYPE:
+ return "type";
+ case TOKEN_DO:
+ return "do";
+ case TOKEN_PUBLIC:
+ return "public";
+ case TOKEN_LOCAL:
+ return "local";
+ case TOKEN_STRUCT:
+ return "struct";
+ case TOKEN_UNION:
+ return "union";
+ case TOKEN_ENUM:
+ return "enum";
+ case TOKEN_AS:
+ return "as";
+ case TOKEN_AT:
+ return "@";
+ case TOKEN_ERROR:
+ return "";
+ case TOKEN_EOF:
+ return "";
+ case TOKEN_CAST:
+ return "cast";
+ case TOKEN_C_LONGDOUBLE:
+ return "c_longdouble";
+ case TOKEN_C_USHORT:
+ return "c_ushort";
+ case TOKEN_C_UINT:
+ return "c_uint";
+ case TOKEN_C_ULONG:
+ return "c_ulong";
+ case TOKEN_C_ULONGLONG:
+ return "c_ulonglong";
+ case TOKEN_C_SHORT:
+ return "c_ishort"; /* NOTE(review): likely meant "c_short" to match c_int/c_long naming — confirm before changing, this string is interned as the keyword */
+ case TOKEN_C_INT:
+ return "c_int";
+ case TOKEN_C_LONG:
+ return "c_long";
+ case TOKEN_C_LONGLONG:
+ return "c_longlong";
+ case TOKEN_MACRO:
+ return "macro";
+ case TOKEN_F256:
+ return "f256";
+ case TOKEN_I256:
+ return "i256";
+ case TOKEN_U256:
+ return "u256";
+ case TOKEN_F128:
+ return "f128";
+ case TOKEN_I128:
+ return "i128";
+ case TOKEN_U128:
+ return "u128";
+ case TOKEN_F64:
+ return "f64";
+ case TOKEN_I64:
+ return "i64";
+ case TOKEN_U64:
+ return "u64";
+ case TOKEN_F32:
+ return "f32";
+ case TOKEN_I32:
+ return "i32";
+ case TOKEN_U32:
+ return "u32";
+ case TOKEN_F16:
+ return "f16";
+ case TOKEN_I16:
+ return "i16";
+ case TOKEN_U16:
+ return "u16";
+ case TOKEN_I8:
+ return "i8";
+ case TOKEN_U8:
+ return "u8";
+ case TOKEN_BOOL:
+ return "bool";
+ case TOKEN_QUAD:
+ return "quad";
+ case TOKEN_DOUBLE:
+ return "double";
+ case TOKEN_FLOAT:
+ return "float";
+ case TOKEN_LONG:
+ return "long";
+ case TOKEN_ULONG:
+ return "ulong";
+ case TOKEN_INT:
+ return "int";
+ case TOKEN_UINT:
+ return "uint";
+ case TOKEN_SHORT:
+ return "short";
+ case TOKEN_USHORT:
+ return "ushort";
+ case TOKEN_BYTE:
+ return "byte";
+ case TOKEN_CHAR:
+ return "char";
+ case TOKEN_ISIZE:
+ return "isize";
+ case TOKEN_USIZE:
+ return "usize";
+ case TOKEN_GENERIC:
+ return "generic";
+ case TOKEN_THROW:
+ return "throw";
+ case TOKEN_THROWS:
+ return "throws";
+ case TOKEN_TRY:
+ return "try";
+ case TOKEN_TYPEDEF:
+ return "typedef";
+ case TOKEN_VAR:
+ return "var";
+ case TOKEN_HALF:
+ return "half";
+ case TOKEN_CAPS_IDENT:
+ return "";
+ case TOKEN_AT_IDENT:
+ return "<@ident>";
+ case TOKEN_HASH_IDENT:
+ return "<#ident>";
+ case TOKEN_DOLLAR_IDENT:
+ return "<$ident>";
+ case TOKEN_CATCH:
+ return "catch";
+ case INVALID_TOKEN:
+ return "<\?\?\?>";
+ case TOKEN_DOCS_EOL:
+ return "";
+ case TOKEN_DOCS_START:
+ return "/**";
+ case TOKEN_DOCS_END:
+ return "*/";
+ case TOKEN_DOCS_LINE:
+ return "";
+ case TOKEN_AT_REQUIRE:
+ return "@require";
+ case TOKEN_AT_ENSURE:
+ return "@ensure";
+ case TOKEN_AT_PARAM:
+ return "@param";
+ case TOKEN_AT_CONST:
+ return "@const";
+ case TOKEN_AT_PURE:
+ return "@pure";
+ case TOKEN_AT_RETURN:
+ return "@return";
+ case TOKEN_AT_THROWS:
+ return "@throws";
+ case TOKEN_AT_REQPARSE:
+ return "@reqparse";
+ case TOKEN_AT_DEPRECATED:
+ return "@deprecated";
+ }
+ UNREACHABLE
+}
\ No newline at end of file
diff --git a/src/compiler/tokens.h b/src/compiler/tokens.h
new file mode 100644
index 000000000..1f21fc337
--- /dev/null
+++ b/src/compiler/tokens.h
@@ -0,0 +1,200 @@
+#pragma once
+
+// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+
+typedef enum _TokenType
+{
+ INVALID_TOKEN = 0,
+
+ // Single-character tokens.
+ TOKEN_AT, // @
+ TOKEN_COMMA, // ,
+ TOKEN_EOS, // ;
+ TOKEN_DOLLAR, // $
+ TOKEN_DOT, // .
+ TOKEN_HASH, // #
+ TOKEN_LBRACE, // {
+ TOKEN_LBRACKET, // [
+ TOKEN_LPAREN, // (
+ TOKEN_RBRACE, // }
+ TOKEN_RBRACKET, // ]
+ TOKEN_RPAREN, // )
+
+ // One or two character tokens.
+ TOKEN_BIT_NOT, // ~
+ TOKEN_COLON, // :
+ TOKEN_COLCOLON, // :: Not used but reserved
+ TOKEN_DIV, // /
+ TOKEN_DIV_ASSIGN, // /=
+ TOKEN_DOCS_START, // /** (will consume an arbitrary number of `*` after this)
+ TOKEN_DOCS_END, // */ (may start with an arbitrary number of `*`)
+ TOKEN_DOCS_EOL, // "\n" only seen in docs.
+ TOKEN_DOTDOT, // ..
+ TOKEN_EQ, // =
+ TOKEN_EQEQ, // ==
+ TOKEN_NOT, // !
+ TOKEN_NOT_EQUAL, // !=
+ TOKEN_MINUS, // -
+ TOKEN_MINUSMINUS, // --
+ TOKEN_MINUS_ASSIGN, // -=
+ TOKEN_MOD, // %
+ TOKEN_MOD_ASSIGN, // %=
+ TOKEN_MULT_ASSIGN, // *=
+ TOKEN_PLUS, // +
+ TOKEN_PLUSPLUS, // ++
+ TOKEN_PLUS_ASSIGN, // +=
+ TOKEN_QUESTION, // ?
+ TOKEN_STAR, // *
+
+ // Three or more
+ TOKEN_BIT_AND_ASSIGN, // &=
+ TOKEN_BIT_OR, // |
+ TOKEN_BIT_OR_ASSIGN, // |=
+ TOKEN_BIT_XOR, // ^
+ TOKEN_BIT_XOR_ASSIGN, // ^=
+ TOKEN_AMP, // &
+ TOKEN_AND, // &&
+ TOKEN_AND_ASSIGN, // &&=
+ TOKEN_ARROW, // -> // Not used but reserved
+ TOKEN_ELIPSIS, // ...
+ TOKEN_ELVIS, // ?:
+ TOKEN_GREATER, // >
+ TOKEN_GREATER_EQ, // >=
+ TOKEN_OR, // ||
+ TOKEN_OR_ASSIGN, // ||=
+ TOKEN_SHR, // >>
+ TOKEN_SHR_ASSIGN, // >>=
+ TOKEN_LESS, // <
+ TOKEN_LESS_EQ, // <=
+ TOKEN_SHL, // <<
+ TOKEN_SHL_ASSIGN, // <<=
+
+ // Basic types bit
+ TOKEN_F256, // f256
+ TOKEN_I256, // i256
+ TOKEN_U256, // u256
+ TOKEN_F128, // f128
+ TOKEN_I128, // i128
+ TOKEN_U128, // u128
+ TOKEN_F64, // f64
+ TOKEN_I64, // i64
+ TOKEN_U64, // u64
+ TOKEN_F32, // f32
+ TOKEN_I32, // i32
+ TOKEN_U32, // u32
+ TOKEN_F16, // f16
+ TOKEN_I16, // i16
+ TOKEN_U16, // u16
+ TOKEN_I8, // i8
+ TOKEN_U8, // u8
+
+ // Basic types names
+ TOKEN_BYTE,
+ TOKEN_BOOL,
+ TOKEN_CHAR,
+ TOKEN_DOUBLE,
+ TOKEN_FLOAT,
+ TOKEN_HALF,
+ TOKEN_INT,
+ TOKEN_ISIZE,
+ TOKEN_LONG,
+ TOKEN_SHORT,
+ TOKEN_UINT,
+ TOKEN_ULONG,
+ TOKEN_USHORT,
+ TOKEN_USIZE,
+ TOKEN_QUAD,
+
+ // C compatibility types
+ TOKEN_C_USHORT,
+ TOKEN_C_SHORT,
+ TOKEN_C_INT,
+ TOKEN_C_UINT,
+ TOKEN_C_LONG,
+ TOKEN_C_ULONG,
+ TOKEN_C_LONGLONG,
+ TOKEN_C_ULONGLONG,
+ TOKEN_C_LONGDOUBLE,
+
+ // Literals.
+
+ // In order to make the grammar
+ // non ambiguous, we split tokens at the
+ // lexer level
+ TOKEN_TYPE_IDENT, // FooBarBaz
+ TOKEN_CAPS_IDENT, // FOO_BAR_BAZ
+ TOKEN_VAR_IDENT, // fooBarBaz
+
+ // We want to parse @foo / #foo / $foo separately.
+ // Otherwise we allow things like "@ foo" which would be pretty bad.
+ TOKEN_AT_IDENT, // @foobar
+ TOKEN_HASH_IDENT, // #foobar
+ TOKEN_DOLLAR_IDENT, // $foobar
+
+ TOKEN_STRING, // "Teststring"
+ TOKEN_INTEGER, // 123 0x23 0b10010 0o327
+ TOKEN_REAL, // 0x23.2p-2a 43.23e23
+ TOKEN_DOCS_LINE, // Any line within /** **/
+
+ // Keywords
+ TOKEN_ALIAS, // Reserved
+ TOKEN_AS,
+ TOKEN_ASM,
+ TOKEN_BREAK,
+ TOKEN_CASE,
+ TOKEN_CAST,
+ TOKEN_CATCH,
+ TOKEN_CONST,
+ TOKEN_CONTINUE,
+ TOKEN_DEFAULT,
+ TOKEN_DEFER,
+ TOKEN_DO,
+ TOKEN_ELSE,
+ TOKEN_ENUM,
+ TOKEN_ERROR,
+ TOKEN_FALSE,
+ TOKEN_FOR,
+ TOKEN_FUNC,
+ TOKEN_GENERIC,
+ TOKEN_GOTO,
+ TOKEN_IF,
+ TOKEN_IMPORT,
+ TOKEN_LOCAL,
+ TOKEN_MACRO,
+ TOKEN_MODULE,
+ TOKEN_NIL,
+ TOKEN_PUBLIC,
+ TOKEN_RETURN,
+ TOKEN_STRUCT,
+ TOKEN_SWITCH,
+ TOKEN_THROW,
+ TOKEN_THROWS,
+ TOKEN_TRUE,
+ TOKEN_TRY,
+ TOKEN_TYPE, // Reserved
+ TOKEN_TYPEDEF,
+ TOKEN_UNION,
+ TOKEN_UNTIL,
+ TOKEN_VAR, // Reserved
+ TOKEN_VOID,
+ TOKEN_VOLATILE,
+ TOKEN_WHILE,
+
+ TOKEN_AT_PARAM, // @param
+ TOKEN_AT_THROWS, // @throws
+ TOKEN_AT_RETURN, // @return
+ TOKEN_AT_ENSURE, // @ensure
+ TOKEN_AT_REQUIRE, // @require
+ TOKEN_AT_PURE, // @pure
+ TOKEN_AT_CONST, // @const
+ TOKEN_AT_REQPARSE, // @reqparse
+ TOKEN_AT_DEPRECATED, // @deprecated
+
+ TOKEN_EOF, // \n - SHOULD ALWAYS BE THE LAST TOKEN.
+
+} TokenType;
+
+const char *token_type_to_string(TokenType type);
diff --git a/src/compiler_tests/shorttest.c b/src/compiler_tests/shorttest.c
new file mode 100644
index 000000000..68c5152fc
--- /dev/null
+++ b/src/compiler_tests/shorttest.c
@@ -0,0 +1,97 @@
+// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+static const char* test_parse = "struct Node\n"
+"{\n"
+" uint hole;\n"
+" uint size;\n"
+" Node* next;\n"
+" Node* prev;\n"
+"}\n"
+"\n"
+"struct Footer\n"
+"{ \n"
+" Node &header;\n"
+"}\n"
+"\n"
+"struct Bin \n"
+"{\n"
+" Node& head;\n"
+"}\n"
+"\n"
+"struct Heap \n"
+"{\n"
+" size start;\n"
+" size end;\n"
+" Bin* bins[BIN_COUNT];\n"
+"}\n"
+"\n"
+"const uint OFFSET = 8;\n"
+"\n"
+"/**\n"
+" * @require start > 0\n"
+" */\n"
+"void Heap.init(Heap& heap, usize start) \n"
+"{\n"
+" Node& init_region = @cast(Node&, start);\n"
+" init_region.hole = 1;\n"
+" init_region.size = HEAP_INIT_SIZE - @sizeof(Node) - @sizeof(Footer);\n"
+"\n"
+" init_region.createFoot();\n"
+"\n"
+" heap.bins[get_bin_index(init_region.size)].add(init_region);\n"
+"\n"
+" heap.start = @cast(void*, start);\n"
+" heap.end = @cast(void*, start + HEAP_INIT_SIZE);\n"
+"}\n"
+"\n"
+"void* Heap.alloc(Heap& heap, usize size) \n"
+"{\n"
+" uint index = get_bin_index(size);\n"
+" Bin& temp = @cast(Bin&, heap.bins[index]);\n"
+" Node* found = temp.getBestFit(size);\n"
+"\n"
+" while (!found) \n"
+" {\n"
+" temp = heap.bins[++index];\n"
+" found = temp.getBestFit(size);\n"
+" }\n"
+"\n"
+" if ((found.size - size) > (overhead + MIN_ALLOC_SZ)) \n"
+" {\n"
+" Node& split = @cast(Node*, @cast(char&, found) + sizeof(Node) + sizeof(Footer) + size);\n"
+" split.size = found.size - size - sizeof(Node) - sizeof(Footer);\n"
+" split.hole = 1;\n"
+" \n"
+" split.createFoot();\n"
+"\n"
+" uint new_idx = get_bin_index(split.size);\n"
+"\n"
+" heap.bins[new_idx].addNode(split); \n"
+"\n"
+" found.size = size; \n"
+" found.createFoot(found); \n"
+" }\n"
+"\n"
+" found.hole = 0; \n"
+" heap.bins[index].removeNode(found);\n"
+" \n"
+" Node& wild = heap.getWilderness(heap);\n"
+" if (wild.size < MIN_WILDERNESS) \n"
+" {\n"
+" uint success = heap.expand(0x1000);\n"
+" if (success == 0) \n"
+" {\n"
+" return nil;\n"
+" }\n"
+" }\n"
+" else if (wild.size > MAX_WILDERNESS) \n"
+" {\n"
+" heap.contract(0x1000);\n"
+" }\n"
+"\n"
+" found.prev = nil;\n"
+" found.next = nil;\n"
+" return &found.next; \n"
+"}";
\ No newline at end of file
diff --git a/src/compiler_tests/tests.c b/src/compiler_tests/tests.c
index 91f767c22..493a8d003 100644
--- a/src/compiler_tests/tests.c
+++ b/src/compiler_tests/tests.c
@@ -7,76 +7,103 @@
#include
#include
#include
+#include
+#include
+#include
#include "benchmark.h"
+#include "../compiler/symtab.h"
-#define TEST_ASSERT(cond, text, ...) do { if (!(cond)) { printf("\nTEST FAILED: " text "\n", ##__VA_ARGS__); exit(-1); } } while (0)
static void test_lexer(void)
{
#ifdef __OPTIMIZE__
printf("--- RUNNING OPTIMIZED ---\n");
#endif
printf("Begin lexer testing.\n");
- printf("1. Check number of keywords...");
+ printf("-- Check number of keywords...\n");
int tokens_found = 0;
- const int EXPECTED_TOKENS = 81;
- const char* tokens[INVALID_TOKEN];
- int len[INVALID_TOKEN];
- for (int i = 0; i < INVALID_TOKEN; i++)
+ const int EXPECTED_TOKENS = 91;
+ const char* tokens[TOKEN_EOF];
+ int len[TOKEN_EOF];
+ compiler_init();
+ for (int i = 1; i < TOKEN_EOF; i++)
{
const char* token = token_type_to_string((TokenType)i);
tokens[i] = token;
len[i] = strlen(token);
- TokenType type = identifier_type(token, len[i]);
- TokenType type2 = ident_type_fnv1(token, len[i]);
-
- if (type != TOKEN_VAR_IDENT)
+ TokenType lookup = TOKEN_VAR_IDENT;
+ const char* interned = symtab_add(token, len[i], fnv1a(token, len[i]), &lookup);
+ if (lookup != TOKEN_VAR_IDENT)
{
+ Token scanned = scan_ident_test(token);
+ TEST_ASSERT(scanned.type == i, "Mismatch scanning: was '%s', expected '%s' - lookup: %s - interned: %s.",
+ token_type_to_string(scanned.type),
+ token_type_to_string(i),
+ token_type_to_string(lookup),
+ interned);
tokens_found++;
- TEST_ASSERT(type == i, "Mismatch on token %s", token);
- if (type2 != type)
- {
- printf("\n(fnv1) Test mismatch on token %s, generated %s\n", token, token_type_to_string(type2));
- }
}
- tokens[i] = "byte";
- len[i] = 4;
+ else
+ {
+ tokens[i] = "casi";
+ len[i] = 4;
+ }
}
- printf(" %d found.\n", tokens_found);
- TEST_ASSERT(ident_type_fnv1("alias ", 6) == TOKEN_VAR_IDENT, "Error in fnv1 ident");
- TEST_ASSERT(identifier_type("alias ", 6) == TOKEN_VAR_IDENT, "Error in switch ident");
- TEST_ASSERT(ident_type_fnv1("alias ", 5) != TOKEN_VAR_IDENT, "Error in fnv1 ident2");
- TEST_ASSERT(identifier_type("alias ", 5) != TOKEN_VAR_IDENT, "Error in switch ident2");
- TEST_ASSERT(tokens_found == EXPECTED_TOKENS, "Unexpected number of identifiers! Expected %d.", EXPECTED_TOKENS);
+ printf("-> %d keywords found.\n", tokens_found);
+ EXPECT("Keywords", tokens_found, EXPECTED_TOKENS);
- const int BENCH_REPEATS = 10000000;
+ const int BENCH_REPEATS = 100000;
- printf("2. Test keyword lexing speed (switch)... ");
+ printf("-- Test keyword lexing speed...\n");
bench_begin();
for (int b = 0; b < BENCH_REPEATS; b++)
{
- for (int i = 0; i < INVALID_TOKEN; i++)
+ for (int i = 1; i < TOKEN_EOF; i++)
{
- identifier_type(tokens[i], len[i]);
+ volatile TokenType t = scan_ident_test(tokens[i]).type;
}
}
- printf("complete in %fs\n", bench_mark());
- printf("3. Test keyword lexing speed (fnv1)... ");
+ printf("-> Test complete in %fs, %.0f kkeywords/s\n", bench_mark(), (BENCH_REPEATS * (TOKEN_EOF - 1)) / (1000 * bench_mark()));
+
+#include "shorttest.c"
+
+ printf("-- Test token lexing speed...\n");
+ const char *pointer = test_parse;
+ int loc = 0;
+ while (*pointer != '\0')
+ {
+ if (*(pointer++) == '\n') loc++;
+ }
+
bench_begin();
+ int tokens_parsed = 0;
for (int b = 0; b < BENCH_REPEATS; b++)
{
- for (int i = 0; i < INVALID_TOKEN; i++)
+ lexer_test_setup(test_parse);
+ Token token;
+ while (1)
{
- ident_type_fnv1(tokens[i], len[i]);
+ token = scan_token();
+ if (token.type == TOKEN_EOF) break;
+ TEST_ASSERT(token.type != INVALID_TOKEN, "Got invalid token");
+ tokens_parsed++;
}
}
- printf("complete in %fs\n", bench_mark());
+ printf("-> Test complete in %fs, %.0f kloc/s, %.0f ktokens/s\n", bench_mark(),
+ loc * BENCH_REPEATS / (1000 * bench_mark()), tokens_parsed / (1000 * bench_mark()));
- exit(0);
+}
+
+void test_compiler(void)
+{
+ compiler_init();
}
void compiler_tests(void)
{
test_lexer();
+ test_compiler();
+
+ exit(0);
}
\ No newline at end of file
diff --git a/src/main.c b/src/main.c
index 624881b59..ead3c153f 100644
--- a/src/main.c
+++ b/src/main.c
@@ -3,9 +3,10 @@
#include "build/project_creation.h"
#include "utils/errors.h"
#include "compiler_tests/tests.h"
-
+#include "compiler/malloc.h"
int main(int argc, const char *argv[])
{
+ init_arena();
parse_arguments(argc, argv);
switch (build_options.command)
{
@@ -26,7 +27,7 @@ int main(int argc, const char *argv[])
case COMMAND_BENCH:
printf("TODO\n");
}
-
+ free_arena();
return 0;
}
diff --git a/src/utils/errors.h b/src/utils/errors.h
index 4b8f9625b..6b40240b9 100644
--- a/src/utils/errors.h
+++ b/src/utils/errors.h
@@ -11,6 +11,8 @@
#define FATAL_ERROR(_string, ...) do { printf("FATAL ERROR at %s:%d: " _string, __func__, __LINE__, ##__VA_ARGS__); printf("\n"); exit(-1); } while(0)
+#define ASSERT(_condition, _string, ...) do { if (!(_condition)) { FATAL_ERROR(_string, ##__VA_ARGS__); } } while(0)
+
#define UNREACHABLE FATAL_ERROR("Cannot reach %s:%d", __func__, __LINE__);
#define TODO FATAL_ERROR("Not done yet %s:%d", __func__, __LINE__);
@@ -18,4 +20,4 @@
#define EXPECT(_string, _value, _expected) \
do { long long __tempval1 = _value; long long __tempval2 = _expected; \
- TEST_ASSERT(__tempval1 == __tempval2, "Checking " _string ": expected %lld but was %lld.", __tempval2, __tempval1); } while(0);
+ TEST_ASSERT(__tempval1 == __tempval2, "Checking " _string ": expected %lld but was %lld.", __tempval2, __tempval1); } while(0)
diff --git a/src/utils/lib.h b/src/utils/lib.h
new file mode 100644
index 000000000..fb81ff37c
--- /dev/null
+++ b/src/utils/lib.h
@@ -0,0 +1,183 @@
+#pragma once
+
+// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdbool.h>
+#include <stdint.h>
+
+static inline bool is_power_of_two(uint64_t x)
+{
+ return x != 0 && (x & (x - 1)) == 0;
+}
+
+static inline uint32_t nextHighestPowerOf2(uint32_t v)
+{
+ v--;
+ v |= v >> 1u;
+ v |= v >> 2u;
+ v |= v >> 4u;
+ v |= v >> 8u;
+ v |= v >> 16u;
+ v++;
+ return v;
+}
+
+
+
+static inline bool is_lower(char c)
+{
+ return c >= 'a' && c <= 'z';
+}
+
+static inline bool is_upper(char c)
+{
+ return c >= 'A' && c <= 'Z';
+}
+
+static inline bool is_oct(char c)
+{
+ return c >= '0' && c <= '7';
+}
+
+static inline bool is_oct_or_(char c)
+{
+ switch (c)
+ {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '_':
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool is_binary(char c)
+{
+ return c == '0' || c == '1';
+}
+
+static inline bool is_binary_or_(char c)
+{
+ switch (c)
+ {
+ case '0': case '1': case '_':
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool is_digit_or_(char c)
+{
+ switch (c)
+ {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case '_':
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool is_digit(char c)
+{
+ return c >= '0' && c <= '9';
+}
+
+static inline bool is_hex_or_(char c)
+{
+ switch (c)
+ {
+ case 'a': case 'b': case 'c': case 'd': case 'e':
+ case 'f':
+ case 'A': case 'B': case 'C': case 'D': case 'E':
+ case 'F':
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case '_':
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool is_hex(char c)
+{
+ switch (c)
+ {
+ case 'a': case 'b': case 'c': case 'd': case 'e':
+ case 'f':
+ case 'A': case 'B': case 'C': case 'D': case 'E':
+ case 'F':
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool is_alphanum_(char c)
+{
+ switch (c)
+ {
+ case 'a': case 'b': case 'c': case 'd': case 'e':
+ case 'f': case 'g': case 'h': case 'i': case 'j':
+ case 'k': case 'l': case 'm': case 'n': case 'o':
+ case 'p': case 'q': case 'r': case 's': case 't':
+ case 'u': case 'v': case 'w': case 'x': case 'y':
+ case 'z':
+ case 'A': case 'B': case 'C': case 'D': case 'E':
+ case 'F': case 'G': case 'H': case 'I': case 'J':
+ case 'K': case 'L': case 'M': case 'N': case 'O':
+ case 'P': case 'Q': case 'R': case 'S': case 'T':
+ case 'U': case 'V': case 'W': case 'X': case 'Y':
+ case 'Z':
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case '_':
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool is_letter(char c)
+{
+ switch (c)
+ {
+ case 'a': case 'b': case 'c': case 'd': case 'e':
+ case 'f': case 'g': case 'h': case 'i': case 'j':
+ case 'k': case 'l': case 'm': case 'n': case 'o':
+ case 'p': case 'q': case 'r': case 's': case 't':
+ case 'u': case 'v': case 'w': case 'x': case 'y':
+ case 'z':
+ case 'A': case 'B': case 'C': case 'D': case 'E':
+ case 'F': case 'G': case 'H': case 'I': case 'J':
+ case 'K': case 'L': case 'M': case 'N': case 'O':
+ case 'P': case 'Q': case 'R': case 'S': case 'T':
+ case 'U': case 'V': case 'W': case 'X': case 'Y':
+ case 'Z':
+ return true;
+ default:
+ return false;
+ }
+}
+
+
+#define FNV1_PRIME 0x01000193u
+#define FNV1_SEED 0x811C9DC5u
+#define FNV1a(c, seed) ((uint32_t)((((unsigned)(c)) ^ (seed)) * FNV1_PRIME))
+
+static inline uint32_t fnv1a(const char *key, uint32_t len)
+{
+ uint32_t hash = FNV1_SEED;
+ for (uint32_t i = 0; i < len; i++)
+ {
+ hash = FNV1a(key[i], hash);
+ }
+ return hash;
+}
\ No newline at end of file
diff --git a/src/utils/string_utils.h b/src/utils/string_utils.h
deleted file mode 100644
index d9101e93d..000000000
--- a/src/utils/string_utils.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#pragma once
-
-// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-
-#include
-
-static inline bool is_lower(char c)
-{
- return c >= 'a' && c <= 'z';
-}
-
-static inline bool is_upper(char c)
-{
- return c >= 'A' && c <= 'Z';
-}
-
-static inline bool is_alphanum_(char c)
-{
- return (c >= 'a' && c <= 'z')
- || (c >= 'A' && c <= 'Z')
- || (c >= '0' && c <= '9')
- || c == '_';
-}
\ No newline at end of file