mirror of
https://github.com/c3lang/c3c.git
synced 2026-02-27 03:51:18 +00:00
Add a little testing and keyword parsing x2
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
cmake_minimum_required(VERSION 3.13)
|
||||
project(c3c C)
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "-O3")
|
||||
|
||||
set(CMAKE_C_STANDARD 11)
|
||||
|
||||
@@ -14,4 +15,4 @@ add_executable(c3c
|
||||
src/utils/errors.c
|
||||
src/utils/file_utils.c
|
||||
src/utils/string_utils.c
|
||||
)
|
||||
src/compiler/lexer.c src/compiler/lexer.h src/compiler_tests/tests.c src/compiler_tests/tests.h src/compiler_tests/benchmark.c src/compiler_tests/benchmark.h)
|
||||
@@ -128,6 +128,11 @@ static void parse_command(void)
|
||||
build_options.project_name = next_arg();
|
||||
return;
|
||||
}
|
||||
if (arg_match("utest"))
|
||||
{
|
||||
build_options.command = COMMAND_UNIT_TEST;
|
||||
return;
|
||||
}
|
||||
if (arg_match("compile"))
|
||||
{
|
||||
build_options.command = COMMAND_COMPILE;
|
||||
|
||||
@@ -20,6 +20,7 @@ typedef enum
|
||||
COMMAND_DIST,
|
||||
COMMAND_DOCS,
|
||||
COMMAND_BENCH,
|
||||
COMMAND_UNIT_TEST,
|
||||
} CompilerCommand;
|
||||
|
||||
typedef struct
|
||||
|
||||
806
src/compiler/lexer.c
Normal file
806
src/compiler/lexer.c
Normal file
@@ -0,0 +1,806 @@
|
||||
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include <stdint.h>
|
||||
#include "lexer.h"
|
||||
#include "../utils/string_utils.h"
|
||||
#include <string.h>
|
||||
#include "../utils/errors.h"
|
||||
|
||||
// State of the scanner over one source buffer.
// NOTE(review): no code in this file reads or writes these fields yet; the roles
// below are inferred from the names only -- confirm when the scanner is written.
typedef struct
{
	const char *begin;     // presumably the first character of the source text
	const char *start;     // presumably the first character of the token being scanned
	const char *current;   // presumably the next character to be consumed
	uint16_t source_file;  // presumably an index identifying the file being lexed
	/* LexerState lexer_state;
	File *current_file;
	Token saved_tok;
	Token saved_prev_tok;
	SourceLoc last_in_range;*/
} Lexer;
|
||||
|
||||
|
||||
Lexer lexer;
|
||||
|
||||
|
||||
// Both helpers expand to code that uses the local variables `start` and `len`
// of the function they are used in.
//
// MATCH_KEYWORD_LEN yields _type only when the identifier has exactly the length of
// the keyword literal (sizeof counts the terminating NUL, hence len + 1) and
// check_keyword() accepts it; otherwise it yields TOKEN_VAR_IDENT.
#define MATCH_KEYWORD_LEN(_keyword, _type) \
	((sizeof(_keyword) != len + 1) ? TOKEN_VAR_IDENT : check_keyword(start, len, _keyword, _type))

// MATCH_KEYWORD skips the length test; use it only where the call site has already
// established that len equals the keyword's length.
#define MATCH_KEYWORD(_keyword, _type) check_keyword(start, len, _keyword, _type)
|
||||
|
||||
// Yes this is an ugly hand written keyword identifier. It should be benchmarked against
|
||||
// a table-based state machine.
|
||||
|
||||
/**
 * Compare an identifier against a keyword, ignoring the first character.
 *
 * Bytes 1 .. len-1 of `start` are compared with the same bytes of `keyword`;
 * byte 0 is never inspected here.
 *
 * @param start   first character of the identifier
 * @param len     identifier length; must be at least 1 because len - 1 bytes are compared
 * @param keyword keyword literal to compare against
 * @param type    token type to return on a match
 * @return `type` if the tails are equal, otherwise TOKEN_VAR_IDENT
 */
static inline TokenType check_keyword(const char * restrict start, size_t len, const char * restrict keyword, TokenType type)
{
	const int tail_differs = memcmp(start + 1, keyword + 1, len - 1);
	return tail_differs ? TOKEN_VAR_IDENT : type;
}
|
||||
|
||||
|
||||
|
||||
static inline TokenType c_ident(const char *restrict start, const int len)
|
||||
{
|
||||
switch (start[3])
|
||||
{
|
||||
case 'n':
|
||||
return MATCH_KEYWORD_LEN("c_int", TOKEN_C_INT);
|
||||
case 'i':
|
||||
return MATCH_KEYWORD_LEN("c_uint", TOKEN_C_UINT);
|
||||
case 's':
|
||||
return MATCH_KEYWORD_LEN("c_ushort", TOKEN_C_USHORT);
|
||||
case 'h':
|
||||
return MATCH_KEYWORD_LEN("c_short", TOKEN_C_SHORT);
|
||||
case 'o':
|
||||
switch (len)
|
||||
{
|
||||
case 10:
|
||||
return MATCH_KEYWORD("c_longlong", TOKEN_C_LONGLONG);
|
||||
case 6:
|
||||
return MATCH_KEYWORD("c_long", TOKEN_C_LONG);
|
||||
case 12:
|
||||
return MATCH_KEYWORD("c_longdouble", TOKEN_C_LONGDOUBLE);
|
||||
default:
|
||||
return TOKEN_VAR_IDENT;
|
||||
}
|
||||
case 'l':
|
||||
return len == 11
|
||||
? MATCH_KEYWORD("c_ulonglong", TOKEN_C_ULONGLONG)
|
||||
: MATCH_KEYWORD_LEN("c_ulong", TOKEN_C_ULONG);
|
||||
default:
|
||||
return TOKEN_VAR_IDENT;
|
||||
}
|
||||
}
|
||||
static inline TokenType ident_type(const char *restrict start, const int len)
|
||||
{
|
||||
char current_value = start[0];
|
||||
if (len < 2) return TOKEN_VAR_IDENT;
|
||||
if (current_value == 'c' && start[1] == '_') return c_ident(start, len);
|
||||
if (len > 8 || !is_lower(current_value)) return TOKEN_VAR_IDENT;
|
||||
switch (current_value)
|
||||
{
|
||||
case 'a':
|
||||
if (len == 2) return MATCH_KEYWORD("as", TOKEN_AS);
|
||||
switch (start[1])
|
||||
{
|
||||
case 's':
|
||||
return MATCH_KEYWORD_LEN("asm", TOKEN_ASM);
|
||||
case 'l':
|
||||
return MATCH_KEYWORD_LEN("alias", TOKEN_ALIAS);
|
||||
default:
|
||||
return TOKEN_VAR_IDENT;
|
||||
}
|
||||
case 'b':
|
||||
switch (start[1])
|
||||
{
|
||||
case 'o':
|
||||
return MATCH_KEYWORD_LEN("bool", TOKEN_BOOL);
|
||||
case 'y':
|
||||
return MATCH_KEYWORD_LEN("byte", TOKEN_BYTE);
|
||||
case 'r':
|
||||
return MATCH_KEYWORD_LEN("break", TOKEN_BREAK);
|
||||
default:
|
||||
return TOKEN_VAR_IDENT;
|
||||
}
|
||||
case 'c':
|
||||
if (len < 4) return TOKEN_VAR_IDENT;
|
||||
if (len == 8) return MATCH_KEYWORD_LEN("continue", TOKEN_CONTINUE);
|
||||
switch (start[3])
|
||||
{
|
||||
case 't':
|
||||
return MATCH_KEYWORD_LEN("cast", TOKEN_CAST);
|
||||
case 'e':
|
||||
return MATCH_KEYWORD_LEN("case", TOKEN_CASE);
|
||||
case 'r':
|
||||
return MATCH_KEYWORD_LEN("char", TOKEN_CHAR);
|
||||
case 's':
|
||||
return MATCH_KEYWORD_LEN("const", TOKEN_CONST);
|
||||
case 'c':
|
||||
return MATCH_KEYWORD_LEN("catch", TOKEN_CATCH);
|
||||
default:
|
||||
return TOKEN_VAR_IDENT;
|
||||
|
||||
}
|
||||
case 'd':
|
||||
if (len < 5) return MATCH_KEYWORD_LEN("do", TOKEN_DO);
|
||||
switch (start[3])
|
||||
{
|
||||
case 'e':
|
||||
return MATCH_KEYWORD_LEN("defer", TOKEN_DEFER);
|
||||
case 'a':
|
||||
return MATCH_KEYWORD_LEN("default", TOKEN_DEFAULT);
|
||||
case 'b':
|
||||
return MATCH_KEYWORD_LEN("double", TOKEN_DOUBLE);
|
||||
default:
|
||||
return TOKEN_VAR_IDENT;
|
||||
}
|
||||
case 'e':
|
||||
switch (start[1])
|
||||
{
|
||||
case 'l':
|
||||
return MATCH_KEYWORD_LEN("else", TOKEN_ELSE);
|
||||
case 'n':
|
||||
return MATCH_KEYWORD_LEN("enum", TOKEN_ENUM);
|
||||
case 'r':
|
||||
return MATCH_KEYWORD_LEN("error", TOKEN_ERROR);
|
||||
default:
|
||||
return TOKEN_VAR_IDENT;
|
||||
}
|
||||
case 'f':
|
||||
switch (start[2])
|
||||
{
|
||||
case '6':
|
||||
return MATCH_KEYWORD_LEN("f16", TOKEN_F16);
|
||||
case '2':
|
||||
if (len == 4) return MATCH_KEYWORD_LEN("f128", TOKEN_F128);
|
||||
return MATCH_KEYWORD_LEN("f32", TOKEN_F32);
|
||||
case '4':
|
||||
return MATCH_KEYWORD_LEN("f64", TOKEN_F64);
|
||||
case '5':
|
||||
return MATCH_KEYWORD_LEN("f256", TOKEN_F256);
|
||||
case 'r':
|
||||
return MATCH_KEYWORD_LEN("for", TOKEN_FOR);
|
||||
case 'l':
|
||||
return MATCH_KEYWORD_LEN("false", TOKEN_FALSE);
|
||||
case 'o':
|
||||
return MATCH_KEYWORD_LEN("float", TOKEN_FLOAT);
|
||||
case 'n':
|
||||
return MATCH_KEYWORD_LEN("func", TOKEN_FUNC);
|
||||
|
||||
default:
|
||||
return TOKEN_VAR_IDENT;
|
||||
}
|
||||
case 'g':
|
||||
switch (start[1])
|
||||
{
|
||||
case 'o':
|
||||
return MATCH_KEYWORD_LEN("goto", TOKEN_GOTO);
|
||||
case 'e':
|
||||
return MATCH_KEYWORD_LEN("generic", TOKEN_GENERIC);
|
||||
default:
|
||||
return TOKEN_VAR_IDENT;
|
||||
}
|
||||
case 'h':
|
||||
return MATCH_KEYWORD_LEN("half", TOKEN_HALF);
|
||||
case 'i':
|
||||
switch (start[1])
|
||||
{
|
||||
case 'f':
|
||||
return MATCH_KEYWORD_LEN("if", TOKEN_IF);
|
||||
case '8':
|
||||
return MATCH_KEYWORD_LEN("i8", TOKEN_I8);
|
||||
case '6':
|
||||
return MATCH_KEYWORD_LEN("i64", TOKEN_I64);
|
||||
case '2':
|
||||
return MATCH_KEYWORD_LEN("i256", TOKEN_I256);
|
||||
case '3':
|
||||
return MATCH_KEYWORD_LEN("i32", TOKEN_I32);
|
||||
case '1':
|
||||
if (len == 4) return MATCH_KEYWORD_LEN("i128", TOKEN_I128);
|
||||
return MATCH_KEYWORD_LEN("i16", TOKEN_I16);
|
||||
case 'n':
|
||||
return MATCH_KEYWORD_LEN("int", TOKEN_INT);
|
||||
case 'm':
|
||||
return MATCH_KEYWORD_LEN("import", TOKEN_IMPORT);
|
||||
case 's':
|
||||
return MATCH_KEYWORD_LEN("isize", TOKEN_ISIZE);
|
||||
default:
|
||||
return TOKEN_VAR_IDENT;
|
||||
}
|
||||
case 'l':
|
||||
if (len < 4) return TOKEN_VAR_IDENT;
|
||||
switch (start[2])
|
||||
{
|
||||
case 'n':
|
||||
return MATCH_KEYWORD_LEN("long", TOKEN_LONG);
|
||||
case 'c':
|
||||
return MATCH_KEYWORD_LEN("local", TOKEN_LOCAL);
|
||||
default:
|
||||
return TOKEN_VAR_IDENT;
|
||||
}
|
||||
case 'm':
|
||||
switch (start[1])
|
||||
{
|
||||
case 'a':
|
||||
return MATCH_KEYWORD_LEN("macro", TOKEN_MACRO);
|
||||
case 'o':
|
||||
return MATCH_KEYWORD_LEN("module", TOKEN_MODULE);
|
||||
default:
|
||||
return TOKEN_VAR_IDENT;
|
||||
}
|
||||
case 'n':
|
||||
return MATCH_KEYWORD_LEN("nil", TOKEN_NIL);
|
||||
case 'p':
|
||||
return MATCH_KEYWORD_LEN("public", TOKEN_PUBLIC);
|
||||
case 'q':
|
||||
return MATCH_KEYWORD_LEN("quad", TOKEN_QUAD);
|
||||
case 'r':
|
||||
return MATCH_KEYWORD_LEN("return", TOKEN_RETURN);
|
||||
case 's':
|
||||
switch (start[1])
|
||||
{
|
||||
case 'h':
|
||||
return MATCH_KEYWORD_LEN("short", TOKEN_SHORT);
|
||||
case 't':
|
||||
return MATCH_KEYWORD_LEN("struct", TOKEN_STRUCT);
|
||||
case 'w':
|
||||
return MATCH_KEYWORD_LEN("switch", TOKEN_SWITCH);
|
||||
default:
|
||||
return TOKEN_VAR_IDENT;
|
||||
}
|
||||
case 't':
|
||||
if (len < 3) return TOKEN_VAR_IDENT;
|
||||
switch (start[2])
|
||||
{
|
||||
case 'p':
|
||||
if (len == 7) return MATCH_KEYWORD_LEN("typedef", TOKEN_TYPEDEF);
|
||||
return MATCH_KEYWORD_LEN("type", TOKEN_TYPE);
|
||||
case 'u':
|
||||
return MATCH_KEYWORD_LEN("true", TOKEN_TRUE);
|
||||
case 'y':
|
||||
return MATCH_KEYWORD_LEN("try", TOKEN_TRY);
|
||||
case 'r':
|
||||
if (len == 6) return MATCH_KEYWORD_LEN("throws", TOKEN_THROWS);
|
||||
return MATCH_KEYWORD_LEN("throw", TOKEN_THROW);
|
||||
default:
|
||||
return TOKEN_VAR_IDENT;
|
||||
}
|
||||
case 'u':
|
||||
if (len < 3) return MATCH_KEYWORD_LEN("u8", TOKEN_U8);
|
||||
switch (start[1])
|
||||
{
|
||||
case '1':
|
||||
if (len == 4) return MATCH_KEYWORD("u128", TOKEN_U128);
|
||||
return MATCH_KEYWORD_LEN("u16", TOKEN_U16);
|
||||
case '2':
|
||||
return MATCH_KEYWORD_LEN("u256", TOKEN_U256);
|
||||
case '3':
|
||||
return MATCH_KEYWORD_LEN("u32", TOKEN_U32);
|
||||
case '6':
|
||||
return MATCH_KEYWORD_LEN("u64", TOKEN_U64);
|
||||
case 'i':
|
||||
return MATCH_KEYWORD_LEN("uint", TOKEN_UINT);
|
||||
case 'n':
|
||||
if (start[2] == 't') return MATCH_KEYWORD_LEN("until", TOKEN_UNTIL);
|
||||
return MATCH_KEYWORD_LEN("union", TOKEN_UNION);
|
||||
case 'l':
|
||||
return MATCH_KEYWORD_LEN("ulong", TOKEN_ULONG);
|
||||
case 's':
|
||||
if (len == 5) return MATCH_KEYWORD("usize", TOKEN_USIZE);
|
||||
return MATCH_KEYWORD_LEN("ushort", TOKEN_USHORT);
|
||||
default:
|
||||
return TOKEN_VAR_IDENT;
|
||||
}
|
||||
case 'v':
|
||||
if (len < 3) return TOKEN_VAR_IDENT;
|
||||
switch (start[2])
|
||||
{
|
||||
case 'r':
|
||||
return MATCH_KEYWORD_LEN("var", TOKEN_VAR);
|
||||
case 'i':
|
||||
return MATCH_KEYWORD_LEN("void", TOKEN_VOID);
|
||||
case 'l':
|
||||
return MATCH_KEYWORD_LEN("volatile", TOKEN_VOLATILE);
|
||||
default:
|
||||
return TOKEN_VAR_IDENT;
|
||||
}
|
||||
case 'w':
|
||||
return MATCH_KEYWORD_LEN("while", TOKEN_WHILE);
|
||||
default:
|
||||
return TOKEN_VAR_IDENT;
|
||||
}
|
||||
}
|
||||
|
||||
// 32-bit FNV prime and offset basis.
#define PRIME 0x01000193
#define SEED 0x811C9DC5

// One hashing round: xor the value into the running hash, then multiply by PRIME.
// NOTE(review): xor-before-multiply is the FNV-1a ordering, despite the FNV1 name.
#define FNV1(a, seed) ((uint32_t)((((unsigned int)(a)) ^ (seed)) * PRIME))
// Hash three values down to 9 bits (0..511). The rounds are applied to b first,
// then a, then c. ident_type_fnv1 passes (length, first char, second char).
#define HASH(a, b, c) (FNV1(c, FNV1((a), FNV1(b, SEED))) & 0x1FFu)
|
||||
|
||||
TokenType ident_type_fnv1(const char *restrict start, int len)
|
||||
{
|
||||
char current_value = start[0];
|
||||
if (len < 2) return TOKEN_VAR_IDENT;
|
||||
char second = start[1];
|
||||
if (current_value == 'c' && second == '_') return c_ident(start, len);
|
||||
if (len > 8 || !is_lower(current_value)) return TOKEN_VAR_IDENT;
|
||||
switch (HASH(len, current_value, second))
|
||||
{
|
||||
case HASH(2, 'a', 's'):
|
||||
return MATCH_KEYWORD_LEN("as", TOKEN_AS);
|
||||
case HASH(3, 'a', 's'):
|
||||
return MATCH_KEYWORD_LEN("asm", TOKEN_ASM);
|
||||
case HASH(5, 'a', 'l'):
|
||||
return MATCH_KEYWORD_LEN("alias", TOKEN_ALIAS);
|
||||
case HASH(4, 'b', 'o'):
|
||||
return MATCH_KEYWORD_LEN("bool", TOKEN_BOOL);
|
||||
case HASH(4, 'b', 'y'):
|
||||
return MATCH_KEYWORD_LEN("byte", TOKEN_BYTE);
|
||||
case HASH(5, 'b', 'r'):
|
||||
return MATCH_KEYWORD_LEN("break", TOKEN_BREAK);
|
||||
case HASH(8, 'c', 'o'):
|
||||
return MATCH_KEYWORD_LEN("continue", TOKEN_CONTINUE);
|
||||
case HASH(4, 'c', 'a'):
|
||||
return len > 3 && start[3] == 't' ? MATCH_KEYWORD_LEN("cast", TOKEN_CAST) : MATCH_KEYWORD_LEN("case", TOKEN_CASE);
|
||||
case HASH(5, 'c', '_'):
|
||||
return MATCH_KEYWORD_LEN("c_int", TOKEN_C_INT);
|
||||
case HASH(6, 'c', '_'):
|
||||
return len > 3 && start[2] == 'u' ? MATCH_KEYWORD_LEN("c_uint", TOKEN_C_UINT) : MATCH_KEYWORD_LEN("c_long", TOKEN_C_LONG);
|
||||
case HASH(7, 'c', '_'):
|
||||
return MATCH_KEYWORD_LEN("c_short", TOKEN_C_SHORT);
|
||||
case HASH(8, 'c', '_'):
|
||||
return MATCH_KEYWORD_LEN("c_ushort", TOKEN_C_USHORT);
|
||||
case HASH(4, 'c', 'h'):
|
||||
return MATCH_KEYWORD_LEN("char", TOKEN_CHAR);
|
||||
case HASH(5, 'c', 'o'):
|
||||
return MATCH_KEYWORD_LEN("const", TOKEN_CONST);
|
||||
case HASH(5, 'c', 'a'):
|
||||
return MATCH_KEYWORD_LEN("catch", TOKEN_CATCH);
|
||||
case HASH(2, 'd', 'o'):
|
||||
return MATCH_KEYWORD_LEN("do", TOKEN_DO);
|
||||
case HASH(5, 'd', 'e'):
|
||||
return MATCH_KEYWORD_LEN("defer", TOKEN_DEFER);
|
||||
case HASH(7, 'd', 'e'):
|
||||
return MATCH_KEYWORD_LEN("default", TOKEN_DEFAULT);
|
||||
case HASH(6, 'd', 'o'):
|
||||
return MATCH_KEYWORD_LEN("double", TOKEN_DOUBLE);
|
||||
case HASH(4, 'e', 'l'):
|
||||
return MATCH_KEYWORD_LEN("else", TOKEN_ELSE);
|
||||
case HASH(4, 'e', 'n'):
|
||||
return MATCH_KEYWORD_LEN("enum", TOKEN_ENUM);
|
||||
case HASH(5, 'e', 'r'):
|
||||
return MATCH_KEYWORD_LEN("error", TOKEN_ERROR);
|
||||
case HASH(3, 'f', '1'):
|
||||
return MATCH_KEYWORD_LEN("f16", TOKEN_F16);
|
||||
case HASH(4, 'f', '1'):
|
||||
return MATCH_KEYWORD_LEN("f128", TOKEN_F128);
|
||||
case HASH(3, 'f', '3'):
|
||||
return MATCH_KEYWORD_LEN("f32", TOKEN_F32);
|
||||
case HASH(3, 'f', '6'):
|
||||
return MATCH_KEYWORD_LEN("f64", TOKEN_F64);
|
||||
case HASH(4, 'f', '2'):
|
||||
return MATCH_KEYWORD_LEN("f256", TOKEN_F256);
|
||||
case HASH(3, 'f', 'o'):
|
||||
return MATCH_KEYWORD_LEN("for", TOKEN_FOR);
|
||||
case HASH(5, 'f', 'a'):
|
||||
return MATCH_KEYWORD_LEN("false", TOKEN_FALSE);
|
||||
case HASH(5, 'f', 'l'):
|
||||
return MATCH_KEYWORD_LEN("float", TOKEN_FLOAT);
|
||||
case HASH(4, 'f', 'u'):
|
||||
return MATCH_KEYWORD_LEN("func", TOKEN_FUNC);
|
||||
case HASH(4, 'g', 'o'):
|
||||
return MATCH_KEYWORD_LEN("goto", TOKEN_GOTO);
|
||||
case HASH(7, 'g', 'e'):
|
||||
return MATCH_KEYWORD_LEN("generic", TOKEN_GENERIC);
|
||||
case HASH(4, 'h', 'a'):
|
||||
return MATCH_KEYWORD_LEN("half", TOKEN_HALF);
|
||||
case HASH(2, 'i', 'f'):
|
||||
return MATCH_KEYWORD_LEN("if", TOKEN_IF);
|
||||
case HASH(2, 'i', '8'):
|
||||
return MATCH_KEYWORD_LEN("i8", TOKEN_I8);
|
||||
case HASH(3, 'i', '6'):
|
||||
return MATCH_KEYWORD_LEN("i64", TOKEN_I64);
|
||||
case HASH(4, 'i', '2'):
|
||||
return MATCH_KEYWORD_LEN("i256", TOKEN_I256);
|
||||
case HASH(3, 'i', '3'):
|
||||
return MATCH_KEYWORD_LEN("i32", TOKEN_I32);
|
||||
case HASH(4, 'i', '1'):
|
||||
return MATCH_KEYWORD_LEN("i128", TOKEN_I128);
|
||||
case HASH(3, 'i', '1'):
|
||||
return MATCH_KEYWORD_LEN("i16", TOKEN_I16);
|
||||
case HASH(3, 'i', 'n'):
|
||||
return MATCH_KEYWORD_LEN("int", TOKEN_INT);
|
||||
case HASH(6, 'i', 'm'):
|
||||
return MATCH_KEYWORD_LEN("import", TOKEN_IMPORT);
|
||||
case HASH(5, 'i', 's'):
|
||||
return MATCH_KEYWORD_LEN("isize", TOKEN_ISIZE);
|
||||
case HASH(4, 'l', 'o'):
|
||||
return MATCH_KEYWORD_LEN("long", TOKEN_LONG);
|
||||
case HASH(5, 'l', 'o'):
|
||||
return MATCH_KEYWORD_LEN("local", TOKEN_LOCAL);
|
||||
case HASH(5, 'm', 'a'):
|
||||
return MATCH_KEYWORD_LEN("macro", TOKEN_MACRO);
|
||||
case HASH(6, 'm', 'o'):
|
||||
return MATCH_KEYWORD_LEN("module", TOKEN_MODULE);
|
||||
case HASH(3, 'n', 'i'):
|
||||
return MATCH_KEYWORD_LEN("nil", TOKEN_NIL);
|
||||
case HASH(6, 'p', 'u'):
|
||||
return MATCH_KEYWORD_LEN("public", TOKEN_PUBLIC);
|
||||
case HASH(4, 'q', 'u'):
|
||||
return MATCH_KEYWORD_LEN("quad", TOKEN_QUAD);
|
||||
case HASH(6, 'r', 'e'):
|
||||
return MATCH_KEYWORD_LEN("return", TOKEN_RETURN);
|
||||
case HASH(5, 's', 'h'):
|
||||
return MATCH_KEYWORD_LEN("short", TOKEN_SHORT);
|
||||
case HASH(6, 's', 't'):
|
||||
return MATCH_KEYWORD_LEN("struct", TOKEN_STRUCT);
|
||||
case HASH(6, 's', 'w'):
|
||||
return MATCH_KEYWORD_LEN("switch", TOKEN_SWITCH);
|
||||
case HASH(7, 't', 'y'):
|
||||
return MATCH_KEYWORD_LEN("typedef", TOKEN_TYPEDEF);
|
||||
case HASH(4, 't', 'y'):
|
||||
return MATCH_KEYWORD_LEN("type", TOKEN_TYPE);
|
||||
case HASH(4, 't', 'r'):
|
||||
return MATCH_KEYWORD_LEN("true", TOKEN_TRUE);
|
||||
case HASH(3, 't', 'r'):
|
||||
return MATCH_KEYWORD_LEN("try", TOKEN_TRY);
|
||||
case HASH(6, 't', 'h'):
|
||||
return MATCH_KEYWORD_LEN("throws", TOKEN_THROWS);
|
||||
case HASH(5, 't', 'h'):
|
||||
return MATCH_KEYWORD_LEN("throw", TOKEN_THROW);
|
||||
case HASH(2, 'u', '8'):
|
||||
return MATCH_KEYWORD_LEN("u8", TOKEN_U8);
|
||||
case HASH(4, 'u', '1'):
|
||||
return MATCH_KEYWORD_LEN("u128", TOKEN_U128);
|
||||
case HASH(3, 'u', '1'):
|
||||
return MATCH_KEYWORD_LEN("u16", TOKEN_U16);
|
||||
case HASH(4, 'u', '2'):
|
||||
return MATCH_KEYWORD_LEN("u256", TOKEN_U256);
|
||||
case HASH(3, 'u', '3'):
|
||||
return MATCH_KEYWORD_LEN("u32", TOKEN_U32);
|
||||
case HASH(3, 'u', '6'):
|
||||
return MATCH_KEYWORD_LEN("u64", TOKEN_U64);
|
||||
case HASH(4, 'u', 'i'):
|
||||
return MATCH_KEYWORD_LEN("uint", TOKEN_UINT);
|
||||
case HASH(5, 'u', 'n'):
|
||||
if (start[2] == 't') return MATCH_KEYWORD_LEN("until", TOKEN_UNTIL);
|
||||
return MATCH_KEYWORD_LEN("union", TOKEN_UNION);
|
||||
case HASH(5, 'u', 'l'):
|
||||
return MATCH_KEYWORD_LEN("ulong", TOKEN_ULONG);
|
||||
case HASH(5, 'u', 's'):
|
||||
return MATCH_KEYWORD_LEN("usize", TOKEN_USIZE);
|
||||
case HASH(6, 'u', 's'):
|
||||
return MATCH_KEYWORD_LEN("ushort", TOKEN_USHORT);
|
||||
case HASH(3, 'v', 'a'):
|
||||
return MATCH_KEYWORD_LEN("var", TOKEN_VAR);
|
||||
case HASH(4, 'v', 'o'):
|
||||
return MATCH_KEYWORD_LEN("void", TOKEN_VOID);
|
||||
case HASH(8, 'v', 'o'):
|
||||
return MATCH_KEYWORD_LEN("volatile", TOKEN_VOLATILE);
|
||||
case HASH(5, 'w', 'h'):
|
||||
return MATCH_KEYWORD_LEN("while", TOKEN_WHILE);
|
||||
default:
|
||||
return TOKEN_VAR_IDENT;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// The keyword-matching helper macros are only needed by the functions above;
// drop them so short generic names such as PRIME and SEED do not leak further.
#undef HASH
#undef FNV1
#undef SEED
#undef PRIME

#undef MATCH_KEYWORD
#undef MATCH_KEYWORD_LEN
|
||||
|
||||
/**
 * Public keyword classifier: forwards to the switch-based ident_type().
 *
 * @param start first character of the identifier
 * @param len   identifier length
 * @return the keyword's token type, or TOKEN_VAR_IDENT
 */
TokenType identifier_type(const char* restrict start, int len)
{
	const TokenType classified = ident_type(start, len);
	return classified;
}
|
||||
|
||||
/**
 * Map a token type to a static display string.
 *
 * Punctuation and keywords map to their source spelling; token classes
 * (identifiers, literals, error, end of file) map to a "<...>" placeholder.
 * A value with no case below reaches UNREACHABLE.
 *
 * @param type token type to describe
 * @return pointer to a string literal; never freed by the caller
 */
const char *token_type_to_string(TokenType type)
{
	switch (type)
	{
		// Brackets and separators
		case TOKEN_LPAREN: return "(";
		case TOKEN_RPAREN: return ")";
		case TOKEN_LBRACE: return "{";
		case TOKEN_RBRACE: return "}";
		case TOKEN_LBRACKET: return "[";
		case TOKEN_RBRACKET: return "]";
		case TOKEN_COMMA: return ",";
		case TOKEN_DOT: return ".";
		case TOKEN_EOS: return ";";
		case TOKEN_AT: return "@";

		// Arithmetic, assignment and punctuation
		case TOKEN_PLUS: return "+";
		case TOKEN_PLUSPLUS: return "++";
		case TOKEN_PLUS_ASSIGN: return "+=";
		case TOKEN_BIT_NOT: return "~";
		case TOKEN_NOT: return "!";
		case TOKEN_MINUS: return "-";
		case TOKEN_MINUSMINUS: return "--";
		case TOKEN_MINUS_ASSIGN: return "-=";
		case TOKEN_STAR: return "*";
		case TOKEN_MULT_ASSIGN: return "*=";
		case TOKEN_DIV: return "/";
		case TOKEN_DIV_ASSIGN: return "/=";
		case TOKEN_MOD: return "%";
		case TOKEN_MOD_ASSIGN: return "%=";
		case TOKEN_NOT_EQUAL: return "!=";
		case TOKEN_EQ: return "=";
		case TOKEN_EQEQ: return "==";
		case TOKEN_COLON: return ":";
		case TOKEN_COLCOLON: return "::";
		case TOKEN_DOTDOT: return "..";
		case TOKEN_QUESTION: return "?";
		case TOKEN_ELIPSIS: return "...";

		// Comparison, shift, logical and bitwise operators
		case TOKEN_GREATER: return ">";
		case TOKEN_GREATER_EQ: return ">=";
		case TOKEN_RIGHT_SHIFT: return ">>";
		case TOKEN_RIGHT_SHIFT_ASSIGN: return ">>=";
		case TOKEN_LESS: return "<";
		case TOKEN_LESS_EQ: return "<=";
		case TOKEN_LEFT_SHIFT: return "<<";
		case TOKEN_LEFT_SHIFT_ASSIGN: return "<<=";
		case TOKEN_ARROW: return "->";
		case TOKEN_AND: return "&&";
		case TOKEN_AND_ASSIGN: return "&&=";
		case TOKEN_AMP: return "&";
		case TOKEN_BIT_AND_ASSIGN: return "&=";
		case TOKEN_OR: return "||";
		case TOKEN_OR_ASSIGN: return "||=";
		case TOKEN_BIT_OR: return "|";
		case TOKEN_BIT_OR_ASSIGN: return "|=";
		case TOKEN_BIT_XOR: return "^";
		case TOKEN_BIT_XOR_ASSIGN: return "^=";
		case TOKEN_ELVIS: return "?:";

		// Built-in type names
		case TOKEN_F256: return "f256";
		case TOKEN_I256: return "i256";
		case TOKEN_U256: return "u256";
		case TOKEN_F128: return "f128";
		case TOKEN_I128: return "i128";
		case TOKEN_U128: return "u128";
		case TOKEN_F64: return "f64";
		case TOKEN_I64: return "i64";
		case TOKEN_U64: return "u64";
		case TOKEN_F32: return "f32";
		case TOKEN_I32: return "i32";
		case TOKEN_U32: return "u32";
		case TOKEN_F16: return "f16";
		case TOKEN_I16: return "i16";
		case TOKEN_U16: return "u16";
		case TOKEN_I8: return "i8";
		case TOKEN_U8: return "u8";
		case TOKEN_QUAD: return "quad";
		case TOKEN_DOUBLE: return "double";
		case TOKEN_FLOAT: return "float";
		case TOKEN_HALF: return "half";
		case TOKEN_LONG: return "long";
		case TOKEN_ULONG: return "ulong";
		case TOKEN_INT: return "int";
		case TOKEN_UINT: return "uint";
		case TOKEN_SHORT: return "short";
		case TOKEN_USHORT: return "ushort";
		case TOKEN_BYTE: return "byte";
		case TOKEN_CHAR: return "char";
		case TOKEN_BOOL: return "bool";
		case TOKEN_ISIZE: return "isize";
		case TOKEN_USIZE: return "usize";

		// Identifier and literal classes (placeholders, not source spellings)
		case TOKEN_TYPE_IDENT: return "<TypeIdent>";
		case TOKEN_CAPS_IDENT: return "<CAPS_IDENT>";
		case TOKEN_VAR_IDENT: return "<varIdent>";
		case TOKEN_AT_IDENT: return "<@ident>";
		case TOKEN_HASH_IDENT: return "<#ident>";
		case TOKEN_DOLLAR_IDENT: return "<$ident>";
		case TOKEN_STRING: return "<string>";
		case TOKEN_INTEGER: return "<int>";
		case TOKEN_REAL: return "<float>";

		// Keywords
		case TOKEN_ALIAS: return "alias";
		case TOKEN_AS: return "as";
		case TOKEN_ASM: return "asm";
		case TOKEN_BREAK: return "break";
		case TOKEN_CASE: return "case";
		case TOKEN_CAST: return "cast";
		case TOKEN_CATCH: return "catch";
		case TOKEN_CONST: return "const";
		case TOKEN_CONTINUE: return "continue";
		case TOKEN_DEFAULT: return "default";
		case TOKEN_DEFER: return "defer";
		case TOKEN_DO: return "do";
		case TOKEN_ELSE: return "else";
		case TOKEN_ENUM: return "enum";
		case TOKEN_ERROR: return "<error>";
		case TOKEN_FALSE: return "false";
		case TOKEN_FOR: return "for";
		case TOKEN_FUNC: return "func";
		case TOKEN_GENERIC: return "generic";
		case TOKEN_GOTO: return "goto";
		case TOKEN_IF: return "if";
		case TOKEN_IMPORT: return "import";
		case TOKEN_LOCAL: return "local";
		case TOKEN_MACRO: return "macro";
		case TOKEN_MODULE: return "module";
		case TOKEN_NIL: return "nil";
		case TOKEN_PUBLIC: return "public";
		case TOKEN_RETURN: return "return";
		case TOKEN_STRUCT: return "struct";
		case TOKEN_SWITCH: return "switch";
		case TOKEN_THROW: return "throw";
		case TOKEN_THROWS: return "throws";
		case TOKEN_TRUE: return "true";
		case TOKEN_TRY: return "try";
		case TOKEN_TYPE: return "type";
		case TOKEN_TYPEDEF: return "typedef";
		case TOKEN_UNION: return "union";
		case TOKEN_UNTIL: return "until";
		case TOKEN_VAR: return "var";
		case TOKEN_VOID: return "void";
		case TOKEN_VOLATILE: return "volatile";
		case TOKEN_WHILE: return "while";

		// C-compatibility type names
		case TOKEN_C_USHORT: return "c_ushort";
		// NOTE(review): "c_ishort" looks like a typo for "c_short" (the spelling the
		// lexer matches). It is kept as-is because the unit test's expected keyword
		// count of 81 appears to depend on this token not round-tripping; fix both together.
		case TOKEN_C_SHORT: return "c_ishort";
		case TOKEN_C_INT: return "c_int";
		case TOKEN_C_UINT: return "c_uint";
		case TOKEN_C_LONG: return "c_long";
		case TOKEN_C_ULONG: return "c_ulong";
		case TOKEN_C_LONGLONG: return "c_longlong";
		case TOKEN_C_ULONGLONG: return "c_ulonglong";
		case TOKEN_C_LONGDOUBLE: return "c_longdouble";

		// Sentinels (the question marks are escaped to avoid trigraphs)
		case INVALID_TOKEN: return "<\?\?\?>";
		case TOKEN_EOF: return "<eof>";
	}
	UNREACHABLE
}
|
||||
182
src/compiler/lexer.h
Normal file
182
src/compiler/lexer.h
Normal file
@@ -0,0 +1,182 @@
|
||||
#pragma once
|
||||
|
||||
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
||||
// Every token kind the lexer can produce.
// The enumerator order is significant: the unit test iterates over all values
// below INVALID_TOKEN, so INVALID_TOKEN and TOKEN_EOF must stay last.
// NOTE(review): the tag `_TokenType` (underscore + capital) is a reserved identifier in C,
// and TOKEN_ELIPSIS is a misspelling of "ellipsis"; both are left as-is here.
typedef enum _TokenType
{
	// Brackets, separators and '@'.
	TOKEN_LPAREN,
	TOKEN_RPAREN,
	TOKEN_LBRACE,
	TOKEN_RBRACE,
	TOKEN_LBRACKET,
	TOKEN_RBRACKET,
	TOKEN_COMMA,
	TOKEN_DOT,
	TOKEN_EOS,
	TOKEN_AT,

	// Operators and punctuation of one or two characters.
	TOKEN_PLUS,
	TOKEN_PLUSPLUS,
	TOKEN_PLUS_ASSIGN,
	TOKEN_BIT_NOT,
	TOKEN_NOT,
	TOKEN_MINUS,
	TOKEN_MINUSMINUS,
	TOKEN_MINUS_ASSIGN,
	TOKEN_STAR,
	TOKEN_MULT_ASSIGN,
	TOKEN_DIV,
	TOKEN_DIV_ASSIGN,
	TOKEN_MOD,
	TOKEN_MOD_ASSIGN,
	TOKEN_NOT_EQUAL,
	TOKEN_EQ,
	TOKEN_EQEQ,
	TOKEN_COLON,
	TOKEN_COLCOLON, // Not used but reserved
	TOKEN_DOTDOT,
	TOKEN_QUESTION,

	// Operators whose longest form has up to three characters
	// (this group also contains their shorter prefixes such as '>' and '&').
	TOKEN_ELIPSIS,
	TOKEN_GREATER,
	TOKEN_GREATER_EQ,
	TOKEN_RIGHT_SHIFT,
	TOKEN_RIGHT_SHIFT_ASSIGN,
	TOKEN_LESS,
	TOKEN_LESS_EQ,
	TOKEN_LEFT_SHIFT,
	TOKEN_LEFT_SHIFT_ASSIGN,
	TOKEN_ARROW, // Not used but reserved
	TOKEN_AND,
	TOKEN_AND_ASSIGN,
	TOKEN_AMP,
	TOKEN_BIT_AND_ASSIGN,
	TOKEN_OR,
	TOKEN_OR_ASSIGN,
	TOKEN_BIT_OR,
	TOKEN_BIT_OR_ASSIGN,
	TOKEN_BIT_XOR,
	TOKEN_BIT_XOR_ASSIGN,
	TOKEN_ELVIS,

	// Built-in type names.
	TOKEN_F256,
	TOKEN_I256,
	TOKEN_U256,
	TOKEN_F128,
	TOKEN_I128,
	TOKEN_U128,
	TOKEN_F64,
	TOKEN_I64,
	TOKEN_U64,
	TOKEN_F32,
	TOKEN_I32,
	TOKEN_U32,
	TOKEN_F16,
	TOKEN_I16,
	TOKEN_U16,
	TOKEN_I8,
	TOKEN_U8,
	TOKEN_QUAD,
	TOKEN_DOUBLE,
	TOKEN_FLOAT,
	TOKEN_HALF,
	TOKEN_LONG,
	TOKEN_ULONG,
	TOKEN_INT,
	TOKEN_UINT,
	TOKEN_SHORT,
	TOKEN_USHORT,
	TOKEN_BYTE,
	TOKEN_CHAR,
	TOKEN_BOOL,
	TOKEN_ISIZE,
	TOKEN_USIZE,

	// Identifiers and literals.

	// To keep the grammar unambiguous, identifiers are split
	// into separate token kinds at the lexer level.
	TOKEN_TYPE_IDENT,
	TOKEN_CAPS_IDENT,
	TOKEN_VAR_IDENT,

	// We want to parse @foo / #foo / $foo as single tokens.
	// Otherwise we would allow things like "@ foo", which would be pretty bad.
	TOKEN_AT_IDENT,
	TOKEN_HASH_IDENT,
	TOKEN_DOLLAR_IDENT,

	TOKEN_STRING,
	TOKEN_INTEGER,
	TOKEN_REAL,

	// Keywords.
	TOKEN_ALIAS, // Reserved
	TOKEN_AS,
	TOKEN_ASM,
	TOKEN_BREAK,
	TOKEN_CASE,
	TOKEN_CAST,
	TOKEN_CATCH,
	TOKEN_CONST,
	TOKEN_CONTINUE,
	TOKEN_DEFAULT,
	TOKEN_DEFER,
	TOKEN_DO,
	TOKEN_ELSE,
	TOKEN_ENUM,
	TOKEN_ERROR,
	TOKEN_FALSE,
	TOKEN_FOR,
	TOKEN_FUNC,
	TOKEN_GENERIC,
	TOKEN_GOTO,
	TOKEN_IF,
	TOKEN_IMPORT,
	TOKEN_LOCAL,
	TOKEN_MACRO,
	TOKEN_MODULE,
	TOKEN_NIL,
	TOKEN_PUBLIC,
	TOKEN_RETURN,
	TOKEN_STRUCT,
	TOKEN_SWITCH,
	TOKEN_THROW,
	TOKEN_THROWS,
	TOKEN_TRUE,
	TOKEN_TRY,
	TOKEN_TYPE, // Reserved
	TOKEN_TYPEDEF,
	TOKEN_UNION,
	TOKEN_UNTIL,
	TOKEN_VAR, // Reserved
	TOKEN_VOID,
	TOKEN_VOLATILE,
	TOKEN_WHILE,

	// C-compatibility type names (the "c_" prefixed keywords).
	TOKEN_C_USHORT,
	TOKEN_C_SHORT,
	TOKEN_C_INT,
	TOKEN_C_UINT,
	TOKEN_C_LONG,
	TOKEN_C_ULONG,
	TOKEN_C_LONGLONG,
	TOKEN_C_ULONGLONG,
	TOKEN_C_LONGDOUBLE,

	// Sentinels; keep these last.
	INVALID_TOKEN,
	TOKEN_EOF,

} TokenType;
|
||||
|
||||
// Returns a static display string for a token type: the source spelling for
// punctuation and keywords, a "<...>" placeholder for token classes.
const char *token_type_to_string(TokenType type);
// Classifies the identifier of `len` characters at `start` with the switch-based
// matcher; returns a keyword token type or TOKEN_VAR_IDENT.
TokenType identifier_type(const char* restrict start, int len);
// Same classification, implemented by dispatching on a hash of the length and
// the first two characters.
TokenType ident_type_fnv1(const char *restrict start, int len);
|
||||
17
src/compiler_tests/benchmark.c
Normal file
17
src/compiler_tests/benchmark.c
Normal file
@@ -0,0 +1,17 @@
|
||||
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "benchmark.h"
|
||||
#include <time.h>
|
||||
|
||||
// Processor time captured by the most recent bench_begin() call.
// Stored as clock_t (not int) so the value returned by clock() is never truncated.
static clock_t begin = 0;

/**
 * Start (or restart) the benchmark timer.
 */
void bench_begin(void)
{
	begin = clock();
}

/**
 * Processor time consumed since the last bench_begin().
 *
 * @return elapsed processor time in seconds
 */
double bench_mark(void)
{
	return (double)(clock() - begin) / (double)CLOCKS_PER_SEC;
}
|
||||
9
src/compiler_tests/benchmark.h
Normal file
9
src/compiler_tests/benchmark.h
Normal file
@@ -0,0 +1,9 @@
|
||||
#pragma once
|
||||
|
||||
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
||||
void bench_begin(void);
|
||||
double bench_mark(void);
|
||||
82
src/compiler_tests/tests.c
Normal file
82
src/compiler_tests/tests.c
Normal file
@@ -0,0 +1,82 @@
|
||||
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include <compiler/lexer.h>
|
||||
#include "tests.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "benchmark.h"
|
||||
|
||||
#define TEST_ASSERT(cond, text, ...) do { if (!(cond)) { printf("\nTEST FAILED: " text "\n", ##__VA_ARGS__); exit(-1); } } while (0)
|
||||
static void test_lexer(void)
|
||||
{
|
||||
#ifdef __OPTIMIZE__
|
||||
printf("--- RUNNING OPTIMIZED ---\n");
|
||||
#endif
|
||||
printf("Begin lexer testing.\n");
|
||||
printf("1. Check number of keywords...");
|
||||
int tokens_found = 0;
|
||||
const int EXPECTED_TOKENS = 81;
|
||||
const char* tokens[INVALID_TOKEN];
|
||||
int len[INVALID_TOKEN];
|
||||
for (int i = 0; i < INVALID_TOKEN; i++)
|
||||
{
|
||||
const char* token = token_type_to_string((TokenType)i);
|
||||
tokens[i] = token;
|
||||
len[i] = strlen(token);
|
||||
TokenType type = identifier_type(token, len[i]);
|
||||
TokenType type2 = ident_type_fnv1(token, len[i]);
|
||||
|
||||
if (type != TOKEN_VAR_IDENT)
|
||||
{
|
||||
tokens_found++;
|
||||
TEST_ASSERT(type == i, "Mismatch on token %s", token);
|
||||
if (type2 != type)
|
||||
{
|
||||
printf("\n(fnv1) Test mismatch on token %s, generated %s\n", token, token_type_to_string(type2));
|
||||
}
|
||||
}
|
||||
tokens[i] = "byte";
|
||||
len[i] = 4;
|
||||
}
|
||||
printf(" %d found.\n", tokens_found);
|
||||
TEST_ASSERT(ident_type_fnv1("alias ", 6) == TOKEN_VAR_IDENT, "Error in fnv1 ident");
|
||||
TEST_ASSERT(identifier_type("alias ", 6) == TOKEN_VAR_IDENT, "Error in switch ident");
|
||||
TEST_ASSERT(ident_type_fnv1("alias ", 5) != TOKEN_VAR_IDENT, "Error in fnv1 ident2");
|
||||
TEST_ASSERT(identifier_type("alias ", 5) != TOKEN_VAR_IDENT, "Error in switch ident2");
|
||||
TEST_ASSERT(tokens_found == EXPECTED_TOKENS, "Unexpected number of identifiers! Expected %d.", EXPECTED_TOKENS);
|
||||
|
||||
const int BENCH_REPEATS = 10000000;
|
||||
|
||||
printf("2. Test keyword lexing speed (switch)... ");
|
||||
bench_begin();
|
||||
for (int b = 0; b < BENCH_REPEATS; b++)
|
||||
{
|
||||
for (int i = 0; i < INVALID_TOKEN; i++)
|
||||
{
|
||||
identifier_type(tokens[i], len[i]);
|
||||
}
|
||||
}
|
||||
printf("complete in %fs\n", bench_mark());
|
||||
|
||||
printf("3. Test keyword lexing speed (fnv1)... ");
|
||||
bench_begin();
|
||||
for (int b = 0; b < BENCH_REPEATS; b++)
|
||||
{
|
||||
for (int i = 0; i < INVALID_TOKEN; i++)
|
||||
{
|
||||
ident_type_fnv1(tokens[i], len[i]);
|
||||
}
|
||||
}
|
||||
printf("complete in %fs\n", bench_mark());
|
||||
|
||||
|
||||
exit(0);
|
||||
}
|
||||
|
||||
// Entry point for the compiler's built-in unit tests.
// Currently runs only the lexer test, which terminates the process itself.
void compiler_tests(void)
{
	test_lexer();
}
|
||||
8
src/compiler_tests/tests.h
Normal file
8
src/compiler_tests/tests.h
Normal file
@@ -0,0 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
// Copyright (c) 2019 Christoffer Lerno. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
||||
void compiler_tests(void);
|
||||
@@ -2,7 +2,7 @@
|
||||
#include "build/build_options.h"
|
||||
#include "build/project_creation.h"
|
||||
#include "utils/errors.h"
|
||||
|
||||
#include "compiler_tests/tests.h"
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
@@ -12,6 +12,8 @@ int main(int argc, const char *argv[])
|
||||
case COMMAND_INIT:
|
||||
create_project();
|
||||
break;
|
||||
case COMMAND_UNIT_TEST:
|
||||
compiler_tests();
|
||||
case COMMAND_COMPILE:
|
||||
case COMMAND_COMPILE_RUN:
|
||||
case COMMAND_MISSING:
|
||||
|
||||
@@ -4,4 +4,18 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// Print a printf-style message plus a newline to stderr, then exit with EXIT_FAILURE.
#define error_exit(...) do { fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); exit(EXIT_FAILURE); } while(0)

// Report an unrecoverable internal error, prefixed with the enclosing function
// name and line number, then exit with status -1. The message goes to stderr,
// like error_exit, so diagnostics are not mixed into regular program output.
#define FATAL_ERROR(_string, ...) do { fprintf(stderr, "FATAL ERROR at %s:%d: " _string, __func__, __LINE__, ##__VA_ARGS__); fprintf(stderr, "\n"); exit(-1); } while(0)

// Markers for code paths that must never run / are not implemented yet.
// The trailing semicolon is part of the macro: UNREACHABLE is used as a bare
// statement without its own semicolon, so it must stay.
#define UNREACHABLE FATAL_ERROR("Cannot reach %s:%d", __func__, __LINE__);
#define TODO FATAL_ERROR("Not done yet %s:%d", __func__, __LINE__);

// Abort through FATAL_ERROR with the given printf-style message when _condition is false.
#define TEST_ASSERT(_condition, _string, ...) while (!(_condition)) { FATAL_ERROR(_string, ##__VA_ARGS__); }

// Compare two integer expressions (each evaluated once, as long long) and abort
// with a message naming _string when they differ.
// The trailing semicolon is kept so existing call sites written without one still compile.
#define EXPECT(_string, _value, _expected) \
	do { long long expect_actual_ = (_value); long long expect_wanted_ = (_expected); \
		TEST_ASSERT(expect_actual_ == expect_wanted_, "Checking " _string ": expected %lld but was %lld.", expect_wanted_, expect_actual_); } while(0);
|
||||
|
||||
Reference in New Issue
Block a user