Lexing updated with simpler code. Note: this change may contain bugs.

This commit is contained in:
Christoffer Lerno
2021-12-27 00:22:48 +01:00
committed by Christoffer Lerno
parent 0a9a014e4a
commit a0be188902
20 changed files with 388 additions and 329 deletions

View File

@@ -163,24 +163,21 @@ typedef struct
AstId end;
} DeferList;
typedef unsigned FileId;
typedef struct
{
FileId file_id;
const char *contents;
char *name;
char *dir_path;
const char *full_path;
SourceLoc start_id;
SourceLoc end_id;
SourceLoc *lines;
SourceLoc current_line_start;
} File;
typedef struct
{
File *file;
uint32_t line;
uint32_t col;
FileId file_id;
uint16_t col;
uint32_t row;
uint32_t start;
uint32_t length;
} SourceLocation;
@@ -1352,8 +1349,10 @@ typedef struct
uint32_t token_start_id;
const char *lexing_start;
const char *current;
uint32_t current_line;
uint32_t current_row;
uint32_t start_row;
const char *line_start;
const char *start_row_start;
File *file;
TokenData *latest_token_data;
SourceLocation *latest_token_loc;
@@ -1982,9 +1981,8 @@ void sema_error(Context *context, const char *message, ...);
void sema_prev_at_range3(SourceSpan span, const char *message, ...);
void sema_shadow_error(Decl *decl, Decl *old);
File *source_file_by_id(FileId file);
File *source_file_load(const char *filename, bool *already_loaded);
void source_file_append_line_end(File *file, SourceLoc loc);
SourcePosition source_file_find_position_in_file(File *file, SourceLoc loc);
static inline SourceSpan source_span_from_token_id(TokenId id)
{

View File

@@ -14,53 +14,80 @@ typedef enum
PRINT_TYPE_WARN
} PrintType;
static void print_error2(SourceLocation *location, const char *message, PrintType print_type)
#define LINES_SHOWN 4
static void print_error(SourceLocation *location, const char *message, PrintType print_type)
{
File *file = source_file_by_id(location->file_id);
if (active_target.test_output)
{
switch (print_type)
{
case PRINT_TYPE_ERROR:
eprintf("Error|%s|%d|%s\n", location->file->name, location->line, message);
eprintf("Error|%s|%d|%s\n", file->name, location->row, message);
return;
case PRINT_TYPE_PREV:
return;
case PRINT_TYPE_WARN:
eprintf("Warning|%s|%d|%s\n", location->file->name, location->line, message);
eprintf("Warning|%s|%d|%s\n", file->name, location->row, message);
return;
default:
UNREACHABLE
}
}
static const int LINES_SHOWN = 4;
unsigned max_line_length = (unsigned)round(log10(location->line)) + 1;
unsigned max_line_length = (unsigned)round(log10(location->row)) + 1;
char number_buffer[20];
snprintf(number_buffer, 20, "%%%dd: %%.*s\n", max_line_length);
// Insert end in case it's not yet there.
for (SourceLoc s = location->start; s < location->file->end_id; s++)
const char *file_contents = file->contents;
int lines_found = 0;
size_t line_starts[LINES_SHOWN + 1] = { 0, 0, 0, 0 };
uint32_t start = location->start;
if (start < 2)
{
if ((location->file->contents + s - location->file->start_id)[0] == '\n')
line_starts[++lines_found] = 0;
}
else
{
for (size_t i = start; i > 0; i--)
{
source_file_append_line_end(location->file, s);
break;
if (file_contents[i - 1] == '\n')
{
line_starts[++lines_found] = i;
if (lines_found >= LINES_SHOWN) break;
}
if (i == 1)
{
line_starts[++lines_found] = 0;
break;
}
}
}
size_t lines_in_file = vec_size(location->file->lines);
const char *start = NULL;
for (unsigned i = LINES_SHOWN; i > 0; i--)
for (size_t i = start; ; i++)
{
if (location->line < i) continue;
uint32_t line_number = location->line + 1 - i;
SourceLoc line_start = location->file->lines[line_number - 1];
SourceLoc line_end = line_number == lines_in_file ? location->file->end_id + 1 :
location->file->lines[line_number];
uint32_t line_len = line_end - line_start - 1;
start = location->file->contents + line_start - location->file->start_id;
eprintf(number_buffer, line_number, line_len, start);
switch (file_contents[i])
{
case '\0':
case '\n':
line_starts[0] = i + 1;
goto FOUND;
default:
continue;
}
}
FOUND:;
const char *start_char = NULL;
for (unsigned i = lines_found; i > 0; i--)
{
SourceLoc line_start = line_starts[i];
SourceLoc line_end = line_starts[i - 1] - 1;
uint32_t line_number = location->row + 1 - i;
uint32_t line_len = line_end - line_start;
start_char = file->contents + line_start;
eprintf(number_buffer, line_number, line_len, start_char);
}
eprintf(" ");
for (unsigned i = 0; i < max_line_length; i++)
@@ -70,7 +97,7 @@ static void print_error2(SourceLocation *location, const char *message, PrintTyp
for (unsigned i = 1; i < location->col; i++)
{
switch (start[i])
switch (start_char[i])
{
case '\t':
eprintf("\t");
@@ -87,13 +114,13 @@ static void print_error2(SourceLocation *location, const char *message, PrintTyp
switch (print_type)
{
case PRINT_TYPE_ERROR:
eprintf("(%s:%d) Error: %s\n\n", location->file->name, location->line, message);
eprintf("(%s:%d) Error: %s\n\n", file->name, location->row, message);
break;
case PRINT_TYPE_PREV:
eprintf("(%s:%d) %s\n\n", location->file->name, location->line, message);
eprintf("(%s:%d) %s\n\n", file->name, location->row, message);
break;
case PRINT_TYPE_WARN:
eprintf("(%s:%d) Warning: %s\n\n", location->file->name, location->line, message);
eprintf("(%s:%d) Warning: %s\n\n", file->name, location->row, message);
break;
default:
UNREACHABLE
@@ -105,7 +132,7 @@ static void vprint_error(SourceLocation *location, const char *message, va_list
{
char buffer[256];
vsnprintf(buffer, 256, message, args);
print_error2(location, buffer, PRINT_TYPE_ERROR);
print_error(location, buffer, PRINT_TYPE_ERROR);
}
@@ -135,7 +162,7 @@ void sema_prev_at_range3(SourceSpan span, const char *message, ...)
vsnprintf(buffer, 256, message, args);
SourceLocation loc = *start;
loc.length = end->start - start->start + end->length;
print_error2(&loc, buffer, PRINT_TYPE_PREV);
print_error(&loc, buffer, PRINT_TYPE_PREV);
va_end(args);
}
@@ -157,12 +184,12 @@ void sema_error_at_prev_end(Token token, const char *message, ...)
SourceLocation *curr = TOKLOC(token);
SourceLocation *prev = TOKLOC((TokenId) { token.id.index - 1 });
SourceLocation location;
if (curr->file != prev->file)
if (curr->file_id != prev->file_id)
{
// Ok, this is the first location, so then we create a "start" location:
location = *curr;
location.start = 0;
location.line = 1;
location.row = 1;
location.col = 1;
}
else

View File

@@ -94,7 +94,6 @@ typedef enum
CAST_XIERR,
CAST_PTRPTR,
CAST_PTRXI,
CAST_ARRPTR,
CAST_ARRVEC,
CAST_STRPTR,
CAST_PTRBOOL,

View File

@@ -18,10 +18,36 @@ typedef enum
DOC_END_ERROR,
} DocEnd;
/**
 * Validate that a computed column number fits in the 16-bit `col` field of
 * SourceLocation, aborting with an error if it does not.
 *
 * @param col the computed column (wide type so overflow is detectable)
 * @param row the row the column belongs to, used only in the error message
 * @return the column narrowed to uint16_t
 */
static inline uint16_t check_col(intptr_t col, uint32_t row)
{
	// %u matches the unsigned `row` argument (was %d, a signed/unsigned
	// specifier mismatch); 65535 is UINT16_MAX, the field's capacity.
	// NOTE(review): assumes error_exit is printf-style — confirm its signature.
	if (col > 65535) error_exit("Column on line %u exceeded %d.", row, 65535);
	return (uint16_t)col;
}
/**
 * Validate that a computed value fits in the lexer's line/length budget
 * (1 MiB), aborting with an error if it does not.
 *
 * @param line the computed value (line number or token length — callers in
 *             this commit pass both; the message assumes a line count)
 * @param row  the row reported in the error message
 * @return the value narrowed to uint32_t
 */
static inline uint32_t check_row(intptr_t line, uint32_t row)
{
	// %u matches the unsigned `row` argument (was %d, a signed/unsigned
	// specifier mismatch).
	// NOTE(review): assumes error_exit is printf-style — confirm its signature.
	if (line > 1024 * 1024) error_exit("Token on line %u exceeded %d.", row, 1024 * 1024);
	return (uint32_t)line;
}
// --- Lexing general methods.
static bool lexer_scan_token_inner(Lexer *lexer, LexMode mode);
// Mark the current scan position as the start of a new token, snapshotting
// the row number and the row's first character so the token's source
// location can be computed later (and restored by backtrace_to_lexing_start).
static inline void begin_new_token(Lexer *lexer)
{
	lexer->start_row_start = lexer->line_start;
	lexer->start_row = lexer->current_row;
	lexer->lexing_start = lexer->current;
}
// Rewind the lexer to the last token start: restores the scan position,
// row number and row start exactly as snapshotted by begin_new_token.
static inline void backtrace_to_lexing_start(Lexer *lexer)
{
	lexer->line_start = lexer->start_row_start;
	lexer->current_row = lexer->start_row;
	lexer->current = lexer->lexing_start;
}
// Peek at the current character in the buffer.
static inline char peek(Lexer *lexer)
{
@@ -38,14 +64,10 @@ static inline char prev(Lexer *lexer)
static inline void backtrack(Lexer *lexer)
{
lexer->current--;
}
// Store a line ending (and current line start at the current character)
void lexer_store_line_end(Lexer *lexer)
{
lexer->current_line++;
lexer->line_start = lexer->current + 1;
source_file_append_line_end(lexer->file, (SourceLoc)(lexer->file->start_id + lexer->current - lexer->file_begin));
if (lexer->current[0] == '\n')
{
lexer->current_row--;
}
}
// Peek one character ahead.
@@ -55,16 +77,24 @@ static inline char peek_next(Lexer *lexer)
}
// Return the current character and step one character forward.
static inline char next(Lexer *lexer)
static inline void next(Lexer *lexer)
{
return *(lexer->current++);
if (lexer->current[0] == '\n')
{
lexer->line_start = lexer->current + 1;
lexer->current_row++;
}
lexer->current++;
}
// Skip the x next characters.
static inline void skip(Lexer *lexer, int steps)
{
assert(steps > 0);
lexer->current += steps;
for (int i = 0; i < steps; i++)
{
next(lexer);
}
}
// Is the current character '\0' if so we assume we reached the end.
@@ -78,7 +108,7 @@ static inline bool match(Lexer *lexer, char expected)
{
if (reached_end(lexer)) return false;
if (*lexer->current != expected) return false;
lexer->current++;
next(lexer);
return true;
}
@@ -103,38 +133,28 @@ static inline void add_generic_token(Lexer *lexer, TokenType type)
token_type[0] = (unsigned char)type;
// Set the location.
location->file = lexer->file;
location->file_id = lexer->file->file_id;
location->start = (uint32_t)(lexer->lexing_start - lexer->file_begin);
// Calculate the column
if (lexer->lexing_start < lexer->line_start)
uint32_t line = lexer->start_row;
location->row = line;
if (line == lexer->current_row)
{
// In this case lexing started before the start of the current line.
// Start by looking at the previous line.
SourceLoc *current = &lexer->file->lines[lexer->current_line - 1];
location->line = lexer->current_line;
// Walk upwards until we find a line that starts before the current.
while (*current > location->start)
{
location->line--;
current--;
}
// We found the line we wanted, so the col is just an offset from the start.
location->col = location->start - *current + 1;
// Length is restricted to the end of the line.
location->length = current[1] - current[0] - 1;
}
else
{
// The simple case, where the parsing started on the current line.
location->line = lexer->current_line;
// Col is simple difference.
location->col = (unsigned) (lexer->lexing_start - lexer->line_start) + 1;
location->col = check_col(lexer->lexing_start - lexer->line_start + 1, line);
// Start is offset to file begin.
location->start = (SourceLoc) (lexer->lexing_start - lexer->file_begin);
// Length is diff between current and start.
location->length = (SourceLoc) (lexer->current - lexer->lexing_start);
location->length = check_row(lexer->current - lexer->lexing_start, line);
}
else
{
location->col = check_col(lexer->lexing_start - lexer->start_row_start + 1, line);
// Start is offset to file begin.
location->start = (SourceLoc) (lexer->lexing_start - lexer->file_begin);
location->length = 1;
}
// Return pointers to the data and the location,
// these maybe be used to fill in data.
lexer->latest_token_data = data;
@@ -153,22 +173,56 @@ static bool add_error_token(Lexer *lexer, const char *message, ...)
return false;
}
static bool add_error_token_at(Lexer *lexer, const char *loc, uint32_t len, const char *message, ...)
static bool add_error_token_at_start(Lexer *lexer, const char *message, ...)
{
va_list list;
va_start(list, message);
SourceLocation location = { .file = lexer->file,
.start = (uint32_t) (loc - lexer->file_begin),
.line = lexer->current_line,
.length = len,
.col = (uint32_t) (loc - lexer->line_start) + 1,
SourceLocation location = { .file_id = lexer->file->file_id,
.start = (uint32_t) (lexer->lexing_start - lexer->file_begin),
.row = lexer->start_row,
.length = 1,
.col = check_col((lexer->lexing_start - lexer->start_row_start) + 1, lexer->start_row),
};
sema_verror_range(&location, message, list);
va_end(list);
add_generic_token(lexer, TOKEN_INVALID_TOKEN);
return false;
}
static bool add_error_token_at(Lexer *lexer, const char *loc, uint32_t len, const char *message, ...)
{
va_list list;
va_start(list, message);
uint32_t current_line = lexer->current_row;
SourceLocation location = { .file_id = lexer->file->file_id,
.start = (uint32_t) (loc - lexer->file_begin),
.row = current_line,
.length = len,
.col = check_col((loc - lexer->line_start) + 1, current_line),
};
sema_verror_range(&location, message, list);
va_end(list);
add_generic_token(lexer, TOKEN_INVALID_TOKEN);
return false;
}
static bool add_error_token_at_current(Lexer *lexer, const char *message, ...)
{
va_list list;
va_start(list, message);
uint32_t current_line = lexer->current_row;
SourceLocation location = { .file_id = lexer->file->file_id,
.start = (uint32_t) (lexer->current - lexer->file_begin),
.row = current_line,
.length = 1,
.col = check_col((lexer->current - lexer->line_start) + 1, current_line),
};
sema_verror_range(&location, message, list);
va_end(list);
add_generic_token(lexer, TOKEN_INVALID_TOKEN);
return false;
}
// Add a new regular token.
static inline bool add_token(Lexer *lexer, TokenType type, const char *string)
{
@@ -204,7 +258,6 @@ static inline bool parse_line_comment(Lexer *lexer)
// If we found EOL, then walk past '\n'
if (!reached_end(lexer))
{
lexer_store_line_end(lexer);
next(lexer);
}
return success;
@@ -240,7 +293,6 @@ static inline bool parse_multiline_comment(Lexer *lexer)
}
break;
case '\n':
lexer_store_line_end(lexer);
break;
case '\0':
if (type != TOKEN_DOC_COMMENT) return add_token(lexer, type, lexer->lexing_start);
@@ -264,7 +316,6 @@ static void skip_whitespace(Lexer *lexer, LexMode lex_type)
{
case '\n':
if (lex_type != LEX_NORMAL) return;
lexer_store_line_end(lexer);
FALLTHROUGH;
case ' ':
case '\t':
@@ -296,7 +347,8 @@ static inline bool scan_ident(Lexer *lexer, TokenType normal, TokenType const_to
}
while (peek(lexer) == '_')
{
hash = FNV1a(next(lexer), hash);
hash = FNV1a(peek(lexer), hash);
next(lexer);
}
while (1)
{
@@ -333,12 +385,14 @@ static inline bool scan_ident(Lexer *lexer, TokenType normal, TokenType const_to
default:
goto EXIT;
}
hash = FNV1a(next(lexer), hash);
hash = FNV1a(peek(lexer), hash);
next(lexer);
}
// Allow bang!
if (peek(lexer) == '!' && type == normal)
{
hash = FNV1a(next(lexer), hash);
hash = FNV1a('!', hash);
next(lexer);
}
EXIT:;
uint32_t len = (uint32_t)(lexer->current - lexer->lexing_start);
@@ -353,6 +407,12 @@ static inline bool scan_ident(Lexer *lexer, TokenType normal, TokenType const_to
// --- Number scanning
/**
* For C3 we use the practice of f<bit-width> u<bit-width> and s<bit-width>
* @param lexer
* @param is_float
* @return
*/
static bool scan_number_suffix(Lexer *lexer, bool *is_float)
{
if (!is_alphanum_(peek(lexer))) return true;
@@ -390,12 +450,16 @@ static bool scan_number_suffix(Lexer *lexer, bool *is_float)
*/
static bool scan_oct(Lexer *lexer)
{
if (!is_oct(next(lexer)))
if (!is_oct(peek(lexer)))
{
backtrack(lexer);
return add_error_token_at(lexer, lexer->current, 1, "An expression starting with '0o' should be followed by octal numbers (0-7).");
return add_error_token_at_current(lexer, "An expression starting with '0o' should be followed by octal numbers (0-7).");
}
next(lexer);
while (is_oct_or_(peek(lexer))) next(lexer);
if (is_number(peek(lexer)))
{
return add_error_token_at_current(lexer, "An expression starting with '0o' should be followed by octal numbers (0-7).");
}
bool is_float = false;
if (!scan_number_suffix(lexer, &is_float)) return false;
if (is_float)
@@ -410,12 +474,16 @@ static bool scan_oct(Lexer *lexer)
**/
static bool scan_binary(Lexer *lexer)
{
if (!is_binary(next(lexer)))
if (!is_binary(peek(lexer)))
{
backtrack(lexer);
return add_error_token_at(lexer, lexer->current, 1, "An expression starting with '0b' should be followed by binary digits (0-1).");
return add_error_token_at_current(lexer, "An expression starting with '0b' should be followed by binary digits (0-1).");
}
next(lexer);
while (is_binary_or_(peek(lexer))) next(lexer);
if (is_number(peek((lexer))))
{
return add_error_token_at_current(lexer, "An expression starting with '0b' should be followed by binary digits (0-1).");
}
bool is_float = false;
if (!scan_number_suffix(lexer, &is_float)) return false;
if (is_float)
@@ -434,16 +502,21 @@ static inline bool scan_exponent(Lexer *lexer)
{
// Step past e/E or p/P
next(lexer);
char c = next(lexer);
char c = peek(lexer);
next(lexer);
// Step past +/-
if (c == '+' || c == '-') c = next(lexer);
if (c == '+' || c == '-')
{
c = peek(lexer);
next(lexer);
}
// Now we need at least one digit
if (!is_digit(c))
{
if (c == 0)
{
backtrack(lexer);
return add_error_token(lexer, "End of file was reached while parsing the exponent.");
return add_error_token_at_current(lexer, "End of file was reached while parsing the exponent.");
}
if (c == '\n') return add_error_token(lexer, "End of line was reached while parsing the exponent.");
if (c < 31 || c > 127) add_error_token(lexer, "An unexpected character was found while parsing the exponent.");
@@ -460,11 +533,11 @@ static inline bool scan_exponent(Lexer *lexer)
**/
static inline bool scan_hex(Lexer *lexer)
{
if (!is_hex(next(lexer)))
if (!is_hex(peek(lexer)))
{
backtrack(lexer);
return add_error_token_at(lexer, lexer->current, 1, "'0x' starts a hexadecimal number, so the next character should be 0-9, a-f or A-F.");
return add_error_token_at_current(lexer, "'0x' starts a hexadecimal number, so the next character should be 0-9, a-f or A-F.");
}
next(lexer);
while (is_hex_or_(peek(lexer))) next(lexer);
bool is_float = false;
if (peek(lexer) == '.' && peek_next(lexer) != '.')
@@ -472,7 +545,7 @@ static inline bool scan_hex(Lexer *lexer)
is_float = true;
next(lexer);
char c = peek(lexer);
if (c == '_') return add_error_token(lexer, "Can't parse this as a floating point value due to the '_' directly after decimal point.");
if (c == '_') return add_error_token_at_current(lexer, "'_' is not allowed directly after decimal point, try removing it.");
if (is_hex(c)) next(lexer);
while (is_hex_or_(peek(lexer))) next(lexer);
}
@@ -482,7 +555,11 @@ static inline bool scan_hex(Lexer *lexer)
is_float = true;
if (!scan_exponent(lexer)) return false;
}
if (prev(lexer) == '_') return add_error_token(lexer, "The number ended with '_', but that character needs to be between, not after, digits.");
if (prev(lexer) == '_')
{
backtrack(lexer);
return add_error_token_at_current(lexer, "The number ended with '_', which isn't allowed, please remove it.");
}
if (!scan_number_suffix(lexer, &is_float)) return false;
return add_token(lexer, is_float ? TOKEN_REAL : TOKEN_INTEGER, lexer->lexing_start);
}
@@ -511,7 +588,7 @@ static inline bool scan_dec(Lexer *lexer)
next(lexer);
// Check our rule to disallow 123._32
char c = peek(lexer);
if (c == '_') return add_error_token(lexer, "Can't parse this as a floating point value due to the '_' directly after decimal point.");
if (c == '_') return add_error_token_at_current(lexer, "'_' is not allowed directly after decimal point, try removing it.");
// Now walk until we see no more digits.
// This allows 123. as a floating point number.
while (is_digit_or_(peek(lexer))) next(lexer);
@@ -525,7 +602,11 @@ static inline bool scan_dec(Lexer *lexer)
if (!scan_exponent(lexer)) return false;
}
if (prev(lexer) == '_') return add_error_token(lexer, "The number ended with '_', but that character needs to be between, not after, digits.");
if (prev(lexer) == '_')
{
backtrack(lexer);
return add_error_token_at_current(lexer, "The number ended with '_', which isn't allowed, please remove it.");
}
if (!scan_number_suffix(lexer, &is_float)) return false;
return add_token(lexer, is_float ? TOKEN_REAL : TOKEN_INTEGER, lexer->lexing_start);
}
@@ -622,8 +703,9 @@ static inline int64_t scan_utf8(Lexer *lexer, unsigned char c)
for (int i = 1; i < utf8_bytes; i++)
{
result <<= 6U;
if (peek(lexer) == '\0') return 0xFFFD;
c = (unsigned char)next(lexer);
c = (unsigned char)peek(lexer);
if (c == '\0') return 0xFFFD;
next(lexer);
if ((c & 0xc0) != 0x80)
{
goto ERROR;
@@ -659,29 +741,29 @@ static inline bool scan_char(Lexer *lexer)
char c;
Int128 b = { 0, 0 };
while ((c = next(lexer)) != '\'')
while (!match(lexer, '\''))
{
c = peek(lexer);
next(lexer);
// End of file may occur:
if (c == '\0')
{
backtrack(lexer);
return add_error_token(lexer, "The character literal did not terminate.");
return add_error_token_at_start(lexer, "The character literal did not terminate.");
}
// We might exceed the width that we allow.
if (width > 15) return add_error_token(lexer, "The character literal exceeds 16 characters.");
if (width > 15) return add_error_token_at_start(lexer, "The character literal exceeds 16 characters.");
// Handle (expected) utf-8 characters.
if ((unsigned)c >= (unsigned)0x80)
{
if (width != 0) goto UNICODE_IN_MULTI;
const char *start = lexer->current;
int64_t utf8 = scan_utf8(lexer, (unsigned char)c);
if (utf8 < 0) return false;
if (!match(lexer, '\''))
{
if (peek(lexer) == '\0') continue;
lexer->lexing_start = start;
return add_error_token(lexer, "Unicode character literals may only contain one character, "
"please remove the additional ones or use all ASCII.");
backtrack(lexer);
return add_error_token_at_current(lexer, "Unicode character literals may only contain one character, "
"please remove the additional ones or use all ASCII.");
}
b.low = (uint64_t) utf8;
width = utf8 > 0xffff ? 4 : 2;
@@ -689,23 +771,24 @@ static inline bool scan_char(Lexer *lexer)
}
// Parse the escape code
signed char escape = ' ';
const char *start = lexer->current;
if (c == '\\')
{
assert(c == '\\');
c = next(lexer);
c = peek(lexer);
escape = is_valid_escape(c);
if (escape == -1)
{
backtrack(lexer);
lexer->lexing_start = start - 1;
lexer->lexing_start += 1;
if (c > ' ' && c <= 127)
{
next(lexer);
return add_error_token(lexer, "Invalid escape sequence '\\%c'.", c);
}
return add_error_token_at(lexer, start, 1, "An escape sequence was expected after '\\'.");
return add_error_token_at_current(lexer, "An escape sequence was expected after '\\'.");
}
next(lexer);
}
const char *escape_begin = lexer->current - 2;
switch (escape)
{
case 'x':
@@ -713,9 +796,7 @@ static inline bool scan_char(Lexer *lexer)
int64_t hex = scan_hex_literal(lexer, 2);
if (hex < 0)
{
lexer->lexing_start = start - 1;
// Fix underlining if this is an unfinished escape.
return add_error_token(lexer, "Expected a two character hex value after \\x.");
return add_error_token_at(lexer, escape_begin, lexer->current - escape_begin, "Expected a two character hex value after \\x.");
}
// We can now reassign c and use the default code.
c = (char)hex;
@@ -731,21 +812,19 @@ static inline bool scan_char(Lexer *lexer)
// The hex parsing may have failed, lacking more hex chars.
if (hex < 0)
{
lexer->lexing_start = start - 1;
return add_error_token(lexer, "Expected %s character hex value after \\%c.",
escape == 'u' ? "a four" : "an eight", escape);
begin_new_token(lexer);
return add_error_token_at(lexer, escape_begin, lexer->current - escape_begin,
"Expected %s character hex value after \\%c.",
escape == 'u' ? "a four" : "an eight", escape);
}
// If we don't see the end here, then something is wrong.
if (!match(lexer, '\''))
{
// It may be the end of the line, if so use the default handling by invoking "continue"
if (peek(lexer) == '\0') continue;
// Otherwise step forward and mark it as an error.
next(lexer);
lexer->lexing_start = lexer->current - 1;
return add_error_token(lexer,
"Character literals with '\\%c' can only contain one character, please remove this one.",
escape);
return add_error_token_at_current(lexer,
"Character literals with '\\%c' can only contain one character, please remove this one.",
escape);
}
// Assign the value and go to DONE.
b.low = (uint64_t) hex;
@@ -764,7 +843,6 @@ static inline bool scan_char(Lexer *lexer)
b = i128_shl64(b, 8);
b = i128_add64(b, (unsigned char)c);
}
assert(width > 0 && width <= 16);
if (width > 8 && !platform_target.int128)
{
@@ -790,9 +868,7 @@ static inline void skip_first_line_if_empty(Lexer *lexer)
{
case '\n':
// Line end? then we jump to the first token after line end.
lexer->current = current - 1;
lexer_store_line_end(lexer);
lexer->current++;
next(lexer);
return;
case ' ':
case '\t':
@@ -969,13 +1045,13 @@ bool scan_consume_end_of_multiline(Lexer *lexer, bool error_on_eof)
int consume_end = 3;
while (consume_end > 0)
{
char c = next(lexer);
char c = peek(lexer);
next(lexer);
if (c == '\0')
{
backtrack(lexer);
if (!error_on_eof) return false;
return add_error_token_at(lexer, lexer->current - 1, 1, "The multi-line string unexpectedly ended. "
"Did you forget a '\"\"\"' somewhere?");
return add_error_token_at_start(lexer, "The multi-line string unexpectedly ended. "
"Did you forget a '\"\"\"' somewhere?");
}
if (c == '"') consume_end--;
}
@@ -1023,7 +1099,6 @@ static inline bool scan_multiline_string(Lexer *lexer)
// update the line end and store it in the resulting buffer.
if (c == '\n')
{
lexer_store_line_end(lexer);
next(lexer);
destination[len++] = c;
line = 0;
@@ -1037,8 +1112,8 @@ static inline bool scan_multiline_string(Lexer *lexer)
// We reached EOF, or escape + end of file.
if (c == '\0' || (c == '\\' && peek(lexer) == '\0'))
{
return add_error_token_at(lexer, lexer->current - 1, 1, "The multi-line string unexpectedly ended. "
"Did you forget a '\"\"\"' somewhere?");
return add_error_token_at_start(lexer, "The multi-line string unexpectedly ended. "
"Did you forget a '\"\"\"' somewhere?");
}
// An escape sequence was reached.
@@ -1053,11 +1128,12 @@ static inline bool scan_multiline_string(Lexer *lexer)
int scanned = append_esc_string_token(destination, lexer->current, &len);
if (scanned < 0)
{
add_error_token_at(lexer, lexer->current - 1, 2, "Invalid escape in string.");
backtrack(lexer);
add_error_token_at_current(lexer, "Invalid escape in string.");
scan_consume_end_of_multiline(lexer, false);
return false;
}
lexer->current += scanned;
skip(lexer, scanned);
continue;
}
// Now first we skip any empty space if line has not been reached.
@@ -1080,10 +1156,6 @@ static inline void consume_to_end_quote(Lexer *lexer)
char c;
while ((c = peek(lexer)) != '\0' && c != '"')
{
if (c == '\n')
{
lexer_store_line_end(lexer);
}
next(lexer);
}
}
@@ -1114,22 +1186,24 @@ static inline bool scan_string(Lexer *lexer)
size_t len = 0;
while (lexer->current < end)
{
c = next(lexer);
c = peek(lexer);
next(lexer);
if (c == '\0' || (c == '\\' && peek(lexer) == '\0'))
{
if (c == '\0') backtrack(lexer);
add_error_token_at(lexer, lexer->current - 1, 1, "The end of the file was reached "
"while parsing the string. "
"Did you forget (or accidentally add) a '\"' somewhere?");
add_error_token_at_start(lexer, "The end of the file was reached "
"while parsing the string. "
"Did you forget (or accidentally add) a '\"' somewhere?");
consume_to_end_quote(lexer);
return false;
}
if (c == '\n' || (c == '\\' && peek(lexer) == '\n'))
{
add_error_token_at(lexer, lexer->current - 1, 1, "The end of the line was reached "
"while parsing the string. "
"Did you forget (or accidentally add) a '\"' somewhere?");
lexer->current--;
backtrack(lexer);
add_error_token_at_start(lexer, "The end of the line was reached "
"while parsing the string. "
"Did you forget (or accidentally add) a '\"' somewhere?");
consume_to_end_quote(lexer);
return false;
}
@@ -1138,11 +1212,11 @@ static inline bool scan_string(Lexer *lexer)
int scanned = append_esc_string_token(destination, lexer->current, &len);
if (scanned < 0)
{
add_error_token_at(lexer, lexer->current - 1, 2, "Invalid escape in string.");
add_error_token_at_current(lexer, "Invalid escape in string.");
consume_to_end_quote(lexer);
return false;
}
lexer->current += scanned;
skip(lexer, scanned);
continue;
}
destination[len++] = c;
@@ -1158,14 +1232,16 @@ static inline bool scan_string(Lexer *lexer)
static inline bool scan_raw_string(Lexer *lexer)
{
char c;
while ((c = next(lexer)) != '`' || peek(lexer) == '`')
while (1)
{
c = peek(lexer);
next(lexer);
if (c == '`' && peek(lexer) != '`') break;
if (c == '\0')
{
backtrack(lexer);
return add_error_token_at(lexer, lexer->lexing_start , 1, "Reached the end of the file looking for "
"the end of the raw string that starts "
"here. Did you forget a '`' somewhere?");
return add_error_token_at_start(lexer, "Reached the end of the file looking for "
"the end of the raw string that starts "
"here. Did you forget a '`' somewhere?");
}
if (c == '`') next(lexer);
}
@@ -1191,34 +1267,39 @@ static inline bool scan_raw_string(Lexer *lexer)
static inline bool scan_hex_array(Lexer *lexer)
{
char start_char = next(lexer); // Step past ' or "
const char *hexdata = lexer->current;
char start_char = peek(lexer);
next(lexer); // Step past ' or "
char c;
uint64_t len = 0;
while (1)
{
c = next(lexer);
if (c == start_char) break;
c = peek(lexer);
if (c == 0)
{
backtrack(lexer);
lexer->lexing_start = lexer->current - 1;
return add_error_token(lexer, "The hex string seems to be missing a terminating '%c'", start_char);
return add_error_token_at_current(lexer, "The hex string seems to be missing a terminating '%c'", start_char);
}
if (c == start_char) break;
if (is_hex(c))
{
next(lexer);
len++;
continue;
}
if (!is_whitespace(c))
if (is_whitespace(c))
{
lexer->lexing_start = hexdata - 1;
lexer->current = hexdata;
return add_error_token(lexer,
"'%c' isn't a valid hexadecimal digit, all digits should be a-z, A-Z and 0-9.",
c);
next(lexer);
continue;
}
if (c > ' ' && c < 127)
{
return add_error_token_at_current(lexer,
"'%c' isn't a valid hexadecimal digit, all digits should be a-z, A-Z and 0-9.",
c);
}
return add_error_token_at_current(lexer,
"This isn't a valid hexadecimal digit, all digits should be a-z, A-Z and 0-9.");
}
next(lexer);
if (len % 2)
{
return add_error_token(lexer, "The hexadecimal string is not an even length, did you miss a digit somewhere?");
@@ -1233,27 +1314,25 @@ static inline bool scan_base64(Lexer *lexer)
{
next(lexer); // Step past 6
next(lexer); // Step past 4
char start_char = next(lexer); // Step past ' or "
const char *b64data = lexer->current;
char start_char = peek(lexer);
next(lexer); // Step past ' or "
char c;
unsigned end_len = 0;
uint64_t len = 0;
while (1)
{
c = next(lexer);
if (c == start_char) break;
c = peek(lexer);
if (c == 0)
{
backtrack(lexer);
lexer->lexing_start = lexer->current - 1;
return add_error_token(lexer, "The base64 string seems to be missing a terminating '%c'", start_char);
return add_error_token_at_start(lexer, "The base64 string seems to be missing a terminating '%c'", start_char);
}
next(lexer);
if (c == start_char) break;
if (is_base64(c))
{
if (end_len)
{
lexer->lexing_start = lexer->current - 1;
return add_error_token(lexer, "'%c' can't be placed after an ending '='", c);
return add_error_token_at_current(lexer, "'%c' can't be placed after an ending '='", c);
}
len++;
continue;
@@ -1262,7 +1341,7 @@ static inline bool scan_base64(Lexer *lexer)
{
if (end_len > 1)
{
return add_error_token_at(lexer, lexer->current - 1, 1, "There cannot be more than 2 '=' at the end of a base64 string.", c);
return add_error_token_at_current(lexer, "There cannot be more than 2 '=' at the end of a base64 string.", c);
}
end_len++;
continue;
@@ -1271,9 +1350,9 @@ static inline bool scan_base64(Lexer *lexer)
{
if (c < ' ' || c > 127)
{
return add_error_token_at(lexer, lexer->current - 1, 1, "A valid base64 character was expected here.");
return add_error_token_at_current(lexer, "A valid base64 character was expected here.");
}
return add_error_token_at(lexer, lexer->current - 1, 1, "'%c' is not a valid base64 character.", c);
return add_error_token_at_current(lexer, "'%c' is not a valid base64 character.", c);
}
}
if (!end_len && len % 4 != 0)
@@ -1300,8 +1379,8 @@ static inline bool scan_base64(Lexer *lexer)
}
if ((len + end_len) % 4 != 0)
{
return add_error_token(lexer, "Base64 strings must either be padded to multiple of 4, or if unpadded "
"- only need 1 or 2 bytes of extra padding.");
return add_error_token_at_start(lexer, "Base64 strings must either be padded to multiple of 4, or if unpadded "
"- only need 1 or 2 bytes of extra padding.");
}
uint64_t decoded_len = (3 * len - end_len) / 4;
if (!add_token(lexer, TOKEN_BYTES, lexer->lexing_start)) return false;
@@ -1348,7 +1427,7 @@ static bool parse_add_end_of_docs_if_present(Lexer *lexer)
// Otherwise, gladly skip ahead and store the end.
skip(lexer, lookahead + 1);
add_token(lexer, TOKEN_DOCS_END, lexer->lexing_start);
lexer->lexing_start = lexer->current;
begin_new_token(lexer);
return true;
}
@@ -1357,10 +1436,9 @@ static void parse_add_end_of_doc_line(Lexer *lexer)
{
assert(peek(lexer) == '\n');
// Add the EOL token.
lexer_store_line_end(lexer);
next(lexer);
add_token(lexer, TOKEN_DOCS_EOL, lexer->lexing_start);
lexer->lexing_start = lexer->current;
begin_new_token(lexer);
// Skip whitespace
skip_whitespace(lexer, LEX_DOCS);
// And any leading stars:
@@ -1376,7 +1454,7 @@ static DocEnd parse_doc_remainder(Lexer *lexer)
{
// Skip all initial whitespace.
skip_whitespace(lexer, LEX_DOCS);
lexer->lexing_start = lexer->current;
begin_new_token(lexer);
int characters_read = 0;
while (1)
@@ -1391,7 +1469,7 @@ static DocEnd parse_doc_remainder(Lexer *lexer)
if (characters_read > 0)
{
add_token(lexer, TOKEN_DOCS_LINE, 0);
lexer->lexing_start = lexer->current;
begin_new_token(lexer);
}
if (parse_add_end_of_docs_if_present(lexer)) return DOC_END_LAST;
// Otherwise use default parsing.
@@ -1401,14 +1479,14 @@ static DocEnd parse_doc_remainder(Lexer *lexer)
if (characters_read > 0)
{
add_token(lexer, TOKEN_DOCS_LINE, 0);
lexer->lexing_start = lexer->current;
begin_new_token(lexer);
}
return DOC_END_EOL;
case '\0':
if (characters_read > 0)
{
add_token(lexer, TOKEN_DOCS_LINE, 0);
lexer->lexing_start = lexer->current;
begin_new_token(lexer);
}
return DOC_END_EOF;
default:
@@ -1497,18 +1575,21 @@ static DocEnd parse_doc_param_directive(Lexer *lexer)
return parse_doc_remainder(lexer);
}
static DocEnd parse_doc_directive(Lexer *lexer)
{
// We expect a directive here.
if (!is_letter(peek_next(lexer)))
begin_new_token(lexer);
// First parse the '@'
next(lexer);
add_token(lexer, TOKEN_DOCS_DIRECTIVE, "@");
begin_new_token(lexer);
if (!is_letter(peek(lexer)))
{
next(lexer);
return add_error_token(lexer, "Expected doc directive here.");
}
lexer->lexing_start = lexer->current;
// First parse the '@'
skip(lexer, 1);
add_token(lexer, TOKEN_DOCS_DIRECTIVE, "@");
lexer->lexing_start = lexer->current;
// Then our keyword
if (!scan_ident(lexer, TOKEN_IDENT, TOKEN_CONST, TOKEN_TYPE_IDENT, 0)) return DOC_END_ERROR;
@@ -1557,7 +1638,10 @@ static bool parse_doc_comment(Lexer *lexer)
skip_whitespace(lexer, LEX_DOCS);
// 2. Did we find the end?
if (reached_end(lexer)) return add_error_token(lexer, "Missing '*/' to end the doc comment.");
if (reached_end(lexer))
{
return add_error_token_at_start(lexer, "Missing '*/' to end the doc comment.");
}
// 3. See if we reach the end of the docs.
if (parse_add_end_of_docs_if_present(lexer)) return true;
@@ -1607,7 +1691,7 @@ static bool lexer_scan_token_inner(Lexer *lexer, LexMode mode)
skip_whitespace(lexer, mode);
// Point start to the first non-whitespace character.
lexer->lexing_start = lexer->current;
begin_new_token(lexer);
if (reached_end(lexer))
{
@@ -1615,7 +1699,8 @@ static bool lexer_scan_token_inner(Lexer *lexer, LexMode mode)
return add_token(lexer, TOKEN_EOF, "\n") && false;
}
char c = next(lexer);
char c = peek(lexer);
next(lexer);
switch (c)
{
case '@':
@@ -1635,10 +1720,10 @@ static bool lexer_scan_token_inner(Lexer *lexer, LexMode mode)
if (is_letter(peek(lexer)))
{
add_token(lexer, TOKEN_BUILTIN, "$$");
lexer->lexing_start = lexer->current;
begin_new_token(lexer);
return scan_ident(lexer, TOKEN_IDENT, TOKEN_CONST_IDENT, TOKEN_TYPE_IDENT, 0);
}
return add_error_token(lexer, "Expected a letter after $$.");
return add_error_token_at_current(lexer, "Expected a letter after $$.");
}
return scan_ident(lexer, TOKEN_CT_IDENT, TOKEN_CT_CONST_IDENT, TOKEN_CT_TYPE_IDENT, '$');
case ',':
@@ -1744,7 +1829,7 @@ static bool lexer_scan_token_inner(Lexer *lexer, LexMode mode)
}
if (c < 0)
{
return add_error_token(lexer, "The 0%x character may not be placed outside of a string or comment, did you perhaps forget a \" somewhere?", (uint8_t)c);
return add_error_token(lexer, "The 0x%x character may not be placed outside of a string or comment, did you forget a \" somewhere?", (uint8_t)c);
}
return add_error_token(lexer, "'%c' may not be placed outside of a string or comment, did you perhaps forget a \" somewhere?", c);
@@ -1757,10 +1842,10 @@ void lexer_lex_file(Lexer *lexer)
{
lexer->token_start_id = (uint32_t) toktype_arena.allocated;
lexer->file_begin = lexer->file->contents;
lexer->lexing_start = lexer->file_begin;
lexer->current = lexer->lexing_start;
lexer->current_line = 1;
lexer->current = lexer->file_begin;
lexer->line_start = lexer->current;
lexer->current_row = 1;
begin_new_token(lexer);
const unsigned char *check = (const unsigned char *)lexer->current;
unsigned c;
int balance = 0;
@@ -1801,7 +1886,7 @@ void lexer_lex_file(Lexer *lexer)
DONE:
if (balance != 0)
{
add_error_token(lexer, "Invalid encoding - Unbalanced bidirectional markers.");
add_error_token_at_start(lexer, "Invalid encoding - Unbalanced bidirectional markers.");
return;
}
while(1)
@@ -1810,7 +1895,7 @@ DONE:
{
if (reached_end(lexer)) break;
while (!reached_end(lexer) && peek(lexer) != '\n') next(lexer);
lexer->lexing_start = lexer->current;
begin_new_token(lexer);
continue;
}
}

View File

@@ -21,7 +21,7 @@ static inline LLVMMetadataRef llvm_get_debug_struct(GenContext *c, Type *type, c
scope,
external_name_len ? type->name : "", external_name_len ? strlen(type->name) : 0,
loc ? c->debug.file : NULL,
loc ? loc->line : 0,
loc ? loc->row : 0,
type_size(type) * 8,
(uint32_t)(type_abi_alignment(type) * 8),
flags, NULL,
@@ -43,7 +43,7 @@ static inline LLVMMetadataRef llvm_get_debug_member(GenContext *c, Type *type, c
scope,
name, strlen(name),
loc ? c->debug.file : NULL,
loc ? loc->line : 0,
loc ? loc->row : 0,
type_size(type) * 8,
(uint32_t)(type_abi_alignment(type) * 8),
offset * 8, flags, llvm_get_debug_type_internal(c, type, scope));
@@ -80,7 +80,7 @@ void llvm_emit_debug_global_var(GenContext *c, Decl *global)
global->external_name,
strlen(global->external_name),
c->debug.file,
loc->line,
loc->row,
llvm_get_debug_type(c, global->type),
global->visibility == VISIBLE_LOCAL,
LLVMDIBuilderCreateExpression(c->debug.builder, NULL, 0),
@@ -116,11 +116,11 @@ void llvm_emit_debug_function(GenContext *c, Decl *decl)
decl->name, TOKLEN(decl->name_token),
decl->external_name, strlen(decl->external_name),
c->debug.file,
loc->line,
loc->row,
llvm_get_debug_type(c, decl->type),
decl->visibility == VISIBLE_LOCAL,
true,
loc->line,
loc->row,
flags,
active_target.optimization_level != OPTIMIZATION_NONE);
LLVMSetSubprogram(decl->backend_ref, c->debug.function);
@@ -136,7 +136,7 @@ void llvm_emit_debug_local_var(GenContext *c, Decl *decl)
decl->name,
TOKLEN(decl->name_token),
c->debug.file,
location->line,
location->row,
llvm_get_debug_type(c, decl->type),
active_target.optimization_level != OPTIMIZATION_NONE,
LLVMDIFlagZero,
@@ -147,7 +147,7 @@ void llvm_emit_debug_local_var(GenContext *c, Decl *decl)
LLVMDIBuilderInsertDeclareAtEnd(c->debug.builder,
decl->backend_ref, var,
LLVMDIBuilderCreateExpression(c->debug.builder, NULL, 0),
LLVMDIBuilderCreateDebugLocation(c->context, location->line, location->col,
LLVMDIBuilderCreateDebugLocation(c->context, location->row, location->col,
c->debug.function, inline_at),
LLVMGetInsertBlock(c->builder));
}
@@ -171,7 +171,7 @@ void llvm_emit_debug_parameter(GenContext *c, Decl *parameter, unsigned index)
strlen(name),
index + 1,
c->debug.file,
loc->line,
loc->row,
llvm_get_debug_type(c, parameter->type),
always_preserve,
LLVMDIFlagZero);
@@ -181,8 +181,8 @@ void llvm_emit_debug_parameter(GenContext *c, Decl *parameter, unsigned index)
parameter->backend_ref,
parameter->var.backend_debug_ref,
LLVMDIBuilderCreateExpression(c->debug.builder, NULL, 0),
LLVMDIBuilderCreateDebugLocation(c->context, loc->line, loc->col, c->debug.function,
inline_at),
LLVMDIBuilderCreateDebugLocation(c->context, loc->row, loc->col, c->debug.function,
inline_at),
LLVMGetInsertBlock(c->builder));
@@ -200,7 +200,7 @@ void llvm_emit_debug_location(GenContext *context, SourceSpan location)
LLVMMetadataRef scope = llvm_debug_current_scope(context);
LLVMMetadataRef loc = LLVMDIBuilderCreateDebugLocation(context->context,
source_loc->line,
source_loc->row,
source_loc->col,
scope, /* inlined at */ 0);
@@ -212,7 +212,7 @@ static LLVMMetadataRef llvm_debug_forward_comp(GenContext *c, Type *type, const
return LLVMDIBuilderCreateReplaceableCompositeType(c->debug.builder, id_counter++,
type->name, strlen(type->name),
scope,
c->debug.file, loc ? loc->line : 0,
c->debug.file, loc ? loc->row : 0,
1 /* version TODO */,
type_size(type) * 8,
type_abi_alignment(type) * 8,
@@ -237,7 +237,7 @@ void llvm_debug_push_lexical_scope(GenContext *context, SourceSpan location)
LLVMMetadataRef block =
LLVMDIBuilderCreateLexicalBlock(context->debug.builder, scope, context->debug.file,
source_loc->line,
source_loc->row,
source_loc->col);
llvm_debug_scope_push(context, block);
@@ -301,7 +301,7 @@ static LLVMMetadataRef llvm_debug_enum_type(GenContext *c, Type *type, LLVMMetad
LLVMMetadataRef real = LLVMDIBuilderCreateEnumerationType(c->debug.builder,
scope,
type->decl->name, TOKLEN(type->decl->name_token),
c->debug.file, location->line, type_size(type) * 8,
c->debug.file, location->row, type_size(type) * 8,
type_abi_alignment(type) * 8,
elements, vec_size(elements),
llvm_get_debug_type(c, enum_real_type));
@@ -344,7 +344,7 @@ static LLVMMetadataRef llvm_debug_structlike_type(GenContext *c, Type *type, LLV
scope,
type->decl->name ? type->decl->name : "",
type->decl->name ? TOKLEN(type->decl->name_token) : 0,
c->debug.file, location->line, type_size(type) * 8,
c->debug.file, location->row, type_size(type) * 8,
type_abi_alignment(type) * 8,
LLVMDIFlagZero,
elements, vec_size(members),
@@ -440,7 +440,7 @@ static LLVMMetadataRef llvm_debug_typedef_type(GenContext *c, Type *type)
LLVMMetadataRef real = LLVMDIBuilderCreateTypedef(c->debug.builder,
llvm_get_debug_type(c, original_type),
decl->name, TOKLEN(decl->name_token),
c->debug.file, location->line,
c->debug.file, location->row,
c->debug.file, type_abi_alignment(type));
if (type->backend_debug_type)
{

View File

@@ -1181,8 +1181,6 @@ void llvm_emit_cast(GenContext *c, CastKind cast_kind, BEValue *value, Type *to_
case CAST_SAPTR:
llvm_emit_subarray_pointer(c, value, value);
break;
case CAST_ARRPTR:
TODO
case CAST_EREU:
// This is a no op.
assert(type_lowering(to_type) == type_lowering(from_type));
@@ -2319,6 +2317,7 @@ static void gencontext_emit_slice(GenContext *c, BEValue *be_value, Expr *expr)
// Calculate the size
LLVMValueRef size = LLVMBuildSub(c->builder, LLVMBuildAdd(c->builder, end.value, llvm_const_int(c, start.type, 1), ""), start.value, "size");
LLVMValueRef start_pointer;
switch (parent.type->type_kind)
{
case TYPE_ARRAY:
@@ -2336,8 +2335,11 @@ static void gencontext_emit_slice(GenContext *c, BEValue *be_value, Expr *expr)
case TYPE_POINTER:
start_pointer = llvm_emit_pointer_inbounds_gep_raw(c, llvm_get_pointee_type(c, parent.type), parent.value, start.value);
break;
default:
case TYPE_FLEXIBLE_ARRAY:
case TYPE_VECTOR:
TODO
default:
UNREACHABLE
}
// Create a new subarray type
@@ -3500,7 +3502,8 @@ static inline void llvm_emit_force_unwrap_expr(GenContext *c, BEValue *be_value,
{
// TODO, we should add info about the error.
SourceLocation *loc = TOKLOC(expr->span.loc);
llvm_emit_debug_output(c, "Runtime error force unwrap!", loc->file->name, c->cur_func_decl->external_name, loc->line);
File *file = source_file_by_id(loc->file_id);
llvm_emit_debug_output(c, "Runtime error force unwrap!", file->name, c->cur_func_decl->external_name, loc->row);
llvm_emit_call_intrinsic(c, intrinsic_id.trap, NULL, 0, NULL, 0);
LLVMBuildUnreachable(c->builder);
c->current_block = NULL;

View File

@@ -431,7 +431,8 @@ void llvm_emit_for_stmt(GenContext *c, Ast *ast)
if (loop == LOOP_INFINITE)
{
SourceLocation *loc = TOKLOC(ast->span.loc);
llvm_emit_debug_output(c, "Infinite loop found", loc->file->name, c->cur_func_decl->external_name, loc->line);
File *file = source_file_by_id(loc->file_id);
llvm_emit_debug_output(c, "Infinite loop found", file->name, c->cur_func_decl->external_name, loc->row);
LLVMBuildUnreachable(c->builder);
LLVMBasicBlockRef block = llvm_basic_block_new(c, "unreachable_block");
c->current_block = NULL;
@@ -994,7 +995,8 @@ static inline void llvm_emit_assert_stmt(GenContext *c, Ast *ast)
{
error = "Assert violation";
}
llvm_emit_debug_output(c, error, loc->file->name, c->cur_func_decl->name, loc->line);
File *file = source_file_by_id(loc->file_id);
llvm_emit_debug_output(c, error, file->name, c->cur_func_decl->name, loc->row);
llvm_emit_call_intrinsic(c, intrinsic_id.trap, NULL, 0, NULL, 0);
llvm_emit_br(c, on_ok);
llvm_emit_block(c, on_ok);
@@ -1045,7 +1047,8 @@ static inline void llvm_emit_asm_stmt(GenContext *c, Ast *ast)
static inline void gencontext_emit_unreachable_stmt(GenContext *context, Ast *ast)
{
SourceLocation *loc = TOKLOC(ast->span.loc);
llvm_emit_debug_output(context, "Unreachable statement reached.", loc->file->name, context->cur_func_decl->external_name, loc->line);
File *file = source_file_by_id(loc->file_id);
llvm_emit_debug_output(context, "Unreachable statement reached.", file->name, context->cur_func_decl->external_name, loc->row);
llvm_emit_call_intrinsic(context, intrinsic_id.trap, NULL, 0, NULL, 0);
LLVMBuildUnreachable(context->builder);
LLVMBasicBlockRef block = llvm_basic_block_new(context, "unreachable_block");
@@ -1222,7 +1225,8 @@ void llvm_emit_panic_if_true(GenContext *c, BEValue *value, const char *panic_na
assert(llvm_value_is_bool(value));
llvm_emit_cond_br(c, value, panic_block, ok_block);
llvm_emit_block(c, panic_block);
llvm_emit_debug_output(c, panic_name, loc->file->name, c->cur_func_decl->name, loc->line);
File *file = source_file_by_id(loc->file_id);
llvm_emit_debug_output(c, panic_name, file->name, c->cur_func_decl->name, loc->row);
llvm_emit_call_intrinsic(c, intrinsic_id.trap, NULL, 0, NULL, 0);
llvm_emit_br(c, ok_block);
llvm_emit_block(c, ok_block);
@@ -1230,13 +1234,14 @@ void llvm_emit_panic_if_true(GenContext *c, BEValue *value, const char *panic_na
void llvm_emit_panic_on_true(GenContext *c, LLVMValueRef value, const char *panic_name, SourceLocation *loc)
{
File *file = source_file_by_id(loc->file_id);
LLVMBasicBlockRef panic_block = llvm_basic_block_new(c, "panic");
LLVMBasicBlockRef ok_block = llvm_basic_block_new(c, "checkok");
BEValue be_value;
llvm_value_set_bool(&be_value, value);
llvm_emit_cond_br(c, &be_value, panic_block, ok_block);
llvm_emit_block(c, panic_block);
llvm_emit_debug_output(c, panic_name, loc->file->name, c->cur_func_decl->name, loc->line);
llvm_emit_debug_output(c, panic_name, file->name, c->cur_func_decl->name, loc->row);
llvm_emit_call_intrinsic(c, intrinsic_id.trap, NULL, 0, NULL, 0);
llvm_emit_br(c, ok_block);
llvm_emit_block(c, ok_block);

View File

@@ -1247,15 +1247,16 @@ static Expr *parse_bytes_expr(Context *context, Expr *left)
{
TokenData *token_data = tokendata_from_token(context->lex.tok);
SourceLocation *loc = TOKLOC(context->lex.tok);
File *file = source_file_by_id(loc->file_id);
if (token_data->is_base64)
{
const char *base64data = &loc->file->contents[loc->start] + 4;
const char *base64data = &file->contents[loc->start] + 4;
const char *end = base64data + loc->length - 1;
parse_base64(&data_current, data_current + token_data->len, base64data, end);
}
else
{
const char *hexdata = &loc->file->contents[loc->start] + 2;
const char *hexdata = &file->contents[loc->start] + 2;
const char *end = hexdata + loc->length - 1;
parse_hex(&data_current, hexdata, end);
}

View File

@@ -48,7 +48,7 @@ inline void advance(Context *context)
SourceLocation *curr = TOKLOC(context->lex.tok);
SourceLocation *next = TOKLOC(context->lex.next_tok);
vec_add(context->comments, context->lex.next_tok);
if (curr->line == next->line)
if (curr->row == next->row)
{
if (context->trailing_comment)
{

View File

@@ -198,7 +198,6 @@ bool expr_cast_is_constant_eval(Expr *expr, ConstantEvalKind eval_kind)
case CAST_EREU:
case CAST_XIERR:
case CAST_PTRPTR:
case CAST_ARRPTR:
case CAST_STRPTR:
case CAST_PTRBOOL:
case CAST_BOOLINT:
@@ -1737,8 +1736,8 @@ bool sema_expr_analyse_macro_call(Context *context, Expr *call_expr, Expr *struc
context->macro_scope = (MacroScope){
.body_param = decl->macro_decl.block_parameter.index ? TOKSTR(decl->macro_decl.block_parameter) : NULL,
.macro = decl,
.inline_line = TOKLOC(call_expr->span.loc)->line,
.original_inline_line = old_macro_scope.depth ? old_macro_scope.original_inline_line : TOKLOC(call_expr->span.loc)->line,
.inline_line = TOKLOC(call_expr->span.loc)->row,
.original_inline_line = old_macro_scope.depth ? old_macro_scope.original_inline_line : TOKLOC(call_expr->span.loc)->row,
.locals_start = context->active_scope.current_local,
.depth = old_macro_scope.depth + 1,
.yield_symbol_start = first_local,
@@ -5938,7 +5937,7 @@ static inline bool sema_expr_analyse_placeholder(Context *context, Expr *expr)
}
if (string == kw_LINEREAL)
{
expr_rewrite_to_int_const(expr, type_isize, TOKLOC(expr->placeholder_expr.identifier)->line, true);
expr_rewrite_to_int_const(expr, type_isize, TOKLOC(expr->placeholder_expr.identifier)->row, true);
return true;
}
if (string == kw_LINE)
@@ -5949,7 +5948,7 @@ static inline bool sema_expr_analyse_placeholder(Context *context, Expr *expr)
}
else
{
expr_rewrite_to_int_const(expr, type_isize, TOKLOC(expr->placeholder_expr.identifier)->line, true);
expr_rewrite_to_int_const(expr, type_isize, TOKLOC(expr->placeholder_expr.identifier)->row, true);
}
return true;
}

View File

@@ -620,7 +620,7 @@ static inline bool sema_analyse_stmt_placement(Expr *cond, Ast *stmt)
if (stmt->ast_kind == AST_COMPOUND_STMT) return true;
SourceLocation *end_of_cond = TOKLOC(cond->span.end_loc);
SourceLocation *start_of_then = TOKLOC(stmt->span.loc);
return end_of_cond->line == start_of_then->line;
return end_of_cond->row == start_of_then->row;
}
/**
@@ -1467,7 +1467,7 @@ static inline bool sema_analyse_if_stmt(Context *context, Ast *statement)
{
SourceLocation *end_of_cond = TOKLOC(cond->span.end_loc);
SourceLocation *start_of_then = TOKLOC(statement->if_stmt.then_body->span.loc);
if (end_of_cond->line != start_of_then->line)
if (end_of_cond->row != start_of_then->row)
{
SEMA_ERROR(statement->if_stmt.then_body,
"The 'then' part of a single line if-statement must start on the same line as the 'if' or use '{ }'");

View File

@@ -15,7 +15,12 @@
static const size_t LEXER_FILES_START_CAPACITY = 128;
File pseudo_file;
File *source_file_by_id(FileId file)
{
assert(file < vec_size(global_context.loaded_sources));
return global_context.loaded_sources[file];
}
File *source_file_load(const char *filename, bool *already_loaded)
{
@@ -45,62 +50,12 @@ File *source_file_load(const char *filename, bool *already_loaded)
size_t size;
const char* source_text = read_file(filename, &size);
File *file = CALLOCS(File);
file->file_id = vec_size(global_context.loaded_sources);
file->full_path = full_path;
file->start_id = vec_size(global_context.loaded_sources) ? VECLAST(global_context.loaded_sources)->end_id : 0;
file->current_line_start = file->start_id;
file->contents = source_text;
ASSERT(file->start_id + size < UINT32_MAX, "Total files loaded exceeded %d bytes", UINT32_MAX);
file->end_id = (SourceLoc) (file->start_id + size);
size_t pre_allocated_lines = size / 40;
file->lines = VECNEW(SourceLoc, pre_allocated_lines < 16 ? 16 : pre_allocated_lines);
vec_add(file->lines, file->start_id);
path_get_dir_and_filename_from_full(file->full_path, &file->name, &file->dir_path);
vec_add(global_context.loaded_sources, file);
return file;
}
void source_file_append_line_end(File *file, SourceLoc loc)
{
if (file->current_line_start > loc) return;
file->current_line_start = loc + 1;
vec_add(file->lines, file->current_line_start);
}
SourcePosition source_file_find_position_in_file(File *file, SourceLoc loc)
{
assert(file->start_id <= loc);
unsigned lines = vec_size(file->lines);
unsigned low = 0;
unsigned high = lines;
while (1)
{
// Line found iff line_start[mid] <= loc && line_start[mid + 1] < loc
// Binary search
uint32_t mid = (high + low) / 2;
// Mid is before the location.
SourceLoc line_start = file->lines[mid];
if (line_start > loc)
{
high = mid;
continue;
}
if (mid + 1 != lines && file->lines[mid + 1] <= loc)
{
low = mid;
continue;
}
return (SourcePosition)
{
.file = file,
.line = mid + 1,
.col = loc - line_start + 1,
.loc = loc,
.start = file->contents + loc - file->start_id,
};
}
}

View File

@@ -14,29 +14,6 @@ void test_file(void)
{
File file;
memset(&file, 0, sizeof(file));
file.start_id = 3;
file.contents = "";
vec_add(file.lines, file.start_id);
TEST_ASSERT(source_file_find_position_in_file(&file, 3).line == 1, "Expected first line");
TEST_ASSERT(source_file_find_position_in_file(&file, 10).line == 1, "Expected first line");
source_file_append_line_end(&file, 9);
TEST_ASSERT(source_file_find_position_in_file(&file, 3).line == 1, "Expected first line");
TEST_ASSERT(source_file_find_position_in_file(&file, 5).line == 1, "Expected first line");
TEST_ASSERT(source_file_find_position_in_file(&file, 10).line == 2, "Expected second line");
source_file_append_line_end(&file, 19);
TEST_ASSERT(source_file_find_position_in_file(&file, 3).line == 1, "Expected first line");
TEST_ASSERT(source_file_find_position_in_file(&file, 5).line == 1, "Expected first line");
TEST_ASSERT(source_file_find_position_in_file(&file, 10).line == 2, "Expected second line");
TEST_ASSERT(source_file_find_position_in_file(&file, 15).line == 2, "Expected second line");
TEST_ASSERT(source_file_find_position_in_file(&file, 21).line == 3, "Expected third line");
source_file_append_line_end(&file, 29);
TEST_ASSERT(source_file_find_position_in_file(&file, 3).line == 1, "Expected first line");
TEST_ASSERT(source_file_find_position_in_file(&file, 5).line == 1, "Expected first line");
TEST_ASSERT(source_file_find_position_in_file(&file, 10).line == 2, "Expected second line");
TEST_ASSERT(source_file_find_position_in_file(&file, 15).line == 2, "Expected second line");
TEST_ASSERT(source_file_find_position_in_file(&file, 21).line == 3, "Expected third line");
TEST_ASSERT(source_file_find_position_in_file(&file, 25).line == 3, "Expected third line");
TEST_ASSERT(source_file_find_position_in_file(&file, 31).line == 4, "Expected fourth line");
}
#define i128(x_, y_) ((Int128){x_, y_})
void test128()

View File

@@ -1 +1 @@
#define COMPILER_VERSION "PRE.6"
#define COMPILER_VERSION "PRE.7"

View File

@@ -0,0 +1,3 @@
/**
@1 // #error: Expected doc directive here
*/

View File

@@ -0,0 +1 @@
x"abcé" // #error: This isn't a valid hexadecimal digit

View File

@@ -0,0 +1 @@
x"abcg" // #error: 'g' isn't a valid hexadecimal digit

View File

@@ -0,0 +1,2 @@
$$1 // #error: Expected a letter after

View File

@@ -2,10 +2,14 @@
0o% // #error: An expression starting with '0o' should be followed by octal numbers (0-7).
0o08 // #error: An expression starting with '0o' should be followed by octal numbers (0-7).
0b2 // #error: An expression starting with '0b' should be followed by binary digits
0b# // #error: An expression starting with '0b' should be followed by binary digits
0b12 // #error: An expression starting with '0b' should be followed by binary digits
0xg // #error: '0x' starts a hexadecimal number, so the next character should be 0-9, a-f or A-F.
0x! // #error: '0x' starts a hexadecimal number, so the next character should be 0-9, a-f or A-F.

View File

@@ -1,7 +1,6 @@
// @warnings{no-unused}
module test;
struct Aa
{
int a;