diff --git a/src/compiler/compiler_internal.h b/src/compiler/compiler_internal.h index 7e07330ba..0d31f3524 100644 --- a/src/compiler/compiler_internal.h +++ b/src/compiler/compiler_internal.h @@ -163,24 +163,21 @@ typedef struct AstId end; } DeferList; - +typedef unsigned FileId; typedef struct { + FileId file_id; const char *contents; char *name; char *dir_path; const char *full_path; - SourceLoc start_id; - SourceLoc end_id; - SourceLoc *lines; - SourceLoc current_line_start; } File; typedef struct { - File *file; - uint32_t line; - uint32_t col; + FileId file_id; + uint16_t col; + uint32_t row; uint32_t start; uint32_t length; } SourceLocation; @@ -1352,8 +1349,10 @@ typedef struct uint32_t token_start_id; const char *lexing_start; const char *current; - uint32_t current_line; + uint32_t current_row; + uint32_t start_row; const char *line_start; + const char *start_row_start; File *file; TokenData *latest_token_data; SourceLocation *latest_token_loc; @@ -1982,9 +1981,8 @@ void sema_error(Context *context, const char *message, ...); void sema_prev_at_range3(SourceSpan span, const char *message, ...); void sema_shadow_error(Decl *decl, Decl *old); +File *source_file_by_id(FileId file); File *source_file_load(const char *filename, bool *already_loaded); -void source_file_append_line_end(File *file, SourceLoc loc); -SourcePosition source_file_find_position_in_file(File *file, SourceLoc loc); static inline SourceSpan source_span_from_token_id(TokenId id) { diff --git a/src/compiler/diagnostics.c b/src/compiler/diagnostics.c index 7df166715..4fdbfd287 100644 --- a/src/compiler/diagnostics.c +++ b/src/compiler/diagnostics.c @@ -14,53 +14,80 @@ typedef enum PRINT_TYPE_WARN } PrintType; -static void print_error2(SourceLocation *location, const char *message, PrintType print_type) +#define LINES_SHOWN 4 + +static void print_error(SourceLocation *location, const char *message, PrintType print_type) { + File *file = source_file_by_id(location->file_id); if (active_target.test_output) { switch (print_type) { case PRINT_TYPE_ERROR: - eprintf("Error|%s|%d|%s\n", location->file->name, location->line, message); + eprintf("Error|%s|%d|%s\n", file->name, location->row, message); return; case PRINT_TYPE_PREV: return; case PRINT_TYPE_WARN: - eprintf("Warning|%s|%d|%s\n", location->file->name, location->line, message); + eprintf("Warning|%s|%d|%s\n", file->name, location->row, message); return; default: UNREACHABLE } } - static const int LINES_SHOWN = 4; - - unsigned max_line_length = (unsigned)round(log10(location->line)) + 1; + unsigned max_line_length = (unsigned)round(log10(location->row)) + 1; char number_buffer[20]; snprintf(number_buffer, 20, "%%%dd: %%.*s\n", max_line_length); // Insert end in case it's not yet there. - for (SourceLoc s = location->start; s < location->file->end_id; s++) + + const char *file_contents = file->contents; + int lines_found = 0; + size_t line_starts[LINES_SHOWN + 1] = { 0, 0, 0, 0 }; + uint32_t start = location->start; + if (start < 2) { - if ((location->file->contents + s - location->file->start_id)[0] == '\n') + line_starts[++lines_found] = 0; + } + else + { + for (size_t i = start; i > 0; i--) { - source_file_append_line_end(location->file, s); - break; + if (file_contents[i - 1] == '\n') + { + line_starts[++lines_found] = i; + if (lines_found >= LINES_SHOWN) break; + } + if (i == 1) + { + line_starts[++lines_found] = 0; + break; + } } } - size_t lines_in_file = vec_size(location->file->lines); - const char *start = NULL; - for (unsigned i = LINES_SHOWN; i > 0; i--) + for (size_t i = start; ; i++) { - if (location->line < i) continue; - uint32_t line_number = location->line + 1 - i; - SourceLoc line_start = location->file->lines[line_number - 1]; - - SourceLoc line_end = line_number == lines_in_file ? location->file->end_id + 1 : - location->file->lines[line_number]; - uint32_t line_len = line_end - line_start - 1; - start = location->file->contents + line_start - location->file->start_id; - eprintf(number_buffer, line_number, line_len, start); + switch (file_contents[i]) + { + case '\0': + case '\n': + line_starts[0] = i + 1; + goto FOUND; + default: + continue; + } + } + FOUND:; + const char *start_char = NULL; + for (unsigned i = lines_found; i > 0; i--) + { + SourceLoc line_start = line_starts[i]; + SourceLoc line_end = line_starts[i - 1] - 1; + uint32_t line_number = location->row + 1 - i; + uint32_t line_len = line_end - line_start; + start_char = file->contents + line_start; + eprintf(number_buffer, line_number, line_len, start_char); } eprintf(" "); for (unsigned i = 0; i < max_line_length; i++) @@ -70,7 +97,7 @@ static void print_error2(SourceLocation *location, const char *message, PrintTyp for (unsigned i = 1; i < location->col; i++) { - switch (start[i]) + switch (start_char[i]) { case '\t': eprintf("\t"); @@ -87,13 +114,13 @@ static void print_error2(SourceLocation *location, const char *message, PrintTyp switch (print_type) { case PRINT_TYPE_ERROR: - eprintf("(%s:%d) Error: %s\n\n", location->file->name, location->line, message); + eprintf("(%s:%d) Error: %s\n\n", file->name, location->row, message); break; case PRINT_TYPE_PREV: - eprintf("(%s:%d) %s\n\n", location->file->name, location->line, message); + eprintf("(%s:%d) %s\n\n", file->name, location->row, message); break; case PRINT_TYPE_WARN: - eprintf("(%s:%d) Warning: %s\n\n", location->file->name, location->line, message); + eprintf("(%s:%d) Warning: %s\n\n", file->name, location->row, message); break; default: UNREACHABLE @@ -105,7 +132,7 @@ static void vprint_error(SourceLocation *location, const char *message, va_list { char buffer[256]; vsnprintf(buffer, 256, message, args); - print_error2(location, buffer, PRINT_TYPE_ERROR); + print_error(location, buffer, PRINT_TYPE_ERROR); } @@ -135,7 +162,7 @@ void sema_prev_at_range3(SourceSpan span, const char *message, ...) vsnprintf(buffer, 256, message, args); SourceLocation loc = *start; loc.length = end->start - start->start + end->length; - print_error2(&loc, buffer, PRINT_TYPE_PREV); + print_error(&loc, buffer, PRINT_TYPE_PREV); va_end(args); } @@ -157,12 +184,12 @@ void sema_error_at_prev_end(Token token, const char *message, ...) SourceLocation *curr = TOKLOC(token); SourceLocation *prev = TOKLOC((TokenId) { token.id.index - 1 }); SourceLocation location; - if (curr->file != prev->file) + if (curr->file_id != prev->file_id) { // Ok, this is the first location, so then we create a "start" location: location = *curr; location.start = 0; - location.line = 1; + location.row = 1; location.col = 1; } else diff --git a/src/compiler/enums.h b/src/compiler/enums.h index ed3d77810..a2db89d9a 100644 --- a/src/compiler/enums.h +++ b/src/compiler/enums.h @@ -94,7 +94,6 @@ typedef enum CAST_XIERR, CAST_PTRPTR, CAST_PTRXI, - CAST_ARRPTR, CAST_ARRVEC, CAST_STRPTR, CAST_PTRBOOL, diff --git a/src/compiler/lexer.c b/src/compiler/lexer.c index 2aef78852..8a2d86016 100644 --- a/src/compiler/lexer.c +++ b/src/compiler/lexer.c @@ -18,10 +18,36 @@ typedef enum DOC_END_ERROR, } DocEnd; +static inline uint16_t check_col(intptr_t col, uint32_t row) +{ + if (col > 65535) error_exit("Column on line %d exceeded %d.", row, 65535); + return (uint16_t)col; +} + +static inline uint32_t check_row(intptr_t line, uint32_t row) +{ + if (line > 1024 * 1024) error_exit("Token on line %d exceeded %d.", row, 1024 * 1024); + return (uint32_t)line; +} + // --- Lexing general methods. static bool lexer_scan_token_inner(Lexer *lexer, LexMode mode); +static inline void begin_new_token(Lexer *lexer) +{ + lexer->lexing_start = lexer->current; + lexer->start_row = lexer->current_row; + lexer->start_row_start = lexer->line_start; +} + +static inline void backtrace_to_lexing_start(Lexer *lexer) +{ + lexer->current = lexer->lexing_start; + lexer->current_row = lexer->start_row; + lexer->line_start = lexer->start_row_start; +} + // Peek at the current character in the buffer. static inline char peek(Lexer *lexer) { @@ -38,14 +64,10 @@ static inline char prev(Lexer *lexer) static inline void backtrack(Lexer *lexer) { lexer->current--; -} - -// Store a line ending (and current line start at the current character) -void lexer_store_line_end(Lexer *lexer) -{ - lexer->current_line++; - lexer->line_start = lexer->current + 1; - source_file_append_line_end(lexer->file, (SourceLoc)(lexer->file->start_id + lexer->current - lexer->file_begin)); + if (lexer->current[0] == '\n') + { + lexer->current_row--; + } } // Peek one character ahead. @@ -55,16 +77,24 @@ static inline char peek_next(Lexer *lexer) } // Return the current character and step one character forward. -static inline char next(Lexer *lexer) +static inline void next(Lexer *lexer) { - return *(lexer->current++); + if (lexer->current[0] == '\n') + { + lexer->line_start = lexer->current + 1; + lexer->current_row++; + } + lexer->current++; } // Skip the x next characters. static inline void skip(Lexer *lexer, int steps) { assert(steps > 0); - lexer->current += steps; + for (int i = 0; i < steps; i++) + { + next(lexer); + } } // Is the current character '\0' if so we assume we reached the end. @@ -78,7 +108,7 @@ static inline bool match(Lexer *lexer, char expected) { if (reached_end(lexer)) return false; if (*lexer->current != expected) return false; - lexer->current++; + next(lexer); return true; } @@ -103,38 +133,28 @@ static inline void add_generic_token(Lexer *lexer, TokenType type) token_type[0] = (unsigned char)type; // Set the location. - location->file = lexer->file; + location->file_id = lexer->file->file_id; location->start = (uint32_t)(lexer->lexing_start - lexer->file_begin); - // Calculate the column - if (lexer->lexing_start < lexer->line_start) + uint32_t line = lexer->start_row; + location->row = line; + if (line == lexer->current_row) { - // In this case lexing started before the start of the current line. - // Start by looking at the previous line. - SourceLoc *current = &lexer->file->lines[lexer->current_line - 1]; - location->line = lexer->current_line; - // Walk upwards until we find a line that starts before the current. - while (*current > location->start) - { - location->line--; - current--; - } - // We found the line we wanted, so the col is just an offset from the start. - location->col = location->start - *current + 1; - // Length is restricted to the end of the line. - location->length = current[1] - current[0] - 1; - } - else - { - // The simple case, where the parsing started on the current line. - location->line = lexer->current_line; // Col is simple difference. - location->col = (unsigned) (lexer->lexing_start - lexer->line_start) + 1; + location->col = check_col(lexer->lexing_start - lexer->line_start + 1, line); // Start is offset to file begin. location->start = (SourceLoc) (lexer->lexing_start - lexer->file_begin); // Length is diff between current and start. - location->length = (SourceLoc) (lexer->current - lexer->lexing_start); + location->length = check_row(lexer->current - lexer->lexing_start, line); } + else + { + location->col = check_col(lexer->lexing_start - lexer->start_row_start + 1, line); + // Start is offset to file begin. + location->start = (SourceLoc) (lexer->lexing_start - lexer->file_begin); + location->length = 1; + } + // Return pointers to the data and the location, // these maybe be used to fill in data. lexer->latest_token_data = data; @@ -153,22 +173,56 @@ static bool add_error_token(Lexer *lexer, const char *message, ...) return false; } -static bool add_error_token_at(Lexer *lexer, const char *loc, uint32_t len, const char *message, ...) +static bool add_error_token_at_start(Lexer *lexer, const char *message, ...) { va_list list; va_start(list, message); - SourceLocation location = { .file = lexer->file, - .start = (uint32_t) (loc - lexer->file_begin), - .line = lexer->current_line, - .length = len, - .col = (uint32_t) (loc - lexer->line_start) + 1, + SourceLocation location = { .file_id = lexer->file->file_id, + .start = (uint32_t) (lexer->lexing_start - lexer->file_begin), + .row = lexer->start_row, + .length = 1, + .col = check_col((lexer->lexing_start - lexer->start_row_start) + 1, lexer->start_row), }; sema_verror_range(&location, message, list); va_end(list); add_generic_token(lexer, TOKEN_INVALID_TOKEN); return false; - } + +static bool add_error_token_at(Lexer *lexer, const char *loc, uint32_t len, const char *message, ...) +{ + va_list list; + va_start(list, message); + uint32_t current_line = lexer->current_row; + SourceLocation location = { .file_id = lexer->file->file_id, + .start = (uint32_t) (loc - lexer->file_begin), + .row = current_line, + .length = len, + .col = check_col((loc - lexer->line_start) + 1, current_line), + }; + sema_verror_range(&location, message, list); + va_end(list); + add_generic_token(lexer, TOKEN_INVALID_TOKEN); + return false; +} + +static bool add_error_token_at_current(Lexer *lexer, const char *message, ...) +{ + va_list list; + va_start(list, message); + uint32_t current_line = lexer->current_row; + SourceLocation location = { .file_id = lexer->file->file_id, + .start = (uint32_t) (lexer->current - lexer->file_begin), + .row = current_line, + .length = 1, + .col = check_col((lexer->current - lexer->line_start) + 1, current_line), + }; + sema_verror_range(&location, message, list); + va_end(list); + add_generic_token(lexer, TOKEN_INVALID_TOKEN); + return false; +} + // Add a new regular token. static inline bool add_token(Lexer *lexer, TokenType type, const char *string) { @@ -204,7 +258,6 @@ static inline bool parse_line_comment(Lexer *lexer) // If we found EOL, then walk past '\n' if (!reached_end(lexer)) { - lexer_store_line_end(lexer); next(lexer); } return success; @@ -240,7 +293,6 @@ static inline bool parse_multiline_comment(Lexer *lexer) } break; case '\n': - lexer_store_line_end(lexer); break; case '\0': if (type != TOKEN_DOC_COMMENT) return add_token(lexer, type, lexer->lexing_start); @@ -264,7 +316,6 @@ static void skip_whitespace(Lexer *lexer, LexMode lex_type) { case '\n': if (lex_type != LEX_NORMAL) return; - lexer_store_line_end(lexer); FALLTHROUGH; case ' ': case '\t': @@ -296,7 +347,8 @@ static inline bool scan_ident(Lexer *lexer, TokenType normal, TokenType const_to } while (peek(lexer) == '_') { - hash = FNV1a(next(lexer), hash); + hash = FNV1a(peek(lexer), hash); + next(lexer); } while (1) { @@ -333,12 +385,14 @@ static inline bool scan_ident(Lexer *lexer, TokenType normal, TokenType const_to default: goto EXIT; } - hash = FNV1a(next(lexer), hash); + hash = FNV1a(peek(lexer), hash); + next(lexer); } // Allow bang! if (peek(lexer) == '!' && type == normal) { - hash = FNV1a(next(lexer), hash); + hash = FNV1a('!', hash); + next(lexer); } EXIT:; uint32_t len = (uint32_t)(lexer->current - lexer->lexing_start); @@ -353,6 +407,12 @@ static inline bool scan_ident(Lexer *lexer, TokenType normal, TokenType const_to // --- Number scanning +/** + * For C3 we use the practice of f u and s + * @param lexer + * @param is_float + * @return + */ static bool scan_number_suffix(Lexer *lexer, bool *is_float) { if (!is_alphanum_(peek(lexer))) return true; @@ -390,12 +450,16 @@ static bool scan_number_suffix(Lexer *lexer, bool *is_float) */ static bool scan_oct(Lexer *lexer) { - if (!is_oct(next(lexer))) + if (!is_oct(peek(lexer))) { - backtrack(lexer); - return add_error_token_at(lexer, lexer->current, 1, "An expression starting with '0o' should be followed by octal numbers (0-7)."); + return add_error_token_at_current(lexer, "An expression starting with '0o' should be followed by octal numbers (0-7)."); } + next(lexer); while (is_oct_or_(peek(lexer))) next(lexer); + if (is_number(peek(lexer))) + { + return add_error_token_at_current(lexer, "An expression starting with '0o' should be followed by octal numbers (0-7)."); + } bool is_float = false; if (!scan_number_suffix(lexer, &is_float)) return false; if (is_float) @@ -410,12 +474,16 @@ static bool scan_oct(Lexer *lexer) **/ static bool scan_binary(Lexer *lexer) { - if (!is_binary(next(lexer))) + if (!is_binary(peek(lexer))) { - backtrack(lexer); - return add_error_token_at(lexer, lexer->current, 1, "An expression starting with '0b' should be followed by binary digits (0-1)."); + return add_error_token_at_current(lexer, "An expression starting with '0b' should be followed by binary digits (0-1)."); } + next(lexer); while (is_binary_or_(peek(lexer))) next(lexer); + if (is_number(peek((lexer)))) + { + return add_error_token_at_current(lexer, "An expression starting with '0b' should be followed by binary digits (0-1)."); + } bool is_float = false; if (!scan_number_suffix(lexer, &is_float)) return false; if (is_float) @@ -434,16 +502,21 @@ static inline bool scan_exponent(Lexer *lexer) { // Step past e/E or p/P next(lexer); - char c = next(lexer); + char c = peek(lexer); + next(lexer); // Step past +/- - if (c == '+' || c == '-') c = next(lexer); + if (c == '+' || c == '-') + { + c = peek(lexer); + next(lexer); + } // Now we need at least one digit if (!is_digit(c)) { if (c == 0) { backtrack(lexer); - return add_error_token(lexer, "End of file was reached while parsing the exponent."); + return add_error_token_at_current(lexer, "End of file was reached while parsing the exponent."); } if (c == '\n') return add_error_token(lexer, "End of line was reached while parsing the exponent."); if (c < 31 || c > 127) add_error_token(lexer, "An unexpected character was found while parsing the exponent."); @@ -460,11 +533,11 @@ static inline bool scan_exponent(Lexer *lexer) **/ static inline bool scan_hex(Lexer *lexer) { - if (!is_hex(next(lexer))) + if (!is_hex(peek(lexer))) { - backtrack(lexer); - return add_error_token_at(lexer, lexer->current, 1, "'0x' starts a hexadecimal number, so the next character should be 0-9, a-f or A-F."); + return add_error_token_at_current(lexer, "'0x' starts a hexadecimal number, so the next character should be 0-9, a-f or A-F."); } + next(lexer); while (is_hex_or_(peek(lexer))) next(lexer); bool is_float = false; if (peek(lexer) == '.' && peek_next(lexer) != '.') @@ -472,7 +545,7 @@ static inline bool scan_hex(Lexer *lexer) is_float = true; next(lexer); char c = peek(lexer); - if (c == '_') return add_error_token(lexer, "Can't parse this as a floating point value due to the '_' directly after decimal point."); + if (c == '_') return add_error_token_at_current(lexer, "'_' is not allowed directly after decimal point, try removing it."); if (is_hex(c)) next(lexer); while (is_hex_or_(peek(lexer))) next(lexer); } @@ -482,7 +555,11 @@ static inline bool scan_hex(Lexer *lexer) is_float = true; if (!scan_exponent(lexer)) return false; } - if (prev(lexer) == '_') return add_error_token(lexer, "The number ended with '_', but that character needs to be between, not after, digits."); + if (prev(lexer) == '_') + { + backtrack(lexer); + return add_error_token_at_current(lexer, "The number ended with '_', which isn't allowed, please remove it."); + } if (!scan_number_suffix(lexer, &is_float)) return false; return add_token(lexer, is_float ? TOKEN_REAL : TOKEN_INTEGER, lexer->lexing_start); } @@ -511,7 +588,7 @@ static inline bool scan_dec(Lexer *lexer) next(lexer); // Check our rule to disallow 123._32 char c = peek(lexer); - if (c == '_') return add_error_token(lexer, "Can't parse this as a floating point value due to the '_' directly after decimal point."); + if (c == '_') return add_error_token_at_current(lexer, "'_' is not allowed directly after decimal point, try removing it."); // Now walk until we see no more digits. // This allows 123. as a floating point number. while (is_digit_or_(peek(lexer))) next(lexer); @@ -525,7 +602,11 @@ static inline bool scan_dec(Lexer *lexer) if (!scan_exponent(lexer)) return false; } - if (prev(lexer) == '_') return add_error_token(lexer, "The number ended with '_', but that character needs to be between, not after, digits."); + if (prev(lexer) == '_') + { + backtrack(lexer); + return add_error_token_at_current(lexer, "The number ended with '_', which isn't allowed, please remove it."); + } if (!scan_number_suffix(lexer, &is_float)) return false; return add_token(lexer, is_float ? TOKEN_REAL : TOKEN_INTEGER, lexer->lexing_start); } @@ -622,8 +703,9 @@ static inline int64_t scan_utf8(Lexer *lexer, unsigned char c) for (int i = 1; i < utf8_bytes; i++) { result <<= 6U; - if (peek(lexer) == '\0') return 0xFFFD; - c = (unsigned char)next(lexer); + c = (unsigned char)peek(lexer); + if (c == '\0') return 0xFFFD; + next(lexer); if ((c & 0xc0) != 0x80) { goto ERROR; @@ -659,29 +741,29 @@ static inline bool scan_char(Lexer *lexer) char c; Int128 b = { 0, 0 }; - while ((c = next(lexer)) != '\'') + while (!match(lexer, '\'')) { + c = peek(lexer); + next(lexer); // End of file may occur: if (c == '\0') { - backtrack(lexer); - return add_error_token(lexer, "The character literal did not terminate."); + return add_error_token_at_start(lexer, "The character literal did not terminate."); } // We might exceed the width that we allow. - if (width > 15) return add_error_token(lexer, "The character literal exceeds 16 characters."); + if (width > 15) return add_error_token_at_start(lexer, "The character literal exceeds 16 characters."); // Handle (expected) utf-8 characters. if ((unsigned)c >= (unsigned)0x80) { if (width != 0) goto UNICODE_IN_MULTI; - const char *start = lexer->current; int64_t utf8 = scan_utf8(lexer, (unsigned char)c); if (utf8 < 0) return false; if (!match(lexer, '\'')) { if (peek(lexer) == '\0') continue; - lexer->lexing_start = start; - return add_error_token(lexer, "Unicode character literals may only contain one character, " - "please remove the additional ones or use all ASCII."); + backtrack(lexer); + return add_error_token_at_current(lexer, "Unicode character literals may only contain one character, " + "please remove the additional ones or use all ASCII."); } b.low = (uint64_t) utf8; width = utf8 > 0xffff ? 4 : 2; @@ -689,23 +771,24 @@ static inline bool scan_char(Lexer *lexer) } // Parse the escape code signed char escape = ' '; - const char *start = lexer->current; if (c == '\\') { assert(c == '\\'); - c = next(lexer); + c = peek(lexer); escape = is_valid_escape(c); if (escape == -1) { - backtrack(lexer); - lexer->lexing_start = start - 1; + lexer->lexing_start += 1; if (c > ' ' && c <= 127) { + next(lexer); return add_error_token(lexer, "Invalid escape sequence '\\%c'.", c); } - return add_error_token_at(lexer, start, 1, "An escape sequence was expected after '\\'."); + return add_error_token_at_current(lexer, "An escape sequence was expected after '\\'."); } + next(lexer); } + const char *escape_begin = lexer->current - 2; switch (escape) { case 'x': @@ -713,9 +796,7 @@ static inline bool scan_char(Lexer *lexer) int64_t hex = scan_hex_literal(lexer, 2); if (hex < 0) { - lexer->lexing_start = start - 1; - // Fix underlining if this is an unfinished escape. - return add_error_token(lexer, "Expected a two character hex value after \\x."); + return add_error_token_at(lexer, escape_begin, lexer->current - escape_begin, "Expected a two character hex value after \\x."); } // We can now reassign c and use the default code. c = (char)hex; @@ -731,21 +812,19 @@ static inline bool scan_char(Lexer *lexer) // The hex parsing may have failed, lacking more hex chars. if (hex < 0) { - lexer->lexing_start = start - 1; - return add_error_token(lexer, "Expected %s character hex value after \\%c.", - escape == 'u' ? "a four" : "an eight", escape); + begin_new_token(lexer); + return add_error_token_at(lexer, escape_begin, lexer->current - escape_begin, + "Expected %s character hex value after \\%c.", + escape == 'u' ? "a four" : "an eight", escape); } // If we don't see the end here, then something is wrong. if (!match(lexer, '\'')) { // It may be the end of the line, if so use the default handling by invoking "continue" if (peek(lexer) == '\0') continue; - // Otherwise step forward and mark it as an error. - next(lexer); - lexer->lexing_start = lexer->current - 1; - return add_error_token(lexer, - "Character literals with '\\%c' can only contain one character, please remove this one.", - escape); + return add_error_token_at_current(lexer, + "Character literals with '\\%c' can only contain one character, please remove this one.", + escape); } // Assign the value and go to DONE. b.low = (uint64_t) hex; @@ -764,7 +843,6 @@ static inline bool scan_char(Lexer *lexer) b = i128_shl64(b, 8); b = i128_add64(b, (unsigned char)c); } - assert(width > 0 && width <= 16); if (width > 8 && !platform_target.int128) { @@ -790,9 +868,7 @@ static inline void skip_first_line_if_empty(Lexer *lexer) { case '\n': // Line end? then we jump to the first token after line end. - lexer->current = current - 1; - lexer_store_line_end(lexer); - lexer->current++; + next(lexer); return; case ' ': case '\t': @@ -969,13 +1045,13 @@ bool scan_consume_end_of_multiline(Lexer *lexer, bool error_on_eof) int consume_end = 3; while (consume_end > 0) { - char c = next(lexer); + char c = peek(lexer); + next(lexer); if (c == '\0') { - backtrack(lexer); if (!error_on_eof) return false; - return add_error_token_at(lexer, lexer->current - 1, 1, "The multi-line string unexpectedly ended. " - "Did you forget a '\"\"\"' somewhere?"); + return add_error_token_at_start(lexer, "The multi-line string unexpectedly ended. " + "Did you forget a '\"\"\"' somewhere?"); } if (c == '"') consume_end--; } @@ -1023,7 +1099,6 @@ static inline bool scan_multiline_string(Lexer *lexer) // update the line end and store it in the resulting buffer. if (c == '\n') { - lexer_store_line_end(lexer); next(lexer); destination[len++] = c; line = 0; @@ -1037,8 +1112,8 @@ static inline bool scan_multiline_string(Lexer *lexer) // We reached EOF, or escape + end of file. if (c == '\0' || (c == '\\' && peek(lexer) == '\0')) { - return add_error_token_at(lexer, lexer->current - 1, 1, "The multi-line string unexpectedly ended. " - "Did you forget a '\"\"\"' somewhere?"); + return add_error_token_at_start(lexer, "The multi-line string unexpectedly ended. " + "Did you forget a '\"\"\"' somewhere?"); } // An escape sequence was reached. @@ -1053,11 +1128,12 @@ static inline bool scan_multiline_string(Lexer *lexer) int scanned = append_esc_string_token(destination, lexer->current, &len); if (scanned < 0) { - add_error_token_at(lexer, lexer->current - 1, 2, "Invalid escape in string."); + backtrack(lexer); + add_error_token_at_current(lexer, "Invalid escape in string."); scan_consume_end_of_multiline(lexer, false); return false; } - lexer->current += scanned; + skip(lexer, scanned); continue; } // Now first we skip any empty space if line has not been reached. @@ -1080,10 +1156,6 @@ static inline void consume_to_end_quote(Lexer *lexer) char c; while ((c = peek(lexer)) != '\0' && c != '"') { - if (c == '\n') - { - lexer_store_line_end(lexer); - } next(lexer); } } @@ -1114,22 +1186,24 @@ static inline bool scan_string(Lexer *lexer) size_t len = 0; while (lexer->current < end) { - c = next(lexer); + c = peek(lexer); + next(lexer); if (c == '\0' || (c == '\\' && peek(lexer) == '\0')) { if (c == '\0') backtrack(lexer); - add_error_token_at(lexer, lexer->current - 1, 1, "The end of the file was reached " - "while parsing the string. " - "Did you forget (or accidentally add) a '\"' somewhere?"); + add_error_token_at_start(lexer, "The end of the file was reached " + "while parsing the string. " + "Did you forget (or accidentally add) a '\"' somewhere?"); consume_to_end_quote(lexer); return false; } if (c == '\n' || (c == '\\' && peek(lexer) == '\n')) { - add_error_token_at(lexer, lexer->current - 1, 1, "The end of the line was reached " - "while parsing the string. " - "Did you forget (or accidentally add) a '\"' somewhere?"); - lexer->current--; + + backtrack(lexer); + add_error_token_at_start(lexer, "The end of the line was reached " + "while parsing the string. " + "Did you forget (or accidentally add) a '\"' somewhere?"); consume_to_end_quote(lexer); return false; } @@ -1138,11 +1212,11 @@ static inline bool scan_string(Lexer *lexer) int scanned = append_esc_string_token(destination, lexer->current, &len); if (scanned < 0) { - add_error_token_at(lexer, lexer->current - 1, 2, "Invalid escape in string."); + add_error_token_at_current(lexer, "Invalid escape in string."); consume_to_end_quote(lexer); return false; } - lexer->current += scanned; + skip(lexer, scanned); continue; } destination[len++] = c; @@ -1158,14 +1232,16 @@ static inline bool scan_string(Lexer *lexer) static inline bool scan_raw_string(Lexer *lexer) { char c; - while ((c = next(lexer)) != '`' || peek(lexer) == '`') + while (1) { + c = peek(lexer); + next(lexer); + if (c == '`' && peek(lexer) != '`') break; if (c == '\0') { - backtrack(lexer); - return add_error_token_at(lexer, lexer->lexing_start , 1, "Reached the end of the file looking for " - "the end of the raw string that starts " - "here. Did you forget a '`' somewhere?"); + return add_error_token_at_start(lexer, "Reached the end of the file looking for " + "the end of the raw string that starts " + "here. Did you forget a '`' somewhere?"); } if (c == '`') next(lexer); } @@ -1191,34 +1267,39 @@ static inline bool scan_raw_string(Lexer *lexer) static inline bool scan_hex_array(Lexer *lexer) { - char start_char = next(lexer); // Step past ' or " - const char *hexdata = lexer->current; + char start_char = peek(lexer); + next(lexer); // Step past ' or " char c; uint64_t len = 0; while (1) { - c = next(lexer); - if (c == start_char) break; + c = peek(lexer); if (c == 0) { - backtrack(lexer); - lexer->lexing_start = lexer->current - 1; - return add_error_token(lexer, "The hex string seems to be missing a terminating '%c'", start_char); + return add_error_token_at_current(lexer, "The hex string seems to be missing a terminating '%c'", start_char); } + if (c == start_char) break; if (is_hex(c)) { + next(lexer); len++; continue; } - if (!is_whitespace(c)) + if (is_whitespace(c)) { - lexer->lexing_start = hexdata - 1; - lexer->current = hexdata; - return add_error_token(lexer, - "'%c' isn't a valid hexadecimal digit, all digits should be a-z, A-Z and 0-9.", - c); + next(lexer); + continue; } + if (c > ' ' && c < 127) + { + return add_error_token_at_current(lexer, + "'%c' isn't a valid hexadecimal digit, all digits should be a-z, A-Z and 0-9.", + c); + } + return add_error_token_at_current(lexer, + "This isn't a valid hexadecimal digit, all digits should be a-z, A-Z and 0-9."); } + next(lexer); if (len % 2) { return add_error_token(lexer, "The hexadecimal string is not an even length, did you miss a digit somewhere?"); @@ -1233,27 +1314,25 @@ static inline bool scan_base64(Lexer *lexer) { next(lexer); // Step past 6 next(lexer); // Step past 4 - char start_char = next(lexer); // Step past ' or " - const char *b64data = lexer->current; + char start_char = peek(lexer); + next(lexer); // Step past ' or " char c; unsigned end_len = 0; uint64_t len = 0; while (1) { - c = next(lexer); - if (c == start_char) break; + c = peek(lexer); if (c == 0) { - backtrack(lexer); - lexer->lexing_start = lexer->current - 1; - return add_error_token(lexer, "The base64 string seems to be missing a terminating '%c'", start_char); + return add_error_token_at_start(lexer, "The base64 string seems to be missing a terminating '%c'", start_char); } + next(lexer); + if (c == start_char) break; if (is_base64(c)) { if (end_len) { - lexer->lexing_start = lexer->current - 1; - return add_error_token(lexer, "'%c' can't be placed after an ending '='", c); + return add_error_token_at_current(lexer, "'%c' can't be placed after an ending '='", c); } len++; continue; @@ -1262,7 +1341,7 @@ static inline bool scan_base64(Lexer *lexer) { if (end_len > 1) { - return add_error_token_at(lexer, lexer->current - 1, 1, "There cannot be more than 2 '=' at the end of a base64 string.", c); + return add_error_token_at_current(lexer, "There cannot be more than 2 '=' at the end of a base64 string.", c); } end_len++; continue; @@ -1271,9 +1350,9 @@ static inline bool scan_base64(Lexer *lexer) { if (c < ' ' || c > 127) { - return add_error_token_at(lexer, lexer->current - 1, 1, "A valid base64 character was expected here."); + return add_error_token_at_current(lexer, "A valid base64 character was expected here."); } - return add_error_token_at(lexer, lexer->current - 1, 1, "'%c' is not a valid base64 character.", c); + return add_error_token_at_current(lexer, "'%c' is not a valid base64 character.", c); } } if (!end_len && len % 4 != 0) @@ -1300,8 +1379,8 @@ static inline bool scan_base64(Lexer *lexer) } if ((len + end_len) % 4 != 0) { - return add_error_token(lexer, "Base64 strings must either be padded to multiple of 4, or if unpadded " - "- only need 1 or 2 bytes of extra padding."); + return add_error_token_at_start(lexer, "Base64 strings must either be padded to multiple of 4, or if unpadded " + "- only need 1 or 2 bytes of extra padding."); } uint64_t decoded_len = (3 * len - end_len) / 4; if (!add_token(lexer, TOKEN_BYTES, lexer->lexing_start)) return false; @@ -1348,7 +1427,7 @@ static bool parse_add_end_of_docs_if_present(Lexer *lexer) // Otherwise, gladly skip ahead and store the end. skip(lexer, lookahead + 1); add_token(lexer, TOKEN_DOCS_END, lexer->lexing_start); - lexer->lexing_start = lexer->current; + begin_new_token(lexer); return true; } @@ -1357,10 +1436,9 @@ static void parse_add_end_of_doc_line(Lexer *lexer) { assert(peek(lexer) == '\n'); // Add the EOL token. - lexer_store_line_end(lexer); next(lexer); add_token(lexer, TOKEN_DOCS_EOL, lexer->lexing_start); - lexer->lexing_start = lexer->current; + begin_new_token(lexer); // Skip whitespace skip_whitespace(lexer, LEX_DOCS); // And any leading stars: @@ -1376,7 +1454,7 @@ static DocEnd parse_doc_remainder(Lexer *lexer) { // Skip all initial whitespace. skip_whitespace(lexer, LEX_DOCS); - lexer->lexing_start = lexer->current; + begin_new_token(lexer); int characters_read = 0; while (1) @@ -1391,7 +1469,7 @@ static DocEnd parse_doc_remainder(Lexer *lexer) if (characters_read > 0) { add_token(lexer, TOKEN_DOCS_LINE, 0); - lexer->lexing_start = lexer->current; + begin_new_token(lexer); } if (parse_add_end_of_docs_if_present(lexer)) return DOC_END_LAST; // Otherwise use default parsing. @@ -1401,14 +1479,14 @@ static DocEnd parse_doc_remainder(Lexer *lexer) if (characters_read > 0) { add_token(lexer, TOKEN_DOCS_LINE, 0); - lexer->lexing_start = lexer->current; + begin_new_token(lexer); } return DOC_END_EOL; case '\0': if (characters_read > 0) { add_token(lexer, TOKEN_DOCS_LINE, 0); - lexer->lexing_start = lexer->current; + begin_new_token(lexer); } return DOC_END_EOF; default: @@ -1497,18 +1575,21 @@ static DocEnd parse_doc_param_directive(Lexer *lexer) return parse_doc_remainder(lexer); } + static DocEnd parse_doc_directive(Lexer *lexer) { // We expect a directive here. - if (!is_letter(peek_next(lexer))) + begin_new_token(lexer); + // First parse the '@' + next(lexer); + add_token(lexer, TOKEN_DOCS_DIRECTIVE, "@"); + begin_new_token(lexer); + + if (!is_letter(peek(lexer))) { + next(lexer); return add_error_token(lexer, "Expected doc directive here."); } - lexer->lexing_start = lexer->current; - // First parse the '@' - skip(lexer, 1); - add_token(lexer, TOKEN_DOCS_DIRECTIVE, "@"); - lexer->lexing_start = lexer->current; // Then our keyword if (!scan_ident(lexer, TOKEN_IDENT, TOKEN_CONST, TOKEN_TYPE_IDENT, 0)) return DOC_END_ERROR; @@ -1557,7 +1638,10 @@ static bool parse_doc_comment(Lexer *lexer) skip_whitespace(lexer, LEX_DOCS); // 2. Did we find the end? - if (reached_end(lexer)) return add_error_token(lexer, "Missing '*/' to end the doc comment."); + if (reached_end(lexer)) + { + return add_error_token_at_start(lexer, "Missing '*/' to end the doc comment."); + } // 3. See if we reach the end of the docs. if (parse_add_end_of_docs_if_present(lexer)) return true; @@ -1607,7 +1691,7 @@ static bool lexer_scan_token_inner(Lexer *lexer, LexMode mode) skip_whitespace(lexer, mode); // Point start to the first non-whitespace character. - lexer->lexing_start = lexer->current; + begin_new_token(lexer); if (reached_end(lexer)) { @@ -1615,7 +1699,8 @@ static bool lexer_scan_token_inner(Lexer *lexer, LexMode mode) return add_token(lexer, TOKEN_EOF, "\n") && false; } - char c = next(lexer); + char c = peek(lexer); + next(lexer); switch (c) { case '@': @@ -1635,10 +1720,10 @@ static bool lexer_scan_token_inner(Lexer *lexer, LexMode mode) if (is_letter(peek(lexer))) { add_token(lexer, TOKEN_BUILTIN, "$$"); - lexer->lexing_start = lexer->current; + begin_new_token(lexer); return scan_ident(lexer, TOKEN_IDENT, TOKEN_CONST_IDENT, TOKEN_TYPE_IDENT, 0); } - return add_error_token(lexer, "Expected a letter after $$."); + return add_error_token_at_current(lexer, "Expected a letter after $$."); } return scan_ident(lexer, TOKEN_CT_IDENT, TOKEN_CT_CONST_IDENT, TOKEN_CT_TYPE_IDENT, '$'); case ',': @@ -1744,7 +1829,7 @@ static bool lexer_scan_token_inner(Lexer *lexer, LexMode mode) } if (c < 0) { - return add_error_token(lexer, "The 0%x character may not be placed outside of a string or comment, did you perhaps forget a \" somewhere?", (uint8_t)c); + return add_error_token(lexer, "The 0x%x character may not be placed outside of a string or comment, did you forget a \" somewhere?", (uint8_t)c); } return add_error_token(lexer, "'%c' may not be placed outside of a string or comment, did you perhaps forget a \" somewhere?", c); @@ -1757,10 +1842,10 @@ void lexer_lex_file(Lexer *lexer) { lexer->token_start_id = (uint32_t) toktype_arena.allocated; lexer->file_begin = lexer->file->contents; - lexer->lexing_start = lexer->file_begin; - lexer->current = lexer->lexing_start; - lexer->current_line = 1; + lexer->current = lexer->file_begin; lexer->line_start = lexer->current; + lexer->current_row = 1; + begin_new_token(lexer); const unsigned char *check = (const unsigned char *)lexer->current; unsigned c; int balance = 0; @@ -1801,7 +1886,7 @@ void lexer_lex_file(Lexer *lexer) DONE: if (balance != 0) { - add_error_token(lexer, "Invalid encoding - Unbalanced bidirectional markers."); + add_error_token_at_start(lexer, "Invalid encoding - Unbalanced bidirectional markers."); return; } while(1) @@ -1810,7 +1895,7 @@ DONE: { if (reached_end(lexer)) break; while (!reached_end(lexer) && peek(lexer) != '\n') next(lexer); - lexer->lexing_start = lexer->current; + begin_new_token(lexer); continue; } } diff --git a/src/compiler/llvm_codegen_debug_info.c b/src/compiler/llvm_codegen_debug_info.c index 4f404a77a..648dc7045 100644 --- a/src/compiler/llvm_codegen_debug_info.c +++ b/src/compiler/llvm_codegen_debug_info.c @@ -21,7 +21,7 @@ static inline LLVMMetadataRef llvm_get_debug_struct(GenContext *c, Type *type, c scope, external_name_len ? type->name : "", external_name_len ? strlen(type->name) : 0, loc ? c->debug.file : NULL, - loc ? loc->line : 0, + loc ? loc->row : 0, type_size(type) * 8, (uint32_t)(type_abi_alignment(type) * 8), flags, NULL, @@ -43,7 +43,7 @@ static inline LLVMMetadataRef llvm_get_debug_member(GenContext *c, Type *type, c scope, name, strlen(name), loc ? c->debug.file : NULL, - loc ? loc->line : 0, + loc ? loc->row : 0, type_size(type) * 8, (uint32_t)(type_abi_alignment(type) * 8), offset * 8, flags, llvm_get_debug_type_internal(c, type, scope)); @@ -80,7 +80,7 @@ void llvm_emit_debug_global_var(GenContext *c, Decl *global) global->external_name, strlen(global->external_name), c->debug.file, - loc->line, + loc->row, llvm_get_debug_type(c, global->type), global->visibility == VISIBLE_LOCAL, LLVMDIBuilderCreateExpression(c->debug.builder, NULL, 0), @@ -116,11 +116,11 @@ void llvm_emit_debug_function(GenContext *c, Decl *decl) decl->name, TOKLEN(decl->name_token), decl->external_name, strlen(decl->external_name), c->debug.file, - loc->line, + loc->row, llvm_get_debug_type(c, decl->type), decl->visibility == VISIBLE_LOCAL, true, - loc->line, + loc->row, flags, active_target.optimization_level != OPTIMIZATION_NONE); LLVMSetSubprogram(decl->backend_ref, c->debug.function); @@ -136,7 +136,7 @@ void llvm_emit_debug_local_var(GenContext *c, Decl *decl) decl->name, TOKLEN(decl->name_token), c->debug.file, - location->line, + location->row, llvm_get_debug_type(c, decl->type), active_target.optimization_level != OPTIMIZATION_NONE, LLVMDIFlagZero, @@ -147,7 +147,7 @@ void llvm_emit_debug_local_var(GenContext *c, Decl *decl) LLVMDIBuilderInsertDeclareAtEnd(c->debug.builder, decl->backend_ref, var, LLVMDIBuilderCreateExpression(c->debug.builder, NULL, 0), - LLVMDIBuilderCreateDebugLocation(c->context, location->line, location->col, + LLVMDIBuilderCreateDebugLocation(c->context, location->row, location->col, c->debug.function, inline_at), LLVMGetInsertBlock(c->builder)); } @@ -171,7 +171,7 @@ void llvm_emit_debug_parameter(GenContext *c, Decl *parameter, unsigned index) strlen(name), index + 1, c->debug.file, - loc->line, + loc->row, llvm_get_debug_type(c, parameter->type), always_preserve, LLVMDIFlagZero); @@ -181,8 +181,8 @@ void llvm_emit_debug_parameter(GenContext *c, Decl *parameter, unsigned index) parameter->backend_ref, parameter->var.backend_debug_ref, LLVMDIBuilderCreateExpression(c->debug.builder, NULL, 0), - LLVMDIBuilderCreateDebugLocation(c->context, loc->line, loc->col, c->debug.function, - inline_at), + LLVMDIBuilderCreateDebugLocation(c->context, loc->row, loc->col, c->debug.function, + inline_at), LLVMGetInsertBlock(c->builder)); @@ -200,7 +200,7 @@ void llvm_emit_debug_location(GenContext *context, SourceSpan location) LLVMMetadataRef scope = llvm_debug_current_scope(context); LLVMMetadataRef loc = LLVMDIBuilderCreateDebugLocation(context->context, - source_loc->line, + source_loc->row, source_loc->col, scope, /* inlined at */ 0); @@ -212,7 +212,7 @@ static LLVMMetadataRef llvm_debug_forward_comp(GenContext *c, Type *type, const return LLVMDIBuilderCreateReplaceableCompositeType(c->debug.builder, id_counter++, type->name, strlen(type->name), scope, - c->debug.file, loc ? loc->line : 0, + c->debug.file, loc ? loc->row : 0, 1 /* version TODO */, type_size(type) * 8, type_abi_alignment(type) * 8, @@ -237,7 +237,7 @@ void llvm_debug_push_lexical_scope(GenContext *context, SourceSpan location) LLVMMetadataRef block = LLVMDIBuilderCreateLexicalBlock(context->debug.builder, scope, context->debug.file, - source_loc->line, + source_loc->row, source_loc->col); llvm_debug_scope_push(context, block); @@ -301,7 +301,7 @@ static LLVMMetadataRef llvm_debug_enum_type(GenContext *c, Type *type, LLVMMetad LLVMMetadataRef real = LLVMDIBuilderCreateEnumerationType(c->debug.builder, scope, type->decl->name, TOKLEN(type->decl->name_token), - c->debug.file, location->line, type_size(type) * 8, + c->debug.file, location->row, type_size(type) * 8, type_abi_alignment(type) * 8, elements, vec_size(elements), llvm_get_debug_type(c, enum_real_type)); @@ -344,7 +344,7 @@ static LLVMMetadataRef llvm_debug_structlike_type(GenContext *c, Type *type, LLV scope, type->decl->name ? type->decl->name : "", type->decl->name ? TOKLEN(type->decl->name_token) : 0, - c->debug.file, location->line, type_size(type) * 8, + c->debug.file, location->row, type_size(type) * 8, type_abi_alignment(type) * 8, LLVMDIFlagZero, elements, vec_size(members), @@ -440,7 +440,7 @@ static LLVMMetadataRef llvm_debug_typedef_type(GenContext *c, Type *type) LLVMMetadataRef real = LLVMDIBuilderCreateTypedef(c->debug.builder, llvm_get_debug_type(c, original_type), decl->name, TOKLEN(decl->name_token), - c->debug.file, location->line, + c->debug.file, location->row, c->debug.file, type_abi_alignment(type)); if (type->backend_debug_type) { diff --git a/src/compiler/llvm_codegen_expr.c b/src/compiler/llvm_codegen_expr.c index 54a78bc4f..c3bcd5273 100644 --- a/src/compiler/llvm_codegen_expr.c +++ b/src/compiler/llvm_codegen_expr.c @@ -1181,8 +1181,6 @@ void llvm_emit_cast(GenContext *c, CastKind cast_kind, BEValue *value, Type *to_ case CAST_SAPTR: llvm_emit_subarray_pointer(c, value, value); break; - case CAST_ARRPTR: - TODO case CAST_EREU: // This is a no op. assert(type_lowering(to_type) == type_lowering(from_type)); @@ -2319,6 +2317,7 @@ static void gencontext_emit_slice(GenContext *c, BEValue *be_value, Expr *expr) // Calculate the size LLVMValueRef size = LLVMBuildSub(c->builder, LLVMBuildAdd(c->builder, end.value, llvm_const_int(c, start.type, 1), ""), start.value, "size"); LLVMValueRef start_pointer; + switch (parent.type->type_kind) { case TYPE_ARRAY: @@ -2336,8 +2335,11 @@ static void gencontext_emit_slice(GenContext *c, BEValue *be_value, Expr *expr) case TYPE_POINTER: start_pointer = llvm_emit_pointer_inbounds_gep_raw(c, llvm_get_pointee_type(c, parent.type), parent.value, start.value); break; - default: + case TYPE_FLEXIBLE_ARRAY: + case TYPE_VECTOR: TODO + default: + UNREACHABLE } // Create a new subarray type @@ -3500,7 +3502,8 @@ static inline void llvm_emit_force_unwrap_expr(GenContext *c, BEValue *be_value, { // TODO, we should add info about the error. SourceLocation *loc = TOKLOC(expr->span.loc); - llvm_emit_debug_output(c, "Runtime error force unwrap!", loc->file->name, c->cur_func_decl->external_name, loc->line); + File *file = source_file_by_id(loc->file_id); + llvm_emit_debug_output(c, "Runtime error force unwrap!", file->name, c->cur_func_decl->external_name, loc->row); llvm_emit_call_intrinsic(c, intrinsic_id.trap, NULL, 0, NULL, 0); LLVMBuildUnreachable(c->builder); c->current_block = NULL; diff --git a/src/compiler/llvm_codegen_stmt.c b/src/compiler/llvm_codegen_stmt.c index 8e422694c..cf2b3c54b 100644 --- a/src/compiler/llvm_codegen_stmt.c +++ b/src/compiler/llvm_codegen_stmt.c @@ -431,7 +431,8 @@ void llvm_emit_for_stmt(GenContext *c, Ast *ast) if (loop == LOOP_INFINITE) { SourceLocation *loc = TOKLOC(ast->span.loc); - llvm_emit_debug_output(c, "Infinite loop found", loc->file->name, c->cur_func_decl->external_name, loc->line); + File *file = source_file_by_id(loc->file_id); + llvm_emit_debug_output(c, "Infinite loop found", file->name, c->cur_func_decl->external_name, loc->row); LLVMBuildUnreachable(c->builder); LLVMBasicBlockRef block = llvm_basic_block_new(c, "unreachable_block"); c->current_block = NULL; @@ -994,7 +995,8 @@ static inline void llvm_emit_assert_stmt(GenContext *c, Ast *ast) { error = "Assert violation"; } - llvm_emit_debug_output(c, error, loc->file->name, c->cur_func_decl->name, loc->line); + File *file = source_file_by_id(loc->file_id); + llvm_emit_debug_output(c, error, file->name, c->cur_func_decl->name, loc->row); llvm_emit_call_intrinsic(c, intrinsic_id.trap, NULL, 0, NULL, 0); llvm_emit_br(c, on_ok); llvm_emit_block(c, on_ok); @@ -1045,7 +1047,8 @@ static inline void llvm_emit_asm_stmt(GenContext *c, Ast *ast) static inline void gencontext_emit_unreachable_stmt(GenContext *context, Ast *ast) { SourceLocation *loc = TOKLOC(ast->span.loc); - llvm_emit_debug_output(context, "Unreachable statement reached.", loc->file->name, context->cur_func_decl->external_name, loc->line); + File *file = source_file_by_id(loc->file_id); + llvm_emit_debug_output(context, "Unreachable statement reached.", file->name, context->cur_func_decl->external_name, loc->row); llvm_emit_call_intrinsic(context, intrinsic_id.trap, NULL, 0, NULL, 0); LLVMBuildUnreachable(context->builder); LLVMBasicBlockRef block = llvm_basic_block_new(context, "unreachable_block"); @@ -1222,7 +1225,8 @@ void llvm_emit_panic_if_true(GenContext *c, BEValue *value, const char *panic_na assert(llvm_value_is_bool(value)); llvm_emit_cond_br(c, value, panic_block, ok_block); llvm_emit_block(c, panic_block); - llvm_emit_debug_output(c, panic_name, loc->file->name, c->cur_func_decl->name, loc->line); + File *file = source_file_by_id(loc->file_id); + llvm_emit_debug_output(c, panic_name, file->name, c->cur_func_decl->name, loc->row); llvm_emit_call_intrinsic(c, intrinsic_id.trap, NULL, 0, NULL, 0); llvm_emit_br(c, ok_block); llvm_emit_block(c, ok_block); @@ -1230,13 +1234,14 @@ void llvm_emit_panic_if_true(GenContext *c, BEValue *value, const char *panic_na void llvm_emit_panic_on_true(GenContext *c, LLVMValueRef value, const char *panic_name, SourceLocation *loc) { + File *file = source_file_by_id(loc->file_id); LLVMBasicBlockRef panic_block = llvm_basic_block_new(c, "panic"); LLVMBasicBlockRef ok_block = llvm_basic_block_new(c, "checkok"); BEValue be_value; llvm_value_set_bool(&be_value, value); llvm_emit_cond_br(c, &be_value, panic_block, ok_block); llvm_emit_block(c, panic_block); - llvm_emit_debug_output(c, panic_name, loc->file->name, c->cur_func_decl->name, loc->line); + llvm_emit_debug_output(c, panic_name, file->name, c->cur_func_decl->name, loc->row); llvm_emit_call_intrinsic(c, intrinsic_id.trap, NULL, 0, NULL, 0); llvm_emit_br(c, ok_block); llvm_emit_block(c, ok_block); diff --git a/src/compiler/parse_expr.c b/src/compiler/parse_expr.c index 9febc56c9..8e0a00f88 100644 --- a/src/compiler/parse_expr.c +++ b/src/compiler/parse_expr.c @@ -1247,15 +1247,16 @@ static Expr *parse_bytes_expr(Context *context, Expr *left) { TokenData *token_data = tokendata_from_token(context->lex.tok); SourceLocation *loc = TOKLOC(context->lex.tok); + File *file = source_file_by_id(loc->file_id); if (token_data->is_base64) { - const char *base64data = &loc->file->contents[loc->start] + 4; + const char *base64data = &file->contents[loc->start] + 4; const char *end = base64data + loc->length - 1; parse_base64(&data_current, data_current + token_data->len, base64data, end); } else { - const char *hexdata = &loc->file->contents[loc->start] + 2; + const char *hexdata = &file->contents[loc->start] + 2; const char *end = hexdata + loc->length - 1; parse_hex(&data_current, hexdata, end); } diff --git a/src/compiler/parser.c b/src/compiler/parser.c index fd8c6171b..a9ad97002 100644 --- a/src/compiler/parser.c +++ b/src/compiler/parser.c @@ -48,7 +48,7 @@ inline void advance(Context *context) SourceLocation *curr = TOKLOC(context->lex.tok); SourceLocation *next = TOKLOC(context->lex.next_tok); vec_add(context->comments, context->lex.next_tok); - if (curr->line == next->line) + if (curr->row == next->row) { if (context->trailing_comment) { diff --git a/src/compiler/sema_expr.c b/src/compiler/sema_expr.c index dc1b32188..a308de064 100644 --- a/src/compiler/sema_expr.c +++ b/src/compiler/sema_expr.c @@ -198,7 +198,6 @@ bool expr_cast_is_constant_eval(Expr *expr, ConstantEvalKind eval_kind) case CAST_EREU: case CAST_XIERR: case CAST_PTRPTR: - case CAST_ARRPTR: case CAST_STRPTR: case CAST_PTRBOOL: case CAST_BOOLINT: @@ -1737,8 +1736,8 @@ bool sema_expr_analyse_macro_call(Context *context, Expr *call_expr, Expr *struc context->macro_scope = (MacroScope){ .body_param = decl->macro_decl.block_parameter.index ? TOKSTR(decl->macro_decl.block_parameter) : NULL, .macro = decl, - .inline_line = TOKLOC(call_expr->span.loc)->line, - .original_inline_line = old_macro_scope.depth ? old_macro_scope.original_inline_line : TOKLOC(call_expr->span.loc)->line, + .inline_line = TOKLOC(call_expr->span.loc)->row, + .original_inline_line = old_macro_scope.depth ? old_macro_scope.original_inline_line : TOKLOC(call_expr->span.loc)->row, .locals_start = context->active_scope.current_local, .depth = old_macro_scope.depth + 1, .yield_symbol_start = first_local, @@ -5938,7 +5937,7 @@ static inline bool sema_expr_analyse_placeholder(Context *context, Expr *expr) } if (string == kw_LINEREAL) { - expr_rewrite_to_int_const(expr, type_isize, TOKLOC(expr->placeholder_expr.identifier)->line, true); + expr_rewrite_to_int_const(expr, type_isize, TOKLOC(expr->placeholder_expr.identifier)->row, true); return true; } if (string == kw_LINE) @@ -5949,7 +5948,7 @@ static inline bool sema_expr_analyse_placeholder(Context *context, Expr *expr) } else { - expr_rewrite_to_int_const(expr, type_isize, TOKLOC(expr->placeholder_expr.identifier)->line, true); + expr_rewrite_to_int_const(expr, type_isize, TOKLOC(expr->placeholder_expr.identifier)->row, true); } return true; } diff --git a/src/compiler/sema_stmts.c b/src/compiler/sema_stmts.c index e50ea9e79..f8585c0cc 100644 --- a/src/compiler/sema_stmts.c +++ b/src/compiler/sema_stmts.c @@ -620,7 +620,7 @@ static inline bool sema_analyse_stmt_placement(Expr *cond, Ast *stmt) if (stmt->ast_kind == AST_COMPOUND_STMT) return true; SourceLocation *end_of_cond = TOKLOC(cond->span.end_loc); SourceLocation *start_of_then = TOKLOC(stmt->span.loc); - return end_of_cond->line == start_of_then->line; + return end_of_cond->row == start_of_then->row; } /** @@ -1467,7 +1467,7 @@ static inline bool sema_analyse_if_stmt(Context *context, Ast *statement) { SourceLocation *end_of_cond = TOKLOC(cond->span.end_loc); SourceLocation *start_of_then = TOKLOC(statement->if_stmt.then_body->span.loc); - if (end_of_cond->line != start_of_then->line) + if (end_of_cond->row != start_of_then->row) { SEMA_ERROR(statement->if_stmt.then_body, "The 'then' part of a single line if-statement must start on the same line as the 'if' or use '{ }'"); diff --git a/src/compiler/source_file.c b/src/compiler/source_file.c index 185d33a48..3375acbe1 100644 --- a/src/compiler/source_file.c +++ b/src/compiler/source_file.c @@ -15,7 +15,12 @@ static const size_t LEXER_FILES_START_CAPACITY = 128; -File pseudo_file; + +File *source_file_by_id(FileId file) +{ + assert(file < vec_size(global_context.loaded_sources)); + return global_context.loaded_sources[file]; +} File *source_file_load(const char *filename, bool *already_loaded) { @@ -45,62 +50,12 @@ File *source_file_load(const char *filename, bool *already_loaded) size_t size; const char* source_text = read_file(filename, &size); File *file = CALLOCS(File); - + file->file_id = vec_size(global_context.loaded_sources); file->full_path = full_path; - file->start_id = vec_size(global_context.loaded_sources) ? VECLAST(global_context.loaded_sources)->end_id : 0; - file->current_line_start = file->start_id; file->contents = source_text; - ASSERT(file->start_id + size < UINT32_MAX, "Total files loaded exceeded %d bytes", UINT32_MAX); - file->end_id = (SourceLoc) (file->start_id + size); - size_t pre_allocated_lines = size / 40; - file->lines = VECNEW(SourceLoc, pre_allocated_lines < 16 ? 16 : pre_allocated_lines); - vec_add(file->lines, file->start_id); path_get_dir_and_filename_from_full(file->full_path, &file->name, &file->dir_path); vec_add(global_context.loaded_sources, file); return file; } -void source_file_append_line_end(File *file, SourceLoc loc) -{ - if (file->current_line_start > loc) return; - file->current_line_start = loc + 1; - vec_add(file->lines, file->current_line_start); -} - -SourcePosition source_file_find_position_in_file(File *file, SourceLoc loc) -{ - assert(file->start_id <= loc); - - unsigned lines = vec_size(file->lines); - unsigned low = 0; - unsigned high = lines; - while (1) - { - // Line found iff line_start[mid] <= loc && line_start[mid + 1] < loc - // Binary search - uint32_t mid = (high + low) / 2; - - // Mid is before the location. - SourceLoc line_start = file->lines[mid]; - if (line_start > loc) - { - high = mid; - continue; - } - if (mid + 1 != lines && file->lines[mid + 1] <= loc) - { - low = mid; - continue; - } - return (SourcePosition) - { - .file = file, - .line = mid + 1, - .col = loc - line_start + 1, - .loc = loc, - .start = file->contents + loc - file->start_id, - }; - } -} - diff --git a/src/compiler_tests/tests.c b/src/compiler_tests/tests.c index b3e4c66f2..80a57eb5a 100644 --- a/src/compiler_tests/tests.c +++ b/src/compiler_tests/tests.c @@ -14,29 +14,6 @@ void test_file(void) { File file; memset(&file, 0, sizeof(file)); - file.start_id = 3; - file.contents = ""; - vec_add(file.lines, file.start_id); - TEST_ASSERT(source_file_find_position_in_file(&file, 3).line == 1, "Expected first line"); - TEST_ASSERT(source_file_find_position_in_file(&file, 10).line == 1, "Expected first line"); - source_file_append_line_end(&file, 9); - TEST_ASSERT(source_file_find_position_in_file(&file, 3).line == 1, "Expected first line"); - TEST_ASSERT(source_file_find_position_in_file(&file, 5).line == 1, "Expected first line"); - TEST_ASSERT(source_file_find_position_in_file(&file, 10).line == 2, "Expected second line"); - source_file_append_line_end(&file, 19); - TEST_ASSERT(source_file_find_position_in_file(&file, 3).line == 1, "Expected first line"); - TEST_ASSERT(source_file_find_position_in_file(&file, 5).line == 1, "Expected first line"); - TEST_ASSERT(source_file_find_position_in_file(&file, 10).line == 2, "Expected second line"); - TEST_ASSERT(source_file_find_position_in_file(&file, 15).line == 2, "Expected second line"); - TEST_ASSERT(source_file_find_position_in_file(&file, 21).line == 3, "Expected third line"); - source_file_append_line_end(&file, 29); - TEST_ASSERT(source_file_find_position_in_file(&file, 3).line == 1, "Expected first line"); - TEST_ASSERT(source_file_find_position_in_file(&file, 5).line == 1, "Expected first line"); - TEST_ASSERT(source_file_find_position_in_file(&file, 10).line == 2, "Expected second line"); - TEST_ASSERT(source_file_find_position_in_file(&file, 15).line == 2, "Expected second line"); - TEST_ASSERT(source_file_find_position_in_file(&file, 21).line == 3, "Expected third line"); - TEST_ASSERT(source_file_find_position_in_file(&file, 25).line == 3, "Expected third line"); - TEST_ASSERT(source_file_find_position_in_file(&file, 31).line == 4, "Expected fourth line"); } #define i128(x_, y_) ((Int128){x_, y_}) void test128() diff --git a/src/version.h b/src/version.h index 15e667d6b..92fbe8eeb 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define COMPILER_VERSION "PRE.6" \ No newline at end of file +#define COMPILER_VERSION "PRE.7" \ No newline at end of file diff --git a/test/test_suite/lexing/expected_directive.c3 b/test/test_suite/lexing/expected_directive.c3 new file mode 100644 index 000000000..e0c62e665 --- /dev/null +++ b/test/test_suite/lexing/expected_directive.c3 @@ -0,0 +1,3 @@ +/** +@1 // #error: Expected doc directive here +*/ diff --git a/test/test_suite/lexing/invalid_hex_in_hexarray.c3 b/test/test_suite/lexing/invalid_hex_in_hexarray.c3 new file mode 100644 index 000000000..536515a37 --- /dev/null +++ b/test/test_suite/lexing/invalid_hex_in_hexarray.c3 @@ -0,0 +1 @@ +x"abcé" // #error: This isn't a valid hexadecimal digit \ No newline at end of file diff --git a/test/test_suite/lexing/invalid_hex_in_hexarray2.c3 b/test/test_suite/lexing/invalid_hex_in_hexarray2.c3 new file mode 100644 index 000000000..eb571c8b2 --- /dev/null +++ b/test/test_suite/lexing/invalid_hex_in_hexarray2.c3 @@ -0,0 +1 @@ +x"abcg" // #error: 'g' isn't a valid hexadecimal digit \ No newline at end of file diff --git a/test/test_suite/lexing/no_builtin.c3 b/test/test_suite/lexing/no_builtin.c3 new file mode 100644 index 000000000..09ff7d6d9 --- /dev/null +++ b/test/test_suite/lexing/no_builtin.c3 @@ -0,0 +1,2 @@ + +$$1 // #error: Expected a letter after \ No newline at end of file diff --git a/test/test_suite/literals/radix_numbers_errors.c3 b/test/test_suite/literals/radix_numbers_errors.c3 index 84026a29a..a37970238 100644 --- a/test/test_suite/literals/radix_numbers_errors.c3 +++ b/test/test_suite/literals/radix_numbers_errors.c3 @@ -2,10 +2,14 @@ 0o% // #error: An expression starting with '0o' should be followed by octal numbers (0-7). +0o08 // #error: An expression starting with '0o' should be followed by octal numbers (0-7). + 0b2 // #error: An expression starting with '0b' should be followed by binary digits 0b# // #error: An expression starting with '0b' should be followed by binary digits +0b12 // #error: An expression starting with '0b' should be followed by binary digits + 0xg // #error: '0x' starts a hexadecimal number, so the next character should be 0-9, a-f or A-F. 0x! // #error: '0x' starts a hexadecimal number, so the next character should be 0-9, a-f or A-F. diff --git a/test/test_suite/struct/duplicate_member.c3 b/test/test_suite/struct/duplicate_member.c3 index 08d8245bc..345871af5 100644 --- a/test/test_suite/struct/duplicate_member.c3 +++ b/test/test_suite/struct/duplicate_member.c3 @@ -1,7 +1,6 @@ // @warnings{no-unused} module test; - struct Aa { int a;