Lexing updated with simpler code. Note: this change may contain bugs.

This commit is contained in:
Christoffer Lerno
2021-12-27 00:22:48 +01:00
committed by Christoffer Lerno
parent 0a9a014e4a
commit a0be188902
20 changed files with 388 additions and 329 deletions

View File

@@ -163,24 +163,21 @@ typedef struct
AstId end;
} DeferList;
typedef unsigned FileId;
typedef struct
{
FileId file_id;
const char *contents;
char *name;
char *dir_path;
const char *full_path;
SourceLoc start_id;
SourceLoc end_id;
SourceLoc *lines;
SourceLoc current_line_start;
} File;
typedef struct
{
File *file;
uint32_t line;
uint32_t col;
FileId file_id;
uint16_t col;
uint32_t row;
uint32_t start;
uint32_t length;
} SourceLocation;
@@ -1352,8 +1349,10 @@ typedef struct
uint32_t token_start_id;
const char *lexing_start;
const char *current;
uint32_t current_line;
uint32_t current_row;
uint32_t start_row;
const char *line_start;
const char *start_row_start;
File *file;
TokenData *latest_token_data;
SourceLocation *latest_token_loc;
@@ -1982,9 +1981,8 @@ void sema_error(Context *context, const char *message, ...);
void sema_prev_at_range3(SourceSpan span, const char *message, ...);
void sema_shadow_error(Decl *decl, Decl *old);
File *source_file_by_id(FileId file);
File *source_file_load(const char *filename, bool *already_loaded);
void source_file_append_line_end(File *file, SourceLoc loc);
SourcePosition source_file_find_position_in_file(File *file, SourceLoc loc);
static inline SourceSpan source_span_from_token_id(TokenId id)
{

View File

@@ -14,53 +14,80 @@ typedef enum
PRINT_TYPE_WARN
} PrintType;
static void print_error2(SourceLocation *location, const char *message, PrintType print_type)
#define LINES_SHOWN 4
static void print_error(SourceLocation *location, const char *message, PrintType print_type)
{
File *file = source_file_by_id(location->file_id);
if (active_target.test_output)
{
switch (print_type)
{
case PRINT_TYPE_ERROR:
eprintf("Error|%s|%d|%s\n", location->file->name, location->line, message);
eprintf("Error|%s|%d|%s\n", file->name, location->row, message);
return;
case PRINT_TYPE_PREV:
return;
case PRINT_TYPE_WARN:
eprintf("Warning|%s|%d|%s\n", location->file->name, location->line, message);
eprintf("Warning|%s|%d|%s\n", file->name, location->row, message);
return;
default:
UNREACHABLE
}
}
static const int LINES_SHOWN = 4;
unsigned max_line_length = (unsigned)round(log10(location->line)) + 1;
unsigned max_line_length = (unsigned)round(log10(location->row)) + 1;
char number_buffer[20];
snprintf(number_buffer, 20, "%%%dd: %%.*s\n", max_line_length);
// Insert end in case it's not yet there.
for (SourceLoc s = location->start; s < location->file->end_id; s++)
const char *file_contents = file->contents;
int lines_found = 0;
size_t line_starts[LINES_SHOWN + 1] = { 0, 0, 0, 0 };
uint32_t start = location->start;
if (start < 2)
{
if ((location->file->contents + s - location->file->start_id)[0] == '\n')
line_starts[++lines_found] = 0;
}
else
{
for (size_t i = start; i > 0; i--)
{
source_file_append_line_end(location->file, s);
break;
if (file_contents[i - 1] == '\n')
{
line_starts[++lines_found] = i;
if (lines_found >= LINES_SHOWN) break;
}
if (i == 1)
{
line_starts[++lines_found] = 0;
break;
}
}
}
size_t lines_in_file = vec_size(location->file->lines);
const char *start = NULL;
for (unsigned i = LINES_SHOWN; i > 0; i--)
for (size_t i = start; ; i++)
{
if (location->line < i) continue;
uint32_t line_number = location->line + 1 - i;
SourceLoc line_start = location->file->lines[line_number - 1];
SourceLoc line_end = line_number == lines_in_file ? location->file->end_id + 1 :
location->file->lines[line_number];
uint32_t line_len = line_end - line_start - 1;
start = location->file->contents + line_start - location->file->start_id;
eprintf(number_buffer, line_number, line_len, start);
switch (file_contents[i])
{
case '\0':
case '\n':
line_starts[0] = i + 1;
goto FOUND;
default:
continue;
}
}
FOUND:;
const char *start_char = NULL;
for (unsigned i = lines_found; i > 0; i--)
{
SourceLoc line_start = line_starts[i];
SourceLoc line_end = line_starts[i - 1] - 1;
uint32_t line_number = location->row + 1 - i;
uint32_t line_len = line_end - line_start;
start_char = file->contents + line_start;
eprintf(number_buffer, line_number, line_len, start_char);
}
eprintf(" ");
for (unsigned i = 0; i < max_line_length; i++)
@@ -70,7 +97,7 @@ static void print_error2(SourceLocation *location, const char *message, PrintTyp
for (unsigned i = 1; i < location->col; i++)
{
switch (start[i])
switch (start_char[i])
{
case '\t':
eprintf("\t");
@@ -87,13 +114,13 @@ static void print_error2(SourceLocation *location, const char *message, PrintTyp
switch (print_type)
{
case PRINT_TYPE_ERROR:
eprintf("(%s:%d) Error: %s\n\n", location->file->name, location->line, message);
eprintf("(%s:%d) Error: %s\n\n", file->name, location->row, message);
break;
case PRINT_TYPE_PREV:
eprintf("(%s:%d) %s\n\n", location->file->name, location->line, message);
eprintf("(%s:%d) %s\n\n", file->name, location->row, message);
break;
case PRINT_TYPE_WARN:
eprintf("(%s:%d) Warning: %s\n\n", location->file->name, location->line, message);
eprintf("(%s:%d) Warning: %s\n\n", file->name, location->row, message);
break;
default:
UNREACHABLE
@@ -105,7 +132,7 @@ static void vprint_error(SourceLocation *location, const char *message, va_list
{
char buffer[256];
vsnprintf(buffer, 256, message, args);
print_error2(location, buffer, PRINT_TYPE_ERROR);
print_error(location, buffer, PRINT_TYPE_ERROR);
}
@@ -135,7 +162,7 @@ void sema_prev_at_range3(SourceSpan span, const char *message, ...)
vsnprintf(buffer, 256, message, args);
SourceLocation loc = *start;
loc.length = end->start - start->start + end->length;
print_error2(&loc, buffer, PRINT_TYPE_PREV);
print_error(&loc, buffer, PRINT_TYPE_PREV);
va_end(args);
}
@@ -157,12 +184,12 @@ void sema_error_at_prev_end(Token token, const char *message, ...)
SourceLocation *curr = TOKLOC(token);
SourceLocation *prev = TOKLOC((TokenId) { token.id.index - 1 });
SourceLocation location;
if (curr->file != prev->file)
if (curr->file_id != prev->file_id)
{
// Ok, this is the first location, so then we create a "start" location:
location = *curr;
location.start = 0;
location.line = 1;
location.row = 1;
location.col = 1;
}
else

View File

@@ -94,7 +94,6 @@ typedef enum
CAST_XIERR,
CAST_PTRPTR,
CAST_PTRXI,
CAST_ARRPTR,
CAST_ARRVEC,
CAST_STRPTR,
CAST_PTRBOOL,

View File

@@ -18,10 +18,36 @@ typedef enum
DOC_END_ERROR,
} DocEnd;
/**
 * Validate that a computed column number fits in the 16-bit `col` field of
 * SourceLocation, aborting with an error if it does not.
 *
 * @param col the computed column (wide type so overflow is detectable)
 * @param row the row the column belongs to, used only in the error message
 * @return the column narrowed to uint16_t
 */
static inline uint16_t check_col(intptr_t col, uint32_t row)
{
	// %u matches the unsigned `row` argument (was %d, a signed/unsigned
	// specifier mismatch); 65535 is UINT16_MAX, the field's capacity.
	// NOTE(review): assumes error_exit is printf-style — confirm its signature.
	if (col > 65535) error_exit("Column on line %u exceeded %d.", row, 65535);
	return (uint16_t)col;
}
/**
 * Validate that a computed value fits in the lexer's line/length budget
 * (1 MiB), aborting with an error if it does not.
 *
 * @param line the computed value (line number or token length — callers in
 *             this commit pass both; the message assumes a line count)
 * @param row  the row reported in the error message
 * @return the value narrowed to uint32_t
 */
static inline uint32_t check_row(intptr_t line, uint32_t row)
{
	// %u matches the unsigned `row` argument (was %d, a signed/unsigned
	// specifier mismatch).
	// NOTE(review): assumes error_exit is printf-style — confirm its signature.
	if (line > 1024 * 1024) error_exit("Token on line %u exceeded %d.", row, 1024 * 1024);
	return (uint32_t)line;
}
// --- Lexing general methods.
static bool lexer_scan_token_inner(Lexer *lexer, LexMode mode);
// Mark the current scan position as the start of a new token, snapshotting
// the row number and the row's first character so the token's source
// location can be computed later (and restored by backtrace_to_lexing_start).
static inline void begin_new_token(Lexer *lexer)
{
	lexer->start_row_start = lexer->line_start;
	lexer->start_row = lexer->current_row;
	lexer->lexing_start = lexer->current;
}
// Rewind the lexer to the last token start: restores the scan position,
// row number and row start exactly as snapshotted by begin_new_token.
static inline void backtrace_to_lexing_start(Lexer *lexer)
{
	lexer->line_start = lexer->start_row_start;
	lexer->current_row = lexer->start_row;
	lexer->current = lexer->lexing_start;
}
// Peek at the current character in the buffer.
static inline char peek(Lexer *lexer)
{
@@ -38,14 +64,10 @@ static inline char prev(Lexer *lexer)
static inline void backtrack(Lexer *lexer)
{
lexer->current--;
}
// Store a line ending (and current line start at the current character)
void lexer_store_line_end(Lexer *lexer)
{
lexer->current_line++;
lexer->line_start = lexer->current + 1;
source_file_append_line_end(lexer->file, (SourceLoc)(lexer->file->start_id + lexer->current - lexer->file_begin));
if (lexer->current[0] == '\n')
{
lexer->current_row--;
}
}
// Peek one character ahead.
@@ -55,16 +77,24 @@ static inline char peek_next(Lexer *lexer)
}
// Return the current character and step one character forward.
static inline char next(Lexer *lexer)
static inline void next(Lexer *lexer)
{
return *(lexer->current++);
if (lexer->current[0] == '\n')
{
lexer->line_start = lexer->current + 1;
lexer->current_row++;
}
lexer->current++;
}
// Skip the x next characters.
static inline void skip(Lexer *lexer, int steps)
{
assert(steps > 0);
lexer->current += steps;
for (int i = 0; i < steps; i++)
{
next(lexer);
}
}
// Is the current character '\0' if so we assume we reached the end.
@@ -78,7 +108,7 @@ static inline bool match(Lexer *lexer, char expected)
{
if (reached_end(lexer)) return false;
if (*lexer->current != expected) return false;
lexer->current++;
next(lexer);
return true;
}
@@ -103,38 +133,28 @@ static inline void add_generic_token(Lexer *lexer, TokenType type)
token_type[0] = (unsigned char)type;
// Set the location.
location->file = lexer->file;
location->file_id = lexer->file->file_id;
location->start = (uint32_t)(lexer->lexing_start - lexer->file_begin);
// Calculate the column
if (lexer->lexing_start < lexer->line_start)
uint32_t line = lexer->start_row;
location->row = line;
if (line == lexer->current_row)
{
// In this case lexing started before the start of the current line.
// Start by looking at the previous line.
SourceLoc *current = &lexer->file->lines[lexer->current_line - 1];
location->line = lexer->current_line;
// Walk upwards until we find a line that starts before the current.
while (*current > location->start)
{
location->line--;
current--;
}
// We found the line we wanted, so the col is just an offset from the start.
location->col = location->start - *current + 1;
// Length is restricted to the end of the line.
location->length = current[1] - current[0] - 1;
}
else
{
// The simple case, where the parsing started on the current line.
location->line = lexer->current_line;
// Col is simple difference.
location->col = (unsigned) (lexer->lexing_start - lexer->line_start) + 1;
location->col = check_col(lexer->lexing_start - lexer->line_start + 1, line);
// Start is offset to file begin.
location->start = (SourceLoc) (lexer->lexing_start - lexer->file_begin);
// Length is diff between current and start.
location->length = (SourceLoc) (lexer->current - lexer->lexing_start);
location->length = check_row(lexer->current - lexer->lexing_start, line);
}
else
{
location->col = check_col(lexer->lexing_start - lexer->start_row_start + 1, line);
// Start is offset to file begin.
location->start = (SourceLoc) (lexer->lexing_start - lexer->file_begin);
location->length = 1;
}
// Return pointers to the data and the location,
// these maybe be used to fill in data.
lexer->latest_token_data = data;
@@ -153,22 +173,56 @@ static bool add_error_token(Lexer *lexer, const char *message, ...)
return false;
}
static bool add_error_token_at(Lexer *lexer, const char *loc, uint32_t len, const char *message, ...)
static bool add_error_token_at_start(Lexer *lexer, const char *message, ...)
{
va_list list;
va_start(list, message);
SourceLocation location = { .file = lexer->file,
.start = (uint32_t) (loc - lexer->file_begin),
.line = lexer->current_line,
.length = len,
.col = (uint32_t) (loc - lexer->line_start) + 1,
SourceLocation location = { .file_id = lexer->file->file_id,
.start = (uint32_t) (lexer->lexing_start - lexer->file_begin),
.row = lexer->start_row,
.length = 1,
.col = check_col((lexer->lexing_start - lexer->start_row_start) + 1, lexer->start_row),
};
sema_verror_range(&location, message, list);
va_end(list);
add_generic_token(lexer, TOKEN_INVALID_TOKEN);
return false;
}
static bool add_error_token_at(Lexer *lexer, const char *loc, uint32_t len, const char *message, ...)
{
va_list list;
va_start(list, message);
uint32_t current_line = lexer->current_row;
SourceLocation location = { .file_id = lexer->file->file_id,
.start = (uint32_t) (loc - lexer->file_begin),
.row = current_line,
.length = len,
.col = check_col((loc - lexer->line_start) + 1, current_line),
};
sema_verror_range(&location, message, list);
va_end(list);
add_generic_token(lexer, TOKEN_INVALID_TOKEN);
return false;
}
static bool add_error_token_at_current(Lexer *lexer, const char *message, ...)
{
va_list list;
va_start(list, message);
uint32_t current_line = lexer->current_row;
SourceLocation location = { .file_id = lexer->file->file_id,
.start = (uint32_t) (lexer->current - lexer->file_begin),
.row = current_line,
.length = 1,
.col = check_col((lexer->current - lexer->line_start) + 1, current_line),
};
sema_verror_range(&location, message, list);
va_end(list);
add_generic_token(lexer, TOKEN_INVALID_TOKEN);
return false;
}
// Add a new regular token.
static inline bool add_token(Lexer *lexer, TokenType type, const char *string)
{
@@ -204,7 +258,6 @@ static inline bool parse_line_comment(Lexer *lexer)
// If we found EOL, then walk past '\n'
if (!reached_end(lexer))
{
lexer_store_line_end(lexer);
next(lexer);
}
return success;
@@ -240,7 +293,6 @@ static inline bool parse_multiline_comment(Lexer *lexer)
}
break;
case '\n':
lexer_store_line_end(lexer);
break;
case '\0':
if (type != TOKEN_DOC_COMMENT) return add_token(lexer, type, lexer->lexing_start);
@@ -264,7 +316,6 @@ static void skip_whitespace(Lexer *lexer, LexMode lex_type)
{
case '\n':
if (lex_type != LEX_NORMAL) return;
lexer_store_line_end(lexer);
FALLTHROUGH;
case ' ':
case '\t':
@@ -296,7 +347,8 @@ static inline bool scan_ident(Lexer *lexer, TokenType normal, TokenType const_to
}
while (peek(lexer) == '_')
{
hash = FNV1a(next(lexer), hash);
hash = FNV1a(peek(lexer), hash);
next(lexer);
}
while (1)
{
@@ -333,12 +385,14 @@ static inline bool scan_ident(Lexer *lexer, TokenType normal, TokenType const_to
default:
goto EXIT;
}
hash = FNV1a(next(lexer), hash);
hash = FNV1a(peek(lexer), hash);
next(lexer);
}
// Allow bang!
if (peek(lexer) == '!' && type == normal)
{
hash = FNV1a(next(lexer), hash);
hash = FNV1a('!', hash);
next(lexer);
}
EXIT:;
uint32_t len = (uint32_t)(lexer->current - lexer->lexing_start);
@@ -353,6 +407,12 @@ static inline bool scan_ident(Lexer *lexer, TokenType normal, TokenType const_to
// --- Number scanning
/**
* For C3 we use the practice of f<bit-width> u<bit-width> and s<bit-width>
* @param lexer
* @param is_float
* @return
*/
static bool scan_number_suffix(Lexer *lexer, bool *is_float)
{
if (!is_alphanum_(peek(lexer))) return true;
@@ -390,12 +450,16 @@ static bool scan_number_suffix(Lexer *lexer, bool *is_float)
*/
static bool scan_oct(Lexer *lexer)
{
if (!is_oct(next(lexer)))
if (!is_oct(peek(lexer)))
{
backtrack(lexer);
return add_error_token_at(lexer, lexer->current, 1, "An expression starting with '0o' should be followed by octal numbers (0-7).");
return add_error_token_at_current(lexer, "An expression starting with '0o' should be followed by octal numbers (0-7).");
}
next(lexer);
while (is_oct_or_(peek(lexer))) next(lexer);
if (is_number(peek(lexer)))
{
return add_error_token_at_current(lexer, "An expression starting with '0o' should be followed by octal numbers (0-7).");
}
bool is_float = false;
if (!scan_number_suffix(lexer, &is_float)) return false;
if (is_float)
@@ -410,12 +474,16 @@ static bool scan_oct(Lexer *lexer)
**/
static bool scan_binary(Lexer *lexer)
{
if (!is_binary(next(lexer)))
if (!is_binary(peek(lexer)))
{
backtrack(lexer);
return add_error_token_at(lexer, lexer->current, 1, "An expression starting with '0b' should be followed by binary digits (0-1).");
return add_error_token_at_current(lexer, "An expression starting with '0b' should be followed by binary digits (0-1).");
}
next(lexer);
while (is_binary_or_(peek(lexer))) next(lexer);
if (is_number(peek((lexer))))
{
return add_error_token_at_current(lexer, "An expression starting with '0b' should be followed by binary digits (0-1).");
}
bool is_float = false;
if (!scan_number_suffix(lexer, &is_float)) return false;
if (is_float)
@@ -434,16 +502,21 @@ static inline bool scan_exponent(Lexer *lexer)
{
// Step past e/E or p/P
next(lexer);
char c = next(lexer);
char c = peek(lexer);
next(lexer);
// Step past +/-
if (c == '+' || c == '-') c = next(lexer);
if (c == '+' || c == '-')
{
c = peek(lexer);
next(lexer);
}
// Now we need at least one digit
if (!is_digit(c))
{
if (c == 0)
{
backtrack(lexer);
return add_error_token(lexer, "End of file was reached while parsing the exponent.");
return add_error_token_at_current(lexer, "End of file was reached while parsing the exponent.");
}
if (c == '\n') return add_error_token(lexer, "End of line was reached while parsing the exponent.");
if (c < 31 || c > 127) add_error_token(lexer, "An unexpected character was found while parsing the exponent.");
@@ -460,11 +533,11 @@ static inline bool scan_exponent(Lexer *lexer)
**/
static inline bool scan_hex(Lexer *lexer)
{
if (!is_hex(next(lexer)))
if (!is_hex(peek(lexer)))
{
backtrack(lexer);
return add_error_token_at(lexer, lexer->current, 1, "'0x' starts a hexadecimal number, so the next character should be 0-9, a-f or A-F.");
return add_error_token_at_current(lexer, "'0x' starts a hexadecimal number, so the next character should be 0-9, a-f or A-F.");
}
next(lexer);
while (is_hex_or_(peek(lexer))) next(lexer);
bool is_float = false;
if (peek(lexer) == '.' && peek_next(lexer) != '.')
@@ -472,7 +545,7 @@ static inline bool scan_hex(Lexer *lexer)
is_float = true;
next(lexer);
char c = peek(lexer);
if (c == '_') return add_error_token(lexer, "Can't parse this as a floating point value due to the '_' directly after decimal point.");
if (c == '_') return add_error_token_at_current(lexer, "'_' is not allowed directly after decimal point, try removing it.");
if (is_hex(c)) next(lexer);
while (is_hex_or_(peek(lexer))) next(lexer);
}
@@ -482,7 +555,11 @@ static inline bool scan_hex(Lexer *lexer)
is_float = true;
if (!scan_exponent(lexer)) return false;
}
if (prev(lexer) == '_') return add_error_token(lexer, "The number ended with '_', but that character needs to be between, not after, digits.");
if (prev(lexer) == '_')
{
backtrack(lexer);
return add_error_token_at_current(lexer, "The number ended with '_', which isn't allowed, please remove it.");
}
if (!scan_number_suffix(lexer, &is_float)) return false;
return add_token(lexer, is_float ? TOKEN_REAL : TOKEN_INTEGER, lexer->lexing_start);
}
@@ -511,7 +588,7 @@ static inline bool scan_dec(Lexer *lexer)
next(lexer);
// Check our rule to disallow 123._32
char c = peek(lexer);
if (c == '_') return add_error_token(lexer, "Can't parse this as a floating point value due to the '_' directly after decimal point.");
if (c == '_') return add_error_token_at_current(lexer, "'_' is not allowed directly after decimal point, try removing it.");
// Now walk until we see no more digits.
// This allows 123. as a floating point number.
while (is_digit_or_(peek(lexer))) next(lexer);
@@ -525,7 +602,11 @@ static inline bool scan_dec(Lexer *lexer)
if (!scan_exponent(lexer)) return false;
}
if (prev(lexer) == '_') return add_error_token(lexer, "The number ended with '_', but that character needs to be between, not after, digits.");
if (prev(lexer) == '_')
{
backtrack(lexer);
return add_error_token_at_current(lexer, "The number ended with '_', which isn't allowed, please remove it.");
}
if (!scan_number_suffix(lexer, &is_float)) return false;
return add_token(lexer, is_float ? TOKEN_REAL : TOKEN_INTEGER, lexer->lexing_start);
}
@@ -622,8 +703,9 @@ static inline int64_t scan_utf8(Lexer *lexer, unsigned char c)
for (int i = 1; i < utf8_bytes; i++)
{
result <<= 6U;
if (peek(lexer) == '\0') return 0xFFFD;
c = (unsigned char)next(lexer);
c = (unsigned char)peek(lexer);
if (c == '\0') return 0xFFFD;
next(lexer);
if ((c & 0xc0) != 0x80)
{
goto ERROR;
@@ -659,29 +741,29 @@ static inline bool scan_char(Lexer *lexer)
char c;
Int128 b = { 0, 0 };
while ((c = next(lexer)) != '\'')
while (!match(lexer, '\''))
{
c = peek(lexer);
next(lexer);
// End of file may occur:
if (c == '\0')
{
backtrack(lexer);
return add_error_token(lexer, "The character literal did not terminate.");
return add_error_token_at_start(lexer, "The character literal did not terminate.");
}
// We might exceed the width that we allow.
if (width > 15) return add_error_token(lexer, "The character literal exceeds 16 characters.");
if (width > 15) return add_error_token_at_start(lexer, "The character literal exceeds 16 characters.");
// Handle (expected) utf-8 characters.
if ((unsigned)c >= (unsigned)0x80)
{
if (width != 0) goto UNICODE_IN_MULTI;
const char *start = lexer->current;
int64_t utf8 = scan_utf8(lexer, (unsigned char)c);
if (utf8 < 0) return false;
if (!match(lexer, '\''))
{
if (peek(lexer) == '\0') continue;
lexer->lexing_start = start;
return add_error_token(lexer, "Unicode character literals may only contain one character, "
"please remove the additional ones or use all ASCII.");
backtrack(lexer);
return add_error_token_at_current(lexer, "Unicode character literals may only contain one character, "
"please remove the additional ones or use all ASCII.");
}
b.low = (uint64_t) utf8;
width = utf8 > 0xffff ? 4 : 2;
@@ -689,23 +771,24 @@ static inline bool scan_char(Lexer *lexer)
}
// Parse the escape code
signed char escape = ' ';
const char *start = lexer->current;
if (c == '\\')
{
assert(c == '\\');
c = next(lexer);
c = peek(lexer);
escape = is_valid_escape(c);
if (escape == -1)
{
backtrack(lexer);
lexer->lexing_start = start - 1;
lexer->lexing_start += 1;
if (c > ' ' && c <= 127)
{
next(lexer);
return add_error_token(lexer, "Invalid escape sequence '\\%c'.", c);
}
return add_error_token_at(lexer, start, 1, "An escape sequence was expected after '\\'.");
return add_error_token_at_current(lexer, "An escape sequence was expected after '\\'.");
}
next(lexer);
}
const char *escape_begin = lexer->current - 2;
switch (escape)
{
case 'x':
@@ -713,9 +796,7 @@ static inline bool scan_char(Lexer *lexer)
int64_t hex = scan_hex_literal(lexer, 2);
if (hex < 0)
{
lexer->lexing_start = start - 1;
// Fix underlining if this is an unfinished escape.
return add_error_token(lexer, "Expected a two character hex value after \\x.");
return add_error_token_at(lexer, escape_begin, lexer->current - escape_begin, "Expected a two character hex value after \\x.");
}
// We can now reassign c and use the default code.
c = (char)hex;
@@ -731,21 +812,19 @@ static inline bool scan_char(Lexer *lexer)
// The hex parsing may have failed, lacking more hex chars.
if (hex < 0)
{
lexer->lexing_start = start - 1;
return add_error_token(lexer, "Expected %s character hex value after \\%c.",
escape == 'u' ? "a four" : "an eight", escape);
begin_new_token(lexer);
return add_error_token_at(lexer, escape_begin, lexer->current - escape_begin,
"Expected %s character hex value after \\%c.",
escape == 'u' ? "a four" : "an eight", escape);
}
// If we don't see the end here, then something is wrong.
if (!match(lexer, '\''))
{
// It may be the end of the line, if so use the default handling by invoking "continue"
if (peek(lexer) == '\0') continue;
// Otherwise step forward and mark it as an error.
next(lexer);
lexer->lexing_start = lexer->current - 1;
return add_error_token(lexer,
"Character literals with '\\%c' can only contain one character, please remove this one.",
escape);
return add_error_token_at_current(lexer,
"Character literals with '\\%c' can only contain one character, please remove this one.",
escape);
}
// Assign the value and go to DONE.
b.low = (uint64_t) hex;
@@ -764,7 +843,6 @@ static inline bool scan_char(Lexer *lexer)
b = i128_shl64(b, 8);
b = i128_add64(b, (unsigned char)c);
}
assert(width > 0 && width <= 16);
if (width > 8 && !platform_target.int128)
{
@@ -790,9 +868,7 @@ static inline void skip_first_line_if_empty(Lexer *lexer)
{
case '\n':
// Line end? then we jump to the first token after line end.
lexer->current = current - 1;
lexer_store_line_end(lexer);
lexer->current++;
next(lexer);
return;
case ' ':
case '\t':
@@ -969,13 +1045,13 @@ bool scan_consume_end_of_multiline(Lexer *lexer, bool error_on_eof)
int consume_end = 3;
while (consume_end > 0)
{
char c = next(lexer);
char c = peek(lexer);
next(lexer);
if (c == '\0')
{
backtrack(lexer);
if (!error_on_eof) return false;
return add_error_token_at(lexer, lexer->current - 1, 1, "The multi-line string unexpectedly ended. "
"Did you forget a '\"\"\"' somewhere?");
return add_error_token_at_start(lexer, "The multi-line string unexpectedly ended. "
"Did you forget a '\"\"\"' somewhere?");
}
if (c == '"') consume_end--;
}
@@ -1023,7 +1099,6 @@ static inline bool scan_multiline_string(Lexer *lexer)
// update the line end and store it in the resulting buffer.
if (c == '\n')
{
lexer_store_line_end(lexer);
next(lexer);
destination[len++] = c;
line = 0;
@@ -1037,8 +1112,8 @@ static inline bool scan_multiline_string(Lexer *lexer)
// We reached EOF, or escape + end of file.
if (c == '\0' || (c == '\\' && peek(lexer) == '\0'))
{
return add_error_token_at(lexer, lexer->current - 1, 1, "The multi-line string unexpectedly ended. "
"Did you forget a '\"\"\"' somewhere?");
return add_error_token_at_start(lexer, "The multi-line string unexpectedly ended. "
"Did you forget a '\"\"\"' somewhere?");
}
// An escape sequence was reached.
@@ -1053,11 +1128,12 @@ static inline bool scan_multiline_string(Lexer *lexer)
int scanned = append_esc_string_token(destination, lexer->current, &len);
if (scanned < 0)
{
add_error_token_at(lexer, lexer->current - 1, 2, "Invalid escape in string.");
backtrack(lexer);
add_error_token_at_current(lexer, "Invalid escape in string.");
scan_consume_end_of_multiline(lexer, false);
return false;
}
lexer->current += scanned;
skip(lexer, scanned);
continue;
}
// Now first we skip any empty space if line has not been reached.
@@ -1080,10 +1156,6 @@ static inline void consume_to_end_quote(Lexer *lexer)
char c;
while ((c = peek(lexer)) != '\0' && c != '"')
{
if (c == '\n')
{
lexer_store_line_end(lexer);
}
next(lexer);
}
}
@@ -1114,22 +1186,24 @@ static inline bool scan_string(Lexer *lexer)
size_t len = 0;
while (lexer->current < end)
{
c = next(lexer);
c = peek(lexer);
next(lexer);
if (c == '\0' || (c == '\\' && peek(lexer) == '\0'))
{
if (c == '\0') backtrack(lexer);
add_error_token_at(lexer, lexer->current - 1, 1, "The end of the file was reached "
"while parsing the string. "
"Did you forget (or accidentally add) a '\"' somewhere?");
add_error_token_at_start(lexer, "The end of the file was reached "
"while parsing the string. "
"Did you forget (or accidentally add) a '\"' somewhere?");
consume_to_end_quote(lexer);
return false;
}
if (c == '\n' || (c == '\\' && peek(lexer) == '\n'))
{
add_error_token_at(lexer, lexer->current - 1, 1, "The end of the line was reached "
"while parsing the string. "
"Did you forget (or accidentally add) a '\"' somewhere?");
lexer->current--;
backtrack(lexer);
add_error_token_at_start(lexer, "The end of the line was reached "
"while parsing the string. "
"Did you forget (or accidentally add) a '\"' somewhere?");
consume_to_end_quote(lexer);
return false;
}
@@ -1138,11 +1212,11 @@ static inline bool scan_string(Lexer *lexer)
int scanned = append_esc_string_token(destination, lexer->current, &len);
if (scanned < 0)
{
add_error_token_at(lexer, lexer->current - 1, 2, "Invalid escape in string.");
add_error_token_at_current(lexer, "Invalid escape in string.");
consume_to_end_quote(lexer);
return false;
}
lexer->current += scanned;
skip(lexer, scanned);
continue;
}
destination[len++] = c;
@@ -1158,14 +1232,16 @@ static inline bool scan_string(Lexer *lexer)
static inline bool scan_raw_string(Lexer *lexer)
{
char c;
while ((c = next(lexer)) != '`' || peek(lexer) == '`')
while (1)
{
c = peek(lexer);
next(lexer);
if (c == '`' && peek(lexer) != '`') break;
if (c == '\0')
{
backtrack(lexer);
return add_error_token_at(lexer, lexer->lexing_start , 1, "Reached the end of the file looking for "
"the end of the raw string that starts "
"here. Did you forget a '`' somewhere?");
return add_error_token_at_start(lexer, "Reached the end of the file looking for "
"the end of the raw string that starts "
"here. Did you forget a '`' somewhere?");
}
if (c == '`') next(lexer);
}
@@ -1191,34 +1267,39 @@ static inline bool scan_raw_string(Lexer *lexer)
static inline bool scan_hex_array(Lexer *lexer)
{
char start_char = next(lexer); // Step past ' or "
const char *hexdata = lexer->current;
char start_char = peek(lexer);
next(lexer); // Step past ' or "
char c;
uint64_t len = 0;
while (1)
{
c = next(lexer);
if (c == start_char) break;
c = peek(lexer);
if (c == 0)
{
backtrack(lexer);
lexer->lexing_start = lexer->current - 1;
return add_error_token(lexer, "The hex string seems to be missing a terminating '%c'", start_char);
return add_error_token_at_current(lexer, "The hex string seems to be missing a terminating '%c'", start_char);
}
if (c == start_char) break;
if (is_hex(c))
{
next(lexer);
len++;
continue;
}
if (!is_whitespace(c))
if (is_whitespace(c))
{
lexer->lexing_start = hexdata - 1;
lexer->current = hexdata;
return add_error_token(lexer,
"'%c' isn't a valid hexadecimal digit, all digits should be a-z, A-Z and 0-9.",
c);
next(lexer);
continue;
}
if (c > ' ' && c < 127)
{
return add_error_token_at_current(lexer,
"'%c' isn't a valid hexadecimal digit, all digits should be a-z, A-Z and 0-9.",
c);
}
return add_error_token_at_current(lexer,
"This isn't a valid hexadecimal digit, all digits should be a-z, A-Z and 0-9.");
}
next(lexer);
if (len % 2)
{
return add_error_token(lexer, "The hexadecimal string is not an even length, did you miss a digit somewhere?");
@@ -1233,27 +1314,25 @@ static inline bool scan_base64(Lexer *lexer)
{
next(lexer); // Step past 6
next(lexer); // Step past 4
char start_char = next(lexer); // Step past ' or "
const char *b64data = lexer->current;
char start_char = peek(lexer);
next(lexer); // Step past ' or "
char c;
unsigned end_len = 0;
uint64_t len = 0;
while (1)
{
c = next(lexer);
if (c == start_char) break;
c = peek(lexer);
if (c == 0)
{
backtrack(lexer);
lexer->lexing_start = lexer->current - 1;
return add_error_token(lexer, "The base64 string seems to be missing a terminating '%c'", start_char);
return add_error_token_at_start(lexer, "The base64 string seems to be missing a terminating '%c'", start_char);
}
next(lexer);
if (c == start_char) break;
if (is_base64(c))
{
if (end_len)
{
lexer->lexing_start = lexer->current - 1;
return add_error_token(lexer, "'%c' can't be placed after an ending '='", c);
return add_error_token_at_current(lexer, "'%c' can't be placed after an ending '='", c);
}
len++;
continue;
@@ -1262,7 +1341,7 @@ static inline bool scan_base64(Lexer *lexer)
{
if (end_len > 1)
{
return add_error_token_at(lexer, lexer->current - 1, 1, "There cannot be more than 2 '=' at the end of a base64 string.", c);
return add_error_token_at_current(lexer, "There cannot be more than 2 '=' at the end of a base64 string.", c);
}
end_len++;
continue;
@@ -1271,9 +1350,9 @@ static inline bool scan_base64(Lexer *lexer)
{
if (c < ' ' || c > 127)
{
return add_error_token_at(lexer, lexer->current - 1, 1, "A valid base64 character was expected here.");
return add_error_token_at_current(lexer, "A valid base64 character was expected here.");
}
return add_error_token_at(lexer, lexer->current - 1, 1, "'%c' is not a valid base64 character.", c);
return add_error_token_at_current(lexer, "'%c' is not a valid base64 character.", c);
}
}
if (!end_len && len % 4 != 0)
@@ -1300,8 +1379,8 @@ static inline bool scan_base64(Lexer *lexer)
}
if ((len + end_len) % 4 != 0)
{
return add_error_token(lexer, "Base64 strings must either be padded to multiple of 4, or if unpadded "
"- only need 1 or 2 bytes of extra padding.");
return add_error_token_at_start(lexer, "Base64 strings must either be padded to multiple of 4, or if unpadded "
"- only need 1 or 2 bytes of extra padding.");
}
uint64_t decoded_len = (3 * len - end_len) / 4;
if (!add_token(lexer, TOKEN_BYTES, lexer->lexing_start)) return false;
@@ -1348,7 +1427,7 @@ static bool parse_add_end_of_docs_if_present(Lexer *lexer)
// Otherwise, gladly skip ahead and store the end.
skip(lexer, lookahead + 1);
add_token(lexer, TOKEN_DOCS_END, lexer->lexing_start);
lexer->lexing_start = lexer->current;
begin_new_token(lexer);
return true;
}
@@ -1357,10 +1436,9 @@ static void parse_add_end_of_doc_line(Lexer *lexer)
{
assert(peek(lexer) == '\n');
// Add the EOL token.
lexer_store_line_end(lexer);
next(lexer);
add_token(lexer, TOKEN_DOCS_EOL, lexer->lexing_start);
lexer->lexing_start = lexer->current;
begin_new_token(lexer);
// Skip whitespace
skip_whitespace(lexer, LEX_DOCS);
// And any leading stars:
@@ -1376,7 +1454,7 @@ static DocEnd parse_doc_remainder(Lexer *lexer)
{
// Skip all initial whitespace.
skip_whitespace(lexer, LEX_DOCS);
lexer->lexing_start = lexer->current;
begin_new_token(lexer);
int characters_read = 0;
while (1)
@@ -1391,7 +1469,7 @@ static DocEnd parse_doc_remainder(Lexer *lexer)
if (characters_read > 0)
{
add_token(lexer, TOKEN_DOCS_LINE, 0);
lexer->lexing_start = lexer->current;
begin_new_token(lexer);
}
if (parse_add_end_of_docs_if_present(lexer)) return DOC_END_LAST;
// Otherwise use default parsing.
@@ -1401,14 +1479,14 @@ static DocEnd parse_doc_remainder(Lexer *lexer)
if (characters_read > 0)
{
add_token(lexer, TOKEN_DOCS_LINE, 0);
lexer->lexing_start = lexer->current;
begin_new_token(lexer);
}
return DOC_END_EOL;
case '\0':
if (characters_read > 0)
{
add_token(lexer, TOKEN_DOCS_LINE, 0);
lexer->lexing_start = lexer->current;
begin_new_token(lexer);
}
return DOC_END_EOF;
default:
@@ -1497,18 +1575,21 @@ static DocEnd parse_doc_param_directive(Lexer *lexer)
return parse_doc_remainder(lexer);
}
static DocEnd parse_doc_directive(Lexer *lexer)
{
// We expect a directive here.
if (!is_letter(peek_next(lexer)))
begin_new_token(lexer);
// First parse the '@'
next(lexer);
add_token(lexer, TOKEN_DOCS_DIRECTIVE, "@");
begin_new_token(lexer);
if (!is_letter(peek(lexer)))
{
next(lexer);
return add_error_token(lexer, "Expected doc directive here.");
}
lexer->lexing_start = lexer->current;
// First parse the '@'
skip(lexer, 1);
add_token(lexer, TOKEN_DOCS_DIRECTIVE, "@");
lexer->lexing_start = lexer->current;
// Then our keyword
if (!scan_ident(lexer, TOKEN_IDENT, TOKEN_CONST, TOKEN_TYPE_IDENT, 0)) return DOC_END_ERROR;
@@ -1557,7 +1638,10 @@ static bool parse_doc_comment(Lexer *lexer)
skip_whitespace(lexer, LEX_DOCS);
// 2. Did we find the end?
if (reached_end(lexer)) return add_error_token(lexer, "Missing '*/' to end the doc comment.");
if (reached_end(lexer))
{
return add_error_token_at_start(lexer, "Missing '*/' to end the doc comment.");
}
// 3. See if we reach the end of the docs.
if (parse_add_end_of_docs_if_present(lexer)) return true;
@@ -1607,7 +1691,7 @@ static bool lexer_scan_token_inner(Lexer *lexer, LexMode mode)
skip_whitespace(lexer, mode);
// Point start to the first non-whitespace character.
lexer->lexing_start = lexer->current;
begin_new_token(lexer);
if (reached_end(lexer))
{
@@ -1615,7 +1699,8 @@ static bool lexer_scan_token_inner(Lexer *lexer, LexMode mode)
return add_token(lexer, TOKEN_EOF, "\n") && false;
}
char c = next(lexer);
char c = peek(lexer);
next(lexer);
switch (c)
{
case '@':
@@ -1635,10 +1720,10 @@ static bool lexer_scan_token_inner(Lexer *lexer, LexMode mode)
if (is_letter(peek(lexer)))
{
add_token(lexer, TOKEN_BUILTIN, "$$");
lexer->lexing_start = lexer->current;
begin_new_token(lexer);
return scan_ident(lexer, TOKEN_IDENT, TOKEN_CONST_IDENT, TOKEN_TYPE_IDENT, 0);
}
return add_error_token(lexer, "Expected a letter after $$.");
return add_error_token_at_current(lexer, "Expected a letter after $$.");
}
return scan_ident(lexer, TOKEN_CT_IDENT, TOKEN_CT_CONST_IDENT, TOKEN_CT_TYPE_IDENT, '$');
case ',':
@@ -1744,7 +1829,7 @@ static bool lexer_scan_token_inner(Lexer *lexer, LexMode mode)
}
if (c < 0)
{
return add_error_token(lexer, "The 0%x character may not be placed outside of a string or comment, did you perhaps forget a \" somewhere?", (uint8_t)c);
return add_error_token(lexer, "The 0x%x character may not be placed outside of a string or comment, did you forget a \" somewhere?", (uint8_t)c);
}
return add_error_token(lexer, "'%c' may not be placed outside of a string or comment, did you perhaps forget a \" somewhere?", c);
@@ -1757,10 +1842,10 @@ void lexer_lex_file(Lexer *lexer)
{
lexer->token_start_id = (uint32_t) toktype_arena.allocated;
lexer->file_begin = lexer->file->contents;
lexer->lexing_start = lexer->file_begin;
lexer->current = lexer->lexing_start;
lexer->current_line = 1;
lexer->current = lexer->file_begin;
lexer->line_start = lexer->current;
lexer->current_row = 1;
begin_new_token(lexer);
const unsigned char *check = (const unsigned char *)lexer->current;
unsigned c;
int balance = 0;
@@ -1801,7 +1886,7 @@ void lexer_lex_file(Lexer *lexer)
DONE:
if (balance != 0)
{
add_error_token(lexer, "Invalid encoding - Unbalanced bidirectional markers.");
add_error_token_at_start(lexer, "Invalid encoding - Unbalanced bidirectional markers.");
return;
}
while(1)
@@ -1810,7 +1895,7 @@ DONE:
{
if (reached_end(lexer)) break;
while (!reached_end(lexer) && peek(lexer) != '\n') next(lexer);
lexer->lexing_start = lexer->current;
begin_new_token(lexer);
continue;
}
}

View File

@@ -21,7 +21,7 @@ static inline LLVMMetadataRef llvm_get_debug_struct(GenContext *c, Type *type, c
scope,
external_name_len ? type->name : "", external_name_len ? strlen(type->name) : 0,
loc ? c->debug.file : NULL,
loc ? loc->line : 0,
loc ? loc->row : 0,
type_size(type) * 8,
(uint32_t)(type_abi_alignment(type) * 8),
flags, NULL,
@@ -43,7 +43,7 @@ static inline LLVMMetadataRef llvm_get_debug_member(GenContext *c, Type *type, c
scope,
name, strlen(name),
loc ? c->debug.file : NULL,
loc ? loc->line : 0,
loc ? loc->row : 0,
type_size(type) * 8,
(uint32_t)(type_abi_alignment(type) * 8),
offset * 8, flags, llvm_get_debug_type_internal(c, type, scope));
@@ -80,7 +80,7 @@ void llvm_emit_debug_global_var(GenContext *c, Decl *global)
global->external_name,
strlen(global->external_name),
c->debug.file,
loc->line,
loc->row,
llvm_get_debug_type(c, global->type),
global->visibility == VISIBLE_LOCAL,
LLVMDIBuilderCreateExpression(c->debug.builder, NULL, 0),
@@ -116,11 +116,11 @@ void llvm_emit_debug_function(GenContext *c, Decl *decl)
decl->name, TOKLEN(decl->name_token),
decl->external_name, strlen(decl->external_name),
c->debug.file,
loc->line,
loc->row,
llvm_get_debug_type(c, decl->type),
decl->visibility == VISIBLE_LOCAL,
true,
loc->line,
loc->row,
flags,
active_target.optimization_level != OPTIMIZATION_NONE);
LLVMSetSubprogram(decl->backend_ref, c->debug.function);
@@ -136,7 +136,7 @@ void llvm_emit_debug_local_var(GenContext *c, Decl *decl)
decl->name,
TOKLEN(decl->name_token),
c->debug.file,
location->line,
location->row,
llvm_get_debug_type(c, decl->type),
active_target.optimization_level != OPTIMIZATION_NONE,
LLVMDIFlagZero,
@@ -147,7 +147,7 @@ void llvm_emit_debug_local_var(GenContext *c, Decl *decl)
LLVMDIBuilderInsertDeclareAtEnd(c->debug.builder,
decl->backend_ref, var,
LLVMDIBuilderCreateExpression(c->debug.builder, NULL, 0),
LLVMDIBuilderCreateDebugLocation(c->context, location->line, location->col,
LLVMDIBuilderCreateDebugLocation(c->context, location->row, location->col,
c->debug.function, inline_at),
LLVMGetInsertBlock(c->builder));
}
@@ -171,7 +171,7 @@ void llvm_emit_debug_parameter(GenContext *c, Decl *parameter, unsigned index)
strlen(name),
index + 1,
c->debug.file,
loc->line,
loc->row,
llvm_get_debug_type(c, parameter->type),
always_preserve,
LLVMDIFlagZero);
@@ -181,8 +181,8 @@ void llvm_emit_debug_parameter(GenContext *c, Decl *parameter, unsigned index)
parameter->backend_ref,
parameter->var.backend_debug_ref,
LLVMDIBuilderCreateExpression(c->debug.builder, NULL, 0),
LLVMDIBuilderCreateDebugLocation(c->context, loc->line, loc->col, c->debug.function,
inline_at),
LLVMDIBuilderCreateDebugLocation(c->context, loc->row, loc->col, c->debug.function,
inline_at),
LLVMGetInsertBlock(c->builder));
@@ -200,7 +200,7 @@ void llvm_emit_debug_location(GenContext *context, SourceSpan location)
LLVMMetadataRef scope = llvm_debug_current_scope(context);
LLVMMetadataRef loc = LLVMDIBuilderCreateDebugLocation(context->context,
source_loc->line,
source_loc->row,
source_loc->col,
scope, /* inlined at */ 0);
@@ -212,7 +212,7 @@ static LLVMMetadataRef llvm_debug_forward_comp(GenContext *c, Type *type, const
return LLVMDIBuilderCreateReplaceableCompositeType(c->debug.builder, id_counter++,
type->name, strlen(type->name),
scope,
c->debug.file, loc ? loc->line : 0,
c->debug.file, loc ? loc->row : 0,
1 /* version TODO */,
type_size(type) * 8,
type_abi_alignment(type) * 8,
@@ -237,7 +237,7 @@ void llvm_debug_push_lexical_scope(GenContext *context, SourceSpan location)
LLVMMetadataRef block =
LLVMDIBuilderCreateLexicalBlock(context->debug.builder, scope, context->debug.file,
source_loc->line,
source_loc->row,
source_loc->col);
llvm_debug_scope_push(context, block);
@@ -301,7 +301,7 @@ static LLVMMetadataRef llvm_debug_enum_type(GenContext *c, Type *type, LLVMMetad
LLVMMetadataRef real = LLVMDIBuilderCreateEnumerationType(c->debug.builder,
scope,
type->decl->name, TOKLEN(type->decl->name_token),
c->debug.file, location->line, type_size(type) * 8,
c->debug.file, location->row, type_size(type) * 8,
type_abi_alignment(type) * 8,
elements, vec_size(elements),
llvm_get_debug_type(c, enum_real_type));
@@ -344,7 +344,7 @@ static LLVMMetadataRef llvm_debug_structlike_type(GenContext *c, Type *type, LLV
scope,
type->decl->name ? type->decl->name : "",
type->decl->name ? TOKLEN(type->decl->name_token) : 0,
c->debug.file, location->line, type_size(type) * 8,
c->debug.file, location->row, type_size(type) * 8,
type_abi_alignment(type) * 8,
LLVMDIFlagZero,
elements, vec_size(members),
@@ -440,7 +440,7 @@ static LLVMMetadataRef llvm_debug_typedef_type(GenContext *c, Type *type)
LLVMMetadataRef real = LLVMDIBuilderCreateTypedef(c->debug.builder,
llvm_get_debug_type(c, original_type),
decl->name, TOKLEN(decl->name_token),
c->debug.file, location->line,
c->debug.file, location->row,
c->debug.file, type_abi_alignment(type));
if (type->backend_debug_type)
{

View File

@@ -1181,8 +1181,6 @@ void llvm_emit_cast(GenContext *c, CastKind cast_kind, BEValue *value, Type *to_
case CAST_SAPTR:
llvm_emit_subarray_pointer(c, value, value);
break;
case CAST_ARRPTR:
TODO
case CAST_EREU:
// This is a no op.
assert(type_lowering(to_type) == type_lowering(from_type));
@@ -2319,6 +2317,7 @@ static void gencontext_emit_slice(GenContext *c, BEValue *be_value, Expr *expr)
// Calculate the size
LLVMValueRef size = LLVMBuildSub(c->builder, LLVMBuildAdd(c->builder, end.value, llvm_const_int(c, start.type, 1), ""), start.value, "size");
LLVMValueRef start_pointer;
switch (parent.type->type_kind)
{
case TYPE_ARRAY:
@@ -2336,8 +2335,11 @@ static void gencontext_emit_slice(GenContext *c, BEValue *be_value, Expr *expr)
case TYPE_POINTER:
start_pointer = llvm_emit_pointer_inbounds_gep_raw(c, llvm_get_pointee_type(c, parent.type), parent.value, start.value);
break;
default:
case TYPE_FLEXIBLE_ARRAY:
case TYPE_VECTOR:
TODO
default:
UNREACHABLE
}
// Create a new subarray type
@@ -3500,7 +3502,8 @@ static inline void llvm_emit_force_unwrap_expr(GenContext *c, BEValue *be_value,
{
// TODO, we should add info about the error.
SourceLocation *loc = TOKLOC(expr->span.loc);
llvm_emit_debug_output(c, "Runtime error force unwrap!", loc->file->name, c->cur_func_decl->external_name, loc->line);
File *file = source_file_by_id(loc->file_id);
llvm_emit_debug_output(c, "Runtime error force unwrap!", file->name, c->cur_func_decl->external_name, loc->row);
llvm_emit_call_intrinsic(c, intrinsic_id.trap, NULL, 0, NULL, 0);
LLVMBuildUnreachable(c->builder);
c->current_block = NULL;

View File

@@ -431,7 +431,8 @@ void llvm_emit_for_stmt(GenContext *c, Ast *ast)
if (loop == LOOP_INFINITE)
{
SourceLocation *loc = TOKLOC(ast->span.loc);
llvm_emit_debug_output(c, "Infinite loop found", loc->file->name, c->cur_func_decl->external_name, loc->line);
File *file = source_file_by_id(loc->file_id);
llvm_emit_debug_output(c, "Infinite loop found", file->name, c->cur_func_decl->external_name, loc->row);
LLVMBuildUnreachable(c->builder);
LLVMBasicBlockRef block = llvm_basic_block_new(c, "unreachable_block");
c->current_block = NULL;
@@ -994,7 +995,8 @@ static inline void llvm_emit_assert_stmt(GenContext *c, Ast *ast)
{
error = "Assert violation";
}
llvm_emit_debug_output(c, error, loc->file->name, c->cur_func_decl->name, loc->line);
File *file = source_file_by_id(loc->file_id);
llvm_emit_debug_output(c, error, file->name, c->cur_func_decl->name, loc->row);
llvm_emit_call_intrinsic(c, intrinsic_id.trap, NULL, 0, NULL, 0);
llvm_emit_br(c, on_ok);
llvm_emit_block(c, on_ok);
@@ -1045,7 +1047,8 @@ static inline void llvm_emit_asm_stmt(GenContext *c, Ast *ast)
static inline void gencontext_emit_unreachable_stmt(GenContext *context, Ast *ast)
{
SourceLocation *loc = TOKLOC(ast->span.loc);
llvm_emit_debug_output(context, "Unreachable statement reached.", loc->file->name, context->cur_func_decl->external_name, loc->line);
File *file = source_file_by_id(loc->file_id);
llvm_emit_debug_output(context, "Unreachable statement reached.", file->name, context->cur_func_decl->external_name, loc->row);
llvm_emit_call_intrinsic(context, intrinsic_id.trap, NULL, 0, NULL, 0);
LLVMBuildUnreachable(context->builder);
LLVMBasicBlockRef block = llvm_basic_block_new(context, "unreachable_block");
@@ -1222,7 +1225,8 @@ void llvm_emit_panic_if_true(GenContext *c, BEValue *value, const char *panic_na
assert(llvm_value_is_bool(value));
llvm_emit_cond_br(c, value, panic_block, ok_block);
llvm_emit_block(c, panic_block);
llvm_emit_debug_output(c, panic_name, loc->file->name, c->cur_func_decl->name, loc->line);
File *file = source_file_by_id(loc->file_id);
llvm_emit_debug_output(c, panic_name, file->name, c->cur_func_decl->name, loc->row);
llvm_emit_call_intrinsic(c, intrinsic_id.trap, NULL, 0, NULL, 0);
llvm_emit_br(c, ok_block);
llvm_emit_block(c, ok_block);
@@ -1230,13 +1234,14 @@ void llvm_emit_panic_if_true(GenContext *c, BEValue *value, const char *panic_na
void llvm_emit_panic_on_true(GenContext *c, LLVMValueRef value, const char *panic_name, SourceLocation *loc)
{
File *file = source_file_by_id(loc->file_id);
LLVMBasicBlockRef panic_block = llvm_basic_block_new(c, "panic");
LLVMBasicBlockRef ok_block = llvm_basic_block_new(c, "checkok");
BEValue be_value;
llvm_value_set_bool(&be_value, value);
llvm_emit_cond_br(c, &be_value, panic_block, ok_block);
llvm_emit_block(c, panic_block);
llvm_emit_debug_output(c, panic_name, loc->file->name, c->cur_func_decl->name, loc->line);
llvm_emit_debug_output(c, panic_name, file->name, c->cur_func_decl->name, loc->row);
llvm_emit_call_intrinsic(c, intrinsic_id.trap, NULL, 0, NULL, 0);
llvm_emit_br(c, ok_block);
llvm_emit_block(c, ok_block);

View File

@@ -1247,15 +1247,16 @@ static Expr *parse_bytes_expr(Context *context, Expr *left)
{
TokenData *token_data = tokendata_from_token(context->lex.tok);
SourceLocation *loc = TOKLOC(context->lex.tok);
File *file = source_file_by_id(loc->file_id);
if (token_data->is_base64)
{
const char *base64data = &loc->file->contents[loc->start] + 4;
const char *base64data = &file->contents[loc->start] + 4;
const char *end = base64data + loc->length - 1;
parse_base64(&data_current, data_current + token_data->len, base64data, end);
}
else
{
const char *hexdata = &loc->file->contents[loc->start] + 2;
const char *hexdata = &file->contents[loc->start] + 2;
const char *end = hexdata + loc->length - 1;
parse_hex(&data_current, hexdata, end);
}

View File

@@ -48,7 +48,7 @@ inline void advance(Context *context)
SourceLocation *curr = TOKLOC(context->lex.tok);
SourceLocation *next = TOKLOC(context->lex.next_tok);
vec_add(context->comments, context->lex.next_tok);
if (curr->line == next->line)
if (curr->row == next->row)
{
if (context->trailing_comment)
{

View File

@@ -198,7 +198,6 @@ bool expr_cast_is_constant_eval(Expr *expr, ConstantEvalKind eval_kind)
case CAST_EREU:
case CAST_XIERR:
case CAST_PTRPTR:
case CAST_ARRPTR:
case CAST_STRPTR:
case CAST_PTRBOOL:
case CAST_BOOLINT:
@@ -1737,8 +1736,8 @@ bool sema_expr_analyse_macro_call(Context *context, Expr *call_expr, Expr *struc
context->macro_scope = (MacroScope){
.body_param = decl->macro_decl.block_parameter.index ? TOKSTR(decl->macro_decl.block_parameter) : NULL,
.macro = decl,
.inline_line = TOKLOC(call_expr->span.loc)->line,
.original_inline_line = old_macro_scope.depth ? old_macro_scope.original_inline_line : TOKLOC(call_expr->span.loc)->line,
.inline_line = TOKLOC(call_expr->span.loc)->row,
.original_inline_line = old_macro_scope.depth ? old_macro_scope.original_inline_line : TOKLOC(call_expr->span.loc)->row,
.locals_start = context->active_scope.current_local,
.depth = old_macro_scope.depth + 1,
.yield_symbol_start = first_local,
@@ -5938,7 +5937,7 @@ static inline bool sema_expr_analyse_placeholder(Context *context, Expr *expr)
}
if (string == kw_LINEREAL)
{
expr_rewrite_to_int_const(expr, type_isize, TOKLOC(expr->placeholder_expr.identifier)->line, true);
expr_rewrite_to_int_const(expr, type_isize, TOKLOC(expr->placeholder_expr.identifier)->row, true);
return true;
}
if (string == kw_LINE)
@@ -5949,7 +5948,7 @@ static inline bool sema_expr_analyse_placeholder(Context *context, Expr *expr)
}
else
{
expr_rewrite_to_int_const(expr, type_isize, TOKLOC(expr->placeholder_expr.identifier)->line, true);
expr_rewrite_to_int_const(expr, type_isize, TOKLOC(expr->placeholder_expr.identifier)->row, true);
}
return true;
}

View File

@@ -620,7 +620,7 @@ static inline bool sema_analyse_stmt_placement(Expr *cond, Ast *stmt)
if (stmt->ast_kind == AST_COMPOUND_STMT) return true;
SourceLocation *end_of_cond = TOKLOC(cond->span.end_loc);
SourceLocation *start_of_then = TOKLOC(stmt->span.loc);
return end_of_cond->line == start_of_then->line;
return end_of_cond->row == start_of_then->row;
}
/**
@@ -1467,7 +1467,7 @@ static inline bool sema_analyse_if_stmt(Context *context, Ast *statement)
{
SourceLocation *end_of_cond = TOKLOC(cond->span.end_loc);
SourceLocation *start_of_then = TOKLOC(statement->if_stmt.then_body->span.loc);
if (end_of_cond->line != start_of_then->line)
if (end_of_cond->row != start_of_then->row)
{
SEMA_ERROR(statement->if_stmt.then_body,
"The 'then' part of a single line if-statement must start on the same line as the 'if' or use '{ }'");

View File

@@ -15,7 +15,12 @@
static const size_t LEXER_FILES_START_CAPACITY = 128;
File pseudo_file;
File *source_file_by_id(FileId file)
{
assert(file < vec_size(global_context.loaded_sources));
return global_context.loaded_sources[file];
}
File *source_file_load(const char *filename, bool *already_loaded)
{
@@ -45,62 +50,12 @@ File *source_file_load(const char *filename, bool *already_loaded)
size_t size;
const char* source_text = read_file(filename, &size);
File *file = CALLOCS(File);
file->file_id = vec_size(global_context.loaded_sources);
file->full_path = full_path;
file->start_id = vec_size(global_context.loaded_sources) ? VECLAST(global_context.loaded_sources)->end_id : 0;
file->current_line_start = file->start_id;
file->contents = source_text;
ASSERT(file->start_id + size < UINT32_MAX, "Total files loaded exceeded %d bytes", UINT32_MAX);
file->end_id = (SourceLoc) (file->start_id + size);
size_t pre_allocated_lines = size / 40;
file->lines = VECNEW(SourceLoc, pre_allocated_lines < 16 ? 16 : pre_allocated_lines);
vec_add(file->lines, file->start_id);
path_get_dir_and_filename_from_full(file->full_path, &file->name, &file->dir_path);
vec_add(global_context.loaded_sources, file);
return file;
}
void source_file_append_line_end(File *file, SourceLoc loc)
{
if (file->current_line_start > loc) return;
file->current_line_start = loc + 1;
vec_add(file->lines, file->current_line_start);
}
SourcePosition source_file_find_position_in_file(File *file, SourceLoc loc)
{
assert(file->start_id <= loc);
unsigned lines = vec_size(file->lines);
unsigned low = 0;
unsigned high = lines;
while (1)
{
// Line found iff line_start[mid] <= loc && line_start[mid + 1] < loc
// Binary search
uint32_t mid = (high + low) / 2;
// Mid is before the location.
SourceLoc line_start = file->lines[mid];
if (line_start > loc)
{
high = mid;
continue;
}
if (mid + 1 != lines && file->lines[mid + 1] <= loc)
{
low = mid;
continue;
}
return (SourcePosition)
{
.file = file,
.line = mid + 1,
.col = loc - line_start + 1,
.loc = loc,
.start = file->contents + loc - file->start_id,
};
}
}

View File

@@ -14,29 +14,6 @@ void test_file(void)
{
File file;
memset(&file, 0, sizeof(file));
file.start_id = 3;
file.contents = "";
vec_add(file.lines, file.start_id);
TEST_ASSERT(source_file_find_position_in_file(&file, 3).line == 1, "Expected first line");
TEST_ASSERT(source_file_find_position_in_file(&file, 10).line == 1, "Expected first line");
source_file_append_line_end(&file, 9);
TEST_ASSERT(source_file_find_position_in_file(&file, 3).line == 1, "Expected first line");
TEST_ASSERT(source_file_find_position_in_file(&file, 5).line == 1, "Expected first line");
TEST_ASSERT(source_file_find_position_in_file(&file, 10).line == 2, "Expected second line");
source_file_append_line_end(&file, 19);
TEST_ASSERT(source_file_find_position_in_file(&file, 3).line == 1, "Expected first line");
TEST_ASSERT(source_file_find_position_in_file(&file, 5).line == 1, "Expected first line");
TEST_ASSERT(source_file_find_position_in_file(&file, 10).line == 2, "Expected second line");
TEST_ASSERT(source_file_find_position_in_file(&file, 15).line == 2, "Expected second line");
TEST_ASSERT(source_file_find_position_in_file(&file, 21).line == 3, "Expected third line");
source_file_append_line_end(&file, 29);
TEST_ASSERT(source_file_find_position_in_file(&file, 3).line == 1, "Expected first line");
TEST_ASSERT(source_file_find_position_in_file(&file, 5).line == 1, "Expected first line");
TEST_ASSERT(source_file_find_position_in_file(&file, 10).line == 2, "Expected second line");
TEST_ASSERT(source_file_find_position_in_file(&file, 15).line == 2, "Expected second line");
TEST_ASSERT(source_file_find_position_in_file(&file, 21).line == 3, "Expected third line");
TEST_ASSERT(source_file_find_position_in_file(&file, 25).line == 3, "Expected third line");
TEST_ASSERT(source_file_find_position_in_file(&file, 31).line == 4, "Expected fourth line");
}
#define i128(x_, y_) ((Int128){x_, y_})
void test128()

View File

@@ -1 +1 @@
#define COMPILER_VERSION "PRE.6"
#define COMPILER_VERSION "PRE.7"

View File

@@ -0,0 +1,3 @@
/**
@1 // #error: Expected doc directive here
*/

View File

@@ -0,0 +1 @@
x"abcé" // #error: This isn't a valid hexadecimal digit

View File

@@ -0,0 +1 @@
x"abcg" // #error: 'g' isn't a valid hexadecimal digit

View File

@@ -0,0 +1,2 @@
$$1 // #error: Expected a letter after

View File

@@ -2,10 +2,14 @@
0o% // #error: An expression starting with '0o' should be followed by octal numbers (0-7).
0o08 // #error: An expression starting with '0o' should be followed by octal numbers (0-7).
0b2 // #error: An expression starting with '0b' should be followed by binary digits
0b# // #error: An expression starting with '0b' should be followed by binary digits
0b12 // #error: An expression starting with '0b' should be followed by binary digits
0xg // #error: '0x' starts a hexadecimal number, so the next character should be 0-9, a-f or A-F.
0x! // #error: '0x' starts a hexadecimal number, so the next character should be 0-9, a-f or A-F.

View File

@@ -1,7 +1,6 @@
// @warnings{no-unused}
module test;
struct Aa
{
int a;