Filter \r before lexing.

2026-02-27 12:01:16 +00:00 · 2021-12-03 16:43:37 +01:00
parent 2c802878bb
commit bc2d789c2e
5 changed files with 33 additions and 20 deletions
--- a/resources/examples/notworking/acornvm/lexer.c3
+++ b/resources/examples/notworking/acornvm/lexer.c3
@@ -160,9 +160,10 @@ fn bool LexInfo.scanWhite(LexInfo *lex)
 		// Skip past spaces and tabs
 		case ' ':
 		case '\t':
-		case '\r':
 			lex_skipchar(lex);
 			break;
+		case '\r':
+		    UNREACHABLE

 		// Skip past new line
 		case '\n':
--- a/src/compiler/diagnostics.c
+++ b/src/compiler/diagnostics.c
@@ -67,15 +67,15 @@ static void print_error2(SourceLocation *location, const char *message, PrintTyp
 	{
 		eprintf(" ");
 	}
 	for (unsigned i = 1; i < location->col; i++)
 	{
-		if (start[i] == '\t')
+		switch (start[i]) // Emit exactly one padding character per source column.
 		{
-			eprintf("\t");
-		}
-		else
-		{
-			eprintf(" ");
+			case '\t': // A tab in the source is echoed as a tab.
+				eprintf("\t");
+				break; // Without this, control falls into default and adds a stray " ".
+			default: // Every other character is echoed as a single space.
+				eprintf(" ");
 		}
 	}
 	for (uint32_t i = 0; i < location->length; i++)
--- a/src/compiler/lexer.c
+++ b/src/compiler/lexer.c
@@ -267,10 +267,11 @@ static void skip_whitespace(Lexer *lexer, LexMode lex_type)
 				FALLTHROUGH;
 			case ' ':
 			case '\t':
-			case '\r':
 			case '\f':
 				next(lexer);
 				break;
+			case '\r':
+				UNREACHABLE
 			default:
 				return;
 		}
@@ -443,7 +444,7 @@ static inline bool scan_exponent(Lexer *lexer)
 			backtrack(lexer);
 			return add_error_token(lexer, "End of file was reached while parsing the exponent.");
 		}
-		if (c == '\n' || c == '\r') return add_error_token(lexer, "End of line was reached while parsing the exponent.");
+		if (c == '\n') return add_error_token(lexer, "End of line was reached while parsing the exponent.");
 		if (c < 31 || c > 127) add_error_token(lexer, "An unexpected character was found while parsing the exponent.");
 		return add_error_token(lexer, "Parsing the floating point exponent failed, because '%c' is not a number.", c);
 	}
@@ -802,10 +803,11 @@ static inline void skip_first_line_if_empty(Lexer *lexer)
 				return;
 			case ' ':
 			case '\t':
-			case '\r':
 			case '\f':
 				// Counts as whitespace.
 				break;
+			case '\r':
+				UNREACHABLE
 			default:
 				// Non whitespace -> no skip.
 				return;
@@ -904,7 +906,7 @@ static inline size_t scan_multiline_indent(const char *current, const char **end
 			// 2. More whitespace, so increase indent
 			if (is_whitespace(c))
 			{
-				current_indent++;
+				if (c == ' ' || c == '\t') current_indent++;
 			}
 			else
 			{
@@ -1013,13 +1015,6 @@ static inline bool scan_multiline_string(Lexer *lexer)
 	{
 		c = peek(lexer);

-		// We ignore \r
-		if (c == '\r')
-		{
-			next(lexer);
-			continue;
-		}
-
 		// Ok, we reached the end of line
 		// update the line end and store it in the resulting buffer.
 		if (c == '\n')
--- a/src/utils/file_utils.c
+++ b/src/utils/file_utils.c
@@ -170,7 +170,23 @@ char *read_file(const char *path, size_t *return_size)
 	assert(bytes_read == file_size);
 
 	buffer[bytes_read] = '\0';
-
+	// Filter '\r' early: squeeze every carriage return out of the buffer in place.
+	// `src` visits each byte that was read; `dst` is where the next kept byte goes.
+	// dst never passes src, so nothing is overwritten before it has been examined.
+	// NOTE(review): this hunk does not store the shortened length through
+	// return_size - confirm whether that happens elsewhere or is needed.
+	size_t dst = 0;
+	for (size_t src = 0; src < file_size; src++)
+	{
+		char c = buffer[src];
+		if (c != '\r')
+		{
+			buffer[dst] = c;
+			dst++;
+		}
+	}
+	// file_size == bytes_read is asserted above, so dst is the filtered length.
+	buffer[dst] = '\0';
 	fclose(file);
 	return buffer;
 }
--- a/src/utils/lib.h
+++ b/src/utils/lib.h
@@ -282,8 +282,9 @@ static inline bool is_whitespace(char c)
 		case ' ':
 		case '\t':
 		case '\n':
-		case '\r':
 			return true;
+		case '\r':
+			UNREACHABLE
 		default:
 			return false;
 	}