Add a leniency flag to String.unescape(), and fix a memory leak in String.(un)escape() (#2640)

* Fix memory leak in string_escape.c3
* Add unescape_lenient and tunescape_lenient
* Optimize for the use of the temp allocator.

---------

Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
2026-02-27 03:51:18 +00:00 · 2025-12-12 18:49:01 -05:00
parent 0c0d0ace4d
commit 2a2c0f5d91
1 changed files with 172 additions and 144 deletions
--- a/lib/std/core/string_escape.c3
+++ b/lib/std/core/string_escape.c3
@@ -1,4 +1,4 @@
-// Copyright (c) 2024 Christoffer Lerno. All rights reserved.
+// Copyright (c) 2024-2025 Christoffer Lerno. All rights reserved.
 // Use of this source code is governed by the MIT license
 // a copy of which can be found in the LICENSE_STDLIB file.

@@ -22,43 +22,57 @@ faultdef INVALID_ESCAPE_SEQUENCE, UNTERMINATED_STRING, INVALID_HEX_ESCAPE, INVAL
 *>
 fn String String.escape(String s, Allocator allocator, bool strip_quotes = true)
 {
-    // Conservative allocation: most strings need minimal escaping
-    usz initial_capacity = s.len + s.len / 5 + 2; // ~1.2x + quotes
-    DString result = dstring::new_with_capacity(allocator, initial_capacity);
+	// Conservative first guess only: escape-heavy input can expand up to 4x (\xHH), so the buffer may still grow
+	usz initial_capacity = s.len + s.len / 5 + 2; // ~1.2x + quotes

-    if (!strip_quotes) result.append_char('"');
-
-    foreach (char c : s)
-    {
-        switch (c)
-        {
-            case '"':  result.append(`\"`);
-            case '\\': result.append(`\\`);
-            case '\b': result.append(`\b`);
-            case '\f': result.append(`\f`);
-            case '\n': result.append(`\n`);
-            case '\r': result.append(`\r`);
-            case '\t': result.append(`\t`);
-            case '\v': result.append(`\v`);
-            case '\0': result.append(`\0`);
-            default:
-                if (c >= 32 && c <= 126)
-                {
-                    // Printable ASCII
-                    result.append_char(c);
-                }
-                else
-                {
-                    // Non-printable, use hex escape
-                    result.appendf("\\x%02x", (uint)c);
-                }
-        }
-    }
-
-    if (!strip_quotes) result.append_char('"');
-    return result.copy_str(allocator);
+	if (allocator == tmem) // Caller wants temp memory: build the result there and return it without a copy
+	{
+		DString result = dstring::new_with_capacity(tmem, initial_capacity);
+		escape_dstring(s, &result, strip_quotes); // By address: growth inside the helper can replace the DString handle
+		return result.str_view();
+	}
+	@pool() // Otherwise build in a pooled temp buffer and copy only the final text out with `allocator`
+	{
+		DString result = dstring::temp_with_capacity(initial_capacity);
+		escape_dstring(s, &result, strip_quotes); // By address, for the same reason as in the tmem branch
+		return result.copy_str(allocator);
+	};
 }

+fn void escape_dstring(String s, DString* result, bool strip_quotes) @private // Appends the escaped form of `s` to `*result`; quotes are added when strip_quotes is false
+{
+	if (!strip_quotes) result.append_char('"');
+
+	foreach (char c : s)
+	{
+		switch (c)
+		{
+			case '"':  result.append(`\"`);
+			case '\\': result.append(`\\`);
+			case '\b': result.append(`\b`);
+			case '\f': result.append(`\f`);
+			case '\n': result.append(`\n`);
+			case '\r': result.append(`\r`);
+			case '\t': result.append(`\t`);
+			case '\v': result.append(`\v`);
+			case '\0': result.append(`\0`);
+			default:
+				if (c >= 32 && c <= 126)
+				{
+					// Printable ASCII
+					result.append_char(c);
+				}
+				else
+				{
+					// Non-printable, use hex escape
+					result.appendf("\\x%02x", (uint)c);
+				}
+		}
+	}
+
+	if (!strip_quotes) result.append_char('"');
+
+}
 <*
 Escape a string using the temp allocator.

@@ -76,33 +90,33 @@ fn String String.tescape(String s, bool strip_quotes = false) => s.escape(tmem,
 *>
 fn usz escape_len(String s)
 {
-    usz len = 2; // For quotes
-    foreach (char c : s)
-    {
-        switch (c)
-        {
-            case '"':
-            case '\\':
-            case '\b':
-            case '\f':
-            case '\n':
-            case '\r':
-            case '\t':
-            case '\v':
-            case '\0':
-                len += 2; // \X
-            default:
-                if (c >= 32 && c <= 126)
-                {
-                    len += 1;
-                }
-                else
-                {
-                    len += 4; // \xHH
-                }
-        }
-    }
-    return len;
+	usz len = 2; // Start at 2: the count always includes the two surrounding quotes
+	foreach (char c : s)
+	{
+		switch (c)
+		{
+			case '"': // Characters with a two-byte backslash escape share the `len += 2` arm below
+			case '\\':
+			case '\b':
+			case '\f':
+			case '\n':
+			case '\r':
+			case '\t':
+			case '\v':
+			case '\0':
+				len += 2; // Backslash plus one escape letter (\X)
+			default:
+				if (c >= 32 && c <= 126)
+				{
+					len += 1; // Printable ASCII is counted as a single byte
+				}
+				else
+				{
+					len += 4; // Hex escape \xHH
+				}
+		}
+	}
+	return len;
 }

 <*
@@ -111,10 +125,11 @@ fn usz escape_len(String s)
 @param allocator : "The allocator to use for the result"
 @param s : "The quoted string to unescape"
 @param allow_unquoted : "Set to true to unescape strings not surrounded by quotes, defaults to false"
+ @param lenient : "Be lenient with escapes, resolving unknown sequences to the character following the backslash, defaults to false"
 @return "The unescaped string without quotes, safe to convert to ZString"
 @return? UNTERMINATED_STRING, INVALID_ESCAPE_SEQUENCE, INVALID_HEX_ESCAPE, INVALID_UNICODE_ESCAPE
 *>
-fn String? String.unescape(String s, Allocator allocator, bool allow_unquoted = false)
+fn String? String.unescape(String s, Allocator allocator, bool allow_unquoted = false, bool lenient = false)
 {
 	if (s.len >= 2 && s[0] == '"' && s[^1] == '"')
 	{
@@ -123,78 +138,90 @@ fn String? String.unescape(String s, Allocator allocator, bool allow_unquoted =
 	}
 	else if (!allow_unquoted) return UNTERMINATED_STRING?;

-    // Handle empty string case
-    if (!s.len)
-    {
-        return "".copy(allocator);
-    }
-
-    DString result = dstring::new_with_capacity(allocator, s.len);
+	// Handle empty string case
+	if (!s.len)
+	{
+		return "".copy(allocator);
+	}
+	if (allocator == tmem)
+	{
+		DString result = dstring::new_with_capacity(tmem, s.len);
+		unescape_dstring(s, result, allow_unquoted, lenient)!;
+		return result.str_view();
+	}
+	@pool()
+	{
+		DString result = dstring::temp_with_capacity(s.len);
+		unescape_dstring(s, result, allow_unquoted, lenient)!;
+		return result.copy_str(allocator);
+	};
+}

+fn void? unescape_dstring(String s, DString result, bool allow_unquoted = false, bool lenient = false) @private // Appends the unescaped form of `s` to `result`. NOTE(review): allow_unquoted is not read in this body, and `result` is taken by value — presumably safe because callers pre-size it to s.len; confirm
+{
 	usz len = s.len;
 	for (usz i = 0; i < len; i++)
 	{
 		char c = s[i];
-        if (c != '\\')
-        {
-            result.append_char(c);
-            continue;
-        }
+		if (c != '\\') // Ordinary byte: copy it through unchanged
+		{
+			result.append_char(c);
+			continue;
+		}

-        // Handle escape sequence
-        if (i + 1 >= len) return INVALID_ESCAPE_SEQUENCE?;
+		// Handle escape sequence; a backslash as the very last byte has nothing to escape
+		if (i + 1 >= len) return INVALID_ESCAPE_SEQUENCE?;

-        char escape_char = s[++i];
-        switch (escape_char)
-        {
-            case '"':  result.append_char('"');
-            case '\\': result.append_char('\\');
-            case '/':  result.append_char('/');
-            case 'b':  result.append_char('\b');
-            case 'f':  result.append_char('\f');
-            case 'n':  result.append_char('\n');
-            case 'r':  result.append_char('\r');
-            case 't':  result.append_char('\t');
-            case 'v':  result.append_char('\v');
-            case '0':  result.append_char('\0');
-            case 'x':
-                // Hex escape \xHH
-                if (i + 2 >= len) return INVALID_HEX_ESCAPE?;
-                char h1 = s[++i];
-                char h2 = s[++i];
-                if (!h1.is_xdigit() || !h2.is_xdigit()) return INVALID_HEX_ESCAPE?;
-                uint val = h1 > '9' ? (h1 | 32) - 'a' + 10 : h1 - '0';
-                val = val << 4;
-                val += h2 > '9' ? (h2 | 32) - 'a' + 10 : h2 - '0';
-                result.append_char((char)val);
-            case 'u':
-                // Unicode escape \uHHHH
-                if (i + 4 >= len) return INVALID_UNICODE_ESCAPE?;
-                uint val;
-                for (int j = 0; j < 4; j++)
-                {
-                    char hex_char = s[++i];
-                    if (!hex_char.is_xdigit()) return INVALID_UNICODE_ESCAPE?;
-                    val = val << 4 + (hex_char > '9' ? (hex_char | 32) - 'a' + 10 : hex_char - '0');
-                }
-                result.append_char32(val);
-            case 'U':
-                // Unicode escape \UHHHHHHHH
-                if (i + 8 >= len) return INVALID_UNICODE_ESCAPE?;
-                uint val;
-                for (int j = 0; j < 8; j++)
-                {
-                    char hex_char = s[++i];
-                    if (!hex_char.is_xdigit()) return INVALID_UNICODE_ESCAPE?;
-                    val = val << 4 + (hex_char > '9' ? (hex_char | 32) - 'a' + 10 : hex_char - '0');
-                }
-                result.append_char32(val);
-            default:
-                return INVALID_ESCAPE_SEQUENCE?;
-        }
-    }
-
-    return result.copy_str(allocator);
+		char escape_char = s[++i]; // Consume the character that follows the backslash
+		switch (escape_char)
+		{
+			case '"':  result.append_char('"');
+			case '\\': result.append_char('\\');
+			case '/':  result.append_char('/'); // `\/` is accepted and yields a plain slash
+			case 'b':  result.append_char('\b');
+			case 'f':  result.append_char('\f');
+			case 'n':  result.append_char('\n');
+			case 'r':  result.append_char('\r');
+			case 't':  result.append_char('\t');
+			case 'v':  result.append_char('\v');
+			case '0':  result.append_char('\0');
+			case 'x':
+				// Hex escape \xHH: exactly two hex digits must follow the 'x'
+				if (i + 2 >= len) return INVALID_HEX_ESCAPE?; // Both s[i + 1] and s[i + 2] must exist
+				char h1 = s[++i];
+				char h2 = s[++i];
+				if (!h1.is_xdigit() || !h2.is_xdigit()) return INVALID_HEX_ESCAPE?;
+				uint val = h1 > '9' ? (h1 | 32) - 'a' + 10 : h1 - '0'; // `| 32` folds 'A'-'F' to 'a'-'f' before converting
+				val = val << 4;
+				val += h2 > '9' ? (h2 | 32) - 'a' + 10 : h2 - '0';
+				result.append_char((char)val);
+			case 'u':
+				// Unicode escape \uHHHH: four hex digits, appended as one code point
+				if (i + 4 >= len) return INVALID_UNICODE_ESCAPE?;
+				uint val;
+				for (int j = 0; j < 4; j++)
+				{
+					char hex_char = s[++i];
+					if (!hex_char.is_xdigit()) return INVALID_UNICODE_ESCAPE?;
+					val = val << 4 + (hex_char > '9' ? (hex_char | 32) - 'a' + 10 : hex_char - '0'); // NOTE: relies on C3 precedence, where `<<` binds tighter than `+`, i.e. (val << 4) + digit
+				}
+				result.append_char32(val);
+			case 'U':
+				// Unicode escape \UHHHHHHHH: eight hex digits, appended as one code point
+				if (i + 8 >= len) return INVALID_UNICODE_ESCAPE?;
+				uint val;
+				for (int j = 0; j < 8; j++)
+				{
+					char hex_char = s[++i];
+					if (!hex_char.is_xdigit()) return INVALID_UNICODE_ESCAPE?;
+					val = val << 4 + (hex_char > '9' ? (hex_char | 32) - 'a' + 10 : hex_char - '0'); // Same C3 precedence note as the \u case: (val << 4) + digit
+				}
+				result.append_char32(val);
+			default:
+				if (!lenient) return INVALID_ESCAPE_SEQUENCE?;
+				result.append_char(escape_char); // Lenient mode: drop the backslash and keep the following character as-is
+		}
+	}
 }

 <*
@@ -202,10 +229,11 @@ fn String? String.unescape(String s, Allocator allocator, bool allow_unquoted =

 @param s : "The quoted string to unescape"
 @param allow_unquoted : "Set to true to unescape strings not surrounded by quotes, defaults to false"
+ @param lenient : "Be lenient with escapes, resolving unknown sequences to the character following the backslash, defaults to false"
 @return "The unescaped string without quotes"
 @return? UNTERMINATED_STRING, INVALID_ESCAPE_SEQUENCE, INVALID_HEX_ESCAPE, INVALID_UNICODE_ESCAPE
 *>
-fn String? String.tunescape(String s, bool allow_unquoted = false) => s.unescape(tmem, allow_unquoted);
+fn String? String.tunescape(String s, bool allow_unquoted = false, bool lenient = false) => s.unescape(tmem, allow_unquoted, lenient); // Shorthand for unescape() with the temp allocator (tmem)

 <*
 Check if a character needs to be escaped in a string literal.
@@ -215,19 +243,19 @@ fn String? String.tunescape(String s, bool allow_unquoted = false) => s.unescape
 *>
 fn bool needs_escape(char c)
 {
-    switch (c)
-    {
-        case '"':
-        case '\\':
-        case '\b':
-        case '\f':
-        case '\n':
-        case '\r':
-        case '\t':
-        case '\v':
-        case '\0':
-            return true;
-        default:
-            return c < 32 || c > 126;
-    }
+	switch (c)
+	{
+		case '"': // Characters that have a dedicated backslash escape
+		case '\\':
+		case '\b':
+		case '\f':
+		case '\n':
+		case '\r':
+		case '\t':
+		case '\v':
+		case '\0':
+			return true;
+		default:
+			return c < 32 || c > 126; // Anything outside printable ASCII (32..126) also needs escaping
+	}
 }