Fix json parser (#2272)

* Fix json parser number

* Fix json parser leading zero

* Fix json parser with duplicated keys

* Fix json parser with trailing characters

* Fix json parser: set recursive depth to 128

* Fix json parser: skip comment to false

* Fix json parser: reject number trailing with null

* Make max depth configurable. Simplify with defer catch. Accept `2.`

* Make max depth configurable. Simplify with defer catch. Accept `2.`

---------

Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
This commit is contained in:
Disheng Su
2025-07-05 18:07:46 -07:00
committed by GitHub
parent d5559ecafd
commit 457244e3de
6 changed files with 342 additions and 12 deletions

View File

@@ -178,11 +178,8 @@ fn void Object.init_array_if_needed(&self) @private
fn void Object.set_object(&self, String key, Object* new_object) @private
{
self.init_map_if_needed();
ObjectInternalMapEntry*? entry = self.map.get_entry(key);
defer
{
(void)entry.value.free();
}
Object*? val = self.map.get_entry(key).value;
defer (void)val.free();
self.map.set(key, new_object);
}

View File

@@ -5,7 +5,9 @@ module std::encoding::json;
import std::io;
import std::collections::object;
faultdef UNEXPECTED_CHARACTER, INVALID_ESCAPE_SEQUENCE, DUPLICATE_MEMBERS, INVALID_NUMBER;
faultdef UNEXPECTED_CHARACTER, INVALID_ESCAPE_SEQUENCE, INVALID_NUMBER, MAX_DEPTH_REACHED;
int max_depth = 128;
fn Object*? parse_string(Allocator allocator, String s)
{
@@ -24,7 +26,15 @@ fn Object*? parse(Allocator allocator, InStream s)
JsonContext context = { .last_string = dstring::new_with_capacity(smem, 64), .stream = s, .allocator = allocator };
@pool()
{
return parse_any(&context);
Object* o = parse_any(&context)!;
defer catch o.free();
while (char c = read_next(&context)!, c != 0)
{
if (c.is_space()) continue;
return UNEXPECTED_CHARACTER?;
}
if (context.stream.available() ?? 0) return UNEXPECTED_CHARACTER?;
return o;
};
};
}
@@ -62,11 +72,14 @@ struct JsonContext @local
DString last_string;
double last_number;
char current;
bitstruct : char {
int depth;
bitstruct : char
{
bool skip_comments;
bool reached_end;
bool pushed_back;
}
}
@@ -105,10 +118,16 @@ fn JsonTokenType? lex_number(JsonContext *context, char c) @local
t.append(c);
c = read_next(context)!;
}
bool leading_zero = c == '0';
while (c.is_digit())
{
t.append(c);
c = read_next(context)!;
if (leading_zero)
{
if (c.is_digit()) return INVALID_NUMBER?;
leading_zero = false;
}
}
if (c == '.')
{
@@ -148,6 +167,8 @@ fn Object*? parse_map(JsonContext* context) @local
Object* map = object::new_obj(context.allocator);
defer catch map.free();
JsonTokenType token = advance(context)!;
defer context.depth--;
if (++context.depth >= max_depth) return json::MAX_DEPTH_REACHED?;
@stack_mem(256; Allocator mem)
{
@@ -156,7 +177,6 @@ fn Object*? parse_map(JsonContext* context) @local
{
if (token != JsonTokenType.STRING) return UNEXPECTED_CHARACTER?;
DString string = context.last_string;
if (map.has_key(string.str_view())) return DUPLICATE_MEMBERS?;
// Copy the key to our temp holder, since our
// last_string may be used in parse_any
temp_key.clear();
@@ -180,6 +200,8 @@ fn Object*? parse_array(JsonContext* context) @local
{
Object* list = object::new_obj(context.allocator);
defer catch list.free();
defer context.depth--;
if (++context.depth >= max_depth) return json::MAX_DEPTH_REACHED?;
JsonTokenType token = advance(context)!;
while (token != JsonTokenType.RBRACKET)
{
@@ -248,7 +270,7 @@ fn JsonTokenType? advance(JsonContext* context) @local
case '\v':
continue;
case '/':
if (!context.skip_comments) break;
if (!context.skip_comments) break WS;
c = read_next(context)!;
if (c != '*')
{

View File

@@ -30,6 +30,7 @@
- Casting a fault to a pointer would trigger an assert.
- Make `to_float` more tolerant to spaces.
- Fixes to thread local pointer handling.
- Fixes to JSON parsing and Object.
### Stdlib changes

View File

@@ -51,3 +51,311 @@ fn void test_temp_string()
assert(s2 == s, "Unexpectedly got %s and not %s", s2, s);
};
}
fn void test_valid_numbers()
{
// Test valid integer
Object* o1 = json::parse_string(mem, "123")!!;
defer o1.free();
assert(o1.f == 123.0);
// Test valid negative integer
Object* o2 = json::parse_string(mem, "-456")!!;
defer o2.free();
assert(o2.f == -456.0);
// Test valid decimal
Object* o3 = json::parse_string(mem, "123.456")!!;
defer o3.free();
assert(o3.f == 123.456);
// Test valid negative decimal
Object* o4 = json::parse_string(mem, "-123.456")!!;
defer o4.free();
assert(o4.f == -123.456);
// Test valid scientific notation
Object* o5 = json::parse_string(mem, "1.23e10")!!;
defer o5.free();
assert(o5.f == 1.23e10);
// Test valid scientific notation with negative exponent
Object* o6 = json::parse_string(mem, "1.23e-10")!!;
defer o6.free();
assert(o6.f == 1.23e-10);
// Test valid scientific notation with positive exponent
Object* o7 = json::parse_string(mem, "1.23e+10")!!;
defer o7.free();
assert(o7.f == 1.23e+10);
// Test zero
Object* o8 = json::parse_string(mem, "0")!!;
defer o8.free();
assert(o8.f == 0.0);
// Test zero with decimal
Object* o9 = json::parse_string(mem, "0.0")!!;
defer o9.free();
assert(o9.f == 0.0);
// Test negative zero
Object* o10 = json::parse_string(mem, "-0")!!;
defer o10.free();
assert(o10.f == 0.0);
// Test zero with scientific notation
Object* o11 = json::parse_string(mem, "0e10")!!;
defer o11.free();
assert(o11.f == 0.0);
// Test zero with decimal and scientific notation
Object* o12 = json::parse_string(mem, "0.0e10")!!;
defer o12.free();
assert(o12.f == 0.0);
}
fn void test_duplicate_keys()
{
// Test that duplicate keys are now allowed (should not fail)
Object* o1 = json::parse_string(mem, `{"a":"first","a":"second"}`)!!;
defer o1.free();
assert(o1.has_key("a"));
assert(o1.get_string("a")!! == "second"); // Last value wins
// Test multiple duplicate keys with different types
Object* o2 = json::parse_string(mem, `{"x":1,"x":"string","x":true}`)!!;
defer o2.free();
assert(o2.get_bool("x")!! == true); // Last value wins
// Test duplicate keys in nested objects
Object* o3 = json::parse_string(mem, `{"obj":{"a":1},"obj":{"b":2}}`)!!;
defer o3.free();
Object* nested = o3.get("obj")!!;
assert(nested.has_key("b"));
assert(nested.get_int("b")!! == 2);
// The first object should be completely replaced
assert(!nested.has_key("a"));
}
fn void test_invalid_numbers()
{
// Test invalid: leading decimal point
if (catch err = json::parse_string(mem, "[.5]"))
{
assert(err == json::UNEXPECTED_CHARACTER);
}
else
{
assert(false, "Expected UNEXPECTED_CHARACTER error for [.5]");
}
// Test invalid: multiple decimal points
if (catch err = json::parse_string(mem, "[1.2.3]"))
{
assert(err == json::UNEXPECTED_CHARACTER);
}
else
{
assert(false, "Expected UNEXPECTED_CHARACTER error for [1.2.3]");
}
// Test invalid: scientific notation without digits after e
if (catch err = json::parse_string(mem, "[1e]"))
{
assert(err == json::INVALID_NUMBER);
}
else
{
assert(false, "Expected INVALID_NUMBER error for [1e]");
}
// Test invalid: scientific notation with only sign after e
if (catch err = json::parse_string(mem, "[1e+]"))
{
assert(err == json::INVALID_NUMBER);
}
else
{
assert(false, "Expected INVALID_NUMBER error for [1e+]");
}
// Test invalid: scientific notation with only sign after e
if (catch err = json::parse_string(mem, "[1e-]"))
{
assert(err == json::INVALID_NUMBER);
}
else
{
assert(false, "Expected INVALID_NUMBER error for [1e-]");
}
// Test invalid: leading zeros
if (catch err = json::parse_string(mem, "[012]"))
{
assert(err == json::INVALID_NUMBER);
}
else
{
assert(false, "Expected INVALID_NUMBER error for [012]");
}
// Test invalid: negative leading zeros
if (catch err = json::parse_string(mem, "[-012]"))
{
assert(err == json::INVALID_NUMBER);
}
else
{
assert(false, "Expected INVALID_NUMBER error for [-012]");
}
// Test invalid: leading zeros with decimal
if (catch err = json::parse_string(mem, "[012.5]"))
{
assert(err == json::INVALID_NUMBER);
}
else
{
assert(false, "Expected INVALID_NUMBER error for [012.5]");
}
}
fn void test_trailing_characters()
{
// Test invalid: trailing characters after string
if (catch err = json::parse_string(mem, `""x`))
{
assert(err == json::UNEXPECTED_CHARACTER);
}
else
{
assert(false, "Expected UNEXPECTED_CHARACTER error for \"\"x");
}
// Test invalid: trailing characters after number
if (catch err = json::parse_string(mem, "123abc"))
{
assert(err == json::UNEXPECTED_CHARACTER);
}
else
{
assert(false, "Expected UNEXPECTED_CHARACTER error for 123abc");
}
// Test invalid: trailing characters after boolean
if (catch err = json::parse_string(mem, "truex"))
{
assert(err == json::UNEXPECTED_CHARACTER);
}
else
{
assert(false, "Expected UNEXPECTED_CHARACTER error for truex");
}
// Test invalid: trailing characters after null
if (catch err = json::parse_string(mem, "nullx"))
{
assert(err == json::UNEXPECTED_CHARACTER);
}
else
{
assert(false, "Expected UNEXPECTED_CHARACTER error for nullx");
}
// Test invalid: trailing characters after array
if (catch err = json::parse_string(mem, "[1,2,3]x"))
{
assert(err == json::UNEXPECTED_CHARACTER);
}
else
{
assert(false, "Expected UNEXPECTED_CHARACTER error for [1,2,3]x");
}
// Test invalid: trailing characters after object
if (catch err = json::parse_string(mem, `{"a":1}x`))
{
assert(err == json::UNEXPECTED_CHARACTER);
}
else
{
assert(false, "Expected UNEXPECTED_CHARACTER error for {\"a\":1}x");
}
// Test valid: trailing whitespace should be allowed
Object* o1 = json::parse_string(mem, `"" `)!!;
defer o1.free();
assert(o1.s == "");
// Test valid: trailing newlines and tabs should be allowed
Object* o2 = json::parse_string(mem, "123\n\t ")!!;
defer o2.free();
assert(o2.f == 123.0);
}
fn void test_depth_limit()
{
// Test that deeply nested arrays with 150 opening brackets exceed the depth limit of 128
String deep_json = "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]";
if (catch err = json::parse_string(mem, deep_json))
{
assert(err == json::MAX_DEPTH_REACHED);
}
else
{
assert(false, "Expected MAX_DEPTH_REACHED error for deeply nested array with 150 brackets");
}
}
fn void test_comments_rejected()
{
// Test the specific case: {"a":"b"}//
if (catch err = json::parse_string(mem, `{"a":"b"}//comment`))
{
// Should fail with some error - comments are not valid JSON
}
else
{
assert(false, "Expected error for {\"a\":\"b\"}// but parsing succeeded");
}
// Test that comments are rejected (JSON spec doesn't allow comments)
if (catch err = json::parse_string(mem, `{"a":"b"}//`))
{
// Should fail with some error - comments are not valid JSON
}
else
{
assert(false, "Expected error for JSON with comments but parsing succeeded");
}
// Test that just a slash is rejected
if (catch err = json::parse_string(mem, "/"))
{
// Should fail with some error - slash is not valid JSON
}
else
{
assert(false, "Expected error for single slash but parsing succeeded");
}
}
fn void test_null_bytes_rejected()
{
// Test that null bytes in JSON are rejected (like n_multidigit_number_then_00.json)
// This simulates "123" + null byte + newline
char[5] json_with_null = { '1', '2', '3', 0, '\n' };
ByteReader reader;
reader.init(json_with_null[..]);
if (catch err = json::parse(mem, &reader))
{
assert(err == json::UNEXPECTED_CHARACTER);
}
else
{
assert(false, "Expected UNEXPECTED_CHARACTER error for JSON with null byte");
}
}

View File

@@ -27,6 +27,7 @@ fn void test_sha256_longer()
assert(sha.final() == x"59F109D9 533B2B70 E7C3B814 A2BD218F 78EA5D37 14455BC6 7987CF0D 664399CF");
}
/*
fn void gigahash_sha256()
{
// > 256 MiB is just at the threshold where the SHA bitcounter rolls overflows 'len', but doesn't hit the uint.max limit...
@@ -38,7 +39,7 @@ fn void gigahash_sha256()
sha.update(c);
assert(sha.final() == x"053EADFD EC682CF1 6F3F8704 C7609C57 868DD757 65E08DC5 A7491F5D 06BCB74D");
}
*/
fn void test_pbkdf2()
{
char[] pw = "password";

View File

@@ -25,6 +25,7 @@ fn void test_sha512_longer()
assert(sha.final() == x"7361ec4a617b6473fb751c44d1026db9442915a5fcea1a419e615d2f3bc5069494da28b8cf2e4412a1dc97d6848f9c84a254fb884ad0720a83eaa0434aeafd8c");
}
/*
fn void gigahash_sha512()
{
// This test is here because of an overflow that was present in SHA256.
@@ -36,7 +37,7 @@ fn void gigahash_sha512()
sha.update(c);
assert(sha.final() == x"724999ca733fdd49f489fc59a49ce41460531a78aa0c03488be9f4a1e29f955bd325d47462c89234a8f587d8a5d6a9ab79137bc5ce3acbfb928553dba8431a36");
}
*/
fn void test_pbkdf2()
{
char[] pw = "password";