mirror of
https://github.com/c3lang/c3c.git
synced 2026-02-27 12:01:16 +00:00
Fix json parser (#2272)
* Fix json parser number * Fix json parser leading zero * Fix json parser with duplicated keys * Fix json parser with trailing characters * Fix json parser: set recursive depth to 128 * Fix json parser: skip comment to false * Fix json parser: reject number trailing with null * Make max depth configurable. Simplify with defer catch. Accept `2.` * Make max depth configurable. Simplify with defer catch. Accept `2.` --------- Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
This commit is contained in:
@@ -178,11 +178,8 @@ fn void Object.init_array_if_needed(&self) @private
|
||||
fn void Object.set_object(&self, String key, Object* new_object) @private
|
||||
{
|
||||
self.init_map_if_needed();
|
||||
ObjectInternalMapEntry*? entry = self.map.get_entry(key);
|
||||
defer
|
||||
{
|
||||
(void)entry.value.free();
|
||||
}
|
||||
Object*? val = self.map.get_entry(key).value;
|
||||
defer (void)val.free();
|
||||
self.map.set(key, new_object);
|
||||
}
|
||||
|
||||
|
||||
@@ -5,7 +5,9 @@ module std::encoding::json;
|
||||
import std::io;
|
||||
import std::collections::object;
|
||||
|
||||
faultdef UNEXPECTED_CHARACTER, INVALID_ESCAPE_SEQUENCE, DUPLICATE_MEMBERS, INVALID_NUMBER;
|
||||
faultdef UNEXPECTED_CHARACTER, INVALID_ESCAPE_SEQUENCE, INVALID_NUMBER, MAX_DEPTH_REACHED;
|
||||
|
||||
int max_depth = 128;
|
||||
|
||||
fn Object*? parse_string(Allocator allocator, String s)
|
||||
{
|
||||
@@ -24,7 +26,15 @@ fn Object*? parse(Allocator allocator, InStream s)
|
||||
JsonContext context = { .last_string = dstring::new_with_capacity(smem, 64), .stream = s, .allocator = allocator };
|
||||
@pool()
|
||||
{
|
||||
return parse_any(&context);
|
||||
Object* o = parse_any(&context)!;
|
||||
defer catch o.free();
|
||||
while (char c = read_next(&context)!, c != 0)
|
||||
{
|
||||
if (c.is_space()) continue;
|
||||
return UNEXPECTED_CHARACTER?;
|
||||
}
|
||||
if (context.stream.available() ?? 0) return UNEXPECTED_CHARACTER?;
|
||||
return o;
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -62,11 +72,14 @@ struct JsonContext @local
|
||||
DString last_string;
|
||||
double last_number;
|
||||
char current;
|
||||
bitstruct : char {
|
||||
int depth;
|
||||
bitstruct : char
|
||||
{
|
||||
bool skip_comments;
|
||||
bool reached_end;
|
||||
bool pushed_back;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -105,10 +118,16 @@ fn JsonTokenType? lex_number(JsonContext *context, char c) @local
|
||||
t.append(c);
|
||||
c = read_next(context)!;
|
||||
}
|
||||
bool leading_zero = c == '0';
|
||||
while (c.is_digit())
|
||||
{
|
||||
t.append(c);
|
||||
c = read_next(context)!;
|
||||
if (leading_zero)
|
||||
{
|
||||
if (c.is_digit()) return INVALID_NUMBER?;
|
||||
leading_zero = false;
|
||||
}
|
||||
}
|
||||
if (c == '.')
|
||||
{
|
||||
@@ -148,6 +167,8 @@ fn Object*? parse_map(JsonContext* context) @local
|
||||
Object* map = object::new_obj(context.allocator);
|
||||
defer catch map.free();
|
||||
JsonTokenType token = advance(context)!;
|
||||
defer context.depth--;
|
||||
if (++context.depth >= max_depth) return json::MAX_DEPTH_REACHED?;
|
||||
|
||||
@stack_mem(256; Allocator mem)
|
||||
{
|
||||
@@ -156,7 +177,6 @@ fn Object*? parse_map(JsonContext* context) @local
|
||||
{
|
||||
if (token != JsonTokenType.STRING) return UNEXPECTED_CHARACTER?;
|
||||
DString string = context.last_string;
|
||||
if (map.has_key(string.str_view())) return DUPLICATE_MEMBERS?;
|
||||
// Copy the key to our temp holder, since our
|
||||
// last_string may be used in parse_any
|
||||
temp_key.clear();
|
||||
@@ -180,6 +200,8 @@ fn Object*? parse_array(JsonContext* context) @local
|
||||
{
|
||||
Object* list = object::new_obj(context.allocator);
|
||||
defer catch list.free();
|
||||
defer context.depth--;
|
||||
if (++context.depth >= max_depth) return json::MAX_DEPTH_REACHED?;
|
||||
JsonTokenType token = advance(context)!;
|
||||
while (token != JsonTokenType.RBRACKET)
|
||||
{
|
||||
@@ -248,7 +270,7 @@ fn JsonTokenType? advance(JsonContext* context) @local
|
||||
case '\v':
|
||||
continue;
|
||||
case '/':
|
||||
if (!context.skip_comments) break;
|
||||
if (!context.skip_comments) break WS;
|
||||
c = read_next(context)!;
|
||||
if (c != '*')
|
||||
{
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
- Casting a fault to a pointer would trigger an assert.
|
||||
- Make `to_float` more tolerant to spaces.
|
||||
- Fixes to thread local pointer handling.
|
||||
- Fixes to JSON parsing and Object.
|
||||
|
||||
### Stdlib changes
|
||||
|
||||
|
||||
@@ -51,3 +51,311 @@ fn void test_temp_string()
|
||||
assert(s2 == s, "Unexpectedly got %s and not %s", s2, s);
|
||||
};
|
||||
}
|
||||
|
||||
fn void test_valid_numbers()
|
||||
{
|
||||
// Test valid integer
|
||||
Object* o1 = json::parse_string(mem, "123")!!;
|
||||
defer o1.free();
|
||||
assert(o1.f == 123.0);
|
||||
|
||||
// Test valid negative integer
|
||||
Object* o2 = json::parse_string(mem, "-456")!!;
|
||||
defer o2.free();
|
||||
assert(o2.f == -456.0);
|
||||
|
||||
// Test valid decimal
|
||||
Object* o3 = json::parse_string(mem, "123.456")!!;
|
||||
defer o3.free();
|
||||
assert(o3.f == 123.456);
|
||||
|
||||
// Test valid negative decimal
|
||||
Object* o4 = json::parse_string(mem, "-123.456")!!;
|
||||
defer o4.free();
|
||||
assert(o4.f == -123.456);
|
||||
|
||||
// Test valid scientific notation
|
||||
Object* o5 = json::parse_string(mem, "1.23e10")!!;
|
||||
defer o5.free();
|
||||
assert(o5.f == 1.23e10);
|
||||
|
||||
// Test valid scientific notation with negative exponent
|
||||
Object* o6 = json::parse_string(mem, "1.23e-10")!!;
|
||||
defer o6.free();
|
||||
assert(o6.f == 1.23e-10);
|
||||
|
||||
// Test valid scientific notation with positive exponent
|
||||
Object* o7 = json::parse_string(mem, "1.23e+10")!!;
|
||||
defer o7.free();
|
||||
assert(o7.f == 1.23e+10);
|
||||
|
||||
// Test zero
|
||||
Object* o8 = json::parse_string(mem, "0")!!;
|
||||
defer o8.free();
|
||||
assert(o8.f == 0.0);
|
||||
|
||||
// Test zero with decimal
|
||||
Object* o9 = json::parse_string(mem, "0.0")!!;
|
||||
defer o9.free();
|
||||
assert(o9.f == 0.0);
|
||||
|
||||
// Test negative zero
|
||||
Object* o10 = json::parse_string(mem, "-0")!!;
|
||||
defer o10.free();
|
||||
assert(o10.f == 0.0);
|
||||
|
||||
// Test zero with scientific notation
|
||||
Object* o11 = json::parse_string(mem, "0e10")!!;
|
||||
defer o11.free();
|
||||
assert(o11.f == 0.0);
|
||||
|
||||
// Test zero with decimal and scientific notation
|
||||
Object* o12 = json::parse_string(mem, "0.0e10")!!;
|
||||
defer o12.free();
|
||||
assert(o12.f == 0.0);
|
||||
}
|
||||
|
||||
fn void test_duplicate_keys()
|
||||
{
|
||||
// Test that duplicate keys are now allowed (should not fail)
|
||||
Object* o1 = json::parse_string(mem, `{"a":"first","a":"second"}`)!!;
|
||||
defer o1.free();
|
||||
assert(o1.has_key("a"));
|
||||
assert(o1.get_string("a")!! == "second"); // Last value wins
|
||||
|
||||
// Test multiple duplicate keys with different types
|
||||
Object* o2 = json::parse_string(mem, `{"x":1,"x":"string","x":true}`)!!;
|
||||
defer o2.free();
|
||||
assert(o2.get_bool("x")!! == true); // Last value wins
|
||||
|
||||
// Test duplicate keys in nested objects
|
||||
Object* o3 = json::parse_string(mem, `{"obj":{"a":1},"obj":{"b":2}}`)!!;
|
||||
defer o3.free();
|
||||
Object* nested = o3.get("obj")!!;
|
||||
assert(nested.has_key("b"));
|
||||
assert(nested.get_int("b")!! == 2);
|
||||
// The first object should be completely replaced
|
||||
assert(!nested.has_key("a"));
|
||||
}
|
||||
|
||||
fn void test_invalid_numbers()
|
||||
{
|
||||
// Test invalid: leading decimal point
|
||||
if (catch err = json::parse_string(mem, "[.5]"))
|
||||
{
|
||||
assert(err == json::UNEXPECTED_CHARACTER);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(false, "Expected UNEXPECTED_CHARACTER error for [.5]");
|
||||
}
|
||||
|
||||
// Test invalid: multiple decimal points
|
||||
if (catch err = json::parse_string(mem, "[1.2.3]"))
|
||||
{
|
||||
assert(err == json::UNEXPECTED_CHARACTER);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(false, "Expected UNEXPECTED_CHARACTER error for [1.2.3]");
|
||||
}
|
||||
|
||||
// Test invalid: scientific notation without digits after e
|
||||
if (catch err = json::parse_string(mem, "[1e]"))
|
||||
{
|
||||
assert(err == json::INVALID_NUMBER);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(false, "Expected INVALID_NUMBER error for [1e]");
|
||||
}
|
||||
|
||||
// Test invalid: scientific notation with only sign after e
|
||||
if (catch err = json::parse_string(mem, "[1e+]"))
|
||||
{
|
||||
assert(err == json::INVALID_NUMBER);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(false, "Expected INVALID_NUMBER error for [1e+]");
|
||||
}
|
||||
|
||||
// Test invalid: scientific notation with only sign after e
|
||||
if (catch err = json::parse_string(mem, "[1e-]"))
|
||||
{
|
||||
assert(err == json::INVALID_NUMBER);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(false, "Expected INVALID_NUMBER error for [1e-]");
|
||||
}
|
||||
|
||||
// Test invalid: leading zeros
|
||||
if (catch err = json::parse_string(mem, "[012]"))
|
||||
{
|
||||
assert(err == json::INVALID_NUMBER);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(false, "Expected INVALID_NUMBER error for [012]");
|
||||
}
|
||||
|
||||
// Test invalid: negative leading zeros
|
||||
if (catch err = json::parse_string(mem, "[-012]"))
|
||||
{
|
||||
assert(err == json::INVALID_NUMBER);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(false, "Expected INVALID_NUMBER error for [-012]");
|
||||
}
|
||||
|
||||
// Test invalid: leading zeros with decimal
|
||||
if (catch err = json::parse_string(mem, "[012.5]"))
|
||||
{
|
||||
assert(err == json::INVALID_NUMBER);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(false, "Expected INVALID_NUMBER error for [012.5]");
|
||||
}
|
||||
}
|
||||
|
||||
fn void test_trailing_characters()
|
||||
{
|
||||
// Test invalid: trailing characters after string
|
||||
if (catch err = json::parse_string(mem, `""x`))
|
||||
{
|
||||
assert(err == json::UNEXPECTED_CHARACTER);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(false, "Expected UNEXPECTED_CHARACTER error for \"\"x");
|
||||
}
|
||||
|
||||
// Test invalid: trailing characters after number
|
||||
if (catch err = json::parse_string(mem, "123abc"))
|
||||
{
|
||||
assert(err == json::UNEXPECTED_CHARACTER);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(false, "Expected UNEXPECTED_CHARACTER error for 123abc");
|
||||
}
|
||||
|
||||
// Test invalid: trailing characters after boolean
|
||||
if (catch err = json::parse_string(mem, "truex"))
|
||||
{
|
||||
assert(err == json::UNEXPECTED_CHARACTER);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(false, "Expected UNEXPECTED_CHARACTER error for truex");
|
||||
}
|
||||
|
||||
// Test invalid: trailing characters after null
|
||||
if (catch err = json::parse_string(mem, "nullx"))
|
||||
{
|
||||
assert(err == json::UNEXPECTED_CHARACTER);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(false, "Expected UNEXPECTED_CHARACTER error for nullx");
|
||||
}
|
||||
|
||||
// Test invalid: trailing characters after array
|
||||
if (catch err = json::parse_string(mem, "[1,2,3]x"))
|
||||
{
|
||||
assert(err == json::UNEXPECTED_CHARACTER);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(false, "Expected UNEXPECTED_CHARACTER error for [1,2,3]x");
|
||||
}
|
||||
|
||||
// Test invalid: trailing characters after object
|
||||
if (catch err = json::parse_string(mem, `{"a":1}x`))
|
||||
{
|
||||
assert(err == json::UNEXPECTED_CHARACTER);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(false, "Expected UNEXPECTED_CHARACTER error for {\"a\":1}x");
|
||||
}
|
||||
|
||||
// Test valid: trailing whitespace should be allowed
|
||||
Object* o1 = json::parse_string(mem, `"" `)!!;
|
||||
defer o1.free();
|
||||
assert(o1.s == "");
|
||||
|
||||
// Test valid: trailing newlines and tabs should be allowed
|
||||
Object* o2 = json::parse_string(mem, "123\n\t ")!!;
|
||||
defer o2.free();
|
||||
assert(o2.f == 123.0);
|
||||
}
|
||||
|
||||
fn void test_depth_limit()
|
||||
{
|
||||
// Test that deeply nested arrays with 150 opening brackets exceed the depth limit of 128
|
||||
String deep_json = "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]";
|
||||
|
||||
if (catch err = json::parse_string(mem, deep_json))
|
||||
{
|
||||
assert(err == json::MAX_DEPTH_REACHED);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(false, "Expected MAX_DEPTH_REACHED error for deeply nested array with 150 brackets");
|
||||
}
|
||||
}
|
||||
|
||||
fn void test_comments_rejected()
|
||||
{
|
||||
// Test the specific case: {"a":"b"}//
|
||||
if (catch err = json::parse_string(mem, `{"a":"b"}//comment`))
|
||||
{
|
||||
// Should fail with some error - comments are not valid JSON
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(false, "Expected error for {\"a\":\"b\"}// but parsing succeeded");
|
||||
}
|
||||
|
||||
// Test that comments are rejected (JSON spec doesn't allow comments)
|
||||
if (catch err = json::parse_string(mem, `{"a":"b"}//`))
|
||||
{
|
||||
// Should fail with some error - comments are not valid JSON
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(false, "Expected error for JSON with comments but parsing succeeded");
|
||||
}
|
||||
|
||||
// Test that just a slash is rejected
|
||||
if (catch err = json::parse_string(mem, "/"))
|
||||
{
|
||||
// Should fail with some error - slash is not valid JSON
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(false, "Expected error for single slash but parsing succeeded");
|
||||
}
|
||||
}
|
||||
|
||||
fn void test_null_bytes_rejected()
|
||||
{
|
||||
// Test that null bytes in JSON are rejected (like n_multidigit_number_then_00.json)
|
||||
// This simulates "123" + null byte + newline
|
||||
char[5] json_with_null = { '1', '2', '3', 0, '\n' };
|
||||
ByteReader reader;
|
||||
reader.init(json_with_null[..]);
|
||||
|
||||
if (catch err = json::parse(mem, &reader))
|
||||
{
|
||||
assert(err == json::UNEXPECTED_CHARACTER);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(false, "Expected UNEXPECTED_CHARACTER error for JSON with null byte");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,6 +27,7 @@ fn void test_sha256_longer()
|
||||
assert(sha.final() == x"59F109D9 533B2B70 E7C3B814 A2BD218F 78EA5D37 14455BC6 7987CF0D 664399CF");
|
||||
}
|
||||
|
||||
/*
|
||||
fn void gigahash_sha256()
|
||||
{
|
||||
// > 256 MiB is just at the threshold where the SHA bitcounter rolls overflows 'len', but doesn't hit the uint.max limit...
|
||||
@@ -38,7 +39,7 @@ fn void gigahash_sha256()
|
||||
sha.update(c);
|
||||
assert(sha.final() == x"053EADFD EC682CF1 6F3F8704 C7609C57 868DD757 65E08DC5 A7491F5D 06BCB74D");
|
||||
}
|
||||
|
||||
*/
|
||||
fn void test_pbkdf2()
|
||||
{
|
||||
char[] pw = "password";
|
||||
|
||||
@@ -25,6 +25,7 @@ fn void test_sha512_longer()
|
||||
assert(sha.final() == x"7361ec4a617b6473fb751c44d1026db9442915a5fcea1a419e615d2f3bc5069494da28b8cf2e4412a1dc97d6848f9c84a254fb884ad0720a83eaa0434aeafd8c");
|
||||
}
|
||||
|
||||
/*
|
||||
fn void gigahash_sha512()
|
||||
{
|
||||
// This test is here because of an overflow that was present in SHA256.
|
||||
@@ -36,7 +37,7 @@ fn void gigahash_sha512()
|
||||
sha.update(c);
|
||||
assert(sha.final() == x"724999ca733fdd49f489fc59a49ce41460531a78aa0c03488be9f4a1e29f955bd325d47462c89234a8f587d8a5d6a9ab79137bc5ce3acbfb928553dba8431a36");
|
||||
}
|
||||
|
||||
*/
|
||||
fn void test_pbkdf2()
|
||||
{
|
||||
char[] pw = "password";
|
||||
|
||||
Reference in New Issue
Block a user