// Copyright (c) 2023 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by the MIT license
// a copy of which can be found in the LICENSE_STDLIB file.
module std::encoding::json;
import std::io;
import std::ascii;
import std::collections::object;

// The kinds of token JsonParser.advance can hand to the parsing functions.
enum JsonTokenType
{
	NO_TOKEN,
	LBRACE,
	LBRACKET,
	COMMA,
	COLON,
	RBRACE,
	RBRACKET,
	STRING,
	NUMBER,
	TRUE,
	FALSE,
	NULL,
	EOF,
}

// State of a streaming JSON parser.
struct JsonParser
{
	// Incremented for every '\n' seen while skipping whitespace and comments.
	uint line;
	// Byte source the parser reads from.
	Stream stream;
	// Allocator used for the objects and key buffers the parser creates.
	Allocator* allocator;
	JsonTokenType token;
	// Decoded text of the most recently lexed STRING token.
	DString last_string;
	// Value of the most recently lexed NUMBER token.
	double last_number;
	char current;
	anyerr current_err;
	// When set, "/* ... */" comments are skipped like whitespace.
	bool skip_comments;
	// Set once the stream reported EOF or produced a zero byte.
	bool reached_end;
}

fault JsonParsingError
{
	EOF,
	UNEXPECTED_CHARACTER,
	INVALID_ESCAPE_SEQUENCE,
	DUPLICATE_MEMBERS,
	INVALID_NUMBER,
}

// Reset the parser to read from `s`, allocating through `using`.
// Every field not listed in the initializer is zeroed.
fn void JsonParser.init(JsonParser* parser, Stream s, Allocator* using = mem::heap())
{
	*parser = { .last_string = dstring::new_with_capacity(64, using), .stream = s, .allocator = using };
}

// Build the value that starts with the already lexed `token`.
// Structural tokens that cannot begin a value yield UNEXPECTED_CHARACTER,
// an EOF token yields JsonParsingError.EOF.
fn Object*! JsonParser.parse_from_token(JsonParser* this, JsonTokenType token)
{
	switch (token)
	{
		case NO_TOKEN:
			unreachable();
		case LBRACE:
			return this.parse_map();
		case LBRACKET:
			return this.parse_array();
		case COMMA:
		case RBRACE:
		case RBRACKET:
		case COLON:
			return JsonParsingError.UNEXPECTED_CHARACTER!;
		case STRING:
			return object::new_string(this.last_string.str(), this.allocator);
		case NUMBER:
			return object::new_float(this.last_number, this.allocator);
		case TRUE:
			return object::new_bool(true);
		case FALSE:
			return object::new_bool(false);
		case NULL:
			return object::new_null();
		case EOF:
			return JsonParsingError.EOF!;
	}
	unreachable();
}

// Lex the next token and parse the value it starts.
fn Object*! JsonParser.parse_any(JsonParser* this)
{
	return this.parse_from_token(this.advance());
}

// Lex a number whose first character `c` ('-' or a digit) has already been read.
// The text is collected in a temporary string and converted with
// str::to_double; the result is stored in `last_number`.
fn JsonTokenType! JsonParser.lex_number(JsonParser* this, char c)
{
	@pool()
	{
		DString t = dstring::tnew_with_capacity(32);
		bool negate = c == '-';
		if (negate)
		{
			t.append(c);
			c = this.read_next()?;
		}
		// Integer part.
		while (c >= '0' && c <= '9')
		{
			t.append(c);
			c = this.read_next()?;
		}
		// Optional fraction.
		if (c == '.')
		{
			t.append(c);
			while (c = this.read_next()?, c >= '0' && c <= '9')
			{
				t.append(c);
			}
		}
		// Optional exponent; `c | 32` folds 'E' onto 'e'.
		if ((c | 32) == 'e')
		{
			t.append(c);
			c = this.read_next()?;
			switch (c)
			{
				case '-':
				case '+':
					t.append(c);
					c = this.read_next()?;
			}
			// An exponent needs at least one digit.
			if (c < '0' || c > '9') return JsonParsingError.INVALID_NUMBER!;
			while (c >= '0' && c <= '9')
			{
				t.append(c);
				c = this.read_next()?;
			}
		}
		// The character that ended the number belongs to the next token.
		this.pushback();
		double! d = str::to_double(t.str()) ?? JsonParsingError.INVALID_NUMBER!;
		this.last_number = d?;
		return NUMBER;
	};
}

// Parse the members of an object; the opening '{' has already been consumed.
// Fails with DUPLICATE_MEMBERS if a key occurs twice and with
// UNEXPECTED_CHARACTER on malformed structure.
fn Object*! JsonParser.parse_map(JsonParser* this)
{
	Object* map = object::new_obj(this.allocator);
	// Register the cleanup before the first fallible call so the map is
	// released even if lexing the very first token fails.
	defer catch map.free();
	JsonTokenType token = this.advance()?;

	DString temp_key = dstring::new_with_capacity(32, this.allocator);
	defer temp_key.free();
	while (token != JsonTokenType.RBRACE)
	{
		if (token != JsonTokenType.STRING) return JsonParsingError.UNEXPECTED_CHARACTER!;
		DString string = this.last_string;
		if (map.has_key(string.str())) return JsonParsingError.DUPLICATE_MEMBERS!;
		// Copy the key to our temp holder. We do this to work around the issue
		// if the temp allocator should be used as the default allocator.
		temp_key.clear();
		temp_key.append(string);
		this.parse_expected(COLON)?;
		Object* element = this.parse_any()?;
		map.set(temp_key.str(), element);
		token = this.advance()?;
		if (token == JsonTokenType.COMMA)
		{
			token = this.advance()?;
			continue;
		}
		if (token != JsonTokenType.RBRACE) return JsonParsingError.UNEXPECTED_CHARACTER!;
	}
	return map;
}

// Parse the elements of an array; the opening '[' has already been consumed.
fn Object*! JsonParser.parse_array(JsonParser* this)
{
	Object* list = object::new_obj(this.allocator);
	defer catch list.free();
	JsonTokenType token = this.advance()?;
	while (token != JsonTokenType.RBRACKET)
	{
		Object* element = this.parse_from_token(token)?;
		list.append(element);
		token = this.advance()?;
		if (token == JsonTokenType.COMMA)
		{
			token = this.advance()?;
			continue;
		}
		if (token != JsonTokenType.RBRACKET) return JsonParsingError.UNEXPECTED_CHARACTER!;
	}
	return list;
}

// Hand the last byte back to the stream. Does nothing once the end has been
// reached; a failing pushback is treated as a fatal error (`!!`).
fn void JsonParser.pushback(JsonParser* this)
{
	if (!this.reached_end) this.stream.pushback_byte()!!;
}

// Read one byte. Returns '\0' when the stream reports IoError.EOF or has
// already ended; a zero byte in the data also marks the end of input.
// Any other stream error is passed on to the caller.
fn char! JsonParser.read_next(JsonParser* this)
{
	if (this.reached_end) return '\0';
	char! c = this.stream.read_byte();
	if (catch err = c)
	{
		case IoError.EOF:
			this.reached_end = true;
			return '\0';
		default:
			return err!;
	}
	if (c == 0)
	{
		this.reached_end = true;
	}
	return c;
}

// Skip whitespace (and "/* ... */" comments when `skip_comments` is set) and
// lex the next token. Returns IoError.EOF when the input is exhausted,
// JsonParsingError.EOF when a comment is left unterminated and
// UNEXPECTED_CHARACTER for anything that cannot start a token.
fn JsonTokenType! JsonParser.advance(JsonParser* this)
{
	char c;
	// Skip whitespace
	while WS: (c = this.read_next()?)
	{
		switch (c)
		{
			case '\n':
				this.line++;
				nextcase;
			case ' ':
			case '\t':
			case '\r':
			case '\v':
				continue;
			case '/':
				// Without comment support '/' is not whitespace: leave the loop
				// so the dispatch below reports it as an unexpected character.
				if (!this.skip_comments) break WS;
				c = this.read_next()?;
				// Only block comments are recognised; a lone '/' is an error.
				if (c != '*') return JsonParsingError.UNEXPECTED_CHARACTER!;
				// Skip to */
				bool after_star = false;
				while (true)
				{
					c = this.read_next()?;
					// End of input inside a comment: fail instead of spinning forever.
					if (c == '\0') return JsonParsingError.EOF!;
					if (c == '\n') this.line++;
					if (after_star && c == '/') break;
					after_star = c == '*';
				}
				continue;
			default:
				break WS;
		}
	}
	switch (c)
	{
		case '\0':
			return IoError.EOF!;
		case '{':
			return LBRACE;
		case '}':
			return RBRACE;
		case '[':
			return LBRACKET;
		case ']':
			return RBRACKET;
		case ':':
			return COLON;
		case ',':
			return COMMA;
		case '"':
			return this.lex_string();
		case '-':
		case '0'..'9':
			return this.lex_number(c);
		case 't':
			this.match("rue")?;
			return TRUE;
		case 'f':
			this.match("alse")?;
			return FALSE;
		case 'n':
			this.match("ull")?;
			return NULL;
		default:
			return JsonParsingError.UNEXPECTED_CHARACTER!;
	}
}

// Require the upcoming bytes to be exactly `str`.
fn void! JsonParser.match(JsonParser* this, String str)
{
	foreach (c : str)
	{
		char l = this.read_next()?;
		if (l != c) return JsonParsingError.UNEXPECTED_CHARACTER!;
	}
}

// Lex the next token and fail with UNEXPECTED_CHARACTER unless it is `token`.
fn void! JsonParser.parse_expected(JsonParser* this, JsonTokenType token) @local
{
	if (this.advance()? != token) return JsonParsingError.UNEXPECTED_CHARACTER!;
}

// Lex a string whose opening quote has already been consumed and store the
// decoded text in `last_string`. Control characters (1..31) are rejected,
// end of input inside the string yields JsonParsingError.EOF and an unknown
// escape yields INVALID_ESCAPE_SEQUENCE.
fn JsonTokenType! JsonParser.lex_string(JsonParser *this)
{
	this.last_string.clear();
	while LOOP: (1)
	{
		char c = this.read_next()?;
		switch (c)
		{
			case '\0':
				return JsonParsingError.EOF!;
			case 1..31:
				return JsonParsingError.UNEXPECTED_CHARACTER!;
			case '"':
				break LOOP;
			case '\\':
				// Escape sequence, decoded below.
				break;
			default:
				this.last_string.append(c);
				continue;
		}
		c = this.read_next()?;
		switch (c)
		{
			case '\0':
				return JsonParsingError.EOF!;
			case 1..31:
				return JsonParsingError.UNEXPECTED_CHARACTER!;
			case '"':
			case '\\':
			case '/':
				// These escapes stand for themselves.
				break;
			case 'b':
				c = '\b';
			case 'f':
				c = '\f';
			case 'n':
				c = '\n';
			case 'r':
				c = '\r';
			case 't':
				c = '\t';
			case 'u':
				// Four hex digits forming one code point value.
				uint val;
				for (int i = 0; i < 4; i++)
				{
					c = this.read_next()?;
					if (!c.is_xdigit()) return JsonParsingError.INVALID_ESCAPE_SEQUENCE!;
					val = val << 4 + (c > '9' ? (c | 32) - 'a' + 10 : c - '0');
				}
				this.last_string.append_char32(val);
				continue;
			default:
				return JsonParsingError.INVALID_ESCAPE_SEQUENCE!;
		}
		// Store the character a simple escape stands for; without this the
		// escape would vanish from the resulting string.
		this.last_string.append(c);
	}
	return STRING;
}