// c3c/lib/std/encoding/json.c3

// Copyright (c) 2023 Christoffer Lerno. All rights reserved.
// Use of this source code is governed by the MIT license
// a copy of which can be found in the LICENSE_STDLIB file.
module std::encoding::json;
import std::io;
import std::ascii;
import std::collections::object;

// Kinds of lexical tokens handled by the JSON parser.
enum JsonTokenType
{
	// Placeholder for "no token"; parse_from_token treats it as unreachable.
	NO_TOKEN,
	// '{'
	LBRACE,
	// '['
	LBRACKET,
	// ','
	COMMA,
	// ':'
	COLON,
	// '}'
	RBRACE,
	// ']'
	RBRACKET,
	// A string literal; its contents are stored in JsonParser.last_string.
	STRING,
	// A numeric literal; its value is stored in JsonParser.last_number.
	NUMBER,
	// The literal `true`.
	TRUE,
	// The literal `false`.
	FALSE,
	// The literal `null`.
	NULL,
	// End of input; parse_from_token maps it to JsonParsingError.EOF.
	EOF,
}

// State of a JSON parser reading bytes from a stream.
struct JsonParser
{
	// Incremented for every '\n' consumed by advance().
	uint line;
	// Source of input bytes (read_byte / pushback_byte).
	Stream stream;
	// Allocator passed on when creating map, array, string and number objects.
	Allocator* allocator;
	// NOTE(review): not read or written by the methods in this file section;
	// presumably the most recent token - confirm against other users.
	JsonTokenType token;
	// Contents of the most recently lexed string literal (see lex_string).
	DString last_string;
	// Value of the most recently lexed numeric literal (see lex_number).
	double last_number;
	// NOTE(review): unused in the visible code - confirm before relying on it.
	char current;
	// NOTE(review): unused in the visible code - confirm before relying on it.
	anyfault current_err;
	// When true, advance() skips /* ... */ comments between tokens.
	bool skip_comments;
	// Set by read_next() once the stream reports EOF or yields a 0 byte;
	// from then on read_next() returns '\0' and pushback() does nothing.
	bool reached_end;
}

// Faults reported by the JSON parser.
fault JsonParsingError
{
	// Input ended where more was required (e.g. inside a string literal).
	EOF,
	// A character or token appeared where it is not allowed.
	UNEXPECTED_CHARACTER,
	// A backslash escape in a string is unknown, or a \u escape is malformed.
	INVALID_ESCAPE_SEQUENCE,
	// The same key occurs more than once in a single object.
	DUPLICATE_MEMBERS,
	// A numeric literal is malformed.
	INVALID_NUMBER,
}

// Reset the parser so that it reads from `s`. All fields not listed below are
// zeroed; the string buffer starts with room for 64 bytes taken from `using`.
fn void JsonParser.init(JsonParser* parser, Stream s, Allocator* using = mem::heap())
{
	DString buffer = dstring::new_with_capacity(64, using);
	*parser = {
		.stream = s,
		.allocator = using,
		.last_string = buffer,
	};
}

// Build the JSON value that starts with the already lexed `token`.
// Containers are parsed recursively; scalars are created from the parser's
// last_string / last_number. Structural tokens that cannot start a value
// yield UNEXPECTED_CHARACTER, and the EOF token yields JsonParsingError.EOF.
fn Object*! JsonParser.parse_from_token(JsonParser* this, JsonTokenType token)
{
	switch (token)
	{
		// Scalars.
		case STRING:
			return object::new_string(this.last_string.str(), this.allocator);
		case NUMBER:
			return object::new_float(this.last_number, this.allocator);
		case TRUE:
			return object::new_bool(true);
		case FALSE:
			return object::new_bool(false);
		case NULL:
			return object::new_null();
		// Containers.
		case LBRACE:
			return this.parse_map();
		case LBRACKET:
			return this.parse_array();
		// Tokens that may never begin a value.
		case COMMA:
		case COLON:
		case RBRACE:
		case RBRACKET:
			return JsonParsingError.UNEXPECTED_CHARACTER?;
		case EOF:
			return JsonParsingError.EOF?;
		case NO_TOKEN:
			unreachable();
	}
	unreachable();
}
// Lex the next token and parse the value that begins with it.
// Any fault from advance() is passed on to the caller.
fn Object*! JsonParser.parse_any(JsonParser* this)
{
	JsonTokenType first = this.advance()!;
	return this.parse_from_token(first);
}

// Lex a numeric literal whose first character `c` has already been read.
// The literal text is collected in a temporary string, converted with
// to_double() and stored in last_number. The first character that does not
// belong to the number is pushed back onto the stream.
// Returns NUMBER, or INVALID_NUMBER if the exponent has no digits or the
// collected text cannot be converted.
fn JsonTokenType! JsonParser.lex_number(JsonParser* this, char c)
{
	@pool()
	{
		DString text = dstring::tnew_with_capacity(32);

		// Optional leading minus sign.
		if (c == '-')
		{
			text.append(c);
			c = this.read_next()!;
		}

		// Integer part.
		while (c >= '0' && c <= '9')
		{
			text.append(c);
			c = this.read_next()!;
		}

		// Optional fraction.
		if (c == '.')
		{
			text.append(c);
			c = this.read_next()!;
			while (c >= '0' && c <= '9')
			{
				text.append(c);
				c = this.read_next()!;
			}
		}

		// Optional exponent: 'e' or 'E', optional sign, at least one digit.
		if (c == 'e' || c == 'E')
		{
			text.append(c);
			c = this.read_next()!;
			if (c == '-' || c == '+')
			{
				text.append(c);
				c = this.read_next()!;
			}
			if (c < '0' || c > '9') return JsonParsingError.INVALID_NUMBER?;
			while (c >= '0' && c <= '9')
			{
				text.append(c);
				c = this.read_next()!;
			}
		}

		// `c` is the first character after the number: hand it back.
		this.pushback();
		double! value = text.str().to_double() ?? JsonParsingError.INVALID_NUMBER?;
		this.last_number = value!;
		return NUMBER;
	};
}

// Parse the members of a JSON object; the opening '{' has already been
// consumed. Returns a new map object allocated with the parser's allocator.
// Faults: UNEXPECTED_CHARACTER on a malformed member list, DUPLICATE_MEMBERS
// if a key repeats, plus anything raised while lexing or parsing the values.
// On any fault the partially built map is freed.
fn Object*! JsonParser.parse_map(JsonParser* this)
{
	Object* map = object::new_obj(this.allocator);
	// Register the cleanup before the first fallible call so that the map
	// is released even when lexing the very first token fails.
	defer catch map.free();
	JsonTokenType token = this.advance()!;

	DString temp_key = dstring::new_with_capacity(32, this.allocator);
	defer temp_key.free();
	while (token != JsonTokenType.RBRACE)
	{
		if (token != JsonTokenType.STRING) return JsonParsingError.UNEXPECTED_CHARACTER?;
		DString string = this.last_string;
		if (map.has_key(string.str())) return JsonParsingError.DUPLICATE_MEMBERS?;
		// Copy the key into our own buffer: last_string is overwritten as soon
		// as the member's value contains another string literal.
		temp_key.clear();
		temp_key.append(string);
		this.parse_expected(COLON)!;
		Object* element = this.parse_any()!;
		map.set(temp_key.str(), element);
		token = this.advance()!;
		if (token == JsonTokenType.COMMA)
		{
			token = this.advance()!;
			continue;
		}
		if (token != JsonTokenType.RBRACE) return JsonParsingError.UNEXPECTED_CHARACTER?;
	}
	return map;
}

// Parse the elements of a JSON array; the opening '[' has already been
// consumed. Returns a new object holding the appended elements. On any fault
// the partially built object is freed.
fn Object*! JsonParser.parse_array(JsonParser* this)
{
	Object* list = object::new_obj(this.allocator);
	defer catch list.free();
	JsonTokenType token = this.advance()!;
	while (token != JsonTokenType.RBRACKET)
	{
		list.append(this.parse_from_token(token)!);
		token = this.advance()!;
		if (token == JsonTokenType.COMMA)
		{
			// Step over the separator to the next element (or the closing ']').
			token = this.advance()!;
		}
		else if (token != JsonTokenType.RBRACKET)
		{
			return JsonParsingError.UNEXPECTED_CHARACTER?;
		}
	}
	return list;
}

// Return the most recently read byte to the stream. Does nothing once the
// end of input has been reached. A failing pushback_byte() is treated as a
// fatal error (`!!`).
fn void JsonParser.pushback(JsonParser* this)
{
	if (this.reached_end) return;
	this.stream.pushback_byte()!!;
}

// Read the next byte from the stream.
// Returns '\0' when the stream reports IoError.EOF and on every call after
// that; reading a 0 byte also marks the end of input. Any other stream
// fault is passed on to the caller.
fn char! JsonParser.read_next(JsonParser* this)
{
	if (this.reached_end) return '\0';
	char! next = this.stream.read_byte();
	if (catch failure = next)
	{
		case IoError.EOF:
			this.reached_end = true;
			return '\0';
		default:
			return failure?;
	}
	// A literal 0 byte is treated as the end of input as well.
	if (next == '\0') this.reached_end = true;
	return next;
}

// Skip whitespace (and, if skip_comments is set, /* ... */ comments) and lex
// the next token.
// Returns the token type; for STRING and NUMBER the payload is stored in
// last_string / last_number.
// Faults: IoError.EOF when the input ends before a token starts,
// JsonParsingError.EOF when the input ends inside a comment,
// JsonParsingError.UNEXPECTED_CHARACTER for a character that cannot start a
// token, plus anything raised by lex_string, lex_number, match or the stream.
fn JsonTokenType! JsonParser.advance(JsonParser* this)
{
	char c;
	// Skip whitespace
	while WS: (c = this.read_next()!)
	{
		switch (c)
		{
			case '\n':
				this.line++;
				nextcase;
			case ' ':
			case '\t':
			case '\r':
			case '\v':
				continue;
			case '/':
				// Leave the whole loop, not just the switch: with comments
				// disabled '/' must reach the token switch below and be
				// rejected instead of being skipped like whitespace.
				if (!this.skip_comments) break WS;
				c = this.read_next()!;
				if (c != '*')
				{
					// A lone '/' does not start a comment and is not a token.
					this.pushback();
					return JsonParsingError.UNEXPECTED_CHARACTER?;
				}
				// Skip to the closing */
				bool after_star = false;
				while (true)
				{
					c = this.read_next()!;
					// read_next() keeps returning '\0' after the end of the
					// input, so an unterminated comment must stop here.
					if (c == '\0') return JsonParsingError.EOF?;
					if (c == '\n') this.line++;
					if (after_star && c == '/') break;
					after_star = c == '*';
				}
				continue;
			default:
				break WS;
		}
	}
	switch (c)
	{
		case '\0':
			return IoError.EOF?;
		case '{':
			return LBRACE;
		case '}':
			return RBRACE;
		case '[':
			return LBRACKET;
		case ']':
			return RBRACKET;
		case ':':
			return COLON;
		case ',':
			return COMMA;
		case '"':
			return this.lex_string();
		case '-':
		case '0'..'9':
			return this.lex_number(c);
		case 't':
			this.match("rue")!;
			return TRUE;
		case 'f':
			this.match("alse")!;
			return FALSE;
		case 'n':
			this.match("ull")!;
			return NULL;
		default:
			return JsonParsingError.UNEXPECTED_CHARACTER?;
	}
}

// Consume exactly the characters of `str` from the input.
// Stops at the first mismatch with UNEXPECTED_CHARACTER.
fn void! JsonParser.match(JsonParser* this, String str)
{
	foreach (expected : str)
	{
		char actual = this.read_next()!;
		if (actual == expected) continue;
		return JsonParsingError.UNEXPECTED_CHARACTER?;
	}
}

// Lex the next token and require it to be `token`;
// any other token yields UNEXPECTED_CHARACTER.
fn void! JsonParser.parse_expected(JsonParser* this, JsonTokenType token) @local
{
	JsonTokenType actual = this.advance()!;
	if (actual == token) return;
	return JsonParsingError.UNEXPECTED_CHARACTER?;
}

// Lex a string literal; the opening '"' has already been consumed.
// The decoded contents replace last_string.
// Returns STRING.
// Faults: JsonParsingError.EOF if the input ends inside the literal,
// UNEXPECTED_CHARACTER for a raw control character (1..31),
// INVALID_ESCAPE_SEQUENCE for an unknown escape or a \u escape that is not
// followed by four hex digits, plus any stream fault.
// NOTE: every \uXXXX escape is appended as a code point of its own; UTF-16
// surrogate pairs are not combined.
fn JsonTokenType! JsonParser.lex_string(JsonParser *this)
{
	this.last_string.clear();
	while LOOP: (1)
	{
		char c = this.read_next()!;
		switch (c)
		{
			case '\0':
				return JsonParsingError.EOF?;
			case 1..31:
				return JsonParsingError.UNEXPECTED_CHARACTER?;
			case '"':
				break LOOP;
			case '\\':
				// Escape sequence: handled below.
				break;
			default:
				this.last_string.append(c);
				continue;
		}
		// `c` was a backslash; decode the character that follows it.
		c = this.read_next()!;
		switch (c)
		{
			case '\0':
				return JsonParsingError.EOF?;
			case 1..31:
				return JsonParsingError.UNEXPECTED_CHARACTER?;
			case '"':
			case '\\':
			case '/':
				// These escapes stand for themselves.
				break;
			case 'b':
				c = '\b';
			case 'f':
				c = '\f';
			case 'n':
				c = '\n';
			case 'r':
				c = '\r';
			case 't':
				c = '\t';
			case 'u':
				uint val;
				for (int i = 0; i < 4; i++)
				{
					c = this.read_next()!;
					if (!c.is_xdigit()) return JsonParsingError.INVALID_ESCAPE_SEQUENCE?;
					// Parenthesised for readers used to C operator precedence.
					val = (val << 4) + (c > '9' ? (c | 32) - 'a' + 10 : c - '0');
				}
				this.last_string.append_char32(val);
				continue;
			default:
				return JsonParsingError.INVALID_ESCAPE_SEQUENCE?;
		}
		// Store the decoded simple escape; without this the escaped
		// character would be dropped from the string.
		this.last_string.append(c);
	}
	return STRING;
}