mirror of
https://github.com/c3lang/c3c.git
synced 2026-02-27 12:01:16 +00:00
Fixes to lib7, added parallel test structure.
This commit is contained in:
406
lib7/std/encoding/base32.c3
Normal file
406
lib7/std/encoding/base32.c3
Normal file
@@ -0,0 +1,406 @@
|
||||
module std::encoding::base32;
|
||||
|
||||
// This module implements base32 encoding according to RFC 4648
|
||||
// (https://www.rfc-editor.org/rfc/rfc4648)
|
||||
|
||||
struct Base32Alphabet
|
||||
{
|
||||
char[32] encoding;
|
||||
char[256] reverse;
|
||||
}
|
||||
|
||||
const char NO_PAD = 0;
|
||||
const char DEFAULT_PAD = '=';
|
||||
|
||||
<*
|
||||
Encode the content of src into a newly allocated string
|
||||
@param [in] src "The input to be encoded."
|
||||
@param padding "The padding character or 0 if none"
|
||||
@param alphabet "The alphabet to use"
|
||||
@require padding < 0xFF "Invalid padding character"
|
||||
@return "The encoded string."
|
||||
*>
|
||||
fn String! encode(char[] src, Allocator allocator, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD)
|
||||
{
|
||||
char[] dst = allocator::alloc_array(allocator, char, encode_len(src.len, padding));
|
||||
return encode_buffer(src, dst, padding, alphabet);
|
||||
}
|
||||
|
||||
<*
|
||||
Decode the content of src into a newly allocated char array.
|
||||
@param [in] src "The input to be encoded."
|
||||
@param padding "The padding character or 0 if none"
|
||||
@param alphabet "The alphabet to use"
|
||||
@require padding < 0xFF "Invalid padding character"
|
||||
@return "The decoded data."
|
||||
*>
|
||||
fn char[]! decode(char[] src, Allocator allocator, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD)
|
||||
{
|
||||
char[] dst = allocator::alloc_array(allocator, char, decode_len(src.len, padding));
|
||||
return decode_buffer(src, dst, padding, alphabet);
|
||||
}
|
||||
|
||||
fn String! encode_new(char[] code, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) @inline => encode(code, allocator::heap(), padding, alphabet);
|
||||
fn String! encode_temp(char[] code, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) @inline => encode(code, allocator::temp(), padding, alphabet);
|
||||
fn char[]! decode_new(char[] code, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) @inline => decode(code, allocator::heap(), padding, alphabet);
|
||||
fn char[]! decode_temp(char[] code, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) @inline => decode(code, allocator::temp(), padding, alphabet);
|
||||
|
||||
<*
|
||||
Calculate the length in bytes of the decoded data.
|
||||
@param n "Length in bytes of input."
|
||||
@param padding "The padding character or 0 if none"
|
||||
@require padding < 0xFF "Invalid padding character"
|
||||
@return "Length in bytes of the decoded data."
|
||||
*>
|
||||
fn usz decode_len(usz n, char padding)
|
||||
{
|
||||
if (padding) return (n / 8) * 5;
|
||||
// no padding
|
||||
usz trailing = n % 8;
|
||||
return n / 8 * 5 + (trailing * 5 ) / 8;
|
||||
}
|
||||
|
||||
<*
|
||||
Calculate the length in bytes of the encoded data.
|
||||
@param n "Length in bytes on input."
|
||||
@param padding "The padding character or 0 if none"
|
||||
@require padding < 0xFF "Invalid padding character"
|
||||
@return "Length in bytes of the encoded data."
|
||||
*>
|
||||
fn usz encode_len(usz n, char padding)
|
||||
{
|
||||
// A character is encoded into 8 x 5-bit blocks.
|
||||
if (padding) return (n + 4) / 5 * 8;
|
||||
|
||||
// no padding
|
||||
usz trailing = n % 5;
|
||||
return n / 5 * 8 + (trailing * 8 + 4) / 5;
|
||||
}
|
||||
|
||||
<*
|
||||
Decode the content of src into dst, which must be properly sized.
|
||||
@param src "The input to be decoded."
|
||||
@param dst "The decoded input."
|
||||
@param padding "The padding character or 0 if none"
|
||||
@param alphabet "The alphabet to use"
|
||||
@require padding < 0xFF "Invalid padding character"
|
||||
@require dst.len >= decode_len(src.len, padding) "Destination buffer too small"
|
||||
@return "The resulting dst buffer"
|
||||
@return! DecodingFailure
|
||||
*>
|
||||
fn char[]! decode_buffer(char[] src, char[] dst, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD)
|
||||
{
|
||||
if (src.len == 0) return dst[:0];
|
||||
char* dst_ptr = dst;
|
||||
usz dn = decode_len(src.len, padding);
|
||||
usz n;
|
||||
char[8] buf;
|
||||
while (src.len > 0 && dst.len > 0)
|
||||
{
|
||||
usz i @noinit;
|
||||
// load 8 bytes into buffer
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
if (src.len == 0)
|
||||
{
|
||||
if (padding > 0) return DecodingFailure.INVALID_PADDING?;
|
||||
break;
|
||||
}
|
||||
if (src[0] == padding) break;
|
||||
buf[i] = alphabet.reverse[src[0]];
|
||||
if (buf[i] == INVALID) return DecodingFailure.INVALID_CHARACTER?;
|
||||
src = src[1..];
|
||||
}
|
||||
|
||||
// extract 5-bytes from the buffer which contains 8 x 5 bit chunks
|
||||
switch (i)
|
||||
{
|
||||
case 8:
|
||||
// |66677777| dst[4]
|
||||
// | 77777| buf[7]
|
||||
// |666 | buf[6] << 5
|
||||
dst[4] = buf[7] | buf[6] << 5;
|
||||
n++;
|
||||
nextcase 7;
|
||||
case 7:
|
||||
// |45555566| dst[3]
|
||||
// | 66| buf[6] >> 3
|
||||
// | 55555 | buf[5] << 2
|
||||
// |4 | buf[4] << 7
|
||||
dst[3] = buf[6] >> 3 | buf[5] << 2 | buf[4] << 7;
|
||||
n++;
|
||||
nextcase 5;
|
||||
case 5:
|
||||
// |33334444| dst[2]
|
||||
// | 4444| buf[4] >> 1
|
||||
// |3333 | buf[3] << 4
|
||||
dst[2] = buf[4] >> 1 | buf[3] << 4;
|
||||
n++;
|
||||
nextcase 4;
|
||||
case 4:
|
||||
// |11222223| dst[1]
|
||||
// | 3| buf[3] >> 4
|
||||
// | 22222 | buf[2] << 1
|
||||
// |11 | buf[1] << 6
|
||||
dst[1] = buf[3] >> 4 | buf[2] << 1 | buf[1] << 6;
|
||||
n++;
|
||||
nextcase 2;
|
||||
case 2:
|
||||
// |00000111| dst[0]
|
||||
// | 111| buf[1] >> 2
|
||||
// |00000 | buf[0] << 3
|
||||
dst[0] = buf[1] >> 2 | buf[0] << 3;
|
||||
n++;
|
||||
default:
|
||||
return DecodingFailure.INVALID_CHARACTER?;
|
||||
}
|
||||
if (dst.len < 5) break;
|
||||
dst = dst[5..];
|
||||
}
|
||||
return dst_ptr[:n];
|
||||
}
|
||||
|
||||
<*
|
||||
Encode the content of src into dst, which must be properly sized.
|
||||
@param [in] src "The input to be encoded."
|
||||
@param [inout] dst "The encoded input."
|
||||
@param padding "The padding character or 0 if none"
|
||||
@param alphabet "The alphabet to use"
|
||||
@require padding < 0xFF "Invalid padding character"
|
||||
@require dst.len >= encode_len(src.len, padding) "Destination buffer too small"
|
||||
@return "The encoded size."
|
||||
*>
|
||||
fn String encode_buffer(char[] src, char[] dst, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD)
|
||||
{
|
||||
if (src.len == 0) return (String)dst[:0];
|
||||
|
||||
char* dst_ptr = dst;
|
||||
usz n = (src.len / 5) * 5;
|
||||
usz dn = encode_len(src.len, padding);
|
||||
|
||||
uint msb, lsb;
|
||||
for (usz i = 0; i < n; i += 5)
|
||||
{
|
||||
// to fit 40 bits we need two 32-bit uints
|
||||
msb = (uint)src[i] << 24 | (uint)src[i+1] << 16
|
||||
| (uint)src[i+2] << 8 | (uint)src[i+3];
|
||||
lsb = msb << 8 | (uint)src[i+4];
|
||||
|
||||
// now slice them into 5-bit chunks and translate to the
|
||||
// alphabet.
|
||||
dst[0] = alphabet.encoding[(msb >> 27) & MASK];
|
||||
dst[1] = alphabet.encoding[(msb >> 22) & MASK];
|
||||
dst[2] = alphabet.encoding[(msb >> 17) & MASK];
|
||||
dst[3] = alphabet.encoding[(msb >> 12) & MASK];
|
||||
dst[4] = alphabet.encoding[(msb >> 7) & MASK];
|
||||
dst[5] = alphabet.encoding[(msb >> 2) & MASK];
|
||||
dst[6] = alphabet.encoding[(lsb >> 5) & MASK];
|
||||
dst[7] = alphabet.encoding[lsb & MASK];
|
||||
|
||||
dst = dst[8..];
|
||||
}
|
||||
|
||||
usz trailing = src.len - n;
|
||||
if (trailing == 0) return (String)dst_ptr[:dn];
|
||||
|
||||
msb = 0;
|
||||
switch (trailing)
|
||||
{
|
||||
case 4:
|
||||
msb |= (uint)src[n+3];
|
||||
lsb = msb << 8;
|
||||
dst[6] = alphabet.encoding[(lsb >> 5) & MASK];
|
||||
dst[5] = alphabet.encoding[(msb >> 2) & MASK];
|
||||
nextcase 3;
|
||||
case 3:
|
||||
msb |= (uint)src[n+2] << 8;
|
||||
dst[4] = alphabet.encoding[(msb >> 7) & MASK];
|
||||
nextcase 2;
|
||||
case 2:
|
||||
msb |= (uint)src[n+1] << 16;
|
||||
dst[3] = alphabet.encoding[(msb >> 12) & MASK];
|
||||
dst[2] = alphabet.encoding[(msb >> 17) & MASK];
|
||||
nextcase 1;
|
||||
case 1:
|
||||
msb |= (uint)src[n] << 24;
|
||||
dst[1] = alphabet.encoding[(msb >> 22) & MASK];
|
||||
dst[0] = alphabet.encoding[(msb >> 27) & MASK];
|
||||
}
|
||||
|
||||
// add the padding
|
||||
if (padding > 0)
|
||||
{
|
||||
for (usz i = (trailing * 8 / 5) + 1; i < 8; i++)
|
||||
{
|
||||
dst[i] = padding;
|
||||
}
|
||||
}
|
||||
return (String)dst_ptr[:dn];
|
||||
}
|
||||
|
||||
const uint MASK @private = 0b11111;
|
||||
const char INVALID @private = 0xff;
|
||||
|
||||
const int STD_PADDING = '=';
|
||||
const int NO_PADDING = -1;
|
||||
|
||||
fault Base32Error
|
||||
{
|
||||
DUPLICATE_IN_ALPHABET,
|
||||
PADDING_IN_ALPHABET,
|
||||
INVALID_CHARACTER_IN_ALPHABET,
|
||||
DESTINATION_TOO_SMALL,
|
||||
INVALID_PADDING,
|
||||
CORRUPT_INPUT
|
||||
}
|
||||
|
||||
struct Base32Encoder @deprecated
|
||||
{
|
||||
Base32Alphabet alphabet;
|
||||
char padding;
|
||||
}
|
||||
|
||||
<*
|
||||
@param encoder "The 32-character alphabet for encoding."
|
||||
@param padding "Set to a negative value to disable padding."
|
||||
@require padding < 256
|
||||
*>
|
||||
fn void! Base32Encoder.init(&self, Alphabet encoder = STD_ALPHABET, int padding = STD_PADDING)
|
||||
{
|
||||
encoder.validate(padding)!;
|
||||
*self = { .alphabet = { .encoding = (char[32])encoder }, .padding = padding < 0 ? (char)0 : (char)padding};
|
||||
}
|
||||
|
||||
<*
|
||||
Calculate the length in bytes of the encoded data.
|
||||
@param n "Length in bytes on input."
|
||||
@return "Length in bytes of the encoded data."
|
||||
*>
|
||||
fn usz Base32Encoder.encode_len(&self, usz n)
|
||||
{
|
||||
return encode_len(n, self.padding);
|
||||
}
|
||||
|
||||
<*
|
||||
Encode the content of src into dst, which must be properly sized.
|
||||
@param [in] src "The input to be encoded."
|
||||
@param [inout] dst "The encoded input."
|
||||
@return "The encoded size."
|
||||
@return! Base32Error.DESTINATION_TOO_SMALL
|
||||
*>
|
||||
fn usz! Base32Encoder.encode(&self, char[] src, char[] dst)
|
||||
{
|
||||
usz dn = self.encode_len(src.len);
|
||||
if (dst.len < dn) return Base32Error.DESTINATION_TOO_SMALL?;
|
||||
return encode_buffer(src, dst, self.padding, &self.alphabet).len;
|
||||
}
|
||||
|
||||
struct Base32Decoder @deprecated
|
||||
{
|
||||
Base32Alphabet alphabet;
|
||||
char padding;
|
||||
}
|
||||
|
||||
<*
|
||||
@param decoder "The alphabet used for decoding."
|
||||
@param padding "Set to a negative value to disable padding."
|
||||
@require padding < 256
|
||||
*>
|
||||
fn void! Base32Decoder.init(&self, Alphabet decoder = STD_ALPHABET, int padding = STD_PADDING)
|
||||
{
|
||||
decoder.validate(padding)!;
|
||||
*self = { .alphabet = { .encoding = (char[32])decoder }, .padding = padding < 0 ? (char)0 : (char)padding };
|
||||
|
||||
self.alphabet.reverse[..] = INVALID;
|
||||
foreach (char i, c : decoder)
|
||||
{
|
||||
self.alphabet.reverse[c] = i;
|
||||
}
|
||||
}
|
||||
|
||||
<*
|
||||
Calculate the length in bytes of the decoded data.
|
||||
@param n "Length in bytes of input."
|
||||
@return "Length in bytes of the decoded data."
|
||||
*>
|
||||
fn usz Base32Decoder.decode_len(&self, usz n)
|
||||
{
|
||||
return decode_len(n, self.padding);
|
||||
}
|
||||
|
||||
<*
|
||||
Decode the content of src into dst, which must be properly sized.
|
||||
@param src "The input to be decoded."
|
||||
@param dst "The decoded input."
|
||||
@return "The decoded size."
|
||||
@return! Base32Error.DESTINATION_TOO_SMALL, Base32Error.CORRUPT_INPUT
|
||||
*>
|
||||
fn usz! Base32Decoder.decode(&self, char[] src, char[] dst)
|
||||
{
|
||||
if (src.len == 0) return 0;
|
||||
usz dn = self.decode_len(src.len);
|
||||
if (dst.len < dn) return Base32Error.DESTINATION_TOO_SMALL?;
|
||||
return decode_buffer(src, dst, self.padding, &self.alphabet).len;
|
||||
}
|
||||
|
||||
|
||||
// Validate the 32-character alphabet to make sure that no character occurs
|
||||
// twice and that the padding is not present in the alphabet.
|
||||
fn void! Alphabet.validate(&self, int padding)
|
||||
{
|
||||
bool[256] checked;
|
||||
foreach (c : self)
|
||||
{
|
||||
if (checked[c])
|
||||
{
|
||||
return Base32Error.DUPLICATE_IN_ALPHABET?;
|
||||
}
|
||||
checked[c] = true;
|
||||
if (c == '\r' || c == '\n')
|
||||
{
|
||||
return Base32Error.INVALID_CHARACTER_IN_ALPHABET?;
|
||||
}
|
||||
}
|
||||
if (padding >= 0)
|
||||
{
|
||||
char pad = (char)padding;
|
||||
if (pad == '\r' || pad == '\n')
|
||||
{
|
||||
return Base32Error.INVALID_PADDING?;
|
||||
}
|
||||
if (checked[pad])
|
||||
{
|
||||
return Base32Error.PADDING_IN_ALPHABET?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
distinct Alphabet = char[32];
|
||||
// Standard base32 Alphabet
|
||||
const Alphabet STD_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
|
||||
// Extended Hex Alphabet
|
||||
const Alphabet HEX_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUV";
|
||||
|
||||
const Base32Alphabet STANDARD = {
|
||||
.encoding = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567",
|
||||
.reverse = x`ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffffffff1a1b1c1d1e1fffffffffffffffff
|
||||
ff000102030405060708090a0b0c0d0e0f10111213141516171819ffffffffff
|
||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff`
|
||||
};
|
||||
|
||||
const Base32Alphabet HEX = {
|
||||
.encoding = "0123456789ABCDEFGHIJKLMNOPQRSTUV",
|
||||
.reverse = x`ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffff00010203040506070809ffffffffffff
|
||||
ff0a0b0c0d0e0f101112131415161718191a1b1c1d1e1fffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff`
|
||||
};
|
||||
401
lib7/std/encoding/base64.c3
Normal file
401
lib7/std/encoding/base64.c3
Normal file
@@ -0,0 +1,401 @@
|
||||
module std::encoding::base64;
|
||||
import std::core::bitorder;
|
||||
|
||||
// The implementation is based on https://www.rfc-editor.org/rfc/rfc4648
|
||||
// Specifically this section:
|
||||
// https://www.rfc-editor.org/rfc/rfc4648#section-4
|
||||
|
||||
const char NO_PAD = 0;
|
||||
const char DEFAULT_PAD = '=';
|
||||
|
||||
struct Base64Alphabet
|
||||
{
|
||||
char[64] encoding;
|
||||
char[256] reverse;
|
||||
}
|
||||
|
||||
const Base64Alphabet STANDARD = {
|
||||
.encoding = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
|
||||
.reverse =
|
||||
x`ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffff3effffff3f3435363738393a3b3c3dffffffffffff
|
||||
ff000102030405060708090a0b0c0d0e0f10111213141516171819ffffffffff
|
||||
ff1a1b1c1d1e1f202122232425262728292a2b2c2d2e2f30313233ffffffffff
|
||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff`
|
||||
};
|
||||
|
||||
const Base64Alphabet URL = {
|
||||
.encoding = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
|
||||
.reverse =
|
||||
x`ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffff3effff3435363738393a3b3c3dffffffffffff
|
||||
ff000102030405060708090a0b0c0d0e0f10111213141516171819ffffffff3f
|
||||
ff1a1b1c1d1e1f202122232425262728292a2b2c2d2e2f30313233ffffffffff
|
||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff`
|
||||
};
|
||||
|
||||
const STD_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
||||
const URL_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
|
||||
|
||||
fn String encode(char[] src, Allocator allocator, char padding = DEFAULT_PAD, Base64Alphabet* alphabet = &STANDARD)
|
||||
{
|
||||
char[] dst = allocator::alloc_array(allocator, char, encode_len(src.len, padding));
|
||||
return encode_buffer(src, dst, padding, alphabet);
|
||||
}
|
||||
|
||||
fn char[]! decode(char[] src, Allocator allocator, char padding = DEFAULT_PAD, Base64Alphabet* alphabet = &STANDARD)
|
||||
{
|
||||
char[] dst = allocator::alloc_array(allocator, char, decode_len(src.len, padding))!;
|
||||
return decode_buffer(src, dst, padding, alphabet);
|
||||
}
|
||||
|
||||
fn String encode_new(char[] code, char padding = DEFAULT_PAD, Base64Alphabet* alphabet = &STANDARD) @inline => encode(code, allocator::heap(), padding, alphabet);
|
||||
fn String encode_temp(char[] code, char padding = DEFAULT_PAD, Base64Alphabet* alphabet = &STANDARD) @inline => encode(code, allocator::temp(), padding, alphabet);
|
||||
fn char[]! decode_new(char[] code, char padding = DEFAULT_PAD, Base64Alphabet* alphabet = &STANDARD) @inline => decode(code, allocator::heap(), padding, alphabet);
|
||||
fn char[]! decode_temp(char[] code, char padding = DEFAULT_PAD, Base64Alphabet* alphabet = &STANDARD) @inline => decode(code, allocator::temp(), padding, alphabet);
|
||||
|
||||
|
||||
<*
|
||||
Calculate the size of the encoded data.
|
||||
@param n "Size of the input to be encoded."
|
||||
@param padding "The padding character or 0 if none"
|
||||
@require padding < 0xFF "Invalid padding character"
|
||||
@return "The size of the input once encoded."
|
||||
*>
|
||||
fn usz encode_len(usz n, char padding)
|
||||
{
|
||||
if (padding) return (n + 2) / 3 * 4;
|
||||
usz trailing = n % 3;
|
||||
return n / 3 * 4 + (trailing * 4 + 2) / 3;
|
||||
}
|
||||
|
||||
<*
|
||||
Calculate the size of the decoded data.
|
||||
@param n "Size of the input to be decoded."
|
||||
@param padding "The padding character or 0 if none"
|
||||
@require padding < 0xFF "Invalid padding character"
|
||||
@return "The size of the input once decoded."
|
||||
@return! DecodingFailure.INVALID_PADDING
|
||||
*>
|
||||
fn usz! decode_len(usz n, char padding)
|
||||
{
|
||||
usz dn = n / 4 * 3;
|
||||
usz trailing = n % 4;
|
||||
if (padding)
|
||||
{
|
||||
if (trailing != 0) return DecodingFailure.INVALID_PADDING?;
|
||||
// source size is multiple of 4
|
||||
return dn;
|
||||
}
|
||||
if (trailing == 1) return DecodingFailure.INVALID_PADDING?;
|
||||
return dn + trailing * 3 / 4;
|
||||
}
|
||||
|
||||
<*
|
||||
Encode the content of src into dst, which must be properly sized.
|
||||
@param src "The input to be encoded."
|
||||
@param dst "The encoded input."
|
||||
@param padding "The padding character or 0 if none"
|
||||
@param alphabet "The alphabet to use"
|
||||
@require padding < 0xFF "Invalid padding character"
|
||||
@return "The encoded size."
|
||||
@return! Base64Error.DESTINATION_TOO_SMALL
|
||||
*>
|
||||
fn String encode_buffer(char[] src, char[] dst, char padding = DEFAULT_PAD, Base64Alphabet* alphabet = &STANDARD)
|
||||
{
|
||||
if (src.len == 0) return (String)dst[:0];
|
||||
usz dn = encode_len(src.len, padding);
|
||||
char* dst_ptr = dst;
|
||||
assert(dst.len >= dn);
|
||||
usz trailing = src.len % 3;
|
||||
char[] src3 = src[:^trailing];
|
||||
|
||||
while (src3.len > 0)
|
||||
{
|
||||
uint group = (uint)src3[0] << 16 | (uint)src3[1] << 8 | (uint)src3[2];
|
||||
dst[0] = alphabet.encoding[group >> 18 & MASK];
|
||||
dst[1] = alphabet.encoding[group >> 12 & MASK];
|
||||
dst[2] = alphabet.encoding[group >> 6 & MASK];
|
||||
dst[3] = alphabet.encoding[group & MASK];
|
||||
dst = dst[4..];
|
||||
src3 = src3[3..];
|
||||
}
|
||||
|
||||
// Encode the remaining bytes according to:
|
||||
// https://www.rfc-editor.org/rfc/rfc4648#section-3.5
|
||||
switch (trailing)
|
||||
{
|
||||
case 1:
|
||||
uint group = (uint)src[^1] << 16;
|
||||
dst[0] = alphabet.encoding[group >> 18 & MASK];
|
||||
dst[1] = alphabet.encoding[group >> 12 & MASK];
|
||||
if (padding > 0)
|
||||
{
|
||||
dst[2] = padding;
|
||||
dst[3] = padding;
|
||||
}
|
||||
case 2:
|
||||
uint group = (uint)src[^2] << 16 | (uint)src[^1] << 8;
|
||||
dst[0] = alphabet.encoding[group >> 18 & MASK];
|
||||
dst[1] = alphabet.encoding[group >> 12 & MASK];
|
||||
dst[2] = alphabet.encoding[group >> 6 & MASK];
|
||||
if (padding > 0)
|
||||
{
|
||||
dst[3] = padding;
|
||||
}
|
||||
case 0:
|
||||
break;
|
||||
default:
|
||||
unreachable();
|
||||
}
|
||||
return (String)dst_ptr[:dn];
|
||||
}
|
||||
|
||||
<*
|
||||
Decode the content of src into dst, which must be properly sized.
|
||||
@param src "The input to be decoded."
|
||||
@param dst "The decoded input."
|
||||
@param padding "The padding character or 0 if none"
|
||||
@param alphabet "The alphabet to use"
|
||||
@require (decode_len(src.len, padding) ?? 0) <= dst.len "Destination buffer too small"
|
||||
@require padding < 0xFF "Invalid padding character"
|
||||
@return "The decoded data."
|
||||
@return! DecodingFailure
|
||||
*>
|
||||
fn char[]! decode_buffer(char[] src, char[] dst, char padding = DEFAULT_PAD, Base64Alphabet* alphabet = &STANDARD)
|
||||
{
|
||||
if (src.len == 0) return dst[:0];
|
||||
usz dn = decode_len(src.len, padding)!;
|
||||
assert(dst.len >= dn);
|
||||
|
||||
usz trailing = src.len % 4;
|
||||
char* dst_ptr = dst;
|
||||
char[] src4 = src;
|
||||
switch
|
||||
{
|
||||
case !padding:
|
||||
src4 = src[:^trailing];
|
||||
default:
|
||||
// If there is padding, keep the last 4 bytes for later.
|
||||
// NB. src.len >= 4 as decode_len passed
|
||||
trailing = 4;
|
||||
if (src[^1] == padding) src4 = src[:^4];
|
||||
}
|
||||
while (src4.len > 0)
|
||||
{
|
||||
char c0 = alphabet.reverse[src4[0]];
|
||||
char c1 = alphabet.reverse[src4[1]];
|
||||
char c2 = alphabet.reverse[src4[2]];
|
||||
char c3 = alphabet.reverse[src4[3]];
|
||||
switch (0xFF)
|
||||
{
|
||||
case c0:
|
||||
case c1:
|
||||
case c2:
|
||||
case c3:
|
||||
return DecodingFailure.INVALID_CHARACTER?;
|
||||
}
|
||||
uint group = (uint)c0 << 18 | (uint)c1 << 12 | (uint)c2 << 6 | (uint)c3;
|
||||
dst[0] = (char)(group >> 16);
|
||||
dst[1] = (char)(group >> 8);
|
||||
dst[2] = (char)group;
|
||||
dst = dst[3..];
|
||||
src4 = src4[4..];
|
||||
}
|
||||
|
||||
if (trailing == 0) return dst_ptr[:dn];
|
||||
|
||||
src = src[^trailing..];
|
||||
char c0 = alphabet.reverse[src[0]];
|
||||
char c1 = alphabet.reverse[src[1]];
|
||||
if (c0 == 0xFF || c1 == 0xFF) return DecodingFailure.INVALID_PADDING?;
|
||||
if (!padding)
|
||||
{
|
||||
switch (src.len)
|
||||
{
|
||||
case 2:
|
||||
uint group = (uint)c0 << 18 | (uint)c1 << 12;
|
||||
dst[0] = (char)(group >> 16);
|
||||
case 3:
|
||||
char c2 = alphabet.reverse[src[2]];
|
||||
if (c2 == 0xFF) return DecodingFailure.INVALID_CHARACTER?;
|
||||
uint group = (uint)c0 << 18 | (uint)c1 << 12 | (uint)c2 << 6;
|
||||
dst[0] = (char)(group >> 16);
|
||||
dst[1] = (char)(group >> 8);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Valid paddings are:
|
||||
// 2: xx==
|
||||
// 1: xxx=
|
||||
switch (padding)
|
||||
{
|
||||
case src[2]:
|
||||
if (src[3] != padding) return DecodingFailure.INVALID_PADDING?;
|
||||
uint group = (uint)c0 << 18 | (uint)c1 << 12;
|
||||
dst[0] = (char)(group >> 16);
|
||||
dn -= 2;
|
||||
case src[3]:
|
||||
char c2 = alphabet.reverse[src[2]];
|
||||
if (c2 == 0xFF) return DecodingFailure.INVALID_CHARACTER?;
|
||||
uint group = (uint)c0 << 18 | (uint)c1 << 12 | (uint)c2 << 6;
|
||||
dst[0] = (char)(group >> 16);
|
||||
dst[1] = (char)(group >> 8);
|
||||
dn -= 1;
|
||||
}
|
||||
}
|
||||
return dst_ptr[:dn];
|
||||
}
|
||||
|
||||
const MASK @private = 0b111111;
|
||||
|
||||
struct Base64Encoder @deprecated
|
||||
{
|
||||
char padding;
|
||||
String alphabet;
|
||||
}
|
||||
|
||||
fault Base64Error
|
||||
{
|
||||
DUPLICATE_IN_ALPHABET,
|
||||
PADDING_IN_ALPHABET,
|
||||
DESTINATION_TOO_SMALL,
|
||||
INVALID_PADDING,
|
||||
INVALID_CHARACTER,
|
||||
}
|
||||
|
||||
<*
|
||||
@param alphabet "The alphabet used for encoding."
|
||||
@param padding "Set to a negative value to disable padding."
|
||||
@require alphabet.len == 64
|
||||
@require padding < 256
|
||||
@return! Base64Error.DUPLICATE_IN_ALPHABET, Base64Error.PADDING_IN_ALPHABET
|
||||
*>
|
||||
fn Base64Encoder*! Base64Encoder.init(&self, String alphabet, int padding = '=')
|
||||
{
|
||||
check_alphabet(alphabet, padding)!;
|
||||
*self = { .padding = padding < 0 ? 0 : (char)padding, .alphabet = alphabet };
|
||||
return self;
|
||||
}
|
||||
|
||||
<*
|
||||
Calculate the size of the encoded data.
|
||||
@param n "Size of the input to be encoded."
|
||||
@return "The size of the input once encoded."
|
||||
*>
|
||||
fn usz Base64Encoder.encode_len(&self, usz n)
|
||||
{
|
||||
return encode_len(n, self.padding);
|
||||
}
|
||||
|
||||
<*
|
||||
Encode the content of src into dst, which must be properly sized.
|
||||
@param src "The input to be encoded."
|
||||
@param dst "The encoded input."
|
||||
@return "The encoded size."
|
||||
@return! Base64Error.DESTINATION_TOO_SMALL
|
||||
*>
|
||||
fn usz! Base64Encoder.encode(&self, char[] src, char[] dst)
|
||||
{
|
||||
if (src.len == 0) return 0;
|
||||
usz dn = self.encode_len(src.len);
|
||||
if (dst.len < dn) return Base64Error.DESTINATION_TOO_SMALL?;
|
||||
Base64Alphabet a = { .encoding = self.alphabet[:64] };
|
||||
return encode_buffer(src, dst, self.padding, &a).len;
|
||||
}
|
||||
|
||||
struct Base64Decoder @deprecated
|
||||
{
|
||||
char padding;
|
||||
Base64Alphabet encoding;
|
||||
bool init_done;
|
||||
}
|
||||
|
||||
import std;
|
||||
<*
|
||||
@param alphabet "The alphabet used for encoding."
|
||||
@param padding "Set to a negative value to disable padding."
|
||||
@require alphabet.len == 64
|
||||
@require padding < 256
|
||||
@return! Base64Error.DUPLICATE_IN_ALPHABET, Base64Error.PADDING_IN_ALPHABET
|
||||
*>
|
||||
fn void! Base64Decoder.init(&self, String alphabet, int padding = '=')
|
||||
{
|
||||
self.init_done = true;
|
||||
check_alphabet(alphabet, padding)!;
|
||||
*self = { .padding = padding < 0 ? 0 : (char)padding, .encoding.encoding = alphabet[:64] };
|
||||
|
||||
self.encoding.reverse[..] = 0xFF;
|
||||
|
||||
foreach (i, c : alphabet)
|
||||
{
|
||||
self.encoding.reverse[c] = (char)i;
|
||||
}
|
||||
}
|
||||
|
||||
<*
|
||||
Calculate the size of the decoded data.
|
||||
@param n "Size of the input to be decoded."
|
||||
@return "The size of the input once decoded."
|
||||
@return! Base64Error.INVALID_PADDING
|
||||
*>
|
||||
fn usz! Base64Decoder.decode_len(&self, usz n)
|
||||
{
|
||||
return decode_len(n, self.padding) ?? Base64Error.INVALID_PADDING?;
|
||||
}
|
||||
|
||||
<*
|
||||
Decode the content of src into dst, which must be properly sized.
|
||||
@param src "The input to be decoded."
|
||||
@param dst "The decoded input."
|
||||
@return "The decoded size."
|
||||
@return! Base64Error.DESTINATION_TOO_SMALL, Base64Error.INVALID_PADDING, Base64Error.INVALID_CHARACTER
|
||||
*>
|
||||
fn usz! Base64Decoder.decode(&self, char[] src, char[] dst)
|
||||
{
|
||||
if (src.len == 0) return 0;
|
||||
usz dn = self.decode_len(src.len)!;
|
||||
if (dst.len < dn) return Base64Error.DESTINATION_TOO_SMALL?;
|
||||
char[]! decoded = decode_buffer(src, dst, self.padding, &self.encoding);
|
||||
if (catch err = decoded)
|
||||
{
|
||||
case DecodingFailure.INVALID_PADDING:
|
||||
return Base64Error.INVALID_PADDING?;
|
||||
case DecodingFailure.INVALID_CHARACTER:
|
||||
return Base64Error.INVALID_CHARACTER?;
|
||||
default:
|
||||
return err?;
|
||||
}
|
||||
return decoded.len;
|
||||
}
|
||||
|
||||
// Make sure that all bytes in the alphabet are unique and
|
||||
// the padding is not present in the alphabet.
|
||||
fn void! check_alphabet(String alphabet, int padding) @local
|
||||
{
|
||||
bool[256] checked;
|
||||
if (padding < 0)
|
||||
{
|
||||
foreach (c : alphabet)
|
||||
{
|
||||
if (checked[c]) return Base64Error.DUPLICATE_IN_ALPHABET?;
|
||||
checked[c] = true;
|
||||
}
|
||||
return;
|
||||
}
|
||||
char pad = (char)padding;
|
||||
foreach (c : alphabet)
|
||||
{
|
||||
if (c == pad) return Base64Error.PADDING_IN_ALPHABET?;
|
||||
if (checked[c]) return Base64Error.DUPLICATE_IN_ALPHABET?;
|
||||
checked[c] = true;
|
||||
}
|
||||
}
|
||||
|
||||
95
lib7/std/encoding/csv.c3
Normal file
95
lib7/std/encoding/csv.c3
Normal file
@@ -0,0 +1,95 @@
|
||||
module std::encoding::csv;
|
||||
import std::io;
|
||||
|
||||
|
||||
struct CsvReader
|
||||
{
|
||||
InStream stream;
|
||||
String separator;
|
||||
}
|
||||
|
||||
struct CsvRow (Printable)
|
||||
{
|
||||
String[] list;
|
||||
String row;
|
||||
Allocator allocator;
|
||||
}
|
||||
|
||||
fn usz! CsvRow.to_format(&self, Formatter* f) @dynamic
|
||||
{
|
||||
return f.printf("%s", self.list);
|
||||
}
|
||||
|
||||
fn usz CsvRow.len(&self) @operator(len)
|
||||
{
|
||||
return self.list.len;
|
||||
}
|
||||
|
||||
<*
|
||||
@require col < self.list.len
|
||||
*>
|
||||
fn String CsvRow.get_col(&self, usz col) @operator([])
|
||||
{
|
||||
return self.list[col];
|
||||
}
|
||||
|
||||
fn void CsvReader.init(&self, InStream stream, String separator = ",")
|
||||
{
|
||||
self.stream = stream;
|
||||
self.separator = separator;
|
||||
}
|
||||
|
||||
fn CsvRow! CsvReader.read_new_row(self)
|
||||
{
|
||||
return self.read_row(allocator::heap()) @inline;
|
||||
}
|
||||
|
||||
<*
|
||||
@param [&inout] allocator
|
||||
*>
|
||||
fn CsvRow! CsvReader.read_row(self, Allocator allocator)
|
||||
{
|
||||
String row = io::readline(self.stream, allocator: allocator)!;
|
||||
defer catch allocator::free(allocator, row);
|
||||
String[] list = row.split(self.separator, allocator: allocator);
|
||||
return { list, row, allocator };
|
||||
}
|
||||
|
||||
fn CsvRow! CsvReader.read_temp_row(self)
|
||||
{
|
||||
return self.read_row(allocator::temp()) @inline;
|
||||
}
|
||||
|
||||
<*
|
||||
@require self.allocator != null `Row already freed`
|
||||
*>
|
||||
fn void CsvRow.free(&self)
|
||||
{
|
||||
allocator::free(self.allocator, self.list);
|
||||
allocator::free(self.allocator, self.row);
|
||||
self.allocator = null;
|
||||
}
|
||||
|
||||
fn void! CsvReader.skip_row(self) @maydiscard => @pool()
|
||||
{
|
||||
(void)io::treadline(self.stream);
|
||||
}
|
||||
|
||||
macro void! CsvReader.@each_row(self, int rows = int.max; @body(String[] row)) @maydiscard
|
||||
{
|
||||
InStream stream = self.stream;
|
||||
String sep = self.separator;
|
||||
while (rows--)
|
||||
{
|
||||
@stack_mem(512; Allocator mem)
|
||||
{
|
||||
String! s = io::readline(stream, mem);
|
||||
if (catch err = s)
|
||||
{
|
||||
if (err == IoError.EOF) return;
|
||||
return err?;
|
||||
}
|
||||
@body(s.split(sep, allocator: mem));
|
||||
};
|
||||
}
|
||||
}
|
||||
7
lib7/std/encoding/encoding.c3
Normal file
7
lib7/std/encoding/encoding.c3
Normal file
@@ -0,0 +1,7 @@
|
||||
module std::encoding;
|
||||
|
||||
fault DecodingFailure
|
||||
{
|
||||
INVALID_CHARACTER,
|
||||
INVALID_PADDING,
|
||||
}
|
||||
109
lib7/std/encoding/hex.c3
Normal file
109
lib7/std/encoding/hex.c3
Normal file
@@ -0,0 +1,109 @@
|
||||
module std::encoding::hex;
|
||||
import std::encoding @norecurse;
|
||||
|
||||
// The implementation is based on https://www.rfc-editor.org/rfc/rfc4648
|
||||
|
||||
fn String encode_buffer(char[] code, char[] buffer)
|
||||
{
|
||||
return (String)buffer[:encode_bytes(code, buffer)];
|
||||
}
|
||||
|
||||
fn char[]! decode_buffer(char[] code, char[] buffer)
|
||||
{
|
||||
return buffer[:decode_bytes(code, buffer)!];
|
||||
}
|
||||
|
||||
fn String encode(char[] code, Allocator allocator)
|
||||
{
|
||||
char[] data = allocator::alloc_array(allocator, char, encode_len(code.len));
|
||||
return (String)data[:encode_bytes(code, data)];
|
||||
}
|
||||
|
||||
fn char[]! decode(char[] code, Allocator allocator)
|
||||
{
|
||||
char[] data = allocator::alloc_array(allocator, char, decode_len(code.len));
|
||||
return data[:decode_bytes(code, data)!];
|
||||
}
|
||||
|
||||
fn String encode_new(char[] code) @inline => encode(code, allocator::heap());
|
||||
fn String encode_temp(char[] code) @inline => encode(code, allocator::temp());
|
||||
fn char[]! decode_new(char[] code) @inline => decode(code, allocator::heap());
|
||||
fn char[]! decode_temp(char[] code) @inline => decode(code, allocator::temp());
|
||||
|
||||
<*
|
||||
Calculate the size of the encoded data.
|
||||
@param n "Size of the input to be encoded."
|
||||
@return "The size of the input once encoded."
|
||||
*>
|
||||
fn usz encode_len(usz n) => n * 2;
|
||||
|
||||
<*
|
||||
Encode the content of src into dst, which must be properly sized.
|
||||
@param src "The input to be encoded."
|
||||
@param dst "The encoded input."
|
||||
@return "The encoded size."
|
||||
@require dst.len >= encode_len(src.len) "Destination array is not large enough"
|
||||
*>
|
||||
fn usz encode_bytes(char[] src, char[] dst)
|
||||
{
|
||||
usz j = 0;
|
||||
foreach (v : src)
|
||||
{
|
||||
dst[j] = HEXALPHABET[v >> 4];
|
||||
dst[j + 1] = HEXALPHABET[v & 0x0f];
|
||||
j = j + 2;
|
||||
}
|
||||
return src.len * 2;
|
||||
}
|
||||
|
||||
<*
|
||||
Calculate the size of the decoded data.
|
||||
@param n "Size of the input to be decoded."
|
||||
@return "The size of the input once decoded."
|
||||
*>
|
||||
macro usz decode_len(usz n) => n / 2;
|
||||
|
||||
<*
|
||||
Decodes src into bytes. Returns the actual number of bytes written to dst.
|
||||
|
||||
Expects that src only contains hexadecimal characters and that src has even
|
||||
length.
|
||||
|
||||
@param src "The input to be decoded."
|
||||
@param dst "The decoded input."
|
||||
@require src.len % 2 == 0 "src is not of even length"
|
||||
@require dst.len >= decode_len(src.len) "Destination array is not large enough"
|
||||
@return! DecodingFailure.INVALID_CHARACTER
|
||||
*>
|
||||
fn usz! decode_bytes(char[] src, char[] dst)
|
||||
{
|
||||
usz i;
|
||||
for (usz j = 1; j < src.len; j += 2)
|
||||
{
|
||||
char a = HEXREVERSE[src[j - 1]];
|
||||
char b = HEXREVERSE[src[j]];
|
||||
if (a > 0x0f || b > 0x0f) return DecodingFailure.INVALID_CHARACTER?;
|
||||
dst[i] = (a << 4) | b;
|
||||
i++;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
const char[?] HEXALPHABET @private = "0123456789abcdef";
|
||||
const char[?] HEXREVERSE @private =
|
||||
x`ffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffff
|
||||
00010203040506070809ffffffffffff
|
||||
ff0a0b0c0d0e0fffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffff
|
||||
ff0a0b0c0d0e0fffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffff
|
||||
ffffffffffffffffffffffffffffffff`;
|
||||
392
lib7/std/encoding/json.c3
Normal file
392
lib7/std/encoding/json.c3
Normal file
@@ -0,0 +1,392 @@
|
||||
// Copyright (c) 2023 Christoffer Lerno. All rights reserved.
|
||||
// Use of this source code is governed by the MIT license
|
||||
// a copy of which can be found in the LICENSE_STDLIB file.
|
||||
module std::encoding::json;
|
||||
import std::io;
|
||||
import std::ascii;
|
||||
import std::collections::object;
|
||||
|
||||
fault JsonParsingError
|
||||
{
|
||||
EOF,
|
||||
UNEXPECTED_CHARACTER,
|
||||
INVALID_ESCAPE_SEQUENCE,
|
||||
DUPLICATE_MEMBERS,
|
||||
INVALID_NUMBER,
|
||||
}
|
||||
|
||||
fn Object*! parse_string(String s, Allocator allocator = allocator::heap())
|
||||
{
|
||||
return parse((ByteReader){}.init(s), allocator);
|
||||
}
|
||||
|
||||
fn Object*! temp_parse_string(String s)
|
||||
{
|
||||
return parse((ByteReader){}.init(s), allocator::temp());
|
||||
}
|
||||
|
||||
fn Object*! parse(InStream s, Allocator allocator = allocator::heap())
|
||||
{
|
||||
@stack_mem(512; Allocator mem)
|
||||
{
|
||||
JsonContext context = { .last_string = dstring::new_with_capacity(64, mem), .stream = s, .allocator = allocator };
|
||||
@pool(allocator)
|
||||
{
|
||||
return parse_any(&context);
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
fn Object*! temp_parse(InStream s)
|
||||
{
|
||||
return parse(s, allocator::temp());
|
||||
}
|
||||
|
||||
// -- Implementation follows --
|
||||
|
||||
enum JsonTokenType @local
|
||||
{
|
||||
NO_TOKEN,
|
||||
LBRACE,
|
||||
LBRACKET,
|
||||
COMMA,
|
||||
COLON,
|
||||
RBRACE,
|
||||
RBRACKET,
|
||||
STRING,
|
||||
NUMBER,
|
||||
TRUE,
|
||||
FALSE,
|
||||
NULL,
|
||||
EOF,
|
||||
}
|
||||
|
||||
struct JsonContext @local
|
||||
{
|
||||
uint line;
|
||||
InStream stream;
|
||||
Allocator allocator;
|
||||
JsonTokenType token;
|
||||
DString last_string;
|
||||
double last_number;
|
||||
char current;
|
||||
bitstruct : char {
|
||||
bool skip_comments;
|
||||
bool reached_end;
|
||||
bool pushed_back;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn Object*! parse_from_token(JsonContext* context, JsonTokenType token) @local
|
||||
{
|
||||
switch (token)
|
||||
{
|
||||
case NO_TOKEN: unreachable();
|
||||
case LBRACE: return parse_map(context);
|
||||
case LBRACKET: return parse_array(context);
|
||||
case COMMA:
|
||||
case RBRACE:
|
||||
case RBRACKET:
|
||||
case COLON: return JsonParsingError.UNEXPECTED_CHARACTER?;
|
||||
case STRING: return object::new_string(context.last_string.str_view(), context.allocator);
|
||||
case NUMBER: return object::new_float(context.last_number, context.allocator);
|
||||
case TRUE: return object::new_bool(true);
|
||||
case FALSE: return object::new_bool(false);
|
||||
case NULL: return object::new_null();
|
||||
case EOF: return JsonParsingError.EOF?;
|
||||
}
|
||||
}
|
||||
fn Object*! parse_any(JsonContext* context) @local
|
||||
{
|
||||
return parse_from_token(context, advance(context));
|
||||
}
|
||||
|
||||
fn JsonTokenType! lex_number(JsonContext *context, char c) @local
|
||||
{
|
||||
@stack_mem(256; Allocator mem)
|
||||
{
|
||||
DString t = dstring::new_with_capacity(32, allocator: mem);
|
||||
bool negate = c == '-';
|
||||
if (negate)
|
||||
{
|
||||
t.append(c);
|
||||
c = read_next(context)!;
|
||||
}
|
||||
while (c.is_digit())
|
||||
{
|
||||
t.append(c);
|
||||
c = read_next(context)!;
|
||||
}
|
||||
if (c == '.')
|
||||
{
|
||||
t.append(c);
|
||||
while (c = read_next(context)!, c.is_digit())
|
||||
{
|
||||
t.append(c);
|
||||
}
|
||||
}
|
||||
if ((c | 32) == 'e')
|
||||
{
|
||||
t.append(c);
|
||||
c = read_next(context)!;
|
||||
switch (c)
|
||||
{
|
||||
case '-':
|
||||
case '+':
|
||||
t.append(c);
|
||||
c = read_next(context)!;
|
||||
}
|
||||
if (!c.is_digit()) return JsonParsingError.INVALID_NUMBER?;
|
||||
while (c.is_digit())
|
||||
{
|
||||
t.append(c);
|
||||
c = read_next(context)!;
|
||||
}
|
||||
}
|
||||
pushback(context, c);
|
||||
double! d = t.str_view().to_double() ?? JsonParsingError.INVALID_NUMBER?;
|
||||
context.last_number = d!;
|
||||
return NUMBER;
|
||||
};
|
||||
}
|
||||
|
||||
fn Object*! parse_map(JsonContext* context) @local
|
||||
{
|
||||
Object* map = object::new_obj(context.allocator);
|
||||
defer catch map.free();
|
||||
JsonTokenType token = advance(context)!;
|
||||
|
||||
@stack_mem(256; Allocator mem)
|
||||
{
|
||||
DString temp_key = dstring::new_with_capacity(32, mem);
|
||||
while (token != JsonTokenType.RBRACE)
|
||||
{
|
||||
if (token != JsonTokenType.STRING) return JsonParsingError.UNEXPECTED_CHARACTER?;
|
||||
DString string = context.last_string;
|
||||
if (map.has_key(string.str_view())) return JsonParsingError.DUPLICATE_MEMBERS?;
|
||||
// Copy the key to our temp holder, since our
|
||||
// last_string may be used in parse_any
|
||||
temp_key.clear();
|
||||
temp_key.append(string);
|
||||
parse_expected(context, COLON)!;
|
||||
Object* element = parse_any(context)!;
|
||||
map.set(temp_key.str_view(), element);
|
||||
token = advance(context)!;
|
||||
if (token == JsonTokenType.COMMA)
|
||||
{
|
||||
token = advance(context)!;
|
||||
continue;
|
||||
}
|
||||
if (token != JsonTokenType.RBRACE) return JsonParsingError.UNEXPECTED_CHARACTER?;
|
||||
}
|
||||
return map;
|
||||
};
|
||||
}
|
||||
|
||||
fn Object*! parse_array(JsonContext* context) @local
|
||||
{
|
||||
Object* list = object::new_obj(context.allocator);
|
||||
defer catch list.free();
|
||||
JsonTokenType token = advance(context)!;
|
||||
while (token != JsonTokenType.RBRACKET)
|
||||
{
|
||||
Object* element = parse_from_token(context, token)!;
|
||||
list.push(element);
|
||||
token = advance(context)!;
|
||||
if (token == JsonTokenType.COMMA)
|
||||
{
|
||||
token = advance(context)!;
|
||||
continue;
|
||||
}
|
||||
if (token != JsonTokenType.RBRACKET) return JsonParsingError.UNEXPECTED_CHARACTER?;
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
fn void pushback(JsonContext* context, char c) @local
|
||||
{
|
||||
if (!context.reached_end)
|
||||
{
|
||||
assert(!context.pushed_back);
|
||||
context.pushed_back = true;
|
||||
context.current = c;
|
||||
}
|
||||
}
|
||||
|
||||
fn char! read_next(JsonContext* context) @local
|
||||
{
|
||||
if (context.reached_end) return '\0';
|
||||
if (context.pushed_back)
|
||||
{
|
||||
context.pushed_back = false;
|
||||
return context.current;
|
||||
}
|
||||
char! c = context.stream.read_byte();
|
||||
if (catch err = c)
|
||||
{
|
||||
case IoError.EOF:
|
||||
context.reached_end = true;
|
||||
return '\0';
|
||||
default:
|
||||
return err?;
|
||||
}
|
||||
if (c == 0)
|
||||
{
|
||||
context.reached_end = true;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
fn JsonTokenType! advance(JsonContext* context) @local
|
||||
{
|
||||
char c;
|
||||
// Skip whitespace
|
||||
while WS: (c = read_next(context)!)
|
||||
{
|
||||
switch (c)
|
||||
{
|
||||
case '\n':
|
||||
context.line++;
|
||||
nextcase;
|
||||
case ' ':
|
||||
case '\t':
|
||||
case '\r':
|
||||
case '\v':
|
||||
continue;
|
||||
case '/':
|
||||
if (!context.skip_comments) break;
|
||||
c = read_next(context)!;
|
||||
if (c != '*')
|
||||
{
|
||||
pushback(context, c);
|
||||
break WS;
|
||||
}
|
||||
while COMMENT: (true)
|
||||
{
|
||||
// Skip to */
|
||||
while (c = read_next(context)!)
|
||||
{
|
||||
if (c == '\n') context.line++;
|
||||
if (c != '*') continue;
|
||||
// Skip through all the '*'
|
||||
while (c = read_next(context)!)
|
||||
{
|
||||
if (c == '\n') context.line++;
|
||||
if (c != '*') break;
|
||||
}
|
||||
if (c == '/') break COMMENT;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
default:
|
||||
break WS;
|
||||
}
|
||||
}
|
||||
switch (c)
|
||||
{
|
||||
case '\0':
|
||||
return IoError.EOF?;
|
||||
case '{':
|
||||
return LBRACE;
|
||||
case '}':
|
||||
return RBRACE;
|
||||
case '[':
|
||||
return LBRACKET;
|
||||
case ']':
|
||||
return RBRACKET;
|
||||
case ':':
|
||||
return COLON;
|
||||
case ',':
|
||||
return COMMA;
|
||||
case '"':
|
||||
return lex_string(context);
|
||||
case '-':
|
||||
case '0'..'9':
|
||||
return lex_number(context, c);
|
||||
case 't':
|
||||
match(context, "rue")!;
|
||||
return TRUE;
|
||||
case 'f':
|
||||
match(context, "alse")!;
|
||||
return FALSE;
|
||||
case 'n':
|
||||
match(context, "ull")!;
|
||||
return NULL;
|
||||
default:
|
||||
return JsonParsingError.UNEXPECTED_CHARACTER?;
|
||||
}
|
||||
}
|
||||
|
||||
fn void! match(JsonContext* context, String str) @local
|
||||
{
|
||||
foreach (c : str)
|
||||
{
|
||||
char l = read_next(context)!;
|
||||
if (l != c) return JsonParsingError.UNEXPECTED_CHARACTER?;
|
||||
}
|
||||
}
|
||||
|
||||
fn void! parse_expected(JsonContext* context, JsonTokenType token) @local
|
||||
{
|
||||
if (advance(context)! != token) return JsonParsingError.UNEXPECTED_CHARACTER?;
|
||||
}
|
||||
|
||||
fn JsonTokenType! lex_string(JsonContext* context)
|
||||
{
|
||||
context.last_string.clear();
|
||||
while LOOP: (true)
|
||||
{
|
||||
char c = read_next(context)!;
|
||||
switch (c)
|
||||
{
|
||||
case '\0':
|
||||
return JsonParsingError.EOF?;
|
||||
case 1..31:
|
||||
return JsonParsingError.UNEXPECTED_CHARACTER?;
|
||||
case '"':
|
||||
break LOOP;
|
||||
case '\\':
|
||||
break;
|
||||
default:
|
||||
context.last_string.append(c);
|
||||
continue;
|
||||
}
|
||||
c = read_next(context)!;
|
||||
switch (c)
|
||||
{
|
||||
case '\0':
|
||||
return JsonParsingError.EOF?;
|
||||
case 1..31:
|
||||
return JsonParsingError.UNEXPECTED_CHARACTER?;
|
||||
case '"':
|
||||
case '\\':
|
||||
case '/':
|
||||
break;
|
||||
case 'b':
|
||||
c = '\b';
|
||||
case 'f':
|
||||
c = '\f';
|
||||
case 'n':
|
||||
c = '\n';
|
||||
case 'r':
|
||||
c = '\r';
|
||||
case 't':
|
||||
c = '\t';
|
||||
case 'u':
|
||||
uint val;
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
c = read_next(context)!;
|
||||
if (!c.is_xdigit()) return JsonParsingError.INVALID_ESCAPE_SEQUENCE?;
|
||||
val = val << 4 + (c > '9' ? (c | 32) - 'a' + 10 : c - '0');
|
||||
}
|
||||
context.last_string.append_char32(val);
|
||||
continue;
|
||||
default:
|
||||
return JsonParsingError.INVALID_ESCAPE_SEQUENCE?;
|
||||
}
|
||||
context.last_string.append(c);
|
||||
}
|
||||
return STRING;
|
||||
}
|
||||
Reference in New Issue
Block a user