From ac966f118ae7e5c498253d59174316b4bc0732ce Mon Sep 17 00:00:00 2001 From: Christoffer Lerno Date: Wed, 27 Nov 2024 11:58:28 +0100 Subject: [PATCH] Updated base32 / base64 API. --- lib/std/encoding/base32.c3 | 93 +++--- lib/std/encoding/base64.c3 | 453 ++++++++++++++++------------ test/unit/stdlib/encoding/base32.c3 | 9 +- test/unit/stdlib/encoding/base64.c3 | 52 ++-- 4 files changed, 327 insertions(+), 280 deletions(-) diff --git a/lib/std/encoding/base32.c3 b/lib/std/encoding/base32.c3 index 35d9fc1f8..856db8b96 100644 --- a/lib/std/encoding/base32.c3 +++ b/lib/std/encoding/base32.c3 @@ -12,26 +12,32 @@ struct Base32Alphabet const char NO_PAD = 0; const char DEFAULT_PAD = '='; -fn String! encode_buffer(char[] code, char[] buffer, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) +<* + Encode the content of src into a newly allocated string + @param [in] src "The input to be encoded." + @param padding "The padding character or 0 if none" + @param alphabet "The alphabet to use" + @require padding < 0xFF "Invalid padding character" + @return "The encoded string." +*> +fn String! encode(char[] src, Allocator allocator, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) { - return (String)encode_data(alphabet, code, buffer, padding); + char[] dst = allocator::alloc_array(allocator, char, encode_len(src.len, padding)); + return encode_buffer(src, dst, padding, alphabet); } -fn char[]! decode_buffer(char[] code, char[] buffer, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) +<* + Decode the content of src into a newly allocated char array. + @param [in] src "The input to be encoded." + @param padding "The padding character or 0 if none" + @param alphabet "The alphabet to use" + @require padding < 0xFF "Invalid padding character" + @return "The decoded data." +*> +fn char[]! decode(char[] src, Allocator allocator, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) { - return decode_data(alphabet, code, buffer, padding)!; -} - -fn String! encode(char[] code, Allocator allocator, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) -{ - char[] data = allocator::alloc_array(allocator, char, encode_len(code.len, padding > 0)); - return (String)encode_buffer(code, data, padding, alphabet); -} - -fn char[]! decode(char[] code, Allocator allocator, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) -{ - char[] data = allocator::alloc_array(allocator, char, decode_len(code.len, padding > 0)); - return decode_buffer(code, data, padding, alphabet); + char[] dst = allocator::alloc_array(allocator, char, decode_len(src.len, padding)); + return decode_buffer(src, dst, padding, alphabet); } fn String! encode_new(char[] code, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) @inline => encode(code, allocator::heap(), padding, alphabet); @@ -42,12 +48,13 @@ fn char[]! decode_temp(char[] code, char padding = DEFAULT_PAD, Base32Alphabet* <* Calculate the length in bytes of the decoded data. @param n "Length in bytes of input." - @param use_padding "Whether padding characters are used or not" + @param padding "The padding character or 0 if none" + @require padding < 0xFF "Invalid padding character" @return "Length in bytes of the decoded data." *> -fn usz decode_len(usz n, bool use_padding = true) +fn usz decode_len(usz n, char padding) { - if (use_padding) return (n / 8) * 5; + if (padding) return (n / 8) * 5; // no padding usz trailing = n % 8; return n / 8 * 5 + (trailing * 5 ) / 8; @@ -56,13 +63,14 @@ fn usz decode_len(usz n, bool use_padding = true) <* Calculate the length in bytes of the encoded data. @param n "Length in bytes on input." - @param use_padding "Whether padding characters are used or not" + @param padding "The padding character or 0 if none" + @require padding < 0xFF "Invalid padding character" @return "Length in bytes of the encoded data." *> -fn usz encode_len(usz n, bool use_padding = true) +fn usz encode_len(usz n, char padding) { // A character is encoded into 8 x 5-bit blocks. - if (use_padding) return (n + 4) / 5 * 8; + if (padding) return (n + 4) / 5 * 8; // no padding usz trailing = n % 5; @@ -73,15 +81,18 @@ fn usz encode_len(usz n, bool use_padding = true) Decode the content of src into dst, which must be properly sized. @param src "The input to be decoded." @param dst "The decoded input." - @require dst.len >= decode_len(src.len, padding > 0) "Destination buffer too small" + @param padding "The padding character or 0 if none" + @param alphabet "The alphabet to use" + @require padding < 0xFF "Invalid padding character" + @require dst.len >= decode_len(src.len, padding) "Destination buffer too small" @return "The resulting dst buffer" @return! DecodingFailure *> -fn char[]! decode_data(Base32Alphabet* alphabet, char[] src, char[] dst, char padding) @private +fn char[]! decode_buffer(char[] src, char[] dst, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) { if (src.len == 0) return dst[:0]; char* dst_ptr = dst; - usz dn = decode_len(src.len, padding > 0); + usz dn = decode_len(src.len, padding); usz n; char[8] buf; while (src.len > 0 && dst.len > 0) @@ -153,16 +164,19 @@ fn char[]! decode_data(Base32Alphabet* alphabet, char[] src, char[] dst, char pa Encode the content of src into dst, which must be properly sized. @param [in] src "The input to be encoded." @param [inout] dst "The encoded input." - @require dst.len >= encode_len(src.len, padding > 0) "Destination buffer too small" + @param padding "The padding character or 0 if none" + @param alphabet "The alphabet to use" + @require padding < 0xFF "Invalid padding character" + @require dst.len >= encode_len(src.len, padding) "Destination buffer too small" @return "The encoded size." *> -fn char[] encode_data(Base32Alphabet* alphabet, char[] src, char[] dst, char padding = DEFAULT_PADDING) +fn String encode_buffer(char[] src, char[] dst, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) { - if (src.len == 0) return dst[:0]; + if (src.len == 0) return (String)dst[:0]; char* dst_ptr = dst; usz n = (src.len / 5) * 5; - usz dn = encode_len(src.len, padding > 0); + usz dn = encode_len(src.len, padding); uint msb, lsb; for (usz i = 0; i < n; i += 5) @@ -187,7 +201,7 @@ fn char[] encode_data(Base32Alphabet* alphabet, char[] src, char[] dst, char pad } usz trailing = src.len - n; - if (trailing == 0) return dst_ptr[:dn]; + if (trailing == 0) return (String)dst_ptr[:dn]; msb = 0; switch (trailing) @@ -221,10 +235,9 @@ fn char[] encode_data(Base32Alphabet* alphabet, char[] src, char[] dst, char pad dst[i] = padding; } } - return dst_ptr[:dn]; + return (String)dst_ptr[:dn]; } - const uint MASK @private = 0b11111; const char INVALID @private = 0xff; @@ -244,7 +257,7 @@ fault Base32Error struct Base32Encoder @deprecated { Base32Alphabet alphabet; - int padding; + char padding; } <* @@ -255,7 +268,7 @@ struct Base32Encoder @deprecated fn void! Base32Encoder.init(&self, Alphabet encoder = STD_ALPHABET, int padding = STD_PADDING) { encoder.validate(padding)!; - *self = { .alphabet = { .encoding = (char[32])encoder }, .padding = padding }; + *self = { .alphabet = { .encoding = (char[32])encoder }, .padding = padding < 0 ? (char)0 : (char)padding}; } <* @@ -265,7 +278,7 @@ fn void! Base32Encoder.init(&self, Alphabet encoder = STD_ALPHABET, int padding *> fn usz Base32Encoder.encode_len(&self, usz n) { - return encode_len(n, self.padding >= 0); + return encode_len(n, self.padding); } <* @@ -279,13 +292,13 @@ fn usz! Base32Encoder.encode(&self, char[] src, char[] dst) { usz dn = self.encode_len(src.len); if (dst.len < dn) return Base32Error.DESTINATION_TOO_SMALL?; - return encode_data(&self.alphabet, src, dst, self.padding < 0 ? 0 : (char)self.padding).len; + return encode_buffer(src, dst, self.padding, &self.alphabet).len; } struct Base32Decoder @deprecated { Base32Alphabet alphabet; - int padding; + char padding; } <* @@ -296,7 +309,7 @@ struct Base32Decoder @deprecated fn void! Base32Decoder.init(&self, Alphabet decoder = STD_ALPHABET, int padding = STD_PADDING) { decoder.validate(padding)!; - *self = { .alphabet = { .encoding = (char[32])decoder }, .padding = padding }; + *self = { .alphabet = { .encoding = (char[32])decoder }, .padding = padding < 0 ? (char)0 : (char)padding }; self.alphabet.reverse[..] = INVALID; foreach (char i, c : decoder) @@ -312,7 +325,7 @@ fn void! Base32Decoder.init(&self, Alphabet decoder = STD_ALPHABET, int padding *> fn usz Base32Decoder.decode_len(&self, usz n) { - return decode_len(n, self.padding >= 0); + return decode_len(n, self.padding); } <* @@ -327,7 +340,7 @@ fn usz! Base32Decoder.decode(&self, char[] src, char[] dst) if (src.len == 0) return 0; usz dn = self.decode_len(src.len); if (dst.len < dn) return Base32Error.DESTINATION_TOO_SMALL?; - return decode_data(&self.alphabet, src, dst, self.padding < 0 ? 0 : (char)self.padding).len; + return decode_buffer(src, dst, self.padding, &self.alphabet).len; } diff --git a/lib/std/encoding/base64.c3 b/lib/std/encoding/base64.c3 index 7da874832..96828ab1e 100644 --- a/lib/std/encoding/base64.c3 +++ b/lib/std/encoding/base64.c3 @@ -5,77 +5,260 @@ import std::core::bitorder; // Specifically this section: // https://www.rfc-editor.org/rfc/rfc4648#section-4 +const char NO_PAD = 0; +const char DEFAULT_PAD = '='; + +struct Base64Alphabet +{ + char[64] encoding; + char[256] reverse; +} + +const Base64Alphabet STANDARD = { + .encoding = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", + .reverse = + x`ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + ffffffffffffffffffffff3effffff3f3435363738393a3b3c3dffffffffffff + ff000102030405060708090a0b0c0d0e0f10111213141516171819ffffffffff + ff1a1b1c1d1e1f202122232425262728292a2b2c2d2e2f30313233ffffffffff + ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff` +}; + +const Base64Alphabet URL = { + .encoding = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", + .reverse = + x`ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffff3effff3435363738393a3b3c3dffffffffffff + ff000102030405060708090a0b0c0d0e0f10111213141516171819ffffffff3f + ff1a1b1c1d1e1f202122232425262728292a2b2c2d2e2f30313233ffffffffff + ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff` +}; + const STD_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; const URL_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; -fn String! encode_buffer(char[] code, char[] buffer) +fn String encode(char[] src, Allocator allocator, char padding = DEFAULT_PAD, Base64Alphabet* alphabet = &STANDARD) { - @check_coder(std_encoder); - return (String)buffer[:std_encoder.encode(code, buffer)!]; + char[] dst = allocator::alloc_array(allocator, char, encode_len(src.len, padding)); + return encode_buffer(src, dst, padding, alphabet); } -fn char[]! decode_buffer(char[] code, char[] buffer) +fn char[]! decode(char[] src, Allocator allocator, char padding = DEFAULT_PAD, Base64Alphabet* alphabet = &STANDARD) { - @check_coder(std_decoder); - return buffer[:std_decoder.decode(code, buffer)!]; + char[] dst = allocator::alloc_array(allocator, char, decode_len(src.len, padding))!; + return decode_buffer(src, dst, padding, alphabet); } -fn String! encode(char[] code, Allocator allocator) +fn String encode_new(char[] code, char padding = DEFAULT_PAD, Base64Alphabet* alphabet = &STANDARD) @inline => encode(code, allocator::heap(), padding, alphabet); +fn String encode_temp(char[] code, char padding = DEFAULT_PAD, Base64Alphabet* alphabet = &STANDARD) @inline => encode(code, allocator::temp(), padding, alphabet); +fn char[]! decode_new(char[] code, char padding = DEFAULT_PAD, Base64Alphabet* alphabet = &STANDARD) @inline => decode(code, allocator::heap(), padding, alphabet); +fn char[]! decode_temp(char[] code, char padding = DEFAULT_PAD, Base64Alphabet* alphabet = &STANDARD) @inline => decode(code, allocator::temp(), padding, alphabet); + + +<* + Calculate the size of the encoded data. + @param n "Size of the input to be encoded." + @param padding "The padding character or 0 if none" + @require padding < 0xFF "Invalid padding character" + @return "The size of the input once encoded." +*> +fn usz encode_len(usz n, char padding) { - @check_coder(std_encoder); - char[] data = allocator::alloc_array(allocator, char, std_encoder.encode_len(code.len)); - return (String)data[:std_encoder.encode(code, data)!]; + if (padding) return (n + 2) / 3 * 4; + usz trailing = n % 3; + return n / 3 * 4 + (trailing * 4 + 2) / 3; } -fn char[]! decode(char[] code, Allocator allocator) +<* + Calculate the size of the decoded data. + @param n "Size of the input to be decoded." + @param padding "The padding character or 0 if none" + @require padding < 0xFF "Invalid padding character" + @return "The size of the input once decoded." + @return! DecodingFailure.INVALID_PADDING +*> +fn usz! decode_len(usz n, char padding) { - @check_coder(std_decoder); - char[] data = allocator::alloc_array(allocator, char, std_decoder.decode_len(code.len))!; - return data[:std_decoder.decode(code, data)!]; + usz dn = n / 4 * 3; + usz trailing = n % 4; + if (padding) + { + if (trailing != 0) return DecodingFailure.INVALID_PADDING?; + // source size is multiple of 4 + return dn; + } + if (trailing == 1) return DecodingFailure.INVALID_PADDING?; + return dn + trailing * 3 / 4; } -fn String! encode_new(char[] code) @inline => encode(code, allocator::heap()); -fn String! encode_temp(char[] code) @inline => encode(code, allocator::temp()); -fn char[]! decode_new(char[] code) @inline => decode(code, allocator::heap()); -fn char[]! decode_temp(char[] code) @inline => decode(code, allocator::temp()); - -fn String! urlencode(char[] code, Allocator allocator) +<* + Encode the content of src into dst, which must be properly sized. + @param src "The input to be encoded." + @param dst "The encoded input." + @param padding "The padding character or 0 if none" + @param alphabet "The alphabet to use" + @require padding < 0xFF "Invalid padding character" + @return "The encoded size." + @return! Base64Error.DESTINATION_TOO_SMALL +*> +fn String encode_buffer(char[] src, char[] dst, char padding = DEFAULT_PAD, Base64Alphabet* alphabet = &STANDARD) { - @check_coder(url_encoder, URL_ALPHABET); - char[] data = allocator::alloc_array(allocator, char, url_encoder.encode_len(code.len)); - return (String)data[:url_encoder.encode(code, data)!]; + if (src.len == 0) return (String)dst[:0]; + usz dn = encode_len(src.len, padding); + char* dst_ptr = dst; + assert(dst.len >= dn); + usz trailing = src.len % 3; + char[] src3 = src[:^trailing]; + + while (src3.len > 0) + { + uint group = (uint)src3[0] << 16 | (uint)src3[1] << 8 | (uint)src3[2]; + dst[0] = alphabet.encoding[group >> 18 & MASK]; + dst[1] = alphabet.encoding[group >> 12 & MASK]; + dst[2] = alphabet.encoding[group >> 6 & MASK]; + dst[3] = alphabet.encoding[group & MASK]; + dst = dst[4..]; + src3 = src3[3..]; + } + + // Encode the remaining bytes according to: + // https://www.rfc-editor.org/rfc/rfc4648#section-3.5 + switch (trailing) + { + case 1: + uint group = (uint)src[^1] << 16; + dst[0] = alphabet.encoding[group >> 18 & MASK]; + dst[1] = alphabet.encoding[group >> 12 & MASK]; + if (padding > 0) + { + dst[2] = padding; + dst[3] = padding; + } + case 2: + uint group = (uint)src[^2] << 16 | (uint)src[^1] << 8; + dst[0] = alphabet.encoding[group >> 18 & MASK]; + dst[1] = alphabet.encoding[group >> 12 & MASK]; + dst[2] = alphabet.encoding[group >> 6 & MASK]; + if (padding > 0) + { + dst[3] = padding; + } + case 0: + break; + default: + unreachable(); + } + return (String)dst_ptr[:dn]; } -fn char[]! urldecode(char[] code, Allocator allocator) +<* + Decode the content of src into dst, which must be properly sized. + @param src "The input to be decoded." + @param dst "The decoded input." + @param padding "The padding character or 0 if none" + @param alphabet "The alphabet to use" + @require (decode_len(src.len, padding) ?? 0) <= dst.len "Destination buffer too small" + @require padding < 0xFF "Invalid padding character" + @return "The decoded data." + @return! DecodingFailure +*> +fn char[]! decode_buffer(char[] src, char[] dst, char padding = DEFAULT_PAD, Base64Alphabet* alphabet = &STANDARD) { - @check_coder(url_decoder, URL_ALPHABET); - char[] data = allocator::alloc_array(allocator, char, url_decoder.decode_len(code.len))!; - return data[:url_decoder.decode(code, data)!]; + if (src.len == 0) return dst[:0]; + usz dn = decode_len(src.len, padding)!; + assert(dst.len >= dn); + + usz trailing = src.len % 4; + char* dst_ptr = dst; + char[] src4 = src; + switch + { + case !padding: + src4 = src[:^trailing]; + default: + // If there is padding, keep the last 4 bytes for later. + // NB. src.len >= 4 as decode_len passed + trailing = 4; + if (src[^1] == padding) src4 = src[:^4]; + } + while (src4.len > 0) + { + char c0 = alphabet.reverse[src4[0]]; + char c1 = alphabet.reverse[src4[1]]; + char c2 = alphabet.reverse[src4[2]]; + char c3 = alphabet.reverse[src4[3]]; + switch (0xFF) + { + case c0: + case c1: + case c2: + case c3: + return DecodingFailure.INVALID_CHARACTER?; + } + uint group = (uint)c0 << 18 | (uint)c1 << 12 | (uint)c2 << 6 | (uint)c3; + dst[0] = (char)(group >> 16); + dst[1] = (char)(group >> 8); + dst[2] = (char)group; + dst = dst[3..]; + src4 = src4[4..]; + } + + if (trailing == 0) return dst_ptr[:dn]; + + src = src[^trailing..]; + char c0 = alphabet.reverse[src[0]]; + char c1 = alphabet.reverse[src[1]]; + if (c0 == 0xFF || c1 == 0xFF) return DecodingFailure.INVALID_PADDING?; + if (!padding) + { + switch (src.len) + { + case 2: + uint group = (uint)c0 << 18 | (uint)c1 << 12; + dst[0] = (char)(group >> 16); + case 3: + char c2 = alphabet.reverse[src[2]]; + if (c2 == 0xFF) return DecodingFailure.INVALID_CHARACTER?; + uint group = (uint)c0 << 18 | (uint)c1 << 12 | (uint)c2 << 6; + dst[0] = (char)(group >> 16); + dst[1] = (char)(group >> 8); + } + } + else + { + // Valid paddings are: + // 2: xx== + // 1: xxx= + switch (padding) + { + case src[2]: + if (src[3] != padding) return DecodingFailure.INVALID_PADDING?; + uint group = (uint)c0 << 18 | (uint)c1 << 12; + dst[0] = (char)(group >> 16); + dn -= 2; + case src[3]: + char c2 = alphabet.reverse[src[2]]; + if (c2 == 0xFF) return DecodingFailure.INVALID_CHARACTER?; + uint group = (uint)c0 << 18 | (uint)c1 << 12 | (uint)c2 << 6; + dst[0] = (char)(group >> 16); + dst[1] = (char)(group >> 8); + dn -= 1; + } + } + return dst_ptr[:dn]; } -fn String! urlencode_buffer(char[] code, char[] buffer) -{ - @check_coder(url_encoder, URL_ALPHABET); - return (String)buffer[:url_encoder.encode(code, buffer)!]; -} - -fn char[]! urldecode_buffer(char[] code, char[] buffer) -{ - @check_coder(url_decoder, URL_ALPHABET); - return buffer[:url_decoder.decode(code, buffer)!]; -} - -fn String! urlencode_new(char[] code) @inline => urlencode(code, allocator::heap()); -fn String! urlencode_temp(char[] code) @inline => urlencode(code, allocator::temp()); -fn char[]! urldecode_new(char[] code) @inline => urldecode(code, allocator::heap()); -fn char[]! urldecode_temp(char[] code) @inline => urldecode(code, allocator::temp()); - - const MASK @private = 0b111111; -struct Base64Encoder +struct Base64Encoder @deprecated { - int padding; + char padding; String alphabet; } @@ -98,7 +281,7 @@ fault Base64Error fn Base64Encoder*! Base64Encoder.init(&self, String alphabet, int padding = '=') { check_alphabet(alphabet, padding)!; - *self = { .padding = padding, .alphabet = alphabet }; + *self = { .padding = padding < 0 ? 0 : (char)padding, .alphabet = alphabet }; return self; } @@ -109,9 +292,7 @@ fn Base64Encoder*! Base64Encoder.init(&self, String alphabet, int padding = '=') *> fn usz Base64Encoder.encode_len(&self, usz n) { - if (self.padding >= 0) return (n + 2) / 3 * 4; - usz trailing = n % 3; - return n / 3 * 4 + (trailing * 4 + 2) / 3; + return encode_len(n, self.padding); } <* @@ -126,56 +307,18 @@ fn usz! Base64Encoder.encode(&self, char[] src, char[] dst) if (src.len == 0) return 0; usz dn = self.encode_len(src.len); if (dst.len < dn) return Base64Error.DESTINATION_TOO_SMALL?; - usz trailing = src.len % 3; - char[] src3 = src[:^trailing]; - - while (src3.len > 0) - { - uint group = (uint)src3[0] << 16 | (uint)src3[1] << 8 | (uint)src3[2]; - dst[0] = self.alphabet[group >> 18 & MASK]; - dst[1] = self.alphabet[group >> 12 & MASK]; - dst[2] = self.alphabet[group >> 6 & MASK]; - dst[3] = self.alphabet[group & MASK]; - dst = dst[4..]; - src3 = src3[3..]; - } - - // Encode the remaining bytes according to: - // https://www.rfc-editor.org/rfc/rfc4648#section-3.5 - switch (trailing) - { - case 1: - uint group = (uint)src[^1] << 16; - dst[0] = self.alphabet[group >> 18 & MASK]; - dst[1] = self.alphabet[group >> 12 & MASK]; - if (self.padding >= 0) - { - char pad = (char)self.padding; - dst[2] = pad; - dst[3] = pad; - } - case 2: - uint group = (uint)src[^2] << 16 | (uint)src[^1] << 8; - dst[0] = self.alphabet[group >> 18 & MASK]; - dst[1] = self.alphabet[group >> 12 & MASK]; - dst[2] = self.alphabet[group >> 6 & MASK]; - if (self.padding >= 0) - { - char pad = (char)self.padding; - dst[3] = pad; - } - } - return dn; + Base64Alphabet a = { .encoding = self.alphabet[:64] }; + return encode_buffer(src, dst, self.padding, &a).len; } -struct Base64Decoder +struct Base64Decoder @deprecated { - int padding; - String alphabet; - char[256] reverse; - char invalid; + char padding; + Base64Alphabet encoding; + bool init_done; } +import std; <* @param alphabet "The alphabet used for encoding." @param padding "Set to a negative value to disable padding." @@ -185,15 +328,15 @@ struct Base64Decoder *> fn void! Base64Decoder.init(&self, String alphabet, int padding = '=') { + self.init_done = true; check_alphabet(alphabet, padding)!; - *self = { .padding = padding, .alphabet = alphabet }; + *self = { .padding = padding < 0 ? 0 : (char)padding, .encoding.encoding = alphabet[:64] }; - self.invalid = 0xff; - self.reverse[..] = self.invalid; + self.encoding.reverse[..] = 0xFF; foreach (i, c : alphabet) { - self.reverse[c] = (char)i; + self.encoding.reverse[c] = (char)i; } } @@ -205,19 +348,7 @@ fn void! Base64Decoder.init(&self, String alphabet, int padding = '=') *> fn usz! Base64Decoder.decode_len(&self, usz n) { - usz dn = n / 4 * 3; - usz trailing = n % 4; - if (self.padding >= 0) - { - if (trailing != 0) return Base64Error.INVALID_PADDING?; - // source size is multiple of 4 - } - else - { - if (trailing == 1) return Base64Error.INVALID_PADDING?; - dn += trailing * 3 / 4; - } - return dn; + return decode_len(n, self.padding) ?? Base64Error.INVALID_PADDING?; } <* @@ -232,86 +363,17 @@ fn usz! Base64Decoder.decode(&self, char[] src, char[] dst) if (src.len == 0) return 0; usz dn = self.decode_len(src.len)!; if (dst.len < dn) return Base64Error.DESTINATION_TOO_SMALL?; - - usz trailing = src.len % 4; - char[] src4 = src; - switch + char[]! decoded = decode_buffer(src, dst, self.padding, &self.encoding); + if (catch err = decoded) { - case self.padding < 0: - src4 = src[:^trailing]; + case DecodingFailure.INVALID_PADDING: + return Base64Error.INVALID_PADDING?; + case DecodingFailure.INVALID_CHARACTER: + return Base64Error.INVALID_CHARACTER?; default: - // If there is padding, keep the last 4 bytes for later. - // NB. src.len >= 4 as decode_len passed - trailing = 4; - char pad = (char)self.padding; - if (src[^1] == pad) src4 = src[:^4]; + return err?; } - while (src4.len > 0) - { - char c0 = self.reverse[src4[0]]; - char c1 = self.reverse[src4[1]]; - char c2 = self.reverse[src4[2]]; - char c3 = self.reverse[src4[3]]; - switch (self.invalid) - { - case c0: - case c1: - case c2: - case c3: - return Base64Error.INVALID_CHARACTER?; - } - uint group = (uint)c0 << 18 | (uint)c1 << 12 | (uint)c2 << 6 | (uint)c3; - dst[0] = (char)(group >> 16); - dst[1] = (char)(group >> 8); - dst[2] = (char)group; - dst = dst[3..]; - src4 = src4[4..]; - } - - if (trailing == 0) return dn; - - src = src[^trailing..]; - char c0 = self.reverse[src[0]]; - char c1 = self.reverse[src[1]]; - if (c0 == self.invalid || c1 == self.invalid) return Base64Error.INVALID_PADDING?; - if (self.padding < 0) - { - switch (src.len) - { - case 2: - uint group = (uint)c0 << 18 | (uint)c1 << 12; - dst[0] = (char)(group >> 16); - case 3: - char c2 = self.reverse[src[2]]; - if (c2 == self.invalid) return Base64Error.INVALID_CHARACTER?; - uint group = (uint)c0 << 18 | (uint)c1 << 12 | (uint)c2 << 6; - dst[0] = (char)(group >> 16); - dst[1] = (char)(group >> 8); - } - } - else - { - // Valid paddings are: - // 2: xx== - // 1: xxx= - char pad = (char)self.padding; - switch (pad) - { - case src[2]: - if (src[3] != pad) return Base64Error.INVALID_PADDING?; - uint group = (uint)c0 << 18 | (uint)c1 << 12; - dst[0] = (char)(group >> 16); - dn -= 2; - case src[3]: - char c2 = self.reverse[src[2]]; - if (c2 == self.invalid) return Base64Error.INVALID_CHARACTER?; - uint group = (uint)c0 << 18 | (uint)c1 << 12 | (uint)c2 << 6; - dst[0] = (char)(group >> 16); - dst[1] = (char)(group >> 8); - dn -= 1; - } - } - return dn; + return decoded.len; } // Make sure that all bytes in the alphabet are unique and @@ -337,14 +399,3 @@ fn void! check_alphabet(String alphabet, int padding) @local } } -tlocal Base64Encoder std_encoder @local; -tlocal Base64Encoder url_encoder @local; -tlocal Base64Decoder std_decoder @local; -tlocal Base64Decoder url_decoder @local; - - -macro @check_coder(#coder, alphabet = STD_ALPHABET) @local -{ - if (#coder.alphabet == alphabet) return; - #coder.init(alphabet, '=')!!; -} diff --git a/test/unit/stdlib/encoding/base32.c3 b/test/unit/stdlib/encoding/base32.c3 index 88d131622..a457022b4 100644 --- a/test/unit/stdlib/encoding/base32.c3 +++ b/test/unit/stdlib/encoding/base32.c3 @@ -34,8 +34,8 @@ macro encode_tests(tests, alphabet, padding) foreach (t : tests) { char[64] buf; - usz n = base32::encode_len(t.dec.len, padding > 0); - base32::encode_buffer(t.dec, buf[:n], padding, alphabet)!!; + usz n = base32::encode_len(t.dec.len, padding); + base32::encode_buffer(t.dec, buf[:n], padding, alphabet); char[] want = t.enc; usz! pad_idx = array::index_of(want, '='); @@ -73,11 +73,10 @@ macro decode_tests(tests, alphabet, padding) } char[64] buf; - usz n = base32::decode_len(input.len, padding > 0); + usz n = base32::decode_len(input.len, padding); char[] buf2 = base32::decode_buffer(input, buf[:n], padding, alphabet)!!; - assert(buf2 == t.dec, "got: %s, want: %s", - (String)buf2, (String)t.dec); + assert(buf2 == t.dec, "got: %s, want: %s", buf2, (String)t.dec); } } diff --git a/test/unit/stdlib/encoding/base64.c3 b/test/unit/stdlib/encoding/base64.c3 index f03499f99..8c91893d5 100644 --- a/test/unit/stdlib/encoding/base64.c3 +++ b/test/unit/stdlib/encoding/base64.c3 @@ -1,4 +1,4 @@ -module encoding::base64 @test; +module encoding::base64_test @test; import std::encoding::base64; // https://www.rfc-editor.org/rfc/rfc4648#section-10 @@ -8,6 +8,7 @@ struct TestCase char[] in; char[] out; } +import std; fn void encode() { @@ -25,13 +26,11 @@ fn void encode() { @pool() { - Base64Encoder b; - b.init(base64::STD_ALPHABET)!; - usz n = b.encode_len(tc.in.len); + + usz n = base64::encode_len(tc.in.len, base64::DEFAULT_PAD); char[64] buf; - b.encode(tc.in, buf[:n])!; - assert(buf[:n] == tc.out); - assert(base64::encode_temp(tc.in)! == tc.out); + char[] res = base64::encode_buffer(tc.in, buf[:n]); + assert(res == tc.out); }; } } @@ -50,11 +49,9 @@ fn void encode_nopadding() }; foreach (tc : tcases) { - Base64Encoder b; - b.init(base64::STD_ALPHABET, -1)!; - usz n = b.encode_len(tc.in.len); + usz n = base64::encode_len(tc.in.len, base64::NO_PAD); char[64] buf; - b.encode(tc.in, buf[:n])!; + base64::encode_buffer(tc.in, buf[:n], padding: base64::NO_PAD); assert(buf[:n] == tc.out); } } @@ -74,12 +71,10 @@ fn void decode() }; foreach (tc : tcases) { - Base64Decoder b; - b.init(base64::STD_ALPHABET)!; - usz n = b.decode_len(tc.in.len)!; + usz n = base64::decode_len(tc.in.len, base64::DEFAULT_PAD)!; char[64] buf; - usz nn = b.decode(tc.in, buf[:n])!; - assert(buf[:nn] == tc.out); + char[] res = base64::decode_buffer(tc.in, buf[:n])!; + assert(res == tc.out); } } @@ -97,12 +92,10 @@ fn void decode_nopadding() }; foreach (tc : tcases) { - Base64Decoder b; - b.init(base64::STD_ALPHABET, -1)!; - usz n = b.decode_len(tc.in.len)!; + usz n = base64::decode_len(tc.in.len, base64::NO_PAD)!; char[64] buf; - usz nn = b.decode(tc.in, buf[:n])!; - assert(buf[:nn] == tc.out); + char[] res = base64::decode_buffer(tc.in, buf[:n], base64::NO_PAD)!; + assert(res == tc.out); } } @@ -118,21 +111,12 @@ fn void! urlencode() { char[64] buf; foreach (t : tcases) { - Base64Encoder enc; - enc.init(base64::URL_ALPHABET)!; - n = enc.encode(t.in, buf[..])!; - assert (buf[:n] == t.out, "got: %s, want: %s", (String)buf[:n], (String)t.out); + char[] res = base64::encode_buffer(t.in, buf[..], alphabet: &base64::URL); + assert (res == t.out, "got: %s, want: %s", (String)res, (String)t.out); - got = base64::urlencode_temp(t.in)!; - assert (got == t.out, "got: %s, want: %s", got, (String)t.out); + res = base64::decode_buffer(t.out, buf[..], alphabet: &base64::URL)!; + assert (res == t.in, "got: %s, want: %s", (String)res, (String)t.in); - Base64Decoder dec; - dec.init(base64::URL_ALPHABET)!; - n = dec.decode(t.out, buf[..])!; - assert (buf[:n] == t.in, "got: %s, want: %s", (String)buf[:n], (String)t.in); - - got = base64::urldecode_temp(t.out)!; - assert (got == t.in, "got: %s, want: %s", got, (String)t.in); } }; }