mirror of
https://github.com/c3lang/c3c.git
synced 2026-02-27 12:01:16 +00:00
encoding: implement RFC4648 base32 encoding (#1596)
Implement base32 encoding and decoding according to RFC 4648 with the standard and extended hex alphabets. Add unit tests. Link: https://www.rfc-editor.org/rfc/rfc4648 Signed-off-by: Koni Marti <koni.marti@gmail.com> --------- Signed-off-by: Koni Marti <koni.marti@gmail.com>
This commit is contained in:
310
lib/std/encoding/base32.c3
Normal file
310
lib/std/encoding/base32.c3
Normal file
@@ -0,0 +1,310 @@
|
||||
module std::encoding::base32;
|
||||
|
||||
// This module implements base32 encoding according to RFC 4648
|
||||
// (https://www.rfc-editor.org/rfc/rfc4648)
|
||||
|
||||
distinct Alphabet = inline char[32];
|
||||
|
||||
// Standard base32 Alphabet
|
||||
const Alphabet STD_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
|
||||
|
||||
// Extended Hex Alphabet
|
||||
const Alphabet HEX_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUV";
|
||||
|
||||
const uint MASK @private = 0b11111;
|
||||
const char INVALID @private = 0xff;
|
||||
|
||||
const int STD_PADDING = '=';
|
||||
const int NO_PADDING = -1;
|
||||
|
||||
fault Base32Error
|
||||
{
|
||||
DUPLICATE_IN_ALPHABET,
|
||||
PADDING_IN_ALPHABET,
|
||||
INVALID_CHARACTER_IN_ALPHABET,
|
||||
DESTINATION_TOO_SMALL,
|
||||
INVALID_PADDING,
|
||||
CORRUPT_INPUT
|
||||
}
|
||||
|
||||
struct Base32Encoder
|
||||
{
|
||||
Alphabet alphabet;
|
||||
int padding;
|
||||
}
|
||||
|
||||
<*
|
||||
@param encoder "The 32-character alphabet for encoding."
|
||||
@param padding "Set to a negative value to disable padding."
|
||||
@require padding < 256
|
||||
*>
|
||||
fn void! Base32Encoder.init(&self, Alphabet encoder = STD_ALPHABET, int padding = STD_PADDING)
|
||||
{
|
||||
encoder.validate(padding)!;
|
||||
*self = { .alphabet = encoder, .padding = padding };
|
||||
}
|
||||
|
||||
<*
|
||||
Calculate the length in bytes of the encoded data.
|
||||
@param n "Length in bytes on input."
|
||||
@return "Length in bytes of the encoded data."
|
||||
*>
|
||||
fn usz Base32Encoder.encode_len(&self, usz n)
|
||||
{
|
||||
// A character is encoded into 8 x 5-bit blocks.
|
||||
if (self.padding >= 0)
|
||||
{
|
||||
// with padding
|
||||
return (n + 4) / 5 * 8;
|
||||
}
|
||||
else
|
||||
{
|
||||
// no padding
|
||||
usz trailing = n % 5;
|
||||
return n / 5 * 8 + (trailing * 8 + 4) / 5;
|
||||
}
|
||||
}
|
||||
|
||||
<*
|
||||
Encode the content of src into dst, which must be properly sized.
|
||||
@param [in] src "The input to be encoded."
|
||||
@param [inout] dst "The encoded input."
|
||||
@return "The encoded size."
|
||||
@return! Base32Error.DESTINATION_TOO_SMALL
|
||||
*>
|
||||
fn usz! Base32Encoder.encode(&self, char[] src, char[] dst)
|
||||
{
|
||||
if (src.len == 0) return 0;
|
||||
|
||||
usz n = (src.len / 5) * 5;
|
||||
usz dn = self.encode_len(src.len);
|
||||
if (dst.len < dn) return Base32Error.DESTINATION_TOO_SMALL?;
|
||||
|
||||
uint msb, lsb;
|
||||
for (usz i = 0; i < n; i += 5)
|
||||
{
|
||||
// to fit 40 bits we need two 32-bit uints
|
||||
msb = (uint)src[i] << 24 | (uint)src[i+1] << 16
|
||||
| (uint)src[i+2] << 8 | (uint)src[i+3];
|
||||
lsb = msb << 8 | (uint)src[i+4];
|
||||
|
||||
// now slice them into 5-bit chunks and translate to the
|
||||
// alphabet.
|
||||
dst[0] = self.alphabet[(msb >> 27) & MASK];
|
||||
dst[1] = self.alphabet[(msb >> 22) & MASK];
|
||||
dst[2] = self.alphabet[(msb >> 17) & MASK];
|
||||
dst[3] = self.alphabet[(msb >> 12) & MASK];
|
||||
dst[4] = self.alphabet[(msb >> 7) & MASK];
|
||||
dst[5] = self.alphabet[(msb >> 2) & MASK];
|
||||
dst[6] = self.alphabet[(lsb >> 5) & MASK];
|
||||
dst[7] = self.alphabet[lsb & MASK];
|
||||
|
||||
dst = dst[8..];
|
||||
}
|
||||
|
||||
usz trailing = src.len - n;
|
||||
if (trailing == 0) return dn;
|
||||
|
||||
msb = 0;
|
||||
switch (trailing)
|
||||
{
|
||||
case 4:
|
||||
msb |= (uint)src[n+3];
|
||||
lsb = msb << 8;
|
||||
dst[6] = self.alphabet[(lsb >> 5) & MASK];
|
||||
dst[5] = self.alphabet[(msb >> 2) & MASK];
|
||||
nextcase 3;
|
||||
case 3:
|
||||
msb |= (uint)src[n+2] << 8;
|
||||
dst[4] = self.alphabet[(msb >> 7) & MASK];
|
||||
nextcase 2;
|
||||
case 2:
|
||||
msb |= (uint)src[n+1] << 16;
|
||||
dst[3] = self.alphabet[(msb >> 12) & MASK];
|
||||
dst[2] = self.alphabet[(msb >> 17) & MASK];
|
||||
nextcase 1;
|
||||
case 1:
|
||||
msb |= (uint)src[n] << 24;
|
||||
dst[1] = self.alphabet[(msb >> 22) & MASK];
|
||||
dst[0] = self.alphabet[(msb >> 27) & MASK];
|
||||
}
|
||||
|
||||
// add the padding
|
||||
if (self.padding >= 0)
|
||||
{
|
||||
char pad = (char)self.padding;
|
||||
for (usz i = (trailing * 8 / 5) + 1; i < 8; i++)
|
||||
{
|
||||
dst[i] = pad;
|
||||
}
|
||||
}
|
||||
|
||||
return dn;
|
||||
}
|
||||
|
||||
struct Base32Decoder
|
||||
{
|
||||
Alphabet alphabet;
|
||||
int padding;
|
||||
char[256] reverse;
|
||||
}
|
||||
|
||||
<*
|
||||
@param decoder "The alphabet used for decoding."
|
||||
@param padding "Set to a negative value to disable padding."
|
||||
@require padding < 256
|
||||
*>
|
||||
fn void! Base32Decoder.init(&self, Alphabet decoder = STD_ALPHABET, int padding = STD_PADDING)
|
||||
{
|
||||
decoder.validate(padding)!;
|
||||
*self = { .alphabet = decoder, .padding = padding };
|
||||
|
||||
self.reverse[..] = INVALID;
|
||||
foreach (char i, c : decoder)
|
||||
{
|
||||
self.reverse[c] = i;
|
||||
}
|
||||
}
|
||||
|
||||
<*
|
||||
Calculate the length in bytes of the decoded data.
|
||||
@param n "Length in bytes of input."
|
||||
@return "Length in bytes of the decoded data."
|
||||
*>
|
||||
fn usz Base32Decoder.decode_len(&self, usz n)
|
||||
{
|
||||
if (self.padding >= 0)
|
||||
{
|
||||
// with padding
|
||||
return (n / 8) * 5;
|
||||
}
|
||||
else
|
||||
{
|
||||
// no padding
|
||||
usz trailing = n % 8;
|
||||
return n / 8 * 5 + (trailing * 5 ) / 8;
|
||||
}
|
||||
}
|
||||
|
||||
<*
|
||||
Decode the content of src into dst, which must be properly sized.
|
||||
@param src "The input to be decoded."
|
||||
@param dst "The decoded input."
|
||||
@return "The decoded size."
|
||||
@return! Base32Error.DESTINATION_TOO_SMALL, Base32Error.CORRUPT_INPUT
|
||||
*>
|
||||
fn usz! Base32Decoder.decode(&self, char[] src, char[] dst)
|
||||
{
|
||||
if (src.len == 0) return 0;
|
||||
usz dn = self.decode_len(src.len);
|
||||
if (dst.len < dn) return Base32Error.DESTINATION_TOO_SMALL?;
|
||||
|
||||
usz j, n;
|
||||
char[8] buf;
|
||||
while (src.len > 0 && dst.len > 0)
|
||||
{
|
||||
|
||||
// load 8 bytes into buffer
|
||||
for (j = 0; j < 8; j++)
|
||||
{
|
||||
if (src.len == 0)
|
||||
{
|
||||
if (self.padding >= 0)
|
||||
{
|
||||
return Base32Error.CORRUPT_INPUT?;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (src[0] == (char)self.padding)
|
||||
{
|
||||
break;
|
||||
}
|
||||
buf[j] = self.reverse[src[0]];
|
||||
if (buf[j] == INVALID)
|
||||
{
|
||||
return Base32Error.CORRUPT_INPUT?;
|
||||
}
|
||||
src = src[1..];
|
||||
}
|
||||
|
||||
// extract 5-bytes from the buffer which contains 8 x 5 bit chunks
|
||||
switch (j)
|
||||
{
|
||||
case 8:
|
||||
// |66677777| dst[4]
|
||||
// | 77777| buf[7]
|
||||
// |666 | buf[6] << 5
|
||||
dst[4] = buf[7] | buf[6] << 5;
|
||||
n++;
|
||||
nextcase 7;
|
||||
case 7:
|
||||
// |45555566| dst[3]
|
||||
// | 66| buf[6] >> 3
|
||||
// | 55555 | buf[5] << 2
|
||||
// |4 | buf[4] << 7
|
||||
dst[3] = buf[6] >> 3 | buf[5] << 2 | buf[4] << 7;
|
||||
n++;
|
||||
nextcase 5;
|
||||
case 5:
|
||||
// |33334444| dst[2]
|
||||
// | 4444| buf[4] >> 1
|
||||
// |3333 | buf[3] << 4
|
||||
dst[2] = buf[4] >> 1 | buf[3] << 4;
|
||||
n++;
|
||||
nextcase 4;
|
||||
case 4:
|
||||
// |11222223| dst[1]
|
||||
// | 3| buf[3] >> 4
|
||||
// | 22222 | buf[2] << 1
|
||||
// |11 | buf[1] << 6
|
||||
dst[1] = buf[3] >> 4 | buf[2] << 1 | buf[1] << 6;
|
||||
n++;
|
||||
nextcase 2;
|
||||
case 2:
|
||||
// |00000111| dst[0]
|
||||
// | 111| buf[1] >> 2
|
||||
// |00000 | buf[0] << 3
|
||||
dst[0] = buf[1] >> 2 | buf[0] << 3;
|
||||
n++;
|
||||
default:
|
||||
return Base32Error.CORRUPT_INPUT?;
|
||||
}
|
||||
|
||||
if (dst.len < 5) break;
|
||||
dst = dst[5..];
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
|
||||
// Validate the 32-character alphabet to make sure that no character occurs
|
||||
// twice and that the padding is not present in the alphabet.
|
||||
fn void! Alphabet.validate(&self, int padding)
|
||||
{
|
||||
bool[256] checked;
|
||||
foreach (c : self)
|
||||
{
|
||||
if (checked[c])
|
||||
{
|
||||
return Base32Error.DUPLICATE_IN_ALPHABET?;
|
||||
}
|
||||
checked[c] = true;
|
||||
if (c == '\r' || c == '\n')
|
||||
{
|
||||
return Base32Error.INVALID_CHARACTER_IN_ALPHABET?;
|
||||
}
|
||||
}
|
||||
if (padding >= 0)
|
||||
{
|
||||
char pad = (char)padding;
|
||||
if (pad == '\r' || pad == '\n')
|
||||
{
|
||||
return Base32Error.INVALID_PADDING?;
|
||||
}
|
||||
if (checked[pad])
|
||||
{
|
||||
return Base32Error.PADDING_IN_ALPHABET?;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -62,6 +62,7 @@
|
||||
- Add read/write to stream with big endian ints.
|
||||
- Move accidently hidden "wrap_bytes".
|
||||
- Added CBool #1530.
|
||||
- Added encoding/base32 module.
|
||||
|
||||
## 0.6.3 Change list
|
||||
|
||||
|
||||
100
test/unit/stdlib/encoding/base32.c3
Normal file
100
test/unit/stdlib/encoding/base32.c3
Normal file
@@ -0,0 +1,100 @@
|
||||
module encoding::base32 @test;
|
||||
import std::encoding::base32;
|
||||
|
||||
// https://www.rfc-editor.org/rfc/rfc4648#section-10
|
||||
|
||||
struct TestCase
|
||||
{
|
||||
char[] dec;
|
||||
char[] enc;
|
||||
}
|
||||
|
||||
TestCase[*] std_tests = {
|
||||
{ "", "" },
|
||||
{ "f", "MY======" },
|
||||
{ "fo", "MZXQ====" },
|
||||
{ "foo", "MZXW6===" },
|
||||
{ "foob", "MZXW6YQ=" },
|
||||
{ "fooba", "MZXW6YTB" },
|
||||
{ "foobar", "MZXW6YTBOI======" },
|
||||
};
|
||||
|
||||
TestCase[*] hex_tests = {
|
||||
{ "", "" },
|
||||
{ "f", "CO======" },
|
||||
{ "fo", "CPNG====" },
|
||||
{ "foo", "CPNMU===" },
|
||||
{ "foob", "CPNMUOG=" },
|
||||
{ "fooba", "CPNMUOJ1" },
|
||||
{ "foobar", "CPNMUOJ1E8======" },
|
||||
};
|
||||
|
||||
macro encode_tests(tests, alphabet, padding)
|
||||
{
|
||||
foreach (t : tests)
|
||||
{
|
||||
Base32Encoder b;
|
||||
b.init(alphabet, padding)!!;
|
||||
|
||||
char[64] buf;
|
||||
usz n = b.encode_len(t.dec.len);
|
||||
b.encode(t.dec, buf[:n])!!;
|
||||
|
||||
char[] want = t.enc;
|
||||
usz! pad_idx = array::index_of(want, '=');
|
||||
if (try pad_idx && padding < 0)
|
||||
{
|
||||
want = want[:pad_idx];
|
||||
}
|
||||
|
||||
assert(buf[:n] == want, "got: %s, want: %s",
|
||||
(String)buf[:n], (String)want);
|
||||
}
|
||||
}
|
||||
|
||||
fn void encode()
|
||||
{
|
||||
encode_tests(std_tests, base32::STD_ALPHABET, '=');
|
||||
encode_tests(hex_tests, base32::HEX_ALPHABET, '=');
|
||||
}
|
||||
|
||||
fn void encode_nopadding()
|
||||
{
|
||||
encode_tests(std_tests, base32::STD_ALPHABET, -1);
|
||||
encode_tests(hex_tests, base32::HEX_ALPHABET, -1);
|
||||
}
|
||||
|
||||
macro decode_tests(tests, alphabet, padding)
|
||||
{
|
||||
foreach (t : tests)
|
||||
{
|
||||
Base32Decoder b;
|
||||
b.init(alphabet, padding)!!;
|
||||
|
||||
char[] input = t.enc[..];
|
||||
usz! pad_idx = array::index_of(input, '=');
|
||||
if (try pad_idx && padding < 0)
|
||||
{
|
||||
input = input[:pad_idx];
|
||||
}
|
||||
|
||||
char[64] buf;
|
||||
usz n = b.decode_len(input.len);
|
||||
n = b.decode(input, buf[:n])!!;
|
||||
|
||||
assert(buf[:n] == t.dec, "got: %s, want: %s",
|
||||
(String)buf[:n], (String)t.dec);
|
||||
}
|
||||
}
|
||||
|
||||
fn void decode()
|
||||
{
|
||||
decode_tests(std_tests, base32::STD_ALPHABET, '=');
|
||||
decode_tests(hex_tests, base32::HEX_ALPHABET, '=');
|
||||
}
|
||||
|
||||
fn void decode_nopadding()
|
||||
{
|
||||
decode_tests(std_tests, base32::STD_ALPHABET, -1);
|
||||
decode_tests(hex_tests, base32::HEX_ALPHABET, -1);
|
||||
}
|
||||
Reference in New Issue
Block a user