Files
c3c/lib/std/encoding/base32.c3
Christoffer Lerno cdabe8fd9e - Create optional with ~ instead of ?. return io::EOF?; becomes return io::EOF~.
- Deprecated use of `?` to create optional.
2026-01-20 16:10:28 +01:00

274 lines
8.6 KiB
Plaintext

module std::encoding::base32;
// This module implements base32 encoding according to RFC 4648
// (https://www.rfc-editor.org/rfc/rfc4648)
// Lookup tables describing one base32 alphabet, used by the encode and
// decode functions of this module.
struct Base32Alphabet
{
// Output character for each 5-bit value; indexed with a value masked to 0..31.
char[32] encoding;
// Maps an input byte to its 5-bit value; entries holding 0xff mark bytes
// that are not part of the alphabet.
char[256] reverse;
}
// Value for the `padding` parameter that disables padding.
const char NO_PAD = 0;
// Padding character used when the `padding` parameter is left at its default.
const char DEFAULT_PAD = '=';
<*
 Base32-encode src into a string allocated with the given allocator.

 The output buffer is sized with encode_len and then filled by
 encode_buffer.

 @param allocator : "The allocator used for the returned string."
 @param [in] src : "The input to be encoded."
 @param padding : "The padding character or 0 if none"
 @param alphabet : "The alphabet to use"
 @require padding < 0xFF : "Invalid padding character"
 @return "The encoded string."
*>
fn String? encode(Allocator allocator, char[] src, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD)
{
	usz out_len = encode_len(src.len, padding);
	char[] out = allocator::alloc_array(allocator, char, out_len);
	return encode_buffer(src, out, padding, alphabet);
}
<*
 Decode the content of src into a newly allocated char array.

 The buffer is sized with decode_len and filled by decode_buffer. If
 decoding fails the buffer is released again before the fault is returned.

 @param allocator : "The allocator used for the returned data."
 @param [in] src : "The input to be decoded."
 @param padding : "The padding character or 0 if none"
 @param alphabet : "The alphabet to use"
 @require padding < 0xFF : "Invalid padding character"
 @return "The decoded data."
 @return? encoding::INVALID_PADDING, encoding::INVALID_CHARACTER
*>
fn char[]? decode(Allocator allocator, char[] src, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD)
{
	char[] dst = allocator::alloc_array(allocator, char, decode_len(src.len, padding));
	// On a decoding fault the caller never receives the buffer, so it has
	// to be freed here or it would leak.
	defer catch allocator::free(allocator, dst.ptr);
	return decode_buffer(src, dst, padding, alphabet);
}
// Convenience wrapper around encode() that passes `tmem` as the allocator.
fn String? tencode(char[] code, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) @inline
{
	return encode(tmem, code, padding, alphabet);
}
// Convenience wrapper around decode() that passes `tmem` as the allocator.
fn char[]? tdecode(char[] code, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) @inline
{
	return decode(tmem, code, padding, alphabet);
}
<*
 Compute how many bytes decoding n encoded characters produces.

 Every complete group of 8 characters yields 5 bytes. With padding only
 complete groups are counted; without padding the leftover characters
 contribute (leftover * 5) / 8 additional bytes.

 @param n : "Length in bytes of input."
 @param padding : "The padding character or 0 if none"
 @require padding < 0xFF : "Invalid padding character"
 @return "Length in bytes of the decoded data."
*>
fn usz decode_len(usz n, char padding)
{
	usz from_full_groups = (n / 8) * 5;
	if (padding) return from_full_groups;
	// Unpadded input: the final group may be shorter than 8 characters.
	usz leftover = n % 8;
	return from_full_groups + (leftover * 5) / 8;
}
<*
 Compute how many characters encoding n input bytes produces.

 @param n : "Length in bytes on input."
 @param padding : "The padding character or 0 if none"
 @require padding < 0xFF : "Invalid padding character"
 @return "Length in bytes of the encoded data."
*>
fn usz encode_len(usz n, char padding)
{
	if (padding)
	{
		// Each started group of 5 input bytes becomes a full group of
		// 8 output characters (the last one filled up with padding).
		usz groups = (n + 4) / 5;
		return groups * 8;
	}
	// Unpadded: complete groups give 8 characters each, the leftover bytes
	// need ceil(leftover * 8 / 5) characters.
	usz complete = n / 5;
	usz leftover = n % 5;
	return complete * 8 + (leftover * 8 + 4) / 5;
}
<*
 Decode the content of src into dst, which must be properly sized.

 Input is consumed in groups of up to 8 characters, each group producing
 up to 5 bytes. Decoding stops at the end of the input, at the first
 padding character, or when dst is full. Characters following the first
 padding character are not inspected.

 @param src : "The input to be decoded."
 @param dst : "The buffer receiving the decoded bytes."
 @param padding : "The padding character or 0 if none"
 @param alphabet : "The alphabet to use"
 @require padding < 0xFF : "Invalid padding character"
 @require dst.len >= decode_len(src.len, padding) : "Destination buffer too small"
 @return "The resulting dst buffer"
 @return? encoding::INVALID_PADDING, encoding::INVALID_CHARACTER
*>
fn char[]? decode_buffer(char[] src, char[] dst, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD)
{
	if (src.len == 0) return dst[:0];
	char* dst_ptr = dst;
	// Number of bytes written to dst so far.
	usz n;
	char[8] buf;
	while (src.len > 0 && dst.len > 0)
	{
		// Set when a padding character terminates the current group. The
		// padding itself is not consumed, so without this flag a dst larger
		// than strictly required would make the loop run again on the
		// padding and fail with INVALID_CHARACTER.
		bool hit_padding = false;
		usz i @noinit;
		// load up to 8 characters into the buffer, translated to 5-bit values
		for (i = 0; i < 8; i++)
		{
			if (src.len == 0)
			{
				// With padding enabled the input may not end inside a group.
				if (padding > 0) return encoding::INVALID_PADDING~;
				break;
			}
			// Only look for padding when it is enabled; otherwise a NUL
			// input byte would compare equal to padding == 0.
			if (padding > 0 && src[0] == padding)
			{
				hit_padding = true;
				break;
			}
			buf[i] = alphabet.reverse[src[0]];
			if (buf[i] == INVALID) return encoding::INVALID_CHARACTER~;
			src = src[1..];
		}
		// extract up to 5 bytes from the buffer which contains i x 5 bit chunks;
		// only 2, 4, 5, 7 or 8 chunks form a whole number of bytes
		switch (i)
		{
			case 8:
				// |66677777| dst[4]
				// |   77777| buf[7]
				// |666     | buf[6] << 5
				dst[4] = buf[7] | buf[6] << 5;
				n++;
				nextcase 7;
			case 7:
				// |45555566| dst[3]
				// |      66| buf[6] >> 3
				// | 55555  | buf[5] << 2
				// |4       | buf[4] << 7
				dst[3] = buf[6] >> 3 | buf[5] << 2 | buf[4] << 7;
				n++;
				nextcase 5;
			case 5:
				// |33334444| dst[2]
				// |    4444| buf[4] >> 1
				// |3333    | buf[3] << 4
				dst[2] = buf[4] >> 1 | buf[3] << 4;
				n++;
				nextcase 4;
			case 4:
				// |11222223| dst[1]
				// |       3| buf[3] >> 4
				// |  22222 | buf[2] << 1
				// |11      | buf[1] << 6
				dst[1] = buf[3] >> 4 | buf[2] << 1 | buf[1] << 6;
				n++;
				nextcase 2;
			case 2:
				// |00000111| dst[0]
				// |     111| buf[1] >> 2
				// |00000   | buf[0] << 3
				dst[0] = buf[1] >> 2 | buf[0] << 3;
				n++;
			default:
				return encoding::INVALID_CHARACTER~;
		}
		// Padding marks the final group; a short dst cannot take another one.
		if (hit_padding || dst.len < 5) break;
		dst = dst[5..];
	}
	return dst_ptr[:n];
}
<*
 Encode the content of src into dst, which must be properly sized.

 Complete groups of 5 input bytes are turned into 8 characters each. A
 final group of 1 to 4 bytes is encoded as if the missing bytes were zero
 and only the characters that carry input bits are written, followed by
 padding up to 8 characters when padding is enabled.

 @param [in] src : "The input to be encoded."
 @param [inout] dst : "The buffer receiving the encoded characters."
 @param padding : "The padding character or 0 if none"
 @param alphabet : "The alphabet to use"
 @require padding < 0xFF : "Invalid padding character"
 @require dst.len >= encode_len(src.len, padding) : "Destination buffer too small"
 @return "The encoded string, stored at the start of dst."
*>
fn String encode_buffer(char[] src, char[] dst, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD)
{
	if (src.len == 0) return (String)dst[:0];
	char* out_start = dst;
	usz out_len = encode_len(src.len, padding);
	usz tail = src.len % 5;
	usz whole = src.len - tail;
	for (usz pos = 0; pos < whole; pos += 5)
	{
		// A group has 40 bits: `hi` holds bytes 0..3, `lo` holds bytes 1..4.
		uint hi = (uint)src[pos] << 24 | (uint)src[pos + 1] << 16
			| (uint)src[pos + 2] << 8 | (uint)src[pos + 3];
		uint lo = hi << 8 | (uint)src[pos + 4];
		// Cut the 40 bits into eight 5-bit values and map them through
		// the alphabet.
		dst[0] = alphabet.encoding[(hi >> 27) & MASK];
		dst[1] = alphabet.encoding[(hi >> 22) & MASK];
		dst[2] = alphabet.encoding[(hi >> 17) & MASK];
		dst[3] = alphabet.encoding[(hi >> 12) & MASK];
		dst[4] = alphabet.encoding[(hi >> 7) & MASK];
		dst[5] = alphabet.encoding[(hi >> 2) & MASK];
		dst[6] = alphabet.encoding[(lo >> 5) & MASK];
		dst[7] = alphabet.encoding[lo & MASK];
		dst = dst[8..];
	}
	if (tail == 0) return (String)out_start[:out_len];

	// Assemble the 1..4 remaining bytes; absent bytes stay zero.
	uint tail_hi = (uint)src[whole] << 24;
	if (tail > 1) tail_hi |= (uint)src[whole + 1] << 16;
	if (tail > 2) tail_hi |= (uint)src[whole + 2] << 8;
	if (tail > 3) tail_hi |= (uint)src[whole + 3];
	uint tail_lo = tail_hi << 8;

	// Number of characters that carry input bits: 2, 4, 5 or 7.
	uint used = (uint)tail * 8 / 5 + 1;
	for (uint k = 0; k < used; k++)
	{
		// Characters 0..5 come from `tail_hi`, character 6 from `tail_lo`.
		uint bits = k < 6 ? tail_hi >> (27 - 5 * k) : tail_lo >> 5;
		dst[k] = alphabet.encoding[bits & MASK];
	}
	// Fill the rest of the final group with the padding character.
	if (padding > 0)
	{
		for (uint k = used; k < 8; k++) dst[k] = padding;
	}
	return (String)out_start[:out_len];
}
// Selects the low 5 bits of a value, i.e. one base32 digit.
const uint MASK @private = 0b11111;
// Value compared against Base32Alphabet.reverse entries to detect input
// bytes that are not part of the alphabet.
const char INVALID @private = 0xff;
// NOTE(review): STD_PADDING and NO_PADDING are not referenced by the code
// visible here, which uses DEFAULT_PAD and NO_PAD instead. NO_PADDING is -1
// whereas NO_PAD is 0 - presumably these are kept for older callers; confirm
// before using them with the functions above.
const int STD_PADDING = '=';
const int NO_PADDING = -1;
// The 32 output characters of a base32 alphabet.
typedef Alphabet = char[32];
// Standard base32 Alphabet
const Alphabet STD_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
// Extended Hex Alphabet
const Alphabet HEX_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUV";
// Tables for the standard alphabet (A-Z, 2-7); this is the default
// `alphabet` argument of the functions in this module.
// `reverse` is written as a hex literal of 8 rows x 32 bytes, one entry per
// possible input byte: 'A'..'Z' map to 0x00..0x19, '2'..'7' map to
// 0x1a..0x1f and every other byte (including lower-case letters) is 0xff.
const Base32Alphabet STANDARD = {
.encoding = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567",
.reverse = x`ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffffffff1a1b1c1d1e1fffffffffffffffff
ff000102030405060708090a0b0c0d0e0f10111213141516171819ffffffffff
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff`
};
// Tables for the extended hex alphabet (0-9, A-V).
// `reverse` is written as a hex literal of 8 rows x 32 bytes, one entry per
// possible input byte: '0'..'9' map to 0x00..0x09, 'A'..'V' map to
// 0x0a..0x1f and every other byte (including lower-case letters) is 0xff.
const Base32Alphabet HEX = {
.encoding = "0123456789ABCDEFGHIJKLMNOPQRSTUV",
.reverse = x`ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffff00010203040506070809ffffffffffff
ff0a0b0c0d0e0f101112131415161718191a1b1c1d1e1fffffffffffffffffff
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff`
};