Files
c3c/lib/std/net/url_encoding.c3
konimarti 0e44e63fa8 net/url: implement url encoding (RFC 3986) (#1795)
* net/url: implement url encoding (RFC 3986)

Implement url percent-encoding and -decoding functions according to RFC
3986. Add unit tests.

Link: https://datatracker.ietf.org/doc/html/rfc3986

* net/url: ensure correct encoding of URL components

Add encoding and decoding methods to the Url struct components according
to RFC 3986.

A Url can be parsed from a String with `new_parse()` or `temp_parse()`.
The parsed fields are decoded. The only field that is not decoded is
`raw_query`. To access the decoded query values, use
`Url.query_values()`.

`Url.to_string()` will re-assemble the fields into a valid Url string
with proper percent-encoded values.

If the Url struct fields are filled in manually, use the actual
(un-encoded) values. To create a raw query string, initialize a
`UrlQueryValues` map, use `UrlQueryValues.add()` to add the query
parameters and, finally, call `UrlQueryValues.to_string()`.

---------

Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
2025-01-12 22:52:25 +01:00

198 lines
4.4 KiB
Plaintext

<*
This module section provides encoding and decoding functions for URL
components according to RFC 3986.
*>
module std::net::url;
import std::encoding::hex;
// Encoding modes, one per URL component. The associated `allowed` value of a
// mode lists the punctuation that `should_encode` leaves unescaped for that
// component, in addition to alphanumeric characters and the UNRESERVED set.
// The section numbers refer to RFC 3986.
enum UrlEncodingMode : char (String allowed)
{
UNRESERVED = "-_.~", // section 2.3
PATH = "$&+,/:;=@", // section 3.3
HOST = "!$&'()*+,;=:[]", // section 3.2.2 (also include ':', '[', ']' for ipv6 hosts)
USERPASS = ";:&=+$,", // section 3.2.1
// no extra characters are allowed; `encode` writes ' ' as '+' and `decode` reads '+' as ' ' in this mode
QUERY = "", // section 3.4
FRAGMENT = "$&+,/:;=?@!()*", // section 4.1
}
// Faults raised while percent-decoding a string.
fault UrlDecodingError
{
// a '%' was found that is not followed by two hexadecimal digits
INVALID_HEX
}
<*
 Decide whether a character has to be percent-encoded in the given mode.
 Alphanumeric characters, the characters of the UNRESERVED set and the
 characters listed in the allowed set of the mode are kept unchanged;
 every other character has to be encoded.

 @param c "Character to check if it should be encoded."
 @param mode "Url encoding mode."
 @return "true if c must be percent-encoded, false if it may be emitted as is"
*>
fn bool should_encode(char c, UrlEncodingMode mode) @private
{
	// letters and digits are always safe
	if (c.is_alnum()) return false;

	// a successful lookup means the character is part of the respective set
	bool is_unreserved = @ok(UrlEncodingMode.UNRESERVED.allowed.index_of_char(c));

	// the mode-specific set is only searched if the unreserved lookup failed
	return !(is_unreserved || @ok(mode.allowed.index_of_char(c)));
}
<*
 Compute how many bytes the percent-encoded form of s occupies.
 Every character that needs encoding grows from one byte to three,
 except for a space in QUERY mode, which is written as a single '+'.

 @param s "String that is going to be encoded"
 @param mode "Url encoding mode"
 @return "Length of the percent-encoded String"
*>
fn usz encode_len(String s, UrlEncodingMode mode) @inline
{
	usz escaped = 0;
	foreach (ch : s)
	{
		// query spaces are swapped for '+', so they do not add any length
		if (ch == ' ' && mode == QUERY) continue;
		if (should_encode(ch, mode)) escaped++;
	}
	// each escaped character needs two extra bytes for its hex digits
	return s.len + escaped * 2;
}
<*
 Encode the string s for a given encoding mode.
 Characters for which should_encode() returns true are written as '%'
 followed by two upper-case hexadecimal digits. In QUERY mode a space
 is written as '+' instead.
 Returned string must be freed.

 @param s "String to encode"
 @param mode "Url encoding mode"
 @param [inout] allocator
 @return "Percent-encoded String"
*>
fn String encode(String s, UrlEncodingMode mode, Allocator allocator)
{
	// Upper-case digits, the form RFC 3986 (section 2.1) recommends for
	// percent-encodings. Looking the digits up here avoids creating two
	// temporary strings for every escaped byte.
	String hex_digits = "0123456789ABCDEF";

	// exact size of the result, so the builder does not have to grow
	usz n = encode_len(s, mode);
	@pool(allocator)
	{
		DString builder = dstring::temp_with_capacity(n);
		foreach (c : s)
		{
			switch
			{
				// encode spaces in queries
				case c == ' ' && mode == QUERY:
					builder.append_char('+');
				// add encoded char: high nibble first, then low nibble
				case should_encode(c, mode):
					builder.append_char('%');
					builder.append_char(hex_digits[c >> 4]);
					builder.append_char(hex_digits[c & 0x0F]);
				// use char, no encoding needed
				default:
					builder.append_char(c);
			}
		}
		// copy out of the temporary builder into the caller's allocator
		return builder.copy_str(allocator);
	};
}
<*
 Percent-encode s for the given mode using the heap allocator.
 The caller owns the returned string and must free it.

 @param s "String to encode"
 @param mode "Url encoding mode"
 @return "Percent-encoded String"
*>
fn String new_encode(String s, UrlEncodingMode mode)
{
	return encode(s, mode, allocator::heap());
}
<*
 Percent-encode s for the given mode; the result is stored on the
 temp allocator.

 @param s "String to encode"
 @param mode "Url encoding mode"
 @return "Percent-encoded String"
*>
fn String temp_encode(String s, UrlEncodingMode mode)
{
	return encode(s, mode, allocator::temp());
}
<*
 Compute how many bytes the percent-decoded form of s occupies and
 check, along the way, that every '%' is followed by two hexadecimal
 digits. The mode is not consulted by this calculation.

 @param s "Percent-encoded String"
 @param mode "Url encoding mode"
 @return "Length of the percent-decoded String"
 @return! UrlDecodingError.INVALID_HEX
*>
fn usz! decode_len(String s, UrlEncodingMode mode) @inline
{
	usz escapes = 0;
	for (usz pos = 0; pos < s.len; pos++)
	{
		if (s[pos] != '%') continue;

		// both hex digits have to lie inside the string ...
		bool complete = pos + 2 < s.len;
		// ... and must be valid hexadecimal characters
		bool valid = complete && s[pos + 1].is_xdigit() && s[pos + 2].is_xdigit();
		if (!valid) return UrlDecodingError.INVALID_HEX?;

		escapes++;
	}
	// every escape sequence shrinks from three bytes to one
	return s.len - escapes * 2;
}
<*
 Percent-decode the string s for a given encoding mode.
 Every escape sequence is validated by decode_len() before any output
 is produced, so a malformed input fails with INVALID_HEX. In QUERY
 mode a '+' is decoded to a space; in all other modes it is kept.
 Returned string must be freed.

 @param s "String to decode"
 @param mode "Url encoding mode"
 @param [inout] allocator
 @return "Percent-decoded String"
*>
fn String! decode(String s, UrlEncodingMode mode, Allocator allocator)
{
	usz decoded_size = decode_len(s, mode)!;
	@pool(allocator)
	{
		DString out = dstring::temp_with_capacity(decoded_size);
		usz pos = 0;
		while (pos < s.len)
		{
			char ch = s[pos];

			// escape sequence: turn the two hex digits into one byte
			if (ch == '%')
			{
				char[] raw = hex::decode_temp(s[pos + 1:2])!;
				out.append(raw);
				// step over '%' and both hex digits
				pos += 3;
				continue;
			}

			// a '+' only stands for a space inside queries
			if (ch == '+' && mode == QUERY)
			{
				out.append_char(' ');
			}
			else
			{
				out.append_char(ch);
			}
			pos++;
		}
		return out.copy_str(allocator);
	};
}
<*
 Percent-decode s for the given mode using the heap allocator.
 The caller owns the returned string and must free it.

 @param s "String to decode"
 @param mode "Url encoding mode"
 @return "Percent-decoded String"
*>
fn String! new_decode(String s, UrlEncodingMode mode)
{
	return decode(s, mode, allocator::heap());
}
<*
 Percent-decode s for the given mode; the result is stored on the
 temp allocator.

 @param s "String to decode"
 @param mode "Url encoding mode"
 @return "Percent-decoded String"
*>
fn String! temp_decode(String s, UrlEncodingMode mode)
{
	return decode(s, mode, allocator::temp());
}