net/url: implement url encoding (RFC 3986) (#1795)

* net/url: implement url encoding (RFC 3986)

Implement url percent-encoding and -decoding functions according to RFC
3986. Add unit tests.

Link: https://datatracker.ietf.org/doc/html/rfc3986

* net/url: ensure correct encoding of URL components

Add encoding and decoding methods to the Url struct components according
to RFC 3986.

An Url can be parsed from a String with `new_parse()` or `temp_parse()`.
The parsed fields are decoded. The only field that is not decoded is
`query`. To access the decoded query values, use
`new_parse_query(query)` or `temp_parse_query(query)`.

`Url.to_string()` will re-assemble the fields into a valid Url string
with proper percent-encoded values.

If the Url struct fields are filled in manually, use the actual
(un-encoded) values. To create a raw query string, initialize an
`UrlQueryValues` map, use `UrlQueryValues.add()` to add the query
parameters and, finally, call `UrlQueryValues.to_string()`.

---------

Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
This commit is contained in:
konimarti
2025-01-12 22:52:25 +01:00
committed by GitHub
parent 2623d7d525
commit 0e44e63fa8
5 changed files with 844 additions and 123 deletions

View File

@@ -2,13 +2,32 @@ module std::net::url;
import std::io, std::collections::map, std::collections::list;
def UrlQueryValueList = List(<String>);
struct UrlQueryValues
fault UrlParsingResult
{
inline HashMap(<String, UrlQueryValueList>) map;
EMPTY,
INVALID_SCHEME,
INVALID_USER,
INVALID_PASSWORD,
INVALID_HOST,
INVALID_PATH,
INVALID_FRAGMENT,
}
<*
Represents the actual (decoded) Url.
An Url can be parsed from a String with `new_parse()` or `temp_parse()`. The
parsed fields are decoded. The only field that is not decoded is `query`.
To access the decoded query values, use `new_parse_query(query)`.
`Url.to_string()` will re-assemble the fields into a valid Url string with
proper percent-encoded values.
If the Url struct fields are filled in manually, use the actual (un-encoded)
values. To create a raw query string, initialize an `UrlQueryValues` map, use
`UrlQueryValues.add()` to add the query parameters and, finally, call
`UrlQueryValues.to_string()`.
*>
struct Url(Printable)
{
String scheme;
@@ -19,6 +38,8 @@ struct Url(Printable)
String path;
String query;
String fragment;
Allocator allocator;
}
<*
@@ -28,60 +49,67 @@ struct Url(Printable)
@require url_string.len > 0 "the url_string must be len 1 or more"
@return "the parsed Url"
*>
fn Url! parse(String url_string)
fn Url! temp_parse(String url_string) => new_parse(url_string, allocator::temp());
<*
Parse a URL string into a Url struct.
@param [in] url_string
@require url_string.len > 0 "the url_string must be len 1 or more"
@return "the parsed Url"
*>
fn Url! new_parse(String url_string, Allocator allocator = allocator::heap())
{
Url url;
url_string = url_string.trim();
if (!url_string.len)
{
return url;
}
if (!url_string) return UrlParsingResult.EMPTY?;
Url url = { .allocator = allocator };
// Parse scheme
if (try pos = url_string.index_of("://"))
{
url.scheme = url_string[:pos];
if (!pos) return UrlParsingResult.INVALID_SCHEME?;
url.scheme = url_string[:pos].copy(allocator);
url_string = url_string[url.scheme.len + 3 ..];
}
else if (url_string.contains(":"))
else if (try pos = url_string.index_of(":"))
{
// Handle schemes without authority like 'mailto:'
url.scheme = url_string[:url_string.index_of(":")!];
url_string = url_string[url.scheme.len + 1 ..];
url.path = url_string;
if (!pos) return UrlParsingResult.INVALID_SCHEME?;
url.scheme = url_string[:pos].copy(allocator);
url.path = decode(url_string[pos + 1 ..], PATH, allocator) ?? UrlParsingResult.INVALID_PATH?!;
return url;
}
// Parse host, port
if (url.scheme != "urn")
{
usz! authority_end = url_string.index_of_chars("/?#");
if (catch authority_end)
{
authority_end = url_string.len;
}
usz authority_end = url_string.index_of_chars("/?#") ?? url_string.len;
String authority = url_string[:authority_end];
String authority = url_string[:authority_end]!;
if (try usz userInfo_end = url_string.index_of_char('@'))
if (try user_info_end = authority.index_of_char('@'))
{
String userinfo = authority[:userInfo_end];
String[] userpass = userinfo.split(":");
defer free(userpass);
url.username = userpass[0];
if (userpass.len > 1)
String userinfo = authority[:user_info_end];
String username @noinit;
String password;
@pool(allocator)
{
url.password = userpass[1];
}
authority = authority[userInfo_end + 1 ..];
String[] userpass = userinfo.tsplit(":", 2);
username = userpass[0];
if (!username.len) return UrlParsingResult.INVALID_USER?;
url.host =
url.username = decode(username, HOST, allocator) ?? UrlParsingResult.INVALID_USER?!;
if (userpass.len) url.password = decode(userpass[1], USERPASS, allocator) ?? UrlParsingResult.INVALID_PASSWORD?!;
};
authority = authority[userinfo.len + 1 ..];
}
// Check for IPv6 address in square brackets
String host;
if (authority.starts_with("[") && authority.contains("]"))
{
usz ipv6_end = authority.index_of("]")!;
url.host = authority[0 .. ipv6_end]; // Includes closing bracket
host = authority[0 .. ipv6_end]; // Includes closing bracket
if ((ipv6_end + 1) < authority.len && authority[.. ipv6_end] == ":")
{
url.port = authority[.. ipv6_end + 1].to_uint()!;
@@ -89,58 +117,56 @@ fn Url! parse(String url_string)
}
else
{
String[] host_port = authority.split(":");
defer mem::free(host_port);
if (host_port.len > 1)
@pool(allocator)
{
url.host = host_port[0];
url.port = host_port[1].to_uint()!;
}
else
{
url.host = authority;
}
String[] host_port = authority.tsplit(":", 2);
if (host_port.len > 1)
{
host = host_port[0];
url.port = host_port[1].to_uint()!;
}
else
{
host = authority;
}
};
}
url_string = url_string[authority_end ..]!;
url.host = decode(host, HOST, allocator) ?? UrlParsingResult.INVALID_HOST?!;
url_string = url_string[authority_end ..];
}
// Parse path
long query_index = (long)url_string.index_of_char('?') ?? -1;
long fragment_index = (long)url_string.index_of_char('#') ?? -1;
usz! query_index = url_string.index_of_char('?');
usz! fragment_index = url_string.index_of_char('#');
if (query_index != -1 || fragment_index != -1)
if (@ok(query_index) || @ok(fragment_index))
{
long pathEnd = min(query_index == -1 ? url_string.len : query_index,
fragment_index == -1 ? url_string.len : fragment_index,
url_string.len);
url.path = url_string[:pathEnd];
usz path_end = min(query_index ?? url_string.len, fragment_index ?? url_string.len);
url.path = decode(url_string[:path_end], PATH, allocator) ?? UrlParsingResult.INVALID_PATH?!;
url_string = url_string[path_end ..];
}
else
{
url.path = url_string;
url.path = decode(url_string, PATH, allocator) ?? UrlParsingResult.INVALID_PATH?!;
url_string = "";
}
// Remove the path part from url for further parsing
url_string = url_string[url.path.len ..];
// Parse query
if (url_string.starts_with("?"))
{
fragment_index = (long)url_string.index_of_char('#') ?? -1;
if (fragment_index == -1)
{
fragment_index = url_string.len;
}
url.query = url_string[1 .. fragment_index - 1];
url_string = url_string[fragment_index ..];
usz index = url_string.index_of_char('#') ?? url_string.len;
url.query = url_string[1 .. index - 1].copy(allocator);
url_string = url_string[index ..];
}
// Parse fragment
if (url_string.starts_with("#"))
{
url.fragment = url_string[1 ..];
url.fragment = decode(url_string[1..], FRAGMENT, allocator) ?? UrlParsingResult.INVALID_FRAGMENT?!;
}
return url;
}
@@ -168,17 +194,22 @@ fn String Url.to_string(&self, Allocator allocator = allocator::heap()) @dynamic
// Add username and password if they exist
if (self.username != "")
{
builder.append_chars(self.username);
String username = temp_encode(self.username, USERPASS);
builder.append_chars(username);
if (self.password != "")
{
builder.append_char(':');
builder.append_chars(self.password);
String password = temp_encode(self.password, USERPASS);
builder.append_chars(password);
}
builder.append_char('@');
}
// Add host
builder.append_chars(self.host);
String host = temp_encode(self.host, HOST);
builder.append_chars(host);
// Add port
if (self.port != 0)
@@ -188,9 +219,11 @@ fn String Url.to_string(&self, Allocator allocator = allocator::heap()) @dynamic
}
// Add path
builder.append_chars(self.path);
String path = temp_encode(self.path, PATH);
builder.append_chars(path);
// Add query if it exists
// Add query if it exists (note that `query` is expected to
// be already properly encoded).
if (self.query != "")
{
builder.append_char('?');
@@ -201,77 +234,156 @@ fn String Url.to_string(&self, Allocator allocator = allocator::heap()) @dynamic
if (self.fragment != "")
{
builder.append_char('#');
builder.append_chars(self.fragment);
String fragment = temp_encode(self.fragment, FRAGMENT);
builder.append_chars(fragment);
}
return builder.copy_str(allocator);
};
}
// List of the String values stored for one query key.
def UrlQueryValueList = List(<String>);
<*
Query parameters of a Url: a map from each key to the list of its values,
together with a list of the keys.
*>
struct UrlQueryValues
{
// key -> values. Declared `inline`, so map methods such as `get_ref`
// can be called on the struct directly (as `add()` does).
inline HashMap(<String, UrlQueryValueList>) map;
// `add()` pushes a copy of a key here the first time that key is added;
// `to_string()` iterates this list to emit the parameters.
UrlQueryValueList key_order;
}
<*
Parse the query parameters of the Url into a UrlQueryValues map,
passing the temp allocator to `parse_query`.
@param [in] query
@return "a UrlQueryValues HashMap"
*>
fn UrlQueryValues temp_parse_query(String query) => parse_query(query, allocator::temp());
<*
Parse a query string into a UrlQueryValues map, passing the heap
allocator to `parse_query`.
@param [in] query
@return "a UrlQueryValues HashMap"
*>
fn UrlQueryValues new_parse_query(String query)
{
	return parse_query(query, allocator::heap());
}
<*
Parse the query parameters of the Url into a UrlQueryValues map.
@param [in] query
@param [inout] allocator
@return "a UrlQueryValues HashMap"
*>
fn UrlQueryValues Url.query_values(&self, Allocator allocator)
fn UrlQueryValues parse_query(String query, Allocator allocator)
{
UrlQueryValues vals;
vals.init(allocator);
Splitter raw_vals = self.query.tokenize("&");
vals.map.init(allocator);
vals.key_order.new_init(allocator: allocator);
Splitter raw_vals = query.tokenize("&");
while (try String rv = raw_vals.next())
{
@pool(allocator)
{
String[] parts = rv.tsplit("=", 2);
if (try existing = vals.get_ref(parts[0]))
{
existing.push(parts[1]);
}
else
{
UrlQueryValueList new_list;
new_list.new_init_with_array({ parts[1] }, allocator);
vals[parts[0]] = new_list;
}
String key = temp_decode(parts[0], QUERY) ?? parts[0];
vals.add(key, parts.len == 1 ? key : (temp_decode(parts[1], QUERY) ?? parts[1]));
};
}
return vals;
}
<*
Parse the query parameters of the Url into a UrlQueryValues map,
to be freed using values.free()
Add copies of the key and value strings to the UrlQueryValues map. These
copies are freed when the UrlQueryValues map is freed.
@param [in] self
@param key
@param value
@return "a UrlQueryValues map"
*>
fn UrlQueryValues Url.new_query_values(&self)
fn UrlQueryValues* UrlQueryValues.add(&self, String key, String value)
{
return self.query_values(allocator::heap()) @inline;
String value_copy = value.copy(self.allocator);
if (try existing = self.get_ref(key))
{
existing.push(value_copy);
}
else
{
UrlQueryValueList new_list;
new_list.new_init_with_array({ value_copy }, self.allocator);
(*self)[key] = new_list;
self.key_order.push(key.copy(self.allocator));
}
return self;
}
<*
Parse the query parameters of the Url into a UrlQueryValues map.
stored on the temp allocator.
Stringify UrlQueryValues into an encoded query string.
@param [in] self
@return "a UrlQueryValues map"
@param [inout] allocator
@return "a percent-encoded query string"
*>
fn UrlQueryValues Url.temp_query_values(&self)
fn String UrlQueryValues.to_string(&self, Allocator allocator = allocator::heap()) @dynamic
{
return self.query_values(allocator::temp()) @inline;
@pool(allocator)
{
DString builder = dstring::temp_new();
usz i;
foreach (key: self.key_order)
{
String encoded_key = temp_encode(key, QUERY);
UrlQueryValueList! values = self.map.get(key);
if (catch values) continue;
foreach (value: values)
{
if (i > 0) builder.append_char('&');
builder.append_chars(encoded_key);
builder.append_char('=');
String encoded_value = temp_encode(value, QUERY);
builder.append_chars(encoded_value);
i++;
}
};
return builder.copy_str(allocator);
};
}
fn void UrlQueryValues.free(&self)
{
self.map.@each(;String key, UrlQueryValueList value)
self.map.@each(;String key, UrlQueryValueList values)
{
value.free();
foreach (value: values) value.free(self.allocator);
values.free();
};
self.map.free();
foreach (&key: self.key_order) key.free(self.allocator);
self.key_order.free();
}
<*
Free the scheme, host, username, password, path, query and fragment
strings of an Url with the allocator stored in the struct.
Returns immediately when no allocator is set.
@param [in] self
*>
fn void Url.free(&self)
{
	Allocator owner = self.allocator;
	// No allocator recorded: nothing to release through.
	if (!owner) return;
	self.scheme.free(owner);
	self.host.free(owner);
	self.username.free(owner);
	self.password.free(owner);
	self.path.free(owner);
	self.query.free(owner);
	self.fragment.free(owner);
}