net/url: implement url encoding (RFC 3986) (#1795)

* net/url: implement url encoding (RFC 3986)

Implement url percent-encoding and -decoding functions according to RFC
3986. Add unit tests.

Link: https://datatracker.ietf.org/doc/html/rfc3986

* net/url: ensure correct encoding of URL components

Add encoding and decoding methods to the Url struct components according
to RFC 3986.

An Url can be parsed from a String with `new_parse()` or `temp_parse()`.
The parsed fields are decoded. The only field that is not decoded is
`query`. To access the decoded query values, use
`new_parse_query(query)` or `temp_parse_query(query)`.

`Url.to_string()` will re-assemble the fields into a valid Url string
with proper percent-encoded values.

If the Url struct fields are filled in manually, use the actual
(un-encoded) values. To create a raw query string, initialize an
`UrlQueryValues` map, use `UrlQueryValues.add()` to add the query
parameters and, finally, call `UrlQueryValues.to_string()`.

---------

Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
This commit is contained in:
konimarti
2025-01-12 22:52:25 +01:00
committed by GitHub
parent 2623d7d525
commit 0e44e63fa8
5 changed files with 844 additions and 123 deletions

View File

@@ -2,13 +2,32 @@ module std::net::url;
import std::io, std::collections::map, std::collections::list; import std::io, std::collections::map, std::collections::list;
def UrlQueryValueList = List(<String>); fault UrlParsingResult
struct UrlQueryValues
{ {
inline HashMap(<String, UrlQueryValueList>) map; EMPTY,
INVALID_SCHEME,
INVALID_USER,
INVALID_PASSWORD,
INVALID_HOST,
INVALID_PATH,
INVALID_FRAGMENT,
} }
<*
Represents the actual (decoded) Url.
An Url can be parsed from a String with `new_parse()` or `temp_parse()`. The
parsed fields are decoded. The only field that is not decoded is `query`.
To access the decoded query values, use `new_parse_query(query)`.
`Url.to_string()` will re-assemble the fields into a valid Url string with
proper percent-encoded values.
If the Url struct fields are filled in manually, use the actual (un-encoded)
values. To create a raw query string, initialize an `UrlQueryValues` map, use
`UrlQueryValues.add()` to add the query parameters and, finally, call
`UrlQueryValues.to_string()`.
*>
struct Url(Printable) struct Url(Printable)
{ {
String scheme; String scheme;
@@ -19,6 +38,8 @@ struct Url(Printable)
String path; String path;
String query; String query;
String fragment; String fragment;
Allocator allocator;
} }
<* <*
@@ -28,60 +49,67 @@ struct Url(Printable)
@require url_string.len > 0 "the url_string must be len 1 or more" @require url_string.len > 0 "the url_string must be len 1 or more"
@return "the parsed Url" @return "the parsed Url"
*> *>
fn Url! parse(String url_string) fn Url! temp_parse(String url_string) => new_parse(url_string, allocator::temp());
<*
Parse a URL string into a Url struct.
@param [in] url_string
@require url_string.len > 0 "the url_string must be len 1 or more"
@return "the parsed Url"
*>
fn Url! new_parse(String url_string, Allocator allocator = allocator::heap())
{ {
Url url;
url_string = url_string.trim(); url_string = url_string.trim();
if (!url_string.len) if (!url_string) return UrlParsingResult.EMPTY?;
{ Url url = { .allocator = allocator };
return url;
}
// Parse scheme // Parse scheme
if (try pos = url_string.index_of("://")) if (try pos = url_string.index_of("://"))
{ {
url.scheme = url_string[:pos]; if (!pos) return UrlParsingResult.INVALID_SCHEME?;
url.scheme = url_string[:pos].copy(allocator);
url_string = url_string[url.scheme.len + 3 ..]; url_string = url_string[url.scheme.len + 3 ..];
} }
else if (url_string.contains(":")) else if (try pos = url_string.index_of(":"))
{ {
// Handle schemes without authority like 'mailto:' // Handle schemes without authority like 'mailto:'
url.scheme = url_string[:url_string.index_of(":")!]; if (!pos) return UrlParsingResult.INVALID_SCHEME?;
url_string = url_string[url.scheme.len + 1 ..]; url.scheme = url_string[:pos].copy(allocator);
url.path = url_string; url.path = decode(url_string[pos + 1 ..], PATH, allocator) ?? UrlParsingResult.INVALID_PATH?!;
return url; return url;
} }
// Parse host, port // Parse host, port
if (url.scheme != "urn") if (url.scheme != "urn")
{ {
usz! authority_end = url_string.index_of_chars("/?#"); usz authority_end = url_string.index_of_chars("/?#") ?? url_string.len;
if (catch authority_end) String authority = url_string[:authority_end];
{
authority_end = url_string.len;
}
String authority = url_string[:authority_end]!; if (try user_info_end = authority.index_of_char('@'))
if (try usz userInfo_end = url_string.index_of_char('@'))
{ {
String userinfo = authority[:userInfo_end]; String userinfo = authority[:user_info_end];
String[] userpass = userinfo.split(":"); String username @noinit;
defer free(userpass); String password;
url.username = userpass[0]; @pool(allocator)
if (userpass.len > 1)
{ {
url.password = userpass[1]; String[] userpass = userinfo.tsplit(":", 2);
} username = userpass[0];
authority = authority[userInfo_end + 1 ..]; if (!username.len) return UrlParsingResult.INVALID_USER?;
url.host =
url.username = decode(username, HOST, allocator) ?? UrlParsingResult.INVALID_USER?!;
if (userpass.len) url.password = decode(userpass[1], USERPASS, allocator) ?? UrlParsingResult.INVALID_PASSWORD?!;
};
authority = authority[userinfo.len + 1 ..];
} }
// Check for IPv6 address in square brackets // Check for IPv6 address in square brackets
String host;
if (authority.starts_with("[") && authority.contains("]")) if (authority.starts_with("[") && authority.contains("]"))
{ {
usz ipv6_end = authority.index_of("]")!; usz ipv6_end = authority.index_of("]")!;
url.host = authority[0 .. ipv6_end]; // Includes closing bracket host = authority[0 .. ipv6_end]; // Includes closing bracket
if ((ipv6_end + 1) < authority.len && authority[.. ipv6_end] == ":") if ((ipv6_end + 1) < authority.len && authority[.. ipv6_end] == ":")
{ {
url.port = authority[.. ipv6_end + 1].to_uint()!; url.port = authority[.. ipv6_end + 1].to_uint()!;
@@ -89,58 +117,56 @@ fn Url! parse(String url_string)
} }
else else
{ {
String[] host_port = authority.split(":"); @pool(allocator)
defer mem::free(host_port);
if (host_port.len > 1)
{ {
url.host = host_port[0]; String[] host_port = authority.tsplit(":", 2);
url.port = host_port[1].to_uint()!; if (host_port.len > 1)
} {
else host = host_port[0];
{ url.port = host_port[1].to_uint()!;
url.host = authority; }
} else
{
host = authority;
}
};
} }
url_string = url_string[authority_end ..]!; url.host = decode(host, HOST, allocator) ?? UrlParsingResult.INVALID_HOST?!;
url_string = url_string[authority_end ..];
} }
// Parse path // Parse path
long query_index = (long)url_string.index_of_char('?') ?? -1; usz! query_index = url_string.index_of_char('?');
long fragment_index = (long)url_string.index_of_char('#') ?? -1; usz! fragment_index = url_string.index_of_char('#');
if (query_index != -1 || fragment_index != -1) if (@ok(query_index) || @ok(fragment_index))
{ {
long pathEnd = min(query_index == -1 ? url_string.len : query_index, usz path_end = min(query_index ?? url_string.len, fragment_index ?? url_string.len);
fragment_index == -1 ? url_string.len : fragment_index, url.path = decode(url_string[:path_end], PATH, allocator) ?? UrlParsingResult.INVALID_PATH?!;
url_string.len); url_string = url_string[path_end ..];
url.path = url_string[:pathEnd];
} }
else else
{ {
url.path = url_string; url.path = decode(url_string, PATH, allocator) ?? UrlParsingResult.INVALID_PATH?!;
url_string = "";
} }
// Remove the path part from url for further parsing // Remove the path part from url for further parsing
url_string = url_string[url.path.len ..];
// Parse query // Parse query
if (url_string.starts_with("?")) if (url_string.starts_with("?"))
{ {
fragment_index = (long)url_string.index_of_char('#') ?? -1; usz index = url_string.index_of_char('#') ?? url_string.len;
if (fragment_index == -1) url.query = url_string[1 .. index - 1].copy(allocator);
{ url_string = url_string[index ..];
fragment_index = url_string.len;
}
url.query = url_string[1 .. fragment_index - 1];
url_string = url_string[fragment_index ..];
} }
// Parse fragment // Parse fragment
if (url_string.starts_with("#")) if (url_string.starts_with("#"))
{ {
url.fragment = url_string[1 ..]; url.fragment = decode(url_string[1..], FRAGMENT, allocator) ?? UrlParsingResult.INVALID_FRAGMENT?!;
} }
return url; return url;
} }
@@ -168,17 +194,22 @@ fn String Url.to_string(&self, Allocator allocator = allocator::heap()) @dynamic
// Add username and password if they exist // Add username and password if they exist
if (self.username != "") if (self.username != "")
{ {
builder.append_chars(self.username); String username = temp_encode(self.username, USERPASS);
builder.append_chars(username);
if (self.password != "") if (self.password != "")
{ {
builder.append_char(':'); builder.append_char(':');
builder.append_chars(self.password);
String password = temp_encode(self.password, USERPASS);
builder.append_chars(password);
} }
builder.append_char('@'); builder.append_char('@');
} }
// Add host // Add host
builder.append_chars(self.host); String host = temp_encode(self.host, HOST);
builder.append_chars(host);
// Add port // Add port
if (self.port != 0) if (self.port != 0)
@@ -188,9 +219,11 @@ fn String Url.to_string(&self, Allocator allocator = allocator::heap()) @dynamic
} }
// Add path // Add path
builder.append_chars(self.path); String path = temp_encode(self.path, PATH);
builder.append_chars(path);
// Add query if it exists // Add query if it exists (note that `query` is expected to
// be already properly encoded).
if (self.query != "") if (self.query != "")
{ {
builder.append_char('?'); builder.append_char('?');
@@ -201,77 +234,156 @@ fn String Url.to_string(&self, Allocator allocator = allocator::heap()) @dynamic
if (self.fragment != "") if (self.fragment != "")
{ {
builder.append_char('#'); builder.append_char('#');
builder.append_chars(self.fragment);
String fragment = temp_encode(self.fragment, FRAGMENT);
builder.append_chars(fragment);
} }
return builder.copy_str(allocator); return builder.copy_str(allocator);
}; };
} }
def UrlQueryValueList = List(<String>);
struct UrlQueryValues
{
inline HashMap(<String, UrlQueryValueList>) map;
UrlQueryValueList key_order;
}
<* <*
Parse the query parameters of the Url into a UrlQueryValues map. Parse the query parameters of the Url into a UrlQueryValues map.
@param [in] self @param [in] query
@return "a UrlQueryValues HashMap"
*>
fn UrlQueryValues temp_parse_query(String query) => parse_query(query, allocator::temp());
<*
Parse the query parameters of the Url into a UrlQueryValues map.
@param [in] query
@return "a UrlQueryValues HashMap"
*>
fn UrlQueryValues new_parse_query(String query) => parse_query(query, allocator::heap());
<*
Parse the query parameters of the Url into a UrlQueryValues map.
@param [in] query
@param [inout] allocator @param [inout] allocator
@return "a UrlQueryValues HashMap" @return "a UrlQueryValues HashMap"
*> *>
fn UrlQueryValues Url.query_values(&self, Allocator allocator) fn UrlQueryValues parse_query(String query, Allocator allocator)
{ {
UrlQueryValues vals; UrlQueryValues vals;
vals.init(allocator); vals.map.init(allocator);
vals.key_order.new_init(allocator: allocator);
Splitter raw_vals = self.query.tokenize("&");
Splitter raw_vals = query.tokenize("&");
while (try String rv = raw_vals.next()) while (try String rv = raw_vals.next())
{ {
@pool(allocator) @pool(allocator)
{ {
String[] parts = rv.tsplit("=", 2); String[] parts = rv.tsplit("=", 2);
if (try existing = vals.get_ref(parts[0])) String key = temp_decode(parts[0], QUERY) ?? parts[0];
{ vals.add(key, parts.len == 1 ? key : (temp_decode(parts[1], QUERY) ?? parts[1]));
existing.push(parts[1]);
}
else
{
UrlQueryValueList new_list;
new_list.new_init_with_array({ parts[1] }, allocator);
vals[parts[0]] = new_list;
}
}; };
} }
return vals; return vals;
} }
<* <*
Parse the query parameters of the Url into a UrlQueryValues map, Add copies of the key and value strings to the UrlQueryValues map. These
to be freed using values.free() copies are freed when the UrlQueryValues map is freed.
@param [in] self @param [in] self
@param key
@param value
@return "a UrlQueryValues map" @return "a UrlQueryValues map"
*> *>
fn UrlQueryValues Url.new_query_values(&self) fn UrlQueryValues* UrlQueryValues.add(&self, String key, String value)
{ {
return self.query_values(allocator::heap()) @inline; String value_copy = value.copy(self.allocator);
if (try existing = self.get_ref(key))
{
existing.push(value_copy);
}
else
{
UrlQueryValueList new_list;
new_list.new_init_with_array({ value_copy }, self.allocator);
(*self)[key] = new_list;
self.key_order.push(key.copy(self.allocator));
}
return self;
} }
<* <*
Parse the query parameters of the Url into a UrlQueryValues map. Stringify UrlQueryValues into an encoded query string.
stored on the temp allocator.
@param [in] self @param [in] self
@return "a UrlQueryValues map" @param [inout] allocator
@return "a percent-encoded query string"
*> *>
fn UrlQueryValues Url.temp_query_values(&self) fn String UrlQueryValues.to_string(&self, Allocator allocator = allocator::heap()) @dynamic
{ {
return self.query_values(allocator::temp()) @inline; @pool(allocator)
{
DString builder = dstring::temp_new();
usz i;
foreach (key: self.key_order)
{
String encoded_key = temp_encode(key, QUERY);
UrlQueryValueList! values = self.map.get(key);
if (catch values) continue;
foreach (value: values)
{
if (i > 0) builder.append_char('&');
builder.append_chars(encoded_key);
builder.append_char('=');
String encoded_value = temp_encode(value, QUERY);
builder.append_chars(encoded_value);
i++;
}
};
return builder.copy_str(allocator);
};
} }
fn void UrlQueryValues.free(&self) fn void UrlQueryValues.free(&self)
{ {
self.map.@each(;String key, UrlQueryValueList value) self.map.@each(;String key, UrlQueryValueList values)
{ {
value.free(); foreach (value: values) value.free(self.allocator);
values.free();
}; };
self.map.free(); self.map.free();
foreach (&key: self.key_order) key.free(self.allocator);
self.key_order.free();
} }
<*
Free an Url struct.
@param [in] self
*>
fn void Url.free(&self)
{
if (!self.allocator) return;
self.scheme.free(self.allocator);
self.host.free(self.allocator);
self.username.free(self.allocator);
self.password.free(self.allocator);
self.path.free(self.allocator);
self.query.free(self.allocator);
self.fragment.free(self.allocator);
}

197
lib/std/net/url_encoding.c3 Normal file
View File

@@ -0,0 +1,197 @@
<*
This module section provides encoding and decoding functions for URL
components according to RFC 3986.
*>
module std::net::url;
import std::encoding::hex;
enum UrlEncodingMode : char (String allowed)
{
UNRESERVED = "-_.~", // section 2.3
PATH = "$&+,/:;=@", // section 3.3
HOST = "!$&'()*+,;=:[]", // section 3.2.2 (also include ':', '[', ']' for ipv6 hosts)
USERPASS = ";:&=+$,", // section 3.2.1
QUERY = "", // section 3.4
FRAGMENT = "$&+,/:;=?@!()*", // section 4.1
}
fault UrlDecodingError
{
INVALID_HEX
}
<*
Returns true if char c should be encoded according to RFC 3986.
@param c "Character to check if it should be encoded."
@param mode "Url encoding mode."
*>
fn bool should_encode(char c, UrlEncodingMode mode) @private
{
// alphanumeric characters are allowed
if (c.is_alnum()) return false;
// unreserved characters are allowed
if (try UrlEncodingMode.UNRESERVED.allowed.index_of_char(c)) return false;
// some mode-specific characters are allowed
if (try mode.allowed.index_of_char(c)) return false;
// everything else must be encoded
return true;
}
<*
Calculate the length of the percent-encoded string.
*>
fn usz encode_len(String s, UrlEncodingMode mode) @inline
{
usz n;
foreach (c: s)
{
if (!should_encode(c, mode)) continue;
if (c != ' ' || mode != QUERY)
{
n++;
}
}
return s.len + 2 * n;
}
<*
Encode the string s for a given encoding mode.
Returned string must be freed.
@param s "String to encode"
@param mode "Url encoding mode"
@param [inout] allocator
@return "Percent-encoded String"
*>
fn String encode(String s, UrlEncodingMode mode, Allocator allocator)
{
usz n = encode_len(s, mode);
@pool(allocator)
{
DString builder = dstring::temp_with_capacity(n);
foreach(i, c: s)
{
switch
{
// encode spaces in queries
case c == ' ' && mode == QUERY:
builder.append_char('+');
// add encoded char
case should_encode(c, mode):
builder.append_char('%');
String hex = hex::encode_temp(s[i:1]);
builder.append(hex.temp_ascii_to_upper());
// use char, no encoding needed
default:
builder.append_char(c);
}
}
return builder.copy_str(allocator);
};
}
<*
Encode the string s for a given encoding mode.
Returned string must be freed.
@param s "String to encode"
@param mode "Url encoding mode"
@return "Percent-encoded String"
*>
fn String new_encode(String s, UrlEncodingMode mode) => encode(s, mode, allocator::heap());
<*
Encode string s for a given encoding mode, stored on the temp allocator.
@param s "String to encode"
@param mode "Url encoding mode"
@return "Percent-encoded String"
*>
fn String temp_encode(String s, UrlEncodingMode mode) => encode(s, mode, allocator::temp());
<*
Calculate the length of the percent-decoded string.
@return! UrlDecodingError.INVALID_HEX
*>
fn usz! decode_len(String s, UrlEncodingMode mode) @inline
{
usz n;
foreach (i, c: s)
{
if (c != '%') continue;
if (i + 2 >= s.len || !s[i+1].is_xdigit() || !s[i+2].is_xdigit())
{
return UrlDecodingError.INVALID_HEX?;
}
n++;
}
return s.len - 2 * n;
}
<*
Decode string s for a given encoding mode.
Returned string must be freed.
@param s "String to decode"
@param mode "Url encoding mode"
@param [inout] allocator
@return "Percent-decoded String"
*>
fn String! decode(String s, UrlEncodingMode mode, Allocator allocator)
{
usz n = decode_len(s, mode)!;
@pool(allocator)
{
DString builder = dstring::temp_with_capacity(n);
for (usz i = 0; i < s.len; i++)
{
switch (s[i])
{
// decode encoded char
case '%':
char[] hex = hex::decode_temp(s[i+1:2])!;
builder.append(hex);
i += 2;
// decode space when in queries
case '+':
builder.append_char((mode == QUERY) ? ' ' : '+');
// use char, no decoding needed
default:
builder.append_char(s[i]);
}
}
return builder.copy_str(allocator);
};
}
<*
Decode string s for a given encoding mode.
Returned string must be freed.
@param s "String to decode"
@param mode "Url encoding mode"
@return "Percent-decoded String"
*>
fn String! new_decode(String s, UrlEncodingMode mode) => decode(s, mode, allocator::heap());
<*
Decode string s for a given encoding mode, stored on the temp allocator.
@param s "String to decode"
@param mode "Url encoding mode"
@return "Percent-decoded String"
*>
fn String! temp_decode(String s, UrlEncodingMode mode) => decode(s, mode, allocator::temp());

View File

@@ -147,6 +147,7 @@
- Add `memcpy` / `memset` / `memcmp` to nolibc. - Add `memcpy` / `memset` / `memcmp` to nolibc.
- Add `sort::quickselect` to find the k-th smallest element in an unordered list. - Add `sort::quickselect` to find the k-th smallest element in an unordered list.
- Add `sort::is_sorted` to determine if a list is sorted. - Add `sort::is_sorted` to determine if a list is sorted.
- Implement RFC 3986 for url encoding and decoding.
## 0.6.4 Change list ## 0.6.4 Change list

View File

@@ -7,7 +7,8 @@ import std::net::url;
fn void test_parse_foo() fn void test_parse_foo()
{ {
Url url = url::parse("foo://example.com:8042/over/there?name=ferret#nose")!!; Url url = url::new_parse("foo://example.com:8042/over/there?name=ferret#nose")!!;
defer url.free();
assert(url.scheme == "foo", "got '%s'", url.scheme); assert(url.scheme == "foo", "got '%s'", url.scheme);
assert(url.host == "example.com", "got '%s'", url.host); assert(url.host == "example.com", "got '%s'", url.host);
@@ -21,7 +22,8 @@ fn void test_parse_foo()
fn void test_parse_urn() fn void test_parse_urn()
{ {
Url url = url::parse("urn:example:animal:ferret:nose")!!; Url url = url::new_parse("urn:example:animal:ferret:nose")!!;
defer url.free();
assert(url.scheme == "urn"); assert(url.scheme == "urn");
assert(url.host == ""); assert(url.host == "");
@@ -35,7 +37,8 @@ fn void test_parse_urn()
fn void test_parse_jdbc() fn void test_parse_jdbc()
{ {
Url url = url::parse("jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true")!!; Url url = url::new_parse("jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true")!!;
defer url.free();
assert(url.scheme == "jdbc:mysql"); assert(url.scheme == "jdbc:mysql");
assert(url.host == "localhost"); assert(url.host == "localhost");
@@ -49,7 +52,8 @@ fn void test_parse_jdbc()
fn void test_parse_ftp() fn void test_parse_ftp()
{ {
Url url = url::parse("ftp://ftp.is.co.za/rfc/rfc1808.txt")!!; Url url = url::new_parse("ftp://ftp.is.co.za/rfc/rfc1808.txt")!!;
defer url.free();
assert(url.scheme == "ftp"); assert(url.scheme == "ftp");
assert(url.host == "ftp.is.co.za"); assert(url.host == "ftp.is.co.za");
@@ -63,7 +67,8 @@ fn void test_parse_ftp()
fn void test_parse_http() fn void test_parse_http()
{ {
Url url = url::parse("http://www.ietf.org/rfc/rfc2396.txt#header1")!!; Url url = url::new_parse("http://www.ietf.org/rfc/rfc2396.txt#header1")!!;
defer url.free();
assert(url.scheme == "http"); assert(url.scheme == "http");
assert(url.host == "www.ietf.org"); assert(url.host == "www.ietf.org");
@@ -77,7 +82,8 @@ fn void test_parse_http()
fn void test_parse_ldap() fn void test_parse_ldap()
{ {
Url url = url::parse("ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two")!!; Url url = url::new_parse("ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two")!!;
defer url.free();
assert(url.scheme == "ldap"); assert(url.scheme == "ldap");
assert(url.host == "[2001:db8::7]"); assert(url.host == "[2001:db8::7]");
@@ -91,7 +97,8 @@ fn void test_parse_ldap()
fn void test_parse_mailto() fn void test_parse_mailto()
{ {
Url url = url::parse("mailto:John.Doe@example.com")!!; Url url = url::new_parse("mailto:John.Doe@example.com")!!;
defer url.free();
assert(url.scheme == "mailto"); assert(url.scheme == "mailto");
assert(url.host == ""); assert(url.host == "");
@@ -103,9 +110,10 @@ fn void test_parse_mailto()
assert(url.fragment == ""); assert(url.fragment == "");
} }
fn void test_parse_news() fn void test_new_parses()
{ {
Url url = url::parse("news:comp.infosystems.www.servers.unix")!!; Url url = url::new_parse("news:comp.infosystems.www.servers.unix")!!;
defer url.free();
assert(url.scheme == "news"); assert(url.scheme == "news");
assert(url.host == ""); assert(url.host == "");
@@ -119,7 +127,8 @@ fn void test_parse_news()
fn void test_parse_tel() fn void test_parse_tel()
{ {
Url url = url::parse("tel:+1-816-555-1212")!!; Url url = url::new_parse("tel:+1-816-555-1212")!!;
defer url.free();
assert(url.scheme == "tel"); assert(url.scheme == "tel");
assert(url.host == ""); assert(url.host == "");
@@ -133,7 +142,8 @@ fn void test_parse_tel()
fn void test_parse_telnet() fn void test_parse_telnet()
{ {
Url url = url::parse("telnet://192.0.2.16:80/")!!; Url url = url::new_parse("telnet://192.0.2.16:80/")!!;
defer url.free();
assert(url.scheme == "telnet"); assert(url.scheme == "telnet");
assert(url.host == "192.0.2.16"); assert(url.host == "192.0.2.16");
@@ -147,7 +157,8 @@ fn void test_parse_telnet()
fn void test_parse_urn2() fn void test_parse_urn2()
{ {
Url url = url::parse("urn:oasis:names:specification:docbook:dtd:xml:4.1.2")!!; Url url = url::new_parse("urn:oasis:names:specification:docbook:dtd:xml:4.1.2")!!;
defer url.free();
assert(url.scheme == "urn"); assert(url.scheme == "urn");
assert(url.host == ""); assert(url.host == "");
@@ -161,16 +172,54 @@ fn void test_parse_urn2()
fn void test_parse_empty() fn void test_parse_empty()
{ {
Url url = url::parse(" ")!!; assert(@catch(url::new_parse(" ")) == UrlParsingResult.EMPTY);
}
assert(url.scheme == ""); // Parser tests with escape sequences
assert(url.host == "");
fn void test_parse_path_with_escape_sequence()
{
Url url = url::new_parse("foo://example.com:8042/file/name%20one%26two?name=ferret#nose")!!;
defer url.free();
assert(url.scheme == "foo", "got '%s'", url.scheme);
assert(url.host == "example.com", "got '%s'", url.host);
assert(url.port == 8042, "got '%d'", url.port);
assert(url.username == "", "got '%s'", url.username);
assert(url.password == "", "got '%s'", url.password);
assert(url.path == "/file/name one&two", "got '%s'", url.path);
assert(url.query == "name=ferret", "got '%s'", url.query);
assert(url.fragment == "nose", "got: '%s'", url.fragment);
}
fn void test_parse_username_and_password_with_escape_sequence()
{
Url url = url::new_parse("jdbc:mysql://test%20user:ouu%40pppssss@localhost:3306/sakila?profileSQL=true")!!;
defer url.free();
assert(url.scheme == "jdbc:mysql");
assert(url.host == "localhost");
assert(url.port == 3306);
assert(url.username == "test user", "got '%s'", url.username);
assert(url.password == "ouu@pppssss", "got '%s'", url.password);
assert(url.path == "/sakila");
assert(url.query == "profileSQL=true");
assert(url.fragment == "");
}
fn void test_parse_fragment_with_escape_sequence()
{
Url url = url::new_parse("http://www.ietf.org/rfc/rfc2396.txt#header%201%262")!!;
defer url.free();
assert(url.scheme == "http");
assert(url.host == "www.ietf.org");
assert(url.port == 0); assert(url.port == 0);
assert(url.username == "", "got '%s'", url.username); assert(url.username == "", "got '%s'", url.username);
assert(url.password == "", "got '%s'", url.password); assert(url.password == "", "got '%s'", url.password);
assert(url.path == ""); assert(url.path == "/rfc/rfc2396.txt");
assert(url.query == ""); assert(url.query == "");
assert(url.fragment == ""); assert(url.fragment == "header 1&2");
} }
// to_string() tests // to_string() tests
@@ -179,6 +228,7 @@ fn void test_string_foo()
{ {
Url url = {.scheme="foo", .host="example.com", .port=8042, .path="/over/there", .query="name=ferret", .fragment="nose"}; Url url = {.scheme="foo", .host="example.com", .port=8042, .path="/over/there", .query="name=ferret", .fragment="nose"};
String str = string::new_format("%s", url); String str = string::new_format("%s", url);
defer str.free();
assert(str == "foo://example.com:8042/over/there?name=ferret#nose"); assert(str == "foo://example.com:8042/over/there?name=ferret#nose");
} }
@@ -187,6 +237,7 @@ fn void test_string_urn()
{ {
Url url = {.scheme="urn", .path="example:animal:ferret:nose"}; Url url = {.scheme="urn", .path="example:animal:ferret:nose"};
String str = string::new_format("%s", url); String str = string::new_format("%s", url);
defer str.free();
assert(str == "urn:example:animal:ferret:nose"); assert(str == "urn:example:animal:ferret:nose");
} }
@@ -195,6 +246,7 @@ fn void test_string_jdbc()
{ {
Url url = {.scheme="jdbc:mysql", .host="localhost", .port=3306, .username="test_user", .password="ouupppssss", .path="/sakila", .query="profileSQL=true"}; Url url = {.scheme="jdbc:mysql", .host="localhost", .port=3306, .username="test_user", .password="ouupppssss", .path="/sakila", .query="profileSQL=true"};
String str = string::new_format("%s", url); String str = string::new_format("%s", url);
defer str.free();
assert(str == "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true"); assert(str == "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true");
} }
@@ -203,30 +255,34 @@ fn void test_string_ftp()
{ {
Url url = {.scheme="ftp", .host="ftp.is.co.za", .path="/rfc/rfc1808.txt"}; Url url = {.scheme="ftp", .host="ftp.is.co.za", .path="/rfc/rfc1808.txt"};
String str = string::new_format("%s", url); String str = string::new_format("%s", url);
defer str.free();
assert(str == "ftp://ftp.is.co.za/rfc/rfc1808.txt"); assert(str == "ftp://ftp.is.co.za/rfc/rfc1808.txt");
} }
fn void test_string_http() fn void test_string_http()
{ {
Url url = {.scheme="http", .host="www.ietf.org", .path="/rfc/rfc2396.txt#header1"}; Url url = {.scheme="http", .host="www.ietf.org", .path="/rfc/rfc2396.txt", .fragment="header1"};
String str = string::new_format("%s", url); String str = string::new_format("%s", url);
defer str.free();
assert(str == "http://www.ietf.org/rfc/rfc2396.txt#header1"); assert(str == "http://www.ietf.org/rfc/rfc2396.txt#header1", "got: '%s'", str);
} }
fn void test_string_ldap() fn void test_string_ldap()
{ {
Url url = {.scheme="ldap", .host="[2001:db8::7]", .path="/c=GB?objectClass=one&objectClass=two"}; Url url = {.scheme="ldap", .host="[2001:db8::7]", .path="/c=GB", .query="objectClass=one&objectClass=two"};
String str = string::new_format("%s", url); String str = string::new_format("%s", url);
defer str.free();
assert(str == "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two"); assert(str == "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two", "got: '%s'", str);
} }
fn void test_string_mailto() fn void test_string_mailto()
{ {
Url url = {.scheme="mailto", .path="John.Doe@example.com"}; Url url = {.scheme="mailto", .path="John.Doe@example.com"};
String str = string::new_format("%s", url); String str = string::new_format("%s", url);
defer str.free();
assert(str == "mailto:John.Doe@example.com"); assert(str == "mailto:John.Doe@example.com");
} }
@@ -235,6 +291,7 @@ fn void test_string_news()
{ {
Url url = {.scheme="news", .path="comp.infosystems.www.servers.unix"}; Url url = {.scheme="news", .path="comp.infosystems.www.servers.unix"};
String str = string::new_format("%s", url); String str = string::new_format("%s", url);
defer str.free();
assert(str == "news:comp.infosystems.www.servers.unix"); assert(str == "news:comp.infosystems.www.servers.unix");
} }
@@ -242,6 +299,7 @@ fn void test_string_tel()
{ {
Url url = {.scheme="tel", .path="+1-816-555-1212"}; Url url = {.scheme="tel", .path="+1-816-555-1212"};
String str = string::new_format("%s", url); String str = string::new_format("%s", url);
defer str.free();
assert(str == "tel:+1-816-555-1212"); assert(str == "tel:+1-816-555-1212");
} }
@@ -250,6 +308,7 @@ fn void test_string_telnet()
{ {
Url url = {.scheme="telnet", .host="192.0.2.16", .port=80, .path="/"}; Url url = {.scheme="telnet", .host="192.0.2.16", .port=80, .path="/"};
String str = string::new_format("%s", url); String str = string::new_format("%s", url);
defer str.free();
assert(str == "telnet://192.0.2.16:80/"); assert(str == "telnet://192.0.2.16:80/");
} }
@@ -258,6 +317,7 @@ fn void test_string_urn2()
{ {
Url url = {.scheme="urn", .path="oasis:names:specification:docbook:dtd:xml:4.1.2"}; Url url = {.scheme="urn", .path="oasis:names:specification:docbook:dtd:xml:4.1.2"};
String str = string::new_format("%s", url); String str = string::new_format("%s", url);
defer str.free();
assert(str == "urn:oasis:names:specification:docbook:dtd:xml:4.1.2"); assert(str == "urn:oasis:names:specification:docbook:dtd:xml:4.1.2");
} }
@@ -266,6 +326,7 @@ fn void test_string_empty()
{ {
Url url = {}; Url url = {};
String str = string::new_format("%s", url); String str = string::new_format("%s", url);
defer str.free();
assert(str == ""); assert(str == "");
} }
@@ -274,9 +335,10 @@ fn void test_string_empty()
fn void test_query_values1() fn void test_query_values1()
{ {
Url url = url::parse("foo://example.com:8042/over/there?name=ferret=ok#nose")!!; Url url = url::new_parse("foo://example.com:8042/over/there?name=ferret=ok#nose")!!;
defer url.free();
UrlQueryValues vals = url.new_query_values(); UrlQueryValues vals = url::temp_parse_query(url.query);
defer vals.free(); defer vals.free();
assert(vals.len() == 1); assert(vals.len() == 1);
@@ -288,9 +350,10 @@ fn void test_query_values1()
fn void test_query_values2() fn void test_query_values2()
{ {
Url url = url::parse("foo://example.com:8042/over/there?name=ferret&age=99&age=11#nose")!!; Url url = url::new_parse("foo://example.com:8042/over/there?name=ferret&age=99&age=11#nose")!!;
defer url.free();
UrlQueryValues vals = url.new_query_values(); UrlQueryValues vals = url::new_parse_query(url.query);
defer vals.free(); defer vals.free();
assert(vals.len() == 2); assert(vals.len() == 2);
@@ -304,11 +367,93 @@ fn void test_query_values2()
assert(l_age[1] == "11"); assert(l_age[1] == "11");
} }
fn void test_query_values_withempty() fn void test_escaped_query_values()
{ {
Url url = url::parse("foo://example.com:8042/over/there?name=ferret&&&age=99&age=11")!!; Url url = url::new_parse("foo://example.com:8042/over/there?k%3Bey=%3Ckey%3A+0x90%3E&age=99&age=11#nose")!!;
defer url.free();
UrlQueryValues vals = url.new_query_values(); UrlQueryValues vals = url::new_parse_query(url.query);
defer vals.free(); defer vals.free();
assert(vals.len() == 2); assert(vals.len() == 2);
}
UrlQueryValueList l_key = vals["k;ey"]!!;
assert(l_key.len() == 1);
assert(l_key[0] == "<key: 0x90>");
}
<*
 Parses a URL whose query contains empty segments (`&&&`) and checks that the
 resulting query map holds exactly two keys.
*>
fn void test_query_values_withempty()
{
	Url parsed = url::new_parse("foo://example.com:8042/over/there?name=ferret&&&age=99&age=11")!!;
	defer parsed.free();

	// The raw query is handed to the query parser as-is.
	UrlQueryValues query_map = url::new_parse_query(parsed.query);
	defer query_map.free();

	assert(query_map.len() == 2);
}
<*
 Composing a Url into a string and parsing that string back must round-trip:
 the re-parsed fields equal the original (un-encoded) fields and serializing
 the re-parsed Url yields the same string again.
*>
fn void test_url_idempotence()
{
	// Build the raw query string from un-encoded key/value pairs.
	UrlQueryValues builder;
	builder.new_init();
	defer builder.free();
	builder.add("profileSQL", "true");
	builder.add("k;ey", "<key: 0x90>");
	String raw_query = builder.to_string();
	defer raw_query.free();

	// Fields are filled in with their actual (un-encoded) values.
	Url original = {
		.scheme = "jdbc:mysql",
		.host = "localhost",
		.port = 3306,
		.username = "test user",
		.password = "ouu@pppssss",
		.path = "/sakila",
		.query = raw_query,
		.fragment = "no se",
	};

	String composed = original.to_string();
	defer composed.free();

	String expected = "jdbc:mysql://test%20user:ouu%40pppssss@localhost:3306"
		"/sakila?profileSQL=true&k%3Bey=%3Ckey%3A+0x90%3E#no%20se";
	assert(composed == expected, "got: %s, want: %s", composed, expected);

	// Parse the composed string back and inspect the decoded query values.
	Url reparsed = url::new_parse(composed)!!;
	defer reparsed.free();

	UrlQueryValues values = url::new_parse_query(reparsed.query);
	defer values.free();
	assert(values.len() == 2);

	UrlQueryValueList entries;
	entries = values["k;ey"]!!;
	assert(entries.len() == 1);
	assert(entries[0] == "<key: 0x90>");

	entries = values["profileSQL"]!!;
	assert(entries.len() == 1);
	assert(entries[0] == "true");

	String reparsed_query = values.to_string();
	defer reparsed_query.free();

	// Field-by-field comparison against the hand-built Url.
	assert(reparsed.scheme == original.scheme);
	assert(reparsed.host == original.host);
	assert(reparsed.port == original.port);
	assert(reparsed.username == original.username);
	assert(reparsed.password == original.password);
	assert(reparsed.path == original.path);
	assert(reparsed.query == reparsed_query);
	assert(reparsed.fragment == original.fragment);

	// Serializing the re-parsed Url must reproduce the composed string.
	String recomposed = reparsed.to_string();
	defer recomposed.free();
	assert(composed == recomposed);
}

View File

@@ -0,0 +1,266 @@
module url_encode_test @test;
import std::io;
import std::net::url @public;
<*
 One row of the table-driven encode/decode tests in this file.
 Rows are written with positional initializers, so the field order matters.
*>
struct EncodeTest
{
// String handed to `url::temp_decode` (decode table) or `url::temp_encode` (encode table).
String in;
// Expected result of that call; the encode/decode round-trip test also feeds it back to `url::temp_decode`.
String out;
// Fault the decode test compares against when decoding fails; `anyfault{}` in rows that expect success.
anyfault err;
// Encoding mode passed along with the string.
UrlEncodingMode mode;
}
// Cases for test_decoding_with_error(): percent-decoding in QUERY mode,
// including malformed escapes that are expected to yield INVALID_HEX.
EncodeTest[*] decode_with_error_tests @local = {
	// Well-formed input.
	{ .in = "",           .out = "",     .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	{ .in = "abc",        .out = "abc",  .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	{ .in = "1%41",       .out = "1A",   .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	{ .in = "1%41%42%43", .out = "1ABC", .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	{ .in = "%4a",        .out = "J",    .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	{ .in = "%6F",        .out = "o",    .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	// Truncated or non-hex escapes.
	{ .in = "%",        .out = "", .err = UrlDecodingError.INVALID_HEX, .mode = UrlEncodingMode.QUERY },
	{ .in = "%a",       .out = "", .err = UrlDecodingError.INVALID_HEX, .mode = UrlEncodingMode.QUERY },
	{ .in = "%1",       .out = "", .err = UrlDecodingError.INVALID_HEX, .mode = UrlEncodingMode.QUERY },
	{ .in = "123%45%6", .out = "", .err = UrlDecodingError.INVALID_HEX, .mode = UrlEncodingMode.QUERY },
	{ .in = "%zzzzz",   .out = "", .err = UrlDecodingError.INVALID_HEX, .mode = UrlEncodingMode.QUERY },
	// Both '+' and "%20" decode to a space in QUERY mode.
	{ .in = "a+b",   .out = "a b", .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	{ .in = "a%20b", .out = "a b", .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
};
<*
 Table-driven check of `url::temp_decode()` over `decode_with_error_tests`.

 For every row the input is decoded with the row's mode:
 - if decoding yields a fault, that fault must equal the row's `err`;
 - if decoding succeeds, the row must not expect a fault and the decoded
   string must equal the row's `out`.

 The explicit "no fault expected" check matters because every expected-fault
 row has `out == ""`: without it a decoder that wrongly returned an empty
 string instead of a fault would pass unnoticed.
*>
fn void test_decoding_with_error()
{
	// Zero value used by the table to mean "no fault expected".
	anyfault no_fault = anyfault{};
	String! actual;
	@pool() {
		foreach (test: decode_with_error_tests)
		{
			actual = url::temp_decode(test.in, test.mode);
			if (catch excuse = actual)
			{
				assert(excuse == test.err, "unescape(%s, %s); "
					"got: %s, want: %s", test.in, test.mode, excuse, test.err);
				continue;
			}
			// Success path: reject rows that were supposed to fail.
			assert(test.err == no_fault, "unescape(%s, %s); "
				"got: %s, want: %s", test.in, test.mode, actual, test.err);
			assert(actual == test.out, "unescape(%s, %s); "
				"got: %s, want: %s", test.in, test.mode, actual, test.out);
		}
	};
}
// Cases for test_percent_encode_and_decode(): `in` must encode to `out`
// and `out` must decode back to `in` under the given mode.
EncodeTest[*] encode_tests @local = {
	// PATH mode.
	{ .in = "",        .out = "",          .err = anyfault{}, .mode = UrlEncodingMode.PATH },
	{ .in = "abc",     .out = "abc",       .err = anyfault{}, .mode = UrlEncodingMode.PATH },
	{ .in = "abc+def", .out = "abc+def",   .err = anyfault{}, .mode = UrlEncodingMode.PATH },
	{ .in = "a/b",     .out = "a/b",       .err = anyfault{}, .mode = UrlEncodingMode.PATH },
	{ .in = "one two", .out = "one%20two", .err = anyfault{}, .mode = UrlEncodingMode.PATH },
	{ .in = "10%",     .out = "10%25",     .err = anyfault{}, .mode = UrlEncodingMode.PATH },
	// QUERY mode.
	{ .in = "",        .out = "",        .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	{ .in = "abc",     .out = "abc",     .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	{ .in = "one two", .out = "one+two", .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	{ .in = "10%",     .out = "10%25",   .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	{
		.in = " ?&=#+%!<>#\"{}|\\^[]`☺\t:/@$'()*,;",
		.out = "+%3F%26%3D%23%2B%25%21%3C%3E%23%22%7B%7D%7C%5C%5E%5B%5D%60%E2%98%BA%09%3A%2F%40%24%27%28%29%2A%2C%3B",
		.err = anyfault{},
		.mode = UrlEncodingMode.QUERY,
	},
};
<*
 Table-driven round trip over `encode_tests`: each row's `in` must encode to
 `out` with `url::temp_encode()`, and `out` must decode back to `in` with
 `url::temp_decode()`, both using the row's mode.
*>
fn void test_percent_encode_and_decode()
{
	@pool() {
		foreach (test: encode_tests)
		{
			// Forward direction: raw -> percent-encoded.
			String encoded = url::temp_encode(test.in, test.mode);
			assert(encoded == test.out, "escape(%s, %s); "
				"got: %s, want: %s", test.in, test.mode, encoded, test.out);

			// Reverse direction: percent-encoded -> raw.
			String decoded = url::temp_decode(test.out, test.mode)!!;
			assert(decoded == test.in, "unescape(%s, %s); "
				"got: %s, want: %s", test.out, test.mode, decoded, test.in);
		}
	};
}
<*
 One row of the `url::should_encode` table test.
 Rows are written with positional initializers, so the field order matters.
*>
struct ShouldEncodeTest
{
// Character passed to `url::should_encode`.
char in;
// Encoding mode passed together with the character.
UrlEncodingMode mode;
// Expected return value of `url::should_encode` for this character and mode.
bool escape;
}
// Cases for test_should_encode(): expected `url::should_encode` result
// for a single character under a given encoding mode.
ShouldEncodeTest[*] should_encode_tests = {
	// Letters and digits.
	{ .in = 'a', .mode = UrlEncodingMode.PATH,     .escape = false },
	{ .in = 'a', .mode = UrlEncodingMode.USERPASS, .escape = false },
	{ .in = 'a', .mode = UrlEncodingMode.QUERY,    .escape = false },
	{ .in = 'a', .mode = UrlEncodingMode.FRAGMENT, .escape = false },
	{ .in = 'a', .mode = UrlEncodingMode.HOST,     .escape = false },
	{ .in = 'z', .mode = UrlEncodingMode.PATH,     .escape = false },
	{ .in = 'A', .mode = UrlEncodingMode.PATH,     .escape = false },
	{ .in = 'Z', .mode = UrlEncodingMode.PATH,     .escape = false },
	{ .in = '0', .mode = UrlEncodingMode.PATH,     .escape = false },
	{ .in = '9', .mode = UrlEncodingMode.PATH,     .escape = false },
	// '-', '.', '_' and '~'.
	{ .in = '-', .mode = UrlEncodingMode.PATH,     .escape = false },
	{ .in = '-', .mode = UrlEncodingMode.USERPASS, .escape = false },
	{ .in = '-', .mode = UrlEncodingMode.QUERY,    .escape = false },
	{ .in = '-', .mode = UrlEncodingMode.FRAGMENT, .escape = false },
	{ .in = '.', .mode = UrlEncodingMode.PATH,     .escape = false },
	{ .in = '_', .mode = UrlEncodingMode.PATH,     .escape = false },
	{ .in = '~', .mode = UrlEncodingMode.PATH,     .escape = false },
	// USERPASS mode.
	{ .in = '/', .mode = UrlEncodingMode.USERPASS, .escape = true },
	{ .in = '?', .mode = UrlEncodingMode.USERPASS, .escape = true },
	{ .in = '@', .mode = UrlEncodingMode.USERPASS, .escape = true },
	{ .in = '$', .mode = UrlEncodingMode.USERPASS, .escape = false },
	{ .in = '&', .mode = UrlEncodingMode.USERPASS, .escape = false },
	{ .in = '+', .mode = UrlEncodingMode.USERPASS, .escape = false },
	{ .in = ',', .mode = UrlEncodingMode.USERPASS, .escape = false },
	{ .in = ';', .mode = UrlEncodingMode.USERPASS, .escape = false },
	{ .in = '=', .mode = UrlEncodingMode.USERPASS, .escape = false },
	// HOST mode.
	{ .in = '!',  .mode = UrlEncodingMode.HOST, .escape = false },
	{ .in = '$',  .mode = UrlEncodingMode.HOST, .escape = false },
	{ .in = '&',  .mode = UrlEncodingMode.HOST, .escape = false },
	{ .in = '\'', .mode = UrlEncodingMode.HOST, .escape = false },
	{ .in = '(',  .mode = UrlEncodingMode.HOST, .escape = false },
	{ .in = ')',  .mode = UrlEncodingMode.HOST, .escape = false },
	{ .in = '*',  .mode = UrlEncodingMode.HOST, .escape = false },
	{ .in = '+',  .mode = UrlEncodingMode.HOST, .escape = false },
	{ .in = ',',  .mode = UrlEncodingMode.HOST, .escape = false },
	{ .in = ';',  .mode = UrlEncodingMode.HOST, .escape = false },
	{ .in = '=',  .mode = UrlEncodingMode.HOST, .escape = false },
	{ .in = '0',  .mode = UrlEncodingMode.HOST, .escape = false },
	{ .in = '9',  .mode = UrlEncodingMode.HOST, .escape = false },
	{ .in = 'A',  .mode = UrlEncodingMode.HOST, .escape = false },
	{ .in = 'z',  .mode = UrlEncodingMode.HOST, .escape = false },
	{ .in = '_',  .mode = UrlEncodingMode.HOST, .escape = false },
	{ .in = '-',  .mode = UrlEncodingMode.HOST, .escape = false },
	{ .in = '.',  .mode = UrlEncodingMode.HOST, .escape = false },
};
<*
 Table-driven check of `url::should_encode()`: for every row in
 `should_encode_tests` the result for (`in`, `mode`) must equal `escape`.
*>
fn void test_should_encode()
{
	foreach (test: should_encode_tests)
	{
		bool got = url::should_encode(test.in, test.mode);
		assert(got == test.escape, "should_encode(%c, %s); "
			"got: %s, want: %s", test.in, test.mode, got, test.escape);
	}
}