net/url: implement url encoding (RFC 3986) (#1795)

* net/url: implement url encoding (RFC 3986)

Implement url percent-encoding and -decoding functions according to RFC
3986. Add unit tests.

Link: https://datatracker.ietf.org/doc/html/rfc3986

* net/url: ensure correct encoding of URL components

Add encoding and decoding methods to the Url struct components according
to RFC 3986.

A Url can be parsed from a String with `new_parse()` or `temp_parse()`.
The parsed fields are decoded. The only field that is not decoded is
`raw_query`. To access the decoded query values, use
`Url.query_values()`.

`Url.to_string()` will re-assemble the fields into a valid Url string
with proper percent-encoded values.

If the Url struct fields are filled in manually, use the actual
(un-encoded) values. To create a raw query string, initialize an
`UrlQueryValues` map, use `UrlQueryValues.add()` to add the query
parameters and, finally, call `UrlQueryValues.to_string()`.

---------

Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
This commit is contained in:
konimarti
2025-01-12 22:52:25 +01:00
committed by GitHub
parent 2623d7d525
commit 0e44e63fa8
5 changed files with 844 additions and 123 deletions

View File

@@ -7,7 +7,8 @@ import std::net::url;
fn void test_parse_foo()
{
Url url = url::parse("foo://example.com:8042/over/there?name=ferret#nose")!!;
Url url = url::new_parse("foo://example.com:8042/over/there?name=ferret#nose")!!;
defer url.free();
assert(url.scheme == "foo", "got '%s'", url.scheme);
assert(url.host == "example.com", "got '%s'", url.host);
@@ -21,7 +22,8 @@ fn void test_parse_foo()
fn void test_parse_urn()
{
Url url = url::parse("urn:example:animal:ferret:nose")!!;
Url url = url::new_parse("urn:example:animal:ferret:nose")!!;
defer url.free();
assert(url.scheme == "urn");
assert(url.host == "");
@@ -35,7 +37,8 @@ fn void test_parse_urn()
fn void test_parse_jdbc()
{
Url url = url::parse("jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true")!!;
Url url = url::new_parse("jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true")!!;
defer url.free();
assert(url.scheme == "jdbc:mysql");
assert(url.host == "localhost");
@@ -49,7 +52,8 @@ fn void test_parse_jdbc()
fn void test_parse_ftp()
{
Url url = url::parse("ftp://ftp.is.co.za/rfc/rfc1808.txt")!!;
Url url = url::new_parse("ftp://ftp.is.co.za/rfc/rfc1808.txt")!!;
defer url.free();
assert(url.scheme == "ftp");
assert(url.host == "ftp.is.co.za");
@@ -63,7 +67,8 @@ fn void test_parse_ftp()
fn void test_parse_http()
{
Url url = url::parse("http://www.ietf.org/rfc/rfc2396.txt#header1")!!;
Url url = url::new_parse("http://www.ietf.org/rfc/rfc2396.txt#header1")!!;
defer url.free();
assert(url.scheme == "http");
assert(url.host == "www.ietf.org");
@@ -77,7 +82,8 @@ fn void test_parse_http()
fn void test_parse_ldap()
{
Url url = url::parse("ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two")!!;
Url url = url::new_parse("ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two")!!;
defer url.free();
assert(url.scheme == "ldap");
assert(url.host == "[2001:db8::7]");
@@ -91,7 +97,8 @@ fn void test_parse_ldap()
fn void test_parse_mailto()
{
Url url = url::parse("mailto:John.Doe@example.com")!!;
Url url = url::new_parse("mailto:John.Doe@example.com")!!;
defer url.free();
assert(url.scheme == "mailto");
assert(url.host == "");
@@ -103,9 +110,10 @@ fn void test_parse_mailto()
assert(url.fragment == "");
}
fn void test_parse_news()
fn void test_new_parses()
{
Url url = url::parse("news:comp.infosystems.www.servers.unix")!!;
Url url = url::new_parse("news:comp.infosystems.www.servers.unix")!!;
defer url.free();
assert(url.scheme == "news");
assert(url.host == "");
@@ -119,7 +127,8 @@ fn void test_parse_news()
fn void test_parse_tel()
{
Url url = url::parse("tel:+1-816-555-1212")!!;
Url url = url::new_parse("tel:+1-816-555-1212")!!;
defer url.free();
assert(url.scheme == "tel");
assert(url.host == "");
@@ -133,7 +142,8 @@ fn void test_parse_tel()
fn void test_parse_telnet()
{
Url url = url::parse("telnet://192.0.2.16:80/")!!;
Url url = url::new_parse("telnet://192.0.2.16:80/")!!;
defer url.free();
assert(url.scheme == "telnet");
assert(url.host == "192.0.2.16");
@@ -147,7 +157,8 @@ fn void test_parse_telnet()
fn void test_parse_urn2()
{
Url url = url::parse("urn:oasis:names:specification:docbook:dtd:xml:4.1.2")!!;
Url url = url::new_parse("urn:oasis:names:specification:docbook:dtd:xml:4.1.2")!!;
defer url.free();
assert(url.scheme == "urn");
assert(url.host == "");
@@ -161,16 +172,54 @@ fn void test_parse_urn2()
fn void test_parse_empty()
{
Url url = url::parse(" ")!!;
assert(@catch(url::new_parse(" ")) == UrlParsingResult.EMPTY);
}
assert(url.scheme == "");
assert(url.host == "");
// Parser tests with escape sequences
// Percent-escapes in the path ("%20", "%26") must come back decoded, while the
// remaining components are reported unchanged.
fn void test_parse_path_with_escape_sequence()
{
	String raw = "foo://example.com:8042/file/name%20one%26two?name=ferret#nose";
	Url parsed = url::new_parse(raw)!!;
	defer parsed.free();

	// Authority part.
	assert(parsed.scheme == "foo", "got '%s'", parsed.scheme);
	assert(parsed.host == "example.com", "got '%s'", parsed.host);
	assert(parsed.port == 8042, "got '%d'", parsed.port);
	assert(parsed.username == "", "got '%s'", parsed.username);
	assert(parsed.password == "", "got '%s'", parsed.password);

	// Path (decoded), query and fragment.
	assert(parsed.path == "/file/name one&two", "got '%s'", parsed.path);
	assert(parsed.query == "name=ferret", "got '%s'", parsed.query);
	assert(parsed.fragment == "nose", "got: '%s'", parsed.fragment);
}
// Percent-escapes in the userinfo part ("%20" in the user name, "%40" in the
// password) must come back decoded; an escaped '@' must not be mistaken for
// the userinfo/host separator.
fn void test_parse_username_and_password_with_escape_sequence()
{
	Url url = url::new_parse("jdbc:mysql://test%20user:ouu%40pppssss@localhost:3306/sakila?profileSQL=true")!!;
	defer url.free();

	// Every assertion reports the offending value, matching the other
	// escape-sequence parser tests.
	assert(url.scheme == "jdbc:mysql", "got '%s'", url.scheme);
	assert(url.host == "localhost", "got '%s'", url.host);
	assert(url.port == 3306, "got '%d'", url.port);
	assert(url.username == "test user", "got '%s'", url.username);
	assert(url.password == "ouu@pppssss", "got '%s'", url.password);
	assert(url.path == "/sakila", "got '%s'", url.path);
	assert(url.query == "profileSQL=true", "got '%s'", url.query);
	assert(url.fragment == "", "got '%s'", url.fragment);
}
fn void test_parse_fragment_with_escape_sequence()
{
Url url = url::new_parse("http://www.ietf.org/rfc/rfc2396.txt#header%201%262")!!;
defer url.free();
assert(url.scheme == "http");
assert(url.host == "www.ietf.org");
assert(url.port == 0);
assert(url.username == "", "got '%s'", url.username);
assert(url.password == "", "got '%s'", url.password);
assert(url.path == "");
assert(url.path == "/rfc/rfc2396.txt");
assert(url.query == "");
assert(url.fragment == "");
assert(url.fragment == "header 1&2");
}
// to_string() tests
@@ -179,6 +228,7 @@ fn void test_string_foo()
{
Url url = {.scheme="foo", .host="example.com", .port=8042, .path="/over/there", .query="name=ferret", .fragment="nose"};
String str = string::new_format("%s", url);
defer str.free();
assert(str == "foo://example.com:8042/over/there?name=ferret#nose");
}
@@ -187,6 +237,7 @@ fn void test_string_urn()
{
Url url = {.scheme="urn", .path="example:animal:ferret:nose"};
String str = string::new_format("%s", url);
defer str.free();
assert(str == "urn:example:animal:ferret:nose");
}
@@ -195,6 +246,7 @@ fn void test_string_jdbc()
{
Url url = {.scheme="jdbc:mysql", .host="localhost", .port=3306, .username="test_user", .password="ouupppssss", .path="/sakila", .query="profileSQL=true"};
String str = string::new_format("%s", url);
defer str.free();
assert(str == "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true");
}
@@ -203,30 +255,34 @@ fn void test_string_ftp()
{
Url url = {.scheme="ftp", .host="ftp.is.co.za", .path="/rfc/rfc1808.txt"};
String str = string::new_format("%s", url);
defer str.free();
assert(str == "ftp://ftp.is.co.za/rfc/rfc1808.txt");
}
fn void test_string_http()
{
Url url = {.scheme="http", .host="www.ietf.org", .path="/rfc/rfc2396.txt#header1"};
Url url = {.scheme="http", .host="www.ietf.org", .path="/rfc/rfc2396.txt", .fragment="header1"};
String str = string::new_format("%s", url);
defer str.free();
assert(str == "http://www.ietf.org/rfc/rfc2396.txt#header1");
assert(str == "http://www.ietf.org/rfc/rfc2396.txt#header1", "got: '%s'", str);
}
fn void test_string_ldap()
{
Url url = {.scheme="ldap", .host="[2001:db8::7]", .path="/c=GB?objectClass=one&objectClass=two"};
Url url = {.scheme="ldap", .host="[2001:db8::7]", .path="/c=GB", .query="objectClass=one&objectClass=two"};
String str = string::new_format("%s", url);
defer str.free();
assert(str == "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two");
assert(str == "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two", "got: '%s'", str);
}
fn void test_string_mailto()
{
Url url = {.scheme="mailto", .path="John.Doe@example.com"};
String str = string::new_format("%s", url);
defer str.free();
assert(str == "mailto:John.Doe@example.com");
}
@@ -235,6 +291,7 @@ fn void test_string_news()
{
Url url = {.scheme="news", .path="comp.infosystems.www.servers.unix"};
String str = string::new_format("%s", url);
defer str.free();
assert(str == "news:comp.infosystems.www.servers.unix");
}
@@ -242,6 +299,7 @@ fn void test_string_tel()
{
Url url = {.scheme="tel", .path="+1-816-555-1212"};
String str = string::new_format("%s", url);
defer str.free();
assert(str == "tel:+1-816-555-1212");
}
@@ -250,6 +308,7 @@ fn void test_string_telnet()
{
Url url = {.scheme="telnet", .host="192.0.2.16", .port=80, .path="/"};
String str = string::new_format("%s", url);
defer str.free();
assert(str == "telnet://192.0.2.16:80/");
}
@@ -258,6 +317,7 @@ fn void test_string_urn2()
{
Url url = {.scheme="urn", .path="oasis:names:specification:docbook:dtd:xml:4.1.2"};
String str = string::new_format("%s", url);
defer str.free();
assert(str == "urn:oasis:names:specification:docbook:dtd:xml:4.1.2");
}
@@ -266,6 +326,7 @@ fn void test_string_empty()
{
Url url = {};
String str = string::new_format("%s", url);
defer str.free();
assert(str == "");
}
@@ -274,9 +335,10 @@ fn void test_string_empty()
fn void test_query_values1()
{
Url url = url::parse("foo://example.com:8042/over/there?name=ferret=ok#nose")!!;
Url url = url::new_parse("foo://example.com:8042/over/there?name=ferret=ok#nose")!!;
defer url.free();
UrlQueryValues vals = url.new_query_values();
UrlQueryValues vals = url::temp_parse_query(url.query);
defer vals.free();
assert(vals.len() == 1);
@@ -288,9 +350,10 @@ fn void test_query_values1()
fn void test_query_values2()
{
Url url = url::parse("foo://example.com:8042/over/there?name=ferret&age=99&age=11#nose")!!;
Url url = url::new_parse("foo://example.com:8042/over/there?name=ferret&age=99&age=11#nose")!!;
defer url.free();
UrlQueryValues vals = url.new_query_values();
UrlQueryValues vals = url::new_parse_query(url.query);
defer vals.free();
assert(vals.len() == 2);
@@ -304,11 +367,93 @@ fn void test_query_values2()
assert(l_age[1] == "11");
}
fn void test_query_values_withempty()
fn void test_escaped_query_values()
{
Url url = url::parse("foo://example.com:8042/over/there?name=ferret&&&age=99&age=11")!!;
Url url = url::new_parse("foo://example.com:8042/over/there?k%3Bey=%3Ckey%3A+0x90%3E&age=99&age=11#nose")!!;
defer url.free();
UrlQueryValues vals = url.new_query_values();
UrlQueryValues vals = url::new_parse_query(url.query);
defer vals.free();
assert(vals.len() == 2);
}
UrlQueryValueList l_key = vals["k;ey"]!!;
assert(l_key.len() == 1);
assert(l_key[0] == "<key: 0x90>");
}
// Empty segments between '&' separators ("&&&") must not create query
// entries: only "name" and "age" may be present, with their values intact.
fn void test_query_values_withempty()
{
	Url url = url::new_parse("foo://example.com:8042/over/there?name=ferret&&&age=99&age=11")!!;
	defer url.free();

	UrlQueryValues vals = url::new_parse_query(url.query);
	defer vals.free();
	assert(vals.len() == 2, "got: %d", vals.len());

	// Checking the count alone would let wrong keys or values slip through,
	// so verify the content of both surviving entries as well.
	UrlQueryValueList l_name = vals["name"]!!;
	assert(l_name.len() == 1, "got: %d", l_name.len());
	assert(l_name[0] == "ferret", "got: '%s'", l_name[0]);

	UrlQueryValueList l_age = vals["age"]!!;
	assert(l_age.len() == 2, "got: %d", l_age.len());
	assert(l_age[0] == "99", "got: '%s'", l_age[0]);
	assert(l_age[1] == "11", "got: '%s'", l_age[1]);
}
// Compose -> parse round trip: a Url built from raw (un-encoded) field values
// must serialize to the expected percent-encoded string, and parsing that
// string must yield the same fields and serialize to the same string again.
fn void test_url_idempotence()
{
	// Build the query through UrlQueryValues so that to_string() emits the
	// encoded form.
	UrlQueryValues query_builder;
	query_builder.new_init();
	defer query_builder.free();
	query_builder.add("profileSQL", "true");
	query_builder.add("k;ey", "<key: 0x90>");
	String query = query_builder.to_string();
	defer query.free();

	Url url = {
		.scheme = "jdbc:mysql",
		.host = "localhost",
		.port = 3306,
		.username = "test user",
		.password = "ouu@pppssss",
		.path = "/sakila",
		.query = query,
		.fragment = "no se",
	};
	String url_string = url.to_string();
	defer url_string.free();

	String want = "jdbc:mysql://test%20user:ouu%40pppssss@localhost:3306"
		"/sakila?profileSQL=true&k%3Bey=%3Ckey%3A+0x90%3E#no%20se";
	assert(url_string == want, "got: %s, want: %s", url_string, want);

	// Parse the composed string back.
	Url parsed = url::new_parse(url_string)!!;
	defer parsed.free();

	UrlQueryValues vals = url::new_parse_query(parsed.query);
	defer vals.free();
	assert(vals.len() == 2, "got: %d", vals.len());

	UrlQueryValueList key;
	key = vals["k;ey"]!!;
	assert(key.len() == 1, "got: %d", key.len());
	assert(key[0] == "<key: 0x90>", "got: '%s'", key[0]);
	key = vals["profileSQL"]!!;
	assert(key.len() == 1, "got: %d", key.len());
	assert(key[0] == "true", "got: '%s'", key[0]);

	String parsed_query = vals.to_string();
	defer parsed_query.free();

	// Field-by-field comparison; each assertion names the field and shows
	// both sides so a failure can be diagnosed from the message alone.
	assert(parsed.scheme == url.scheme, "scheme: got '%s', want '%s'", parsed.scheme, url.scheme);
	assert(parsed.host == url.host, "host: got '%s', want '%s'", parsed.host, url.host);
	assert(parsed.port == url.port, "port: got '%d', want '%d'", parsed.port, url.port);
	assert(parsed.username == url.username, "username: got '%s', want '%s'", parsed.username, url.username);
	assert(parsed.password == url.password, "password: got '%s', want '%s'", parsed.password, url.password);
	assert(parsed.path == url.path, "path: got '%s', want '%s'", parsed.path, url.path);
	assert(parsed.query == parsed_query, "query: got '%s', want '%s'", parsed.query, parsed_query);
	assert(parsed.fragment == url.fragment, "fragment: got '%s', want '%s'", parsed.fragment, url.fragment);

	// Serializing the parsed Url must reproduce the original string.
	String parsed_string = parsed.to_string();
	defer parsed_string.free();
	assert(url_string == parsed_string, "got: %s, want: %s", parsed_string, url_string);
}

View File

@@ -0,0 +1,266 @@
module url_encode_test @test;
import std::io;
import std::net::url @public;
// One table-driven case, shared by the decode table and the encode table in
// this file. The tables use positional initializers, so the field order
// matters.
struct EncodeTest
{
// Input string: passed to url::temp_decode by the decode test and to
// url::temp_encode by the encode test.
String in;
// Expected result for `in`; the encode test also decodes `out` and expects
// to get `in` back.
String out;
// Fault the decode test compares against when decoding fails; anyfault{} in
// the cases that carry a real expected output.
anyfault err;
// Encoding mode forwarded to the encode/decode call.
UrlEncodingMode mode;
}
// Decoding cases, all in QUERY mode: plain text, valid upper- and lower-case
// hex escapes, truncated or non-hex escapes (expected to fail with
// INVALID_HEX), and the two spellings of a space ('+' and "%20").
EncodeTest[*] decode_with_error_tests @local = {
	{ .in = "", .out = "", .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	{ .in = "abc", .out = "abc", .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	{ .in = "1%41", .out = "1A", .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	{ .in = "1%41%42%43", .out = "1ABC", .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	{ .in = "%4a", .out = "J", .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	{ .in = "%6F", .out = "o", .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	{ .in = "%", .out = "", .err = UrlDecodingError.INVALID_HEX, .mode = UrlEncodingMode.QUERY },
	{ .in = "%a", .out = "", .err = UrlDecodingError.INVALID_HEX, .mode = UrlEncodingMode.QUERY },
	{ .in = "%1", .out = "", .err = UrlDecodingError.INVALID_HEX, .mode = UrlEncodingMode.QUERY },
	{ .in = "123%45%6", .out = "", .err = UrlDecodingError.INVALID_HEX, .mode = UrlEncodingMode.QUERY },
	{ .in = "%zzzzz", .out = "", .err = UrlDecodingError.INVALID_HEX, .mode = UrlEncodingMode.QUERY },
	{ .in = "a+b", .out = "a b", .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	{ .in = "a%20b", .out = "a b", .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
};
// Runs every case of decode_with_error_tests through url::temp_decode.
// A failing decode must report exactly the fault listed in the case; a
// successful decode must produce the listed output AND the case must not
// have expected a fault.
fn void test_decoding_with_error()
{
	@pool()
	{
		foreach (test : decode_with_error_tests)
		{
			String! actual = url::temp_decode(test.in, test.mode);
			if (catch excuse = actual)
			{
				assert(excuse == test.err, "unescape(%s, %s); "
					"got: %s, want: %s", test.in, test.mode, excuse, test.err);
				continue;
			}
			// The failure cases list "" as their output, so without this check
			// a decoder that wrongly succeeded with an empty result would pass.
			assert(test.err == anyfault{}, "unescape(%s, %s); "
				"got: %s, want error: %s", test.in, test.mode, actual, test.err);
			assert(actual == test.out, "unescape(%s, %s); "
				"got: %s, want: %s", test.in, test.mode, actual, test.out);
		}
	};
}
// Encoding cases for PATH and QUERY mode. Each case is also decoded again by
// the test that consumes this table, so `out` must decode back to `in`.
EncodeTest[*] encode_tests @local = {
	// PATH mode: a space becomes "%20"; '+' and '/' are left untouched.
	{ .in = "", .out = "", .err = anyfault{}, .mode = UrlEncodingMode.PATH },
	{ .in = "abc", .out = "abc", .err = anyfault{}, .mode = UrlEncodingMode.PATH },
	{ .in = "abc+def", .out = "abc+def", .err = anyfault{}, .mode = UrlEncodingMode.PATH },
	{ .in = "a/b", .out = "a/b", .err = anyfault{}, .mode = UrlEncodingMode.PATH },
	{ .in = "one two", .out = "one%20two", .err = anyfault{}, .mode = UrlEncodingMode.PATH },
	{ .in = "10%", .out = "10%25", .err = anyfault{}, .mode = UrlEncodingMode.PATH },
	// QUERY mode: a space becomes '+'.
	{ .in = "", .out = "", .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	{ .in = "abc", .out = "abc", .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	{ .in = "one two", .out = "one+two", .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	{ .in = "10%", .out = "10%25", .err = anyfault{}, .mode = UrlEncodingMode.QUERY },
	// QUERY mode: punctuation, a tab and a multi-byte UTF-8 character.
	{
		.in = " ?&=#+%!<>#\"{}|\\^[]`☺\t:/@$'()*,;",
		.out = "+%3F%26%3D%23%2B%25%21%3C%3E%23%22%7B%7D%7C%5C%5E%5B%5D%60%E2%98%BA%09%3A%2F%40%24%27%28%29%2A%2C%3B",
		.err = anyfault{},
		.mode = UrlEncodingMode.QUERY,
	},
};
// For every case in encode_tests: encoding `in` must give `out`, and decoding
// `out` with the same mode must give `in` back.
fn void test_percent_encode_and_decode()
{
	@pool()
	{
		foreach (tc : encode_tests)
		{
			// Forward direction.
			String encoded = url::temp_encode(tc.in, tc.mode);
			assert(encoded == tc.out, "escape(%s, %s); "
				"got: %s, want: %s", tc.in, tc.mode, encoded, tc.out);

			// Reverse direction; a decoding fault aborts the test via `!!`.
			String decoded = url::temp_decode(tc.out, tc.mode)!!;
			assert(decoded == tc.in, "unescape(%s, %s); "
				"got: %s, want: %s", tc.out, tc.mode, decoded, tc.in);
		}
	};
}
// One case for the url::should_encode table test. The table uses positional
// initializers, so the field order matters.
struct ShouldEncodeTest
{
// Character handed to url::should_encode.
char in;
// Encoding mode handed to url::should_encode together with the character.
UrlEncodingMode mode;
// Expected return value of url::should_encode for (in, mode).
bool escape;
}
// Cases for url::should_encode: {character, mode, expected result}.
// Marked @local like the other test tables in this file, since it is only
// used by test_should_encode.
ShouldEncodeTest[*] should_encode_tests @local = {
	// A letter is never escaped, whatever the mode.
	{'a', UrlEncodingMode.PATH, false},
	{'a', UrlEncodingMode.USERPASS, false},
	{'a', UrlEncodingMode.QUERY, false},
	{'a', UrlEncodingMode.FRAGMENT, false},
	{'a', UrlEncodingMode.HOST, false},
	// Alphanumeric boundaries in PATH mode.
	{'z', UrlEncodingMode.PATH, false},
	{'A', UrlEncodingMode.PATH, false},
	{'Z', UrlEncodingMode.PATH, false},
	{'0', UrlEncodingMode.PATH, false},
	{'9', UrlEncodingMode.PATH, false},
	// Unreserved marks ('-', '.', '_', '~' in RFC 3986).
	{'-', UrlEncodingMode.PATH, false},
	{'-', UrlEncodingMode.USERPASS, false},
	{'-', UrlEncodingMode.QUERY, false},
	{'-', UrlEncodingMode.FRAGMENT, false},
	{'.', UrlEncodingMode.PATH, false},
	{'_', UrlEncodingMode.PATH, false},
	{'~', UrlEncodingMode.PATH, false},
	// USERPASS: '/', '?' and '@' must be escaped ...
	{'/', UrlEncodingMode.USERPASS, true},
	{'?', UrlEncodingMode.USERPASS, true},
	{'@', UrlEncodingMode.USERPASS, true},
	// ... while these characters stay as they are.
	{'$', UrlEncodingMode.USERPASS, false},
	{'&', UrlEncodingMode.USERPASS, false},
	{'+', UrlEncodingMode.USERPASS, false},
	{',', UrlEncodingMode.USERPASS, false},
	{';', UrlEncodingMode.USERPASS, false},
	{'=', UrlEncodingMode.USERPASS, false},
	// HOST: the sub-delims of RFC 3986 are not escaped.
	{'!', UrlEncodingMode.HOST, false},
	{'$', UrlEncodingMode.HOST, false},
	{'&', UrlEncodingMode.HOST, false},
	{'\'', UrlEncodingMode.HOST, false},
	{'(', UrlEncodingMode.HOST, false},
	{')', UrlEncodingMode.HOST, false},
	{'*', UrlEncodingMode.HOST, false},
	{'+', UrlEncodingMode.HOST, false},
	{',', UrlEncodingMode.HOST, false},
	{';', UrlEncodingMode.HOST, false},
	{'=', UrlEncodingMode.HOST, false},
	// HOST: alphanumerics and unreserved marks.
	{'0', UrlEncodingMode.HOST, false},
	{'9', UrlEncodingMode.HOST, false},
	{'A', UrlEncodingMode.HOST, false},
	{'z', UrlEncodingMode.HOST, false},
	{'_', UrlEncodingMode.HOST, false},
	{'-', UrlEncodingMode.HOST, false},
	{'.', UrlEncodingMode.HOST, false},
};
// Checks url::should_encode against every (character, mode) pair listed in
// should_encode_tests.
fn void test_should_encode()
{
	foreach (tc : should_encode_tests)
	{
		bool got = url::should_encode(tc.in, tc.mode);
		assert(got == tc.escape, "should_encode(%c, %s); "
			"got: %s, want: %s", tc.in, tc.mode, got, tc.escape);
	}
}