From 0e44e63fa82ee0f28ee2247a9692b1cf5ee49420 Mon Sep 17 00:00:00 2001 From: konimarti <30975830+konimarti@users.noreply.github.com> Date: Sun, 12 Jan 2025 22:52:25 +0100 Subject: [PATCH] net/url: implement url encoding (RFC 3986) (#1795) * net/url: implement url encoding (RFC 3986) Implement url percent-encoding and -decoding functions according to RFC 3986. Add unit tests. Link: https://datatracker.ietf.org/doc/html/rfc3986 * net/url: ensure correct encoding of URL components Add encoding and decoding methods to the Url struct components according to RFC 3986. An Url can be parsed from a String with `new_parse()` or `temp_parse()`. The parsed fields are decoded. The only field that is not decoded is `query`. To access the decoded query values, use `new_parse_query(query)`. `Url.to_string()` will re-assemble the fields into a valid Url string with proper percent-encoded values. If the Url struct fields are filled in manually, use the actual (un-encoded) values. To create a raw query string, initialize an `UrlQueryValues` map, use `UrlQueryValues.add()` to add the query parameters and, finally, call `UrlQueryValues.to_string()`. 
--------- Co-authored-by: Christoffer Lerno --- lib/std/net/url.c3 | 300 ++++++++++++++++++--------- lib/std/net/url_encoding.c3 | 197 ++++++++++++++++++ releasenotes.md | 1 + test/unit/stdlib/net/url.c3 | 203 +++++++++++++++--- test/unit/stdlib/net/url_encoding.c3 | 266 ++++++++++++++++++++++++ 5 files changed, 844 insertions(+), 123 deletions(-) create mode 100644 lib/std/net/url_encoding.c3 create mode 100644 test/unit/stdlib/net/url_encoding.c3 diff --git a/lib/std/net/url.c3 b/lib/std/net/url.c3 index 116da036b..3514c100e 100644 --- a/lib/std/net/url.c3 +++ b/lib/std/net/url.c3 @@ -2,13 +2,32 @@ module std::net::url; import std::io, std::collections::map, std::collections::list; -def UrlQueryValueList = List(<String>); - -struct UrlQueryValues +fault UrlParsingResult { - inline HashMap(<String, UrlQueryValueList>) map; + EMPTY, + INVALID_SCHEME, + INVALID_USER, + INVALID_PASSWORD, + INVALID_HOST, + INVALID_PATH, + INVALID_FRAGMENT, } +<* + Represents the actual (decoded) Url. + + An Url can be parsed from a String with `new_parse()` or `temp_parse()`. The + parsed fields are decoded. The only field that is not decoded is `query`. + To access the decoded query values, use `new_parse_query(query)`. + + `Url.to_string()` will re-assemble the fields into a valid Url string with + proper percent-encoded values. + + If the Url struct fields are filled in manually, use the actual (un-encoded) + values. To create a raw query string, initialize an `UrlQueryValues` map, use + `UrlQueryValues.add()` to add the query parameters and, finally, call + `UrlQueryValues.to_string()`. +*> struct Url(Printable) { String scheme; @@ -19,6 +38,8 @@ struct Url(Printable) String path; String query; String fragment; + + Allocator allocator; } <* @@ -28,60 +49,67 @@ struct Url(Printable) @require url_string.len > 0 "the url_string must be len 1 or more" @return "the parsed Url" *> -fn Url! parse(String url_string) +fn Url! 
temp_parse(String url_string) => new_parse(url_string, allocator::temp()); + +<* + Parse a URL string into a Url struct. + + @param [in] url_string + @require url_string.len > 0 "the url_string must be len 1 or more" + @return "the parsed Url" +*> +fn Url! new_parse(String url_string, Allocator allocator = allocator::heap()) { - Url url; url_string = url_string.trim(); - if (!url_string.len) - { - return url; - } + if (!url_string) return UrlParsingResult.EMPTY?; + Url url = { .allocator = allocator }; // Parse scheme if (try pos = url_string.index_of("://")) { - url.scheme = url_string[:pos]; + if (!pos) return UrlParsingResult.INVALID_SCHEME?; + url.scheme = url_string[:pos].copy(allocator); url_string = url_string[url.scheme.len + 3 ..]; } - else if (url_string.contains(":")) + else if (try pos = url_string.index_of(":")) { // Handle schemes without authority like 'mailto:' - url.scheme = url_string[:url_string.index_of(":")!]; - url_string = url_string[url.scheme.len + 1 ..]; - url.path = url_string; - + if (!pos) return UrlParsingResult.INVALID_SCHEME?; + url.scheme = url_string[:pos].copy(allocator); + url.path = decode(url_string[pos + 1 ..], PATH, allocator) ?? UrlParsingResult.INVALID_PATH?!; return url; } // Parse host, port if (url.scheme != "urn") { - usz! authority_end = url_string.index_of_chars("/?#"); - if (catch authority_end) - { - authority_end = url_string.len; - } + usz authority_end = url_string.index_of_chars("/?#") ?? 
url_string.len; + String authority = url_string[:authority_end]; - String authority = url_string[:authority_end]!; - - if (try usz userInfo_end = url_string.index_of_char('@')) + if (try user_info_end = authority.index_of_char('@')) { - String userinfo = authority[:userInfo_end]; - String[] userpass = userinfo.split(":"); - defer free(userpass); - url.username = userpass[0]; - if (userpass.len > 1) + String userinfo = authority[:user_info_end]; + String username @noinit; + String password; + @pool(allocator) { - url.password = userpass[1]; - } - authority = authority[userInfo_end + 1 ..]; + String[] userpass = userinfo.tsplit(":", 2); + username = userpass[0]; + if (!username.len) return UrlParsingResult.INVALID_USER?; + url.host = + + url.username = decode(username, HOST, allocator) ?? UrlParsingResult.INVALID_USER?!; + if (userpass.len) url.password = decode(userpass[1], USERPASS, allocator) ?? UrlParsingResult.INVALID_PASSWORD?!; + }; + authority = authority[userinfo.len + 1 ..]; } // Check for IPv6 address in square brackets + String host; if (authority.starts_with("[") && authority.contains("]")) { usz ipv6_end = authority.index_of("]")!; - url.host = authority[0 .. ipv6_end]; // Includes closing bracket + host = authority[0 .. ipv6_end]; // Includes closing bracket if ((ipv6_end + 1) < authority.len && authority[.. ipv6_end] == ":") { url.port = authority[.. ipv6_end + 1].to_uint()!; @@ -89,58 +117,56 @@ fn Url! parse(String url_string) } else { - String[] host_port = authority.split(":"); - defer mem::free(host_port); - if (host_port.len > 1) + @pool(allocator) { - url.host = host_port[0]; - url.port = host_port[1].to_uint()!; - } - else - { - url.host = authority; - } + String[] host_port = authority.tsplit(":", 2); + if (host_port.len > 1) + { + host = host_port[0]; + url.port = host_port[1].to_uint()!; + } + else + { + host = authority; + } + }; } - url_string = url_string[authority_end ..]!; + url.host = decode(host, HOST, allocator) ?? 
UrlParsingResult.INVALID_HOST?!; + url_string = url_string[authority_end ..]; } // Parse path - long query_index = (long)url_string.index_of_char('?') ?? -1; - long fragment_index = (long)url_string.index_of_char('#') ?? -1; + usz! query_index = url_string.index_of_char('?'); + usz! fragment_index = url_string.index_of_char('#'); - if (query_index != -1 || fragment_index != -1) + if (@ok(query_index) || @ok(fragment_index)) { - long pathEnd = min(query_index == -1 ? url_string.len : query_index, - fragment_index == -1 ? url_string.len : fragment_index, - url_string.len); - url.path = url_string[:pathEnd]; + usz path_end = min(query_index ?? url_string.len, fragment_index ?? url_string.len); + url.path = decode(url_string[:path_end], PATH, allocator) ?? UrlParsingResult.INVALID_PATH?!; + url_string = url_string[path_end ..]; } else { - url.path = url_string; + url.path = decode(url_string, PATH, allocator) ?? UrlParsingResult.INVALID_PATH?!; + url_string = ""; } // Remove the path part from url for further parsing - url_string = url_string[url.path.len ..]; + // Parse query if (url_string.starts_with("?")) { - fragment_index = (long)url_string.index_of_char('#') ?? -1; - if (fragment_index == -1) - { - fragment_index = url_string.len; - } - url.query = url_string[1 .. fragment_index - 1]; - url_string = url_string[fragment_index ..]; + usz index = url_string.index_of_char('#') ?? url_string.len; + url.query = url_string[1 .. index - 1].copy(allocator); + url_string = url_string[index ..]; } // Parse fragment if (url_string.starts_with("#")) { - url.fragment = url_string[1 ..]; + url.fragment = decode(url_string[1..], FRAGMENT, allocator) ?? 
UrlParsingResult.INVALID_FRAGMENT?!; } - return url; } @@ -168,17 +194,22 @@ fn String Url.to_string(&self, Allocator allocator = allocator::heap()) @dynamic // Add username and password if they exist if (self.username != "") { - builder.append_chars(self.username); + String username = temp_encode(self.username, USERPASS); + builder.append_chars(username); + if (self.password != "") { builder.append_char(':'); - builder.append_chars(self.password); + + String password = temp_encode(self.password, USERPASS); + builder.append_chars(password); } builder.append_char('@'); } // Add host - builder.append_chars(self.host); + String host = temp_encode(self.host, HOST); + builder.append_chars(host); // Add port if (self.port != 0) @@ -188,9 +219,11 @@ fn String Url.to_string(&self, Allocator allocator = allocator::heap()) @dynamic } // Add path - builder.append_chars(self.path); + String path = temp_encode(self.path, PATH); + builder.append_chars(path); - // Add query if it exists + // Add query if it exists (note that `query` is expected to + // be already properly encoded). if (self.query != "") { builder.append_char('?'); @@ -201,77 +234,156 @@ fn String Url.to_string(&self, Allocator allocator = allocator::heap()) @dynamic if (self.fragment != "") { builder.append_char('#'); - builder.append_chars(self.fragment); + + String fragment = temp_encode(self.fragment, FRAGMENT); + builder.append_chars(fragment); } return builder.copy_str(allocator); }; } +def UrlQueryValueList = List(<String>); + +struct UrlQueryValues +{ + inline HashMap(<String, UrlQueryValueList>) map; + UrlQueryValueList key_order; +} + <* Parse the query parameters of the Url into a UrlQueryValues map. - @param [in] self + @param [in] query + @return "a UrlQueryValues HashMap" +*> +fn UrlQueryValues temp_parse_query(String query) => parse_query(query, allocator::temp()); + +<* + Parse the query parameters of the Url into a UrlQueryValues map. 
+ + @param [in] query + @return "a UrlQueryValues HashMap" +*> +fn UrlQueryValues new_parse_query(String query) => parse_query(query, allocator::heap()); + +<* + Parse the query parameters of the Url into a UrlQueryValues map. + + @param [in] query @param [inout] allocator @return "a UrlQueryValues HashMap" *> -fn UrlQueryValues Url.query_values(&self, Allocator allocator) +fn UrlQueryValues parse_query(String query, Allocator allocator) { UrlQueryValues vals; - vals.init(allocator); - - Splitter raw_vals = self.query.tokenize("&"); + vals.map.init(allocator); + vals.key_order.new_init(allocator: allocator); + Splitter raw_vals = query.tokenize("&"); while (try String rv = raw_vals.next()) { @pool(allocator) { String[] parts = rv.tsplit("=", 2); - if (try existing = vals.get_ref(parts[0])) - { - existing.push(parts[1]); - } - else - { - UrlQueryValueList new_list; - new_list.new_init_with_array({ parts[1] }, allocator); - vals[parts[0]] = new_list; - } + String key = temp_decode(parts[0], QUERY) ?? parts[0]; + vals.add(key, parts.len == 1 ? key : (temp_decode(parts[1], QUERY) ?? parts[1])); }; } return vals; } <* - Parse the query parameters of the Url into a UrlQueryValues map, - to be freed using values.free() + Add copies of the key and value strings to the UrlQueryValues map. These + copies are freed when the UrlQueryValues map is freed. 
@param [in] self + @param key + @param value @return "a UrlQueryValues map" *> -fn UrlQueryValues Url.new_query_values(&self) +fn UrlQueryValues* UrlQueryValues.add(&self, String key, String value) { - return self.query_values(allocator::heap()) @inline; + String value_copy = value.copy(self.allocator); + if (try existing = self.get_ref(key)) + { + existing.push(value_copy); + } + else + { + UrlQueryValueList new_list; + new_list.new_init_with_array({ value_copy }, self.allocator); + (*self)[key] = new_list; + self.key_order.push(key.copy(self.allocator)); + } + return self; } + <* - Parse the query parameters of the Url into a UrlQueryValues map. - stored on the temp allocator. + Stringify UrlQueryValues into an encoded query string. @param [in] self - @return "a UrlQueryValues map" + @param [inout] allocator + @return "a percent-encoded query string" *> -fn UrlQueryValues Url.temp_query_values(&self) +fn String UrlQueryValues.to_string(&self, Allocator allocator = allocator::heap()) @dynamic { - return self.query_values(allocator::temp()) @inline; + @pool(allocator) + { + DString builder = dstring::temp_new(); + + usz i; + foreach (key: self.key_order) + { + String encoded_key = temp_encode(key, QUERY); + + UrlQueryValueList! values = self.map.get(key); + if (catch values) continue; + + foreach (value: values) + { + if (i > 0) builder.append_char('&'); + + builder.append_chars(encoded_key); + builder.append_char('='); + + String encoded_value = temp_encode(value, QUERY); + builder.append_chars(encoded_value); + i++; + } + }; + + return builder.copy_str(allocator); + }; } fn void UrlQueryValues.free(&self) { - self.map.@each(;String key, UrlQueryValueList value) + self.map.@each(;String key, UrlQueryValueList values) { - value.free(); + foreach (value: values) value.free(self.allocator); + values.free(); }; self.map.free(); + + foreach (&key: self.key_order) key.free(self.allocator); + self.key_order.free(); } +<* + Free an Url struct. 
+ + @param [in] self +*> +fn void Url.free(&self) +{ + if (!self.allocator) return; + self.scheme.free(self.allocator); + self.host.free(self.allocator); + self.username.free(self.allocator); + self.password.free(self.allocator); + self.path.free(self.allocator); + self.query.free(self.allocator); + self.fragment.free(self.allocator); +} diff --git a/lib/std/net/url_encoding.c3 b/lib/std/net/url_encoding.c3 new file mode 100644 index 000000000..e6f9011ea --- /dev/null +++ b/lib/std/net/url_encoding.c3 @@ -0,0 +1,197 @@ +<* + This module section provides encoding and decoding functions for URL + components according to RFC 3986. +*> +module std::net::url; +import std::encoding::hex; + +enum UrlEncodingMode : char (String allowed) +{ + UNRESERVED = "-_.~", // section 2.3 + PATH = "$&+,/:;=@", // section 3.3 + HOST = "!$&'()*+,;=:[]", // section 3.2.2 (also include ':', '[', ']' for ipv6 hosts) + USERPASS = ";:&=+$,", // section 3.2.1 + QUERY = "", // section 3.4 + FRAGMENT = "$&+,/:;=?@!()*", // section 4.1 +} + +fault UrlDecodingError +{ + INVALID_HEX +} + +<* + Returns true if char c should be encoded according to RFC 3986. + + @param c "Character to check if it should be encoded." + @param mode "Url encoding mode." +*> +fn bool should_encode(char c, UrlEncodingMode mode) @private +{ + // alphanumeric characters are allowed + if (c.is_alnum()) return false; + + // unreserved characters are allowed + if (try UrlEncodingMode.UNRESERVED.allowed.index_of_char(c)) return false; + + // some mode-specific characters are allowed + if (try mode.allowed.index_of_char(c)) return false; + + // everything else must be encoded + return true; +} + +<* + Calculate the length of the percent-encoded string. +*> +fn usz encode_len(String s, UrlEncodingMode mode) @inline +{ + usz n; + foreach (c: s) + { + if (!should_encode(c, mode)) continue; + if (c != ' ' || mode != QUERY) + { + n++; + } + } + return s.len + 2 * n; +} + +<* + Encode the string s for a given encoding mode. 
+ Returned string must be freed. + + @param s "String to encode" + @param mode "Url encoding mode" + @param [inout] allocator + @return "Percent-encoded String" +*> +fn String encode(String s, UrlEncodingMode mode, Allocator allocator) +{ + usz n = encode_len(s, mode); + @pool(allocator) + { + DString builder = dstring::temp_with_capacity(n); + + foreach(i, c: s) + { + switch + { + // encode spaces in queries + case c == ' ' && mode == QUERY: + builder.append_char('+'); + + // add encoded char + case should_encode(c, mode): + builder.append_char('%'); + String hex = hex::encode_temp(s[i:1]); + builder.append(hex.temp_ascii_to_upper()); + + // use char, no encoding needed + default: + builder.append_char(c); + } + } + + return builder.copy_str(allocator); + }; +} + +<* + Encode the string s for a given encoding mode. + Returned string must be freed. + + @param s "String to encode" + @param mode "Url encoding mode" + @return "Percent-encoded String" +*> +fn String new_encode(String s, UrlEncodingMode mode) => encode(s, mode, allocator::heap()); + +<* + Encode string s for a given encoding mode, stored on the temp allocator. + + @param s "String to encode" + @param mode "Url encoding mode" + @return "Percent-encoded String" +*> +fn String temp_encode(String s, UrlEncodingMode mode) => encode(s, mode, allocator::temp()); + +<* + Calculate the length of the percent-decoded string. + + @return! UrlDecodingError.INVALID_HEX +*> +fn usz! decode_len(String s, UrlEncodingMode mode) @inline +{ + usz n; + foreach (i, c: s) + { + if (c != '%') continue; + if (i + 2 >= s.len || !s[i+1].is_xdigit() || !s[i+2].is_xdigit()) + { + return UrlDecodingError.INVALID_HEX?; + } + n++; + } + return s.len - 2 * n; +} + +<* + Decode string s for a given encoding mode. + Returned string must be freed. + + @param s "String to decode" + @param mode "Url encoding mode" + @param [inout] allocator + @return "Percent-decoded String" +*> +fn String! 
decode(String s, UrlEncodingMode mode, Allocator allocator) +{ + usz n = decode_len(s, mode)!; + @pool(allocator) + { + DString builder = dstring::temp_with_capacity(n); + + for (usz i = 0; i < s.len; i++) + { + switch (s[i]) + { + // decode encoded char + case '%': + char[] hex = hex::decode_temp(s[i+1:2])!; + builder.append(hex); + i += 2; + + // decode space when in queries + case '+': + builder.append_char((mode == QUERY) ? ' ' : '+'); + + // use char, no decoding needed + default: + builder.append_char(s[i]); + } + } + + return builder.copy_str(allocator); + }; +} + +<* + Decode string s for a given encoding mode. + Returned string must be freed. + + @param s "String to decode" + @param mode "Url encoding mode" + @return "Percent-decoded String" +*> +fn String! new_decode(String s, UrlEncodingMode mode) => decode(s, mode, allocator::heap()); + +<* + Decode string s for a given encoding mode, stored on the temp allocator. + + @param s "String to decode" + @param mode "Url encoding mode" + @return "Percent-decoded String" +*> +fn String! temp_decode(String s, UrlEncodingMode mode) => decode(s, mode, allocator::temp()); diff --git a/releasenotes.md b/releasenotes.md index 18a28a03c..c1fcf572f 100644 --- a/releasenotes.md +++ b/releasenotes.md @@ -147,6 +147,7 @@ - Add `memcpy` / `memset` / `memcmp` to nolibc. - Add `sort::quickselect` to find the k-th smallest element in an unordered list. - Add `sort::is_sorted` to determine if a list is sorted. +- Implement RFC 3986 for url encoding and decoding. 
## 0.6.4 Change list diff --git a/test/unit/stdlib/net/url.c3 b/test/unit/stdlib/net/url.c3 index 7ea8b2bfa..f13576240 100644 --- a/test/unit/stdlib/net/url.c3 +++ b/test/unit/stdlib/net/url.c3 @@ -7,7 +7,8 @@ import std::net::url; fn void test_parse_foo() { - Url url = url::parse("foo://example.com:8042/over/there?name=ferret#nose")!!; + Url url = url::new_parse("foo://example.com:8042/over/there?name=ferret#nose")!!; + defer url.free(); assert(url.scheme == "foo", "got '%s'", url.scheme); assert(url.host == "example.com", "got '%s'", url.host); @@ -21,7 +22,8 @@ fn void test_parse_foo() fn void test_parse_urn() { - Url url = url::parse("urn:example:animal:ferret:nose")!!; + Url url = url::new_parse("urn:example:animal:ferret:nose")!!; + defer url.free(); assert(url.scheme == "urn"); assert(url.host == ""); @@ -35,7 +37,8 @@ fn void test_parse_urn() fn void test_parse_jdbc() { - Url url = url::parse("jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true")!!; + Url url = url::new_parse("jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true")!!; + defer url.free(); assert(url.scheme == "jdbc:mysql"); assert(url.host == "localhost"); @@ -49,7 +52,8 @@ fn void test_parse_jdbc() fn void test_parse_ftp() { - Url url = url::parse("ftp://ftp.is.co.za/rfc/rfc1808.txt")!!; + Url url = url::new_parse("ftp://ftp.is.co.za/rfc/rfc1808.txt")!!; + defer url.free(); assert(url.scheme == "ftp"); assert(url.host == "ftp.is.co.za"); @@ -63,7 +67,8 @@ fn void test_parse_ftp() fn void test_parse_http() { - Url url = url::parse("http://www.ietf.org/rfc/rfc2396.txt#header1")!!; + Url url = url::new_parse("http://www.ietf.org/rfc/rfc2396.txt#header1")!!; + defer url.free(); assert(url.scheme == "http"); assert(url.host == "www.ietf.org"); @@ -77,7 +82,8 @@ fn void test_parse_http() fn void test_parse_ldap() { - Url url = url::parse("ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two")!!; + Url url = 
url::new_parse("ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two")!!; + defer url.free(); assert(url.scheme == "ldap"); assert(url.host == "[2001:db8::7]"); @@ -91,7 +97,8 @@ fn void test_parse_ldap() fn void test_parse_mailto() { - Url url = url::parse("mailto:John.Doe@example.com")!!; + Url url = url::new_parse("mailto:John.Doe@example.com")!!; + defer url.free(); assert(url.scheme == "mailto"); assert(url.host == ""); @@ -103,9 +110,10 @@ fn void test_parse_mailto() assert(url.fragment == ""); } -fn void test_parse_news() +fn void test_new_parses() { - Url url = url::parse("news:comp.infosystems.www.servers.unix")!!; + Url url = url::new_parse("news:comp.infosystems.www.servers.unix")!!; + defer url.free(); assert(url.scheme == "news"); assert(url.host == ""); @@ -119,7 +127,8 @@ fn void test_parse_news() fn void test_parse_tel() { - Url url = url::parse("tel:+1-816-555-1212")!!; + Url url = url::new_parse("tel:+1-816-555-1212")!!; + defer url.free(); assert(url.scheme == "tel"); assert(url.host == ""); @@ -133,7 +142,8 @@ fn void test_parse_tel() fn void test_parse_telnet() { - Url url = url::parse("telnet://192.0.2.16:80/")!!; + Url url = url::new_parse("telnet://192.0.2.16:80/")!!; + defer url.free(); assert(url.scheme == "telnet"); assert(url.host == "192.0.2.16"); @@ -147,7 +157,8 @@ fn void test_parse_telnet() fn void test_parse_urn2() { - Url url = url::parse("urn:oasis:names:specification:docbook:dtd:xml:4.1.2")!!; + Url url = url::new_parse("urn:oasis:names:specification:docbook:dtd:xml:4.1.2")!!; + defer url.free(); assert(url.scheme == "urn"); assert(url.host == ""); @@ -161,16 +172,54 @@ fn void test_parse_urn2() fn void test_parse_empty() { - Url url = url::parse(" ")!!; + assert(@catch(url::new_parse(" ")) == UrlParsingResult.EMPTY); +} - assert(url.scheme == ""); - assert(url.host == ""); +// Parser tests with escape sequences + +fn void test_parse_path_with_escape_sequence() +{ + Url url = 
url::new_parse("foo://example.com:8042/file/name%20one%26two?name=ferret#nose")!!; + defer url.free(); + + assert(url.scheme == "foo", "got '%s'", url.scheme); + assert(url.host == "example.com", "got '%s'", url.host); + assert(url.port == 8042, "got '%d'", url.port); + assert(url.username == "", "got '%s'", url.username); + assert(url.password == "", "got '%s'", url.password); + assert(url.path == "/file/name one&two", "got '%s'", url.path); + assert(url.query == "name=ferret", "got '%s'", url.query); + assert(url.fragment == "nose", "got: '%s'", url.fragment); +} + +fn void test_parse_username_and_password_with_escape_sequence() +{ + Url url = url::new_parse("jdbc:mysql://test%20user:ouu%40pppssss@localhost:3306/sakila?profileSQL=true")!!; + defer url.free(); + + assert(url.scheme == "jdbc:mysql"); + assert(url.host == "localhost"); + assert(url.port == 3306); + assert(url.username == "test user", "got '%s'", url.username); + assert(url.password == "ouu@pppssss", "got '%s'", url.password); + assert(url.path == "/sakila"); + assert(url.query == "profileSQL=true"); + assert(url.fragment == ""); +} + +fn void test_parse_fragment_with_escape_sequence() +{ + Url url = url::new_parse("http://www.ietf.org/rfc/rfc2396.txt#header%201%262")!!; + defer url.free(); + + assert(url.scheme == "http"); + assert(url.host == "www.ietf.org"); assert(url.port == 0); assert(url.username == "", "got '%s'", url.username); assert(url.password == "", "got '%s'", url.password); - assert(url.path == ""); + assert(url.path == "/rfc/rfc2396.txt"); assert(url.query == ""); - assert(url.fragment == ""); + assert(url.fragment == "header 1&2"); } // to_string() tests @@ -179,6 +228,7 @@ fn void test_string_foo() { Url url = {.scheme="foo", .host="example.com", .port=8042, .path="/over/there", .query="name=ferret", .fragment="nose"}; String str = string::new_format("%s", url); + defer str.free(); assert(str == "foo://example.com:8042/over/there?name=ferret#nose"); } @@ -187,6 +237,7 @@ fn void 
test_string_urn() { Url url = {.scheme="urn", .path="example:animal:ferret:nose"}; String str = string::new_format("%s", url); + defer str.free(); assert(str == "urn:example:animal:ferret:nose"); } @@ -195,6 +246,7 @@ fn void test_string_jdbc() { Url url = {.scheme="jdbc:mysql", .host="localhost", .port=3306, .username="test_user", .password="ouupppssss", .path="/sakila", .query="profileSQL=true"}; String str = string::new_format("%s", url); + defer str.free(); assert(str == "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true"); } @@ -203,30 +255,34 @@ fn void test_string_ftp() { Url url = {.scheme="ftp", .host="ftp.is.co.za", .path="/rfc/rfc1808.txt"}; String str = string::new_format("%s", url); + defer str.free(); assert(str == "ftp://ftp.is.co.za/rfc/rfc1808.txt"); } fn void test_string_http() { - Url url = {.scheme="http", .host="www.ietf.org", .path="/rfc/rfc2396.txt#header1"}; + Url url = {.scheme="http", .host="www.ietf.org", .path="/rfc/rfc2396.txt", .fragment="header1"}; String str = string::new_format("%s", url); + defer str.free(); - assert(str == "http://www.ietf.org/rfc/rfc2396.txt#header1"); + assert(str == "http://www.ietf.org/rfc/rfc2396.txt#header1", "got: '%s'", str); } fn void test_string_ldap() { - Url url = {.scheme="ldap", .host="[2001:db8::7]", .path="/c=GB?objectClass=one&objectClass=two"}; + Url url = {.scheme="ldap", .host="[2001:db8::7]", .path="/c=GB", .query="objectClass=one&objectClass=two"}; String str = string::new_format("%s", url); + defer str.free(); - assert(str == "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two"); + assert(str == "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two", "got: '%s'", str); } fn void test_string_mailto() { Url url = {.scheme="mailto", .path="John.Doe@example.com"}; String str = string::new_format("%s", url); + defer str.free(); assert(str == "mailto:John.Doe@example.com"); } @@ -235,6 +291,7 @@ fn void test_string_news() { Url url = {.scheme="news", 
.path="comp.infosystems.www.servers.unix"}; String str = string::new_format("%s", url); + defer str.free(); assert(str == "news:comp.infosystems.www.servers.unix"); } @@ -242,6 +299,7 @@ fn void test_string_tel() { Url url = {.scheme="tel", .path="+1-816-555-1212"}; String str = string::new_format("%s", url); + defer str.free(); assert(str == "tel:+1-816-555-1212"); } @@ -250,6 +308,7 @@ fn void test_string_telnet() { Url url = {.scheme="telnet", .host="192.0.2.16", .port=80, .path="/"}; String str = string::new_format("%s", url); + defer str.free(); assert(str == "telnet://192.0.2.16:80/"); } @@ -258,6 +317,7 @@ fn void test_string_urn2() { Url url = {.scheme="urn", .path="oasis:names:specification:docbook:dtd:xml:4.1.2"}; String str = string::new_format("%s", url); + defer str.free(); assert(str == "urn:oasis:names:specification:docbook:dtd:xml:4.1.2"); } @@ -266,6 +326,7 @@ fn void test_string_empty() { Url url = {}; String str = string::new_format("%s", url); + defer str.free(); assert(str == ""); } @@ -274,9 +335,10 @@ fn void test_string_empty() fn void test_query_values1() { - Url url = url::parse("foo://example.com:8042/over/there?name=ferret=ok#nose")!!; + Url url = url::new_parse("foo://example.com:8042/over/there?name=ferret=ok#nose")!!; + defer url.free(); - UrlQueryValues vals = url.new_query_values(); + UrlQueryValues vals = url::temp_parse_query(url.query); defer vals.free(); assert(vals.len() == 1); @@ -288,9 +350,10 @@ fn void test_query_values1() fn void test_query_values2() { - Url url = url::parse("foo://example.com:8042/over/there?name=ferret&age=99&age=11#nose")!!; + Url url = url::new_parse("foo://example.com:8042/over/there?name=ferret&age=99&age=11#nose")!!; + defer url.free(); - UrlQueryValues vals = url.new_query_values(); + UrlQueryValues vals = url::new_parse_query(url.query); defer vals.free(); assert(vals.len() == 2); @@ -304,11 +367,93 @@ fn void test_query_values2() assert(l_age[1] == "11"); } -fn void test_query_values_withempty() 
+fn void test_escaped_query_values() { - Url url = url::parse("foo://example.com:8042/over/there?name=ferret&&&age=99&age=11")!!; + Url url = url::new_parse("foo://example.com:8042/over/there?k%3Bey=%3Ckey%3A+0x90%3E&age=99&age=11#nose")!!; + defer url.free(); - UrlQueryValues vals = url.new_query_values(); + UrlQueryValues vals = url::new_parse_query(url.query); defer vals.free(); assert(vals.len() == 2); -} \ No newline at end of file + + UrlQueryValueList l_key = vals["k;ey"]!!; + assert(l_key.len() == 1); + assert(l_key[0] == "<key: 0x90>"); +} + +fn void test_query_values_withempty() +{ + Url url = url::new_parse("foo://example.com:8042/over/there?name=ferret&&&age=99&age=11")!!; + defer url.free(); + + UrlQueryValues vals = url::new_parse_query(url.query); + defer vals.free(); + assert(vals.len() == 2); +} + +// url compose and parse should be idempotent + +fn void test_url_idempotence() +{ + UrlQueryValues query_builder; + query_builder.new_init(); + defer query_builder.free(); + + query_builder.add("profileSQL", "true"); + query_builder.add("k;ey", "<key: 0x90>"); + + String query = query_builder.to_string(); + defer query.free(); + + Url url = { + .scheme = "jdbc:mysql", + .host = "localhost", + .port = 3306, + .username = "test user", + .password = "ouu@pppssss", + .path = "/sakila", + .query = query, + .fragment = "no se", + }; + + String url_string = url.to_string(); + defer url_string.free(); + + String want = "jdbc:mysql://test%20user:ouu%40pppssss@localhost:3306" + "/sakila?profileSQL=true&k%3Bey=%3Ckey%3A+0x90%3E#no%20se"; + assert(url_string == want, "got: %s, want: %s", url_string, want); + + Url parsed = url::new_parse(url_string)!!; + defer parsed.free(); + + UrlQueryValues vals = url::new_parse_query(parsed.query); + defer vals.free(); + assert(vals.len() == 2); + + UrlQueryValueList key; + key = vals["k;ey"]!!; + assert(key.len() == 1); + assert(key[0] == "<key: 0x90>"); + + key = vals["profileSQL"]!!; + assert(key.len() == 1); + assert(key[0] == "true"); + + String 
parsed_query = vals.to_string(); + defer parsed_query.free(); + + assert(parsed.scheme == url.scheme); + assert(parsed.host == url.host); + assert(parsed.port == url.port); + assert(parsed.username == url.username); + assert(parsed.password == url.password); + assert(parsed.path == url.path); + assert(parsed.query == parsed_query); + assert(parsed.fragment == url.fragment); + + String parsed_string = parsed.to_string(); + defer parsed_string.free(); + + assert(url_string == parsed_string); +} + diff --git a/test/unit/stdlib/net/url_encoding.c3 b/test/unit/stdlib/net/url_encoding.c3 new file mode 100644 index 000000000..b52047e61 --- /dev/null +++ b/test/unit/stdlib/net/url_encoding.c3 @@ -0,0 +1,266 @@ +module url_encode_test @test; + +import std::io; +import std::net::url @public; + +struct EncodeTest +{ + String in; + String out; + anyfault err; + UrlEncodingMode mode; +} + +EncodeTest[*] decode_with_error_tests @local = { + { + "", + "", + anyfault{}, + UrlEncodingMode.QUERY, + }, + { + "abc", + "abc", + anyfault{}, + UrlEncodingMode.QUERY, + }, + { + "1%41", + "1A", + anyfault{}, + UrlEncodingMode.QUERY, + }, + { + "1%41%42%43", + "1ABC", + anyfault{}, + UrlEncodingMode.QUERY, + }, + { + "%4a", + "J", + anyfault{}, + UrlEncodingMode.QUERY, + }, + { + "%6F", + "o", + anyfault{}, + UrlEncodingMode.QUERY, + }, + { + "%", + "", + UrlDecodingError.INVALID_HEX, + UrlEncodingMode.QUERY, + }, + { + "%a", + "", + UrlDecodingError.INVALID_HEX, + UrlEncodingMode.QUERY, + }, + { + "%1", + "", + UrlDecodingError.INVALID_HEX, + UrlEncodingMode.QUERY, + }, + { + "123%45%6", + "", + UrlDecodingError.INVALID_HEX, + UrlEncodingMode.QUERY, + }, + { + "%zzzzz", + "", + UrlDecodingError.INVALID_HEX, + UrlEncodingMode.QUERY, + }, + { + "a+b", + "a b", + anyfault{}, + UrlEncodingMode.QUERY, + }, + { + "a%20b", + "a b", + anyfault{}, + UrlEncodingMode.QUERY, + }, +}; + +fn void test_decoding_with_error() +{ + String! 
actual; + @pool() { + foreach (test: decode_with_error_tests) + { + actual = url::temp_decode(test.in, test.mode); + if (catch excuse = actual) + { + assert(excuse == test.err, "unescape(%s, %s); " + "got: %s, want: %s", test.in, test.mode, excuse, test.err); + continue; + } + assert(actual == test.out, "unescape(%s, %s); " + "got: %s, want: %s", test.in, test.mode, actual, test.out); + } + }; +} + +EncodeTest[*] encode_tests @local = { + { + "", + "", + anyfault{}, + UrlEncodingMode.PATH, + }, + { + "abc", + "abc", + anyfault{}, + UrlEncodingMode.PATH, + }, + { + "abc+def", + "abc+def", + anyfault{}, + UrlEncodingMode.PATH, + }, + { + "a/b", + "a/b", + anyfault{}, + UrlEncodingMode.PATH, + }, + { + "one two", + "one%20two", + anyfault{}, + UrlEncodingMode.PATH, + }, + { + "10%", + "10%25", + anyfault{}, + UrlEncodingMode.PATH, + }, + { + "", + "", + anyfault{}, + UrlEncodingMode.QUERY, + }, + { + "abc", + "abc", + anyfault{}, + UrlEncodingMode.QUERY, + }, + { + "one two", + "one+two", + anyfault{}, + UrlEncodingMode.QUERY, + }, + { + "10%", + "10%25", + anyfault{}, + UrlEncodingMode.QUERY, + }, + { + " ?&=#+%!<>#\"{}|\\^[]`☺\t:/@$'()*,;", + "+%3F%26%3D%23%2B%25%21%3C%3E%23%22%7B%7D%7C%5C%5E%5B%5D%60%E2%98%BA%09%3A%2F%40%24%27%28%29%2A%2C%3B", + anyfault{}, + UrlEncodingMode.QUERY, + }, + +}; + +fn void test_percent_encode_and_decode() +{ + String actual; + @pool() { + foreach (test: encode_tests) + { + actual = url::temp_encode(test.in, test.mode); + assert(actual == test.out, "escape(%s, %s); " + "got: %s, want: %s", test.in, test.mode, actual, test.out); + + actual = url::temp_decode(test.out, test.mode)!!; + assert(actual == test.in, "unescape(%s, %s); " + "got: %s, want: %s", test.out, test.mode, actual, test.in); + } + }; +} + +struct ShouldEncodeTest +{ + char in; + UrlEncodingMode mode; + bool escape; +} + +ShouldEncodeTest[*] should_encode_tests = { + {'a', UrlEncodingMode.PATH, false}, + {'a', UrlEncodingMode.USERPASS, false}, + {'a', 
UrlEncodingMode.QUERY, false}, + {'a', UrlEncodingMode.FRAGMENT, false}, + {'a', UrlEncodingMode.HOST, false}, + {'z', UrlEncodingMode.PATH, false}, + {'A', UrlEncodingMode.PATH, false}, + {'Z', UrlEncodingMode.PATH, false}, + {'0', UrlEncodingMode.PATH, false}, + {'9', UrlEncodingMode.PATH, false}, + {'-', UrlEncodingMode.PATH, false}, + {'-', UrlEncodingMode.USERPASS, false}, + {'-', UrlEncodingMode.QUERY, false}, + {'-', UrlEncodingMode.FRAGMENT, false}, + {'.', UrlEncodingMode.PATH, false}, + {'_', UrlEncodingMode.PATH, false}, + {'~', UrlEncodingMode.PATH, false}, + + {'/', UrlEncodingMode.USERPASS, true}, + {'?', UrlEncodingMode.USERPASS, true}, + {'@', UrlEncodingMode.USERPASS, true}, + {'$', UrlEncodingMode.USERPASS, false}, + {'&', UrlEncodingMode.USERPASS, false}, + {'+', UrlEncodingMode.USERPASS, false}, + {',', UrlEncodingMode.USERPASS, false}, + {';', UrlEncodingMode.USERPASS, false}, + {'=', UrlEncodingMode.USERPASS, false}, + + {'!', UrlEncodingMode.HOST, false}, + {'$', UrlEncodingMode.HOST, false}, + {'&', UrlEncodingMode.HOST, false}, + {'\'', UrlEncodingMode.HOST, false}, + {'(', UrlEncodingMode.HOST, false}, + {')', UrlEncodingMode.HOST, false}, + {'*', UrlEncodingMode.HOST, false}, + {'+', UrlEncodingMode.HOST, false}, + {',', UrlEncodingMode.HOST, false}, + {';', UrlEncodingMode.HOST, false}, + {'=', UrlEncodingMode.HOST, false}, + {'0', UrlEncodingMode.HOST, false}, + {'9', UrlEncodingMode.HOST, false}, + {'A', UrlEncodingMode.HOST, false}, + {'z', UrlEncodingMode.HOST, false}, + {'_', UrlEncodingMode.HOST, false}, + {'-', UrlEncodingMode.HOST, false}, + {'.', UrlEncodingMode.HOST, false}, +}; + +fn void test_should_encode() +{ + bool actual; + foreach (test: should_encode_tests) + { + actual = url::should_encode(test.in, test.mode); + assert(actual == test.escape, "should_encode(%c, %s); " + "got: %s, want: %s", test.in, test.mode, actual, test.escape); + } +}