From 0e44e63fa82ee0f28ee2247a9692b1cf5ee49420 Mon Sep 17 00:00:00 2001
From: konimarti <30975830+konimarti@users.noreply.github.com>
Date: Sun, 12 Jan 2025 22:52:25 +0100
Subject: [PATCH] net/url: implement url encoding (RFC 3986) (#1795)

* net/url: implement url encoding (RFC 3986)

Implement url percent-encoding and -decoding functions according to RFC
3986. Add unit tests.

Link: https://datatracker.ietf.org/doc/html/rfc3986

* net/url: ensure correct encoding of URL components

Add encoding and decoding methods to the Url struct components according
to RFC 3986.

An Url can be parsed from a String with `new_parse()` or `temp_parse()`.
The parsed fields are decoded. The only field that is not decoded is
`query`. To access the decoded query values, pass it to
`new_parse_query()` or `temp_parse_query()`.

`Url.to_string()` will re-assemble the fields into a valid Url string
with proper percent-encoded values.

If the Url struct fields are filled in manually, use the actual
(un-encoded) values. To create a raw query string, initialize an
`UrlQueryValues` map, use `UrlQueryValues.add()` to add the query
parameters and, finally, call `UrlQueryValues.to_string()`.

---------

Co-authored-by: Christoffer Lerno <christoffer@aegik.com>
---
 lib/std/net/url.c3                   | 300 ++++++++++++++++++---------
 lib/std/net/url_encoding.c3          | 197 ++++++++++++++++++
 releasenotes.md                      |   1 +
 test/unit/stdlib/net/url.c3          | 203 +++++++++++++++---
 test/unit/stdlib/net/url_encoding.c3 | 266 ++++++++++++++++++++++++
 5 files changed, 844 insertions(+), 123 deletions(-)
 create mode 100644 lib/std/net/url_encoding.c3
 create mode 100644 test/unit/stdlib/net/url_encoding.c3
diff --git a/lib/std/net/url.c3 b/lib/std/net/url.c3
index 116da036b..3514c100e 100644
--- a/lib/std/net/url.c3
+++ b/lib/std/net/url.c3
@@ -2,13 +2,32 @@ module std::net::url;
 
 import std::io, std::collections::map, std::collections::list;
 
-def UrlQueryValueList = List(<String>);
-
-struct UrlQueryValues
+fault UrlParsingResult
 {
-	inline HashMap(<String, UrlQueryValueList>) map;
+	EMPTY,             // the url string is empty after trimming
+	INVALID_SCHEME,    // the scheme separator is the very first character
+	INVALID_USER,      // the user name is empty or cannot be percent-decoded
+	INVALID_PASSWORD,  // the password cannot be percent-decoded
+	INVALID_HOST,      // the host cannot be percent-decoded
+	INVALID_PATH,      // the path cannot be percent-decoded
+	INVALID_FRAGMENT,  // the fragment cannot be percent-decoded
 }
 
+<*
+ Represents the actual (decoded) Url.
+
+ An Url can be parsed from a String with `new_parse()` or `temp_parse()`. The
+ parsed fields are decoded. The only field that is not decoded is `query`.
+ To access the decoded query values, use `new_parse_query(query)`.
+
+ `Url.to_string()` will re-assemble the fields into a valid Url string with
+ proper percent-encoded values.
+
+ If the Url struct fields are filled in manually, use the actual (un-encoded)
+ values. To create a raw query string, initialize an `UrlQueryValues` map, use
+ `UrlQueryValues.add()` to add the query parameters and, finally, call
+ `UrlQueryValues.to_string()`.
+*>
 struct Url(Printable)
 {
 	String scheme;
@@ -19,6 +38,8 @@ struct Url(Printable)
 	String path;
 	String query;
 	String fragment;
+
+	Allocator allocator;
 }
 
 <*
@@ -28,60 +49,67 @@ struct Url(Printable)
  @require url_string.len > 0 "the url_string must be len 1 or more"
  @return "the parsed Url"
 *>
-fn Url! parse(String url_string)
+fn Url! temp_parse(String url_string) => new_parse(url_string, allocator::temp());
+
+<*
+ Parse a URL string into a Url struct. The string fields are copies made
+ with `allocator`, percent-decoded except `query`; free with `Url.free()`.
+
+ @param [in] url_string
+ @param [inout] allocator "allocator used for the parsed fields"
+ @require url_string.len > 0 "the url_string must be len 1 or more"
+ @return "the parsed Url"
+*>
+fn Url! new_parse(String url_string, Allocator allocator = allocator::heap())
 {
-	Url url;
 	url_string = url_string.trim();
-	if (!url_string.len)
-	{
-		return url;
-	}
+	if (!url_string) return UrlParsingResult.EMPTY?;
+	Url url = { .allocator = allocator };
+	defer catch url.free(); // release already-allocated fields when parsing fails
 
 	// Parse scheme
 	if (try pos = url_string.index_of("://"))
 	{
-		url.scheme = url_string[:pos];
+		if (!pos) return UrlParsingResult.INVALID_SCHEME?;
+		url.scheme = url_string[:pos].copy(allocator);
 		url_string = url_string[url.scheme.len + 3 ..];
 	}
-	else if (url_string.contains(":"))
+	else if (try pos = url_string.index_of(":"))
 	{
 		// Handle schemes without authority like 'mailto:'
-		url.scheme = url_string[:url_string.index_of(":")!];
-		url_string = url_string[url.scheme.len + 1 ..];
-		url.path = url_string;
-
+		if (!pos) return UrlParsingResult.INVALID_SCHEME?;
+		url.scheme = url_string[:pos].copy(allocator);
+		url.path = decode(url_string[pos + 1 ..], PATH, allocator) ?? UrlParsingResult.INVALID_PATH?!;
 		return url;
 	}
 
 	// Parse host, port
 	if (url.scheme != "urn")
 	{
-		usz! authority_end = url_string.index_of_chars("/?#");
-		if (catch authority_end)
-		{
-			authority_end = url_string.len;
-		}
+		usz authority_end = url_string.index_of_chars("/?#") ?? url_string.len;
+		String authority = url_string[:authority_end];
 
-		String authority = url_string[:authority_end]!;
-
-		if (try usz userInfo_end = url_string.index_of_char('@'))
+		if (try user_info_end = authority.index_of_char('@'))
 		{
-			String userinfo = authority[:userInfo_end];
-			String[] userpass = userinfo.split(":");
-			defer free(userpass);
-			url.username = userpass[0];
-			if (userpass.len > 1)
+			String userinfo = authority[:user_info_end];
+			@pool(allocator)
 			{
-				url.password = userpass[1];
-			}
-			authority = authority[userInfo_end + 1 ..];
+				String[] userpass = userinfo.tsplit(":", 2);
+				String username = userpass[0];
+				if (!username.len) return UrlParsingResult.INVALID_USER?;
+				url.username = decode(username, USERPASS, allocator) ?? UrlParsingResult.INVALID_USER?!;
+				// Without a ':' separator tsplit yields one element: there is no password.
+				if (userpass.len > 1) url.password = decode(userpass[1], USERPASS, allocator) ?? UrlParsingResult.INVALID_PASSWORD?!;
+			};
+			authority = authority[userinfo.len + 1 ..];
 		}
 
 		// Check for IPv6 address in square brackets
+		String host;
 		if (authority.starts_with("[") && authority.contains("]"))
 		{
 			usz ipv6_end = authority.index_of("]")!;
-			url.host = authority[0 .. ipv6_end];  // Includes closing bracket
+			host = authority[0 .. ipv6_end];  // Includes closing bracket
 			if ((ipv6_end + 1) < authority.len && authority[.. ipv6_end] == ":")
 			{
 				url.port = authority[.. ipv6_end + 1].to_uint()!;
@@ -89,58 +117,56 @@ fn Url! parse(String url_string)
 		}
 		else
 		{
-			String[] host_port = authority.split(":");
-			defer mem::free(host_port);
-			if (host_port.len > 1)
+			@pool(allocator)
 			{
-				url.host = host_port[0];
-				url.port = host_port[1].to_uint()!;
-			}
-			else
-			{
-				url.host = authority;
-			}
+				String[] host_port = authority.tsplit(":", 2);
+				if (host_port.len > 1)
+				{
+					host = host_port[0];
+					url.port = host_port[1].to_uint()!;
+				}
+				else
+				{
+					host = authority;
+				}
+			};
 		}
-		url_string = url_string[authority_end ..]!;
+		url.host = decode(host, HOST, allocator) ?? UrlParsingResult.INVALID_HOST?!;
+		url_string = url_string[authority_end ..];
 	}
 
 	// Parse path
-	long query_index = (long)url_string.index_of_char('?') ?? -1;
-	long fragment_index = (long)url_string.index_of_char('#') ?? -1;
+	usz! query_index = url_string.index_of_char('?');
+	usz! fragment_index = url_string.index_of_char('#');
 
-	if (query_index != -1 || fragment_index != -1)
+	if (@ok(query_index) || @ok(fragment_index))
 	{
-		long pathEnd = min(query_index == -1 ? url_string.len : query_index,
-						   fragment_index == -1 ? url_string.len : fragment_index,
-						   url_string.len);
-		url.path = url_string[:pathEnd];
+		usz path_end = min(query_index ?? url_string.len, fragment_index ?? url_string.len);
+		url.path = decode(url_string[:path_end], PATH, allocator) ?? UrlParsingResult.INVALID_PATH?!;
+		url_string = url_string[path_end ..];
 	}
 	else
 	{
-		url.path = url_string;
+		url.path = decode(url_string, PATH, allocator) ?? UrlParsingResult.INVALID_PATH?!;
+		url_string = "";
 	}
 
 	// Remove the path part from url for further parsing
-	url_string = url_string[url.path.len ..];
+
 
 	// Parse query
 	if (url_string.starts_with("?"))
 	{
-		fragment_index = (long)url_string.index_of_char('#') ?? -1;
-		if (fragment_index == -1)
-		{
-			fragment_index = url_string.len;
-		}
-		url.query = url_string[1 .. fragment_index - 1];
-		url_string = url_string[fragment_index ..];
+		usz index = url_string.index_of_char('#') ?? url_string.len;
+		url.query = url_string[1 .. index - 1].copy(allocator);
+		url_string = url_string[index ..];
 	}
 
 	// Parse fragment
 	if (url_string.starts_with("#"))
 	{
-		url.fragment = url_string[1 ..];
+		url.fragment = decode(url_string[1..], FRAGMENT, allocator) ?? UrlParsingResult.INVALID_FRAGMENT?!;
 	}
-
 	return url;
 }
 
@@ -168,17 +194,22 @@ fn String Url.to_string(&self, Allocator allocator = allocator::heap()) @dynamic
 		// Add username and password if they exist
 		if (self.username != "")
 		{
-			builder.append_chars(self.username);
+			String username = temp_encode(self.username, USERPASS);
+			builder.append_chars(username);
+
 			if (self.password != "")
 			{
 				builder.append_char(':');
-				builder.append_chars(self.password);
+
+				String password = temp_encode(self.password, USERPASS);
+				builder.append_chars(password);
 			}
 			builder.append_char('@');
 		}
 
 		// Add host
-		builder.append_chars(self.host);
+		String host = temp_encode(self.host, HOST);
+		builder.append_chars(host);
 
 		// Add port
 		if (self.port != 0)
@@ -188,9 +219,11 @@ fn String Url.to_string(&self, Allocator allocator = allocator::heap()) @dynamic
 		}
 
 		// Add path
-		builder.append_chars(self.path);
+		String path = temp_encode(self.path, PATH);
+		builder.append_chars(path);
 
-		// Add query if it exists
+		// Add query if it exists (note that `query` is expected to
+		// be already properly encoded).
 		if (self.query != "")
 		{
 			builder.append_char('?');
@@ -201,77 +234,156 @@ fn String Url.to_string(&self, Allocator allocator = allocator::heap()) @dynamic
 		if (self.fragment != "")
 		{
 			builder.append_char('#');
-			builder.append_chars(self.fragment);
+
+			String fragment = temp_encode(self.fragment, FRAGMENT);
+			builder.append_chars(fragment);
 		}
 
 		return builder.copy_str(allocator);
 	};
 }
 
+def UrlQueryValueList = List(<String>); // every value recorded for a single query key
+
+struct UrlQueryValues
+{
+	inline HashMap(<String, UrlQueryValueList>) map; // key -> all values added for that key
+	UrlQueryValueList key_order;                     // each distinct key once, in first-insertion order
+}
+
 <*
  Parse the query parameters of the Url into a UrlQueryValues map.
 
- @param [in] self
+ @param [in] query "the raw, still percent-encoded query string"
+ @return "a UrlQueryValues HashMap allocated with the temp allocator"
+*>
+fn UrlQueryValues temp_parse_query(String query) => parse_query(query, allocator::temp());
+
+<*
+ Parse a raw query string into a heap-allocated UrlQueryValues map; release it with UrlQueryValues.free().
+
+ @param [in] query "the raw, still percent-encoded query string"
+ @return "a UrlQueryValues HashMap"
+*>
+fn UrlQueryValues new_parse_query(String query) => parse_query(query, allocator::heap());
+
+<*
+ Parse a raw query string into a UrlQueryValues map, percent-decoding every key and value.
+
+ @param [in] query "the raw, still percent-encoded query string"
 @param [inout] allocator
 @return "a UrlQueryValues HashMap"
 *>
-fn UrlQueryValues Url.query_values(&self, Allocator allocator)
+fn UrlQueryValues parse_query(String query, Allocator allocator)
 {
 	UrlQueryValues vals;
-	vals.init(allocator);
-
-	Splitter raw_vals = self.query.tokenize("&");
+	vals.map.init(allocator);
+	vals.key_order.new_init(allocator: allocator);
 
+	Splitter raw_vals = query.tokenize("&"); // one token per `key=value` pair
 	while (try String rv = raw_vals.next())
 	{
 		@pool(allocator)
 		{
 			String[] parts = rv.tsplit("=", 2);
-			if (try existing = vals.get_ref(parts[0]))
-			{
-				existing.push(parts[1]);
-			}
-			else
-			{
-				UrlQueryValueList new_list;
-				new_list.new_init_with_array({ parts[1] }, allocator);
-				vals[parts[0]] = new_list;
-			}
+			String key = temp_decode(parts[0], QUERY) ?? parts[0]; // an undecodable key is kept in its raw form
+			vals.add(key, parts.len == 1 ? key : (temp_decode(parts[1], QUERY) ?? parts[1])); // no '=': the key doubles as value; undecodable values stay raw
 		};
 	}
 	return vals;
 }
 
 <*
- Parse the query parameters of the Url into a UrlQueryValues map,
- to be freed using values.free()
+ Add copies of the key and value strings to the UrlQueryValues map. These
+ copies are freed when the UrlQueryValues map is freed.
 
- @param [in] self
+ @param [inout] self
+ @param key "the un-encoded key; repeated keys accumulate values"
+ @param value "the un-encoded value"
- @return "a UrlQueryValues map"
+ @return "self, so that calls can be chained"
 *>
-fn UrlQueryValues Url.new_query_values(&self)
+fn UrlQueryValues* UrlQueryValues.add(&self, String key, String value)
 {
-	return self.query_values(allocator::heap()) @inline;
+	String value_copy = value.copy(self.allocator);
+	if (try existing = self.get_ref(key))
+	{
+		existing.push(value_copy); // known key: append to its value list
+	}
+	else
+	{
+		UrlQueryValueList new_list;
+		new_list.new_init_with_array({ value_copy }, self.allocator);
+		(*self)[key] = new_list;
+		self.key_order.push(key.copy(self.allocator)); // remember first-insertion order for to_string()
+	}
+	return self;
 }
 
+
 <*
- Parse the query parameters of the Url into a UrlQueryValues map.
- stored on the temp allocator.
+ Stringify UrlQueryValues into an encoded query string: `key=value` pairs joined by '&', keys in insertion order.
 
 @param [in] self
- @return "a UrlQueryValues map"
+ @param [inout] allocator
+ @return "a percent-encoded query string"
 *>
-fn UrlQueryValues Url.temp_query_values(&self)
+fn String UrlQueryValues.to_string(&self, Allocator allocator = allocator::heap()) @dynamic
 {
-	return self.query_values(allocator::temp()) @inline;
+	@pool(allocator)
+	{
+		DString builder = dstring::temp_new();
+
+		usz i; // number of pairs written so far
+		foreach (key: self.key_order)
+		{
+			String encoded_key = temp_encode(key, QUERY);
+
+			UrlQueryValueList! values = self.map.get(key);
+			if (catch values) continue; // key is not present in the map: nothing to emit
+
+			foreach (value: values)
+			{
+				if (i > 0) builder.append_char('&'); // separator before every pair except the first
+
+				builder.append_chars(encoded_key);
+				builder.append_char('=');
+
+				String encoded_value = temp_encode(value, QUERY);
+				builder.append_chars(encoded_value);
+				i++;
+			}
+		};
+
+		return builder.copy_str(allocator);
+	};
 }
 
 fn void UrlQueryValues.free(&self)
 {
-	self.map.@each(;String key, UrlQueryValueList value)
+	self.map.@each(;String key, UrlQueryValueList values)
 	{
-		value.free();
+		foreach (value: values) value.free(self.allocator); // release the value copies made by add()
+		values.free(); // then the list that held them
 	};
 	self.map.free();
+
+	foreach (&key: self.key_order) key.free(self.allocator); // release the key copies made by add()
+	self.key_order.free();
 }
 
+<*
+ Free the string fields of an Url with the allocator stored in it by `new_parse()`.
+
+ @param [inout] self
+*>
+fn void Url.free(&self)
+{
+	if (!self.allocator) return; // no allocator recorded (e.g. manually filled Url): nothing is owned
+	self.scheme.free(self.allocator);
+	self.host.free(self.allocator);
+	self.username.free(self.allocator);
+	self.password.free(self.allocator);
+	self.path.free(self.allocator);
+	self.query.free(self.allocator);
+	self.fragment.free(self.allocator);
+}
diff --git a/lib/std/net/url_encoding.c3 b/lib/std/net/url_encoding.c3
new file mode 100644
index 000000000..e6f9011ea
--- /dev/null
+++ b/lib/std/net/url_encoding.c3
@@ -0,0 +1,197 @@
+<*
+ This module section provides encoding and decoding functions for URL
+ components according to RFC 3986.
+*>
+module std::net::url;
+import std::encoding::hex;
+
+enum UrlEncodingMode : char (String allowed)
+{
+	UNRESERVED = "-_.~",            // section 2.3, left unescaped in every mode
+	PATH 	   = "$&+,/:;=@",       // section 3.3
+	HOST 	   = "!$&'()*+,;=:[]",  // section 3.2.2 (also include ':', '[', ']' for ipv6 hosts)
+	USERPASS   = ";:&=+$,",         // section 3.2.1
+	QUERY 	   = "",                // section 3.4, a space is written as '+'
+	FRAGMENT   = "$&+,/:;=?@!()*",	// section 4.1
+}
+
+fault UrlDecodingError
+{
+	INVALID_HEX // a '%' that is not followed by two hex digits
+}
+
+<*
+ Returns true if char c must be percent-encoded: it is neither alphanumeric, unreserved, nor allowed by mode.
+
+ @param c "Character to check if it should be encoded."
+ @param mode "Url encoding mode."
+*>
+fn bool should_encode(char c, UrlEncodingMode mode) @private
+{
+	// alphanumeric characters are allowed
+	if (c.is_alnum()) return false;
+
+	// unreserved characters are allowed
+	if (try UrlEncodingMode.UNRESERVED.allowed.index_of_char(c)) return false;
+
+	// some mode-specific characters are allowed
+	if (try mode.allowed.index_of_char(c)) return false;
+
+	// everything else must be encoded
+	return true;
+}
+
+<*
+ Calculate the length of s once percent-encoded for mode (each escaped char grows from 1 to 3 bytes).
+*>
+fn usz encode_len(String s, UrlEncodingMode mode) @inline
+{
+	usz n; // number of chars that become a %XX escape
+	foreach (c: s)
+	{
+		if (!should_encode(c, mode)) continue;
+		if (c != ' ' || mode != QUERY) // a space in a query becomes a single '+', so it does not grow
+		{
+			n++;
+		}
+	}
+	return s.len + 2 * n;
+}
+
+<*
+ Encode the string s for a given encoding mode.
+ Returned string must be freed.
+
+ @param s "String to encode"
+ @param mode "Url encoding mode"
+ @param [inout] allocator
+ @return "Percent-encoded String"
+*>
+fn String encode(String s, UrlEncodingMode mode, Allocator allocator)
+{
+	usz n = encode_len(s, mode); // final length, used to pre-size the builder
+	@pool(allocator)
+	{
+		DString builder = dstring::temp_with_capacity(n);
+
+		foreach(i, c: s)
+		{
+			switch
+			{
+				// encode spaces in queries
+				case c == ' ' && mode == QUERY:
+					builder.append_char('+');
+
+				// add encoded char
+				case should_encode(c, mode):
+					builder.append_char('%');
+					String hex = hex::encode_temp(s[i:1]); // hex digits for this single byte
+					builder.append(hex.temp_ascii_to_upper());
+
+				// use char, no encoding needed
+				default:
+					builder.append_char(c);
+			}
+		}
+
+		return builder.copy_str(allocator);
+	};
+}
+
+<*
+ Encode the string s for a given encoding mode, allocated with allocator::heap().
+ Returned string must be freed.
+
+ @param s "String to encode"
+ @param mode "Url encoding mode"
+ @return "Percent-encoded String"
+*>
+fn String new_encode(String s, UrlEncodingMode mode) => encode(s, mode, allocator::heap());
+
+<*
+ Encode string s for a given encoding mode; the result is allocated with allocator::temp().
+
+ @param s "String to encode"
+ @param mode "Url encoding mode"
+ @return "Percent-encoded String"
+*>
+fn String temp_encode(String s, UrlEncodingMode mode) => encode(s, mode, allocator::temp());
+
+<*
+ Calculate the length of the percent-decoded string (each %XX escape shrinks from 3 bytes to 1; mode is not consulted).
+
+ @return! UrlDecodingError.INVALID_HEX
+*>
+fn usz! decode_len(String s, UrlEncodingMode mode) @inline
+{
+	usz n; // number of %XX escapes found
+	foreach (i, c: s)
+	{
+		if (c != '%') continue;
+		if (i + 2 >= s.len || !s[i+1].is_xdigit() || !s[i+2].is_xdigit()) // '%' must be followed by two hex digits
+		{
+			return UrlDecodingError.INVALID_HEX?;
+		}
+		n++;
+	}
+	return s.len - 2 * n;
+}
+
+<*
+ Decode string s for a given encoding mode.
+ Returned string must be freed. Fails with UrlDecodingError.INVALID_HEX on a malformed %XX escape.
+
+ @param s "String to decode"
+ @param mode "Url encoding mode"
+ @param [inout] allocator
+ @return "Percent-decoded String"
+*>
+fn String! decode(String s, UrlEncodingMode  mode, Allocator allocator)
+{
+	usz n = decode_len(s, mode)!; // checks every escape up front and gives the output length
+	@pool(allocator)
+	{
+		DString builder = dstring::temp_with_capacity(n);
+
+		for (usz i = 0; i < s.len; i++)
+		{
+			switch (s[i])
+			{
+				// decode encoded char
+				case '%':
+					char[] hex = hex::decode_temp(s[i+1:2])!;
+					builder.append(hex);
+					i += 2; // skip the two hex digits just consumed
+
+				// decode space when in queries
+				case '+':
+					builder.append_char((mode == QUERY) ? ' ' : '+');
+
+				// use char, no decoding needed
+				default:
+					builder.append_char(s[i]);
+			}
+		}
+
+		return builder.copy_str(allocator);
+	};
+}
+
+<*
+ Decode string s for a given encoding mode, allocated with allocator::heap().
+ Returned string must be freed.
+
+ @param s "String to decode"
+ @param mode "Url encoding mode"
+ @return "Percent-decoded String"
+*>
+fn String! new_decode(String s, UrlEncodingMode  mode) => decode(s, mode, allocator::heap());
+
+<*
+ Decode string s for a given encoding mode; the result is allocated with allocator::temp().
+
+ @param s "String to decode"
+ @param mode "Url encoding mode"
+ @return "Percent-decoded String"
+*>
+fn String! temp_decode(String s, UrlEncodingMode  mode) => decode(s, mode, allocator::temp());
diff --git a/releasenotes.md b/releasenotes.md
index 18a28a03c..c1fcf572f 100644
--- a/releasenotes.md
+++ b/releasenotes.md
@@ -147,6 +147,7 @@
 - Add `memcpy` / `memset` / `memcmp` to nolibc.
 - Add `sort::quickselect` to find the k-th smallest element in an unordered list.
 - Add `sort::is_sorted` to determine if a list is sorted.
+- Implement RFC 3986 for url encoding and decoding.
 
 ## 0.6.4 Change list
 
diff --git a/test/unit/stdlib/net/url.c3 b/test/unit/stdlib/net/url.c3
index 7ea8b2bfa..f13576240 100644
--- a/test/unit/stdlib/net/url.c3
+++ b/test/unit/stdlib/net/url.c3
@@ -7,7 +7,8 @@ import std::net::url;
 
 fn void test_parse_foo()
 {
-	Url url = url::parse("foo://example.com:8042/over/there?name=ferret#nose")!!;
+	Url url = url::new_parse("foo://example.com:8042/over/there?name=ferret#nose")!!;
+	defer url.free();
 
 	assert(url.scheme == "foo", "got '%s'", url.scheme);
 	assert(url.host == "example.com", "got '%s'", url.host);
@@ -21,7 +22,8 @@ fn void test_parse_foo()
 
 fn void test_parse_urn()
 {
-	Url url = url::parse("urn:example:animal:ferret:nose")!!;
+	Url url = url::new_parse("urn:example:animal:ferret:nose")!!;
+	defer url.free();
 
 	assert(url.scheme == "urn");
 	assert(url.host == "");
@@ -35,7 +37,8 @@ fn void test_parse_urn()
 
 fn void test_parse_jdbc()
 {
-	Url url = url::parse("jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true")!!;
+	Url url = url::new_parse("jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true")!!;
+	defer url.free();
 
 	assert(url.scheme == "jdbc:mysql");
 	assert(url.host == "localhost");
@@ -49,7 +52,8 @@ fn void test_parse_jdbc()
 
 fn void test_parse_ftp()
 {
-	Url url = url::parse("ftp://ftp.is.co.za/rfc/rfc1808.txt")!!;
+	Url url = url::new_parse("ftp://ftp.is.co.za/rfc/rfc1808.txt")!!;
+	defer url.free();
 
 	assert(url.scheme == "ftp");
 	assert(url.host == "ftp.is.co.za");
@@ -63,7 +67,8 @@ fn void test_parse_ftp()
 
 fn void test_parse_http()
 {
-	Url url = url::parse("http://www.ietf.org/rfc/rfc2396.txt#header1")!!;
+	Url url = url::new_parse("http://www.ietf.org/rfc/rfc2396.txt#header1")!!;
+	defer url.free();
 
 	assert(url.scheme == "http");
 	assert(url.host == "www.ietf.org");
@@ -77,7 +82,8 @@ fn void test_parse_http()
 
 fn void test_parse_ldap()
 {
-	Url url = url::parse("ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two")!!;
+	Url url = url::new_parse("ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two")!!;
+	defer url.free();
 
 	assert(url.scheme == "ldap");
 	assert(url.host == "[2001:db8::7]");
@@ -91,7 +97,8 @@ fn void test_parse_ldap()
 
 fn void test_parse_mailto()
 {
-	Url url = url::parse("mailto:John.Doe@example.com")!!;
+	Url url = url::new_parse("mailto:John.Doe@example.com")!!;
+	defer url.free();
 
 	assert(url.scheme == "mailto");
 	assert(url.host == "");
@@ -103,9 +110,10 @@ fn void test_parse_mailto()
 	assert(url.fragment == "");
 }
 
-fn void test_parse_news()
+fn void test_new_parses()
 {
-	Url url = url::parse("news:comp.infosystems.www.servers.unix")!!;
+	Url url = url::new_parse("news:comp.infosystems.www.servers.unix")!!;
+	defer url.free();
 
 	assert(url.scheme == "news");
 	assert(url.host == "");
@@ -119,7 +127,8 @@ fn void test_parse_news()
 
 fn void test_parse_tel()
 {
-	Url url = url::parse("tel:+1-816-555-1212")!!;
+	Url url = url::new_parse("tel:+1-816-555-1212")!!;
+	defer url.free();
 
 	assert(url.scheme == "tel");
 	assert(url.host == "");
@@ -133,7 +142,8 @@ fn void test_parse_tel()
 
 fn void test_parse_telnet()
 {
-	Url url = url::parse("telnet://192.0.2.16:80/")!!;
+	Url url = url::new_parse("telnet://192.0.2.16:80/")!!;
+	defer url.free();
 
 	assert(url.scheme == "telnet");
 	assert(url.host == "192.0.2.16");
@@ -147,7 +157,8 @@ fn void test_parse_telnet()
 
 fn void test_parse_urn2()
 {
-	Url url = url::parse("urn:oasis:names:specification:docbook:dtd:xml:4.1.2")!!;
+	Url url = url::new_parse("urn:oasis:names:specification:docbook:dtd:xml:4.1.2")!!;
+	defer url.free();
 
 	assert(url.scheme == "urn");
 	assert(url.host == "");
@@ -161,16 +172,54 @@ fn void test_parse_urn2()
 
 fn void test_parse_empty()
 {
-	Url url = url::parse("       ")!!;
+	assert(@catch(url::new_parse("       ")) == UrlParsingResult.EMPTY);
+}
 
-	assert(url.scheme == "");
-	assert(url.host == "");
+// Parser tests with escape sequences
+
+fn void test_parse_path_with_escape_sequence()
+{
+	Url url = url::new_parse("foo://example.com:8042/file/name%20one%26two?name=ferret#nose")!!;
+	defer url.free();
+
+	assert(url.scheme == "foo", "got '%s'", url.scheme);
+	assert(url.host == "example.com", "got '%s'", url.host);
+	assert(url.port == 8042, "got '%d'", url.port);
+	assert(url.username == "", "got '%s'", url.username);
+	assert(url.password == "", "got '%s'", url.password);
+	assert(url.path == "/file/name one&two", "got '%s'", url.path);
+	assert(url.query == "name=ferret", "got '%s'", url.query);
+	assert(url.fragment == "nose", "got: '%s'", url.fragment);
+}
+
+fn void test_parse_username_and_password_with_escape_sequence()
+{
+	Url url = url::new_parse("jdbc:mysql://test%20user:ouu%40pppssss@localhost:3306/sakila?profileSQL=true")!!;
+	defer url.free();
+
+	assert(url.scheme == "jdbc:mysql");
+	assert(url.host == "localhost");
+	assert(url.port == 3306);
+	assert(url.username == "test user", "got '%s'", url.username);
+	assert(url.password == "ouu@pppssss", "got '%s'", url.password);
+	assert(url.path == "/sakila");
+	assert(url.query == "profileSQL=true");
+	assert(url.fragment == "");
+}
+
+fn void test_parse_fragment_with_escape_sequence()
+{
+	Url url = url::new_parse("http://www.ietf.org/rfc/rfc2396.txt#header%201%262")!!;
+	defer url.free();
+
+	assert(url.scheme == "http");
+	assert(url.host == "www.ietf.org");
 	assert(url.port == 0);
 	assert(url.username == "", "got '%s'", url.username);
 	assert(url.password == "", "got '%s'", url.password);
-	assert(url.path == "");
+	assert(url.path == "/rfc/rfc2396.txt");
 	assert(url.query == "");
-	assert(url.fragment == "");
+	assert(url.fragment == "header 1&2");
 }
 
 // to_string() tests
@@ -179,6 +228,7 @@ fn void test_string_foo()
 {
 	Url url = {.scheme="foo", .host="example.com", .port=8042, .path="/over/there", .query="name=ferret", .fragment="nose"};
 	String str = string::new_format("%s", url);
+	defer str.free();
 
 	assert(str == "foo://example.com:8042/over/there?name=ferret#nose");
 }
@@ -187,6 +237,7 @@ fn void test_string_urn()
 {
 	Url url = {.scheme="urn", .path="example:animal:ferret:nose"};
 	String str = string::new_format("%s", url);
+	defer str.free();
 
 	assert(str == "urn:example:animal:ferret:nose");
 }
@@ -195,6 +246,7 @@ fn void test_string_jdbc()
 {
 	Url url = {.scheme="jdbc:mysql", .host="localhost", .port=3306, .username="test_user", .password="ouupppssss", .path="/sakila", .query="profileSQL=true"};
 	String str = string::new_format("%s", url);
+	defer str.free();
 
 	assert(str == "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true");
 }
@@ -203,30 +255,34 @@ fn void test_string_ftp()
 {
 	Url url = {.scheme="ftp", .host="ftp.is.co.za", .path="/rfc/rfc1808.txt"};
 	String str = string::new_format("%s", url);
+	defer str.free();
 
 	assert(str == "ftp://ftp.is.co.za/rfc/rfc1808.txt");
 }
 
 fn void test_string_http()
 {
-	Url url = {.scheme="http", .host="www.ietf.org", .path="/rfc/rfc2396.txt#header1"};
+	Url url = {.scheme="http", .host="www.ietf.org", .path="/rfc/rfc2396.txt", .fragment="header1"};
 	String str = string::new_format("%s", url);
+	defer str.free();
 
-	assert(str == "http://www.ietf.org/rfc/rfc2396.txt#header1");
+	assert(str == "http://www.ietf.org/rfc/rfc2396.txt#header1", "got: '%s'", str);
 }
 
 fn void test_string_ldap()
 {
-	Url url = {.scheme="ldap", .host="[2001:db8::7]", .path="/c=GB?objectClass=one&objectClass=two"};
+	Url url = {.scheme="ldap", .host="[2001:db8::7]", .path="/c=GB", .query="objectClass=one&objectClass=two"};
 	String str = string::new_format("%s", url);
+	defer str.free();
 
-	assert(str == "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two");
+	assert(str == "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two", "got: '%s'", str);
 }
 
 fn void test_string_mailto()
 {
 	Url url = {.scheme="mailto", .path="John.Doe@example.com"};
 	String str = string::new_format("%s", url);
+	defer str.free();
 
 	assert(str == "mailto:John.Doe@example.com");
 }
@@ -235,6 +291,7 @@ fn void test_string_news()
 {
 	Url url = {.scheme="news", .path="comp.infosystems.www.servers.unix"};
 	String str = string::new_format("%s", url);
+	defer str.free();
 	assert(str == "news:comp.infosystems.www.servers.unix");
 }
 
@@ -242,6 +299,7 @@ fn void test_string_tel()
 {
 	Url url = {.scheme="tel", .path="+1-816-555-1212"};
 	String str = string::new_format("%s", url);
+	defer str.free();
 
 	assert(str == "tel:+1-816-555-1212");
 }
@@ -250,6 +308,7 @@ fn void test_string_telnet()
 {
 	Url url = {.scheme="telnet", .host="192.0.2.16", .port=80, .path="/"};
 	String str = string::new_format("%s", url);
+	defer str.free();
 
 	assert(str == "telnet://192.0.2.16:80/");
 }
@@ -258,6 +317,7 @@ fn void test_string_urn2()
 {
 	Url url = {.scheme="urn", .path="oasis:names:specification:docbook:dtd:xml:4.1.2"};
 	String str = string::new_format("%s", url);
+	defer str.free();
 
 	assert(str == "urn:oasis:names:specification:docbook:dtd:xml:4.1.2");
 }
@@ -266,6 +326,7 @@ fn void test_string_empty()
 {
 	Url url = {};
 	String str = string::new_format("%s", url);
+	defer str.free();
 
 	assert(str == "");
 }
@@ -274,9 +335,10 @@ fn void test_string_empty()
 
 fn void test_query_values1()
 {
-	Url url = url::parse("foo://example.com:8042/over/there?name=ferret=ok#nose")!!;
+	Url url = url::new_parse("foo://example.com:8042/over/there?name=ferret=ok#nose")!!;
+	defer url.free();
 
-	UrlQueryValues vals = url.new_query_values();
+	UrlQueryValues vals = url::temp_parse_query(url.query);
 	defer vals.free();
 
 	assert(vals.len() == 1);
@@ -288,9 +350,10 @@ fn void test_query_values1()
 
 fn void test_query_values2()
 {
-	Url url = url::parse("foo://example.com:8042/over/there?name=ferret&age=99&age=11#nose")!!;
+	Url url = url::new_parse("foo://example.com:8042/over/there?name=ferret&age=99&age=11#nose")!!;
+	defer url.free();
 
-	UrlQueryValues vals = url.new_query_values();
+	UrlQueryValues vals = url::new_parse_query(url.query);
 	defer vals.free();
 	assert(vals.len() == 2);
 
@@ -304,11 +367,93 @@ fn void test_query_values2()
 	assert(l_age[1] == "11");
 }
 
-fn void test_query_values_withempty()
+fn void test_escaped_query_values()
 {
-	Url url = url::parse("foo://example.com:8042/over/there?name=ferret&&&age=99&age=11")!!;
+	Url url = url::new_parse("foo://example.com:8042/over/there?k%3Bey=%3Ckey%3A+0x90%3E&age=99&age=11#nose")!!;
+	defer url.free();
 
-	UrlQueryValues vals = url.new_query_values();
+	UrlQueryValues vals = url::new_parse_query(url.query);
 	defer vals.free();
 	assert(vals.len() == 2);
-}
\ No newline at end of file
+
+	UrlQueryValueList l_key = vals["k;ey"]!!;
+	assert(l_key.len() == 1);
+	assert(l_key[0] == "<key: 0x90>");
+}
+
+fn void test_query_values_withempty()
+{
+	Url url = url::new_parse("foo://example.com:8042/over/there?name=ferret&&&age=99&age=11")!!;
+	defer url.free();
+
+	UrlQueryValues vals = url::new_parse_query(url.query);
+	defer vals.free();
+	assert(vals.len() == 2);
+}
+
+// url compose and parse should be idempotent
+
+fn void test_url_idempotence()
+{
+	UrlQueryValues query_builder;
+	query_builder.new_init();
+	defer query_builder.free();
+
+	query_builder.add("profileSQL", "true");
+	query_builder.add("k;ey", "<key: 0x90>");
+
+	String query = query_builder.to_string();
+	defer query.free();
+
+	Url url = {
+		.scheme    = "jdbc:mysql",
+		.host      = "localhost",
+		.port      = 3306,
+		.username  = "test user",
+		.password  = "ouu@pppssss",
+		.path      = "/sakila",
+		.query = query,
+		.fragment  = "no se",
+	};
+
+	String url_string = url.to_string();
+	defer url_string.free();
+
+	String want = "jdbc:mysql://test%20user:ouu%40pppssss@localhost:3306"
+		"/sakila?profileSQL=true&k%3Bey=%3Ckey%3A+0x90%3E#no%20se";
+	assert(url_string == want, "got: %s, want: %s", url_string, want);
+
+	Url parsed = url::new_parse(url_string)!!;
+	defer parsed.free();
+
+	UrlQueryValues vals = url::new_parse_query(parsed.query);
+	defer vals.free();
+	assert(vals.len() == 2);
+
+	UrlQueryValueList key;
+	key = vals["k;ey"]!!;
+	assert(key.len() == 1);
+	assert(key[0] == "<key: 0x90>");
+
+	key = vals["profileSQL"]!!;
+	assert(key.len() == 1);
+	assert(key[0] == "true");
+
+	String parsed_query = vals.to_string();
+	defer parsed_query.free();
+
+	assert(parsed.scheme == url.scheme);
+	assert(parsed.host == url.host);
+	assert(parsed.port == url.port);
+	assert(parsed.username == url.username);
+	assert(parsed.password == url.password);
+	assert(parsed.path == url.path);
+	assert(parsed.query == parsed_query);
+	assert(parsed.fragment == url.fragment);
+
+	String parsed_string = parsed.to_string();
+	defer parsed_string.free();
+
+	assert(url_string == parsed_string);
+}
+
diff --git a/test/unit/stdlib/net/url_encoding.c3 b/test/unit/stdlib/net/url_encoding.c3
new file mode 100644
index 000000000..b52047e61
--- /dev/null
+++ b/test/unit/stdlib/net/url_encoding.c3
@@ -0,0 +1,266 @@
+module url_encode_test @test;
+
+import std::io;
+import std::net::url @public;
+
+struct EncodeTest // one row of the table-driven encode/decode tests in this file
+{
+	String in;            // text handed to the call under test
+	String out;           // result expected for `in`
+	anyfault err;         // fault expected from decoding; anyfault{} in rows that expect success
+	UrlEncodingMode mode; // encoding mode passed along with the text
+}
+
+EncodeTest[*] decode_with_error_tests @local = { // rows for test_decoding_with_error; every row uses QUERY mode
+	{
+		.in = "",
+		.out = "",
+		.err = anyfault{},
+		.mode = UrlEncodingMode.QUERY,
+	},
+	{
+		.in = "abc",
+		.out = "abc",
+		.err = anyfault{},
+		.mode = UrlEncodingMode.QUERY,
+	},
+	{
+		.in = "1%41",
+		.out = "1A",
+		.err = anyfault{},
+		.mode = UrlEncodingMode.QUERY,
+	},
+	{
+		.in = "1%41%42%43",
+		.out = "1ABC",
+		.err = anyfault{},
+		.mode = UrlEncodingMode.QUERY,
+	},
+	{
+		.in = "%4a", // lower-case hex digit
+		.out = "J",
+		.err = anyfault{},
+		.mode = UrlEncodingMode.QUERY,
+	},
+	{
+		.in = "%6F", // upper-case hex digit
+		.out = "o",
+		.err = anyfault{},
+		.mode = UrlEncodingMode.QUERY,
+	},
+	{
+		.in = "%", // '%' with no hex digits at all
+		.out = "",
+		.err = UrlDecodingError.INVALID_HEX,
+		.mode = UrlEncodingMode.QUERY,
+	},
+	{
+		.in = "%a", // '%' followed by a single digit only
+		.out = "",
+		.err = UrlDecodingError.INVALID_HEX,
+		.mode = UrlEncodingMode.QUERY,
+	},
+	{
+		.in = "%1",
+		.out = "",
+		.err = UrlDecodingError.INVALID_HEX,
+		.mode = UrlEncodingMode.QUERY,
+	},
+	{
+		.in = "123%45%6", // valid escape followed by a truncated one
+		.out = "",
+		.err = UrlDecodingError.INVALID_HEX,
+		.mode = UrlEncodingMode.QUERY,
+	},
+	{
+		.in = "%zzzzz", // non-hex characters after '%'
+		.out = "",
+		.err = UrlDecodingError.INVALID_HEX,
+		.mode = UrlEncodingMode.QUERY,
+	},
+	{
+		.in = "a+b", // '+' is expected to decode to a space in QUERY mode
+		.out = "a b",
+		.err = anyfault{},
+		.mode = UrlEncodingMode.QUERY,
+	},
+	{
+		.in = "a%20b",
+		.out = "a b",
+		.err = anyfault{},
+		.mode = UrlEncodingMode.QUERY,
+	},
+};
+
+fn void test_decoding_with_error() // table-driven check of url::temp_decode results and faults
+{
+	@pool() {
+		foreach (test: decode_with_error_tests)
+		{
+			String! actual = url::temp_decode(test.in, test.mode);
+			if (catch excuse = actual)
+			{
+				assert(excuse == test.err, "unescape(%s, %s); "
+					"got: %s, want: %s", test.in, test.mode, excuse, test.err);
+				continue;
+			} // decoding succeeded from here on, so the row must not expect a fault
+			assert(test.err == anyfault{}, "unescape(%s, %s); got: no error, want: %s", test.in, test.mode, test.err);
+			assert(actual == test.out, "unescape(%s, %s); "
+				"got: %s, want: %s", test.in, test.mode, actual, test.out);
+		}
+	};
+}
+
+EncodeTest[*] encode_tests @local = { // rows for test_percent_encode_and_decode: `in` encodes to `out`, `out` decodes to `in`
+	{
+		.in = "",
+		.out = "",
+		.err = anyfault{},
+		.mode = UrlEncodingMode.PATH,
+	},
+	{
+		.in = "abc",
+		.out = "abc",
+		.err = anyfault{},
+		.mode = UrlEncodingMode.PATH,
+	},
+	{
+		.in = "abc+def", // '+' is expected to stay literal in PATH mode
+		.out = "abc+def",
+		.err = anyfault{},
+		.mode = UrlEncodingMode.PATH,
+	},
+	{
+		.in = "a/b", // '/' is expected to stay literal in PATH mode
+		.out = "a/b",
+		.err = anyfault{},
+		.mode = UrlEncodingMode.PATH,
+	},
+	{
+		.in = "one two", // space is expected as %20 in PATH mode
+		.out = "one%20two",
+		.err = anyfault{},
+		.mode = UrlEncodingMode.PATH,
+	},
+	{
+		.in = "10%",
+		.out = "10%25",
+		.err = anyfault{},
+		.mode = UrlEncodingMode.PATH,
+	},
+	{
+		.in = "",
+		.out = "",
+		.err = anyfault{},
+		.mode = UrlEncodingMode.QUERY,
+	},
+	{
+		.in = "abc",
+		.out = "abc",
+		.err = anyfault{},
+		.mode = UrlEncodingMode.QUERY,
+	},
+	{
+		.in = "one two", // space is expected as '+' in QUERY mode
+		.out = "one+two",
+		.err = anyfault{},
+		.mode = UrlEncodingMode.QUERY,
+	},
+	{
+		.in = "10%",
+		.out = "10%25",
+		.err = anyfault{},
+		.mode = UrlEncodingMode.QUERY,
+	},
+	{
+		.in = " ?&=#+%!<>#\"{}|\\^[]`☺\t:/@$'()*,;",
+		.out = "+%3F%26%3D%23%2B%25%21%3C%3E%23%22%7B%7D%7C%5C%5E%5B%5D%60%E2%98%BA%09%3A%2F%40%24%27%28%29%2A%2C%3B",
+		.err = anyfault{},
+		.mode = UrlEncodingMode.QUERY,
+	},
+
+};
+
+fn void test_percent_encode_and_decode()
+{
+	// The temp_encode/temp_decode results are only used inside this @pool scope.
+	@pool() {
+		foreach (tc : encode_tests)
+		{
+			String encoded = url::temp_encode(tc.in, tc.mode);
+			assert(encoded == tc.out, "escape(%s, %s); "
+				"got: %s, want: %s", tc.in, tc.mode, encoded, tc.out);
+			// Decoding the expected output must give back the original input.
+			String decoded = url::temp_decode(tc.out, tc.mode)!!;
+			assert(decoded == tc.in, "unescape(%s, %s); "
+				"got: %s, want: %s", tc.out, tc.mode, decoded, tc.in);
+		}
+	};
+}
+
+struct ShouldEncodeTest // one expectation checked by test_should_encode
+{
+	char in;              // character passed to url::should_encode
+	UrlEncodingMode mode; // mode passed alongside the character
+	bool escape;          // expected return value of url::should_encode
+}
+
+ShouldEncodeTest[*] should_encode_tests @local = { // rows: {character, mode, expected should_encode result}
+	{'a', UrlEncodingMode.PATH, false}, // letters, digits, '-', '.', '_', '~': expected unescaped in the modes listed
+	{'a', UrlEncodingMode.USERPASS, false},
+	{'a', UrlEncodingMode.QUERY, false},
+	{'a', UrlEncodingMode.FRAGMENT, false},
+	{'a', UrlEncodingMode.HOST, false},
+	{'z', UrlEncodingMode.PATH, false},
+	{'A', UrlEncodingMode.PATH, false},
+	{'Z', UrlEncodingMode.PATH, false},
+	{'0', UrlEncodingMode.PATH, false},
+	{'9', UrlEncodingMode.PATH, false},
+	{'-', UrlEncodingMode.PATH, false},
+	{'-', UrlEncodingMode.USERPASS, false},
+	{'-', UrlEncodingMode.QUERY, false},
+	{'-', UrlEncodingMode.FRAGMENT, false},
+	{'.', UrlEncodingMode.PATH, false},
+	{'_', UrlEncodingMode.PATH, false},
+	{'~', UrlEncodingMode.PATH, false},
+	// USERPASS: '/', '?' and '@' must be escaped, the remaining rows stay literal.
+	{'/', UrlEncodingMode.USERPASS, true},
+	{'?', UrlEncodingMode.USERPASS, true},
+	{'@', UrlEncodingMode.USERPASS, true},
+	{'$', UrlEncodingMode.USERPASS, false},
+	{'&', UrlEncodingMode.USERPASS, false},
+	{'+', UrlEncodingMode.USERPASS, false},
+	{',', UrlEncodingMode.USERPASS, false},
+	{';', UrlEncodingMode.USERPASS, false},
+	{'=', UrlEncodingMode.USERPASS, false},
+	// HOST: none of the characters listed below are expected to be escaped.
+	{'!', UrlEncodingMode.HOST, false},
+	{'$', UrlEncodingMode.HOST, false},
+	{'&', UrlEncodingMode.HOST, false},
+	{'\'', UrlEncodingMode.HOST, false},
+	{'(', UrlEncodingMode.HOST, false},
+	{')', UrlEncodingMode.HOST, false},
+	{'*', UrlEncodingMode.HOST, false},
+	{'+', UrlEncodingMode.HOST, false},
+	{',', UrlEncodingMode.HOST, false},
+	{';', UrlEncodingMode.HOST, false},
+	{'=', UrlEncodingMode.HOST, false},
+	{'0', UrlEncodingMode.HOST, false},
+	{'9', UrlEncodingMode.HOST, false},
+	{'A', UrlEncodingMode.HOST, false},
+	{'z', UrlEncodingMode.HOST, false},
+	{'_', UrlEncodingMode.HOST, false},
+	{'-', UrlEncodingMode.HOST, false},
+	{'.', UrlEncodingMode.HOST, false},
+};
+
+fn void test_should_encode()
+{
+	// Each table row pins the url::should_encode result for one (character, mode) pair.
+	foreach (tc : should_encode_tests)
+	{
+		bool got = url::should_encode(tc.in, tc.mode);
+		assert(got == tc.escape, "should_encode(%c, %s); "
+			"got: %s, want: %s", tc.in, tc.mode, got, tc.escape);
+	}
+}