From d027a15b4af4c6703946ae8240c8b8cb1da8ef97 Mon Sep 17 00:00:00 2001
From: Louis Brauer
Date: Thu, 2 Jan 2025 21:13:42 +0100
Subject: [PATCH] add std::net::url - with fixes (#1748)

* add std::net::url for parsing/generating URLs

* Move String.index_of_chars into std

* Fix param contract

* Idiomatic type naming, Allman formatting, slicing, document functions

* Use String.tokenize

* Don't return str_view() from freed dstring

* Change indentation to tabs

* Variable casing according to guidelines

* Updated API and added line to the releasenotes.

---------

Co-authored-by: Christoffer Lerno
---
 lib/std/core/string.c3      |  23 +++
 lib/std/net/url.c3          | 277 +++++++++++++++++++++++++++++++
 releasenotes.md             |   1 +
 test/unit/stdlib/net/url.c3 | 314 ++++++++++++++++++++++++++++++++++++
 4 files changed, 615 insertions(+)
 create mode 100644 lib/std/net/url.c3
 create mode 100644 test/unit/stdlib/net/url.c3

diff --git a/lib/std/core/string.c3 b/lib/std/core/string.c3
index e7764d39c..611d5b800 100644
--- a/lib/std/core/string.c3
+++ b/lib/std/core/string.c3
@@ -362,6 +362,29 @@ fn usz! String.index_of_char(s, char needle)
 	return SearchResult.MISSING?;
 }
 
+<*
+ Find the index of the first occurrence of any one of the given chars.
+
+ @param [in] s
+ @param [in] needle "The characters to look for"
+ @pure
+ @ensure return < s.len
+ @return "the index of the first char of s that appears in needle"
+ @return! SearchResult.MISSING "if none of the chars can be found"
+*>
+fn usz! String.index_of_chars(String s, char[] needle)
+{
+	foreach (i, c : s)
+	{
+		foreach (pin : needle)
+		{
+			if (c == pin) return i;
+		}
+	}
+
+	return SearchResult.MISSING?;
+}
+
 <*
  Find the index of the first incidence of a character.
 
diff --git a/lib/std/net/url.c3 b/lib/std/net/url.c3
new file mode 100644
index 000000000..116da036b
--- /dev/null
+++ b/lib/std/net/url.c3
@@ -0,0 +1,277 @@
+module std::net::url;
+
+import std::io, std::collections::map, std::collections::list;
+
+def UrlQueryValueList = List(<String>); // all values recorded for one query key, in order of appearance
+
+struct UrlQueryValues
+{
+	inline HashMap(<String, UrlQueryValueList>) map; // query key -> list of its values
+}
+
+struct Url(Printable)
+{
+	String scheme;   // text before "://" (or before the first ':' when there is no "://")
+	String host;     // an IPv6 literal keeps its square brackets
+	uint port;       // 0 when the URL carries no port
+	String username;
+	String password;
+	String path;
+	String query;    // without the leading '?'
+	String fragment; // without the leading '#'
+}
+
+<*
+ Parse a URL string into a Url struct. The Url fields are slices of url_string, nothing is copied.
+
+ @param [in] url_string "the URL text; leading and trailing whitespace is trimmed first"
+ @require url_string.len > 0 "the url_string must be len 1 or more"
+ @return "the parsed Url; fails if a port is present but is not a valid unsigned integer"
+*>
+fn Url! parse(String url_string)
+{
+	Url url;
+	url_string = url_string.trim();
+	if (!url_string.len)
+	{
+		return url;
+	}
+
+	// Parse scheme
+	if (try pos = url_string.index_of("://"))
+	{
+		url.scheme = url_string[:pos];
+		url_string = url_string[url.scheme.len + 3 ..];
+	}
+	else if (url_string.contains(":"))
+	{
+		// Handle schemes without authority like 'mailto:' - the remainder is stored as the path
+		url.scheme = url_string[:url_string.index_of(":")!];
+		url_string = url_string[url.scheme.len + 1 ..];
+		url.path = url_string;
+
+		return url;
+	}
+
+	// Parse userinfo, host, port
+	if (url.scheme != "urn")
+	{
+		usz! authority_end = url_string.index_of_chars("/?#");
+		if (catch authority_end)
+		{
+			authority_end = url_string.len;
+		}
+
+		String authority = url_string[:authority_end]!;
+
+		if (try usz userinfo_end = authority.index_of_char('@')) // search the authority only: '@' may also occur in path/query
+		{
+			String userinfo = authority[:userinfo_end];
+			String[] userpass = userinfo.split(":", 2); // at most two parts so a password may contain ':'
+			defer free(userpass);
+			if (userpass.len > 0) url.username = userpass[0];
+			if (userpass.len > 1)
+			{
+				url.password = userpass[1];
+			}
+			authority = authority[userinfo_end + 1 ..];
+		}
+
+		// Check for IPv6 address in square brackets
+		if (authority.starts_with("[") && authority.contains("]"))
+		{
+			usz ipv6_end = authority.index_of("]")!;
+			url.host = authority[0 .. ipv6_end]; // Includes closing bracket
+			if ((ipv6_end + 1) < authority.len && authority[ipv6_end + 1] == ':') // ':' right after ']' introduces the port
+			{
+				url.port = authority[ipv6_end + 2 ..].to_uint()!;
+			}
+		}
+		else
+		{
+			String[] host_port = authority.split(":");
+			defer mem::free(host_port);
+			if (host_port.len > 1)
+			{
+				url.host = host_port[0];
+				url.port = host_port[1].to_uint()!;
+			}
+			else
+			{
+				url.host = authority;
+			}
+		}
+		url_string = url_string[authority_end ..]!;
+	}
+
+	// Parse path: it ends at the first '?' or '#', whichever comes first
+	long query_index = (long)url_string.index_of_char('?') ?? -1;
+	long fragment_index = (long)url_string.index_of_char('#') ?? -1;
+
+	if (query_index != -1 || fragment_index != -1)
+	{
+		long path_end = min(query_index == -1 ? url_string.len : query_index,
+			fragment_index == -1 ? url_string.len : fragment_index,
+			url_string.len);
+		url.path = url_string[:path_end];
+	}
+	else
+	{
+		url.path = url_string;
+	}
+
+	// Remove the path part from url for further parsing
+	url_string = url_string[url.path.len ..];
+
+	// Parse query
+	if (url_string.starts_with("?"))
+	{
+		fragment_index = (long)url_string.index_of_char('#') ?? -1;
+		if (fragment_index == -1)
+		{
+			fragment_index = url_string.len;
+		}
+		url.query = url_string[1 .. fragment_index - 1];
+		url_string = url_string[fragment_index ..];
+	}
+
+	// Parse fragment
+	if (url_string.starts_with("#"))
+	{
+		url.fragment = url_string[1 ..];
+	}
+
+	return url;
+}
+
+<*
+ Stringify a Url struct.
+
+ @param [in] self "the Url to render"
+ @param [inout] allocator "allocator used for the returned String"
+ @return "the textual form of the Url"
+*>
+fn String Url.to_string(&self, Allocator allocator = allocator::heap()) @dynamic
+{
+	@pool(allocator)
+	{
+		DString out = dstring::temp_new();
+
+		// Scheme and ':'; "//" follows only when there is a host
+		if (self.scheme.len > 0)
+		{
+			out.append_chars(self.scheme);
+			out.append_char(':');
+			if (self.host.len > 0) out.append_chars("//");
+		}
+
+		// Userinfo: "user@" or "user:password@"
+		if (self.username.len > 0)
+		{
+			out.append_chars(self.username);
+			if (self.password.len > 0)
+			{
+				out.append_char(':');
+				out.append_chars(self.password);
+			}
+			out.append_char('@');
+		}
+
+		// Host is written exactly as stored
+		out.append_chars(self.host);
+
+		// A port of 0 means "no port" and is omitted
+		if (self.port != 0)
+		{
+			out.append_char(':');
+			out.appendf("%d", self.port);
+		}
+
+		// Path is written exactly as stored
+		out.append_chars(self.path);
+
+		// Optional "?query"
+		if (self.query.len > 0)
+		{
+			out.append_char('?');
+			out.append_chars(self.query);
+		}
+
+		// Optional "#fragment"
+		if (self.fragment.len > 0)
+		{
+			out.append_char('#');
+			out.append_chars(self.fragment);
+		}
+
+		return out.copy_str(allocator);
+	};
+}
+
+<*
+ Parse the query parameters of the Url into a UrlQueryValues map.
+
+ @param [in] self
+ @param [inout] allocator "allocator for the map and for every value list"
+ @return "a UrlQueryValues HashMap; a key given without '=' maps to an empty value"
+*>
+fn UrlQueryValues Url.query_values(&self, Allocator allocator)
+{
+	UrlQueryValues vals;
+	vals.init(allocator);
+
+	Splitter raw_vals = self.query.tokenize("&");
+	while (try String rv = raw_vals.next())
+	{
+		@pool(allocator)
+		{
+			String[] parts = rv.tsplit("=", 2); // split on the first '=' only, so values may contain '='
+			String value = parts.len > 1 ? parts[1] : ""; // a bare key ("?flag") has no parts[1]
+			if (try existing = vals.get_ref(parts[0]))
+			{
+				existing.push(value);
+			}
+			else
+			{
+				UrlQueryValueList new_list;
+				new_list.new_init_with_array({ value }, allocator);
+				vals[parts[0]] = new_list;
+			}
+		};
+	}
+	return vals;
+}
+
+<*
+ Parse the query parameters of the Url into a UrlQueryValues map
+ allocated on the heap, to be freed using values.free()
+
+ @param [in] self
+ @return "a UrlQueryValues map"
+*>
+fn UrlQueryValues Url.new_query_values(&self)
+{
+	return self.query_values(allocator::heap()) @inline;
+}
+
+<*
+ Parse the query parameters of the Url into a UrlQueryValues map
+ stored on the temp allocator.
+
+ @param [in] self
+ @return "a UrlQueryValues map"
+*>
+fn UrlQueryValues Url.temp_query_values(&self)
+{
+	return self.query_values(allocator::temp()) @inline;
+}
+
+fn void UrlQueryValues.free(&self) // frees every value list first, then the map itself
+{
+	self.map.@each(;String key, UrlQueryValueList value)
+	{
+		value.free();
+	};
+	self.map.free();
+}
+
diff --git a/releasenotes.md b/releasenotes.md
index afd7a7f43..95f7e741b 100644
--- a/releasenotes.md
+++ b/releasenotes.md
@@ -54,6 +54,7 @@
 - Add "skip_empty" to split methods. Add split_to_buffer method.
 - Add `@enum_from_value`.
 - Updated hash function.
+- Added URL parser.
 
 ## 0.6.5 Change list
 
diff --git a/test/unit/stdlib/net/url.c3 b/test/unit/stdlib/net/url.c3
new file mode 100644
index 000000000..115eea69e
--- /dev/null
+++ b/test/unit/stdlib/net/url.c3
@@ -0,0 +1,314 @@
+module urltest @test;
+
+import std::io;
+import std::net::url;
+
+// Parser tests
+
+fn void!
test_parse_foo()
+{
+	Url url = url::parse("foo://example.com:8042/over/there?name=ferret#nose")!; // host, port, path, query and fragment all present
+
+	assert(url.scheme == "foo", "got '%s'", url.scheme);
+	assert(url.host == "example.com", "got '%s'", url.host);
+	assert(url.port == 8042, "got '%d'", url.port);
+	assert(url.username == "", "got '%s'", url.username);
+	assert(url.password == "", "got '%s'", url.password);
+	assert(url.path == "/over/there", "got '%s'", url.path);
+	assert(url.query == "name=ferret", "got '%s'", url.query);
+	assert(url.fragment == "nose", "got: '%s'", url.fragment);
+}
+
+fn void! test_parse_urn()
+{
+	Url url = url::parse("urn:example:animal:ferret:nose")!; // no "://": everything after the first ':' becomes the path
+
+	assert(url.scheme == "urn");
+	assert(url.host == "");
+	assert(url.port == 0);
+	assert(url.username == "", "got '%s'", url.username);
+	assert(url.password == "", "got '%s'", url.password);
+	assert(url.path == "example:animal:ferret:nose");
+	assert(url.query == "");
+	assert(url.fragment == "");
+}
+
+fn void! test_parse_jdbc()
+{
+	Url url = url::parse("jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true")!; // scheme is all text before "://", so it may contain ':'
+
+	assert(url.scheme == "jdbc:mysql");
+	assert(url.host == "localhost");
+	assert(url.port == 3306);
+	assert(url.username == "test_user", "got '%s'", url.username);
+	assert(url.password == "ouupppssss", "got '%s'", url.password);
+	assert(url.path == "/sakila");
+	assert(url.query == "profileSQL=true");
+	assert(url.fragment == "");
+}
+
+fn void! test_parse_ftp()
+{
+	Url url = url::parse("ftp://ftp.is.co.za/rfc/rfc1808.txt")!; // no port in the URL: port is expected to stay 0
+
+	assert(url.scheme == "ftp");
+	assert(url.host == "ftp.is.co.za");
+	assert(url.port == 0);
+	assert(url.username == "", "got '%s'", url.username);
+	assert(url.password == "", "got '%s'", url.password);
+	assert(url.path == "/rfc/rfc1808.txt");
+	assert(url.query == "");
+	assert(url.fragment == "");
+}
+
+fn void!
test_parse_http()
+{
+	Url url = url::parse("http://www.ietf.org/rfc/rfc2396.txt#header1")!; // fragment directly after the path, no query
+
+	assert(url.scheme == "http");
+	assert(url.host == "www.ietf.org");
+	assert(url.port == 0);
+	assert(url.username == "", "got '%s'", url.username);
+	assert(url.password == "", "got '%s'", url.password);
+	assert(url.path == "/rfc/rfc2396.txt");
+	assert(url.query == "");
+	assert(url.fragment == "header1");
+}
+
+fn void! test_parse_ldap()
+{
+	Url url = url::parse("ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two")!; // bracketed IPv6 host, no port
+
+	assert(url.scheme == "ldap");
+	assert(url.host == "[2001:db8::7]"); // brackets are kept in the host
+	assert(url.port == 0);
+	assert(url.username == "", "got '%s'", url.username);
+	assert(url.password == "", "got '%s'", url.password);
+	assert(url.path == "/c=GB");
+	assert(url.query == "objectClass=one&objectClass=two");
+	assert(url.fragment == "");
+}
+
+fn void! test_parse_mailto()
+{
+	Url url = url::parse("mailto:John.Doe@example.com")!; // no "://": the '@' stays in the path, no userinfo is extracted
+
+	assert(url.scheme == "mailto");
+	assert(url.host == "");
+	assert(url.port == 0);
+	assert(url.username == "", "got '%s'", url.username);
+	assert(url.password == "", "got '%s'", url.password);
+	assert(url.path == "John.Doe@example.com");
+	assert(url.query == "");
+	assert(url.fragment == "");
+}
+
+fn void! test_parse_news()
+{
+	Url url = url::parse("news:comp.infosystems.www.servers.unix")!; // scheme plus opaque remainder stored as the path
+
+	assert(url.scheme == "news");
+	assert(url.host == "");
+	assert(url.port == 0);
+	assert(url.username == "", "got '%s'", url.username);
+	assert(url.password == "", "got '%s'", url.password);
+	assert(url.path == "comp.infosystems.www.servers.unix");
+	assert(url.query == "");
+	assert(url.fragment == "");
+}
+
+fn void!
test_parse_tel()
+{
+	Url url = url::parse("tel:+1-816-555-1212")!; // no "://": the number is stored as the path
+
+	assert(url.scheme == "tel");
+	assert(url.host == "");
+	assert(url.port == 0);
+	assert(url.username == "", "got '%s'", url.username);
+	assert(url.password == "", "got '%s'", url.password);
+	assert(url.path == "+1-816-555-1212");
+	assert(url.query == "");
+	assert(url.fragment == "");
+}
+
+fn void! test_parse_telnet()
+{
+	Url url = url::parse("telnet://192.0.2.16:80/")!; // IPv4 host with port and a bare "/" path
+
+	assert(url.scheme == "telnet");
+	assert(url.host == "192.0.2.16");
+	assert(url.port == 80);
+	assert(url.username == "", "got '%s'", url.username);
+	assert(url.password == "", "got '%s'", url.password);
+	assert(url.path == "/");
+	assert(url.query == "");
+	assert(url.fragment == "");
+}
+
+fn void! test_parse_urn2()
+{
+	Url url = url::parse("urn:oasis:names:specification:docbook:dtd:xml:4.1.2")!; // further ':' stay inside the path
+
+	assert(url.scheme == "urn");
+	assert(url.host == "");
+	assert(url.port == 0);
+	assert(url.username == "", "got '%s'", url.username);
+	assert(url.password == "", "got '%s'", url.password);
+	assert(url.path == "oasis:names:specification:docbook:dtd:xml:4.1.2");
+	assert(url.query == "");
+	assert(url.fragment == "");
+}
+
+fn void! test_parse_empty() // returns void! like its siblings because the body rethrows with '!'
+{
+	Url url = url::parse(" ")!; // whitespace-only input trims to "" and yields a zero-valued Url
+
+	assert(url.scheme == "");
+	assert(url.host == "");
+	assert(url.port == 0);
+	assert(url.username == "", "got '%s'", url.username);
+	assert(url.password == "", "got '%s'", url.password);
+	assert(url.path == "");
+	assert(url.query == "");
+	assert(url.fragment == "");
+}
+
+// to_string() tests
+
+fn void! test_string_foo()
+{
+	Url url = {.scheme="foo", .host="example.com", .port=8042, .path="/over/there", .query="name=ferret", .fragment="nose"};
+	String str = string::new_format("%s", url); // NOTE(review): str is never freed in this test - confirm that is acceptable
+
+	assert(str == "foo://example.com:8042/over/there?name=ferret#nose");
+}
+
+fn void!
test_string_urn()
+{
+	Url url = {.scheme="urn", .path="example:animal:ferret:nose"}; // no host, so no "//" is expected after the scheme
+	String str = string::new_format("%s", url); // NOTE(review): str is never freed in these tests - confirm that is acceptable
+
+	assert(str == "urn:example:animal:ferret:nose");
+}
+
+fn void! test_string_jdbc()
+{
+	Url url = {.scheme="jdbc:mysql", .host="localhost", .port=3306, .username="test_user", .password="ouupppssss", .path="/sakila", .query="profileSQL=true"};
+	String str = string::new_format("%s", url);
+
+	assert(str == "jdbc:mysql://test_user:ouupppssss@localhost:3306/sakila?profileSQL=true");
+}
+
+fn void! test_string_ftp()
+{
+	Url url = {.scheme="ftp", .host="ftp.is.co.za", .path="/rfc/rfc1808.txt"}; // port left at 0, so no ":port" is expected
+	String str = string::new_format("%s", url);
+
+	assert(str == "ftp://ftp.is.co.za/rfc/rfc1808.txt");
+}
+
+fn void! test_string_http()
+{
+	Url url = {.scheme="http", .host="www.ietf.org", .path="/rfc/rfc2396.txt#header1"}; // the "#header1" is part of .path here, .fragment is unset
+	String str = string::new_format("%s", url);
+
+	assert(str == "http://www.ietf.org/rfc/rfc2396.txt#header1");
+}
+
+fn void! test_string_ldap()
+{
+	Url url = {.scheme="ldap", .host="[2001:db8::7]", .path="/c=GB?objectClass=one&objectClass=two"}; // the "?..." is part of .path here, .query is unset
+	String str = string::new_format("%s", url);
+
+	assert(str == "ldap://[2001:db8::7]/c=GB?objectClass=one&objectClass=two");
+}
+
+fn void! test_string_mailto()
+{
+	Url url = {.scheme="mailto", .path="John.Doe@example.com"};
+	String str = string::new_format("%s", url);
+
+	assert(str == "mailto:John.Doe@example.com");
+}
+
+fn void! test_string_news()
+{
+	Url url = {.scheme="news", .path="comp.infosystems.www.servers.unix"};
+	String str = string::new_format("%s", url);
+	assert(str == "news:comp.infosystems.www.servers.unix");
+}
+
+fn void! test_string_tel()
+{
+	Url url = {.scheme="tel", .path="+1-816-555-1212"};
+	String str = string::new_format("%s", url);
+
+	assert(str == "tel:+1-816-555-1212");
+}
+
+fn void!
test_string_telnet()
+{
+	Url url = {.scheme="telnet", .host="192.0.2.16", .port=80, .path="/"};
+	String str = string::new_format("%s", url); // NOTE(review): str is never freed in these tests - confirm that is acceptable
+
+	assert(str == "telnet://192.0.2.16:80/");
+}
+
+fn void! test_string_urn2()
+{
+	Url url = {.scheme="urn", .path="oasis:names:specification:docbook:dtd:xml:4.1.2"};
+	String str = string::new_format("%s", url);
+
+	assert(str == "urn:oasis:names:specification:docbook:dtd:xml:4.1.2");
+}
+
+fn void! test_string_empty()
+{
+	Url url = {}; // zero-valued Url is expected to format as the empty string
+	String str = string::new_format("%s", url);
+
+	assert(str == "");
+}
+
+// query_values
+
+fn void! test_query_values1()
+{
+	Url url = url::parse("foo://example.com:8042/over/there?name=ferret=ok#nose")!; // the value itself contains '='
+
+	UrlQueryValues vals = url.new_query_values();
+	defer vals.free();
+
+	assert(vals.len() == 1);
+	UrlQueryValueList l = vals["name"]!;
+
+	assert(l.len() == 1);
+	assert(l[0] == "ferret=ok"); // only the first '=' separates key from value
+}
+
+fn void! test_query_values2()
+{
+	Url url = url::parse("foo://example.com:8042/over/there?name=ferret&age=99&age=11#nose")!; // "age" appears twice
+
+	UrlQueryValues vals = url.new_query_values();
+	defer vals.free();
+	assert(vals.len() == 2); // two distinct keys: name and age
+
+	UrlQueryValueList l_name = vals["name"]!;
+	assert(l_name.len() == 1);
+	assert(l_name[0] == "ferret");
+
+	UrlQueryValueList l_age = vals["age"]!;
+	assert(l_age.len() == 2); // repeated keys collect their values in order of appearance
+	assert(l_age[0] == "99");
+	assert(l_age[1] == "11");
+}
+
+fn void! test_query_values_withempty()
+{
+	Url url = url::parse("foo://example.com:8042/over/there?name=ferret&&&age=99&age=11")!; // consecutive '&' must not create entries
+
+	UrlQueryValues vals = url.new_query_values();
+	defer vals.free();
+	assert(vals.len() == 2);
+}
\ No newline at end of file