From 524cb73c87a08bc43088a516e29ce9507bd0767d Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli
Date: Tue, 3 Dec 2024 20:24:17 -0500
Subject: [PATCH] add canonicalize methods

---
 include/ada/url_pattern.h |  39 ++++++++++++-
 src/url_pattern.cpp       | 135 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 172 insertions(+), 2 deletions(-)

diff --git a/include/ada/url_pattern.h b/include/ada/url_pattern.h
index 5baa7d8d6..36e38d5e8 100644
--- a/include/ada/url_pattern.h
+++ b/include/ada/url_pattern.h
@@ -11,6 +11,41 @@
 
 namespace ada {
 
+namespace url_pattern {
+
+// Each helper returns the canonical form of `input`, or std::nullopt when
+// `input` cannot be canonicalized. Empty input yields the empty string.
+
+// @see https://wicg.github.io/urlpattern/#canonicalize-a-username
+std::optional<std::string> canonicalize_username(std::string_view input);
+
+// @see https://wicg.github.io/urlpattern/#canonicalize-a-password
+std::optional<std::string> canonicalize_password(std::string_view input);
+
+// @see https://wicg.github.io/urlpattern/#canonicalize-a-hostname
+std::optional<std::string> canonicalize_hostname(std::string_view input);
+
+// @see https://wicg.github.io/urlpattern/#canonicalize-an-ipv6-hostname
+std::optional<std::string> canonicalize_ipv6_hostname(std::string_view input);
+
+// @see https://wicg.github.io/urlpattern/#canonicalize-a-port
+std::optional<std::string> canonicalize_port(
+    std::string_view input, std::string_view protocol = "fake");
+
+// @see https://wicg.github.io/urlpattern/#canonicalize-a-pathname
+std::optional<std::string> canonicalize_pathname(std::string_view input);
+
+// @see https://wicg.github.io/urlpattern/#canonicalize-an-opaque-pathname
+std::optional<std::string> canonicalize_opaque_pathname(std::string_view input);
+
+// @see https://wicg.github.io/urlpattern/#canonicalize-a-search
+std::optional<std::string> canonicalize_search(std::string_view input);
+
+// @see https://wicg.github.io/urlpattern/#canonicalize-a-hash
+std::optional<std::string> canonicalize_hash(std::string_view input);
+
+}  // namespace url_pattern
+
 // URLPattern is a Web Platform standard API for matching URLs against a
 // pattern syntax (think of it as a regular expression for URLs). It is
 // defined in https://wicg.github.io/urlpattern.
@@ -37,7 +72,7 @@ class URLPattern {
     // The generated JavaScript regular expression for this component.
     std::string regex = "";
     // The list of sub-component names extracted for this component.
-    std::vector<std::string> names;
+    std::vector<std::string> names{};
   };
 
   // A structure providing matching patterns for individual components
@@ -124,4 +159,4 @@ class URLPattern {
 
 }  // namespace ada
 
-#endif
\ No newline at end of file
+#endif
diff --git a/src/url_pattern.cpp b/src/url_pattern.cpp
index e05bb4324..5bfd890fa 100644
--- a/src/url_pattern.cpp
+++ b/src/url_pattern.cpp
@@ -5,6 +5,141 @@
 
 namespace ada {
 
+namespace url_pattern {
+
+// These helpers implement the "canonicalize a ..." algorithms of the
+// URLPattern specification, mostly by running `input` through a dummy URL.
+// Empty input canonicalizes to the empty string; std::nullopt means failure.
+
+std::optional<std::string> canonicalize_username(std::string_view input) {
+  if (input.empty()) [[unlikely]] {
+    return "";
+  }
+  auto url = ada::parse<url_aggregator>("fake://dummy.test", nullptr);
+  ADA_ASSERT_TRUE(url.has_value());
+  if (!url->set_username(input)) {
+    return std::nullopt;
+  }
+  return std::string(url->get_username());
+}
+
+std::optional<std::string> canonicalize_password(std::string_view input) {
+  if (input.empty()) [[unlikely]] {
+    return "";
+  }
+  auto url = ada::parse<url_aggregator>("fake://dummy.test", nullptr);
+
+  ADA_ASSERT_TRUE(url.has_value());
+  if (!url->set_password(input)) {
+    return std::nullopt;
+  }
+  return std::string(url->get_password());
+}
+
+std::optional<std::string> canonicalize_hostname(std::string_view input) {
+  if (input.empty()) [[unlikely]] {
+    return "";
+  }
+  auto url = ada::parse<url_aggregator>("fake://dummy.test", nullptr);
+  ADA_ASSERT_TRUE(url.has_value());
+  // if (!isValidHostnameInput(hostname)) return kj::none;
+  if (!url->set_hostname(input)) {
+    return std::nullopt;
+  }
+  return std::string(url->get_hostname());
+}
+
+std::optional<std::string> canonicalize_ipv6_hostname(std::string_view input) {
+  // The only characters allowed are '[', ']', ':' and ASCII hex digits;
+  // a single offending character makes the whole input invalid.
+  // Optimization opportunity: Use lookup table to speed up checking
+  if (!std::ranges::all_of(input, [](char c) {
+        return c == '[' || c == ']' || c == ':' ||
+               ada::unicode::is_ascii_hex_digit(c);
+      })) {
+    return std::nullopt;
+  }
+  // The specification appends every code point ASCII-lowercased. After the
+  // check above, 'A'-'F' are the only uppercase characters left to fold.
+  std::string result(input);
+  for (char& c : result) {
+    if (c >= 'A' && c <= 'F') {
+      c = static_cast<char>(c - 'A' + 'a');
+    }
+  }
+  return result;
+}
+
+std::optional<std::string> canonicalize_port(std::string_view input,
+                                             std::string_view protocol) {
+  if (input.empty()) [[unlikely]] {
+    return "";
+  }
+  auto url = ada::parse<url_aggregator>(
+      std::string(protocol) + "://dummy.test", nullptr);
+  if (url && url->set_port(input)) {
+    return std::string(url->get_port());
+  }
+  return std::nullopt;
+}
+
+std::optional<std::string> canonicalize_pathname(std::string_view input) {
+  if (input.empty()) [[unlikely]] {
+    return "";
+  }
+  const bool leading_slash = input.starts_with("/");
+  // Input without a leading slash is prefixed with "/-" before parsing;
+  // those two characters are stripped from the parsed pathname below.
+  auto path_prefix = leading_slash ? "" : "/-";
+  auto full_url =
+      std::string("fake://fake-url") + path_prefix + std::string(input);
+  if (auto url = ada::parse<url_aggregator>(full_url, nullptr)) {
+    const auto pathname = url->get_pathname();
+    return leading_slash ? std::string(pathname)
+                         : std::string(pathname.substr(2));
+  }
+  return std::nullopt;
+}
+
+std::optional<std::string> canonicalize_opaque_pathname(
+    std::string_view input) {
+  if (input.empty()) [[unlikely]] {
+    return "";
+  }
+  if (auto url = ada::parse<url_aggregator>("fake:" + std::string(input),
+                                            nullptr)) {
+    return std::string(url->get_pathname());
+  }
+  return std::nullopt;
+}
+
+std::optional<std::string> canonicalize_search(std::string_view input) {
+  if (input.empty()) [[unlikely]] {
+    return "";
+  }
+  auto url = ada::parse<url_aggregator>("fake://dummy.test", nullptr);
+  ADA_ASSERT_TRUE(url.has_value());
+  url->set_search(input);
+  const auto search = url->get_search();
+  return !search.empty() ? std::string(search.substr(1)) : "";
+}
+
+std::optional<std::string> canonicalize_hash(std::string_view input) {
+  if (input.empty()) [[unlikely]] {
+    return "";
+  }
+  auto url = ada::parse<url_aggregator>("fake://dummy.test", nullptr);
+  ADA_ASSERT_TRUE(url.has_value());
+  url->set_hash(input);
+  const auto hash = url->get_hash();
+  if (hash.empty()) {
+    return "";
+  }
+  return std::string(hash.substr(1));
+}
+
+}  // namespace url_pattern
+
 URLPattern::Component::Component(std::string_view pattern_,
                                  std::string_view regex_,
                                  const std::vector<std::string>& names_) {