Skip to content

Commit

Permalink
add canonicalize methods
Browse files Browse the repository at this point in the history
  • Loading branch information
anonrig committed Dec 4, 2024
1 parent d2cd4f8 commit e51503f
Show file tree
Hide file tree
Showing 2 changed files with 154 additions and 1 deletion.
34 changes: 33 additions & 1 deletion include/ada/url_pattern.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,38 @@

namespace ada {

// Canonicalization helpers for the individual URLPattern components. Each
// helper takes the raw component text and returns its canonical form, or
// std::nullopt when the component cannot be canonicalized.
namespace url_pattern {

// @see https://wicg.github.io/urlpattern/#canonicalize-a-username
std::optional<std::string> canonicalize_username(std::string_view input);

// @see https://wicg.github.io/urlpattern/#canonicalize-a-password
std::optional<std::string> canonicalize_password(std::string_view input);

// @see https://wicg.github.io/urlpattern/#canonicalize-a-hostname
std::optional<std::string> canonicalize_hostname(std::string_view input);

// @see https://wicg.github.io/urlpattern/#canonicalize-an-ipv6-hostname
std::optional<std::string> canonicalize_ipv6_hostname(std::string_view input);

// `protocol` is the scheme used for the temporary URL the port is applied to;
// it defaults to the placeholder scheme "fake".
// @see https://wicg.github.io/urlpattern/#canonicalize-a-port
std::optional<std::string> canonicalize_port(
std::string_view input, std::string_view protocol = "fake");

// @see https://wicg.github.io/urlpattern/#canonicalize-a-pathname
std::optional<std::string> canonicalize_pathname(std::string_view input);

// @see https://wicg.github.io/urlpattern/#canonicalize-an-opaque-pathname
std::optional<std::string> canonicalize_opaque_pathname(std::string_view input);

// @see https://wicg.github.io/urlpattern/#canonicalize-a-search
std::optional<std::string> canonicalize_search(std::string_view input);

// @see https://wicg.github.io/urlpattern/#canonicalize-a-hash
std::optional<std::string> canonicalize_hash(std::string_view input);

}  // namespace url_pattern

// URLPattern is a Web Platform standard API for matching URLs against a
// pattern syntax (think of it as a regular expression for URLs). It is
// defined in https://wicg.github.io/urlpattern.
Expand Down Expand Up @@ -124,4 +156,4 @@ class URLPattern {

} // namespace ada

#endif
#endif
121 changes: 121 additions & 0 deletions src/url_pattern.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,127 @@

namespace ada {

namespace url_pattern {

// Canonicalizes a URLPattern username component.
//
// An empty input is returned unchanged. Any other input is applied to a
// throwaway URL through set_username(); the value reported back by
// get_username() is the canonical form.
//
// @param input raw username text.
// @return the canonical username, or std::nullopt if set_username() rejects
//         the input.
std::optional<std::string> canonicalize_username(std::string_view input) {
  // Only the empty string may bypass the URL round-trip. (The check must be
  // `empty()`: testing `size()` would short-circuit every non-empty input.)
  if (input.empty()) [[unlikely]] {
    return "";
  }
  auto url = ada::parse<ada::url_aggregator>("fake://dummy.test", nullptr);
  // The dummy URL is a constant, so parsing it is expected to always succeed.
  ADA_ASSERT_TRUE(url.has_value());
  if (!url->set_username(input)) {
    return std::nullopt;
  }
  return std::string(url->get_username());
}

// Canonicalizes a URLPattern password component.
//
// Empty input yields an empty string. Otherwise the input is written into a
// placeholder URL with set_password() and read back with get_password().
//
// @param input raw password text.
// @return the canonical password, or std::nullopt if set_password() fails.
std::optional<std::string> canonicalize_password(std::string_view input) {
  if (!input.empty()) [[likely]] {
    auto url = ada::parse<ada::url_aggregator>("fake://dummy.test", nullptr);
    // Parsing the fixed placeholder URL is expected to always succeed.
    ADA_ASSERT_TRUE(url.has_value());
    if (url->set_password(input)) {
      return std::string(url->get_password());
    }
    return std::nullopt;
  }
  // Nothing to canonicalize.
  return "";
}

// Canonicalizes a URLPattern hostname component.
//
// Empty input yields an empty string. Otherwise the input is written into a
// placeholder URL with set_hostname() and read back with get_hostname().
//
// @param input raw hostname text.
// @return the canonical hostname, or std::nullopt if set_hostname() fails.
std::optional<std::string> canonicalize_hostname(std::string_view input) {
  if (!input.empty()) [[likely]] {
    auto url = ada::parse<ada::url_aggregator>("fake://dummy.test", nullptr);
    // Parsing the fixed placeholder URL is expected to always succeed.
    ADA_ASSERT_TRUE(url.has_value());
    // NOTE(review): no separate up-front validation of the hostname input is
    // performed here (an `isValidHostnameInput`-style pre-check is not wired
    // in); the outcome is decided solely by set_hostname().
    const bool accepted = url->set_hostname(input);
    if (!accepted) {
      return std::nullopt;
    }
    return std::string(url->get_hostname());
  }
  // Nothing to canonicalize.
  return "";
}

// Canonicalizes a URLPattern IPv6 hostname component.
//
// Every character must be an ASCII hex digit, '[', ']' or ':'. If any other
// character is present the input is rejected; otherwise the ASCII-lowercased
// input is returned.
//
// @param input raw IPv6 hostname text (may be empty, which is accepted).
// @return the lowercased hostname, or std::nullopt if an invalid character
//         is found.
std::optional<std::string> canonicalize_ipv6_hostname(std::string_view input) {
  // Optimization opportunity: Use lookup table to speed up checking
  const bool has_invalid_char = std::ranges::any_of(input, [](char c) {
    return c != '[' && c != ']' && c != ':' &&
           !ada::unicode::is_ascii_hex_digit(c);
  });
  // Reject on the first invalid character. (Rejecting when *all* characters
  // are valid would refuse exactly the inputs that should be accepted.)
  if (has_invalid_char) {
    return std::nullopt;
  }
  // The canonical form is the ASCII-lowercased input.
  std::string result(input);
  for (char& c : result) {
    if (c >= 'A' && c <= 'Z') {
      c = static_cast<char>(c - 'A' + 'a');
    }
  }
  return result;
}

// Canonicalizes a URLPattern port component.
//
// Empty input yields an empty string. Otherwise a placeholder URL of the form
// "<protocol>://dummy.test" is parsed, the port is applied with set_port(),
// and the value reported by get_port() is returned.
//
// @param input    raw port text.
// @param protocol scheme used to build the placeholder URL.
// @return the canonical port, or std::nullopt if the placeholder URL does not
//         parse or set_port() fails.
std::optional<std::string> canonicalize_port(std::string_view input,
                                             std::string_view protocol) {
  if (input.empty()) [[unlikely]] {
    return "";
  }
  std::string base_url(protocol);
  base_url += "://dummy.test";
  auto url = ada::parse<ada::url_aggregator>(base_url, nullptr);
  // Unlike the fixed-URL helpers, parsing can fail here because the scheme
  // comes from the caller.
  if (!url || !url->set_port(input)) {
    return std::nullopt;
  }
  return std::string(url->get_port());
}

// Canonicalizes a URLPattern pathname component.
//
// Empty input yields an empty string. Otherwise the input is appended to a
// placeholder URL and parsed, and the parsed pathname is returned. When the
// input has no leading '/', a "/-" prefix is inserted before it and the first
// two characters of the parsed pathname are dropped again afterwards.
//
// @param input raw pathname text.
// @return the canonical pathname, or std::nullopt if the URL fails to parse.
std::optional<std::string> canonicalize_pathname(std::string_view input) {
  if (input.empty()) [[unlikely]] {
    return "";
  }
  const bool leading_slash = input.starts_with('/');

  std::string full_url = "fake://fake-url";
  if (!leading_slash) {
    full_url += "/-";
  }
  full_url += input;

  auto url = ada::parse<ada::url_aggregator>(full_url, nullptr);
  if (!url) {
    return std::nullopt;
  }
  const auto pathname = url->get_pathname();
  if (leading_slash) {
    return std::string(pathname);
  }
  // NOTE(review): substr(2) presumes the parsed pathname still carries the
  // two-character "/-" prefix; confirm this holds for inputs made of dot
  // segments (e.g. "../.."), where the parsed path may come out shorter.
  return std::string(pathname.substr(2));
}

// Canonicalizes a URLPattern opaque pathname component.
//
// Empty input yields an empty string. Otherwise the input is appended to the
// "fake:" scheme prefix, the result is parsed as a URL, and the parsed
// pathname is returned.
//
// @param input raw opaque pathname text.
// @return the canonical pathname, or std::nullopt if the URL fails to parse.
std::optional<std::string> canonicalize_opaque_pathname(
    std::string_view input) {
  if (input.empty()) [[unlikely]] {
    return "";
  }
  std::string opaque_url = "fake:";
  opaque_url += input;
  auto url = ada::parse<ada::url_aggregator>(opaque_url, nullptr);
  if (!url) {
    return std::nullopt;
  }
  return std::string(url->get_pathname());
}

// Canonicalizes a URLPattern search (query) component.
//
// Empty input yields an empty string. Otherwise the input is applied to a
// placeholder URL with set_search(), and the value reported by get_search()
// is returned with its first character removed.
//
// @param input raw search text.
// @return the canonical search string; this function never returns
//         std::nullopt.
std::optional<std::string> canonicalize_search(std::string_view input) {
  if (input.empty()) [[unlikely]] {
    return "";
  }
  auto url = ada::parse<ada::url_aggregator>("fake://dummy.test", nullptr);
  // Parsing the fixed placeholder URL is expected to always succeed.
  ADA_ASSERT_TRUE(url.has_value());
  url->set_search(input);
  const auto search = url->get_search();
  if (search.empty()) {
    return "";
  }
  // Drop the first character (presumably the '?' delimiter).
  return std::string(search.substr(1));
}

// Canonicalizes a URLPattern hash (fragment) component.
//
// Empty input yields an empty string. Otherwise the input is applied to a
// placeholder URL with set_hash(), and the value reported by get_hash() is
// returned with its first character removed.
//
// @param input raw hash text.
// @return the canonical hash string; this function never returns
//         std::nullopt.
std::optional<std::string> canonicalize_hash(std::string_view input) {
  if (input.empty()) [[unlikely]] {
    return "";
  }
  auto url = ada::parse<ada::url_aggregator>("fake://dummy.test", nullptr);
  // Parsing the fixed placeholder URL is expected to always succeed.
  ADA_ASSERT_TRUE(url.has_value());
  url->set_hash(input);
  const auto hash = url->get_hash();
  // Drop the first character (presumably the '#' delimiter) when present.
  return hash.empty() ? std::string() : std::string(hash.substr(1));
}

} // namespace url_pattern

URLPattern::Component::Component(std::string_view pattern_,
std::string_view regex_,
const std::vector<std::string>& names_) {
Expand Down

0 comments on commit e51503f

Please sign in to comment.