From 54aeb6ad035717d84ab2b6e4b7f0d805d73fe750 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marius=20Andr=C3=A9=20Elsfjordstrand=20Beck?= Date: Wed, 1 Feb 2023 17:38:13 +0100 Subject: [PATCH] Prefix url parameter with https only if match type parameter is exact. Other match types result in the scheme being stripped anyway so no need to search twice with the same key. --- server/api/api.go | 29 +++++++++++++++++------------ server/api/api_test.go | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 12 deletions(-) create mode 100644 server/api/api_test.go diff --git a/server/api/api.go b/server/api/api.go index ca2561a..96db330 100644 --- a/server/api/api.go +++ b/server/api/api.go @@ -140,12 +140,28 @@ func Parse(r *http.Request) (*CoreAPI, error) { // currently the "cdx" does not accept collection as a query or param coreApi.Collection = "all" + matchType := query.Get("matchType") + if matchType != "" { + if !contains(matchTypes, matchType) { + return nil, fmt.Errorf("matchType must be one of %v, was: %s", matchTypes, matchType) + } + coreApi.MatchType = matchType + } else { + // Default to exact + coreApi.MatchType = MatchTypeExact + } + urls := query["url"] if len(urls) == 1 && !schemeRegExp.MatchString(urls[0]) { u := urls[0] + // Add http scheme urls = []string{ "http://" + u, - "https://" + u, + } + // Add https scheme only for exact match to get results for both http/https + // If match type is prefix, domain or host the scheme part will be stripped so no need.
+ if coreApi.MatchType == MatchTypeExact { + urls = append(urls, "https://"+u) } } for _, urlStr := range urls { @@ -160,17 +176,6 @@ func Parse(r *http.Request) (*CoreAPI, error) { return nil, err } - matchType := query.Get("matchType") - if matchType != "" { - if !contains(matchTypes, matchType) { - return nil, fmt.Errorf("matchType must be one of %v, was: %s", matchTypes, matchType) - } - coreApi.MatchType = matchType - } else { - // Default to exact - coreApi.MatchType = MatchTypeExact - } - limit := query.Get("limit") if limit != "" { l, err := strconv.Atoi(limit) diff --git a/server/api/api_test.go b/server/api/api_test.go new file mode 100644 index 0000000..a92e83d --- /dev/null +++ b/server/api/api_test.go @@ -0,0 +1,39 @@ +package api + +import ( + "net/http" + "net/url" + "testing" + + "github.com/nlnwa/gowarcserver/surt" +) + +func TestParse(t *testing.T) { + domains := []string{ + "no", + "kommune.no", + "nb.no", + } + + for _, domain := range domains { + u, _ := url.Parse("http://example.test/") + values := u.Query() + values.Set("url", domain) + values.Set("matchType", "domain") + u.RawQuery = values.Encode() + + r := &http.Request{URL: u} + + a, err := Parse(r) + if err != nil { + t.Fatal(err) + } + + got := SearchAPI{a}.Key() + want := MatchType(surt.UrlToSsurt(a.Urls[0]), MatchTypeDomain) + if got != want { + t.Errorf("Got: '%s', Want: '%s'", got, want) + } + } + +}