Skip to content

Commit

Permalink
Merge pull request #38 from Avokadoen/daterange
Browse files Browse the repository at this point in the history
Daterange test and stricter API use
  • Loading branch information
maeb authored Jul 15, 2021
2 parents 31e7297 + 3297cd9 commit 797b347
Show file tree
Hide file tree
Showing 5 changed files with 314 additions and 34 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/golangci-lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:

# golangci-lint command line arguments.
# Enable additional linters (see: https://golangci-lint.run/usage/linters/)
args: -E "bodyclose" -E "dogsled" -E "durationcheck" -E "errorlint" -E "forcetypeassert" -E "noctx" -E "exhaustive" -E "exportloopref"
args: -E "bodyclose" -E "dogsled" -E "durationcheck" -E "errorlint" -E "forcetypeassert" -E "noctx" -E "exhaustive" -E "exportloopref" --timeout 3m0s

# Optional: show only new issues if it's a pull request. The default value is `false`.
# only-new-issues: true
Expand Down
46 changes: 25 additions & 21 deletions pkg/server/warcserver/cdxserverapi.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,17 @@ package warcserver

import (
"fmt"
"net/http"
"strconv"
"strings"

"github.com/dgraph-io/badger/v3"
"github.com/gorilla/mux"
"github.com/nlnwa/gowarcserver/pkg/index"
"github.com/nlnwa/gowarcserver/pkg/timestamp"
cdx "github.com/nlnwa/gowarcserver/proto"
log "github.com/sirupsen/logrus"
"google.golang.org/protobuf/proto"
"net/http"
"strconv"
"strings"
)

type RenderFunc func(record *cdx.Cdx) error
Expand All @@ -54,9 +55,9 @@ func cdxjToPywbJson(record *cdx.Cdx) *pywbJson {
Digest: record.Sha,
Length: record.Rle,
// Offset must be an empty string or else pywb will try to use its internal index.
Offset: "",
Offset: "",
// Filename must be an empty string or else pywb will try to use its internal index.
Filename: "",
Filename: "",
}
return js
}
Expand Down Expand Up @@ -96,8 +97,7 @@ var outputs = []string{OutputCdxj, OutputJson, OutputContent}
type CdxjServerApi struct {
Collection string
Url string
From string
To string
FromTo DateRange
MatchType string
Limit uint
Sort string
Expand Down Expand Up @@ -132,11 +132,11 @@ func ParseCdxjApi(r *http.Request) (*CdxjServerApi, error) {
}
cdxjApi.Url = url

from := query.Get("from")
cdxjApi.From = From(from)

to := query.Get("to")
cdxjApi.To = To(to)
var err error
cdxjApi.FromTo, err = NewDateRange(query.Get("from"), query.Get("to"))
if err != nil {
return nil, err
}

matchType := query.Get("matchType")
if matchType != "" {
Expand Down Expand Up @@ -220,11 +220,6 @@ func (c DbCdxServer) search(api *CdxjServerApi, renderFunc RenderFunc) (uint, er
return 0, err
}

dateRange := DateRange{
from: api.From,
to: api.To,
}

filter := parseFilter(api.Filter)

sorter := &sorter{
Expand All @@ -235,14 +230,18 @@ func (c DbCdxServer) search(api *CdxjServerApi, renderFunc RenderFunc) (uint, er

perItemFn := func(item *badger.Item) (stopIteration bool) {
key := Key(item.Key())

if !dateRange.contains(key.ts()) {
contains, err := api.FromTo.contains(key.ts())
if err != nil {
log.Warnf("%s", err)
return false
}
if !contains {
log.Debugf("key timestamp not in range")
return false
}

result := new(cdx.Cdx)
err := item.Value(func(v []byte) error {
err = item.Value(func(v []byte) error {
if err := proto.Unmarshal(v, result); err != nil {
return err
}
Expand Down Expand Up @@ -274,7 +273,12 @@ func (c DbCdxServer) search(api *CdxjServerApi, renderFunc RenderFunc) (uint, er
sortPerItemFn := func(item *badger.Item) bool {
key := Key(item.Key())

if !dateRange.contains(key.ts()) {
contains, err := api.FromTo.contains(key.ts())
if err != nil {
log.Warnf("%s", err)
return false
}
if !contains {
return false
}

Expand Down
97 changes: 87 additions & 10 deletions pkg/server/warcserver/daterange.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,101 @@ package warcserver

import (
"fmt"
"math"
"time"
)

type DateRange struct {
from string
to string
from int64
to int64
}

const timeLayout = "20060102150405"

// NewDateRange builds a DateRange from the raw 'from' and 'to' timestamp
// strings. The lower bound is produced by From and the upper bound by To; the
// first error reported by either of them is returned together with a zero
// DateRange.
func NewDateRange(fromstr string, tostr string) (DateRange, error) {
	var (
		bounds DateRange
		err    error
	)

	if bounds.from, err = From(fromstr); err != nil {
		return DateRange{}, err
	}
	if bounds.to, err = To(tostr); err != nil {
		return DateRange{}, err
	}

	return bounds, nil
}

// contains returns true if the timestamp ts is contained by the bounds defined by the DateRange d.
func (d DateRange) contains(ts string) bool {
return ts >= d.from && ts <= d.to
// contains reports whether the 14 digit timestamp ts (yyyyMMddHHmmss) lies
// within the inclusive bounds of d. Unlike the strings accepted by From and
// To, ts is expected to always be a complete timestamp, so it is parsed with
// the full layout; a ts that does not match yields false and an error.
func (d DateRange) contains(ts string) (bool, error) {
	parsed, err := time.Parse(timeLayout, ts)
	if err != nil {
		return false, fmt.Errorf("failed to parse ts: %w", err)
	}

	// Both bounds are inclusive, so only values strictly outside are rejected.
	if sec := parsed.Unix(); sec < d.from || sec > d.to {
		return false, nil
	}
	return true, nil
}

// From pads the timestamp f with 0's on the right until the string is 14 characters in length.
func From(f string) string {
return fmt.Sprintf("%s%0*d", f, 14-len(f), 0)
// From converts a 'from' query timestamp into the inclusive lower bound of a
// date range, expressed in Unix seconds.
//
// f must be a prefix of the 14 digit layout yyyyMMddHHmmss with an even length
// of at most 14 characters. Fields that are left out take their lowest value,
// so "2020" becomes 2020-01-01 00:00:00. A string shorter than 4 characters
// cannot carry a year and is treated as "no lower bound": the Unix time of the
// zero time.Time is returned.
//
// An error is returned when f has an odd length, is longer than the layout or
// does not parse as a date.
//
// Implemented according to https://pywb.readthedocs.io/en/latest/manual/cdxserver_api.html#from-to:
func From(f string) (int64, error) {
	fLen := len(f)
	if fLen%2 != 0 {
		return 0, fmt.Errorf("'from' string must have an even length, len: %d", fLen)
	}
	if fLen > len(timeLayout) {
		return 0, fmt.Errorf("expected 'from' string len of at most %d, len: %d", len(timeLayout), fLen)
	}

	// No specified from date
	if fLen < 4 {
		return time.Time{}.Unix(), nil
	}

	// Every even-length prefix of the layout is itself a valid layout
	// (year, year+month, ...), so the prefix matching f's length is used.
	from, err := time.Parse(timeLayout[:fLen], f)
	if err != nil {
		return 0, fmt.Errorf("failed to parse 'from' date %s, %w", f, err)
	}

	return from.Unix(), nil
}

// To pads the timestamp t with 9's on the right until the string is 14 characters in length.
func To(t string) string {
return fmt.Sprintf("%s%.*s", t, 14-len(t), "99999999999999")
// To converts a 'to' query timestamp into the inclusive upper bound of a date
// range, expressed in Unix seconds.
//
// t must be a prefix of the 14 digit layout yyyyMMddHHmmss with an even length
// of at most 14 characters. Fields that are left out take their highest value,
// so "2020" becomes 2020-12-31 23:59:59 and "202002" becomes
// 2020-02-29 23:59:59. A string shorter than 4 characters cannot carry a year
// and is treated as "no upper bound": math.MaxInt64 is returned.
//
// An error is returned when t has an odd length, is longer than the layout or
// does not parse as a date.
//
// Implemented according to https://pywb.readthedocs.io/en/latest/manual/cdxserver_api.html#from-to:
func To(t string) (int64, error) {
	tLen := len(t)
	if tLen%2 != 0 {
		return 0, fmt.Errorf("'to' string must have an even length, len: %d", tLen)
	}
	if tLen > len(timeLayout) {
		return 0, fmt.Errorf("expected 'to' string len of at most %d, len: %d", len(timeLayout), tLen)
	}

	// No specified to date
	if tLen < 4 {
		return math.MaxInt64, nil
	}

	// Every even-length prefix of the layout is itself a valid layout
	// (year, year+month, ...), so the prefix matching t's length is used.
	to, err := time.Parse(timeLayout[:tLen], t)
	if err != nil {
		return 0, fmt.Errorf("failed to parse 'to' date %s, %w", t, err)
	}

	// The parsed value is the first second of the period the user named.
	// The inclusive upper bound is the last second of that period: the start
	// of the following period minus one second. AddDate normalizes overflow,
	// so month lengths and leap years need no special handling.
	switch tLen {
	case 4: // year
		to = to.AddDate(1, 0, 0).Add(-time.Second)
	case 6: // month
		to = to.AddDate(0, 1, 0).Add(-time.Second)
	case 8: // day
		to = to.AddDate(0, 0, 1).Add(-time.Second)
	case 10: // hour
		to = to.Add(time.Hour - time.Second)
	case 12: // minute
		to = to.Add(time.Minute - time.Second)
	}

	return to.Unix(), nil
}
Loading

0 comments on commit 797b347

Please sign in to comment.