Skip to content

Commit

Permalink
add pagination to GET /api/v1/find endpoint
Browse files Browse the repository at this point in the history
`format=tree` pagination provides top-level comments with all replies
and returns the last top-level comment as `last_comment` to be used
as `offset` for the next page. If comments and replies overflow
the limit, the one stepping out of the limit will not be returned.
 If the first comment and its replies after the given offset overflow
 the limit, it will be returned with all the replies.

`format=plain` pagination works by providing all comments and returning
the last comment as `last_comment` to be used as `offset`
for the next page.
  • Loading branch information
paskal committed Dec 10, 2024
1 parent d5162d3 commit 82a0888
Show file tree
Hide file tree
Showing 6 changed files with 284 additions and 42 deletions.
64 changes: 60 additions & 4 deletions backend/app/rest/api/rest_public.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
cache "github.com/go-pkgz/lcw/v2"
log "github.com/go-pkgz/lgr"
R "github.com/go-pkgz/rest"
"github.com/google/uuid"
"github.com/skip2/go-qrcode"

"github.com/umputun/remark42/backend/app/rest"
Expand Down Expand Up @@ -48,10 +49,18 @@ type pubStore interface {
Counts(siteID string, postIDs []string) ([]store.PostInfo, error)
}

// GET /find?site=siteID&url=post-url&format=[tree|plain]&sort=[+/-time|+/-score|+/-controversy]&view=[user|all]&since=unix_ts_msec
// find comments for given post. Returns in tree or plain formats, sorted
// GET /find?site=siteID&url=post-url&format=[tree|plain]&sort=[+/-time|+/-score|+/-controversy]&view=[user|all]&since=unix_ts_msec&limit=100&offset_id={id}
// find comments for given post. Returns in tree or plain formats, sorted.
//
// When `url` parameter is not set (e.g. request is for site-wide comments), does not return deleted comments.
//
// When `limit` is set, first {limit} comments are returned. When `offset_id` is set, comments are returned starting
// after the comment with the given id.
// format="tree" limits comments by top-level comments and all their replies,
// and never returns parent comment with only part of replies.
//
// `count` in the response refers to total number of non-deleted comments,
// `count_left` to amount of comments left to be returned _including deleted_.
func (s *public) findCommentsCtrl(w http.ResponseWriter, r *http.Request) {
locator := store.Locator{SiteID: r.URL.Query().Get("site"), URL: r.URL.Query().Get("url")}
sort := r.URL.Query().Get("sort")
Expand All @@ -70,7 +79,24 @@ func (s *public) findCommentsCtrl(w http.ResponseWriter, r *http.Request) {
since = time.Time{} // since doesn't make sense for tree
}

log.Printf("[DEBUG] get comments for %+v, sort %s, format %s, since %v", locator, sort, format, since)
limitParam := r.URL.Query().Get("limit")
var limit int
if limitParam != "" {
if limit, err = strconv.Atoi(limitParam); err != nil {
rest.SendErrorJSON(w, r, http.StatusBadRequest, err, "bad limit value", rest.ErrCommentNotFound)
return
}
}

offsetID := r.URL.Query().Get("offset_id")
if offsetID != "" {
if _, err = uuid.Parse(offsetID); err != nil {
rest.SendErrorJSON(w, r, http.StatusBadRequest, err, "bad offset_id value", rest.ErrCommentNotFound)
return
}
}

log.Printf("[DEBUG] get comments for %+v, sort %s, format %s, since %v, limit %d, offset %s", locator, sort, format, since, limit, offsetID)

key := cache.NewKey(locator.SiteID).ID(URLKeyWithUser(r)).Scopes(locator.SiteID, locator.URL)
data, err := s.cache.Get(key, func() ([]byte, error) {
Expand Down Expand Up @@ -102,12 +128,20 @@ func (s *public) findCommentsCtrl(w http.ResponseWriter, r *http.Request) {
var b []byte
switch format {
case "tree":
withInfo := treeWithInfo{Tree: service.MakeTree(comments, sort), Info: commentsInfo}
withInfo := treeWithInfo{Tree: service.MakeTree(comments, sort, limit, offsetID), Info: commentsInfo}
withInfo.Info.CountLeft = withInfo.Tree.CountLeft()
withInfo.Info.LastComment = withInfo.Tree.LastComment()
if withInfo.Nodes == nil { // eliminate json nil serialization
withInfo.Nodes = []*service.Node{}
}
b, e = encodeJSONWithHTML(withInfo)
default:
if limit > 0 || offsetID != "" {
comments, commentsInfo.CountLeft = limitComments(comments, limit, offsetID)
}
if limit > 0 && len(comments) > 0 {
commentsInfo.LastComment = comments[len(comments)-1].ID
}
withInfo := commentsWithInfo{Comments: comments, Info: commentsInfo}
b, e = encodeJSONWithHTML(withInfo)
}
Expand Down Expand Up @@ -432,3 +466,25 @@ func (s *public) parseSince(r *http.Request) (time.Time, error) {
}
return sinceTS, nil
}

// limitComments returns limited list of comments and count of comments left after limit.
// If offsetID is provided, the list will be sliced starting from the comment with this ID.
// If offsetID is not found, the full list will be returned.
// It's used for only "
func limitComments(c []store.Comment, limit int, offsetID string) (comments []store.Comment, countLeft int) {
if offsetID != "" {
for i, comment := range c {
if comment.ID == offsetID {
c = c[i+1:]
break
}
}
}

if limit > 0 && len(c) > limit {
countLeft = len(c) - limit
c = c[:limit]
}

return c, countLeft
}
122 changes: 104 additions & 18 deletions backend/app/rest/api/rest_public_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (

cache "github.com/go-pkgz/lcw/v2"
R "github.com/go-pkgz/rest"
"github.com/google/uuid"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

Expand Down Expand Up @@ -598,7 +599,7 @@ func TestPublic_FindCommentsCtrl_ConsistentCount(t *testing.T) {
setScore(commentLocator, ids[4], -3)
time.Sleep(time.Millisecond * 5)

c6 := store.Comment{Text: "third-level comment 2", ParentID: ids[4], Locator: commentLocator}
c6 := store.Comment{Text: "deleted third-level comment 2", ParentID: ids[4], Locator: commentLocator}
ids[5], timestamps[5] = addCommentGetCreatedTime(t, c6, ts)
// deleted later so not visible in site-wide requests
setScore(commentLocator, ids[5], 10)
Expand All @@ -612,7 +613,7 @@ func TestPublic_FindCommentsCtrl_ConsistentCount(t *testing.T) {
setScore(commentLocator, ids[6], 1)
time.Sleep(time.Millisecond * 5)

c8 := store.Comment{Text: "second-level comment 3", ParentID: ids[6], Locator: commentLocator}
c8 := store.Comment{Text: "deleted second-level comment 3", ParentID: ids[6], Locator: commentLocator}
ids[7], timestamps[7] = addCommentGetCreatedTime(t, c8, ts)
// deleted later so not visible in site-wide requests
setScore(commentLocator, ids[7], -20)
Expand Down Expand Up @@ -646,18 +647,19 @@ func TestPublic_FindCommentsCtrl_ConsistentCount(t *testing.T) {
params string
expectedBody string
}{
{"", fmt.Sprintf(`"info":{"count":7,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[8])},
{"url=test-url", fmt.Sprintf(`"info":{"url":"test-url","count":6,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[7])},
{"format=plain", fmt.Sprintf(`"info":{"count":7,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[8])},
{"format=plain&url=test-url", fmt.Sprintf(`"info":{"url":"test-url","count":6,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[7])},
{"since=" + sinceTenSecondsAgo, fmt.Sprintf(`"info":{"count":7,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[8])},
{"url=test-url&since=" + sinceTenSecondsAgo, fmt.Sprintf(`"info":{"url":"test-url","count":6,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[7])},
{"since=" + sinceTS[0], fmt.Sprintf(`"info":{"count":7,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[8])},
{"url=test-url&since=" + sinceTS[0], fmt.Sprintf(`"info":{"url":"test-url","count":6,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[7])},
{"since=" + sinceTS[1], fmt.Sprintf(`"info":{"count":6,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[8])},
{"url=test-url&since=" + sinceTS[1], fmt.Sprintf(`"info":{"url":"test-url","count":5,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[7])},
{"since=" + sinceTS[4], fmt.Sprintf(`"info":{"count":3,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[8])},
{"url=test-url&since=" + sinceTS[4], fmt.Sprintf(`"info":{"url":"test-url","count":2,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[7])},
// test parameters url, format, since, sort
{"", fmt.Sprintf(`"info":{"count":7,"count_left":0,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[8])},
{"url=test-url", fmt.Sprintf(`"info":{"url":"test-url","count":6,"count_left":0,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[7])},
{"format=plain", fmt.Sprintf(`"info":{"count":7,"count_left":0,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[8])},
{"format=plain&url=test-url", fmt.Sprintf(`"info":{"url":"test-url","count":6,"count_left":0,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[7])},
{"since=" + sinceTenSecondsAgo, fmt.Sprintf(`"info":{"count":7,"count_left":0,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[8])},
{"url=test-url&since=" + sinceTenSecondsAgo, fmt.Sprintf(`"info":{"url":"test-url","count":6,"count_left":0,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[7])},
{"since=" + sinceTS[0], fmt.Sprintf(`"info":{"count":7,"count_left":0,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[8])},
{"url=test-url&since=" + sinceTS[0], fmt.Sprintf(`"info":{"url":"test-url","count":6,"count_left":0,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[7])},
{"since=" + sinceTS[1], fmt.Sprintf(`"info":{"count":6,"count_left":0,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[8])},
{"url=test-url&since=" + sinceTS[1], fmt.Sprintf(`"info":{"url":"test-url","count":5,"count_left":0,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[7])},
{"since=" + sinceTS[4], fmt.Sprintf(`"info":{"count":3,"count_left":0,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[8])},
{"url=test-url&since=" + sinceTS[4], fmt.Sprintf(`"info":{"url":"test-url","count":2,"count_left":0,"first_time":%q,"last_time":%q}`, formattedTS[0], formattedTS[7])},
{"format=tree", `"info":{"count":7`},
{"format=tree&url=test-url", `"info":{"url":"test-url","count":6`},
{"format=tree&sort=+time", `"info":{"count":7`},
Expand All @@ -677,19 +679,103 @@ func TestPublic_FindCommentsCtrl_ConsistentCount(t *testing.T) {
// three comments of which last one deleted and doesn't have controversy so returned last
{"sort=-controversy&url=test-url&since=" + sinceTS[5], fmt.Sprintf(`"score":0,"vote":0,"time":%q,"delete":true}],"info":{"url":"test-url","count":1`, formattedTS[7])},
// test readonly status for the post without comments
{"url=readonly-test", `"info":{"count":0,"read_only":true`},
{"format=tree&url=readonly-test", `"info":{"count":0,"read_only":true`},
{"url=readonly-test", `"info":{"count":0,"count_left":0,"read_only":true`},
{"format=tree&url=readonly-test", `"info":{"count":0,"count_left":0,"read_only":true`},

// test parameters limit, offset_id for format=plain
{"limit=bad", `{"code":1,"details":"bad limit value","error":"strconv.Atoi: parsing \"bad\": invalid syntax"}`},
{"offset_id=bad", `{"code":1,"details":"bad offset_id value","error":"invalid UUID length: 3"}`},
{"limit=2", `"info":{"count":7,"count_left":5,"last_comment":"` + ids[1]},
{"limit=6", `"info":{"count":7,"count_left":1,"last_comment":"` + ids[6]},
{"limit=7", `"info":{"count":7,"count_left":0,"last_comment":"` + ids[8]},
{"limit=2&url=test-url", `"info":{"url":"test-url","count":6,"count_left":6,"last_comment":"` + ids[1]},
{"limit=6&url=test-url", `"info":{"url":"test-url","count":6,"count_left":2,"last_comment":"` + ids[5]},
{"limit=7&url=test-url", `"info":{"url":"test-url","count":6,"count_left":1,"last_comment":"` + ids[6]},
{fmt.Sprintf("limit=2&offset_id=%s", ids[2]), `"info":{"count":7,"count_left":2,"last_comment":"` + ids[4]},
{fmt.Sprintf("limit=2&offset_id=%s", ids[3]), `"info":{"count":7,"count_left":1,"last_comment":"` + ids[6]},
{fmt.Sprintf("limit=2&offset_id=%s", ids[4]), `"info":{"count":7,"count_left":0`},
{fmt.Sprintf("limit=1&offset_id=%s", ids[6]), `"info":{"count":7,"count_left":0`},
{fmt.Sprintf("limit=2&offset_id=%s", ids[8]), `"info":{"count":7,"count_left":0`},
{fmt.Sprintf("limit=2&url=test-url&offset_id=%s", ids[2]), `"info":{"url":"test-url","count":6,"count_left":3,"last_comment":"` + ids[4]},
{fmt.Sprintf("limit=2&url=test-url&offset_id=%s", ids[3]), `"info":{"url":"test-url","count":6,"count_left":2,"last_comment":"` + ids[5]},
{fmt.Sprintf("limit=2&url=test-url&offset_id=%s", ids[4]), `"info":{"url":"test-url","count":6,"count_left":1,"last_comment":"` + ids[6]},
{fmt.Sprintf("limit=1&url=test-url&offset_id=%s", ids[6]), `"info":{"url":"test-url","count":6,"count_left":0,"last_comment":"` + ids[7]},
{fmt.Sprintf("limit=2&url=test-url&offset_id=%s", ids[8]), `"info":{"url":"test-url","count":6,"count_left":6,`},
// deleted comment, offset is ignored in site-wide request but not for particular URL
{fmt.Sprintf("limit=2&offset_id=%s", ids[5]), `"info":{"count":7,"count_left":5,"last_comment":"` + ids[1]},
{fmt.Sprintf("limit=2&url=test-url&offset_id=%s", ids[5]), `"info":{"url":"test-url","count":6,"count_left":0,"last_comment":"` + ids[7]},
// non-existing comment, offset is ignored, deleted comments included into request with "url"
{fmt.Sprintf("limit=1&offset_id=%s", uuid.New().String()), `"info":{"count":7,"count_left":6,"last_comment":"` + ids[0]},
{fmt.Sprintf("limit=1&url=test-url&offset_id=%s", uuid.New().String()), `"info":{"url":"test-url","count":6,"count_left":7,"last_comment":"` + ids[0]},
// since is ignored for tree format, so we test it only for plain
{"limit=6&since=" + sinceTenSecondsAgo, `"info":{"count":7,"count_left":1,"last_comment":"` + ids[6]},
{"limit=1&since=" + sinceTS[4], `"info":{"count":3,"count_left":2,"last_comment":"` + ids[4]},
{"limit=6&url=test-url&since=" + sinceTenSecondsAgo, `"info":{"url":"test-url","count":6,"count_left":2,"last_comment":"` + ids[5]},
{"limit=1&url=test-url&since=" + sinceTS[4], `"info":{"url":"test-url","count":2,"count_left":3,"last_comment":"` + ids[4]},
// start with deleted comment timestamp
{"limit=1&since=" + sinceTS[5], `"info":{"count":2,"count_left":1,"last_comment":"` + ids[6]},
{"limit=1&since=" + sinceTS[6], `"info":{"count":2,"count_left":1,"last_comment":"` + ids[6]},
{"limit=1&url=test-url&since=" + sinceTS[5], `"info":{"url":"test-url","count":1,"count_left":2,"last_comment":"` + ids[5]},
{"limit=1&url=test-url&since=" + sinceTS[6], `"info":{"url":"test-url","count":1,"count_left":1,"last_comment":"` + ids[6]},
// test sort
{"limit=1&sort=+time&url=test-url", `"info":{"url":"test-url","count":6,"count_left":7,"last_comment":"` + ids[0]},
{"limit=1&sort=-time&url=test-url", `"info":{"url":"test-url","count":6,"count_left":7,"last_comment":"` + ids[7]},
{"limit=1&sort=+score&url=test-url", `"info":{"url":"test-url","count":6,"count_left":7,"last_comment":"` + ids[6]},
{"limit=1&sort=-score&url=test-url", `"info":{"url":"test-url","count":6,"count_left":7,"last_comment":"` + ids[2]},
{"limit=1&sort=+controversy&url=test-url", `"info":{"url":"test-url","count":6,"count_left":7,"last_comment":"` + ids[0]},
{"limit=1&sort=-controversy&url=test-url", `"info":{"url":"test-url","count":6,"count_left":7,"last_comment":"` + ids[3]},

// test parameters limit, offset_id for format=tree
{"format=tree&limit=bad", `{"code":1,"details":"bad limit value","error":"strconv.Atoi: parsing \"bad\": invalid syntax"}`},
{"format=tree&offset_id=bad", `{"code":1,"details":"bad offset_id value","error":"invalid UUID length: 3"}`},
{"format=tree&limit=2", `"info":{"count":7,"count_left":4,"last_comment":"` + ids[0]},
{"format=tree&limit=6", `"info":{"count":7,"count_left":2,"last_comment":"` + ids[1]},
{"format=tree&limit=7", `"info":{"count":7,"count_left":1,"last_comment":"` + ids[6]},
{"format=tree&url=test-url&limit=2", `"info":{"url":"test-url","count":6,"count_left":3,"last_comment":"` + ids[0]},
{"format=tree&url=test-url&limit=6", `"info":{"url":"test-url","count":6,"count_left":1,"last_comment":"` + ids[1]},
{"format=tree&url=test-url&limit=7", `"info":{"url":"test-url","count":6,"count_left":0,"last_comment":"` + ids[6]},
// start after first top-level comment
{fmt.Sprintf("format=tree&limit=2&offset_id=%s", ids[0]), `"info":{"count":7,"count_left":2,"last_comment":"` + ids[1]},
{fmt.Sprintf("format=tree&url=test-url&limit=2&offset_id=%s", ids[0]), `"info":{"url":"test-url","count":6,"count_left":1,"last_comment":"` + ids[1]},
// start after second top-level comment
{fmt.Sprintf("format=tree&limit=2&offset_id=%s", ids[1]), `"info":{"count":7,"count_left":1,"last_comment":"` + ids[6]},
{fmt.Sprintf("format=tree&url=test-url&limit=2&offset_id=%s", ids[1]), `"info":{"url":"test-url","count":6,"count_left":0,"last_comment":"` + ids[6]},
// start after third top-level comment, so expect comment to post 2, or no comments on post 1 if "url" is set
{fmt.Sprintf("format=tree&limit=1&offset_id=%s", ids[6]), `"info":{"count":7,"count_left":0,"last_comment":"` + ids[8]},
{fmt.Sprintf("format=tree&url=test-url&limit=1&offset_id=%s", ids[6]), `"info":{"url":"test-url","count":6,"count_left":0`},
// non-root comment IDs or non-existing IDs are ignored
{fmt.Sprintf("format=tree&limit=2&offset_id=%s", ids[2]), `"info":{"count":7,"count_left":4,"last_comment":"` + ids[0]},
{fmt.Sprintf("format=tree&limit=2&offset_id=%s", ids[3]), `"info":{"count":7,"count_left":4,"last_comment":"` + ids[0]},
{fmt.Sprintf("format=tree&limit=2&offset_id=%s", ids[4]), `"info":{"count":7,"count_left":4,"last_comment":"` + ids[0]},
{fmt.Sprintf("format=tree&limit=2&offset_id=%s", ids[7]), `"info":{"count":7,"count_left":4,"last_comment":"` + ids[0]},
{fmt.Sprintf("format=tree&limit=1&offset_id=%s", uuid.New().String()), `"info":{"count":7,"count_left":4,"last_comment":"` + ids[0]},
{fmt.Sprintf("format=tree&url=test-url&limit=2&offset_id=%s", ids[2]), `"info":{"url":"test-url","count":6,"count_left":3,"last_comment":"` + ids[0]},
{fmt.Sprintf("format=tree&url=test-url&limit=2&offset_id=%s", ids[3]), `"info":{"url":"test-url","count":6,"count_left":3,"last_comment":"` + ids[0]},
{fmt.Sprintf("format=tree&url=test-url&limit=2&offset_id=%s", ids[4]), `"info":{"url":"test-url","count":6,"count_left":3,"last_comment":"` + ids[0]},
{fmt.Sprintf("format=tree&url=test-url&limit=2&offset_id=%s", ids[7]), `"info":{"url":"test-url","count":6,"count_left":3,"last_comment":"` + ids[0]},
{fmt.Sprintf("format=tree&url=test-url&limit=1&offset_id=%s", uuid.New().String()), `"info":{"url":"test-url","count":6,"count_left":3,"last_comment":"` + ids[0]},
// test sort
{"format=tree&limit=1&sort=+time&url=test-url", `"info":{"url":"test-url","count":6,"count_left":3,"last_comment":"` + ids[0]},
{"format=tree&limit=1&sort=-time&url=test-url", `"info":{"url":"test-url","count":6,"count_left":5,"last_comment":"` + ids[6]},
{"format=tree&limit=1&sort=+score&url=test-url", `"info":{"url":"test-url","count":6,"count_left":5,"last_comment":"` + ids[6]},
{"format=tree&limit=1&sort=-score&url=test-url", `"info":{"url":"test-url","count":6,"count_left":4,"last_comment":"` + ids[1]},
{"format=tree&limit=1&sort=+controversy&url=test-url", `"info":{"url":"test-url","count":6,"count_left":3,"last_comment":"` + ids[0]},
{"format=tree&limit=1&sort=-controversy&url=test-url", `"info":{"url":"test-url","count":6,"count_left":5,"last_comment":"` + ids[6]},
}

for _, tc := range testCases {
t.Run(tc.params, func(t *testing.T) {
url := fmt.Sprintf(ts.URL+"/api/v1/find?site=remark42&%s", tc.params)
body, code := get(t, url)
assert.Equal(t, http.StatusOK, code)
expectedStatus := http.StatusOK
if strings.Contains(tc.params, "=bad") {
expectedStatus = http.StatusBadRequest
}
assert.Equal(t, expectedStatus, code)
assert.Contains(t, body, tc.expectedBody)
t.Log(body)
// prevent hit limiter from engaging
time.Sleep(50 * time.Millisecond)
time.Sleep(80 * time.Millisecond)
})
}
}
Expand Down
Loading

0 comments on commit 82a0888

Please sign in to comment.