Skip to content

Commit

Permalink
WIP: clamp pagination based on auth and privilege
Browse files Browse the repository at this point in the history
  • Loading branch information
sarayourfriend committed May 22, 2024
1 parent 2510e4e commit 47a42a8
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 21 deletions.
117 changes: 103 additions & 14 deletions api/api/serializers/media_serializers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from collections import namedtuple
from math import floor
from typing import TypedDict

from django.conf import settings
Expand All @@ -21,6 +22,7 @@
from api.constants.sorting import DESCENDING, RELEVANCE, SORT_DIRECTIONS, SORT_FIELDS
from api.controllers import search_controller
from api.models.media import AbstractMedia
from api.models.oauth import ThrottledApplication
from api.serializers.base import BaseModelSerializer
from api.serializers.docs import (
COLLECTION_HELP_TEXT,
Expand All @@ -31,6 +33,7 @@
UNSTABLE_WARNING,
)
from api.serializers.fields import SchemableHyperlinkedIdentityField
from api.serializers.utils import get_throttled_application
from api.utils.help_text import make_comma_separated_help_text
from api.utils.url import add_protocol

Expand All @@ -49,31 +52,44 @@ class PaginatedRequestSerializer(serializers.Serializer):
]
page_size = serializers.IntegerField(
label="page_size",
help_text=f"Number of results to return per page. "
f"Maximum is {settings.MAX_AUTHED_PAGE_SIZE} for authenticated "
f"requests, and {settings.MAX_ANONYMOUS_PAGE_SIZE} for "
f"unauthenticated requests.",
help_text=(
f"Number of results to return per page. "
f"Maximum is {settings.PAGE_SIZE_LIMITS['authenticated']} for authenticated "
f"requests, and {settings.PAGE_SIZE_LIMITS['anonymous']} for "
"unauthenticated requests. Additional privileges may be granted upon request, "
"subject to review by the Openverse maintainers."
),
required=False,
default=settings.MAX_ANONYMOUS_PAGE_SIZE,
default=settings.PAGE_SIZE_LIMITS["anonymous"],
min_value=1,
)
page = serializers.IntegerField(
label="page",
help_text="The page of results to retrieve.",
required=False,
default=1,
max_value=settings.MAX_PAGINATION_DEPTH,
min_value=1,
)

def _get_page_size_limit_key(self):
    """
    Choose the ``settings.PAGE_SIZE_LIMITS`` key that applies to the request.

    :return: ``"anonymous"`` when no application is attached to the request,
        ``"privileged"`` when the application's privileges include
        ``increased_page_size``, and ``"authenticated"`` otherwise.
    """
    app = get_throttled_application(self)
    if app is None:
        return "anonymous"

    elevated = ThrottledApplication.Privileges.increased_page_size.value
    # Without the privilege the application is authed, but nothing more.
    return "privileged" if elevated in app.privileges else "authenticated"

def validate_page_size(self, value):
request = self.context.get("request")
is_anonymous = getattr(request, "auth", None) is None
max_value = (
settings.MAX_ANONYMOUS_PAGE_SIZE
if is_anonymous
else settings.MAX_AUTHED_PAGE_SIZE
)
page_size_limit_key = self._get_page_size_limit_key()

max_value = settings.PAGE_SIZE_LIMITS[page_size_limit_key]

validator = MaxValueValidator(
max_value,
Expand All @@ -82,7 +98,7 @@ def validate_page_size(self, value):
),
)

if is_anonymous:
if page_size_limit_key != "privileged":
try:
validator(value)
except (ValidationError, DjangoValidationError) as e:
Expand All @@ -95,6 +111,79 @@ def validate_page_size(self, value):

return value

def _get_pagination_depth_limit_key(self):
    """
    Choose the ``settings.PAGINATION_DEPTH_LIMITS`` key for the request.

    :return: ``"anonymous"`` when no application is attached to the request,
        ``"privileged"`` when the application's privileges include
        ``increased_pagination_depth``, and ``"authenticated"`` otherwise.
    """
    application = get_throttled_application(self)

    if application is None:
        return "anonymous"

    # Test membership with the enum member's ``.value``, exactly as
    # ``_get_page_size_limit_key`` does. Comparing the bare member only
    # works when the enum happens to subclass ``str``; ``.value`` is
    # correct for either kind of enum.
    if (
        ThrottledApplication.Privileges.increased_pagination_depth.value
        in application.privileges
    ):
        return "privileged"

    # else, authed but no special privilege
    return "authenticated"

def clamp_result_count(self, real_result_count):
    """
    Cap a result count at the requester's pagination depth limit.

    :param real_result_count: the number of results actually found.
    :return: ``real_result_count``, or the applicable entry of
        ``settings.PAGINATION_DEPTH_LIMITS`` when that is smaller.
    """
    limit_key = self._get_pagination_depth_limit_key()
    depth_cap = settings.PAGINATION_DEPTH_LIMITS[limit_key]
    return min(real_result_count, depth_cap)

def clamp_page_count(self, real_page_count):
    """
    Cap a page count at what the pagination depth limit allows.

    The depth limit is expressed in results, so the number of reachable
    pages is that limit divided by the request's ``page_size``.

    :param real_page_count: the number of pages actually available.
    :return: ``real_page_count``, or the floor of the reachable page count
        when ``real_page_count`` exceeds it.
    """
    limit_key = self._get_pagination_depth_limit_key()
    depth_cap = settings.PAGINATION_DEPTH_LIMITS[limit_key]

    # May be fractional; only whole pages are ever reported back.
    reachable_pages = depth_cap / self.data["page_size"]

    return (
        floor(reachable_pages)
        if real_page_count > reachable_pages
        else real_page_count
    )

def validate(self, data):
    """
    Validate the requested pagination depth (``page * page_size``).

    :param data: the field-validated request parameters; ``page`` and
        ``page_size`` are both read from it.
    :return: the validated data, unchanged by the depth check.
    :raises NotAuthenticated: when the depth limit is exceeded by an
        anonymous or unprivileged requester.
    :raises ValidationError: when the depth limit is exceeded by a
        privileged requester (the validator's own error is re-raised).
    """
    data = super().validate(data)

    # pagination depth is validated as a combination of page and page size,
    # and so cannot be validated in the individual field validation methods
    pagination_depth_limit_key = self._get_pagination_depth_limit_key()

    requested_pagination_depth = data["page"] * data["page_size"]

    max_pagination_depth = settings.PAGINATION_DEPTH_LIMITS[
        pagination_depth_limit_key
    ]

    pagination_depth_validator = MaxValueValidator(
        max_pagination_depth,
        message=serializers.IntegerField.default_error_messages["max_value"].format(
            max_value=max_pagination_depth
        ),
    )

    try:
        pagination_depth_validator(requested_pagination_depth)
    except (ValidationError, DjangoValidationError) as e:
        # Mirror ``validate_page_size``: only requesters below the
        # privileged tier are pointed at authentication. A privileged
        # requester gets the plain validation error, because
        # ``NotAuthenticated`` would be misleading for them.
        if pagination_depth_limit_key == "privileged":
            raise

        raise NotAuthenticated(
            detail=e.message,
            code=e.code,
        ) from e

    return data


@extend_schema_serializer(
# Hide internal fields from documentation.
Expand Down
10 changes: 10 additions & 0 deletions api/api/serializers/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
def get_throttled_application(serializer):
    """
    Return the application attached to the serializer's request, if any.

    :param serializer: an object whose ``context`` mapping may hold a
        ``"request"`` entry.
    :return: ``request.auth.application``, or ``None`` when the context has
        no request or the request's ``auth`` is missing or ``None``.
    """
    http_request = serializer.context.get("request")
    # ``getattr`` with a default also covers ``http_request`` being ``None``.
    token = getattr(http_request, "auth", None)
    return None if token is None else token.application
3 changes: 1 addition & 2 deletions api/api/utils/pagination.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from django.conf import settings
from rest_framework.pagination import PageNumberPagination
from rest_framework.response import Response

Expand All @@ -22,7 +21,7 @@ def __init__(self, *args, **kwargs):
def get_paginated_response(self, data):
response = {
"result_count": self.result_count,
"page_count": min(settings.MAX_PAGINATION_DEPTH, self.page_count),
"page_count": self.page_count,
"page_size": self.page_size,
"page": self.page,
"results": data,
Expand Down
4 changes: 2 additions & 2 deletions api/api/views/media_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,8 @@ def get_media_results(
filter_dead,
page,
)
self.paginator.page_count = num_pages
self.paginator.result_count = num_results
self.paginator.page_count = params.clamp_page_count(num_pages)
self.paginator.result_count = params.clamp_result_count(num_results)
except ValueError as e:
raise APIException(getattr(e, "message", str(e)))

Expand Down
18 changes: 15 additions & 3 deletions api/conf/settings/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,18 @@
default=f"Openverse{{purpose}}/{API_VERSION} (https://wordpress.org/openverse)",
)

MAX_ANONYMOUS_PAGE_SIZE = 20
MAX_AUTHED_PAGE_SIZE = 500
MAX_PAGINATION_DEPTH = 20
# Maximum ``page_size`` (results per page) a request may ask for, keyed by
# the requester's access tier. The paginated request serializer looks up the
# limit with one of these three keys.
PAGE_SIZE_LIMITS = {
    # No application associated with the request
    "anonymous": 20,
    # Application present, but without the increased page size privilege
    "authenticated": 50,
    # Application holding the increased page size privilege
    "privileged": 500,
}

# Maximum pagination depth, keyed by the requester's access tier.
# The unit is total works (``page * page_size``), not pages!
# Intentionally identical for anonymous requests and unprivileged apps.
PAGINATION_DEPTH_LIMITS = {
    # 12 pages of 20 results
    "anonymous": 12 * 20,
    "authenticated": 12 * 20,
    # 20 pages of 500 results is the original "authed" maximum; it is kept
    # for privileged apps
    "privileged": 20 * 500,
}

0 comments on commit 47a42a8

Please sign in to comment.