Skip to content

Commit

Permalink
Require login to view old trips
Browse files Browse the repository at this point in the history
As mentioned in a prior commit, there's a scraping script trying to
parse all trips. That's fine, and hopefully it's a one-off script:
33c1063

However, this is the slowest endpoint served by the application, and we
should be careful to avoid an easy DOS opportunity.

Also, get better about the "Previous trips" label -- when given a very
old date, there's no sense in us showing a "Previous trips" label, so
just dynamically hide it. This also helps get rid of the temporary hack
for hard-coded old dates.
  • Loading branch information
DavidCain committed Jun 24, 2024
1 parent 7e4cb49 commit 46e74b1
Show file tree
Hide file tree
Showing 5 changed files with 211 additions and 29 deletions.
10 changes: 9 additions & 1 deletion ws/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1040,7 +1040,15 @@ def __str__(self) -> str:


class Trip(models.Model):
# Constant to control the default "page size" when viewing old trips.
# Constant to control two things:
# 1. The default "page size" when letting users click back
# Generally, users only want to see more recent trips not *all* old ones.
# 2. The number of old trips anonymous users can view.
# The `/trips` endpoint lets you display over 2,000 trips.
# We make efforts for it to be available to end users even though it's slow.
# However, we've observed scrapers being employed in the wild.
# We don't want these scrapers to sap system resources.
# (and if somebody is abusing the endpoint, I'd like to know who they are)
#
# Putting this on the model so the business logic has one source of truth.
TRIPS_LOOKBACK = timedelta(days=365)
Expand Down
13 changes: 9 additions & 4 deletions ws/templates/trips/all/view.html
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,16 @@ <h3>Past trips</h3>
{% endif %}
<hr>
<p>
{% if one_year_prior %}
{# We hide this link when there are no trips earlier #}
<a href="{% url 'trips' %}?after={{ one_year_prior|date:"Y-m-d" }}">
{% if must_login_to_view_older_trips %}
<a href="{% url 'account_login' %}?next={% url 'trips' %}?after={{ previous_lookup_date|date:"Y-m-d" }}">
<i class="fas fa-history"></i>
Previous trips
Log in to view previous trips
</a>
{% elif previous_lookup_date %}
{# We hide this link when there are no trips earlier #}
<a href="{% url 'trips' %}?after={{ previous_lookup_date|date:"Y-m-d" }}">
<i class="fas fa-history"></i>
Previous trips
</a>
{% endif %}
<a class="pull-right" href="https://mitoc.mit.edu"><i class="fas fa-external-link-alt"></i>&nbsp;MITOC home page</a>
Expand Down
4 changes: 4 additions & 0 deletions ws/tests/views/test_participant.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ def test_unauthenticated_rendering_few_upcoming_trips(self):
# Recent trips are shown until we have 8 total
self.assertEqual(list(response.context["recent_trips"]), ten_past_trips[:6])

soup = BeautifulSoup(response.content, "html.parser")
prev_trips = soup.find("a", href="/trips/?after=2019-01-12")
self.assertEqual(prev_trips.get_text(strip=True), "Previous trips")


# NOTE: See test_ws_tags.py for direct testing of the templatetag too
class LectureAttendanceTests(TestCase):
Expand Down
103 changes: 100 additions & 3 deletions ws/tests/views/test_trips.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def _form_data(form):

def _get(self, url):
response = self.client.get(url)
assert response.status_code == 200
self.assertEqual(response.status_code, 200)
soup = BeautifulSoup(response.content, "html.parser")
return response, soup

Expand Down Expand Up @@ -100,6 +100,9 @@ def test_invalid_filter(self):
@freeze_time("2018-01-10 12:25:00 EST")
def test_trips_with_filter(self):
"""We support filtering the responded list of trips."""
# Need to log in to see old trips
self.client.force_login(factories.UserFactory.create())

# Make a very old trip that will not be in our filter
factories.TripFactory.create(trip_date="2016-12-23")

Expand All @@ -126,17 +129,34 @@ def test_trips_with_filter(self):
self._expect_past_trips(response, [one_week_ago.pk, one_month_ago.pk])
self._expect_link_for_date(soup, "2016-11-15")

def trips_after_first_trip_date(self):
"""We don't show 'Previous trips' before the first trip."""
trip = factories.TripFactory.create(name="Tecumseh", trip_date="2015-01-10")

# Filter based on a date in the past
response, soup = self._get("/trips/?after=2015-01-10")
self.assertFalse(response.context["date_invalid"])

self._expect_title(soup, "Trips after 2015-01-10")
self._expect_past_trips(response, [trip.pk])

self.assertIsNone(soup.find(string="Previous trips"))

def test_trips_with_very_early_date(self):
"""You can ask for trips starting after the year 1."""
"""Authed users can ask for trips starting after the year 1."""
self.client.force_login(factories.UserFactory.create())

trip = factories.TripFactory.create(trip_date="2016-12-23")

# Filter based on a date in the past
response, soup = self._get("/trips/?after=0001-10-17")
self.assertFalse(response.context["date_invalid"])

self._expect_title(soup, "Trips after 1900-01-01")
self._expect_title(soup, "Trips after 0001-10-17")
self._expect_past_trips(response, [trip.pk])

self.assertIsNone(soup.find(string="Previous trips"))

def test_upcoming_trips_can_be_filtered(self):
"""If supplying an 'after' date in the future, that still permits filtering!"""
_next_week = factories.TripFactory.create(trip_date="2019-02-22")
Expand All @@ -154,6 +174,83 @@ def test_upcoming_trips_can_be_filtered(self):
self._expect_current_trips(response, [next_month.pk, next_year.pk])


class AnonymousTripListViewTest(TestCase, Helpers):
@freeze_time("2024-06-25 12:45:59 EDT")
def test_custom_recent_trips(self):
"""Anonymous users can view any date in the last 365 days."""
this_year = factories.TripFactory.create(trip_date="2024-02-02")
factories.TripFactory.create(trip_date="2023-12-25")

response, soup = self._get("/trips/?after=2024-01-01")
self.assertIs(response.context["must_login_to_view_older_trips"], False)

self._expect_past_trips(response, [this_year.pk])
# We don't offer to go back an extra 365 days, just to the cutoff
self._expect_link_for_date(soup, "2023-06-26")

@freeze_time("2024-06-25 12:45:59 EDT")
def test_lookback_to_earliest_date(self):
"""We support lookback up to 365 days."""
within_window = factories.TripFactory.create(trip_date="2023-07-07")
factories.TripFactory.create(trip_date="2023-01-02")

response, soup = self._get("/trips/?after=2023-06-26")
self.assertIs(response.context["must_login_to_view_older_trips"], True)

self._expect_past_trips(response, [within_window.pk])
link = soup.find("a", href="/accounts/login/?next=/trips/?after=2022-06-26")
self.assertEqual(link.get_text(strip=True), "Log in to view previous trips")

@freeze_time("2024-06-25 12:45:59 EDT")
def test_lookback_too_old(self):
factories.TripFactory.create(trip_date="2023-07-07")
response, soup = self._get("/trips/?after=2016-01-12")

# Even though there *is* a trip that's viewable, we hide it.
# If anybody's writing a parser or similar, we don't want to mislead.
# (Showing *some* past trips in response to an invalid request is unwise)
self.assertNotIn("past_trips", response.context)
self.assertIsNone(soup.find(string="Previous trips"))

alert = soup.find(class_="alert-danger")
self.assertEqual(
strip_whitespace(alert.text),
"You must log in to view trips before 2023-06-26.",
)
self.assertEqual(
alert.find("a").attrs,
{"href": "/accounts/login/?next=%2Ftrips%2F%3Fafter%3D2016-01-12"},
)

# The footer links to another 365 days back, just for consistency
link = soup.find("a", href="/accounts/login/?next=/trips/?after=2015-01-12")
self.assertEqual(link.get_text(strip=True), "Log in to view previous trips")

@freeze_time("2024-06-25 12:45:59 EDT")
def test_lookup_before_first_trip(self):
factories.TripFactory.create(trip_date="2023-07-07")
response, soup = self._get("/trips/?after=2010-07-20")

# Even though there *is* a trip that's viewable, we hide it.
# If anybody's writing a parser or similar, we don't want to mislead.
# (Showing *some* past trips in response to an invalid request is unwise)
self.assertNotIn("past_trips", response.context)
self.assertIsNone(soup.find(string="Previous trips"))

alert = soup.find(class_="alert-danger")
self.assertEqual(
strip_whitespace(alert.text),
"You must log in to view trips before 2023-06-26.",
)
self.assertEqual(
alert.find("a").attrs,
{"href": "/accounts/login/?next=%2Ftrips%2F%3Fafter%3D2010-07-20"},
)

# Even if logged in, we couldn't show older trips!
self.assertIsNone(soup.find(string="Log in to view previous trips"))


class AllTripsViewTest(TestCase):
def test_simple_redirect(self):
response = self.client.get("/trips/all/")
Expand Down
110 changes: 89 additions & 21 deletions ws/views/trips.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""

from collections import defaultdict
from dataclasses import dataclass
from datetime import date
from typing import TYPE_CHECKING, Any, cast
from urllib.parse import urlencode
Expand All @@ -18,6 +19,7 @@
from django.forms.utils import ErrorList
from django.http import (
Http404,
HttpRequest,
HttpResponseBadRequest,
HttpResponseRedirect,
)
Expand Down Expand Up @@ -56,6 +58,10 @@
TRIPS_LOOKBACK = models.Trip.TRIPS_LOOKBACK


def _earliest_allowed_anon_lookback_date() -> date:
return local_date() - TRIPS_LOOKBACK


class TripView(DetailView):
"""Display the trip to both unregistered users and known participants.
Expand Down Expand Up @@ -474,6 +480,20 @@ def form_valid(self, form):
return super().form_valid(form)


@dataclass(frozen=True)
class PreviousLookup:
on_or_after_date: date | None
date_invalid: bool
previous_lookup_date: date | None

def must_login_to_view_older_trips(self, request: HttpRequest) -> bool:
if request.user.is_authenticated or self.on_or_after_date is None:
return False
if self.previous_lookup_date is None:
return False
return self.previous_lookup_date < _earliest_allowed_anon_lookback_date()


class TripListView(ListView):
"""Superclass for any view that displays a list of trips.
Expand All @@ -489,6 +509,14 @@ class TripListView(ListView):
template_name = "trips/all/view.html"
context_object_name = "trip_queryset"

def _can_view_requested_trips(self) -> bool:
if self.request.user.is_authenticated:
return True
start_date, _ = self._optionally_filter_from_args()
if start_date is None:
return True # We'll just show upcoming trips
return start_date >= _earliest_allowed_anon_lookback_date()

def get_queryset(self):
trips = super().get_queryset()
return annotated_for_trip_list(trips)
Expand All @@ -510,12 +538,42 @@ def _optionally_filter_from_args(self) -> tuple[date | None, bool]:
except (TypeError, ValueError):
return None, True

return (
# Guard against a weird edge case -- we can accept the year 1!
# We'll try to *subtract* time, and end up an exception rather than BCE datetimes
# There were no trips in our database before the 2010s anyway.
max(start_date, date(1900, 1, 1)),
False,
return start_date, False

def _get_lookback_info(self) -> PreviousLookup:
"""Describe the lookup range for trip dates, control links for larger ranges."""
on_or_after_date, date_invalid = self._optionally_filter_from_args()
oldest_requested_trip_date = on_or_after_date or local_date()

# Prevent rendering "Previous trips" if we know there won't be any.
# Also prevents a weird edge case -- you can pass a date in the year 1.
# (When the server tries to subtract 365 days, Python declines to make BCE dates)
if oldest_requested_trip_date < FIRST_TRIP_DATE:
previous_lookup_date = None
else:
# Get approximately one year prior for use in paginating back in time.
# (need not be exact/handle leap years)
one_year_prior = oldest_requested_trip_date - TRIPS_LOOKBACK
if not self.request.user.is_authenticated:
earliest_allowed = _earliest_allowed_anon_lookback_date()
previous_lookup_date = (
# If we can't go back any further, we'll show a link
one_year_prior
if earliest_allowed >= oldest_requested_trip_date
# We shouldn't link anonymous users to a page they can't see.
else max(earliest_allowed, one_year_prior)
)
else:
previous_lookup_date = one_year_prior

# Sanity check -- the previous date should *always* be in the past.
if previous_lookup_date is not None:
assert previous_lookup_date < oldest_requested_trip_date

return PreviousLookup(
on_or_after_date=on_or_after_date,
date_invalid=date_invalid,
previous_lookup_date=previous_lookup_date,
)

def get_context_data(self, **kwargs):
Expand All @@ -525,33 +583,43 @@ def get_context_data(self, **kwargs):
today = local_date()
context["today"] = today

on_or_after_date, context["date_invalid"] = self._optionally_filter_from_args()
if on_or_after_date:
context["on_or_after_date"] = on_or_after_date
trips = trips.filter(trip_date__gte=on_or_after_date)
info = self._get_lookback_info()
context["must_login_to_view_older_trips"] = info.must_login_to_view_older_trips(
self.request
)
context["date_invalid"] = info.date_invalid
context["previous_lookup_date"] = info.previous_lookup_date

oldest_trip_date = on_or_after_date or today
# Prevent rendering "Previous trips" if we know there won't be any.
# Also prevents a weird edge case -- you can pass a date in the year 1.
# (When the server tries to subtract 365 days, Python declines to make BCE dates)
if oldest_trip_date >= FIRST_TRIP_DATE:
# Get approximately one year prior for use in paginating back in time.
# (need not be exact/handle leap years)
context["one_year_prior"] = oldest_trip_date - TRIPS_LOOKBACK
if info.on_or_after_date:
context["on_or_after_date"] = info.on_or_after_date
trips = trips.filter(trip_date__gte=info.on_or_after_date)

# By default, just show upcoming trips.
context["current_trips"] = trips.filter(trip_date__gte=today)

# However, if we've explicitly opted in to showing past trips, include them.
if on_or_after_date:
if not self._can_view_requested_trips():
requested_url = (
reverse("trips") + "?after=" + info.on_or_after_date.isoformat()
)
# URL encoding is unnecessary, but might as well use safer patterns.
next_url = (
reverse("account_login") + "?" + urlencode({"next": requested_url})
)
messages.error(
self.request,
f'You must <a href="{next_url}">log in to view trips</a> before {_earliest_allowed_anon_lookback_date()}.',
extra_tags="safe",
)
elif info.on_or_after_date:
# Note that we need to sort past trips in descending order (most recent trips first).
# This is because we have a cutoff in the past, and it would display strangely to sort ascending.
context["past_trips"] = trips.filter(trip_date__lt=today).order_by(
"-trip_date", "-time_created"
)
if not on_or_after_date:
if not info.on_or_after_date:
# We're on the special 'all trips' view, so there are no add'l previous trips
context["one_year_prior"] = None
context["previous_lookup_date"] = None
return context


Expand Down

0 comments on commit 46e74b1

Please sign in to comment.