Skip to content

Commit

Permalink
Merge branch 'main' into 4752-document-webhooks-v2
Browse files Browse the repository at this point in the history
  • Loading branch information
albertisfu authored Jan 3, 2025
2 parents 0511430 + a93dbb0 commit 410e85a
Show file tree
Hide file tree
Showing 2 changed files with 206 additions and 39 deletions.
160 changes: 159 additions & 1 deletion cl/api/tests.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import json
from collections import OrderedDict, defaultdict
from datetime import date, datetime, timedelta, timezone
from http import HTTPStatus
from typing import Any, Dict
from unittest import mock
from unittest.mock import MagicMock, patch
from urllib.parse import parse_qs, urlparse

from asgiref.sync import async_to_sync, sync_to_async
Expand All @@ -29,7 +31,7 @@
from cl.api.factories import WebhookEventFactory, WebhookFactory
from cl.api.models import WEBHOOK_EVENT_STATUS, WebhookEvent, WebhookEventType
from cl.api.pagination import VersionBasedPagination
from cl.api.utils import LoggingMixin, get_logging_prefix
from cl.api.utils import LoggingMixin, get_logging_prefix, invert_user_logs
from cl.api.views import build_chart_data, coverage_data, make_court_variable
from cl.api.webhooks import send_webhook_event
from cl.audio.api_views import AudioViewSet
Expand Down Expand Up @@ -3297,3 +3299,159 @@ async def test_count_with_no_results(self):

self.assertEqual(response.status_code, 200)
self.assertEqual(response.data["count"], 0)


class TestApiUsage(SimpleTestCase):
"""Tests for combining v3 and v4 API usage data"""

def setUp(self):
"""
Set up test environment before each test.
We create fresh mocks to avoid state bleeding between tests.
"""
self.mock_redis = MagicMock()
self.mock_pipeline = MagicMock()
self.mock_redis.pipeline.return_value = self.mock_pipeline

@patch("cl.api.utils.get_redis_interface")
def test_single_date_combined_usage(self, mock_get_redis):
"""
Test that for a single date:
1. Both v3 and v4 usage is fetched
2. Counts are properly combined
3. The output format matches template expectations
"""
mock_get_redis.return_value = self.mock_redis

self.mock_pipeline.execute.return_value = [
# First result: v3 API data
[("1", 100.0), ("2", 50.0)],
# Second result: v4 API data
[("1", 50.0), ("2", 25.0)],
]

results = invert_user_logs(
start="2023-01-01", end="2023-01-01", add_usernames=False
)

expected = defaultdict(dict)
expected[1] = OrderedDict({"2023-01-01": 150, "total": 150})
expected[2] = OrderedDict({"2023-01-01": 75, "total": 75})

self.assertEqual(results, expected)

@patch("cl.api.utils.get_redis_interface")
def test_multiple_dates_combined_usage(self, mock_get_redis):
"""
Test that across multiple dates:
1. API usage is correctly combined from both v3 and v4
2. Totals accumulate properly across dates
3. The data structure matches template expectations
4. Date ordering is preserved
"""
mock_get_redis.return_value = self.mock_redis
self.mock_pipeline.execute.return_value = [
# January 1st - v3 API
[("1", 100.0), ("2", 50.0)],
# January 1st - v4 API
[("1", 50.0), ("2", 25.0)],
# January 2nd - v3 API
[("1", 200.0), ("2", 100.0)],
# January 2nd - v4 API
[("1", 100.0), ("2", 50.0)],
]

results = invert_user_logs(
start="2023-01-01", end="2023-01-02", add_usernames=False
)

# User 1:
# Jan 1: 150 (100 from v3 + 50 from v4)
# Jan 2: 300 (200 from v3 + 100 from v4)
# Total: 450
# User 2:
# Jan 1: 75 (50 from v3 + 25 from v4)
# Jan 2: 150 (100 from v3 + 50 from v4)
# Total: 225
expected = defaultdict(dict)
expected[1] = OrderedDict(
{"2023-01-01": 150, "2023-01-02": 300, "total": 450}
)
expected[2] = OrderedDict(
{"2023-01-01": 75, "2023-01-02": 150, "total": 225}
)

self.assertEqual(results, expected)

@patch("cl.api.utils.get_redis_interface")
def test_anonymous_user_handling(self, mock_get_redis):
"""
Test the handling of anonymous users, which have special requirements:
1. Both 'None' and 'AnonymousUser' should be treated as the same user
2. They should be combined under 'AnonymousUser' in the output
3. Their usage should be summed correctly across both identifiers
4. They should work with both API versions
"""
mock_get_redis.return_value = self.mock_redis
self.mock_pipeline.execute.return_value = [
# January 1st - v3
[
("None", 30.0),
("1", 100.0),
("AnonymousUser", 20.0),
],
# January 1st - v4
[
("AnonymousUser", 25.0),
("1", 50.0),
],
# January 2nd - v3
[("None", 40.0), ("1", 200.0)],
# January 2nd - v4
[
("1", 100.0),
("AnonymousUser", 35.0),
],
]

results = invert_user_logs(
start="2023-01-01", end="2023-01-02", add_usernames=False
)

expected = defaultdict(dict)
# Expected results:
# Anonymous user on Jan 1:
# - v3: 30 (None) + 20 (AnonymousUser) = 50
# - v4: 25 (AnonymousUser) = 25
# - Total for Jan 1: 75
# Anonymous user on Jan 2:
# - v3: 40 (None) = 40
# - v4: 35 (AnonymousUser) = 35
# - Total for Jan 2: 75
# Anonymous total across all dates: 150
expected["AnonymousUser"] = OrderedDict(
{
"2023-01-01": 75,
"2023-01-02": 75,
"total": 150,
}
)
expected[1] = OrderedDict(
{
"2023-01-01": 150,
"2023-01-02": 300,
"total": 450,
}
)

self.assertEqual(results, expected)

self.assertNotIn("None", results)

self.assertIn("AnonymousUser", results)

# Verify ordering is maintained even with anonymous users
anonymous_data = results["AnonymousUser"]
dates = list(anonymous_data.keys())
dates.remove("total")
self.assertEqual(dates, ["2023-01-01", "2023-01-02"])
85 changes: 47 additions & 38 deletions cl/api/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
from collections import OrderedDict, defaultdict
from datetime import date, datetime, timedelta, timezone
from itertools import batched, chain
from typing import Any, Dict, List, Set, TypedDict, Union

import eyecite
Expand Down Expand Up @@ -704,56 +705,64 @@ def invert_user_logs(
end: Union[str, datetime],
add_usernames: bool = True,
) -> Dict[str, Dict[str, int]]:
"""Invert the user logs for a period of time
"""Aggregate API usage statistics per user over a date range.
The user logs have the date in the key and the user as part of the set:
- Anonymous users are aggregated under the key 'AnonymousUser'.
- Both v3 and v4 API counts are combined in the results.
'api:v3.user.d:2016-10-01.counts': {
mlissner: 22,
joe_hazard: 33,
}
This inverts these entries to:
:param start: Beginning date (inclusive) for the query range
:param end: End date (inclusive) for the query range
:param add_usernames: If True, replaces user IDs with usernames as keys.
When False, uses only user IDs as keys.
users: {
mlissner: {
2016-10-01: 22,
total: 22,
},
joe_hazard: {
2016-10-01: 33,
total: 33,
}
}
:param start: The beginning date (inclusive) you want the results for. A
:param end: The end date (inclusive) you want the results for.
:param add_usernames: Stats are stored with the user ID. If this is True,
add an alias in the returned dictionary that contains the username as well.
:return The inverted dictionary
:return: Dictionary mapping user identifiers (usernames if `add_usernames=True`,
otherwise user IDs) to their daily API usage counts and totals.
Inner dictionaries are ordered by date. Only dates with usage are included.
"""
r = get_redis_interface("STATS")
pipe = r.pipeline()

dates = make_date_str_list(start, end)
versions = ["v3", "v4"]
for d in dates:
pipe.zrange(f"api:v3.user.d:{d}.counts", 0, -1, withscores=True)
for version in versions:
pipe.zrange(
f"api:{version}.user.d:{d}.counts",
0,
-1,
withscores=True,
)

# results contains alternating v3/v4 API usage data for each date queried.
# For example, if querying 2023-01-01 to 2023-01-02, results might look like:
# [
# # 2023-01-01 v3 data: [(user_id, count), ...]
# [("1", 100.0), ("2", 50.0)],
# # 2023-01-01 v4 data
# [("1", 50.0), ("2", 25.0)],
# # 2023-01-02 v3 data
# [("1", 200.0), ("2", 100.0)],
# # 2023-01-02 v4 data
# [("1", 100.0), ("2", 50.0)]
# ]
# We zip this with dates to combine v3/v4 counts per user per day
results = pipe.execute()

# results is a list of results for each of the zrange queries above. Zip
# those results with the date that created it, and invert the whole thing.
out: defaultdict = defaultdict(dict)
for d, result in zip(dates, results):
for user_id, count in result:
if user_id == "None" or user_id == "AnonymousUser":
user_id = "AnonymousUser"
else:
user_id = int(user_id)
count = int(count)
if out.get(user_id):
out[user_id][d] = count
out[user_id]["total"] += count
else:
out[user_id] = {d: count, "total": count}

def update_user_counts(_user_id, _count, _date):
user_is_anonymous = _user_id == "None" or _user_id == "AnonymousUser"
_user_id = "AnonymousUser" if user_is_anonymous else int(_user_id)
_count = int(_count)
out.setdefault(_user_id, OrderedDict())
out[_user_id].setdefault(_date, 0)
out[_user_id][_date] += _count
out[_user_id].setdefault("total", 0)
out[_user_id]["total"] += _count

for d, api_usage in zip(dates, batched(results, len(versions))):
for user_id, count in chain(*api_usage):
update_user_counts(user_id, count, d)

# Sort the values
for k, v in out.items():
Expand Down

0 comments on commit 410e85a

Please sign in to comment.