From 931c3bc9f4514ae690d6a61fc1943c9364a11e4d Mon Sep 17 00:00:00 2001
From: Elisa Anguita <elisa@free.law>
Date: Tue, 31 Dec 2024 12:41:46 -0300
Subject: [PATCH 1/5] fix(api): include v4 usage in user's recent_api_usage

---
 cl/api/utils.py | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/cl/api/utils.py b/cl/api/utils.py
index 570c48c769..795992b0a7 100644
--- a/cl/api/utils.py
+++ b/cl/api/utils.py
@@ -1,6 +1,7 @@
 import logging
 from collections import OrderedDict, defaultdict
 from datetime import date, datetime, timedelta, timezone
+from itertools import chain
 from typing import Any, Dict, List, Set, TypedDict, Union
 
 import eyecite
@@ -737,23 +738,26 @@ def invert_user_logs(
     dates = make_date_str_list(start, end)
     for d in dates:
         pipe.zrange(f"api:v3.user.d:{d}.counts", 0, -1, withscores=True)
+        pipe.zrange(f"api:v4.user.d:{d}.counts", 0, -1, withscores=True)
     results = pipe.execute()
 
     # results is a list of results for each of the zrange queries above. Zip
     # those results with the date that created it, and invert the whole thing.
     out: defaultdict = defaultdict(dict)
-    for d, result in zip(dates, results):
-        for user_id, count in result:
-            if user_id == "None" or user_id == "AnonymousUser":
-                user_id = "AnonymousUser"
-            else:
-                user_id = int(user_id)
-            count = int(count)
-            if out.get(user_id):
-                out[user_id][d] = count
-                out[user_id]["total"] += count
-            else:
-                out[user_id] = {d: count, "total": count}
+
+    def update_user_counts(_user_id, _count, _date):
+        user_is_anonymous = _user_id == "None" or _user_id == "AnonymousUser"
+        _user_id = "AnonymousUser" if user_is_anonymous else int(_user_id)
+        _count = int(_count)
+        out.setdefault(_user_id, OrderedDict())
+        out[_user_id].setdefault(_date, 0)
+        out[_user_id][_date] += _count
+        out[_user_id].setdefault("total", 0)
+        out[_user_id]["total"] += _count  # Update total
+
+    for d, v3_data, v4_data in zip(dates, results[::2], results[1::2]):
+        for user_id, count in chain(v3_data, v4_data):
+            update_user_counts(user_id, count, d)
 
     # Sort the values
     for k, v in out.items():

From 7e6317e22795d910a902e4a3f85a696993c9d64c Mon Sep 17 00:00:00 2001
From: Elisa Anguita <elisa@free.law>
Date: Tue, 31 Dec 2024 13:14:20 -0300
Subject: [PATCH 2/5] docs(api): update invert_user_logs docstring and add
 descriptive comment

---
 cl/api/utils.py | 51 ++++++++++++++++++++++++-------------------------
 1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/cl/api/utils.py b/cl/api/utils.py
index 795992b0a7..5970b0bce9 100644
--- a/cl/api/utils.py
+++ b/cl/api/utils.py
@@ -705,32 +705,19 @@ def invert_user_logs(
     end: Union[str, datetime],
     add_usernames: bool = True,
 ) -> Dict[str, Dict[str, int]]:
-    """Invert the user logs for a period of time
+    """Aggregate API usage statistics per user over a date range.
 
-    The user logs have the date in the key and the user as part of the set:
+    - Anonymous users are aggregated under the key 'AnonymousUser'.
+    - Both v3 and v4 API counts are combined in the results.
 
-        'api:v3.user.d:2016-10-01.counts': {
-           mlissner: 22,
-           joe_hazard: 33,
-        }
-
-    This inverts these entries to:
+    :param start: Beginning date (inclusive) for the query range
+    :param end: End date (inclusive) for the query range
+    :param add_usernames: If True, replaces user IDs with usernames as keys.
+        When False, uses only user IDs as keys.
 
-        users: {
-            mlissner: {
-                2016-10-01: 22,
-                total: 22,
-            },
-            joe_hazard: {
-                2016-10-01: 33,
-                total: 33,
-            }
-        }
-    :param start: The beginning date (inclusive) you want the results for. A
-    :param end: The end date (inclusive) you want the results for.
-    :param add_usernames: Stats are stored with the user ID. If this is True,
-    add an alias in the returned dictionary that contains the username as well.
-    :return The inverted dictionary
+    :return: Dictionary mapping user identifiers (usernames if `add_usernames=True`,
+        otherwise user IDs) to their daily API usage counts and totals.
+        Inner dictionaries are ordered by date. Only dates with usage are included.
     """
     r = get_redis_interface("STATS")
     pipe = r.pipeline()
@@ -739,10 +726,22 @@ def invert_user_logs(
     for d in dates:
         pipe.zrange(f"api:v3.user.d:{d}.counts", 0, -1, withscores=True)
         pipe.zrange(f"api:v4.user.d:{d}.counts", 0, -1, withscores=True)
+
+    # results contains alternating v3/v4 API usage data for each date queried.
+    # For example, if querying 2023-01-01 to 2023-01-02, results might look like:
+    # [
+    #     # 2023-01-01 v3 data: [(user_id, count), ...]
+    #     [("1", 100.0), ("2", 50.0)],
+    #     # 2023-01-01 v4 data
+    #     [("1", 50.0), ("2", 25.0)],
+    #     # 2023-01-02 v3 data
+    #     [("1", 200.0), ("2", 100.0)],
+    #     # 2023-01-02 v4 data
+    #     [("1", 100.0), ("2", 50.0)]
+    # ]
+    # We zip this with dates to combine v3/v4 counts per user per day
     results = pipe.execute()
 
-    # results is a list of results for each of the zrange queries above. Zip
-    # those results with the date that created it, and invert the whole thing.
     out: defaultdict = defaultdict(dict)
 
     def update_user_counts(_user_id, _count, _date):
@@ -753,7 +752,7 @@ def update_user_counts(_user_id, _count, _date):
         out[_user_id].setdefault(_date, 0)
         out[_user_id][_date] += _count
         out[_user_id].setdefault("total", 0)
-        out[_user_id]["total"] += _count  # Update total
+        out[_user_id]["total"] += _count
 
     for d, v3_data, v4_data in zip(dates, results[::2], results[1::2]):
         for user_id, count in chain(v3_data, v4_data):

From ef20c82b11400901e1efa8fdd01fb6aa0b0b6668 Mon Sep 17 00:00:00 2001
From: Elisa Anguita <elisa@free.law>
Date: Tue, 31 Dec 2024 13:23:05 -0300
Subject: [PATCH 3/5] test(api): include tests for combining v3 and v4 API
 usage data

---
 cl/api/tests.py | 160 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 159 insertions(+), 1 deletion(-)

diff --git a/cl/api/tests.py b/cl/api/tests.py
index e15bbfe279..1956af2fef 100644
--- a/cl/api/tests.py
+++ b/cl/api/tests.py
@@ -1,8 +1,10 @@
 import json
+from collections import OrderedDict, defaultdict
 from datetime import date, datetime, timedelta, timezone
 from http import HTTPStatus
 from typing import Any, Dict
 from unittest import mock
+from unittest.mock import MagicMock, patch
 from urllib.parse import parse_qs, urlparse
 
 from asgiref.sync import async_to_sync, sync_to_async
@@ -29,7 +31,7 @@
 from cl.api.factories import WebhookEventFactory, WebhookFactory
 from cl.api.models import WEBHOOK_EVENT_STATUS, WebhookEvent, WebhookEventType
 from cl.api.pagination import VersionBasedPagination
-from cl.api.utils import LoggingMixin, get_logging_prefix
+from cl.api.utils import LoggingMixin, get_logging_prefix, invert_user_logs
 from cl.api.views import build_chart_data, coverage_data, make_court_variable
 from cl.api.webhooks import send_webhook_event
 from cl.audio.api_views import AudioViewSet
@@ -3297,3 +3299,159 @@ async def test_count_with_no_results(self):
 
         self.assertEqual(response.status_code, 200)
         self.assertEqual(response.data["count"], 0)
+
+
+class TestApiUsage(SimpleTestCase):
+    """Tests for combining v3 and v4 API usage data"""
+
+    def setUp(self):
+        """
+        Set up test environment before each test.
+        We create fresh mocks to avoid state bleeding between tests.
+        """
+        self.mock_redis = MagicMock()
+        self.mock_pipeline = MagicMock()
+        self.mock_redis.pipeline.return_value = self.mock_pipeline
+
+    @patch("cl.api.utils.get_redis_interface")
+    def test_single_date_combined_usage(self, mock_get_redis):
+        """
+        Test that for a single date:
+        1. Both v3 and v4 usage is fetched
+        2. Counts are properly combined
+        3. The output format matches template expectations
+        """
+        mock_get_redis.return_value = self.mock_redis
+
+        self.mock_pipeline.execute.return_value = [
+            # First result: v3 API data
+            [("1", 100.0), ("2", 50.0)],
+            # Second result: v4 API data
+            [("1", 50.0), ("2", 25.0)],
+        ]
+
+        results = invert_user_logs(
+            start="2023-01-01", end="2023-01-01", add_usernames=False
+        )
+
+        expected = defaultdict(dict)
+        expected[1] = OrderedDict({"2023-01-01": 150, "total": 150})
+        expected[2] = OrderedDict({"2023-01-01": 75, "total": 75})
+
+        self.assertEqual(results, expected)
+
+    @patch("cl.api.utils.get_redis_interface")
+    def test_multiple_dates_combined_usage(self, mock_get_redis):
+        """
+        Test that across multiple dates:
+        1. API usage is correctly combined from both v3 and v4
+        2. Totals accumulate properly across dates
+        3. The data structure matches template expectations
+        4. Date ordering is preserved
+        """
+        mock_get_redis.return_value = self.mock_redis
+        self.mock_pipeline.execute.return_value = [
+            # January 1st - v3 API
+            [("1", 100.0), ("2", 50.0)],
+            # January 1st - v4 API
+            [("1", 50.0), ("2", 25.0)],
+            # January 2nd - v3 API
+            [("1", 200.0), ("2", 100.0)],
+            # January 2nd - v4 API
+            [("1", 100.0), ("2", 50.0)],
+        ]
+
+        results = invert_user_logs(
+            start="2023-01-01", end="2023-01-02", add_usernames=False
+        )
+
+        # User 1:
+        #   Jan 1: 150 (100 from v3 + 50 from v4)
+        #   Jan 2: 300 (200 from v3 + 100 from v4)
+        #   Total: 450
+        # User 2:
+        #   Jan 1: 75 (50 from v3 + 25 from v4)
+        #   Jan 2: 150 (100 from v3 + 50 from v4)
+        #   Total: 225
+        expected = defaultdict(dict)
+        expected[1] = OrderedDict(
+            {"2023-01-01": 150, "2023-01-02": 300, "total": 450}
+        )
+        expected[2] = OrderedDict(
+            {"2023-01-01": 75, "2023-01-02": 150, "total": 225}
+        )
+
+        self.assertEqual(results, expected)
+
+    @patch("cl.api.utils.get_redis_interface")
+    def test_anonymous_user_handling(self, mock_get_redis):
+        """
+        Test the handling of anonymous users, which have special requirements:
+        1. Both 'None' and 'AnonymousUser' should be treated as the same user
+        2. They should be combined under 'AnonymousUser' in the output
+        3. Their usage should be summed correctly across both identifiers
+        4. They should work with both API versions
+        """
+        mock_get_redis.return_value = self.mock_redis
+        self.mock_pipeline.execute.return_value = [
+            # January 1st - v3
+            [
+                ("None", 30.0),
+                ("1", 100.0),
+                ("AnonymousUser", 20.0),
+            ],
+            # January 1st - v4
+            [
+                ("AnonymousUser", 25.0),
+                ("1", 50.0),
+            ],
+            # January 2nd - v3
+            [("None", 40.0), ("1", 200.0)],
+            # January 2nd - v4
+            [
+                ("1", 100.0),
+                ("AnonymousUser", 35.0),
+            ],
+        ]
+
+        results = invert_user_logs(
+            start="2023-01-01", end="2023-01-02", add_usernames=False
+        )
+
+        expected = defaultdict(dict)
+        # Expected results:
+        # Anonymous user on Jan 1:
+        #   - v3: 30 (None) + 20 (AnonymousUser) = 50
+        #   - v4: 25 (AnonymousUser) = 25
+        #   - Total for Jan 1: 75
+        # Anonymous user on Jan 2:
+        #   - v3: 40 (None) = 40
+        #   - v4: 35 (AnonymousUser) = 35
+        #   - Total for Jan 2: 75
+        # Anonymous total across all dates: 150
+        expected["AnonymousUser"] = OrderedDict(
+            {
+                "2023-01-01": 75,
+                "2023-01-02": 75,
+                "total": 150,
+            }
+        )
+        expected[1] = OrderedDict(
+            {
+                "2023-01-01": 150,
+                "2023-01-02": 300,
+                "total": 450,
+            }
+        )
+
+        self.assertEqual(results, expected)
+
+        self.assertNotIn("None", results)
+
+        self.assertIn("AnonymousUser", results)
+
+        # Verify ordering is maintained even with anonymous users
+        anonymous_data = results["AnonymousUser"]
+        dates = list(anonymous_data.keys())
+        dates.remove("total")
+        self.assertEqual(dates, ["2023-01-01", "2023-01-02"])

From eb44e9663d1f54ffe9373ed9993483642b2a77cd Mon Sep 17 00:00:00 2001
From: Elisa Anguita <elisa@free.law>
Date: Fri, 3 Jan 2025 13:21:00 -0300
Subject: [PATCH 4/5] refactor(api): enhance maintainability by using
 itertools.batched instead of slicing

---
 cl/api/utils.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/cl/api/utils.py b/cl/api/utils.py
index 5970b0bce9..4155135e07 100644
--- a/cl/api/utils.py
+++ b/cl/api/utils.py
@@ -1,7 +1,7 @@
 import logging
 from collections import OrderedDict, defaultdict
 from datetime import date, datetime, timedelta, timezone
-from itertools import chain
+from itertools import batched, chain
 from typing import Any, Dict, List, Set, TypedDict, Union
 
 import eyecite
@@ -723,9 +723,15 @@ def invert_user_logs(
     pipe = r.pipeline()
 
     dates = make_date_str_list(start, end)
+    versions = ["v3", "v4"]
     for d in dates:
-        pipe.zrange(f"api:v3.user.d:{d}.counts", 0, -1, withscores=True)
-        pipe.zrange(f"api:v4.user.d:{d}.counts", 0, -1, withscores=True)
+        for v in versions:
+            pipe.zrange(
+                f"api:{v}.user.d:{d}.counts",
+                0,
+                -1,
+                withscores=True,
+            )
 
     # results contains alternating v3/v4 API usage data for each date queried.
     # For example, if querying 2023-01-01 to 2023-01-02, results might look like:
@@ -754,8 +760,8 @@ def update_user_counts(_user_id, _count, _date):
         out[_user_id].setdefault("total", 0)
         out[_user_id]["total"] += _count
 
-    for d, v3_data, v4_data in zip(dates, results[::2], results[1::2]):
-        for user_id, count in chain(v3_data, v4_data):
+    for d, api_usage in zip(dates, batched(results, len(versions))):
+        for user_id, count in chain(*api_usage):
             update_user_counts(user_id, count, d)
 
     # Sort the values

From 27340541eeff1eb98f419239da357f08657fbde1 Mon Sep 17 00:00:00 2001
From: Elisa Anguita <elisa@free.law>
Date: Fri, 3 Jan 2025 13:34:25 -0300
Subject: [PATCH 5/5] refactor(api): rename variable to avoid type checker
 error

---
 cl/api/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cl/api/utils.py b/cl/api/utils.py
index 4155135e07..84630f7b5d 100644
--- a/cl/api/utils.py
+++ b/cl/api/utils.py
@@ -725,9 +725,9 @@ def invert_user_logs(
     dates = make_date_str_list(start, end)
     versions = ["v3", "v4"]
     for d in dates:
-        for v in versions:
+        for version in versions:
             pipe.zrange(
-                f"api:{v}.user.d:{d}.counts",
+                f"api:{version}.user.d:{d}.counts",
                 0,
                 -1,
                 withscores=True,