From f5c87922f1ec9667ee993906401073392451ab23 Mon Sep 17 00:00:00 2001
From: Braden MacDonald <braden@opencraft.com>
Date: Wed, 31 Jan 2024 13:43:02 -0800
Subject: [PATCH 1/3] feat: Get deep count of descendant tags

---
 openedx_tagging/core/tagging/data.py          |  1 +
 openedx_tagging/core/tagging/models/base.py   | 39 +++++++++++++--
 .../core/tagging/rest_api/v1/serializers.py   |  1 +
 .../tagging/import_export/test_template.py    |  8 +--
 .../openedx_tagging/core/tagging/test_api.py  | 10 ++--
 .../core/tagging/test_models.py               | 43 ++++++++--------
 .../core/tagging/test_views.py                | 50 +++++++++----------
 tests/openedx_tagging/core/tagging/utils.py   |  7 ++-
 8 files changed, 99 insertions(+), 60 deletions(-)

diff --git a/openedx_tagging/core/tagging/data.py b/openedx_tagging/core/tagging/data.py
index 1ab8e987..20ba7324 100644
--- a/openedx_tagging/core/tagging/data.py
+++ b/openedx_tagging/core/tagging/data.py
@@ -21,6 +21,7 @@ class TagData(TypedDict):
     value: str
     external_id: str | None
     child_count: int
+    descendant_count: int
     depth: int
     parent_value: str | None
     # Note: usage_count may or may not be present, depending on the request.
diff --git a/openedx_tagging/core/tagging/models/base.py b/openedx_tagging/core/tagging/models/base.py
index 13beeb0a..bd3c69df 100644
--- a/openedx_tagging/core/tagging/models/base.py
+++ b/openedx_tagging/core/tagging/models/base.py
@@ -170,6 +170,18 @@ def child_count(self) -> int:
             return self.taxonomy.tag_set.filter(parent=self).count()
         return 0
 
+    @cached_property
+    def descendant_count(self) -> int:
+        """
+        How many descendant tags this tag has in the taxonomy.
+        """
+        if self.taxonomy and not self.taxonomy.allow_free_text:
+            return self.taxonomy.tag_set.filter(
+                Q(parent__parent=self) |
+                Q(parent__parent__parent=self)
+            ).count() + self.child_count
+        return 0
+
     def clean(self):
         """
         Validate this tag before saving
@@ -413,6 +425,7 @@ def _get_filtered_tags_free_text(
         qs = qs.annotate(
             depth=Value(0),
             child_count=Value(0),
+            descendant_count=Value(0),
             external_id=Value(None, output_field=models.CharField()),
             parent_value=Value(None, output_field=models.CharField()),
             _id=Value(None, output_field=models.CharField()),
@@ -444,12 +457,16 @@ def _get_filtered_tags_one_level(
         else:
             qs = self.tag_set.filter(parent=None).annotate(depth=Value(0))
             qs = qs.annotate(parent_value=Value(None, output_field=models.CharField()))
-        qs = qs.annotate(child_count=models.Count("children"))
+        qs = qs.annotate(child_count=models.Count("children", distinct=True))
+        qs = qs.annotate(grandchild_count=models.Count("children__children"))
+        qs = qs.annotate(great_grandchild_count=models.Count("children__children__children"))
+        qs = qs.annotate(descendant_count=F("child_count") + F("grandchild_count") + F("great_grandchild_count"))
         # Filter by search term:
         if search_term:
             qs = qs.filter(value__icontains=search_term)
         qs = qs.annotate(_id=F("id"))  # ID has an underscore to encourage use of 'value' rather than this internal ID
-        qs = qs.values("value", "child_count", "depth", "parent_value", "external_id", "_id").order_by("value")
+        qs = qs.values("value", "child_count", "descendant_count", "depth", "parent_value", "external_id", "_id")
+        qs = qs.order_by("value")
         if include_counts:
             # We need to include the count of how many times this tag is used to tag objects.
             # You'd think we could just use:
@@ -501,14 +518,25 @@ def _get_filtered_tags_deep(
                     if pk is not None:
                         matching_ids.append(pk)
             qs = qs.filter(pk__in=matching_ids)
-            qs = qs.annotate(child_count=models.Count("children", filter=Q(children__pk__in=matching_ids)))
+            qs = qs.annotate(
+                child_count=models.Count("children", filter=Q(children__pk__in=matching_ids), distinct=True),
+                grandchild_count=models.Count("children__children", filter=Q(children__children__pk__in=matching_ids)),
+                great_grandchild_count=models.Count(
+                    "children__children__children",
+                    filter=Q(children__children__children__pk__in=matching_ids),
+                ),
+            )
+            qs = qs.annotate(descendant_count=F("child_count") + F("grandchild_count") + F("great_grandchild_count"))
         elif excluded_values:
             raise NotImplementedError("Using excluded_values without search_term is not currently supported.")
             # We could implement this in the future but I'd prefer to get rid of the "excluded_values" API altogether.
             # It remains to be seen if it's useful to do that on the backend, or if we can do it better/simpler on the
             # frontend.
         else:
-            qs = qs.annotate(child_count=models.Count("children"))
+            qs = qs.annotate(child_count=models.Count("children", distinct=True))
+            qs = qs.annotate(grandchild_count=models.Count("children__children"))
+            qs = qs.annotate(great_grandchild_count=models.Count("children__children__children"))
+            qs = qs.annotate(descendant_count=F("child_count") + F("grandchild_count") + F("great_grandchild_count"))
 
         # Add the "depth" to each tag:
         qs = Tag.annotate_depth(qs)
@@ -527,7 +555,8 @@ def _get_filtered_tags_deep(
         # Add the parent value
         qs = qs.annotate(parent_value=F("parent__value"))
         qs = qs.annotate(_id=F("id"))  # ID has an underscore to encourage use of 'value' rather than this internal ID
-        qs = qs.values("value", "child_count", "depth", "parent_value", "external_id", "_id").order_by("sort_key")
+        qs = qs.values("value", "child_count", "descendant_count", "depth", "parent_value", "external_id", "_id")
+        qs = qs.order_by("sort_key")
         if include_counts:
             # Including the counts is a bit tricky; see the comment above in _get_filtered_tags_one_level()
             obj_tags = ObjectTag.objects.filter(tag_id=models.OuterRef("pk")).order_by().annotate(
diff --git a/openedx_tagging/core/tagging/rest_api/v1/serializers.py b/openedx_tagging/core/tagging/rest_api/v1/serializers.py
index a3af0bbb..4baca41f 100644
--- a/openedx_tagging/core/tagging/rest_api/v1/serializers.py
+++ b/openedx_tagging/core/tagging/rest_api/v1/serializers.py
@@ -208,6 +208,7 @@ class TagDataSerializer(UserPermissionsSerializerMixin, serializers.Serializer):
     value = serializers.CharField()
     external_id = serializers.CharField(allow_null=True)
     child_count = serializers.IntegerField()
+    descendant_count = serializers.IntegerField()
     depth = serializers.IntegerField()
     parent_value = serializers.CharField(allow_null=True)
     usage_count = serializers.IntegerField(required=False)
diff --git a/tests/openedx_tagging/core/tagging/import_export/test_template.py b/tests/openedx_tagging/core/tagging/import_export/test_template.py
index 89889f95..85621a72 100644
--- a/tests/openedx_tagging/core/tagging/import_export/test_template.py
+++ b/tests/openedx_tagging/core/tagging/import_export/test_template.py
@@ -53,19 +53,19 @@ def test_import_template(self, template_file, parser_format):
             'Electronic instruments (ELECTRIC) (None) (children: 2)',
             '  Synthesizer (SYNTH) (Electronic instruments) (children: 0)',
             '  Theramin (THERAMIN) (Electronic instruments) (children: 0)',
-            'Percussion instruments (PERCUSS) (None) (children: 3)',
+            'Percussion instruments (PERCUSS) (None) (children: 3 + 6)',
             '  Chordophone (CHORD) (Percussion instruments) (children: 1)',
             '    Piano (PIANO) (Chordophone) (children: 0)',
             '  Idiophone (BELLS) (Percussion instruments) (children: 2)',
             '    Celesta (CELESTA) (Idiophone) (children: 0)',
             '    Hi-hat (HI-HAT) (Idiophone) (children: 0)',
-            '  Membranophone (DRUMS) (Percussion instruments) (children: 2)',
+            '  Membranophone (DRUMS) (Percussion instruments) (children: 2 + 1)',
             '    Cajón (CAJÓN) (Membranophone) (children: 1)',
             # This tag is present in the import files, but it will be omitted from get_tags()
             # because it sits beyond TAXONOMY_MAX_DEPTH.
             #      Pyle Stringed Jam Cajón (PYLE) (Cajón) (children: 0)
             '    Tabla (TABLA) (Membranophone) (children: 0)',
-            'String instruments (STRINGS) (None) (children: 2)',
+            'String instruments (STRINGS) (None) (children: 2 + 5)',
             '  Bowed strings (BOW) (String instruments) (children: 2)',
             '    Cello (CELLO) (Bowed strings) (children: 0)',
             '    Violin (VIOLIN) (Bowed strings) (children: 0)',
@@ -73,7 +73,7 @@ def test_import_template(self, template_file, parser_format):
             '    Banjo (BANJO) (Plucked strings) (children: 0)',
             '    Harp (HARP) (Plucked strings) (children: 0)',
             '    Mandolin (MANDOLIN) (Plucked strings) (children: 0)',
-            'Wind instruments (WINDS) (None) (children: 2)',
+            'Wind instruments (WINDS) (None) (children: 2 + 5)',
             '  Brass (BRASS) (Wind instruments) (children: 2)',
             '    Trumpet (TRUMPET) (Brass) (children: 0)',
             '    Tuba (TUBA) (Brass) (children: 0)',
diff --git a/tests/openedx_tagging/core/tagging/test_api.py b/tests/openedx_tagging/core/tagging/test_api.py
index 5befef5c..4c58e532 100644
--- a/tests/openedx_tagging/core/tagging/test_api.py
+++ b/tests/openedx_tagging/core/tagging/test_api.py
@@ -126,8 +126,8 @@ def test_get_tags(self) -> None:
             "Bacteria (children: 2)",
             "  Archaebacteria (children: 0)",
             "  Eubacteria (children: 0)",
-            "Eukaryota (children: 5)",
-            "  Animalia (children: 7)",
+            "Eukaryota (children: 5 + 8)",
+            "  Animalia (children: 7 + 1)",
             "    Arthropoda (children: 0)",
             "    Chordata (children: 1)",  # The child of this is excluded due to depth limit
             "    Cnidaria (children: 0)",
@@ -155,7 +155,7 @@ def test_get_root_tags(self):
         assert pretty_format_tags(root_life_on_earth_tags, parent=False) == [
             'Archaea (children: 3)',
             'Bacteria (children: 2)',
-            'Eukaryota (children: 5)',
+            'Eukaryota (children: 5 + 8)',
         ]
 
     @override_settings(LANGUAGES=test_languages)
@@ -615,7 +615,7 @@ def get_object_tags():
             "  Proteoarchaeota (used: 0, children: 0)",
             "Bacteria (used: 0, children: 1)",  # does not contain "ar" but a child does
             "  Archaebacteria (used: 1, children: 0)",
-            "Eukaryota (used: 0, children: 1)",
+            "Eukaryota (used: 0, children: 1 + 2)",
             "  Animalia (used: 1, children: 2)",  # does not contain "ar" but a child does
             "    Arthropoda (used: 1, children: 0)",
             "    Cnidaria (used: 0, children: 0)",
@@ -637,7 +637,7 @@ def get_object_tags():
             "Bacteria (used: 0, children: 2)",
             "  Archaebacteria (used: 1, children: 0)",
             "  Eubacteria (used: 0, children: 0)",
-            "Eukaryota (used: 0, children: 4)",
+            "Eukaryota (used: 0, children: 4 + 7)",
             "  Animalia (used: 1, children: 7)",
             "    Arthropoda (used: 1, children: 0)",
             "    Chordata (used: 0, children: 0)",  # <<< Chordata has a matching child but we only support searching
diff --git a/tests/openedx_tagging/core/tagging/test_models.py b/tests/openedx_tagging/core/tagging/test_models.py
index c730b16b..453cf99c 100644
--- a/tests/openedx_tagging/core/tagging/test_models.py
+++ b/tests/openedx_tagging/core/tagging/test_models.py
@@ -285,9 +285,9 @@ def test_get_root(self) -> None:
             del r["_id"]  # Remove the internal database IDs; they aren't interesting here and a other tests check them
         assert result == [
             # These are the root tags, in alphabetical order:
-            {"value": "Archaea", "child_count": 3, **common_fields},
-            {"value": "Bacteria", "child_count": 2, **common_fields},
-            {"value": "Eukaryota", "child_count": 5, **common_fields},
+            {"value": "Archaea", "child_count": 3, "descendant_count": 3, **common_fields},
+            {"value": "Bacteria", "child_count": 2, "descendant_count": 2, **common_fields},
+            {"value": "Eukaryota", "child_count": 5, "descendant_count": 13, **common_fields},
         ]
 
     def test_get_child_tags_one_level(self) -> None:
@@ -301,11 +301,11 @@ def test_get_child_tags_one_level(self) -> None:
             del r["_id"]  # Remove the internal database IDs; they aren't interesting here and a other tests check them
         assert result == [
             # These are the Eukaryota tags, in alphabetical order:
-            {"value": "Animalia", "child_count": 7, **common_fields},
-            {"value": "Fungi", "child_count": 0, **common_fields},
-            {"value": "Monera", "child_count": 0, **common_fields},
-            {"value": "Plantae", "child_count": 0, **common_fields},
-            {"value": "Protista", "child_count": 0, **common_fields},
+            {"value": "Animalia", "child_count": 7, "descendant_count": 8, **common_fields},
+            {"value": "Fungi", "child_count": 0, "descendant_count": 0, **common_fields},
+            {"value": "Monera", "child_count": 0, "descendant_count": 0, **common_fields},
+            {"value": "Plantae", "child_count": 0, "descendant_count": 0, **common_fields},
+            {"value": "Protista", "child_count": 0, "descendant_count": 0, **common_fields},
         ]
 
     def test_get_grandchild_tags_one_level(self) -> None:
@@ -319,13 +319,13 @@ def test_get_grandchild_tags_one_level(self) -> None:
             del r["_id"]  # Remove the internal database IDs; they aren't interesting here and a other tests check them
         assert result == [
             # These are the Eukaryota tags, in alphabetical order:
-            {"value": "Arthropoda", "child_count": 0, **common_fields},
-            {"value": "Chordata", "child_count": 1, **common_fields},
-            {"value": "Cnidaria", "child_count": 0, **common_fields},
-            {"value": "Ctenophora", "child_count": 0, **common_fields},
-            {"value": "Gastrotrich", "child_count": 0, **common_fields},
-            {"value": "Placozoa", "child_count": 0, **common_fields},
-            {"value": "Porifera", "child_count": 0, **common_fields},
+            {"value": "Arthropoda", "child_count": 0, "descendant_count": 0, **common_fields},
+            {"value": "Chordata", "child_count": 1, "descendant_count": 1, **common_fields},
+            {"value": "Cnidaria", "child_count": 0, "descendant_count": 0, **common_fields},
+            {"value": "Ctenophora", "child_count": 0, "descendant_count": 0, **common_fields},
+            {"value": "Gastrotrich", "child_count": 0, "descendant_count": 0, **common_fields},
+            {"value": "Placozoa", "child_count": 0, "descendant_count": 0, **common_fields},
+            {"value": "Porifera", "child_count": 0, "descendant_count": 0, **common_fields},
         ]
 
     def test_get_depth_1_search_term(self) -> None:
@@ -337,6 +337,7 @@ def test_get_depth_1_search_term(self) -> None:
             {
                 "value": "Archaea",
                 "child_count": 3,
+                "descendant_count": 3,
                 "depth": 0,
                 "usage_count": 0,
                 "parent_value": None,
@@ -355,6 +356,7 @@ def test_get_depth_1_child_search_term(self) -> None:
             {
                 "value": "Archaebacteria",
                 "child_count": 0,
+                "descendant_count": 0,
                 "depth": 1,
                 "parent_value": "Bacteria",
                 "external_id": None,
@@ -395,8 +397,8 @@ def test_get_all(self) -> None:
             "Bacteria (None) (children: 2)",
             "  Archaebacteria (Bacteria) (children: 0)",
             "  Eubacteria (Bacteria) (children: 0)",
-            "Eukaryota (None) (children: 5)",
-            "  Animalia (Eukaryota) (children: 7)",
+            "Eukaryota (None) (children: 5 + 8)",
+            "  Animalia (Eukaryota) (children: 7 + 1)",
             "    Arthropoda (Animalia) (children: 0)",
             "    Chordata (Animalia) (children: 1)",  # note this has a child but the child is not included
             "    Cnidaria (Animalia) (children: 0)",
@@ -431,7 +433,7 @@ def test_search_2(self) -> None:
         """
         result = pretty_format_tags(self.taxonomy.get_filtered_tags(search_term="chordata"))
         assert result == [
-            "Eukaryota (None) (children: 1)",  # Has one child that matches
+            "Eukaryota (None) (children: 1 + 1)",  # Has one child that matches, plus one additional matching descendant
             "  Animalia (Eukaryota) (children: 1)",
             "    Chordata (Animalia) (children: 0)",  # this is the matching tag.
         ]
@@ -445,7 +447,7 @@ def test_search_3(self) -> None:
         assert result == [
             "Archaea (None) (children: 1)",
             "  Proteoarchaeota (Archaea) (children: 0)",
-            "Eukaryota (None) (children: 2)",  # Note the "children: 2" is correct - 2 direct children are in the result
+            "Eukaryota (None) (children: 2 + 2)",  # 2 direct matching children, 2 additional matching descendants
             "  Animalia (Eukaryota) (children: 2)",
             "    Arthropoda (Animalia) (children: 0)",  # match
             "    Gastrotrich (Animalia) (children: 0)",  # match
@@ -464,6 +466,7 @@ def test_tags_deep(self) -> None:
                 "depth": 3,
                 "usage_count": 0,
                 "child_count": 0,
+                "descendant_count": 0,
                 "external_id": None,
                 "_id": 21,  # These IDs are hard-coded in the test fixture file
             }
@@ -527,7 +530,7 @@ def test_tree_sort(self) -> None:
         taxonomy = self.create_sort_test_taxonomy()
         result = pretty_format_tags(taxonomy.get_filtered_tags())
         assert result == [
-            "1 (None) (children: 4)",
+            "1 (None) (children: 4 + 1)",
             "  1 A (1) (children: 0)",
             "  11 (1) (children: 0)",
             "  11111 (1) (children: 1)",
diff --git a/tests/openedx_tagging/core/tagging/test_views.py b/tests/openedx_tagging/core/tagging/test_views.py
index c2ccd320..f6d8c63a 100644
--- a/tests/openedx_tagging/core/tagging/test_views.py
+++ b/tests/openedx_tagging/core/tagging/test_views.py
@@ -1307,8 +1307,8 @@ def test_small_taxonomy(self):
             "Bacteria (None) (children: 2)",
             "  Archaebacteria (Bacteria) (children: 0)",
             "  Eubacteria (Bacteria) (children: 0)",
-            "Eukaryota (None) (children: 5)",
-            "  Animalia (Eukaryota) (children: 7)",
+            "Eukaryota (None) (children: 5 + 8)",
+            "  Animalia (Eukaryota) (children: 7 + 1)",
             "    Arthropoda (Animalia) (children: 0)",
             "    Chordata (Animalia) (children: 1)",
             "    Cnidaria (Animalia) (children: 0)",
@@ -1355,7 +1355,7 @@ def test_small_taxonomy_paged(self):
         assert next_response.status_code == status.HTTP_200_OK
         next_data = next_response.data
         assert pretty_format_tags(next_data["results"]) == [
-            "Eukaryota (None) (children: 5)",
+            "Eukaryota (None) (children: 5 + 8)",
         ]
         assert next_data.get("current_page") == 2
 
@@ -1474,16 +1474,16 @@ def test_large_taxonomy(self):
         results = data["results"]
 
         assert pretty_format_tags(results) == [
-            "Tag 0 (None) (used: 0, children: 12)",
-            "Tag 1099 (None) (used: 0, children: 12)",
-            "Tag 1256 (None) (used: 0, children: 12)",
-            "Tag 1413 (None) (used: 0, children: 12)",
-            "Tag 157 (None) (used: 0, children: 12)",
-            "Tag 1570 (None) (used: 0, children: 12)",
-            "Tag 1727 (None) (used: 0, children: 12)",
-            "Tag 1884 (None) (used: 0, children: 12)",
-            "Tag 2041 (None) (used: 0, children: 12)",
-            "Tag 2198 (None) (used: 0, children: 12)",
+            "Tag 0 (None) (used: 0, children: 12 + 144)",
+            "Tag 1099 (None) (used: 0, children: 12 + 144)",
+            "Tag 1256 (None) (used: 0, children: 12 + 144)",
+            "Tag 1413 (None) (used: 0, children: 12 + 144)",
+            "Tag 157 (None) (used: 0, children: 12 + 144)",
+            "Tag 1570 (None) (used: 0, children: 12 + 144)",
+            "Tag 1727 (None) (used: 0, children: 12 + 144)",
+            "Tag 1884 (None) (used: 0, children: 12 + 144)",
+            "Tag 2041 (None) (used: 0, children: 12 + 144)",
+            "Tag 2198 (None) (used: 0, children: 12 + 144)",
             # ... there are 41 more root tags but they're excluded from this first result page.
         ]
 
@@ -1554,7 +1554,7 @@ def test_large_search(self):
         data = response.data
         results = data["results"]
         assert pretty_format_tags(results)[:20] == [
-            "Tag 0 (None) (children: 3)",  # First 2 results don't match but have children that match
+            "Tag 0 (None) (children: 3 + 10)",  # First 2 results don't match but have children that match
             # Note the count here ---^ is not the total number of matching descendants, just the number of children
             # once we filter the tree to include only matches and their ancestors.
             "  Tag 1 (Tag 0) (children: 1)",
@@ -1570,7 +1570,7 @@ def test_large_search(self):
             "    Tag 117 (Tag 105) (children: 0)",
             "  Tag 118 (Tag 0) (children: 1)",
             "    Tag 119 (Tag 118) (children: 0)",
-            "Tag 1099 (None) (children: 9)",
+            "Tag 1099 (None) (children: 9 + 93)",
             "  Tag 1100 (Tag 1099) (children: 12)",
             "    Tag 1101 (Tag 1100) (children: 0)",
             "    Tag 1102 (Tag 1100) (children: 0)",
@@ -1592,16 +1592,16 @@ def test_large_search(self):
         results2 = data2["results"]
         assert pretty_format_tags(results2) == [
             # Notice that none of these root tags directly match the search query, but their children/grandchildren do
-            "Tag 0 (None) (children: 3)",
-            "Tag 1099 (None) (children: 9)",
-            "Tag 1256 (None) (children: 2)",
-            "Tag 1413 (None) (children: 1)",
-            "Tag 157 (None) (children: 2)",
-            "Tag 1570 (None) (children: 2)",
-            "Tag 1727 (None) (children: 1)",
-            "Tag 1884 (None) (children: 2)",
-            "Tag 2041 (None) (children: 1)",
-            "Tag 2198 (None) (children: 2)",
+            "Tag 0 (None) (children: 3 + 10)",
+            "Tag 1099 (None) (children: 9 + 93)",
+            "Tag 1256 (None) (children: 2 + 2)",
+            "Tag 1413 (None) (children: 1 + 1)",
+            "Tag 157 (None) (children: 2 + 2)",
+            "Tag 1570 (None) (children: 2 + 2)",
+            "Tag 1727 (None) (children: 1 + 1)",
+            "Tag 1884 (None) (children: 2 + 1)",
+            "Tag 2041 (None) (children: 1 + 10)",
+            "Tag 2198 (None) (children: 2 + 2)",
         ]
         assert data2.get("count") == 51
         assert data2.get("num_pages") == 6
diff --git a/tests/openedx_tagging/core/tagging/utils.py b/tests/openedx_tagging/core/tagging/utils.py
index 0070f12c..3e959d96 100644
--- a/tests/openedx_tagging/core/tagging/utils.py
+++ b/tests/openedx_tagging/core/tagging/utils.py
@@ -21,6 +21,11 @@ def pretty_format_tags(result, parent=True, external_id=False) -> list[str]:
         line += "("
         if "usage_count" in t:
             line += f"used: {t['usage_count']}, "
-        line += f"children: {t['child_count']})"
+        line += f"children: {t['child_count']}"
+        # In addition to the direct children, how many total descendants are there?
+        deeper_descendants = t['descendant_count'] - t['child_count']
+        if deeper_descendants != 0:
+            line += f" + {deeper_descendants}"
+        line += ")"
         pretty_results.append(line)
     return pretty_results

From 321a61939d8cd5b38f8eae65c6cb7ccbe57cdb41 Mon Sep 17 00:00:00 2001
From: Braden MacDonald <braden@opencraft.com>
Date: Thu, 1 Feb 2024 14:45:21 -0800
Subject: [PATCH 2/3] fix: bug with some descendant counts

---
 openedx_tagging/core/tagging/models/base.py   |  8 +++--
 .../core/tagging/test_models.py               | 31 +++++++++++++++++++
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/openedx_tagging/core/tagging/models/base.py b/openedx_tagging/core/tagging/models/base.py
index bd3c69df..8a6c688e 100644
--- a/openedx_tagging/core/tagging/models/base.py
+++ b/openedx_tagging/core/tagging/models/base.py
@@ -458,7 +458,7 @@ def _get_filtered_tags_one_level(
             qs = self.tag_set.filter(parent=None).annotate(depth=Value(0))
             qs = qs.annotate(parent_value=Value(None, output_field=models.CharField()))
         qs = qs.annotate(child_count=models.Count("children", distinct=True))
-        qs = qs.annotate(grandchild_count=models.Count("children__children"))
+        qs = qs.annotate(grandchild_count=models.Count("children__children", distinct=True))
         qs = qs.annotate(great_grandchild_count=models.Count("children__children__children"))
         qs = qs.annotate(descendant_count=F("child_count") + F("grandchild_count") + F("great_grandchild_count"))
         # Filter by search term:
@@ -520,7 +520,9 @@ def _get_filtered_tags_deep(
             qs = qs.filter(pk__in=matching_ids)
             qs = qs.annotate(
                 child_count=models.Count("children", filter=Q(children__pk__in=matching_ids), distinct=True),
-                grandchild_count=models.Count("children__children", filter=Q(children__children__pk__in=matching_ids)),
+                grandchild_count=models.Count(
+                    "children__children", filter=Q(children__children__pk__in=matching_ids), distinct=True,
+                ),
                 great_grandchild_count=models.Count(
                     "children__children__children",
                     filter=Q(children__children__children__pk__in=matching_ids),
@@ -534,7 +536,7 @@ def _get_filtered_tags_deep(
             # frontend.
         else:
             qs = qs.annotate(child_count=models.Count("children", distinct=True))
-            qs = qs.annotate(grandchild_count=models.Count("children__children"))
+            qs = qs.annotate(grandchild_count=models.Count("children__children", distinct=True))
             qs = qs.annotate(great_grandchild_count=models.Count("children__children__children"))
             qs = qs.annotate(descendant_count=F("child_count") + F("grandchild_count") + F("great_grandchild_count"))
 
diff --git a/tests/openedx_tagging/core/tagging/test_models.py b/tests/openedx_tagging/core/tagging/test_models.py
index 453cf99c..59ea8f37 100644
--- a/tests/openedx_tagging/core/tagging/test_models.py
+++ b/tests/openedx_tagging/core/tagging/test_models.py
@@ -550,6 +550,37 @@ def test_tree_sort(self) -> None:
             "  azure (ALPHABET) (children: 0)",
         ]
 
+    def test_descendant_counts(self) -> None:
+        """
+        Test getting the descendant count on a taxonomy known to cause aggregation
+        bugs unless the aggregations are correctly specified with distinct=True
+
+        https://docs.djangoproject.com/en/5.0/topics/db/aggregation/#combining-multiple-aggregations
+        """
+        taxonomy = api.create_taxonomy("ESDC Subset")
+        api.add_tag_to_taxonomy(taxonomy, "Interests")  # root tag
+        api.add_tag_to_taxonomy(taxonomy, "Holland Codes", parent_tag_value="Interests")  # child tag
+        # Create the grandchild tag:
+        g_tag = api.add_tag_to_taxonomy(taxonomy, "Interests - Holland Codes", parent_tag_value="Holland Codes")
+        # Create the 6 great-grandchild tags:
+        api.add_tag_to_taxonomy(taxonomy, "Artistic", parent_tag_value=g_tag.value)
+        api.add_tag_to_taxonomy(taxonomy, "Conventional", parent_tag_value=g_tag.value)
+        api.add_tag_to_taxonomy(taxonomy, "Enterprising", parent_tag_value=g_tag.value)
+        api.add_tag_to_taxonomy(taxonomy, "Investigative", parent_tag_value=g_tag.value)
+        api.add_tag_to_taxonomy(taxonomy, "Realistic", parent_tag_value=g_tag.value)
+        api.add_tag_to_taxonomy(taxonomy, "Social", parent_tag_value=g_tag.value)
+
+        result = pretty_format_tags(taxonomy.get_filtered_tags(depth=1, include_counts=True))
+        assert result == [
+            "Interests (None) (used: 0, children: 1 + 7)",  # 1 child + (1 grandchild and 6 great grandchild tags)
+        ]
+        result2 = pretty_format_tags(taxonomy.get_filtered_tags(depth=None, include_counts=True))
+        assert result2 == [
+            "Interests (None) (used: 0, children: 1 + 7)",
+            "  Holland Codes (Interests) (used: 0, children: 1 + 6)",
+            "    Interests - Holland Codes (Holland Codes) (used: 0, children: 6)",
+        ]
+
 
 class TestFilteredTagsFreeTextTaxonomy(TestCase):
     """

From 0fbcd01ab778095673c5821a3fc2a7a10b572f84 Mon Sep 17 00:00:00 2001
From: Braden MacDonald <braden@opencraft.com>
Date: Thu, 8 Feb 2024 09:49:46 -0800
Subject: [PATCH 3/3] chore: bump version to 0.5.2

---
 openedx_learning/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openedx_learning/__init__.py b/openedx_learning/__init__.py
index 69145570..fa8a6955 100644
--- a/openedx_learning/__init__.py
+++ b/openedx_learning/__init__.py
@@ -1,4 +1,4 @@
 """
 Open edX Learning ("Learning Core").
 """
-__version__ = "0.5.1"
+__version__ = "0.5.2"