From f5c87922f1ec9667ee993906401073392451ab23 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Wed, 31 Jan 2024 13:43:02 -0800 Subject: [PATCH 1/3] feat: Get deep count of descendant tags --- openedx_tagging/core/tagging/data.py | 1 + openedx_tagging/core/tagging/models/base.py | 39 +++++++++++++-- .../core/tagging/rest_api/v1/serializers.py | 1 + .../tagging/import_export/test_template.py | 8 +-- .../openedx_tagging/core/tagging/test_api.py | 10 ++-- .../core/tagging/test_models.py | 43 ++++++++-------- .../core/tagging/test_views.py | 50 +++++++++---------- tests/openedx_tagging/core/tagging/utils.py | 7 ++- 8 files changed, 99 insertions(+), 60 deletions(-) diff --git a/openedx_tagging/core/tagging/data.py b/openedx_tagging/core/tagging/data.py index 1ab8e987..20ba7324 100644 --- a/openedx_tagging/core/tagging/data.py +++ b/openedx_tagging/core/tagging/data.py @@ -21,6 +21,7 @@ class TagData(TypedDict): value: str external_id: str | None child_count: int + descendant_count: int depth: int parent_value: str | None # Note: usage_count may or may not be present, depending on the request. diff --git a/openedx_tagging/core/tagging/models/base.py b/openedx_tagging/core/tagging/models/base.py index 13beeb0a..bd3c69df 100644 --- a/openedx_tagging/core/tagging/models/base.py +++ b/openedx_tagging/core/tagging/models/base.py @@ -170,6 +170,18 @@ def child_count(self) -> int: return self.taxonomy.tag_set.filter(parent=self).count() return 0 + @cached_property + def descendant_count(self) -> int: + """ + How many descendant tags this tag has in the taxonomy. + """ + if self.taxonomy and not self.taxonomy.allow_free_text: + return self.taxonomy.tag_set.filter( + Q(parent__parent=self) | + Q(parent__parent__parent=self) + ).count() + self.child_count + return 0 + def clean(self): """ Validate this tag before saving @@ -413,6 +425,7 @@ def _get_filtered_tags_free_text( qs = qs.annotate( depth=Value(0), child_count=Value(0), + descendant_count=Value(0), external_id=Value(None, output_field=models.CharField()), parent_value=Value(None, output_field=models.CharField()), _id=Value(None, output_field=models.CharField()), @@ -444,12 +457,16 @@ def _get_filtered_tags_one_level( else: qs = self.tag_set.filter(parent=None).annotate(depth=Value(0)) qs = qs.annotate(parent_value=Value(None, output_field=models.CharField())) - qs = qs.annotate(child_count=models.Count("children")) + qs = qs.annotate(child_count=models.Count("children", distinct=True)) + qs = qs.annotate(grandchild_count=models.Count("children__children")) + qs = qs.annotate(great_grandchild_count=models.Count("children__children__children")) + qs = qs.annotate(descendant_count=F("child_count") + F("grandchild_count") + F("great_grandchild_count")) # Filter by search term: if search_term: qs = qs.filter(value__icontains=search_term) qs = qs.annotate(_id=F("id")) # ID has an underscore to encourage use of 'value' rather than this internal ID - qs = qs.values("value", "child_count", "depth", "parent_value", "external_id", "_id").order_by("value") + qs = qs.values("value", "child_count", "descendant_count", "depth", "parent_value", "external_id", "_id") + qs = qs.order_by("value") if include_counts: # We need to include the count of how many times this tag is used to tag objects. # You'd think we could just use: @@ -501,14 +518,25 @@ def _get_filtered_tags_deep( if pk is not None: matching_ids.append(pk) qs = qs.filter(pk__in=matching_ids) - qs = qs.annotate(child_count=models.Count("children", filter=Q(children__pk__in=matching_ids))) + qs = qs.annotate( + child_count=models.Count("children", filter=Q(children__pk__in=matching_ids), distinct=True), + grandchild_count=models.Count("children__children", filter=Q(children__children__pk__in=matching_ids)), + great_grandchild_count=models.Count( + "children__children__children", + filter=Q(children__children__children__pk__in=matching_ids), + ), + ) + qs = qs.annotate(descendant_count=F("child_count") + F("grandchild_count") + F("great_grandchild_count")) elif excluded_values: raise NotImplementedError("Using excluded_values without search_term is not currently supported.") # We could implement this in the future but I'd prefer to get rid of the "excluded_values" API altogether. # It remains to be seen if it's useful to do that on the backend, or if we can do it better/simpler on the # frontend. else: - qs = qs.annotate(child_count=models.Count("children")) + qs = qs.annotate(child_count=models.Count("children", distinct=True)) + qs = qs.annotate(grandchild_count=models.Count("children__children")) + qs = qs.annotate(great_grandchild_count=models.Count("children__children__children")) + qs = qs.annotate(descendant_count=F("child_count") + F("grandchild_count") + F("great_grandchild_count")) # Add the "depth" to each tag: qs = Tag.annotate_depth(qs) @@ -527,7 +555,8 @@ def _get_filtered_tags_deep( # Add the parent value qs = qs.annotate(parent_value=F("parent__value")) qs = qs.annotate(_id=F("id")) # ID has an underscore to encourage use of 'value' rather than this internal ID - qs = qs.values("value", "child_count", "depth", "parent_value", "external_id", "_id").order_by("sort_key") + qs = qs.values("value", "child_count", "descendant_count", "depth", "parent_value", "external_id", "_id") + qs = qs.order_by("sort_key") if include_counts: # Including the counts is a bit tricky; see the comment above in _get_filtered_tags_one_level() obj_tags = ObjectTag.objects.filter(tag_id=models.OuterRef("pk")).order_by().annotate( diff --git a/openedx_tagging/core/tagging/rest_api/v1/serializers.py b/openedx_tagging/core/tagging/rest_api/v1/serializers.py index a3af0bbb..4baca41f 100644 --- a/openedx_tagging/core/tagging/rest_api/v1/serializers.py +++ b/openedx_tagging/core/tagging/rest_api/v1/serializers.py @@ -208,6 +208,7 @@ class TagDataSerializer(UserPermissionsSerializerMixin, serializers.Serializer): value = serializers.CharField() external_id = serializers.CharField(allow_null=True) child_count = serializers.IntegerField() + descendant_count = serializers.IntegerField() depth = serializers.IntegerField() parent_value = serializers.CharField(allow_null=True) usage_count = serializers.IntegerField(required=False) diff --git a/tests/openedx_tagging/core/tagging/import_export/test_template.py b/tests/openedx_tagging/core/tagging/import_export/test_template.py index 89889f95..85621a72 100644 --- a/tests/openedx_tagging/core/tagging/import_export/test_template.py +++ b/tests/openedx_tagging/core/tagging/import_export/test_template.py @@ -53,19 +53,19 @@ def test_import_template(self, template_file, parser_format): 'Electronic instruments (ELECTRIC) (None) (children: 2)', ' Synthesizer (SYNTH) (Electronic instruments) (children: 0)', ' Theramin (THERAMIN) (Electronic instruments) (children: 0)', - 'Percussion instruments (PERCUSS) (None) (children: 3)', + 'Percussion instruments (PERCUSS) (None) (children: 3 + 6)', ' Chordophone (CHORD) (Percussion instruments) (children: 1)', ' Piano (PIANO) (Chordophone) (children: 0)', ' Idiophone (BELLS) (Percussion instruments) (children: 2)', ' Celesta (CELESTA) (Idiophone) (children: 0)', ' Hi-hat (HI-HAT) (Idiophone) (children: 0)', - ' Membranophone (DRUMS) (Percussion instruments) (children: 2)', + ' Membranophone (DRUMS) (Percussion instruments) (children: 2 + 1)', ' Cajón (CAJÓN) (Membranophone) (children: 1)', # This tag is present in the import files, but it will be omitted from get_tags() # because it sits beyond TAXONOMY_MAX_DEPTH. # Pyle Stringed Jam Cajón (PYLE) (Cajón) (children: 0) ' Tabla (TABLA) (Membranophone) (children: 0)', - 'String instruments (STRINGS) (None) (children: 2)', + 'String instruments (STRINGS) (None) (children: 2 + 5)', ' Bowed strings (BOW) (String instruments) (children: 2)', ' Cello (CELLO) (Bowed strings) (children: 0)', ' Violin (VIOLIN) (Bowed strings) (children: 0)', @@ -73,7 +73,7 @@ def test_import_template(self, template_file, parser_format): ' Banjo (BANJO) (Plucked strings) (children: 0)', ' Harp (HARP) (Plucked strings) (children: 0)', ' Mandolin (MANDOLIN) (Plucked strings) (children: 0)', - 'Wind instruments (WINDS) (None) (children: 2)', + 'Wind instruments (WINDS) (None) (children: 2 + 5)', ' Brass (BRASS) (Wind instruments) (children: 2)', ' Trumpet (TRUMPET) (Brass) (children: 0)', ' Tuba (TUBA) (Brass) (children: 0)', diff --git a/tests/openedx_tagging/core/tagging/test_api.py b/tests/openedx_tagging/core/tagging/test_api.py index 5befef5c..4c58e532 100644 --- a/tests/openedx_tagging/core/tagging/test_api.py +++ b/tests/openedx_tagging/core/tagging/test_api.py @@ -126,8 +126,8 @@ def test_get_tags(self) -> None: "Bacteria (children: 2)", " Archaebacteria (children: 0)", " Eubacteria (children: 0)", - "Eukaryota (children: 5)", - " Animalia (children: 7)", + "Eukaryota (children: 5 + 8)", + " Animalia (children: 7 + 1)", " Arthropoda (children: 0)", " Chordata (children: 1)", # The child of this is excluded due to depth limit " Cnidaria (children: 0)", @@ -155,7 +155,7 @@ def test_get_root_tags(self): assert pretty_format_tags(root_life_on_earth_tags, parent=False) == [ 'Archaea (children: 3)', 'Bacteria (children: 2)', - 'Eukaryota (children: 5)', + 'Eukaryota (children: 5 + 8)', ] @override_settings(LANGUAGES=test_languages) @@ -615,7 +615,7 @@ def get_object_tags(): " Proteoarchaeota (used: 0, children: 0)", "Bacteria (used: 0, children: 1)", # does not contain "ar" but a child does " Archaebacteria (used: 1, children: 0)", - "Eukaryota (used: 0, children: 1)", + "Eukaryota (used: 0, children: 1 + 2)", " Animalia (used: 1, children: 2)", # does not contain "ar" but a child does " Arthropoda (used: 1, children: 0)", " Cnidaria (used: 0, children: 0)", @@ -637,7 +637,7 @@ def get_object_tags(): "Bacteria (used: 0, children: 2)", " Archaebacteria (used: 1, children: 0)", " Eubacteria (used: 0, children: 0)", - "Eukaryota (used: 0, children: 4)", + "Eukaryota (used: 0, children: 4 + 7)", " Animalia (used: 1, children: 7)", " Arthropoda (used: 1, children: 0)", " Chordata (used: 0, children: 0)", # <<< Chordata has a matching child but we only support searching diff --git a/tests/openedx_tagging/core/tagging/test_models.py b/tests/openedx_tagging/core/tagging/test_models.py index c730b16b..453cf99c 100644 --- a/tests/openedx_tagging/core/tagging/test_models.py +++ b/tests/openedx_tagging/core/tagging/test_models.py @@ -285,9 +285,9 @@ def test_get_root(self) -> None: del r["_id"] # Remove the internal database IDs; they aren't interesting here and a other tests check them assert result == [ # These are the root tags, in alphabetical order: - {"value": "Archaea", "child_count": 3, **common_fields}, - {"value": "Bacteria", "child_count": 2, **common_fields}, - {"value": "Eukaryota", "child_count": 5, **common_fields}, + {"value": "Archaea", "child_count": 3, "descendant_count": 3, **common_fields}, + {"value": "Bacteria", "child_count": 2, "descendant_count": 2, **common_fields}, + {"value": "Eukaryota", "child_count": 5, "descendant_count": 13, **common_fields}, ] def test_get_child_tags_one_level(self) -> None: @@ -301,11 +301,11 @@ def test_get_child_tags_one_level(self) -> None: del r["_id"] # Remove the internal database IDs; they aren't interesting here and a other tests check them assert result == [ # These are the Eukaryota tags, in alphabetical order: - {"value": "Animalia", "child_count": 7, **common_fields}, - {"value": "Fungi", "child_count": 0, **common_fields}, - {"value": "Monera", "child_count": 0, **common_fields}, - {"value": "Plantae", "child_count": 0, **common_fields}, - {"value": "Protista", "child_count": 0, **common_fields}, + {"value": "Animalia", "child_count": 7, "descendant_count": 8, **common_fields}, + {"value": "Fungi", "child_count": 0, "descendant_count": 0, **common_fields}, + {"value": "Monera", "child_count": 0, "descendant_count": 0, **common_fields}, + {"value": "Plantae", "child_count": 0, "descendant_count": 0, **common_fields}, + {"value": "Protista", "child_count": 0, "descendant_count": 0, **common_fields}, ] def test_get_grandchild_tags_one_level(self) -> None: @@ -319,13 +319,13 @@ def test_get_grandchild_tags_one_level(self) -> None: del r["_id"] # Remove the internal database IDs; they aren't interesting here and a other tests check them assert result == [ # These are the Eukaryota tags, in alphabetical order: - {"value": "Arthropoda", "child_count": 0, **common_fields}, - {"value": "Chordata", "child_count": 1, **common_fields}, - {"value": "Cnidaria", "child_count": 0, **common_fields}, - {"value": "Ctenophora", "child_count": 0, **common_fields}, - {"value": "Gastrotrich", "child_count": 0, **common_fields}, - {"value": "Placozoa", "child_count": 0, **common_fields}, - {"value": "Porifera", "child_count": 0, **common_fields}, + {"value": "Arthropoda", "child_count": 0, "descendant_count": 0, **common_fields}, + {"value": "Chordata", "child_count": 1, "descendant_count": 1, **common_fields}, + {"value": "Cnidaria", "child_count": 0, "descendant_count": 0, **common_fields}, + {"value": "Ctenophora", "child_count": 0, "descendant_count": 0, **common_fields}, + {"value": "Gastrotrich", "child_count": 0, "descendant_count": 0, **common_fields}, + {"value": "Placozoa", "child_count": 0, "descendant_count": 0, **common_fields}, + {"value": "Porifera", "child_count": 0, "descendant_count": 0, **common_fields}, ] def test_get_depth_1_search_term(self) -> None: @@ -337,6 +337,7 @@ def test_get_depth_1_search_term(self) -> None: { "value": "Archaea", "child_count": 3, + "descendant_count": 3, "depth": 0, "usage_count": 0, "parent_value": None, @@ -355,6 +356,7 @@ def test_get_depth_1_child_search_term(self) -> None: { "value": "Archaebacteria", "child_count": 0, + "descendant_count": 0, "depth": 1, "parent_value": "Bacteria", "external_id": None, @@ -395,8 +397,8 @@ def test_get_all(self) -> None: "Bacteria (None) (children: 2)", " Archaebacteria (Bacteria) (children: 0)", " Eubacteria (Bacteria) (children: 0)", - "Eukaryota (None) (children: 5)", - " Animalia (Eukaryota) (children: 7)", + "Eukaryota (None) (children: 5 + 8)", + " Animalia (Eukaryota) (children: 7 + 1)", " Arthropoda (Animalia) (children: 0)", " Chordata (Animalia) (children: 1)", # note this has a child but the child is not included " Cnidaria (Animalia) (children: 0)", @@ -431,7 +433,7 @@ def test_search_2(self) -> None: """ result = pretty_format_tags(self.taxonomy.get_filtered_tags(search_term="chordata")) assert result == [ - "Eukaryota (None) (children: 1)", # Has one child that matches + "Eukaryota (None) (children: 1 + 1)", # Has one child that matches, plus one additional matching descendant " Animalia (Eukaryota) (children: 1)", " Chordata (Animalia) (children: 0)", # this is the matching tag. ] @@ -445,7 +447,7 @@ def test_search_3(self) -> None: assert result == [ "Archaea (None) (children: 1)", " Proteoarchaeota (Archaea) (children: 0)", - "Eukaryota (None) (children: 2)", # Note the "children: 2" is correct - 2 direct children are in the result + "Eukaryota (None) (children: 2 + 2)", # 2 direct matching children, 2 additional matching descendants " Animalia (Eukaryota) (children: 2)", " Arthropoda (Animalia) (children: 0)", # match " Gastrotrich (Animalia) (children: 0)", # match @@ -464,6 +466,7 @@ def test_tags_deep(self) -> None: "depth": 3, "usage_count": 0, "child_count": 0, + "descendant_count": 0, "external_id": None, "_id": 21, # These IDs are hard-coded in the test fixture file } @@ -527,7 +530,7 @@ def test_tree_sort(self) -> None: taxonomy = self.create_sort_test_taxonomy() result = pretty_format_tags(taxonomy.get_filtered_tags()) assert result == [ - "1 (None) (children: 4)", + "1 (None) (children: 4 + 1)", " 1 A (1) (children: 0)", " 11 (1) (children: 0)", " 11111 (1) (children: 1)", diff --git a/tests/openedx_tagging/core/tagging/test_views.py b/tests/openedx_tagging/core/tagging/test_views.py index c2ccd320..f6d8c63a 100644 --- a/tests/openedx_tagging/core/tagging/test_views.py +++ b/tests/openedx_tagging/core/tagging/test_views.py @@ -1307,8 +1307,8 @@ def test_small_taxonomy(self): "Bacteria (None) (children: 2)", " Archaebacteria (Bacteria) (children: 0)", " Eubacteria (Bacteria) (children: 0)", - "Eukaryota (None) (children: 5)", - " Animalia (Eukaryota) (children: 7)", + "Eukaryota (None) (children: 5 + 8)", + " Animalia (Eukaryota) (children: 7 + 1)", " Arthropoda (Animalia) (children: 0)", " Chordata (Animalia) (children: 1)", " Cnidaria (Animalia) (children: 0)", @@ -1355,7 +1355,7 @@ def test_small_taxonomy_paged(self): assert next_response.status_code == status.HTTP_200_OK next_data = next_response.data assert pretty_format_tags(next_data["results"]) == [ - "Eukaryota (None) (children: 5)", + "Eukaryota (None) (children: 5 + 8)", ] assert next_data.get("current_page") == 2 @@ -1474,16 +1474,16 @@ def test_large_taxonomy(self): results = data["results"] assert pretty_format_tags(results) == [ - "Tag 0 (None) (used: 0, children: 12)", - "Tag 1099 (None) (used: 0, children: 12)", - "Tag 1256 (None) (used: 0, children: 12)", - "Tag 1413 (None) (used: 0, children: 12)", - "Tag 157 (None) (used: 0, children: 12)", - "Tag 1570 (None) (used: 0, children: 12)", - "Tag 1727 (None) (used: 0, children: 12)", - "Tag 1884 (None) (used: 0, children: 12)", - "Tag 2041 (None) (used: 0, children: 12)", - "Tag 2198 (None) (used: 0, children: 12)", + "Tag 0 (None) (used: 0, children: 12 + 144)", + "Tag 1099 (None) (used: 0, children: 12 + 144)", + "Tag 1256 (None) (used: 0, children: 12 + 144)", + "Tag 1413 (None) (used: 0, children: 12 + 144)", + "Tag 157 (None) (used: 0, children: 12 + 144)", + "Tag 1570 (None) (used: 0, children: 12 + 144)", + "Tag 1727 (None) (used: 0, children: 12 + 144)", + "Tag 1884 (None) (used: 0, children: 12 + 144)", + "Tag 2041 (None) (used: 0, children: 12 + 144)", + "Tag 2198 (None) (used: 0, children: 12 + 144)", # ... there are 41 more root tags but they're excluded from this first result page. ] @@ -1554,7 +1554,7 @@ def test_large_search(self): data = response.data results = data["results"] assert pretty_format_tags(results)[:20] == [ - "Tag 0 (None) (children: 3)", # First 2 results don't match but have children that match + "Tag 0 (None) (children: 3 + 10)", # First 2 results don't match but have children that match # Note the count here ---^ is not the total number of matching descendants, just the number of children # once we filter the tree to include only matches and their ancestors. " Tag 1 (Tag 0) (children: 1)", @@ -1570,7 +1570,7 @@ def test_large_search(self): " Tag 117 (Tag 105) (children: 0)", " Tag 118 (Tag 0) (children: 1)", " Tag 119 (Tag 118) (children: 0)", - "Tag 1099 (None) (children: 9)", + "Tag 1099 (None) (children: 9 + 93)", " Tag 1100 (Tag 1099) (children: 12)", " Tag 1101 (Tag 1100) (children: 0)", " Tag 1102 (Tag 1100) (children: 0)", @@ -1592,16 +1592,16 @@ def test_large_search(self): results2 = data2["results"] assert pretty_format_tags(results2) == [ # Notice that none of these root tags directly match the search query, but their children/grandchildren do - "Tag 0 (None) (children: 3)", - "Tag 1099 (None) (children: 9)", - "Tag 1256 (None) (children: 2)", - "Tag 1413 (None) (children: 1)", - "Tag 157 (None) (children: 2)", - "Tag 1570 (None) (children: 2)", - "Tag 1727 (None) (children: 1)", - "Tag 1884 (None) (children: 2)", - "Tag 2041 (None) (children: 1)", - "Tag 2198 (None) (children: 2)", + "Tag 0 (None) (children: 3 + 10)", + "Tag 1099 (None) (children: 9 + 93)", + "Tag 1256 (None) (children: 2 + 2)", + "Tag 1413 (None) (children: 1 + 1)", + "Tag 157 (None) (children: 2 + 2)", + "Tag 1570 (None) (children: 2 + 2)", + "Tag 1727 (None) (children: 1 + 1)", + "Tag 1884 (None) (children: 2 + 1)", + "Tag 2041 (None) (children: 1 + 10)", + "Tag 2198 (None) (children: 2 + 2)", ] assert data2.get("count") == 51 assert data2.get("num_pages") == 6 diff --git a/tests/openedx_tagging/core/tagging/utils.py b/tests/openedx_tagging/core/tagging/utils.py index 0070f12c..3e959d96 100644 --- a/tests/openedx_tagging/core/tagging/utils.py +++ b/tests/openedx_tagging/core/tagging/utils.py @@ -21,6 +21,11 @@ def pretty_format_tags(result, parent=True, external_id=False) -> list[str]: line += "(" if "usage_count" in t: line += f"used: {t['usage_count']}, " - line += f"children: {t['child_count']})" + line += f"children: {t['child_count']}" + # In addition to the direct children, how many total descendants are there? + deeper_descendants = t['descendant_count'] - t['child_count'] + if deeper_descendants != 0: + line += f" + {deeper_descendants}" + line += ")" pretty_results.append(line) return pretty_results From 321a61939d8cd5b38f8eae65c6cb7ccbe57cdb41 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Thu, 1 Feb 2024 14:45:21 -0800 Subject: [PATCH 2/3] fix: bug with some descendant counts --- openedx_tagging/core/tagging/models/base.py | 8 +++-- .../core/tagging/test_models.py | 31 +++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/openedx_tagging/core/tagging/models/base.py b/openedx_tagging/core/tagging/models/base.py index bd3c69df..8a6c688e 100644 --- a/openedx_tagging/core/tagging/models/base.py +++ b/openedx_tagging/core/tagging/models/base.py @@ -458,7 +458,7 @@ def _get_filtered_tags_one_level( qs = self.tag_set.filter(parent=None).annotate(depth=Value(0)) qs = qs.annotate(parent_value=Value(None, output_field=models.CharField())) qs = qs.annotate(child_count=models.Count("children", distinct=True)) - qs = qs.annotate(grandchild_count=models.Count("children__children")) + qs = qs.annotate(grandchild_count=models.Count("children__children", distinct=True)) qs = qs.annotate(great_grandchild_count=models.Count("children__children__children")) qs = qs.annotate(descendant_count=F("child_count") + F("grandchild_count") + F("great_grandchild_count")) # Filter by search term: @@ -520,7 +520,9 @@ def _get_filtered_tags_deep( qs = qs.filter(pk__in=matching_ids) qs = qs.annotate( child_count=models.Count("children", filter=Q(children__pk__in=matching_ids), distinct=True), - grandchild_count=models.Count("children__children", filter=Q(children__children__pk__in=matching_ids)), + grandchild_count=models.Count( + "children__children", filter=Q(children__children__pk__in=matching_ids), distinct=True, + ), great_grandchild_count=models.Count( "children__children__children", filter=Q(children__children__children__pk__in=matching_ids), @@ -534,7 +536,7 @@ def _get_filtered_tags_deep( # frontend. else: qs = qs.annotate(child_count=models.Count("children", distinct=True)) - qs = qs.annotate(grandchild_count=models.Count("children__children")) + qs = qs.annotate(grandchild_count=models.Count("children__children", distinct=True)) qs = qs.annotate(great_grandchild_count=models.Count("children__children__children")) qs = qs.annotate(descendant_count=F("child_count") + F("grandchild_count") + F("great_grandchild_count")) diff --git a/tests/openedx_tagging/core/tagging/test_models.py b/tests/openedx_tagging/core/tagging/test_models.py index 453cf99c..59ea8f37 100644 --- a/tests/openedx_tagging/core/tagging/test_models.py +++ b/tests/openedx_tagging/core/tagging/test_models.py @@ -550,6 +550,37 @@ def test_tree_sort(self) -> None: " azure (ALPHABET) (children: 0)", ] + def test_descendant_counts(self) -> None: + """ + Test getting the descendant count on a taxonomy known to cause aggregation + bugs unless the aggregations are correctly specified with distinct=True + + https://docs.djangoproject.com/en/5.0/topics/db/aggregation/#combining-multiple-aggregations + """ + taxonomy = api.create_taxonomy("ESDC Subset") + api.add_tag_to_taxonomy(taxonomy, "Interests") # root tag + api.add_tag_to_taxonomy(taxonomy, "Holland Codes", parent_tag_value="Interests") # child tag + # Create the grandchild tag: + g_tag = api.add_tag_to_taxonomy(taxonomy, "Interests - Holland Codes", parent_tag_value="Holland Codes") + # Create the 6 great-grandchild tags: + api.add_tag_to_taxonomy(taxonomy, "Artistic", parent_tag_value=g_tag.value) + api.add_tag_to_taxonomy(taxonomy, "Conventional", parent_tag_value=g_tag.value) + api.add_tag_to_taxonomy(taxonomy, "Enterprising", parent_tag_value=g_tag.value) + api.add_tag_to_taxonomy(taxonomy, "Investigative", parent_tag_value=g_tag.value) + api.add_tag_to_taxonomy(taxonomy, "Realistic", parent_tag_value=g_tag.value) + api.add_tag_to_taxonomy(taxonomy, "Social", parent_tag_value=g_tag.value) + + result = pretty_format_tags(taxonomy.get_filtered_tags(depth=1, include_counts=True)) + assert result == [ + "Interests (None) (used: 0, children: 1 + 7)", # 1 child + (1 grandchild and 6 great grandchild tags) + ] + result2 = pretty_format_tags(taxonomy.get_filtered_tags(depth=None, include_counts=True)) + assert result2 == [ + "Interests (None) (used: 0, children: 1 + 7)", + " Holland Codes (Interests) (used: 0, children: 1 + 6)", + " Interests - Holland Codes (Holland Codes) (used: 0, children: 6)", + ] + class TestFilteredTagsFreeTextTaxonomy(TestCase): """ From 0fbcd01ab778095673c5821a3fc2a7a10b572f84 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Thu, 8 Feb 2024 09:49:46 -0800 Subject: [PATCH 3/3] chore: bump version to 0.5.2 --- openedx_learning/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openedx_learning/__init__.py b/openedx_learning/__init__.py index 69145570..fa8a6955 100644 --- a/openedx_learning/__init__.py +++ b/openedx_learning/__init__.py @@ -1,4 +1,4 @@ """ Open edX Learning ("Learning Core"). """ -__version__ = "0.5.1" +__version__ = "0.5.2"