Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add total descendant count to tag list APIs [FC-0036] #156

Merged
merged 4 commits into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions openedx_tagging/core/tagging/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class TagData(TypedDict):
value: str
external_id: str | None
child_count: int
descendant_count: int
depth: int
parent_value: str | None
# Note: usage_count may or may not be present, depending on the request.
Expand Down
41 changes: 36 additions & 5 deletions openedx_tagging/core/tagging/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,18 @@ def child_count(self) -> int:
return self.taxonomy.tag_set.filter(parent=self).count()
return 0

@cached_property
def descendant_count(self) -> int:
"""
How many descendant tags this tag has in the taxonomy.
"""
if self.taxonomy and not self.taxonomy.allow_free_text:
return self.taxonomy.tag_set.filter(
Q(parent__parent=self) |
Q(parent__parent__parent=self)
).count() + self.child_count
pomegranited marked this conversation as resolved.
Show resolved Hide resolved
return 0

def clean(self):
"""
Validate this tag before saving
Expand Down Expand Up @@ -413,6 +425,7 @@ def _get_filtered_tags_free_text(
qs = qs.annotate(
depth=Value(0),
child_count=Value(0),
descendant_count=Value(0),
external_id=Value(None, output_field=models.CharField()),
parent_value=Value(None, output_field=models.CharField()),
_id=Value(None, output_field=models.CharField()),
Expand Down Expand Up @@ -444,12 +457,16 @@ def _get_filtered_tags_one_level(
else:
qs = self.tag_set.filter(parent=None).annotate(depth=Value(0))
qs = qs.annotate(parent_value=Value(None, output_field=models.CharField()))
qs = qs.annotate(child_count=models.Count("children"))
qs = qs.annotate(child_count=models.Count("children", distinct=True))
qs = qs.annotate(grandchild_count=models.Count("children__children", distinct=True))
qs = qs.annotate(great_grandchild_count=models.Count("children__children__children"))
qs = qs.annotate(descendant_count=F("child_count") + F("grandchild_count") + F("great_grandchild_count"))
# Filter by search term:
if search_term:
qs = qs.filter(value__icontains=search_term)
qs = qs.annotate(_id=F("id")) # ID has an underscore to encourage use of 'value' rather than this internal ID
qs = qs.values("value", "child_count", "depth", "parent_value", "external_id", "_id").order_by("value")
qs = qs.values("value", "child_count", "descendant_count", "depth", "parent_value", "external_id", "_id")
qs = qs.order_by("value")
if include_counts:
# We need to include the count of how many times this tag is used to tag objects.
# You'd think we could just use:
Expand Down Expand Up @@ -501,14 +518,27 @@ def _get_filtered_tags_deep(
if pk is not None:
matching_ids.append(pk)
qs = qs.filter(pk__in=matching_ids)
qs = qs.annotate(child_count=models.Count("children", filter=Q(children__pk__in=matching_ids)))
qs = qs.annotate(
child_count=models.Count("children", filter=Q(children__pk__in=matching_ids), distinct=True),
grandchild_count=models.Count(
"children__children", filter=Q(children__children__pk__in=matching_ids), distinct=True,
),
great_grandchild_count=models.Count(
"children__children__children",
filter=Q(children__children__children__pk__in=matching_ids),
),
)
qs = qs.annotate(descendant_count=F("child_count") + F("grandchild_count") + F("great_grandchild_count"))
elif excluded_values:
raise NotImplementedError("Using excluded_values without search_term is not currently supported.")
# We could implement this in the future but I'd prefer to get rid of the "excluded_values" API altogether.
# It remains to be seen if it's useful to do that on the backend, or if we can do it better/simpler on the
# frontend.
else:
qs = qs.annotate(child_count=models.Count("children"))
qs = qs.annotate(child_count=models.Count("children", distinct=True))
qs = qs.annotate(grandchild_count=models.Count("children__children", distinct=True))
qs = qs.annotate(great_grandchild_count=models.Count("children__children__children"))
qs = qs.annotate(descendant_count=F("child_count") + F("grandchild_count") + F("great_grandchild_count"))

# Add the "depth" to each tag:
qs = Tag.annotate_depth(qs)
Expand All @@ -527,7 +557,8 @@ def _get_filtered_tags_deep(
# Add the parent value
qs = qs.annotate(parent_value=F("parent__value"))
qs = qs.annotate(_id=F("id")) # ID has an underscore to encourage use of 'value' rather than this internal ID
qs = qs.values("value", "child_count", "depth", "parent_value", "external_id", "_id").order_by("sort_key")
qs = qs.values("value", "child_count", "descendant_count", "depth", "parent_value", "external_id", "_id")
qs = qs.order_by("sort_key")
if include_counts:
# Including the counts is a bit tricky; see the comment above in _get_filtered_tags_one_level()
obj_tags = ObjectTag.objects.filter(tag_id=models.OuterRef("pk")).order_by().annotate(
Expand Down
1 change: 1 addition & 0 deletions openedx_tagging/core/tagging/rest_api/v1/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ class TagDataSerializer(UserPermissionsSerializerMixin, serializers.Serializer):
value = serializers.CharField()
external_id = serializers.CharField(allow_null=True)
child_count = serializers.IntegerField()
descendant_count = serializers.IntegerField()
depth = serializers.IntegerField()
parent_value = serializers.CharField(allow_null=True)
usage_count = serializers.IntegerField(required=False)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,27 +53,27 @@ def test_import_template(self, template_file, parser_format):
'Electronic instruments (ELECTRIC) (None) (children: 2)',
' Synthesizer (SYNTH) (Electronic instruments) (children: 0)',
' Theramin (THERAMIN) (Electronic instruments) (children: 0)',
'Percussion instruments (PERCUSS) (None) (children: 3)',
'Percussion instruments (PERCUSS) (None) (children: 3 + 6)',
' Chordophone (CHORD) (Percussion instruments) (children: 1)',
' Piano (PIANO) (Chordophone) (children: 0)',
' Idiophone (BELLS) (Percussion instruments) (children: 2)',
' Celesta (CELESTA) (Idiophone) (children: 0)',
' Hi-hat (HI-HAT) (Idiophone) (children: 0)',
' Membranophone (DRUMS) (Percussion instruments) (children: 2)',
' Membranophone (DRUMS) (Percussion instruments) (children: 2 + 1)',
' Cajón (CAJÓN) (Membranophone) (children: 1)',
# This tag is present in the import files, but it will be omitted from get_tags()
# because it sits beyond TAXONOMY_MAX_DEPTH.
# Pyle Stringed Jam Cajón (PYLE) (Cajón) (children: 0)
' Tabla (TABLA) (Membranophone) (children: 0)',
'String instruments (STRINGS) (None) (children: 2)',
'String instruments (STRINGS) (None) (children: 2 + 5)',
' Bowed strings (BOW) (String instruments) (children: 2)',
' Cello (CELLO) (Bowed strings) (children: 0)',
' Violin (VIOLIN) (Bowed strings) (children: 0)',
' Plucked strings (PLUCK) (String instruments) (children: 3)',
' Banjo (BANJO) (Plucked strings) (children: 0)',
' Harp (HARP) (Plucked strings) (children: 0)',
' Mandolin (MANDOLIN) (Plucked strings) (children: 0)',
'Wind instruments (WINDS) (None) (children: 2)',
'Wind instruments (WINDS) (None) (children: 2 + 5)',
' Brass (BRASS) (Wind instruments) (children: 2)',
' Trumpet (TRUMPET) (Brass) (children: 0)',
' Tuba (TUBA) (Brass) (children: 0)',
Expand Down
10 changes: 5 additions & 5 deletions tests/openedx_tagging/core/tagging/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,8 @@ def test_get_tags(self) -> None:
"Bacteria (children: 2)",
" Archaebacteria (children: 0)",
" Eubacteria (children: 0)",
"Eukaryota (children: 5)",
" Animalia (children: 7)",
"Eukaryota (children: 5 + 8)",
" Animalia (children: 7 + 1)",
" Arthropoda (children: 0)",
" Chordata (children: 1)", # The child of this is excluded due to depth limit
" Cnidaria (children: 0)",
Expand Down Expand Up @@ -155,7 +155,7 @@ def test_get_root_tags(self):
assert pretty_format_tags(root_life_on_earth_tags, parent=False) == [
'Archaea (children: 3)',
'Bacteria (children: 2)',
'Eukaryota (children: 5)',
'Eukaryota (children: 5 + 8)',
]

@override_settings(LANGUAGES=test_languages)
Expand Down Expand Up @@ -615,7 +615,7 @@ def get_object_tags():
" Proteoarchaeota (used: 0, children: 0)",
"Bacteria (used: 0, children: 1)", # does not contain "ar" but a child does
" Archaebacteria (used: 1, children: 0)",
"Eukaryota (used: 0, children: 1)",
"Eukaryota (used: 0, children: 1 + 2)",
" Animalia (used: 1, children: 2)", # does not contain "ar" but a child does
" Arthropoda (used: 1, children: 0)",
" Cnidaria (used: 0, children: 0)",
Expand All @@ -637,7 +637,7 @@ def get_object_tags():
"Bacteria (used: 0, children: 2)",
" Archaebacteria (used: 1, children: 0)",
" Eubacteria (used: 0, children: 0)",
"Eukaryota (used: 0, children: 4)",
"Eukaryota (used: 0, children: 4 + 7)",
" Animalia (used: 1, children: 7)",
" Arthropoda (used: 1, children: 0)",
" Chordata (used: 0, children: 0)", # <<< Chordata has a matching child but we only support searching
Expand Down
74 changes: 54 additions & 20 deletions tests/openedx_tagging/core/tagging/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,9 +285,9 @@ def test_get_root(self) -> None:
del r["_id"] # Remove the internal database IDs; they aren't interesting here and a other tests check them
assert result == [
# These are the root tags, in alphabetical order:
{"value": "Archaea", "child_count": 3, **common_fields},
{"value": "Bacteria", "child_count": 2, **common_fields},
{"value": "Eukaryota", "child_count": 5, **common_fields},
{"value": "Archaea", "child_count": 3, "descendant_count": 3, **common_fields},
{"value": "Bacteria", "child_count": 2, "descendant_count": 2, **common_fields},
{"value": "Eukaryota", "child_count": 5, "descendant_count": 13, **common_fields},
]

def test_get_child_tags_one_level(self) -> None:
Expand All @@ -301,11 +301,11 @@ def test_get_child_tags_one_level(self) -> None:
del r["_id"] # Remove the internal database IDs; they aren't interesting here and a other tests check them
assert result == [
# These are the Eukaryota tags, in alphabetical order:
{"value": "Animalia", "child_count": 7, **common_fields},
{"value": "Fungi", "child_count": 0, **common_fields},
{"value": "Monera", "child_count": 0, **common_fields},
{"value": "Plantae", "child_count": 0, **common_fields},
{"value": "Protista", "child_count": 0, **common_fields},
{"value": "Animalia", "child_count": 7, "descendant_count": 8, **common_fields},
{"value": "Fungi", "child_count": 0, "descendant_count": 0, **common_fields},
{"value": "Monera", "child_count": 0, "descendant_count": 0, **common_fields},
{"value": "Plantae", "child_count": 0, "descendant_count": 0, **common_fields},
{"value": "Protista", "child_count": 0, "descendant_count": 0, **common_fields},
]

def test_get_grandchild_tags_one_level(self) -> None:
Expand All @@ -319,13 +319,13 @@ def test_get_grandchild_tags_one_level(self) -> None:
del r["_id"] # Remove the internal database IDs; they aren't interesting here and a other tests check them
assert result == [
# These are the Eukaryota tags, in alphabetical order:
{"value": "Arthropoda", "child_count": 0, **common_fields},
{"value": "Chordata", "child_count": 1, **common_fields},
{"value": "Cnidaria", "child_count": 0, **common_fields},
{"value": "Ctenophora", "child_count": 0, **common_fields},
{"value": "Gastrotrich", "child_count": 0, **common_fields},
{"value": "Placozoa", "child_count": 0, **common_fields},
{"value": "Porifera", "child_count": 0, **common_fields},
{"value": "Arthropoda", "child_count": 0, "descendant_count": 0, **common_fields},
{"value": "Chordata", "child_count": 1, "descendant_count": 1, **common_fields},
{"value": "Cnidaria", "child_count": 0, "descendant_count": 0, **common_fields},
{"value": "Ctenophora", "child_count": 0, "descendant_count": 0, **common_fields},
{"value": "Gastrotrich", "child_count": 0, "descendant_count": 0, **common_fields},
{"value": "Placozoa", "child_count": 0, "descendant_count": 0, **common_fields},
{"value": "Porifera", "child_count": 0, "descendant_count": 0, **common_fields},
]

def test_get_depth_1_search_term(self) -> None:
Expand All @@ -337,6 +337,7 @@ def test_get_depth_1_search_term(self) -> None:
{
"value": "Archaea",
"child_count": 3,
"descendant_count": 3,
"depth": 0,
"usage_count": 0,
"parent_value": None,
Expand All @@ -355,6 +356,7 @@ def test_get_depth_1_child_search_term(self) -> None:
{
"value": "Archaebacteria",
"child_count": 0,
"descendant_count": 0,
"depth": 1,
"parent_value": "Bacteria",
"external_id": None,
Expand Down Expand Up @@ -395,8 +397,8 @@ def test_get_all(self) -> None:
"Bacteria (None) (children: 2)",
" Archaebacteria (Bacteria) (children: 0)",
" Eubacteria (Bacteria) (children: 0)",
"Eukaryota (None) (children: 5)",
" Animalia (Eukaryota) (children: 7)",
"Eukaryota (None) (children: 5 + 8)",
" Animalia (Eukaryota) (children: 7 + 1)",
" Arthropoda (Animalia) (children: 0)",
" Chordata (Animalia) (children: 1)", # note this has a child but the child is not included
" Cnidaria (Animalia) (children: 0)",
Expand Down Expand Up @@ -431,7 +433,7 @@ def test_search_2(self) -> None:
"""
result = pretty_format_tags(self.taxonomy.get_filtered_tags(search_term="chordata"))
assert result == [
"Eukaryota (None) (children: 1)", # Has one child that matches
"Eukaryota (None) (children: 1 + 1)", # Has one child that matches, plus one additional matching descendant
" Animalia (Eukaryota) (children: 1)",
" Chordata (Animalia) (children: 0)", # this is the matching tag.
]
Expand All @@ -445,7 +447,7 @@ def test_search_3(self) -> None:
assert result == [
"Archaea (None) (children: 1)",
" Proteoarchaeota (Archaea) (children: 0)",
"Eukaryota (None) (children: 2)", # Note the "children: 2" is correct - 2 direct children are in the result
"Eukaryota (None) (children: 2 + 2)", # 2 direct matching children, 2 additional matching descendants
" Animalia (Eukaryota) (children: 2)",
" Arthropoda (Animalia) (children: 0)", # match
" Gastrotrich (Animalia) (children: 0)", # match
Expand All @@ -464,6 +466,7 @@ def test_tags_deep(self) -> None:
"depth": 3,
"usage_count": 0,
"child_count": 0,
"descendant_count": 0,
"external_id": None,
"_id": 21, # These IDs are hard-coded in the test fixture file
}
Expand Down Expand Up @@ -527,7 +530,7 @@ def test_tree_sort(self) -> None:
taxonomy = self.create_sort_test_taxonomy()
result = pretty_format_tags(taxonomy.get_filtered_tags())
assert result == [
"1 (None) (children: 4)",
"1 (None) (children: 4 + 1)",
" 1 A (1) (children: 0)",
" 11 (1) (children: 0)",
" 11111 (1) (children: 1)",
Expand All @@ -547,6 +550,37 @@ def test_tree_sort(self) -> None:
" azure (ALPHABET) (children: 0)",
]

def test_descendant_counts(self) -> None:
"""
Test getting the descendant count on a taxonomy known to cause aggregation
bugs unless the aggregations are correctly specified with distinct=True

https://docs.djangoproject.com/en/5.0/topics/db/aggregation/#combining-multiple-aggregations
"""
taxonomy = api.create_taxonomy("ESDC Subset")
api.add_tag_to_taxonomy(taxonomy, "Interests") # root tag
api.add_tag_to_taxonomy(taxonomy, "Holland Codes", parent_tag_value="Interests") # child tag
# Create the grandchild tag:
g_tag = api.add_tag_to_taxonomy(taxonomy, "Interests - Holland Codes", parent_tag_value="Holland Codes")
# Create the 6 great-grandchild tags:
api.add_tag_to_taxonomy(taxonomy, "Artistic", parent_tag_value=g_tag.value)
api.add_tag_to_taxonomy(taxonomy, "Conventional", parent_tag_value=g_tag.value)
api.add_tag_to_taxonomy(taxonomy, "Enterprising", parent_tag_value=g_tag.value)
api.add_tag_to_taxonomy(taxonomy, "Investigative", parent_tag_value=g_tag.value)
api.add_tag_to_taxonomy(taxonomy, "Realistic", parent_tag_value=g_tag.value)
api.add_tag_to_taxonomy(taxonomy, "Social", parent_tag_value=g_tag.value)

result = pretty_format_tags(taxonomy.get_filtered_tags(depth=1, include_counts=True))
assert result == [
"Interests (None) (used: 0, children: 1 + 7)", # 1 child + (1 grandchild and 6 great grandchild tags)
]
result2 = pretty_format_tags(taxonomy.get_filtered_tags(depth=None, include_counts=True))
assert result2 == [
"Interests (None) (used: 0, children: 1 + 7)",
" Holland Codes (Interests) (used: 0, children: 1 + 6)",
" Interests - Holland Codes (Holland Codes) (used: 0, children: 6)",
]


class TestFilteredTagsFreeTextTaxonomy(TestCase):
"""
Expand Down
Loading
Loading