From 6d7e6bd7a1dc6a939ab09081029b762bd7e6a5fe Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Tue, 17 Dec 2024 16:23:39 -0800 Subject: [PATCH] Add included_languages M2M field to contentnodes. Ignore constraints when generating sql alchemy schema. --- kolibri/core/content/api.py | 20 +- .../versions/content_schema_current.py | 129 +++------ .../management/commands/generate_schema.py | 2 +- .../0040_contentnode_included_languages.py | 23 ++ kolibri/core/content/models.py | 10 + kolibri/core/content/test/test_annotation.py | 262 ++++++++++++++++++ kolibri/core/content/test/test_content_app.py | 204 ++++++++++++++ kolibri/core/content/utils/annotation.py | 56 ++++ requirements/dev.txt | 2 +- 9 files changed, 600 insertions(+), 108 deletions(-) create mode 100644 kolibri/core/content/migrations/0040_contentnode_included_languages.py diff --git a/kolibri/core/content/api.py b/kolibri/core/content/api.py index 83724ca071a..e6250320c56 100644 --- a/kolibri/core/content/api.py +++ b/kolibri/core/content/api.py @@ -258,12 +258,19 @@ def list(self, request, *args, **kwargs): return super(RemoteViewSet, self).list(request, *args, **kwargs) +class CharInFilter(BaseInFilter, CharFilter): + pass + + class ChannelMetadataFilter(FilterSet): available = BooleanFilter(method="filter_available", label="Available") contains_exercise = BooleanFilter( method="filter_contains_exercise", label="Has exercises" ) contains_quiz = BooleanFilter(method="filter_contains_quiz", label="Has quizzes") + languages = CharInFilter( + field_name="included_languages", label="Languages", distinct=True + ) class Meta: model = models.ChannelMetadata @@ -417,10 +424,6 @@ class UUIDInFilter(BaseInFilter, UUIDFilter): pass -class CharInFilter(BaseInFilter, CharFilter): - pass - - contentnode_filter_fields = [ "parent", "parent__isnull", @@ -470,7 +473,7 @@ class ContentNodeFilter(FilterSet): learner_needs = CharFilter(method="bitmask_contains_and") keywords = CharFilter(method="filter_keywords") channels = UUIDInFilter(field_name="channel_id") - languages = CharInFilter(field_name="lang_id") + languages = CharInFilter(field_name="included_languages") categories__isnull = BooleanFilter(field_name="categories", lookup_expr="isnull") lft__gt = NumberFilter(field_name="lft", lookup_expr="gt") rght__lt = NumberFilter(field_name="rght", lookup_expr="lt") @@ -671,10 +674,11 @@ def get_queryset(self): return models.ContentNode.objects.filter(available=True) def get_related_data_maps(self, items, queryset): + ids = [item["id"] for item in items] assessmentmetadata_map = { a["contentnode"]: a for a in models.AssessmentMetaData.objects.filter( - contentnode__in=queryset + contentnode__in=ids ).values( "assessment_item_ids", "number_of_assessments", @@ -688,7 +692,7 @@ def get_related_data_maps(self, items, queryset): files_map = {} files = list( - models.File.objects.filter(contentnode__in=queryset).values( + models.File.objects.filter(contentnode__in=ids).values( "id", "contentnode", "local_file__id", @@ -723,7 +727,7 @@ def get_related_data_maps(self, items, queryset): tags_map = {} for t in ( - models.ContentTag.objects.filter(tagged_content__in=queryset) + models.ContentTag.objects.filter(tagged_content__in=ids) .values( "tag_name", "tagged_content", diff --git a/kolibri/core/content/contentschema/versions/content_schema_current.py b/kolibri/core/content/contentschema/versions/content_schema_current.py index 939bb253929..8bdf71bdeb1 100644 --- a/kolibri/core/content/contentschema/versions/content_schema_current.py +++ b/kolibri/core/content/contentschema/versions/content_schema_current.py @@ -2,17 +2,13 @@ from sqlalchemy import BigInteger from sqlalchemy import Boolean from sqlalchemy import CHAR -from sqlalchemy import CheckConstraint from sqlalchemy import Column from sqlalchemy import Float -from sqlalchemy import ForeignKey -from sqlalchemy import ForeignKeyConstraint from sqlalchemy import Index from sqlalchemy import Integer from sqlalchemy import String from sqlalchemy import Text from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship Base = declarative_base() metadata = Base.metadata @@ -47,34 +43,17 @@ class ContentLocalfile(Base): class ContentContentnode(Base): __tablename__ = "content_contentnode" __table_args__ = ( - CheckConstraint("lft >= 0"), - CheckConstraint("tree_id >= 0"), - CheckConstraint("level >= 0"), - CheckConstraint("duration >= 0"), - CheckConstraint("rght >= 0"), - ForeignKeyConstraint( - ["lang_id"], - ["content_language.id"], - deferrable=True, - initially="DEFERRED", - ), - ForeignKeyConstraint( - ["parent_id"], - ["content_contentnode.id"], - deferrable=True, - initially="DEFERRED", - ), Index( - "content_contentnode_level_channel_id_available_29f0bb18_idx", + "content_contentnode_level_channel_id_kind_fd732cc4_idx", "level", "channel_id", - "available", + "kind", ), Index( - "content_contentnode_level_channel_id_kind_fd732cc4_idx", + "content_contentnode_level_channel_id_available_29f0bb18_idx", "level", "channel_id", - "kind", + "available", ), ) @@ -89,6 +68,7 @@ class ContentContentnode(Base): kind = Column(String(200), nullable=False) available = Column(Boolean, nullable=False) lft = Column(Integer, nullable=False) + rght = Column(Integer, nullable=False) tree_id = Column(Integer, nullable=False, index=True) level = Column(Integer, nullable=False) lang_id = Column(String(14), index=True) @@ -112,12 +92,8 @@ class ContentContentnode(Base): learning_activities_bitmask_0 = Column(BigInteger) ancestors = Column(Text) admin_imported = Column(Boolean) - rght = Column(Integer, nullable=False) parent_id = Column(CHAR(32), index=True) - lang = relationship("ContentLanguage") - parent = relationship("ContentContentnode", remote_side=[id]) - class ContentAssessmentmetadata(Base): __tablename__ = "content_assessmentmetadata" @@ -128,22 +104,11 @@ class ContentAssessmentmetadata(Base): mastery_model = Column(Text, nullable=False) randomize = Column(Boolean, nullable=False) is_manipulable = Column(Boolean, nullable=False) - contentnode_id = Column( - ForeignKey("content_contentnode.id"), nullable=False, index=True - ) - - contentnode = relationship("ContentContentnode") + contentnode_id = Column(CHAR(32), nullable=False, index=True) class ContentChannelmetadata(Base): __tablename__ = "content_channelmetadata" - __table_args__ = ( - CheckConstraint('"order" >= 0'), - ForeignKeyConstraint( - ["root_id"], - ["content_contentnode.id"], - ), - ) id = Column(CHAR(32), primary_key=True) name = Column(String(200), nullable=False) @@ -163,8 +128,6 @@ class ContentChannelmetadata(Base): included_categories = Column(Text) included_grade_levels = Column(Text) - root = relationship("ContentContentnode") - class ContentContentnodeHasPrerequisite(Base): __tablename__ = "content_contentnode_has_prerequisite" @@ -178,22 +141,25 @@ class ContentContentnodeHasPrerequisite(Base): ) id = Column(Integer, primary_key=True) - from_contentnode_id = Column( - ForeignKey("content_contentnode.id"), nullable=False, index=True - ) - to_contentnode_id = Column( - ForeignKey("content_contentnode.id"), nullable=False, index=True - ) + from_contentnode_id = Column(CHAR(32), nullable=False, index=True) + to_contentnode_id = Column(CHAR(32), nullable=False, index=True) - from_contentnode = relationship( - "ContentContentnode", - primaryjoin="ContentContentnodeHasPrerequisite.from_contentnode_id == ContentContentnode.id", - ) - to_contentnode = relationship( - "ContentContentnode", - primaryjoin="ContentContentnodeHasPrerequisite.to_contentnode_id == ContentContentnode.id", + +class ContentContentnodeIncludedLanguages(Base): + __tablename__ = "content_contentnode_included_languages" + __table_args__ = ( + Index( + "content_contentnode_included_languages_contentnode_id_language_id_7d14ec8b_uniq", + "contentnode_id", + "language_id", + unique=True, + ), ) + id = Column(Integer, primary_key=True) + contentnode_id = Column(CHAR(32), nullable=False, index=True) + language_id = Column(String(14), nullable=False, index=True) + class ContentContentnodeRelated(Base): __tablename__ = "content_contentnode_related" @@ -207,21 +173,8 @@ class ContentContentnodeRelated(Base): ) id = Column(Integer, primary_key=True) - from_contentnode_id = Column( - ForeignKey("content_contentnode.id"), nullable=False, index=True - ) - to_contentnode_id = Column( - ForeignKey("content_contentnode.id"), nullable=False, index=True - ) - - from_contentnode = relationship( - "ContentContentnode", - primaryjoin="ContentContentnodeRelated.from_contentnode_id == ContentContentnode.id", - ) - to_contentnode = relationship( - "ContentContentnode", - primaryjoin="ContentContentnodeRelated.to_contentnode_id == ContentContentnode.id", - ) + from_contentnode_id = Column(CHAR(32), nullable=False, index=True) + to_contentnode_id = Column(CHAR(32), nullable=False, index=True) class ContentContentnodeTags(Base): @@ -236,15 +189,8 @@ class ContentContentnodeTags(Base): ) id = Column(Integer, primary_key=True) - contentnode_id = Column( - ForeignKey("content_contentnode.id"), nullable=False, index=True - ) - contenttag_id = Column( - ForeignKey("content_contenttag.id"), nullable=False, index=True - ) - - contentnode = relationship("ContentContentnode") - contenttag = relationship("ContentContenttag") + contentnode_id = Column(CHAR(32), nullable=False, index=True) + contenttag_id = Column(CHAR(32), nullable=False, index=True) class ContentFile(Base): @@ -254,19 +200,11 @@ class ContentFile(Base): supplementary = Column(Boolean, nullable=False) thumbnail = Column(Boolean, nullable=False) priority = Column(Integer, index=True) - contentnode_id = Column( - ForeignKey("content_contentnode.id"), nullable=False, index=True - ) - lang_id = Column(ForeignKey("content_language.id"), index=True) - local_file_id = Column( - ForeignKey("content_localfile.id"), nullable=False, index=True - ) + contentnode_id = Column(CHAR(32), nullable=False, index=True) + lang_id = Column(String(14), index=True) + local_file_id = Column(String(32), nullable=False, index=True) preset = Column(String(150), nullable=False) - contentnode = relationship("ContentContentnode") - lang = relationship("ContentLanguage") - local_file = relationship("ContentLocalfile") - class ContentChannelmetadataIncludedLanguages(Base): __tablename__ = "content_channelmetadata_included_languages" @@ -280,11 +218,6 @@ class ContentChannelmetadataIncludedLanguages(Base): ) id = Column(Integer, primary_key=True) - channelmetadata_id = Column( - ForeignKey("content_channelmetadata.id"), nullable=False, index=True - ) - language_id = Column(ForeignKey("content_language.id"), nullable=False, index=True) sort_value = Column(Integer, nullable=False) - - channelmetadata = relationship("ContentChannelmetadata") - language = relationship("ContentLanguage") + channelmetadata_id = Column(CHAR(32), nullable=False, index=True) + language_id = Column(String(14), nullable=False, index=True) diff --git a/kolibri/core/content/management/commands/generate_schema.py b/kolibri/core/content/management/commands/generate_schema.py index c6f2abe9a3f..269a8516d84 100644 --- a/kolibri/core/content/management/commands/generate_schema.py +++ b/kolibri/core/content/management/commands/generate_schema.py @@ -134,7 +134,7 @@ def handle(self, *args, **options): metadata.bind = engine generator = CodeGenerator( - metadata, False, False, True, True, False, nocomments=False + metadata, False, True, True, True, False, nocomments=False ) with io.open( diff --git a/kolibri/core/content/migrations/0040_contentnode_included_languages.py b/kolibri/core/content/migrations/0040_contentnode_included_languages.py new file mode 100644 index 00000000000..0785e517964 --- /dev/null +++ b/kolibri/core/content/migrations/0040_contentnode_included_languages.py @@ -0,0 +1,23 @@ +# Generated by Django 3.2.25 on 2024-12-18 00:14 +from django.db import migrations +from django.db import models + + +class Migration(migrations.Migration): + + dependencies = [ + ("content", "0039_channelmetadata_ordered_fields"), + ] + + operations = [ + migrations.AddField( + model_name="contentnode", + name="included_languages", + field=models.ManyToManyField( + blank=True, + related_name="contentnodes", + to="content.Language", + verbose_name="languages", + ), + ), + ] diff --git a/kolibri/core/content/models.py b/kolibri/core/content/models.py index 78e461fb865..dc58579d0a8 100644 --- a/kolibri/core/content/models.py +++ b/kolibri/core/content/models.py @@ -215,6 +215,16 @@ class ContentNode(base_models.ContentNode): # needs a subsequent Kolibri upgrade step to backfill these values. admin_imported = models.BooleanField(null=True) + # Languages that are in this node and/or any descendant nodes of this node + # for non-topic nodes, this is the language of the node itself + # for topic nodes, this is the union of all languages of all descendant nodes + # and any language set on the topic node itself + # We do this to allow filtering of a topic tree by a specific language for + # multi-language channels. + included_languages = models.ManyToManyField( + "Language", related_name="contentnodes", verbose_name="languages", blank=True + ) + objects = ContentNodeManager() class Meta: diff --git a/kolibri/core/content/test/test_annotation.py b/kolibri/core/content/test/test_annotation.py index 2de250711eb..1d4123ecc5e 100644 --- a/kolibri/core/content/test/test_annotation.py +++ b/kolibri/core/content/test/test_annotation.py @@ -745,6 +745,268 @@ def test_two_channels_no_annotation_collision_child_true(self): self.assertFalse(root_node.available) self.assertFalse(root_node.coach_content) + def test_non_topic_node_included_languages(self): + """ + Test that non-topic nodes get their lang_id properly set as their included_language + """ + test_node = ContentNode.objects.exclude(kind=content_kinds.TOPIC).first() + test_language = Language.objects.create( + id="te-st", + lang_code="te", + lang_subcode="st", + lang_name="Test Language", + lang_direction="ltr", + ) + + # Set a language on our test node + test_node.lang = test_language + test_node.save() + + recurse_annotation_up_tree(channel_id="6199dde695db4ee4ab392222d5af1e5c") + + # Verify the node has exactly one included language matching its lang_id + self.assertEqual(test_node.included_languages.count(), 1) + self.assertEqual(test_node.included_languages.first(), test_language) + + def test_topic_node_not_includes_own_language(self): + """ + Test that topic nodes do not include their own language in included_languages + """ + topic_node = ContentNode.objects.filter(kind=content_kinds.TOPIC).first() + test_language = Language.objects.create( + id="te-st", + lang_code="te", + lang_subcode="st", + lang_name="Test Language", + lang_direction="ltr", + ) + + # Set a language on the topic + topic_node.lang = test_language + topic_node.save() + + recurse_annotation_up_tree(channel_id="6199dde695db4ee4ab392222d5af1e5c") + + # Verify the topic includes its own language + self.assertNotIn(test_language, topic_node.included_languages.all()) + + def test_topic_node_includes_child_languages(self): + """ + Test that topic nodes include languages from their child nodes + """ + topic_node = ContentNode.objects.filter(kind=content_kinds.TOPIC).first() + + # Create two test languages + lang1 = Language.objects.create( + id="te-st", + lang_code="te", + lang_subcode="st", + lang_name="Test Language 1", + lang_direction="ltr", + ) + lang2 = Language.objects.create( + id="tt-se", + lang_code="tt", + lang_subcode="se", + lang_name="Test Language 2", + lang_direction="ltr", + ) + + # Create two child nodes with different languages + ContentNode.objects.create( + title="test1", + id=uuid.uuid4().hex, + content_id=uuid.uuid4().hex, + channel_id=topic_node.channel_id, + parent=topic_node, + kind=content_kinds.VIDEO, + available=True, + lang=lang1, + ) + + ContentNode.objects.create( + title="test2", + id=uuid.uuid4().hex, + content_id=uuid.uuid4().hex, + channel_id=topic_node.channel_id, + parent=topic_node, + kind=content_kinds.VIDEO, + available=True, + lang=lang2, + ) + + recurse_annotation_up_tree(channel_id="6199dde695db4ee4ab392222d5af1e5c") + + # Verify the topic includes both child languages + included_languages = topic_node.included_languages.all() + self.assertEqual(len(included_languages), 2) + self.assertIn(lang1, included_languages) + self.assertIn(lang2, included_languages) + + def test_topic_node_includes_grandchild_languages(self): + """ + Test that topic nodes include languages from their grandchild nodes + """ + root_topic = ContentNode.objects.filter(kind=content_kinds.TOPIC).first() + + # Create test language + test_language = Language.objects.create( + id="te-st", + lang_code="te", + lang_subcode="st", + lang_name="Test Language", + lang_direction="ltr", + ) + + # Create child topic + child_topic = ContentNode.objects.create( + title="test_topic", + id=uuid.uuid4().hex, + content_id=uuid.uuid4().hex, + channel_id=root_topic.channel_id, + parent=root_topic, + kind=content_kinds.TOPIC, + available=True, + ) + + # Create grandchild with language + ContentNode.objects.create( + title="test_content", + id=uuid.uuid4().hex, + content_id=uuid.uuid4().hex, + channel_id=root_topic.channel_id, + parent=child_topic, + kind=content_kinds.VIDEO, + available=True, + lang=test_language, + ) + + recurse_annotation_up_tree(channel_id="6199dde695db4ee4ab392222d5af1e5c") + + # Verify both the child topic and root topic include the grandchild's language + self.assertIn(test_language, child_topic.included_languages.all()) + self.assertIn(test_language, root_topic.included_languages.all()) + + def test_topic_deduplicates_languages(self): + """ + Test that topic nodes don't duplicate languages when multiple children have the same language + """ + topic_node = ContentNode.objects.filter(kind=content_kinds.TOPIC).first() + test_language = Language.objects.create( + id="te-st", + lang_code="te", + lang_subcode="st", + lang_name="Test Language", + lang_direction="ltr", + ) + + # Create multiple children with the same language + for i in range(3): + ContentNode.objects.create( + title=f"test{i}", + id=uuid.uuid4().hex, + content_id=uuid.uuid4().hex, + channel_id=topic_node.channel_id, + parent=topic_node, + kind=content_kinds.VIDEO, + available=True, + lang=test_language, + ) + + recurse_annotation_up_tree(channel_id="6199dde695db4ee4ab392222d5af1e5c") + + # Verify the language only appears once + self.assertEqual(topic_node.included_languages.count(), 1) + self.assertEqual(topic_node.included_languages.first(), test_language) + + def test_non_available_child_languages_excluded(self): + """ + Test that languages from non-available children are not included in the topic's languages + """ + topic_node = ContentNode.objects.filter(kind=content_kinds.TOPIC).first() + test_language = Language.objects.create( + id="te-st", + lang_code="te", + lang_subcode="st", + lang_name="Test Language", + lang_direction="ltr", + ) + + # Create a non-available child with a language + ContentNode.objects.create( + title="test", + id=uuid.uuid4().hex, + content_id=uuid.uuid4().hex, + channel_id=topic_node.channel_id, + parent=topic_node, + kind=content_kinds.VIDEO, + available=False, + lang=test_language, + ) + + recurse_annotation_up_tree(channel_id="6199dde695db4ee4ab392222d5af1e5c") + + # Verify the topic doesn't include the language from the non-available child + self.assertEqual(topic_node.included_languages.count(), 0) + + def test_duplicate_language_handling_in_recursion(self): + """ + Test that the recursion handles cases where a topic might receive the same + language multiple times (from own lang_id and from children) + """ + # Create a topic with two levels of children + root_topic = ContentNode.objects.create( + title="root", + id=uuid.uuid4().hex, + content_id=uuid.uuid4().hex, + channel_id="6199dde695db4ee4ab392222d5af1e5c", + kind=content_kinds.TOPIC, + available=True, + ) + + test_language = Language.objects.create( + id="te-st", + lang_code="te", + lang_subcode="st", + lang_name="Test Language", + lang_direction="ltr", + ) + + # Set the root topic's language + root_topic.lang = test_language + root_topic.save() + + # Create a child topic with the same language + child_topic = ContentNode.objects.create( + title="child", + id=uuid.uuid4().hex, + content_id=uuid.uuid4().hex, + channel_id="6199dde695db4ee4ab392222d5af1e5c", + parent=root_topic, + kind=content_kinds.TOPIC, + available=True, + lang=test_language, + ) + + # Create a grandchild with the same language + ContentNode.objects.create( + title="grandchild", + id=uuid.uuid4().hex, + content_id=uuid.uuid4().hex, + channel_id="6199dde695db4ee4ab392222d5af1e5c", + parent=child_topic, + kind=content_kinds.VIDEO, + available=True, + lang=test_language, + ) + + # This should not raise an IntegrityError + recurse_annotation_up_tree(channel_id="6199dde695db4ee4ab392222d5af1e5c") + + # Verify the relationships are correct + self.assertEqual(root_topic.included_languages.count(), 1) + self.assertEqual(child_topic.included_languages.count(), 1) + def tearDown(self): call_command("flush", interactive=False) super(AnnotationTreeRecursion, self).tearDown() diff --git a/kolibri/core/content/test/test_content_app.py b/kolibri/core/content/test/test_content_app.py index 09836a5af2e..179c4276360 100644 --- a/kolibri/core/content/test/test_content_app.py +++ b/kolibri/core/content/test/test_content_app.py @@ -463,6 +463,210 @@ def test_contentnode_list_long(self): self.assertEqual(len(response.data), expected_output) self._assert_nodes(response.data, nodes) + def test_filter_by_single_included_language(self): + """ + Test filtering ContentNodes by a single included language + """ + c1 = content.ContentNode.objects.get(title="c1") + language = content.Language.objects.create( + id="en", + lang_code="en", + lang_subcode="", + lang_name="English", + lang_direction="ltr", + ) + c1.included_languages.set([language]) + c1.save() + + response = self.client.get( + reverse("kolibri:core:contentnode-list"), data={"languages": "en"} + ) + self.assertEqual(len(response.data), 1) + self.assertEqual(response.data[0]["title"], "c1") + + def test_filter_by_multiple_included_languages(self): + """ + Test filtering ContentNodes that match any of the provided languages + """ + c1 = content.ContentNode.objects.get(title="c1") + c2 = content.ContentNode.objects.get(title="c2") + english = content.Language.objects.create( + id="en", + lang_code="en", + lang_subcode="", + lang_name="English", + lang_direction="ltr", + ) + spanish = content.Language.objects.create( + id="es", + lang_code="es", + lang_subcode="", + lang_name="Spanish", + lang_direction="ltr", + ) + c1.included_languages.set([english]) + c2.included_languages.set([spanish]) + c1.save() + c2.save() + + response = self.client.get( + reverse("kolibri:core:contentnode-list"), data={"languages": "en,es"} + ) + self.assertEqual(len(response.data), 2) + titles = [node["title"] for node in response.data] + self.assertIn("c1", titles) + self.assertIn("c2", titles) + + def test_filter_by_non_existent_language(self): + """ + Test filtering by a language that no ContentNode has + """ + c1 = content.ContentNode.objects.get(title="c1") + english = content.Language.objects.create( + id="en", + lang_code="en", + lang_subcode="", + lang_name="English", + lang_direction="ltr", + ) + c1.included_languages.set([english]) + c1.save() + + response = self.client.get( + reverse("kolibri:core:contentnode-list"), data={"languages": "fr"} + ) + self.assertEqual(len(response.data), 0) + + def test_filter_by_multiple_languages_per_node(self): + """ + Test filtering nodes that have multiple languages assigned + """ + c1 = content.ContentNode.objects.get(title="c1") + english = content.Language.objects.create( + id="en", + lang_code="en", + lang_subcode="", + lang_name="English", + lang_direction="ltr", + ) + spanish = content.Language.objects.create( + id="es", + lang_code="es", + lang_subcode="", + lang_name="Spanish", + lang_direction="ltr", + ) + c1.included_languages.set([english, spanish]) + c1.save() + + # Should match when searching for either language + response = self.client.get( + reverse("kolibri:core:contentnode-list"), data={"languages": "en"} + ) + self.assertEqual(len(response.data), 1) + self.assertEqual(response.data[0]["title"], "c1") + + response = self.client.get( + reverse("kolibri:core:contentnode-list"), data={"languages": "es"} + ) + self.assertEqual(len(response.data), 1) + self.assertEqual(response.data[0]["title"], "c1") + + def test_filter_by_empty_included_languages(self): + """ + Test that nodes with empty included_languages are not returned when filtering + """ + c1 = content.ContentNode.objects.get(title="c1") + c1.included_languages.set([]) + c1.save() + + response = self.client.get( + reverse("kolibri:core:contentnode-list"), data={"languages": "en"} + ) + self.assertEqual(len(response.data), 0) + + def test_filter_by_language_subcodes(self): + """ + Test filtering by language codes with subcodes, ensuring that: + 1. Searching by base language code returns all variants + 2. Searching by specific subcode only returns exact matches + """ + nodes = { + "es_node": content.ContentNode.objects.get(title="c1"), + "es_es_node": content.ContentNode.objects.get(title="c2"), + "es_419_node": content.ContentNode.objects.get(title="c2c1"), + "en_gb_node": content.ContentNode.objects.get(title="c2c2"), + } + + english = content.Language.objects.create( + id="en-gb", + lang_code="en", + lang_subcode="gb", + lang_name="English", + lang_direction="ltr", + ) + spanish = content.Language.objects.create( + id="es", + lang_code="es", + lang_subcode="", + lang_name="Spanish", + lang_direction="ltr", + ) + + spanish_spanish = content.Language.objects.create( + id="es-es", + lang_code="es", + lang_subcode="es", + lang_name="Spanish", + lang_direction="ltr", + ) + + latin_american_spanish = content.Language.objects.create( + id="es-419", + lang_code="es", + lang_subcode="419", + lang_name="Spanish", + lang_direction="ltr", + ) + + # Set up nodes with different language codes + nodes["es_node"].included_languages.set([spanish]) + nodes["es_es_node"].included_languages.set([spanish_spanish]) + nodes["es_419_node"].included_languages.set([latin_american_spanish]) + nodes["en_gb_node"].included_languages.set([english]) + + for node in nodes.values(): + node.save() + + # Test that searching by 'es' returns only the unpreifxed spanish variaent + response = self.client.get( + reverse("kolibri:core:contentnode-list"), data={"languages": "es"} + ) + self.assertEqual(len(response.data), 1) + title = response.data[0]["title"] + self.assertEqual(title, "c1") + + # Test that searching by specific Spanish variant only returns exact match + response = self.client.get( + reverse("kolibri:core:contentnode-list"), data={"languages": "es-419"} + ) + self.assertEqual(len(response.data), 1) + self.assertEqual(response.data[0]["title"], "c2c1") + + # Test that searching by 'en' returns nothing + response = self.client.get( + reverse("kolibri:core:contentnode-list"), data={"languages": "en"} + ) + self.assertEqual(len(response.data), 0) + + # Test searching for multiple specific variants + response = self.client.get( + reverse("kolibri:core:contentnode-list"), data={"languages": "es-es,es-419"} + ) + self.assertEqual(len(response.data), 2) + titles = {node["title"] for node in response.data} + self.assertEqual(titles, {"c2", "c2c1"}) + def _recurse_and_assert(self, data, nodes, recursion_depth=0): recursion_depths = [] for actual, expected in zip(data, nodes): diff --git a/kolibri/core/content/utils/annotation.py b/kolibri/core/content/utils/annotation.py index a9cba7f146e..7b36b5f9ff3 100644 --- a/kolibri/core/content/utils/annotation.py +++ b/kolibri/core/content/utils/annotation.py @@ -530,6 +530,7 @@ def recurse_annotation_up_tree(channel_id): bridge = Bridge(app_name=CONTENT_APP_NAME) ContentNodeTable = bridge.get_table(ContentNode) + IncludedLanguagesTable = bridge.get_table(ContentNode.included_languages.through) connection = bridge.get_connection() @@ -583,6 +584,31 @@ def recurse_annotation_up_tree(channel_id): ) ) + # First clear out all existing language relationships for nodes in this channel + connection.execute( + IncludedLanguagesTable.delete().where( + IncludedLanguagesTable.c.contentnode_id.in_( + select([ContentNodeTable.c.id]).where( + ContentNodeTable.c.channel_id == channel_id + ) + ) + ) + ) + + # For non-topic nodes only, set included_languages based on their lang_id + connection.execute( + IncludedLanguagesTable.insert().from_select( + ["contentnode_id", "language_id"], + select([ContentNodeTable.c.id, ContentNodeTable.c.lang_id]).where( + and_( + ContentNodeTable.c.channel_id == channel_id, + ContentNodeTable.c.kind != content_kinds.TOPIC, + ContentNodeTable.c.lang_id.isnot(None), + ) + ), + ) + ) + # Expression to capture all available child nodes of a contentnode available_nodes = select(child.c.available).where( and_( @@ -661,6 +687,36 @@ def recurse_annotation_up_tree(channel_id): ) ) + # Update included languages for all topics at this level by combining + # their own language with their children's languages in one query + connection.execute( + IncludedLanguagesTable.insert().from_select( + ["contentnode_id", "language_id"], + # Languages from children + select([ContentNodeTable.c.id, IncludedLanguagesTable.c.language_id]) + .select_from(ContentNodeTable) + .join( + child, + and_( + child.c.parent_id == ContentNodeTable.c.id, + child.c.available == True, # noqa + ), + ) + .join( + IncludedLanguagesTable, + IncludedLanguagesTable.c.contentnode_id == child.c.id, + ) + .where( + and_( + ContentNodeTable.c.level == level - 1, + ContentNodeTable.c.channel_id == channel_id, + ContentNodeTable.c.kind == content_kinds.TOPIC, + ) + ) + .distinct(), + ) + ) + # commit the transaction trans.commit() diff --git a/requirements/dev.txt b/requirements/dev.txt index 22a2c4da9a9..b919d57a5f3 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -9,4 +9,4 @@ django-debug-toolbar==4.3.0 drf-yasg==1.21.7 coreapi==2.3.3 nodeenv==1.3.3 -sqlacodegen==2.1.0 +sqlacodegen==2.3.0.post1