Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Annotate channels with some ordered metadata #12944

Merged
merged 7 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
from sqlalchemy import BigInteger
from sqlalchemy import Boolean
from sqlalchemy import CHAR
from sqlalchemy import CheckConstraint
from sqlalchemy import Column
from sqlalchemy import Float
from sqlalchemy import ForeignKey
from sqlalchemy import ForeignKeyConstraint
from sqlalchemy import Index
from sqlalchemy import Integer
from sqlalchemy import String
Expand Down Expand Up @@ -45,6 +47,23 @@ class ContentLocalfile(Base):
class ContentContentnode(Base):
__tablename__ = "content_contentnode"
__table_args__ = (
CheckConstraint("lft >= 0"),
CheckConstraint("tree_id >= 0"),
CheckConstraint("level >= 0"),
CheckConstraint("duration >= 0"),
CheckConstraint("rght >= 0"),
ForeignKeyConstraint(
["lang_id"],
["content_language.id"],
deferrable=True,
initially="DEFERRED",
),
ForeignKeyConstraint(
["parent_id"],
["content_contentnode.id"],
deferrable=True,
initially="DEFERRED",
),
Index(
"content_contentnode_level_channel_id_available_29f0bb18_idx",
"level",
Expand All @@ -69,11 +88,10 @@ class ContentContentnode(Base):
author = Column(String(200), nullable=False)
kind = Column(String(200), nullable=False)
available = Column(Boolean, nullable=False)
lft = Column(Integer, nullable=False, index=True)
rght = Column(Integer, nullable=False, index=True)
lft = Column(Integer, nullable=False)
tree_id = Column(Integer, nullable=False, index=True)
level = Column(Integer, nullable=False, index=True)
lang_id = Column(ForeignKey("content_language.id"), index=True)
level = Column(Integer, nullable=False)
lang_id = Column(String(14), index=True)
license_description = Column(Text)
license_name = Column(String(50))
coach_content = Column(Boolean, nullable=False)
Expand All @@ -94,7 +112,8 @@ class ContentContentnode(Base):
learning_activities_bitmask_0 = Column(BigInteger)
ancestors = Column(Text)
admin_imported = Column(Boolean)
parent_id = Column(ForeignKey("content_contentnode.id"), index=True)
rght = Column(Integer, nullable=False)
parent_id = Column(CHAR(32), index=True)

lang = relationship("ContentLanguage")
parent = relationship("ContentContentnode", remote_side=[id])
Expand All @@ -118,6 +137,13 @@ class ContentAssessmentmetadata(Base):

class ContentChannelmetadata(Base):
__tablename__ = "content_channelmetadata"
__table_args__ = (
CheckConstraint('"order" >= 0'),
ForeignKeyConstraint(
["root_id"],
["content_contentnode.id"],
),
)

id = Column(CHAR(32), primary_key=True)
name = Column(String(200), nullable=False)
Expand All @@ -127,13 +153,15 @@ class ContentChannelmetadata(Base):
thumbnail = Column(Text, nullable=False)
last_updated = Column(String)
min_schema_version = Column(String(50), nullable=False)
root_id = Column(ForeignKey("content_contentnode.id"), nullable=False, index=True)
root_id = Column(CHAR(32), nullable=False, index=True)
published_size = Column(BigInteger)
total_resource_count = Column(Integer)
order = Column(Integer)
public = Column(Boolean)
tagline = Column(String(150))
partial = Column(Boolean)
included_categories = Column(Text)
included_grade_levels = Column(Text)

root = relationship("ContentContentnode")

Expand Down Expand Up @@ -242,12 +270,21 @@ class ContentFile(Base):

class ContentChannelmetadataIncludedLanguages(Base):
__tablename__ = "content_channelmetadata_included_languages"
__table_args__ = (
Index(
"content_channelmetadata_included_languages_channelmetadata_id_language_id_51f20415_uniq",
"channelmetadata_id",
"language_id",
unique=True,
),
)

id = Column(Integer, primary_key=True)
channelmetadata_id = Column(
ForeignKey("content_channelmetadata.id"), nullable=False
ForeignKey("content_channelmetadata.id"), nullable=False, index=True
)
language_id = Column(ForeignKey("content_language.id"), nullable=False)
language_id = Column(ForeignKey("content_language.id"), nullable=False, index=True)
sort_value = Column(Integer, nullable=False)

channelmetadata = relationship("ContentChannelmetadata")
language = relationship("ContentLanguage")
25 changes: 25 additions & 0 deletions kolibri/core/content/management/commands/generate_schema.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import inspect
import io
import json
import os
Expand All @@ -10,6 +11,30 @@
from django.core.management import call_command
from django.core.management.base import BaseCommand
from django.db import connections

# Compatibility layer for Python 3.11+, where ``inspect.ArgSpec`` and
# ``inspect.getargspec`` were removed from the standard library.
# NOTE(review): presumably needed by the sqlacodegen import that follows;
# the shim must stay above that import to take effect.
if not hasattr(inspect, "ArgSpec"):
    from collections import namedtuple

    # The removed stdlib type was a namedtuple, so keep the tuple protocol:
    # callers may unpack or index the result as well as read it by attribute.
    ArgSpec = namedtuple("ArgSpec", ["args", "varargs", "keywords", "defaults"])

    def getargspec(func):
        """Return an ``ArgSpec`` describing the signature of *func*.

        Built on ``inspect.getfullargspec``; its ``varkw`` field is exposed
        under the legacy name ``keywords``. Keyword-only arguments and
        annotations are not part of the legacy tuple and are dropped.
        """
        spec = inspect.getfullargspec(func)
        return ArgSpec(
            args=spec.args,
            varargs=spec.varargs,
            keywords=spec.varkw,
            defaults=spec.defaults,
        )

    inspect.ArgSpec = ArgSpec
    inspect.getargspec = getargspec


from sqlacodegen.codegen import CodeGenerator
from sqlalchemy import create_engine
from sqlalchemy import MetaData
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Generated by Django 3.2.25 on 2024-12-13 17:17
import sortedm2m.fields
import sortedm2m.operations
from django.db import migrations
from django.db import models


class Migration(migrations.Migration):
    """Schema changes for ordered channel metadata.

    Adds the ``included_categories`` and ``included_grade_levels`` text
    columns to ``channelmetadata`` and switches ``included_languages`` to a
    sorted many-to-many field.
    """

    dependencies = [
        ("content", "0038_alter_localfile_extension"),
    ]

    operations = [
        # Two optional (nullable, blank-able) text columns.
        migrations.AddField(
            model_name="channelmetadata",
            name="included_categories",
            field=models.TextField(null=True, blank=True),
        ),
        migrations.AddField(
            model_name="channelmetadata",
            name="included_grade_levels",
            field=models.TextField(null=True, blank=True),
        ),
        # Convert the existing languages relation to its sorted variant.
        sortedm2m.operations.AlterSortedManyToManyField(
            model_name="channelmetadata",
            name="included_languages",
            field=sortedm2m.fields.SortedManyToManyField(
                to="content.Language",
                related_name="channels",
                verbose_name="languages",
                blank=True,
            ),
        ),
    ]
10 changes: 8 additions & 2 deletions kolibri/core/content/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from morango.models.fields import UUIDField
from mptt.managers import TreeManager
from mptt.querysets import TreeQuerySet
from sortedm2m.fields import SortedManyToManyField

from .utils import paths
from kolibri.core.auth.models import Facility
Expand Down Expand Up @@ -376,9 +377,14 @@ class ChannelMetadata(base_models.ChannelMetadata):
# precalculated fields during annotation/migration
published_size = models.BigIntegerField(default=0, null=True, blank=True)
total_resource_count = models.IntegerField(default=0, null=True, blank=True)
included_languages = models.ManyToManyField(
"Language", related_name="channels", verbose_name="languages", blank=True
included_languages = SortedManyToManyField(
Language,
related_name="channels",
verbose_name="languages",
blank=True,
)
included_categories = models.TextField(null=True, blank=True)
included_grade_levels = models.TextField(null=True, blank=True)
order = models.PositiveIntegerField(default=0, null=True, blank=True)
public = models.BooleanField(null=True)
# Has only a subset of this channel's metadata been imported?
Expand Down
115 changes: 115 additions & 0 deletions kolibri/core/content/test/test_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
from kolibri.core.content.models import LocalFile
from kolibri.core.content.test.test_channel_upgrade import ChannelBuilder
from kolibri.core.content.utils.annotation import calculate_included_languages
from kolibri.core.content.utils.annotation import calculate_ordered_categories
from kolibri.core.content.utils.annotation import calculate_ordered_grade_levels
from kolibri.core.content.utils.annotation import calculate_published_size
from kolibri.core.content.utils.annotation import calculate_total_resource_count
from kolibri.core.content.utils.annotation import mark_local_files_as_available
Expand Down Expand Up @@ -962,6 +964,119 @@ def test_calculate_included_languages(self):
list(self.channel.included_languages.values_list("id", flat=True)), ["en"]
)

def test_calculate_ordered_categories(self):
    """Check ``included_categories`` on the channel after recalculation.

    Asserts ``None`` before this test sets any categories, then
    ``"math,science,history"`` once three nodes carry categories, and the
    same string again after one of those nodes is marked unavailable.
    """

    def add_node(title, categories):
        # Extra available node in the same channel as ``self.node``.
        return ContentNode.objects.create(
            title=title,
            id=uuid.uuid4().hex,
            content_id=uuid.uuid4().hex,
            channel_id=self.node.channel_id,
            categories=categories,
            available=True,
        )

    # Before this test assigns any categories.
    calculate_ordered_categories(self.channel)
    self.assertIsNone(self.channel.included_categories)

    # Among the nodes touched here, "math" is on three of them while
    # "science" and "history" are on one each.
    ContentNode.objects.filter(id=self.node.id).update(categories="math,science")
    add_node("test2", "math,history")
    math_only_node = add_node("test3", "math")

    calculate_ordered_categories(self.channel)
    self.assertEqual(self.channel.included_categories, "math,science,history")

    # Marking one node unavailable is expected to leave the result as is.
    math_only_node.available = False
    math_only_node.save()
    calculate_ordered_categories(self.channel)
    self.assertEqual(self.channel.included_categories, "math,science,history")

def test_calculate_ordered_grade_levels(self):
    """Check ``included_grade_levels`` on the channel after recalculation.

    Asserts ``None`` before this test sets any grade levels, then
    ``"2,1,3"`` once three nodes carry grade levels, and the same string
    again after one of those nodes is marked unavailable.
    """

    def add_node(title, grade_levels):
        # Extra available node in the same channel as ``self.node``.
        return ContentNode.objects.create(
            title=title,
            id=uuid.uuid4().hex,
            content_id=uuid.uuid4().hex,
            channel_id=self.node.channel_id,
            grade_levels=grade_levels,
            available=True,
        )

    # Before this test assigns any grade levels.
    calculate_ordered_grade_levels(self.channel)
    self.assertIsNone(self.channel.included_grade_levels)

    # Among the nodes touched here, "2" is on three of them while "1" and
    # "3" are on one each.
    ContentNode.objects.filter(id=self.node.id).update(grade_levels="1,2")
    add_node("test2", "2,3")
    grade_two_node = add_node("test3", "2")

    calculate_ordered_grade_levels(self.channel)
    self.assertEqual(self.channel.included_grade_levels, "2,1,3")

    # Marking one node unavailable is expected to leave the result as is.
    grade_two_node.available = False
    grade_two_node.save()
    calculate_ordered_grade_levels(self.channel)
    self.assertEqual(self.channel.included_grade_levels, "2,1,3")

def test_calculate_included_languages_frequency(self):
    """Check which languages end up in the channel's ``included_languages``.

    Nodes are created with "en" once, "es" twice and "fr" once. The
    assertions compare the language ids as a set, so they verify
    membership only, not the order in which languages are stored.
    """
    # Two languages beyond the "en" assigned to ``self.node`` below.
    for code in ("es", "fr"):
        Language.objects.create(id=code, lang_code=code)

    self.node.lang_id = "en"
    self.node.save()

    # One extra available node per (title, language) pair, in this order.
    created = {}
    for title, lang_code in (("test2", "es"), ("test3", "es"), ("test4", "fr")):
        created[title] = ContentNode.objects.create(
            title=title,
            id=uuid.uuid4().hex,
            content_id=uuid.uuid4().hex,
            channel_id=self.node.channel_id,
            lang_id=lang_code,
            available=True,
        )
    second_spanish_node = created["test3"]

    expected_languages = {"en", "es", "fr"}

    calculate_included_languages(self.channel)
    languages = set(self.channel.included_languages.values_list("id", flat=True))
    self.assertEqual(languages, expected_languages)

    # Marking one of the "es" nodes unavailable is expected to leave the
    # set of included languages unchanged.
    second_spanish_node.available = False
    second_spanish_node.save()
    calculate_included_languages(self.channel)
    languages = set(self.channel.included_languages.values_list("id", flat=True))
    self.assertEqual(languages, expected_languages)

def test_calculate_total_resources(self):
local_file = LocalFile.objects.create(
id=uuid.uuid4().hex, extension="mp4", available=True, file_size=10
Expand Down
15 changes: 15 additions & 0 deletions kolibri/core/content/upgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
from kolibri.core.content.models import ChannelMetadata
from kolibri.core.content.models import ContentNode
from kolibri.core.content.tasks import enqueue_automatic_resource_import_if_needed
from kolibri.core.content.utils.annotation import calculate_included_languages
from kolibri.core.content.utils.annotation import calculate_ordered_categories
from kolibri.core.content.utils.annotation import calculate_ordered_grade_levels
from kolibri.core.content.utils.annotation import set_channel_ancestors
from kolibri.core.content.utils.annotation import set_content_visibility_from_disk
from kolibri.core.content.utils.channel_import import FutureSchemaError
Expand Down Expand Up @@ -343,3 +346,15 @@ def synchronize_content_requests_upgrade():
synchronize_content_requests(dataset_id, transfer_session=None)

enqueue_automatic_resource_import_if_needed()


@version_upgrade(old_version="<0.18.0")
def ordered_metadata_in_channels():
    """
    Recalculate, for every channel, the categories, grade levels and
    included languages so that they are ordered by occurrence in the
    channel resources.
    """
    # Applied to each channel in exactly this order.
    recalculations = (
        calculate_ordered_categories,
        calculate_ordered_grade_levels,
        calculate_included_languages,
    )
    for channel in ChannelMetadata.objects.all():
        for recalculate in recalculations:
            recalculate(channel)
Loading
Loading