Skip to content

Commit

Permalink
Link vtt file to CaptionFile model
Browse files Browse the repository at this point in the history
  • Loading branch information
akash5100 committed Oct 17, 2023
1 parent bd831b8 commit e982829
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 43 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by Django 3.2.14 on 2023-09-23 08:23
# Generated by Django 3.2.14 on 2023-10-17 06:55

import contentcuration.models
from django.db import migrations, models
Expand All @@ -18,7 +18,9 @@ class Migration(migrations.Migration):
fields=[
('id', contentcuration.models.UUIDField(default=uuid.uuid4, max_length=32, primary_key=True, serialize=False)),
('file_id', contentcuration.models.UUIDField(default=uuid.uuid4, max_length=32)),
('modified', models.DateTimeField(auto_now=True, verbose_name='modified')),
('language', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='caption_file', to='contentcuration.language')),
('output_file', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='contentcuration.file')),
],
options={
'unique_together': {('file_id', 'language')},
Expand Down
6 changes: 5 additions & 1 deletion contentcuration/contentcuration/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2065,12 +2065,16 @@ class CaptionFile(models.Model):
"""
Represents a caption file record.
- file_id: The identifier of related file in Google Cloud Storage.
- file_id: The identifier of related Video/Audio File object.
- language: The language of the caption file.
- output_file: The FK to the associated generated VTT File object.
"""
id = UUIDField(primary_key=True, default=uuid.uuid4)
file_id = UUIDField(default=uuid.uuid4, max_length=36)
language = models.ForeignKey(Language, related_name="caption_file", on_delete=models.CASCADE)
modified = models.DateTimeField(auto_now=True, verbose_name="modified")
output_file = models.ForeignKey('File', null=True, blank=True,
on_delete=models.SET_NULL)

class Meta:
unique_together = ['file_id', 'language']
Expand Down
88 changes: 54 additions & 34 deletions contentcuration/contentcuration/utils/publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,15 +168,59 @@ def create_kolibri_license_object(ccnode):
license_description=ccnode.license.license_description if use_license_description else ccnode.license_description
)

def create_webvtt_file(ccnode: ccmodels.ContentNode,
                       vtt_content: str,
                       caption_file: ccmodels.CaptionFile,
                       user_id: int = None) -> None:
    """Create a WebVTT File for a node and link it to a CaptionFile.

    The content is written to a temporary file, stored as a new ``File``
    (VTT format, VIDEO_SUBTITLE preset) attached to ``ccnode``, and the
    resulting object is assigned to ``caption_file.output_file``.

    Errors are logged (with traceback) and swallowed, so a failure to build
    one caption file does not abort the caller. The temporary file is always
    removed.

    :param ccnode: The ContentNode associated with the WebVTT file.
    :param vtt_content: The WebVTT content, either as text (encoded to UTF-8
        here) or as already-encoded UTF-8 bytes.
    :param caption_file: A CaptionFile to associate with the WebVTT file.
    :param user_id: The ID of the user creating the WebVTT file (optional).
    """
    logging.debug(f"Creating WebVTT for Node {ccnode.title}")
    filename = "{0}_{1}.{ext}".format(ccnode.title, caption_file.language, ext=file_formats.VTT)
    # NamedTemporaryFile opens in binary mode, so text must be encoded first.
    payload = vtt_content.encode('utf-8') if isinstance(vtt_content, str) else vtt_content
    temppath = None
    try:
        with tempfile.NamedTemporaryFile(suffix=".vtt", delete=False) as tempf:
            temppath = tempf.name
            tempf.write(payload)
            file_size = tempf.tell()
        # Leaving the ``with`` block closes (and therefore flushes) the temp
        # file, so the full content is on disk before it is reopened below.

        # Reopen under a context manager so the read handle is not leaked.
        with open(temppath, 'rb') as vtt_fobj:
            vtt_file_obj = ccmodels.File.objects.create(
                file_on_disk=File(vtt_fobj, name=filename),
                contentnode=ccnode,
                file_format_id=file_formats.VTT,
                preset_id=format_presets.VIDEO_SUBTITLE,
                original_filename=filename,
                file_size=file_size,
                uploaded_by_id=user_id,
                language=caption_file.language,
            )
        logging.debug("Created VTT for {0} with checksum {1}".format(ccnode.title, vtt_file_obj.checksum))

        caption_file.output_file = vtt_file_obj
        # Persist only the link. A plain save() would refresh the auto_now
        # ``modified`` timestamp, making the caption file look newer than the
        # VTT that was just generated from it.
        caption_file.save(update_fields=['output_file'])
    except Exception as e:
        logging.error(f"Error creating VTT file for {ccnode.title}: {str(e)}", exc_info=True)
    finally:
        if temppath:
            os.unlink(temppath)

def _format_webvtt_timestamp(seconds) -> str:
    """Format a number of seconds as a WebVTT ``hh:mm:ss.ttt`` timestamp."""
    total_ms = round(float(seconds) * 1000)
    hours, remainder = divmod(total_ms, 3600 * 1000)
    minutes, remainder = divmod(remainder, 60 * 1000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"


def generate_webvtt_file(caption_cues: "QuerySet[ccmodels.CaptionCue]") -> bytes:
    """Generate the content of a WebVTT file from CaptionCues.

    Cues are emitted in ascending ``starttime`` order. Each cue becomes a
    ``start --> end`` timing line followed by the cue text prefixed with
    ``- `` and a blank line.

    Timestamps are rendered as zero-padded ``hh:mm:ss.ttt``. The previous
    ``str(timedelta(...)) + ".000"`` form produced single-digit hours and a
    duplicated fraction for non-integer second values.

    :param caption_cues: QuerySet of CaptionCues to include in the WebVTT.
    :returns: The WebVTT content encoded as UTF-8 bytes.
    """
    parts = ["WEBVTT\n\n"]
    for cue in caption_cues.order_by('starttime'):
        st = _format_webvtt_timestamp(cue.starttime)
        et = _format_webvtt_timestamp(cue.endtime)
        parts.append(f"{st} --> {et}\n")
        parts.append(f"- {cue.text}\n\n")
    return "".join(parts).encode('utf-8')

def increment_channel_version(channel):
channel.version += 1
Expand Down Expand Up @@ -276,13 +320,14 @@ def recurse_nodes(self, node, inherited_fields): # noqa C901
elif node.kind_id == content_kinds.SLIDESHOW:
create_slideshow_manifest(node, user_id=self.user_id)
elif node.kind_id in [content_kinds.AUDIO, content_kinds.VIDEO]:
file_ids = node.files.all().values_list('id')
caption_files_queryset = ccmodels.CaptionFile.objects.filter(file_id__in=file_ids)
for caption_file in caption_files_queryset:
caption_cues = ccmodels.CaptionCue.objects.filter(caption_file=caption_file)
if caption_cues.exists():
lang, webvtt_content = caption_file.language, generate_webvtt_file(caption_cues)
create_webvtt(node, webvtt_content, lang, self.user_id)
if node.changed:
file_ids = node.files.all().values_list('id')
caption_files = ccmodels.CaptionFile.objects.filter(file_id__in=file_ids)
for cf in caption_files:
vtt_file = cf.output_file
vtt_content = generate_webvtt_file(caption_cues=cf.caption_cue.all())
if vtt_file is None or vtt_file.modified < cf.modified:
create_webvtt_file(node, vtt_content, cf, self.user_id)
elif node.kind_id == content_kinds.TOPIC:
for child in node.children.all():
self.recurse_nodes(child, metadata)
Expand Down Expand Up @@ -492,31 +537,6 @@ def create_associated_file_objects(kolibrinode, ccnode):
local_file=kolibrilocalfilemodel,
)

def create_webvtt(ccnode: ccmodels.ContentNode, vtt_content: str, language: str, user_id: int = None) -> None:
    """Store WebVTT text as a subtitle File attached to a ContentNode.

    The text is encoded as UTF-8, written to a temporary file, and saved as a
    new ``File`` with the VTT format and VIDEO_SUBTITLE preset. The temporary
    file is always removed, even when creation fails; exceptions propagate to
    the caller.

    :param ccnode: The ContentNode the subtitle file belongs to.
    :param vtt_content: The WebVTT content as text.
    :param language: The language assigned to the created File.
    :param user_id: The ID of the user creating the file (optional).
    """
    logging.debug(f"Creating WebVTT for Node {ccnode.title}")
    filename = "{0}_{1}.{ext}".format(ccnode.title, language, ext=file_formats.VTT)
    temppath = None
    try:
        with tempfile.NamedTemporaryFile(suffix=".vtt", delete=False) as tempf:
            temppath = tempf.name
            tempf.write(vtt_content.encode('utf-8'))
            file_size = tempf.tell()
        # Leaving the ``with`` block closes (and flushes) the temp file before
        # it is reopened for reading.

        # Reopen under a context manager so the read handle is not leaked.
        with open(temppath, 'rb') as vtt_fobj:
            file_obj = ccmodels.File.objects.create(
                file_on_disk=File(vtt_fobj, name=filename),
                contentnode=ccnode,
                file_format_id=file_formats.VTT,
                preset_id=format_presets.VIDEO_SUBTITLE,
                original_filename=filename,
                file_size=file_size,
                uploaded_by_id=user_id,
                language=language,
            )
        logging.debug("Created vtt for {0} with checksum {1}".format(ccnode.title, file_obj.checksum))
    finally:
        if temppath:
            os.unlink(temppath)

def create_perseus_exercise(ccnode, kolibrinode, exercise_data, user_id=None):
logging.debug("Creating Perseus Exercise for Node {}".format(ccnode.title))
filename = "{0}.{ext}".format(ccnode.title, ext=file_formats.PERSEUS)
Expand Down
1 change: 1 addition & 0 deletions requirements-dev.in
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ git+https://github.com/someshchaturvedi/customizable-django-profiler.git#customi
tabulate==0.8.2
fonttools
minio==7.1.1
transformers==4.29.2
57 changes: 50 additions & 7 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#
# This file is autogenerated by pip-compile with python 3.9
# To update, run:
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile requirements-dev.in
#
Expand Down Expand Up @@ -77,8 +77,12 @@ drf-yasg==1.20.0
# via -r requirements-dev.in
faker==0.9.1
# via mixer
filelock==3.4.1
# via virtualenv
filelock==3.12.4
# via
# -c requirements.txt
# huggingface-hub
# transformers
# virtualenv
flake8==3.4.1
# via -r requirements-dev.in
flask==2.0.3
Expand All @@ -92,6 +96,10 @@ flask-cors==3.0.10
# via locust
fonttools==4.40.0
# via -r requirements-dev.in
fsspec==2023.9.2
# via
# -c requirements.txt
# huggingface-hub
gevent==21.12.0
# via
# geventhttpclient
Expand All @@ -100,6 +108,10 @@ geventhttpclient==2.0.9
# via locust
greenlet==1.1.2
# via gevent
huggingface-hub==0.17.3
# via
# -c requirements.txt
# transformers
identify==2.4.4
# via pre-commit
idna==2.10
Expand Down Expand Up @@ -144,12 +156,18 @@ nodeenv==1.6.0
# via
# -r requirements-dev.in
# pre-commit
numpy==1.26.0
# via
# -c requirements.txt
# transformers
packaging==20.9
# via
# -c requirements.txt
# build
# drf-yasg
# huggingface-hub
# pytest
# transformers
pep517==0.12.0
# via build
pip-tools==6.8.0
Expand Down Expand Up @@ -209,17 +227,26 @@ pytz==2022.1
# via
# -c requirements.txt
# django
pyyaml==6.0
pyyaml==6.0.1
# via
# -c requirements.txt
# aspy-yaml
# huggingface-hub
# pre-commit
# transformers
pyzmq==23.1.0
# via locust
regex==2023.10.3
# via
# -c requirements.txt
# transformers
requests==2.25.1
# via
# -c requirements.txt
# coreapi
# huggingface-hub
# locust
# transformers
roundrobin==0.0.2
# via locust
ruamel-yaml==0.17.21
Expand Down Expand Up @@ -247,6 +274,10 @@ tblib==1.7.0
# via django-concurrent-test-helper
text-unidecode==1.2
# via faker
tokenizers==0.13.3
# via
# -c requirements.txt
# transformers
toml==0.10.2
# via
# pre-commit
Expand All @@ -256,8 +287,20 @@ tomli==1.2.3
# build
# coverage
# pep517
typing-extensions==4.5.0
# via locust
tqdm==4.66.1
# via
# -c requirements.txt
# huggingface-hub
# transformers
transformers==4.29.2
# via
# -c requirements.txt
# -r requirements-dev.in
typing-extensions==4.8.0
# via
# -c requirements.txt
# huggingface-hub
# locust
uritemplate==3.0.1
# via
# coreapi
Expand Down

0 comments on commit e982829

Please sign in to comment.