Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expand documentation on API media properties #4225

Merged
merged 9 commits into from
May 8, 2024
2 changes: 2 additions & 0 deletions api/api/docs/media_properties/preamble.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ data.
The columns are sorted alphabetically and separated into relations (used to
establish relationships to other models) and values (used to hold some data
value). Note that relation fields are always nullable.

**Models**:
12 changes: 12 additions & 0 deletions api/api/management/commands/documentmedia.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from dataclasses import dataclass
from inspect import getdoc
from pathlib import Path
from textwrap import dedent

from django.core.management import BaseCommand
from django.db import connection
Expand All @@ -22,6 +23,7 @@ class RelationInfo:

nature: str
to: str
doc: str


@dataclass
Expand Down Expand Up @@ -140,6 +142,7 @@ def parse_fields(model_class: type[AbstractMedia]) -> list[FieldInfo]:
nature for nature in natures if getattr(field, nature, False)
),
to=field.related_model.__name__,
doc=field.related_model.__doc__,
)
else:
field_info.value_info = ValueInfo(
Expand Down Expand Up @@ -181,6 +184,9 @@ def generate_docs(props: dict[str, list[FieldInfo]]) -> str:

output += PREAMBLE_PATH.read_text()
output += "\n"
for model in props:
output += f"- [{model}](#{model.lower()})\n"
output += "\n"

for model, fields in props.items():
relations, values = [], []
Expand Down Expand Up @@ -293,6 +299,12 @@ def generate_notes(model: str, fields: list[FieldInfo]) -> tuple[str, set[str]]:
if not field.is_relation and field.value_info.help_text:
field_output += f"**Help text:** {field.value_info.help_text}\n\n"
record = True
if field.is_relation and field.relation_info.doc:
field_output += (
f"**`{field.relation_info.to}` docstring:** "
f"{dedent(field.relation_info.doc)}\n\n"
)
record = True
if record:
noted_fields.add(field.name)
output += field_output
Expand Down
78 changes: 78 additions & 0 deletions api/api/migrations/0060_fill_out_help_text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Generated by Django 4.2.11 on 2024-04-29 22:12

from django.db import migrations, models


# Migration 0060: re-declares existing fields on the `audio` and `image`
# models so that each one carries a `help_text`. Every operation below is an
# `AlterField`; no fields are added, removed or renamed here.
# NOTE(review): `help_text` is presumably not a database-level attribute, so
# this should not emit any schema-changing SQL — confirm with `sqlmigrate`.
class Migration(migrations.Migration):

# Applied on top of `0059_userpreferences` in the `api` app.
dependencies = [
('api', '0059_userpreferences'),
]

operations = [
# --- `audio` model: `alt_files` plus the six fields that are also altered
# --- on `image` further down.
# Multi-line help text: the leading and trailing `\n` are part of the
# stored string and must stay as-is.
migrations.AlterField(
model_name='audio',
name='alt_files',
field=models.JSONField(blank=True, help_text='\nJSON object containing information on alternative audio files. Each object\nis expected to contain:\n\n- `url`: URL reference to the file\n- `filesize`: File size in bytes\n- `filetype`: Extension of the file\n- `bit_rate`: Bitrate of the file in bits/second\n- `sample_rate`: Sample rate of the file in bits/second\n', null=True),
),
migrations.AlterField(
model_name='audio',
name='last_synced_with_source',
field=models.DateTimeField(blank=True, db_index=True, help_text='The date the media was last updated from the upstream source.', null=True),
),
migrations.AlterField(
model_name='audio',
name='meta_data',
field=models.JSONField(blank=True, help_text='\nJSON object containing extra data about the media item. No fields are expected,\nbut if the `license_url` field is available, it will be used for determining\nthe license URL for the media item. The `description` field, if available, is\nalso indexed into Elasticsearch and as a search field on queries.\n', null=True),
),
migrations.AlterField(
model_name='audio',
name='removed_from_source',
field=models.BooleanField(default=False, help_text='Whether the media has been removed from the upstream source.'),
),
migrations.AlterField(
model_name='audio',
name='tags',
field=models.JSONField(blank=True, help_text='\nJSON array of objects containing tags for the media. Each tag object\nis expected to have:\n\n- `name`: The tag itself (e.g. "dog")\n- `provider`: The source of the tag\n- `accuracy`: If the tag was added using a machine-labeler, the confidence\nfor that label expressed as a value between 0 and 1.\n\nNote that only `name` and `accuracy` are presently surfaced in API results.\n', null=True),
),
migrations.AlterField(
model_name='audio',
name='view_count',
field=models.IntegerField(blank=True, default=0, help_text='Vestigial field, purpose unknown.', null=True),
),
migrations.AlterField(
model_name='audio',
name='watermarked',
field=models.BooleanField(blank=True, help_text='Whether the media contains a watermark. Not currently leveraged.', null=True),
),
# --- `image` model: the same six fields, field types and help texts as the
# --- corresponding `audio` operations above (there is no `alt_files` here).
migrations.AlterField(
model_name='image',
name='last_synced_with_source',
field=models.DateTimeField(blank=True, db_index=True, help_text='The date the media was last updated from the upstream source.', null=True),
),
migrations.AlterField(
model_name='image',
name='meta_data',
field=models.JSONField(blank=True, help_text='\nJSON object containing extra data about the media item. No fields are expected,\nbut if the `license_url` field is available, it will be used for determining\nthe license URL for the media item. The `description` field, if available, is\nalso indexed into Elasticsearch and as a search field on queries.\n', null=True),
),
migrations.AlterField(
model_name='image',
name='removed_from_source',
field=models.BooleanField(default=False, help_text='Whether the media has been removed from the upstream source.'),
),
migrations.AlterField(
model_name='image',
name='tags',
field=models.JSONField(blank=True, help_text='\nJSON array of objects containing tags for the media. Each tag object\nis expected to have:\n\n- `name`: The tag itself (e.g. "dog")\n- `provider`: The source of the tag\n- `accuracy`: If the tag was added using a machine-labeler, the confidence\nfor that label expressed as a value between 0 and 1.\n\nNote that only `name` and `accuracy` are presently surfaced in API results.\n', null=True),
),
migrations.AlterField(
model_name='image',
name='view_count',
field=models.IntegerField(blank=True, default=0, help_text='Vestigial field, purpose unknown.', null=True),
),
migrations.AlterField(
model_name='image',
name='watermarked',
field=models.BooleanField(blank=True, help_text='Whether the media contains a watermark. Not currently leveraged.', null=True),
),
]
34 changes: 27 additions & 7 deletions api/api/models/audio.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from textwrap import dedent as d
AetherUnbound marked this conversation as resolved.
Show resolved Hide resolved

from django.conf import settings
from django.contrib.postgres.fields import ArrayField
from django.db import models
Expand Down Expand Up @@ -46,8 +48,8 @@ class AudioSet(ForeignIdentifierMixin, MediaMixin, FileMixin, OpenLedgerModel):
"""
This is an ordered collection of audio files, such as a podcast series or an album.

Not to be confused with AudioList which is a many-to-many collection of audio files,
like a playlist or favourites library.
Not to be confused with ``AudioList`` which is a many-to-many collection of audio
files, like a playlist or favourites library.

The FileMixin inherited by this model refers not to audio but album art.
"""
Expand Down Expand Up @@ -139,7 +141,7 @@ class AudioAddOn(OpenLedgerModel):

class Audio(AudioFileMixin, AbstractMedia):
"""
Represents one audio media instance.
One audio media instance.

Inherited fields
================
Expand Down Expand Up @@ -192,7 +194,16 @@ class Audio(AudioFileMixin, AbstractMedia):
alt_files = models.JSONField(
blank=True,
null=True,
help_text="JSON describing alternative files for this audio.",
help_text=d("""
JSON object containing information on alternative audio files. Each object
is expected to contain:

- `url`: URL reference to the file
- `filesize`: File size in bytes
- `filetype`: Extension of the file
- `bit_rate`: Bitrate of the file in bits/second
- `sample_rate`: Sample rate of the file in bits/second
sarayourfriend marked this conversation as resolved.
Show resolved Hide resolved
"""),
sarayourfriend marked this conversation as resolved.
Show resolved Hide resolved
)

@property
Expand Down Expand Up @@ -243,7 +254,7 @@ class Meta(AbstractMedia.Meta):

class DeletedAudio(AbstractDeletedMedia):
"""
Stores identifiers of audio tracks that have been deleted from the source.
Audio tracks deleted from the upstream source.

Do not create instances of this model manually. Create an ``AudioReport`` instance
instead.
Expand All @@ -269,7 +280,7 @@ class Meta:

class SensitiveAudio(AbstractSensitiveMedia):
"""
Stores all audio tracks that have been flagged as 'mature'.
Audio tracks with verified sensitivity reports.

Do not create instances of this model manually. Create an ``AudioReport`` instance
instead.
Expand All @@ -295,6 +306,13 @@ class Meta:


class AudioReport(AbstractMediaReport):
"""
User-submitted reports of audio tracks.

``AudioDecision`` is populated only if moderators have made a decision
for this report.
"""

media_class = Audio

media_obj = models.ForeignKey(
Expand All @@ -319,7 +337,7 @@ class Meta:


class AudioDecision(AbstractMediaDecision):
"""Represents moderation decisions taken for audio tracks."""
"""Moderation decisions taken for audio tracks."""

media_class = Audio

Expand All @@ -331,6 +349,8 @@ class AudioDecision(AbstractMediaDecision):


class AudioList(AbstractMediaList):
"""A list of audio files. Currently unused."""

audios = models.ManyToManyField(
Audio,
related_name="lists",
Expand Down
17 changes: 13 additions & 4 deletions api/api/models/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class Meta:

class Image(ImageFileMixin, AbstractMedia):
"""
Represents one image media instance.
One image media instance.

Inherited fields
================
Expand All @@ -60,7 +60,7 @@ def sensitive(self) -> bool:

class DeletedImage(AbstractDeletedMedia):
"""
Stores identifiers of images that have been deleted from the source.
Images deleted from the upstream source.

Do not create instances of this model manually. Create an ``ImageReport`` instance
instead.
Expand All @@ -83,7 +83,7 @@ class DeletedImage(AbstractDeletedMedia):

class SensitiveImage(AbstractSensitiveMedia):
"""
Stores all images that have been flagged as 'mature'.
Images with verified sensitivity reports.

Do not create instances of this model manually. Create an ``ImageReport`` instance
instead.
Expand All @@ -108,6 +108,13 @@ class Meta:


class ImageReport(AbstractMediaReport):
"""
User-submitted report of an image.

This contains an ``ImageDecision`` as well, if moderators have made a decision
for this report.
"""

media_class = Image

media_obj = models.ForeignKey(
Expand All @@ -132,7 +139,7 @@ class Meta:


class ImageDecision(AbstractMediaDecision):
"""Represents moderation decisions taken for images."""
"""Moderation decisions taken for images."""

media_class = Image

Expand All @@ -144,6 +151,8 @@ class ImageDecision(AbstractMediaDecision):


class ImageList(AbstractMediaList):
"""A list of images. Currently unused."""

images = models.ManyToManyField(
Image,
related_name="lists",
Expand Down
46 changes: 38 additions & 8 deletions api/api/models/media.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import mimetypes
from textwrap import dedent

from django.conf import settings
from django.core.exceptions import ValidationError
Expand Down Expand Up @@ -42,7 +43,11 @@ class AbstractMedia(
define one explicitly.
"""

watermarked = models.BooleanField(blank=True, null=True)
watermarked = models.BooleanField(
blank=True,
null=True,
help_text="Whether the media contains a watermark. Not currently leveraged.",
)

license = models.CharField(
max_length=50,
Expand All @@ -64,19 +69,35 @@ class AbstractMedia(
"Source and provider can be different. Eg: the Google Open "
"Images dataset is source=openimages, but provider=flickr.",
)
last_synced_with_source = models.DateTimeField(blank=True, null=True, db_index=True)
removed_from_source = models.BooleanField(default=False)

view_count = models.IntegerField(
last_synced_with_source = models.DateTimeField(
blank=True,
null=True,
default=0,
db_index=True,
help_text="The date the media was last updated from the upstream source.",
)
removed_from_source = models.BooleanField(
default=False,
help_text="Whether the media has been removed from the upstream source.",
)

view_count = models.IntegerField(
blank=True, null=True, default=0, help_text="Vestigial field, purpose unknown."
sarayourfriend marked this conversation as resolved.
Show resolved Hide resolved
)

tags = models.JSONField(
blank=True,
null=True,
help_text="Tags with detailed metadata, such as accuracy.",
help_text=dedent("""
JSON array of objects containing tags for the media. Each tag object
is expected to have:

- `name`: The tag itself (e.g. "dog")
- `provider`: The source of the tag
- `accuracy`: If the tag was added using a machine-labeler, the confidence
for that label expressed as a value between 0 and 1.

Note that only `name` and `accuracy` are presently surfaced in API results.
"""),
)

category = models.CharField(
Expand All @@ -87,7 +108,16 @@ class AbstractMedia(
help_text="The top-level classification of this media file.",
)

meta_data = models.JSONField(blank=True, null=True)
meta_data = models.JSONField(
blank=True,
null=True,
help_text=dedent("""
JSON object containing extra data about the media item. No fields are expected,
but if the `license_url` field is available, it will be used for determining
the license URL for the media item. The `description` field, if available, is
also indexed into Elasticsearch and as a search field on queries.
"""),
)

@property
def license_url(self) -> str | None:
Expand Down
2 changes: 1 addition & 1 deletion api/latest_migrations/api
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
# If you have a merge conflict in this file, it means you need to run:
# manage.py makemigrations --merge
# in order to resolve the conflict between migrations.
0058_moderation_decision
0060_fill_out_help_text
Loading
Loading