diff --git a/api/api/admin/media_report.py b/api/api/admin/media_report.py index 85fccca26f3..4bef2af20ee 100644 --- a/api/api/admin/media_report.py +++ b/api/api/admin/media_report.py @@ -5,10 +5,13 @@ from django import forms from django.conf import settings from django.contrib import admin, messages +from django.contrib.admin import helpers from django.contrib.admin.views.main import ChangeList +from django.core.exceptions import ObjectDoesNotExist from django.db.models import Count, F, Min from django.http import JsonResponse from django.shortcuts import redirect +from django.template.response import TemplateResponse from django.urls import reverse from django.utils.html import format_html from django.utils.safestring import mark_safe @@ -23,12 +26,16 @@ AudioDecision, AudioDecisionThrough, AudioReport, + DeletedAudio, + DeletedImage, Image, ImageDecision, ImageDecisionThrough, ImageReport, + SensitiveAudio, + SensitiveImage, ) -from api.models.media import AbstractDeletedMedia, AbstractSensitiveMedia +from api.utils.moderation import perform_moderation from api.utils.moderation_lock import LockManager @@ -42,11 +49,11 @@ def register(site): site.register(AudioReport, AudioReportAdmin) site.register(ImageReport, ImageReportAdmin) - for klass in [ - *AbstractSensitiveMedia.__subclasses__(), - *AbstractDeletedMedia.__subclasses__(), - ]: - site.register(klass, MediaSubreportAdmin) + site.register(SensitiveImage, SensitiveImageAdmin) + site.register(SensitiveAudio, SensitiveAudioAdmin) + + site.register(DeletedImage, DeletedImageAdmin) + site.register(DeletedAudio, DeletedAudioAdmin) site.register(ImageDecision, ImageDecisionAdmin) site.register(AudioDecision, AudioDecisionAdmin) @@ -181,12 +188,13 @@ def choices(self, changelist): def lookups(self, request, model_admin): return [ (None, "Moderation queue"), + ("prev", "Resolved"), ("all", "All"), ] def queryset(self, request, qs): value = self.value() - if value is None: + if value is None or value == "prev": # Filter down to only instances with reports qs = qs.filter(**{f"{media_type}_report__isnull": False}) @@ -204,13 +212,108 @@ def queryset(self, request, qs): qs = qs.order_by( "-total_report_count", "-pending_report_count", "oldest_report_date" ) + if value is None: + qs = qs.filter(pending_report_count__gt=0) + if value == "prev": + qs = qs.filter(pending_report_count=0) return qs return PendingRecordCountFilter -class MediaListAdmin(admin.ModelAdmin): +class BulkModerationMixin: + def has_bulk_mod_permission(self, request): + return request.user.has_perm(f"api.add_{self.media_type}decision") + + def _bulk_mod(self, request, queryset, action: DecisionAction): + """ + Perform bulk moderation for the queryset as per the requested + action. + + This follows the pattern of the ``delete_selected`` action + defined in ``django.contrib.admin.actions.py``. + """ + + opts = self.model._meta + verbose_name_plural = opts.verbose_name_plural + + if action == DecisionAction.MARKED_SENSITIVE: + init_count = queryset.count() + queryset = queryset.filter(**{f"sensitive_{self.media_type}__isnull": True}) + + count = len(queryset) + prev_count = init_count - count + stats = { + f"selected {verbose_name_plural}": init_count, + f"{verbose_name_plural} already marked as sensitive": prev_count, + } + else: + # No filtering is needed for any other actions. + count = len(queryset) + stats = {} + stats[f"{verbose_name_plural} to be {action.verb}"] = count + + if count == 0: + messages.info( + request, + f"All selected items are already {action.verb}.", + ) + return None + + if request.POST.get("post"): + # The user has already confirmed so we will perform the + # moderation and return ``None`` to display the change list + # view again. + decision = perform_moderation(request, self.media_type, queryset, action) + path = reverse( + f"admin:api_{self.media_type}decision_change", args=(decision.id,) + ) + messages.success( + request, + format_html( + 'Successfully moderated {} items via decision {}.', + queryset.count(), + path, + decision.id, + ), + ) + return None + + objects_name = opts.verbose_name if count == 1 else opts.verbose_name_plural + moderatable_objects = [ + format_html( + '{}', + reverse( + f"admin:api_{queryset.model._meta.model_name}_change", + args=(obj.pk,), + ), + obj, + ) + for obj in queryset + ] + + context = { + **self.admin_site.each_context(request), + "title": "Are you sure?", + "objects_name": str(objects_name), + "moderatable_objects": [moderatable_objects], + "stats": dict(stats).items(), + "queryset": queryset, + "opts": opts, + "action_checkbox_name": helpers.ACTION_CHECKBOX_NAME, + "media": self.media, + "decision_action": action, + } + + return TemplateResponse( + request, + "admin/api/bulk_moderation_confirmation.html", + context, + ) + + +class MediaListAdmin(BulkModerationMixin, admin.ModelAdmin): media_type = None def __init__(self, *args, **kwargs): @@ -294,23 +397,30 @@ def has_sensitive_text(self, obj): list_display_links = ("identifier",) search_fields = _production_deferred("identifier") sortable_by = () # Ordering is defined in ``get_queryset``. + actions = ["marked_sensitive", "deindexed_sensitive", "deindexed_copyright"] def get_list_filter(self, request): return (get_pending_record_filter(self.media_type),) def get_list_display(self, request): - if request.GET.get("pending_record_count") != "all": + if request.GET.get("pending_record_count") == "all": + return self.list_display + ( + "source", + "provider", + ) + elif request.GET.get("pending_record_count") == "prev": return self.list_display + ( "total_report_count", - "pending_report_count", "oldest_report_date", - "pending_reports_links", "has_sensitive_text", ) else: return self.list_display + ( - "source", - "provider", + "total_report_count", + "pending_report_count", + "oldest_report_date", + "pending_reports_links", + "has_sensitive_text", ) def total_report_count(self, obj): @@ -322,17 +432,18 @@ def pending_report_count(self, obj): def oldest_report_date(self, obj): return obj.oldest_report_date + @admin.display(description="Open reports") def pending_reports_links(self, obj): reports = getattr(obj, f"{self.media_type}_report") pending_reports = reports.filter(decision__isnull=True) data = [] for report in pending_reports.all(): - url = reverse( + path = reverse( f"admin:api_{self.media_type}report_change", args=(report.id,) ) - data.append(format_html('Report {}', url, report.id)) + data.append(format_html('• Report {}', path, report.id)) - return mark_safe(", ".join(data)) + return mark_safe("
".join(data)) def changelist_view(self, request, extra_context=None): extra_context = extra_context or {} @@ -554,6 +665,31 @@ def report_create_view(self, request, object_id): def get_changelist(self, request, **kwargs): return PredeterminedOrderChangelist + ################ + # Bulk actions # + ################ + + @admin.action( + permissions=["bulk_mod"], + description="Mark selected %(verbose_name_plural)s as sensitive", + ) + def marked_sensitive(self, request, queryset): + return self._bulk_mod(request, queryset, DecisionAction.MARKED_SENSITIVE) + + @admin.action( + permissions=["bulk_mod"], + description="Deindex selected %(verbose_name_plural)s (sensitive)", + ) + def deindexed_sensitive(self, request, queryset): + return self._bulk_mod(request, queryset, DecisionAction.DEINDEXED_SENSITIVE) + + @admin.action( + permissions=["bulk_mod"], + description="Deindex selected %(verbose_name_plural)s (copyright)", + ) + def deindexed_copyright(self, request, queryset): + return self._bulk_mod(request, queryset, DecisionAction.DEINDEXED_COPYRIGHT) + class MediaReportAdmin(admin.ModelAdmin): media_type = None @@ -586,7 +722,6 @@ def is_pending(self, obj): "reason", ("decision", admin.EmptyFieldListFilter), # ~is_pending ) - list_select_related = ("media_obj",) search_fields = ("description", *_production_deferred("media_obj__identifier")) @admin.display(description="Media obj") @@ -663,13 +798,18 @@ def has_add_permission(self, request) -> bool: @admin.display(description="Media objs") def media_ids(self, obj): through_objs = getattr(obj, f"{self.media_type}decisionthrough_set").all() - text = [] + data = [] for obj in through_objs: - path = reverse( - f"admin:api_{self.media_type}_change", args=(obj.media_obj.id,) - ) - text.append(f'• {obj.media_obj}') - return format_html("
".join(text)) + try: + path = reverse( + f"admin:api_{self.media_type}_change", args=(obj.media_obj.id,) + ) + data.append( + format_html('• {}', path, obj.media_obj.identifier) + ) + except ObjectDoesNotExist: + data.append(f"• {obj.media_obj_id}") + return mark_safe("
".join(data)) ############### # Change view # @@ -706,8 +846,20 @@ class MediaDecisionThroughAdmin(admin.TabularInline): autocomplete_fields = _production_deferred("media_obj") raw_id_fields = _non_production_deferred("media_obj") + def get_readonly_fields(self, request, obj): + if is_mutable: + return super().get_readonly_fields(request, obj) + else: + return ("media_obj_id",) + + def get_exclude(self, request, obj): + if is_mutable: + return super().get_exclude(request, obj) + else: + return ("media_obj",) + def has_add_permission(self, request, obj=None): - return is_mutable and super().has_change_permission(request, obj) + return is_mutable and super().has_add_permission(request, obj) def has_change_permission(self, request, obj=None): return is_mutable and super().has_change_permission(request, obj) @@ -722,6 +874,60 @@ def save_model(self, request, obj, form, change): return super().save_model(request, obj, form, change) +class MediaSubreportAdmin(BulkModerationMixin, admin.ModelAdmin): + media_type = None + + exclude = ("media_obj",) + ordering = ("-created_on",) + search_fields = ("media_obj__identifier",) + readonly_fields = ("media_obj_id",) + + def has_add_permission(self, *args, **kwargs): + # These objects are created through moderation and + # bulk-moderation operations. + return False + + +class DeletedMediaAdmin(MediaSubreportAdmin): + actions = ["reversed_deindex"] + list_display = ("media_obj_id", "created_on") + + ################ + # Bulk actions # + ################ + + @admin.action( + permissions=["bulk_mod"], + description="Reindex selected %(verbose_name_plural)s", + ) + def reversed_deindex(self, request, queryset): + return self._bulk_mod(request, queryset, DecisionAction.REVERSED_DEINDEX) + + +class SensitiveMediaAdmin(MediaSubreportAdmin): + actions = ["reversed_mark_sensitive"] + list_display = ("media_obj_id", "created_on", "is_deindexed") + + @admin.display(description="Deindexed?", boolean=True) + def is_deindexed(self, obj): + try: + getattr(obj, "media_obj") + return False + except ObjectDoesNotExist: + return True + + ################ + # Bulk actions # + ################ + + @admin.action( + permissions=["bulk_mod"], + description="Unmark selected %(verbose_name_plural)s as sensitive", + ) + def reversed_mark_sensitive(self, request, queryset): + return self._bulk_mod(request, queryset, DecisionAction.REVERSED_MARK_SENSITIVE) + + class ImageReportAdmin(MediaReportAdmin): media_type = "image" @@ -748,11 +954,17 @@ class AudioDecisionAdmin(MediaDecisionAdmin): through_model = AudioDecisionThrough -class MediaSubreportAdmin(admin.ModelAdmin): - exclude = ("media_obj",) - search_fields = ("media_obj__identifier",) - readonly_fields = ("media_obj_id",) +class SensitiveImageAdmin(SensitiveMediaAdmin): + media_type = "image" - def has_add_permission(self, *args, **kwargs): - """Create ``_Report`` instances instead.""" - return False + +class SensitiveAudioAdmin(SensitiveMediaAdmin): + media_type = "audio" + + +class DeletedImageAdmin(DeletedMediaAdmin): + media_type = "image" + + +class DeletedAudioAdmin(DeletedMediaAdmin): + media_type = "audio" diff --git a/api/api/constants/moderation.py b/api/api/constants/moderation.py index ba1232e1d18..416df5f6a42 100644 --- a/api/api/constants/moderation.py +++ b/api/api/constants/moderation.py @@ -19,5 +19,42 @@ class DecisionAction(models.TextChoices): REVERSED_DEINDEX = "reversed_deindex", "Reversed deindex" @property - def is_reversal(self): + def is_forward(self): + return self in { + self.MARKED_SENSITIVE, + self.DEINDEXED_COPYRIGHT, + self.DEINDEXED_SENSITIVE, + } + + @property + def is_reverse(self): return self in {self.REVERSED_DEINDEX, self.REVERSED_MARK_SENSITIVE} + + @property + def is_deindex(self): + return self in {self.DEINDEXED_COPYRIGHT, self.DEINDEXED_SENSITIVE} + + @property + def verb(self) -> str: + """ + Return the verb form of the action for use in sentences. + + :param object: the object of the sentence + :return: the grammatically coherent verb phrase of the action + """ + + match self: + case self.MARKED_SENSITIVE: + return "marked as sensitive" + case self.DEINDEXED_COPYRIGHT: + return "deindexed (copyright)" + case self.DEINDEXED_SENSITIVE: + return "deindexed (sensitive)" + case self.REJECTED_REPORTS: + return "rejected" + case self.DEDUPLICATED_REPORTS: + return "de-duplicated" + case self.REVERSED_MARK_SENSITIVE: + return "unmarked as sensitive" + case self.REVERSED_DEINDEX: + return "reindexed" diff --git a/api/api/models/media.py b/api/api/models/media.py index ff16b9778a2..694ac3a05ca 100644 --- a/api/api/models/media.py +++ b/api/api/models/media.py @@ -8,7 +8,7 @@ from django.utils.html import format_html import structlog -from elasticsearch import Elasticsearch, NotFoundError +from elasticsearch import Elasticsearch, NotFoundError, helpers from openverse_attribution.license import License from api.constants.moderation import DecisionAction @@ -365,9 +365,9 @@ def save(self, *args, **kwargs): class PerformIndexUpdateMixin: - @property - def indexes(self): - return [self.es_index, f"{self.es_index}-filtered"] + @classmethod + def indexes(cls): + return [cls.es_index, f"{cls.es_index}-filtered"] def _perform_index_update(self, method: str, raise_errors: bool, **es_method_args): """ @@ -387,7 +387,7 @@ def _perform_index_update(self, method: str, raise_errors: bool, **es_method_arg f"with identifier {self.media_obj.identifier}." ) - for index in self.indexes: + for index in self.indexes(): try: getattr(es, method)( index=index, @@ -404,6 +404,42 @@ def _perform_index_update(self, method: str, raise_errors: bool, **es_method_arg ) continue + @classmethod + def _bulk_perform_index_update( + cls, + method: str, + document_ids: list[str], + **es_method_args, + ): + """ + Call ``method`` on the Elasticsearch client in a bulk operation. + + Automatically handles 404 errors for documents, forces a refresh, + and calls the method for origin and filtered indexes. + + Unlike the single-document behaviour, this function does not + provide validation to check if the media objects exist. + """ + + es: Elasticsearch = settings.ES + + actions = [ + { + "_op_type": method, + "_index": index, + "_id": document_id, + **es_method_args, + } + for index in cls.indexes() + for document_id in document_ids + ] + + # Perform all actions in bulk, while allowing for missing + # documents, similar to the single-document behaviour. In all + # other cases, this raises ``BulkIndexError``. + helpers.bulk(es, actions, ignore_status=(404,)) + es.indices.refresh(index=cls.indexes()) + class AbstractDeletedMedia(PerformIndexUpdateMixin, OpenLedgerModel): """ @@ -432,22 +468,41 @@ class AbstractDeletedMedia(PerformIndexUpdateMixin, OpenLedgerModel): """ Sub-classes must override this field to point to a concrete sub-class of ``AbstractMedia``. + + Note that unlike ``AbstractSensitiveMedia``, this does not provide + a ``delete()`` method to undo the effects of ``save()``. Deindexed + media can only be restored through a data refresh. """ class Meta: abstract = True - def _update_es(self, raise_errors: bool) -> models.Model: + def save(self, *args, **kwargs): + super().save(*args, **kwargs) + self.perform_action() + + def _update_es(self, raise_errors: bool): self._perform_index_update( "delete", raise_errors, ) - def save(self, *args, **kwargs): + def perform_action(self): self._update_es(True) - super().save(*args, **kwargs) self.media_obj.delete() # remove the actual model instance + @classmethod + def _bulk_update_es(cls, media_item_ids: list[str]): + cls._bulk_perform_index_update( + "delete", + media_item_ids, + ) + + @classmethod + def bulk_perform_action(cls, media_items: list[type[AbstractMedia]]): + cls._bulk_update_es(media_items.values_list("id", flat=True)) + media_items.delete() # remove the actual model instances + class AbstractSensitiveMedia(PerformIndexUpdateMixin, models.Model): """ @@ -481,6 +536,14 @@ class AbstractSensitiveMedia(PerformIndexUpdateMixin, models.Model): class Meta: abstract = True + def save(self, *args, **kwargs): + self._update_es(True, True) + super().save(*args, **kwargs) + + def delete(self, *args, **kwargs): + self._update_es(False, False) + super().delete(*args, **kwargs) + def _update_es(self, is_mature: bool, raise_errors: bool): """ Update the Elasticsearch document associated with the given model. @@ -494,13 +557,21 @@ def _update_es(self, is_mature: bool, raise_errors: bool): doc={"mature": is_mature}, ) - def save(self, *args, **kwargs): - self._update_es(True, True) - super().save(*args, **kwargs) + @classmethod + def _bulk_update_es(cls, is_mature: bool, media_item_ids: list[str]): + cls._bulk_perform_index_update( + "update", + media_item_ids, + doc={"mature": is_mature}, + ) - def delete(self, *args, **kwargs): - self._update_es(False, False) - super().delete(*args, **kwargs) + @classmethod + def bulk_perform_action( + cls, + is_mature: bool, + media_items: list[type[AbstractMedia]], + ): + cls._bulk_update_es(is_mature, media_items.values_list("id", flat=True)) class AbstractMediaList(OpenLedgerModel): diff --git a/api/api/templates/admin/api/bulk_moderation_confirmation.html b/api/api/templates/admin/api/bulk_moderation_confirmation.html new file mode 100644 index 00000000000..16586b58df0 --- /dev/null +++ b/api/api/templates/admin/api/bulk_moderation_confirmation.html @@ -0,0 +1,68 @@ +{% extends "admin/base_site.html" %} +{% load admin_urls static %} + +{% block extrahead %} + {{ block.super }} + {{ media }} + +{% endblock %} + +{% block bodyclass %}{{ block.super }} app-{{ opts.app_label }} model-{{ opts.model_name }} delete-confirmation delete-selected-confirmation{% endblock %} + +{% block breadcrumbs %} + +{% endblock %} + +{% block content %} +

Are you sure you want the selected {{ objects_name }} to be {{ decision_action.verb }}?

+ +{% if decision_action == "reversed_deindex" %} +
+ This action does not immediately result in deindexed records being + added back to the DB and Elasticsearch indices. When the records were + originally deindexed, they were deleted from both, and there is no + quick way to restore them without running a data refresh. +
+ +{% endif %} + +

Summary

+ + +

Objects

+{% for moderatable_object in moderatable_objects %} + +{% endfor %} + +
+ {% csrf_token %} +
+
+ +
+ {% for obj in queryset %} + + {% endfor %} + + + + No, take me back +
+
+{% endblock %} diff --git a/api/api/utils/moderation.py b/api/api/utils/moderation.py new file mode 100644 index 00000000000..cf2ee0f4b60 --- /dev/null +++ b/api/api/utils/moderation.py @@ -0,0 +1,137 @@ +from typing import Literal + +import structlog + +from api.constants.moderation import DecisionAction +from api.models.audio import ( + Audio, + AudioDecision, + AudioDecisionThrough, + DeletedAudio, + SensitiveAudio, +) +from api.models.image import ( + DeletedImage, + Image, + ImageDecision, + ImageDecisionThrough, + SensitiveImage, +) +from api.models.media import AbstractDeletedMedia, AbstractMedia, AbstractSensitiveMedia + + +logger = structlog.get_logger(__name__) + + +def perform_moderation( + request, + media_type: Literal["audio", "image"], + mod_objects: list[ + type[AbstractSensitiveMedia] | type[AbstractDeletedMedia] | type[AbstractMedia] + ], + action: DecisionAction, +): + """ + Perform bulk moderation on the given models. + + If the decision action is forward, ``mod_objects`` is a queryset of + ``Media`` items. We can get the UUIDs from the ``identifier`` field. + + If the decision action is reverse, ``mod_objects`` is a queryset of + ``SensitiveMedia`` or ``DeletedMedia`` items. We can get the UUIDs + from the ``media_obj_id`` field. + + Note that bulk moderation will not resolve any open reports. It is + up to the moderator to manually link open reports with the + appropriate decisions and resolve them. + + :param request: the request used to determine the moderator + :param media_type: the type of media being bulk-moderated + :param mod_objects: a ``QuerySet`` of media items to bulk-moderate + :para action: the action of the bulk moderation decision + """ + + match media_type: + case "audio": + Media = Audio + SensitiveMedia = SensitiveAudio + DeletedMedia = DeletedAudio + MediaDecision = AudioDecision + MediaDecisionThrough = AudioDecisionThrough + case "image": + Media = Image + SensitiveMedia = SensitiveImage + DeletedMedia = DeletedImage + MediaDecision = ImageDecision + MediaDecisionThrough = ImageDecisionThrough + + # The following block uses ``list`` to force evaluation of the + # queryset and store the values because calling ``delete`` below + # will otherwise make the querysets evaluate differently. + if action.is_reverse: + identifiers = list(mod_objects.values_list("media_obj_id", flat=True)) + else: + identifiers = list(mod_objects.values_list("identifier", flat=True)) + + logger.info( + "Performing bulk moderation action.", + action=action, + model=mod_objects.model._meta.label, + identifiers=identifiers, + ) + + match action: + case DecisionAction.MARKED_SENSITIVE: + created = SensitiveMedia.objects.bulk_create( + [SensitiveMedia(media_obj_id=identifier) for identifier in identifiers] + ) + logger.debug(f"Created sensitive-{media_type} items.", count=len(created)) + SensitiveMedia.bulk_perform_action(True, mod_objects) + + case DecisionAction.DEINDEXED_COPYRIGHT | DecisionAction.DEINDEXED_SENSITIVE: + created = DeletedMedia.objects.bulk_create( + [DeletedMedia(media_obj_id=identifier) for identifier in identifiers] + ) + logger.debug(f"Created deleted-{media_type} items.", count=len(created)) + DeletedMedia.bulk_perform_action(mod_objects) + + case DecisionAction.REVERSED_MARK_SENSITIVE: + media_items = Media.objects.filter(identifier__in=identifiers) + SensitiveMedia.bulk_perform_action(False, media_items) + logger.debug( + f"Unmarked {media_type} items as sensitive.", + identifier_count=len(identifiers), + media_item_count=len(media_items), + ) + + count, _ = mod_objects.delete() + logger.debug(f"Deleted sensitive-{media_type} items.", count=count) + + case DecisionAction.REVERSED_DEINDEX: + # There is no bulk action for reversed-deindex. The media + # item will eventually be reindexed through data refresh. + count, _ = mod_objects.delete() + logger.debug(f"Deleted deleted-{media_type} items.", count=count) + + media_decision = MediaDecision.objects.create( + action=action, + moderator=request.user, + notes=request.POST.get("notes"), + ) + logger.debug( + f"{media_type}-decision_created", + decision=media_decision.id, + action=media_decision.action, + notes=media_decision.notes, + moderator=media_decision.moderator.get_username(), + ) + + created = MediaDecisionThrough.objects.bulk_create( + [ + MediaDecisionThrough(decision_id=media_decision.id, media_obj_id=identifier) + for identifier in identifiers + ] + ) + logger.debug(f"{media_type}-decision-through_bulk_created", count=len(created)) + + return media_decision diff --git a/api/test/factory/models/__init__.py b/api/test/factory/models/__init__.py index 7560ee6cafb..33f2d42785c 100644 --- a/api/test/factory/models/__init__.py +++ b/api/test/factory/models/__init__.py @@ -2,9 +2,11 @@ AudioAddOnFactory, AudioFactory, AudioReportFactory, + DeletedAudioFactory, SensitiveAudioFactory, ) from test.factory.models.image import ( + DeletedImageFactory, ImageFactory, ImageReportFactory, SensitiveImageFactory, diff --git a/api/test/factory/models/audio.py b/api/test/factory/models/audio.py index b5a096082af..045d5871d05 100644 --- a/api/test/factory/models/audio.py +++ b/api/test/factory/models/audio.py @@ -1,7 +1,13 @@ import factory from factory.django import DjangoModelFactory -from api.models.audio import Audio, AudioAddOn, AudioReport, SensitiveAudio +from api.models.audio import ( + Audio, + AudioAddOn, + AudioReport, + DeletedAudio, + SensitiveAudio, +) from test.factory.faker import Faker from test.factory.models.media import ( IdentifierFactory, @@ -17,6 +23,13 @@ class Meta: media_obj = factory.SubFactory("test.factory.models.audio.AudioFactory") +class DeletedAudioFactory(DjangoModelFactory): + class Meta: + model = DeletedAudio + + media_obj = factory.SubFactory("test.factory.models.audio.AudioFactory") + + class AudioFactory(MediaFactory): _sensitive_factory = SensitiveAudioFactory diff --git a/api/test/factory/models/image.py b/api/test/factory/models/image.py index 856c42d686d..63c59d7c210 100644 --- a/api/test/factory/models/image.py +++ b/api/test/factory/models/image.py @@ -1,7 +1,7 @@ import factory from factory.django import DjangoModelFactory -from api.models.image import Image, ImageReport, SensitiveImage +from api.models.image import DeletedImage, Image, ImageReport, SensitiveImage from test.factory.models.media import MediaFactory, MediaReportFactory @@ -12,6 +12,13 @@ class Meta: media_obj = factory.SubFactory("test.factory.models.image.ImageFactory") +class DeletedImageFactory(DjangoModelFactory): + class Meta: + model = DeletedImage + + media_obj = factory.SubFactory("test.factory.models.image.ImageFactory") + + class ImageFactory(MediaFactory): _sensitive_factory = SensitiveImageFactory diff --git a/api/test/factory/models/oauth2.py b/api/test/factory/models/oauth2.py index 28f5eac9e39..1264eab1a45 100644 --- a/api/test/factory/models/oauth2.py +++ b/api/test/factory/models/oauth2.py @@ -1,4 +1,6 @@ from django.contrib.auth import get_user_model +from django.contrib.auth.models import AbstractUser, Permission +from django.db.models import Q from django.utils import timezone import factory @@ -71,5 +73,22 @@ class Meta: class UserFactory(DjangoModelFactory): + username = Faker("word") + class Meta: model = get_user_model() + + @classmethod + def create(cls, *args, **kwargs) -> AbstractUser: + is_moderator = kwargs.pop("is_moderator", False) + + user = super().create(*args, **kwargs) + + if is_moderator: + user.user_permissions.set( + Permission.objects.filter( + Q(name__contains="Can add") & Q(name__endswith="decision") + ) + ) + + return user diff --git a/api/test/fixtures/media_type_config.py b/api/test/fixtures/media_type_config.py index 87ed34a4832..f5f26ded8eb 100644 --- a/api/test/fixtures/media_type_config.py +++ b/api/test/fixtures/media_type_config.py @@ -46,6 +46,7 @@ class MediaTypeConfig: model_factory: MediaFactory model_class: AbstractMedia sensitive_factory: MediaFactory + deleted_factory: MediaFactory sensitive_class: AbstractSensitiveMedia search_request_serializer: MediaSearchRequestSerializer model_serializer: MediaSerializer @@ -78,6 +79,7 @@ def indexes(self): model_factory=model_factories.ImageFactory, model_class=Image, sensitive_factory=model_factories.SensitiveImageFactory, + deleted_factory=model_factories.DeletedImageFactory, search_request_serializer=ImageSearchRequestSerializer, model_serializer=ImageSerializer, report_serializer=ImageReportRequestSerializer, @@ -97,6 +99,7 @@ def indexes(self): model_factory=model_factories.AudioFactory, model_class=Audio, sensitive_factory=model_factories.SensitiveAudioFactory, + deleted_factory=model_factories.DeletedAudioFactory, search_request_serializer=AudioSearchRequestSerializer, model_serializer=AudioSerializer, report_serializer=AudioReportRequestSerializer, diff --git a/api/test/unit/models/test_media.py b/api/test/unit/models/test_media.py index 6d7d3b00e67..000f3519015 100644 --- a/api/test/unit/models/test_media.py +++ b/api/test/unit/models/test_media.py @@ -1,4 +1,7 @@ +from django.conf import settings + import pytest +from elasticsearch_dsl import Document from api.models import Audio, Image @@ -54,3 +57,38 @@ def test_license_url_is_generated_if_missing(media_type, media_model): license_version="3.0", ) assert obj.license_url is not None + + +@pytest.mark.django_db +def test_deleted_media_bulk_action(media_type_config): + """ + Test that ``AbstractDeletedMedia`` performs bulk moderation by + deleting media items from ES. + """ + + media_ids = [media_type_config.model_factory.create().id for _ in range(2)] + + media_type_config.deleted_class._bulk_update_es(media_ids) + + for index in media_type_config.indexes: + for media_id in media_ids: + exists = Document.exists(id=media_id, index=index, using=settings.ES) + assert not exists + + +@pytest.mark.django_db +def test_sensitive_media_bulk_action(media_type_config): + """ + Test that ``AbstractSensitiveMedia`` performs bulk moderation by + setting ``mature`` field in ES. + """ + + media_ids = [media_type_config.model_factory.create().id for _ in range(2)] + + for mature in [True, False]: + media_type_config.sensitive_class._bulk_update_es(mature, media_ids) + + for index in media_type_config.indexes: + for media_id in media_ids: + doc = Document.get(id=media_id, index=index, using=settings.ES) + assert doc.mature == mature diff --git a/api/test/unit/utils/test_moderation.py b/api/test/unit/utils/test_moderation.py new file mode 100644 index 00000000000..76a0454c384 --- /dev/null +++ b/api/test/unit/utils/test_moderation.py @@ -0,0 +1,222 @@ +from unittest.mock import patch + +from django.test import RequestFactory + +import pytest + +from api.constants.moderation import DecisionAction +from api.models.media import AbstractDeletedMedia, AbstractSensitiveMedia +from api.utils.moderation import perform_moderation +from test.factory.models.oauth2 import UserFactory + + +pytestmark = pytest.mark.django_db + + +@pytest.fixture +def mod_request(): + notes = "bulk moderation" + + request = RequestFactory().post("/", {"notes": notes}) + + moderator = UserFactory.create(is_moderator=True) + request.user = moderator + + return moderator, request, notes + + +@pytest.mark.parametrize( + "prefix, action", + [ + ("sensitive", DecisionAction.MARKED_SENSITIVE), + ("deleted", DecisionAction.DEINDEXED_SENSITIVE), + ("deleted", DecisionAction.DEINDEXED_COPYRIGHT), + ], +) +def test_perform_moderation_creates_decision_and_through_models_for_forward_actions( + prefix, + action, + media_type_config, + mod_request, +): + media_count = 2 + uuids = [ + media_type_config.model_factory.create().identifier for _ in range(media_count) + ] + # ``QuerySet`` of ``AbstractMedia`` subclass + qs = media_type_config.model_class.objects.filter(identifier__in=uuids) + + moderator, request, notes = mod_request + dec = perform_moderation(request, media_type_config.media_type, qs, action) + + # Verify that the sensitive/deleted models were created. + assert ( + getattr(media_type_config, f"{prefix}_class") + .objects.filter(media_obj_id__in=uuids) + .count() + == media_count + ) + + # Verify decision attributes. + assert dec.moderator == moderator + assert dec.notes == notes + assert dec.action == action + + # Verify that number of through models equals number of media items. + assert ( + getattr(dec, f"{media_type_config.media_type}decisionthrough_set").count() + == media_count + ) + + +@patch.object(AbstractSensitiveMedia, "_bulk_update_es") +def test_perform_moderation_handles_marked_sensitive( + mock, + media_type_config, + mod_request, +): + media_count = 2 + uuids = [ + media_type_config.model_factory.create().identifier for _ in range(media_count) + ] + # ``QuerySet`` of ``AbstractMedia`` subclass + qs = media_type_config.model_class.objects.filter(identifier__in=uuids) + + request = mod_request[1] + action = DecisionAction.MARKED_SENSITIVE + perform_moderation(request, media_type_config.media_type, qs, action) + + # Verify that media items are now marked sensitive. + for uuid in uuids: + assert media_type_config.model_class.objects.get(identifier=uuid).sensitive + # Verify that documents have been updated in ES. + assert mock.called + + +@patch.object(AbstractDeletedMedia, "_bulk_update_es") +@pytest.mark.parametrize( + "action", + [DecisionAction.DEINDEXED_SENSITIVE, DecisionAction.DEINDEXED_COPYRIGHT], +) +def test_perform_moderation_handles_deindexed( + mock, + action, + media_type_config, + mod_request, +): + media_count = 2 + uuids = [ + media_type_config.model_factory.create().identifier for _ in range(media_count) + ] + # ``QuerySet`` of ``AbstractMedia`` subclass + qs = media_type_config.model_class.objects.filter(identifier__in=uuids) + + request = mod_request[1] + perform_moderation(request, media_type_config.media_type, qs, action) + + # Verify that media items are deleted. + assert not media_type_config.model_class.objects.filter( + identifier__in=uuids + ).exists() + # Verify that documents have been deleted in ES. + assert mock.called + + +@patch("django.conf.settings.ES") +@pytest.mark.parametrize( + "prefix, action", + [ + ("sensitive", DecisionAction.REVERSED_MARK_SENSITIVE), + ("deleted", DecisionAction.REVERSED_DEINDEX), + ], +) +def test_perform_moderation_creates_decision_and_through_models_for_reverse_actions( + mock_es, + prefix, + action, + media_type_config, + mod_request, +): + media_count = 2 + uuids = [ + getattr(media_type_config, f"{prefix}_factory").create().media_obj_id + for _ in range(media_count) + ] + # ``QuerySet`` of ``AbstractSensitiveMedia``/``AbstractDeletedMedia`` subclass + qs = getattr(media_type_config, f"{prefix}_class").objects.filter( + media_obj_id__in=uuids + ) + + moderator, request, notes = mod_request + dec = perform_moderation(request, media_type_config.media_type, qs, action) + + # Verify that the sensitive/deleted models were deleted. + assert ( + not getattr(media_type_config, f"{prefix}_class") + .objects.filter(media_obj_id__in=uuids) + .exists() + ) + + # Verify decision attributes. + assert dec.moderator == moderator + assert dec.notes == notes + assert dec.action == action + + # Verify that number of through models equals number of media items. + assert ( + getattr(dec, f"{media_type_config.media_type}decisionthrough_set").count() + == media_count + ) + + +@patch.object(AbstractSensitiveMedia, "_bulk_update_es") +def test_perform_moderation_handles_reversed_mark_sensitive( + mock, + media_type_config, + mod_request, +): + media_count = 2 + uuids = [ + media_type_config.sensitive_factory.create().media_obj_id + for _ in range(media_count) + ] + # ``QuerySet`` of ``AbstractSensitiveMedia`` subclass + qs = media_type_config.sensitive_class.objects.filter(media_obj_id__in=uuids) + + request = mod_request[1] + action = DecisionAction.REVERSED_MARK_SENSITIVE + perform_moderation(request, media_type_config.media_type, qs, action) + + # Verify that media items are not marked sensitive. + for uuid in uuids: + assert not media_type_config.model_class.objects.get(identifier=uuid).sensitive + # Verify that documents have been updated in ES. + assert mock.called + + +@patch("django.conf.settings.ES") +@patch.object(AbstractDeletedMedia, "_bulk_update_es") +def test_perform_moderation_handles_reversed_deindex( + mock_es, + mock, + media_type_config, + mod_request, +): + media_count = 2 + uuids = [ + media_type_config.deleted_factory.create().media_obj_id + for _ in range(media_count) + ] + # ``QuerySet`` of ``AbstractDeletedMedia`` subclass + qs = media_type_config.deleted_class.objects.filter(media_obj_id__in=uuids) + + request = mod_request[1] + action = DecisionAction.REVERSED_DEINDEX + perform_moderation(request, media_type_config.media_type, qs, action) + + # Verify that media items stay deleted and are not restored. + assert not media_type_config.model_class.objects.filter( + identifier__in=uuids + ).exists() + # Verify that no ES call was made. + assert not mock.called