diff --git a/care/facility/models/file_upload.py b/care/facility/models/file_upload.py
index d8873e3d28..24ed0703d5 100644
--- a/care/facility/models/file_upload.py
+++ b/care/facility/models/file_upload.py
@@ -1,4 +1,5 @@
 import enum
+import math
 import time
 from uuid import uuid4
 
@@ -125,3 +126,20 @@ def get_object(self, bucket=settings.FILE_UPLOAD_BUCKET, **kwargs):
             Key=f"{self.FileType(self.file_type).name}/{self.internal_name}",
             **kwargs,
         )
+
+    @staticmethod
+    def bulk_delete_objects(s3_keys):
+        s3 = boto3.client("s3", **cs_provider.get_client_config())
+        bucket = settings.FILE_UPLOAD_BUCKET
+        max_keys_per_batch = 1000
+
+        num_of_batches = math.ceil(len(s3_keys) / max_keys_per_batch)
+        for batch_index in range(num_of_batches):
+            start_index = batch_index * max_keys_per_batch
+            end_index = min(start_index + max_keys_per_batch, len(s3_keys))
+            batch_keys = s3_keys[start_index:end_index]
+
+            s3.delete_objects(
+                Bucket=bucket,
+                Delete={"Objects": [{"Key": key} for key in batch_keys], "Quiet": True},
+            )
diff --git a/care/facility/tasks/__init__.py b/care/facility/tasks/__init__.py
index 7ebf63cdaa..fddc5ee78f 100644
--- a/care/facility/tasks/__init__.py
+++ b/care/facility/tasks/__init__.py
@@ -1,7 +1,10 @@
 from celery import current_app
 from celery.schedules import crontab
 
-from care.facility.tasks.cleanup import delete_old_notifications
+from care.facility.tasks.cleanup import (
+    delete_incomplete_file_uploads,
+    delete_old_notifications,
+)
 from care.facility.tasks.summarisation import (
     summarise_district_patient,
     summarise_facility_capacity,
@@ -18,6 +21,11 @@ def setup_periodic_tasks(sender, **kwargs):
         delete_old_notifications.s(),
         name="delete_old_notifications",
     )
+    sender.add_periodic_task(
+        crontab(hour="0", minute="0"),
+        delete_incomplete_file_uploads.s(),
+        name="delete_incomplete_file_uploads",
+    )
     sender.add_periodic_task(
         crontab(hour="*/4", minute=59),
         summarise_triage.s(),
diff --git a/care/facility/tasks/cleanup.py b/care/facility/tasks/cleanup.py
index 3f913142cc..3f3e8e752f 100644
--- a/care/facility/tasks/cleanup.py
+++ b/care/facility/tasks/cleanup.py
@@ -3,6 +3,7 @@
 from celery import shared_task
 from django.utils import timezone
 
+from care.facility.models.file_upload import FileUpload
 from care.facility.models.notification import Notification
 
 
@@ -10,3 +11,19 @@
 def delete_old_notifications():
     ninety_days_ago = timezone.now() - timedelta(days=90)
     Notification.objects.filter(created_date__lte=ninety_days_ago).delete()
+
+
+@shared_task
+def delete_incomplete_file_uploads():
+    yesterday = timezone.now() - timedelta(days=1)
+    incomplete_uploads = FileUpload.objects.filter(
+        created_date__lte=yesterday, upload_completed=False
+    )
+
+    s3_keys = [
+        f"{upload.FileType(upload.file_type).name}/{upload.internal_name}"
+        for upload in incomplete_uploads
+    ]
+
+    FileUpload.bulk_delete_objects(s3_keys)
+    incomplete_uploads.update(deleted=True)
diff --git a/care/facility/tests/test_FileUpload_model.py b/care/facility/tests/test_FileUpload_model.py
new file mode 100644
index 0000000000..a14a431eb3
--- /dev/null
+++ b/care/facility/tests/test_FileUpload_model.py
@@ -0,0 +1,41 @@
+import math
+from unittest import TestCase
+from unittest.mock import patch
+
+from care.facility.models.file_upload import FileUpload
+from care.utils.csp import config as cs_provider
+
+
+class FileUploadModelTest(TestCase):
+    @patch("boto3.client")
+    def test_bulk_delete_objects(self, mock_s3_client):
+        s3_keys = ["key1", "key2", "key3"]
+
+        with patch(
+            "care.facility.models.file_upload.settings.FILE_UPLOAD_BUCKET",
+            "test_bucket",
+        ):
+            FileUpload.bulk_delete_objects(s3_keys)
+
+        mock_s3_client.assert_called_once_with("s3", **cs_provider.get_client_config())
+        mock_s3_client.return_value.delete_objects.assert_called_once_with(
+            Bucket="test_bucket",
+            Delete={"Objects": [{"Key": key} for key in s3_keys], "Quiet": True},
+        )
+
+    def test_batch_iteration(self):
+        s3_keys = ["key1", "key2", "key3", "key4", "key5"]
+        max_keys_per_batch = 2
+
+        expected_num_of_batches = math.ceil(len(s3_keys) / max_keys_per_batch)
+        expected_batch_keys = [["key1", "key2"], ["key3", "key4"], ["key5"]]
+
+        batches = []
+        for batch_index in range(expected_num_of_batches):
+            start_index = batch_index * max_keys_per_batch
+            end_index = min(start_index + max_keys_per_batch, len(s3_keys))
+            batch_keys = s3_keys[start_index:end_index]
+            batches.append(batch_keys)
+
+        self.assertEqual(len(batches), expected_num_of_batches)
+        self.assertEqual(batches, expected_batch_keys)
diff --git a/care/facility/tests/test_delete_incomplete_file_uploads_task.py b/care/facility/tests/test_delete_incomplete_file_uploads_task.py
new file mode 100644
index 0000000000..f73b760b2b
--- /dev/null
+++ b/care/facility/tests/test_delete_incomplete_file_uploads_task.py
@@ -0,0 +1,50 @@
+from unittest.mock import patch
+
+from django.test import TestCase
+from django.utils import timezone
+from freezegun import freeze_time
+
+from care.facility.models.file_upload import FileUpload
+from care.facility.tasks.cleanup import delete_incomplete_file_uploads
+
+
+class DeleteIncompleteFileUploadsTest(TestCase):
+    def test_delete_incomplete_file_uploads_with_mock_s3(self):
+        yesterday = timezone.now() - timezone.timedelta(days=1)
+
+        # Create dummy FileUpload objects
+        with freeze_time(yesterday):
+            file_upload1 = FileUpload.objects.create(
+                file_type=FileUpload.FileType.PATIENT.value,
+                internal_name="file1.pdf",
+                upload_completed=False,
+            )
+            file_upload2 = FileUpload.objects.create(
+                file_type=FileUpload.FileType.PATIENT.value,
+                internal_name="file2.jpg",
+                upload_completed=False,
+            )
+            file_upload3 = FileUpload.objects.create(
+                file_type=FileUpload.FileType.PATIENT.value,
+                internal_name="file3.csv",
+                upload_completed=True,
+            )
+
+        # Patch the bulk_delete_objects method
+        with patch(
+            "care.facility.models.file_upload.FileUpload.bulk_delete_objects"
+        ) as mock_bulk_delete_objects:
+            # Call the Celery task
+            delete_incomplete_file_uploads()
+
+            # Assert
+            self.assertFalse(FileUpload.objects.filter(pk=file_upload1.pk).exists())
+            self.assertFalse(FileUpload.objects.filter(pk=file_upload2.pk).exists())
+            self.assertTrue(FileUpload.objects.filter(pk=file_upload3.pk).exists())
+
+            mock_bulk_delete_objects.assert_called_once_with(
+                [
+                    f"{file_upload1.FileType(file_upload1.file_type).name}/{file_upload1.internal_name}",
+                    f"{file_upload2.FileType(file_upload2.file_type).name}/{file_upload2.internal_name}",
+                ]
+            )