From 8946d5fda7f63f94803c4903cf5c878290408660 Mon Sep 17 00:00:00 2001 From: yaswanthsaivendra Date: Tue, 23 May 2023 16:21:36 +0530 Subject: [PATCH 01/15] used prefetch_related method to optimize the query performed to fetch a many to many relationship --- care/users/api/serializers/user.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/care/users/api/serializers/user.py b/care/users/api/serializers/user.py index e3b0edc0db..ad594f1ceb 100644 --- a/care/users/api/serializers/user.py +++ b/care/users/api/serializers/user.py @@ -358,11 +358,6 @@ class UserAssignedSerializer(serializers.ModelSerializer): home_facility_object = FacilityBareMinimumSerializer( source="home_facility", read_only=True ) - skills = serializers.SerializerMethodField() - - def get_skills(self, obj): - qs = obj.skills.filter(userskill__deleted=False) - return SkillSerializer(qs, many=True).data class Meta: model = User @@ -382,6 +377,14 @@ class Meta: "skills", ) + def to_representation(self, instance): + representation = super().to_representation(instance) + instance = User.objects.prefetch_related("skills").get(pk=instance.pk) + skills = instance.skills.filter(userskill__deleted=False) + skills_data = SkillSerializer(skills, many=True).data + representation["skills"] = skills_data + return representation + class UserListSerializer(serializers.ModelSerializer): local_body_object = LocalBodySerializer(source="local_body", read_only=True) From 7d410fdb541859ccee1e8e51bdbd1d50d831207f Mon Sep 17 00:00:00 2001 From: yaswanthsaivendra Date: Sat, 27 May 2023 00:12:51 +0530 Subject: [PATCH 02/15] Revert "add direnv support (#1304)" This reverts commit f6136618821e17278bd46824a0bcd9e3faad1960. --- .envrc | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 .envrc diff --git a/.envrc b/.envrc deleted file mode 100644 index f5f5cd5cd5..0000000000 --- a/.envrc +++ /dev/null @@ -1,2 +0,0 @@ -source .venv/bin/activate; unset PS1 -dotenv From 7179fdf297aac1c4a25d0b756e2d5881a1247d4d Mon Sep 17 00:00:00 2001 From: yaswanthsaivendra Date: Sat, 27 May 2023 00:17:28 +0530 Subject: [PATCH 03/15] Revert "Revert "add direnv support (#1304)"" This reverts commit 7d410fdb541859ccee1e8e51bdbd1d50d831207f. --- .envrc | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .envrc diff --git a/.envrc b/.envrc new file mode 100644 index 0000000000..f5f5cd5cd5 --- /dev/null +++ b/.envrc @@ -0,0 +1,2 @@ +source .venv/bin/activate; unset PS1 +dotenv From 9cb5f7c0cf3145ddeb1d8de3c1436dac3a34563b Mon Sep 17 00:00:00 2001 From: yaswanthsaivendra Date: Sat, 27 May 2023 00:18:01 +0530 Subject: [PATCH 04/15] Revert "used prefetch_related method to optimize the query performed to fetch a many to many relationship" This reverts commit 8946d5fda7f63f94803c4903cf5c878290408660. --- care/users/api/serializers/user.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/care/users/api/serializers/user.py b/care/users/api/serializers/user.py index ad594f1ceb..e3b0edc0db 100644 --- a/care/users/api/serializers/user.py +++ b/care/users/api/serializers/user.py @@ -358,6 +358,11 @@ class UserAssignedSerializer(serializers.ModelSerializer): home_facility_object = FacilityBareMinimumSerializer( source="home_facility", read_only=True ) + skills = serializers.SerializerMethodField() + + def get_skills(self, obj): + qs = obj.skills.filter(userskill__deleted=False) + return SkillSerializer(qs, many=True).data class Meta: model = User @@ -377,14 +382,6 @@ class Meta: "skills", ) - def to_representation(self, instance): - representation = super().to_representation(instance) - instance = User.objects.prefetch_related("skills").get(pk=instance.pk) - skills = instance.skills.filter(userskill__deleted=False) - skills_data = SkillSerializer(skills, many=True).data - representation["skills"] = skills_data - return representation - class UserListSerializer(serializers.ModelSerializer): local_body_object = LocalBodySerializer(source="local_body", read_only=True) From 7000a38bafd16e789ccf6ffc5f9a1f41d0c34429 Mon Sep 17 00:00:00 2001 From: yaswanthsaivendra Date: Sat, 27 May 2023 02:17:43 +0530 Subject: [PATCH 05/15] used prefetch where we are querying the skills using UserAssigned Serializer --- care/facility/api/viewsets/facility_users.py | 20 ++++++++++++++------ care/users/api/serializers/user.py | 6 +----- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/care/facility/api/viewsets/facility_users.py b/care/facility/api/viewsets/facility_users.py index fc8c4ecb86..2fc1e751a5 100644 --- a/care/facility/api/viewsets/facility_users.py +++ b/care/facility/api/viewsets/facility_users.py @@ -1,3 +1,5 @@ +from django.core.exceptions import ObjectDoesNotExist +from django.db.models import Prefetch from django_filters import rest_framework as filters from rest_framework import mixins from rest_framework.exceptions import ValidationError @@ -6,12 +8,14 @@ from care.facility.models.facility import Facility from care.users.api.serializers.user import UserAssignedSerializer -from care.users.models import User +from care.users.models import Skill, User class UserFilter(filters.FilterSet): - user_type = filters.TypedChoiceFilter(choices=[(key, key) for key in User.TYPE_VALUE_MAP.keys()], - coerce=lambda role: User.TYPE_VALUE_MAP[role]) + user_type = filters.TypedChoiceFilter( + choices=[(key, key) for key in User.TYPE_VALUE_MAP.keys()], + coerce=lambda role: User.TYPE_VALUE_MAP[role], + ) class Meta: model = User @@ -21,13 +25,17 @@ class Meta: class FacilityUserViewSet(GenericViewSet, mixins.ListModelMixin): serializer_class = UserAssignedSerializer filterset_class = UserFilter - queryset = User.objects.all() + queryset = User.objects.all().prefetch_related( + Prefetch("skills", queryset=Skill.objects.filter(userskill__deleted=False)) + ) permission_classes = [IsAuthenticated] filter_backends = [filters.DjangoFilterBackend] def get_queryset(self): try: - facility = Facility.objects.get(external_id=self.kwargs.get("facility_external_id")) + facility = Facility.objects.get( + external_id=self.kwargs.get("facility_external_id") + ) return facility.users.filter(deleted=False).order_by("-last_login") - except: + except ObjectDoesNotExist: raise ValidationError({"Facility": "Facility not found"}) diff --git a/care/users/api/serializers/user.py b/care/users/api/serializers/user.py index e3b0edc0db..f3d418af3d 100644 --- a/care/users/api/serializers/user.py +++ b/care/users/api/serializers/user.py @@ -358,11 +358,7 @@ class UserAssignedSerializer(serializers.ModelSerializer): home_facility_object = FacilityBareMinimumSerializer( source="home_facility", read_only=True ) - skills = serializers.SerializerMethodField() - - def get_skills(self, obj): - qs = obj.skills.filter(userskill__deleted=False) - return SkillSerializer(qs, many=True).data + skills = SkillSerializer(many=True, read_only=True) class Meta: model = User From a75b98ddfa2a472f59a4a4d7091016489ce0570f Mon Sep 17 00:00:00 2001 From: yaswanthsaivendra Date: Sat, 27 May 2023 16:27:29 +0530 Subject: [PATCH 06/15] Revert "used prefetch where we are querying the skills using UserAssigned Serializer" This reverts commit 7000a38bafd16e789ccf6ffc5f9a1f41d0c34429. --- care/facility/api/viewsets/facility_users.py | 20 ++++++-------------- care/users/api/serializers/user.py | 6 +++++- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/care/facility/api/viewsets/facility_users.py b/care/facility/api/viewsets/facility_users.py index 2fc1e751a5..fc8c4ecb86 100644 --- a/care/facility/api/viewsets/facility_users.py +++ b/care/facility/api/viewsets/facility_users.py @@ -1,5 +1,3 @@ -from django.core.exceptions import ObjectDoesNotExist -from django.db.models import Prefetch from django_filters import rest_framework as filters from rest_framework import mixins from rest_framework.exceptions import ValidationError @@ -8,14 +6,12 @@ from care.facility.models.facility import Facility from care.users.api.serializers.user import UserAssignedSerializer -from care.users.models import Skill, User +from care.users.models import User class UserFilter(filters.FilterSet): - user_type = filters.TypedChoiceFilter( - choices=[(key, key) for key in User.TYPE_VALUE_MAP.keys()], - coerce=lambda role: User.TYPE_VALUE_MAP[role], - ) + user_type = filters.TypedChoiceFilter(choices=[(key, key) for key in User.TYPE_VALUE_MAP.keys()], + coerce=lambda role: User.TYPE_VALUE_MAP[role]) class Meta: model = User @@ -25,17 +21,13 @@ class Meta: class FacilityUserViewSet(GenericViewSet, mixins.ListModelMixin): serializer_class = UserAssignedSerializer filterset_class = UserFilter - queryset = User.objects.all().prefetch_related( - Prefetch("skills", queryset=Skill.objects.filter(userskill__deleted=False)) - ) + queryset = User.objects.all() permission_classes = [IsAuthenticated] filter_backends = [filters.DjangoFilterBackend] def get_queryset(self): try: - facility = Facility.objects.get( - external_id=self.kwargs.get("facility_external_id") - ) + facility = Facility.objects.get(external_id=self.kwargs.get("facility_external_id")) return facility.users.filter(deleted=False).order_by("-last_login") - except ObjectDoesNotExist: + except: raise ValidationError({"Facility": "Facility not found"}) diff --git a/care/users/api/serializers/user.py b/care/users/api/serializers/user.py index f3d418af3d..e3b0edc0db 100644 --- a/care/users/api/serializers/user.py +++ b/care/users/api/serializers/user.py @@ -358,7 +358,11 @@ class UserAssignedSerializer(serializers.ModelSerializer): home_facility_object = FacilityBareMinimumSerializer( source="home_facility", read_only=True ) - skills = SkillSerializer(many=True, read_only=True) + skills = serializers.SerializerMethodField() + + def get_skills(self, obj): + qs = obj.skills.filter(userskill__deleted=False) + return SkillSerializer(qs, many=True).data class Meta: model = User From f59ff6b993d51a4e3748193ba5fd2efa064a206c Mon Sep 17 00:00:00 2001 From: yaswanthsaivendra Date: Sat, 27 May 2023 19:06:28 +0530 Subject: [PATCH 07/15] made neccessary changes for prefetching the skills --- care/facility/api/viewsets/facility_users.py | 17 +++++++++----- .../api/viewsets/patient_consultation.py | 11 +++++++++- care/users/api/serializers/user.py | 22 ++++++++++++++----- 3 files changed, 38 insertions(+), 12 deletions(-) diff --git a/care/facility/api/viewsets/facility_users.py b/care/facility/api/viewsets/facility_users.py index fc8c4ecb86..9781219981 100644 --- a/care/facility/api/viewsets/facility_users.py +++ b/care/facility/api/viewsets/facility_users.py @@ -1,3 +1,4 @@ +from django.core.exceptions import ObjectDoesNotExist from django_filters import rest_framework as filters from rest_framework import mixins from rest_framework.exceptions import ValidationError @@ -10,8 +11,10 @@ class UserFilter(filters.FilterSet): - user_type = filters.TypedChoiceFilter(choices=[(key, key) for key in User.TYPE_VALUE_MAP.keys()], - coerce=lambda role: User.TYPE_VALUE_MAP[role]) + user_type = filters.TypedChoiceFilter( + choices=[(key, key) for key in User.TYPE_VALUE_MAP.keys()], + coerce=lambda role: User.TYPE_VALUE_MAP[role], + ) class Meta: model = User @@ -27,7 +30,11 @@ class FacilityUserViewSet(GenericViewSet, mixins.ListModelMixin): def get_queryset(self): try: - facility = Facility.objects.get(external_id=self.kwargs.get("facility_external_id")) - return facility.users.filter(deleted=False).order_by("-last_login") - except: + facility = Facility.objects.get( + external_id=self.kwargs.get("facility_external_id") + ) + queryset = facility.users.filter(deleted=False).order_by("-last_login") + queryset = self.get_serializer_class().skills_eager_loading(queryset) + return queryset + except ObjectDoesNotExist: raise ValidationError({"Facility": "Facility not found"}) diff --git a/care/facility/api/viewsets/patient_consultation.py b/care/facility/api/viewsets/patient_consultation.py index 19a7cc70e9..dead0b0f74 100644 --- a/care/facility/api/viewsets/patient_consultation.py +++ b/care/facility/api/viewsets/patient_consultation.py @@ -1,4 +1,5 @@ from django.core.validators import validate_email +from django.db.models import Prefetch from django.db.models.query_utils import Q from django_filters import rest_framework as filters from drf_yasg import openapi @@ -25,7 +26,7 @@ email_discharge_summary, generate_and_upload_discharge_summary_task, ) -from care.users.models import User +from care.users.models import Skill, User from care.utils.cache.cache_allowed_facilities import get_accessible_facilities @@ -68,6 +69,14 @@ def get_permissions(self): return super().get_permissions() def get_queryset(self): + if self.serializer_class == PatientConsultationSerializer: + self.queryset = self.queryset.prefetch_related( + "assigned_to", + Prefetch( + "assigned_to__skills", + queryset=Skill.objects.filter(userskill__deleted=False), + ), + ) if self.request.user.is_superuser: return self.queryset elif self.request.user.user_type >= User.TYPE_VALUE_MAP["StateLabAdmin"]: diff --git a/care/users/api/serializers/user.py b/care/users/api/serializers/user.py index e3b0edc0db..532e4d698c 100644 --- a/care/users/api/serializers/user.py +++ b/care/users/api/serializers/user.py @@ -3,6 +3,7 @@ from django.contrib.auth import get_user_model from django.contrib.auth.hashers import make_password from django.db import transaction +from django.db.models import Prefetch from rest_framework import exceptions, serializers from care.facility.api.serializers.facility import FacilityBareMinimumSerializer @@ -13,7 +14,7 @@ StateSerializer, ) from care.users.api.serializers.skill import SkillSerializer -from care.users.models import GENDER_CHOICES +from care.users.models import GENDER_CHOICES, Skill from care.utils.queryset.facility import get_home_facility_queryset from care.utils.serializer.external_id_field import ExternalIdSerializerField from care.utils.serializer.phonenumber_ispossible_field import ( @@ -358,11 +359,20 @@ class UserAssignedSerializer(serializers.ModelSerializer): home_facility_object = FacilityBareMinimumSerializer( source="home_facility", read_only=True ) - skills = serializers.SerializerMethodField() - - def get_skills(self, obj): - qs = obj.skills.filter(userskill__deleted=False) - return SkillSerializer(qs, many=True).data + # skills = serializers.SerializerMethodField() + skills = SkillSerializer(many=True, read_only=True) + + # def get_skills(self, obj): + # qs = obj.skills.filter(userskill__deleted=False) + # return SkillSerializer(qs, many=True).data + + @staticmethod + def skills_eager_loading(queryset): + """Perform necessary eager loading of data.""" + queryset = queryset.prefetch_related( + Prefetch("skills", queryset=Skill.objects.filter(userskill__deleted=False)) + ) + return queryset class Meta: model = User From e6541ef4f8e470d8fc140a49184e0b3ce96ddbd3 Mon Sep 17 00:00:00 2001 From: yaswanthsaivendra Date: Tue, 30 May 2023 17:15:24 +0530 Subject: [PATCH 08/15] Revert "made neccessary changes for prefetching the skills" This reverts commit f59ff6b993d51a4e3748193ba5fd2efa064a206c. --- care/facility/api/viewsets/facility_users.py | 17 +++++--------- .../api/viewsets/patient_consultation.py | 11 +--------- care/users/api/serializers/user.py | 22 +++++-------------- 3 files changed, 12 insertions(+), 38 deletions(-) diff --git a/care/facility/api/viewsets/facility_users.py b/care/facility/api/viewsets/facility_users.py index 9781219981..fc8c4ecb86 100644 --- a/care/facility/api/viewsets/facility_users.py +++ b/care/facility/api/viewsets/facility_users.py @@ -1,4 +1,3 @@ -from django.core.exceptions import ObjectDoesNotExist from django_filters import rest_framework as filters from rest_framework import mixins from rest_framework.exceptions import ValidationError @@ -11,10 +10,8 @@ class UserFilter(filters.FilterSet): - user_type = filters.TypedChoiceFilter( - choices=[(key, key) for key in User.TYPE_VALUE_MAP.keys()], - coerce=lambda role: User.TYPE_VALUE_MAP[role], - ) + user_type = filters.TypedChoiceFilter(choices=[(key, key) for key in User.TYPE_VALUE_MAP.keys()], + coerce=lambda role: User.TYPE_VALUE_MAP[role]) class Meta: model = User @@ -30,11 +27,7 @@ class FacilityUserViewSet(GenericViewSet, mixins.ListModelMixin): def get_queryset(self): try: - facility = Facility.objects.get( - external_id=self.kwargs.get("facility_external_id") - ) - queryset = facility.users.filter(deleted=False).order_by("-last_login") - queryset = self.get_serializer_class().skills_eager_loading(queryset) - return queryset - except ObjectDoesNotExist: + facility = Facility.objects.get(external_id=self.kwargs.get("facility_external_id")) + return facility.users.filter(deleted=False).order_by("-last_login") + except: raise ValidationError({"Facility": "Facility not found"}) diff --git a/care/facility/api/viewsets/patient_consultation.py b/care/facility/api/viewsets/patient_consultation.py index dead0b0f74..19a7cc70e9 100644 --- a/care/facility/api/viewsets/patient_consultation.py +++ b/care/facility/api/viewsets/patient_consultation.py @@ -1,5 +1,4 @@ from django.core.validators import validate_email -from django.db.models import Prefetch from django.db.models.query_utils import Q from django_filters import rest_framework as filters from drf_yasg import openapi @@ -26,7 +25,7 @@ email_discharge_summary, generate_and_upload_discharge_summary_task, ) -from care.users.models import Skill, User +from care.users.models import User from care.utils.cache.cache_allowed_facilities import get_accessible_facilities @@ -69,14 +68,6 @@ def get_permissions(self): return super().get_permissions() def get_queryset(self): - if self.serializer_class == PatientConsultationSerializer: - self.queryset = self.queryset.prefetch_related( - "assigned_to", - Prefetch( - "assigned_to__skills", - queryset=Skill.objects.filter(userskill__deleted=False), - ), - ) if self.request.user.is_superuser: return self.queryset elif self.request.user.user_type >= User.TYPE_VALUE_MAP["StateLabAdmin"]: diff --git a/care/users/api/serializers/user.py b/care/users/api/serializers/user.py index 532e4d698c..e3b0edc0db 100644 --- a/care/users/api/serializers/user.py +++ b/care/users/api/serializers/user.py @@ -3,7 +3,6 @@ from django.contrib.auth import get_user_model from django.contrib.auth.hashers import make_password from django.db import transaction -from django.db.models import Prefetch from rest_framework import exceptions, serializers from care.facility.api.serializers.facility import FacilityBareMinimumSerializer @@ -14,7 +13,7 @@ StateSerializer, ) from care.users.api.serializers.skill import SkillSerializer -from care.users.models import GENDER_CHOICES, Skill +from care.users.models import GENDER_CHOICES from care.utils.queryset.facility import get_home_facility_queryset from care.utils.serializer.external_id_field import ExternalIdSerializerField from care.utils.serializer.phonenumber_ispossible_field import ( @@ -359,20 +358,11 @@ class UserAssignedSerializer(serializers.ModelSerializer): home_facility_object = FacilityBareMinimumSerializer( source="home_facility", read_only=True ) - # skills = serializers.SerializerMethodField() - skills = SkillSerializer(many=True, read_only=True) - - # def get_skills(self, obj): - # qs = obj.skills.filter(userskill__deleted=False) - # return SkillSerializer(qs, many=True).data - - @staticmethod - def skills_eager_loading(queryset): - """Perform necessary eager loading of data.""" - queryset = queryset.prefetch_related( - Prefetch("skills", queryset=Skill.objects.filter(userskill__deleted=False)) - ) - return queryset + skills = serializers.SerializerMethodField() + + def get_skills(self, obj): + qs = obj.skills.filter(userskill__deleted=False) + return SkillSerializer(qs, many=True).data class Meta: model = User From 81dfa52764b23a5ed4b7ba5b3a6826ea9cc89373 Mon Sep 17 00:00:00 2001 From: yaswanthsaivendra Date: Thu, 1 Jun 2023 22:47:28 +0530 Subject: [PATCH 09/15] delete incomplete file uploads --- care/facility/models/file_upload.py | 8 ++++++++ .../delete_incomplete_file_uploads.py | 20 +++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 care/facility/tasks/fileupload/delete_incomplete_file_uploads.py diff --git a/care/facility/models/file_upload.py b/care/facility/models/file_upload.py index 4255f6b409..50ebcd6748 100644 --- a/care/facility/models/file_upload.py +++ b/care/facility/models/file_upload.py @@ -124,3 +124,11 @@ def get_object(self, bucket=settings.FILE_UPLOAD_BUCKET, **kwargs): Key=f"{self.FileType(self.file_type).name}/{self.internal_name}", **kwargs, ) + + def delete_object(self, **kwargs): + s3 = boto3.client("s3", **cs_provider.get_client_config()) + s3.delete_object( + Bucket=settings.FILE_UPLOAD_BUCKET, + Key=f"{self.FileType(self.file_type).name}/{self.internal_name}", + **kwargs, + ) diff --git a/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py b/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py new file mode 100644 index 0000000000..3c6fbef6df --- /dev/null +++ b/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py @@ -0,0 +1,20 @@ +from datetime import timedelta + +from celery.decorators import periodic_task +from celery.schedules import crontab +from django.utils import timezone + +from care.facility.models.file_upload import FileUpload + + +@periodic_task( + run_every=crontab(minute="0", hour="0") +) # Run the task daily at midnight +def delete_incomplete_file_uploads(): + yesterday = timezone.now() - timedelta(days=1) + incomplete_uploads = FileUpload.objects.filter( + created__date__lte=yesterday, upload_completed=False + ) + for upload in incomplete_uploads: + upload.delete_object() + upload.delete() From a354df9200c3700a07370343ea3cb628d2763683 Mon Sep 17 00:00:00 2001 From: yaswanthsaivendra Date: Fri, 2 Jun 2023 16:49:24 +0530 Subject: [PATCH 10/15] updated code to do bulk delete --- care/facility/models/file_upload.py | 12 +++++++----- .../fileupload/delete_incomplete_file_uploads.py | 11 ++++++++--- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/care/facility/models/file_upload.py b/care/facility/models/file_upload.py index 50ebcd6748..82851a2aa2 100644 --- a/care/facility/models/file_upload.py +++ b/care/facility/models/file_upload.py @@ -125,10 +125,12 @@ def get_object(self, bucket=settings.FILE_UPLOAD_BUCKET, **kwargs): **kwargs, ) - def delete_object(self, **kwargs): + @staticmethod + def bulk_delete_objects(s3_keys): s3 = boto3.client("s3", **cs_provider.get_client_config()) - s3.delete_object( - Bucket=settings.FILE_UPLOAD_BUCKET, - Key=f"{self.FileType(self.file_type).name}/{self.internal_name}", - **kwargs, + bucket = settings.FILE_UPLOAD_BUCKET + + return s3.delete_objects( + Bucket=bucket, + Delete={"Objects": [{"Key": key} for key in s3_keys], "Quiet": True}, ) diff --git a/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py b/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py index 3c6fbef6df..748fe01669 100644 --- a/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py +++ b/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py @@ -15,6 +15,11 @@ def delete_incomplete_file_uploads(): incomplete_uploads = FileUpload.objects.filter( created__date__lte=yesterday, upload_completed=False ) - for upload in incomplete_uploads: - upload.delete_object() - upload.delete() + + s3_keys = [ + f"{upload.FileType(upload.file_type).name/upload.internal_name}" + for upload in incomplete_uploads + ] + + FileUpload.bulk_delete_objects(s3_keys) + incomplete_uploads.delete() From 897ec15188cff16eb2f59bc4129eaa4c02862292 Mon Sep 17 00:00:00 2001 From: yaswanth sai vendra <75136628+yaswanthsaivendra@users.noreply.github.com> Date: Wed, 7 Jun 2023 19:59:31 +0530 Subject: [PATCH 11/15] changed the hard delete to soft delete --- .../facility/tasks/fileupload/delete_incomplete_file_uploads.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py b/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py index 748fe01669..860be9007c 100644 --- a/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py +++ b/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py @@ -22,4 +22,4 @@ def delete_incomplete_file_uploads(): ] FileUpload.bulk_delete_objects(s3_keys) - incomplete_uploads.delete() + incomplete_uploads.update(deleted=True) From c063d6535b1d23cc05ac2268fdcd415a8250586d Mon Sep 17 00:00:00 2001 From: yaswanthsaivendra Date: Thu, 8 Jun 2023 17:40:58 +0530 Subject: [PATCH 12/15] updated code to handle up upper limit by batching --- care/facility/models/file_upload.py | 10 +++++++++- .../tasks/fileupload/delete_incomplete_file_uploads.py | 4 ++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/care/facility/models/file_upload.py b/care/facility/models/file_upload.py index 82851a2aa2..a60acc3809 100644 --- a/care/facility/models/file_upload.py +++ b/care/facility/models/file_upload.py @@ -1,4 +1,5 @@ import enum +import math import time from uuid import uuid4 @@ -129,8 +130,15 @@ def get_object(self, bucket=settings.FILE_UPLOAD_BUCKET, **kwargs): def bulk_delete_objects(s3_keys): s3 = boto3.client("s3", **cs_provider.get_client_config()) bucket = settings.FILE_UPLOAD_BUCKET + max_keys_per_batch = 1000 + + num_of_batches = math.ceil(len(s3_keys) / max_keys_per_batch) + for batch_index in range(num_of_batches): + start_index = batch_index * max_keys_per_batch + end_index = min(start_index + max_keys_per_batch, len(s3_keys)) + batch_keys = s3_keys[start_index:end_index] return s3.delete_objects( Bucket=bucket, - Delete={"Objects": [{"Key": key} for key in s3_keys], "Quiet": True}, + Delete={"Objects": [{"Key": key} for key in batch_keys], "Quiet": True}, ) diff --git a/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py b/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py index 748fe01669..a540b22064 100644 --- a/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py +++ b/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py @@ -13,7 +13,7 @@ def delete_incomplete_file_uploads(): yesterday = timezone.now() - timedelta(days=1) incomplete_uploads = FileUpload.objects.filter( - created__date__lte=yesterday, upload_completed=False + created_date__lte=yesterday, upload_completed=False ) s3_keys = [ @@ -22,4 +22,4 @@ def delete_incomplete_file_uploads(): ] FileUpload.bulk_delete_objects(s3_keys) - incomplete_uploads.delete() + incomplete_uploads.update(deleted=True) From 2e1ac00f2f7c78c1d3b39115b415cdccfde26df2 Mon Sep 17 00:00:00 2001 From: yaswanthsaivendra Date: Thu, 8 Jun 2023 17:48:25 +0530 Subject: [PATCH 13/15] minor fix --- care/facility/models/file_upload.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/care/facility/models/file_upload.py b/care/facility/models/file_upload.py index a60acc3809..1253fd150a 100644 --- a/care/facility/models/file_upload.py +++ b/care/facility/models/file_upload.py @@ -138,7 +138,7 @@ def bulk_delete_objects(s3_keys): end_index = min(start_index + max_keys_per_batch, len(s3_keys)) batch_keys = s3_keys[start_index:end_index] - return s3.delete_objects( - Bucket=bucket, - Delete={"Objects": [{"Key": key} for key in batch_keys], "Quiet": True}, - ) + s3.delete_objects( + Bucket=bucket, + Delete={"Objects": [{"Key": key} for key in batch_keys], "Quiet": True}, + ) From 2a1c1e11c614dd0977b8821ab1a17c6d18602d49 Mon Sep 17 00:00:00 2001 From: yaswanthsaivendra Date: Thu, 8 Jun 2023 23:20:43 +0530 Subject: [PATCH 14/15] minor fix and added tests --- .../delete_incomplete_file_uploads.py | 2 +- care/facility/tests/test_FileUpload_model.py | 41 +++++++++++++++ ...est_delete_incomplete_file_uploads_task.py | 52 +++++++++++++++++++ 3 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 care/facility/tests/test_FileUpload_model.py create mode 100644 care/facility/tests/test_delete_incomplete_file_uploads_task.py diff --git a/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py b/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py index a540b22064..6077018506 100644 --- a/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py +++ b/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py @@ -17,7 +17,7 @@ def delete_incomplete_file_uploads(): ) s3_keys = [ - f"{upload.FileType(upload.file_type).name/upload.internal_name}" + f"{upload.FileType(upload.file_type).name}/{upload.internal_name}" for upload in incomplete_uploads ] diff --git a/care/facility/tests/test_FileUpload_model.py b/care/facility/tests/test_FileUpload_model.py new file mode 100644 index 0000000000..a14a431eb3 --- /dev/null +++ b/care/facility/tests/test_FileUpload_model.py @@ -0,0 +1,41 @@ +import math +from unittest import TestCase +from unittest.mock import patch + +from care.facility.models.file_upload import FileUpload +from care.utils.csp import config as cs_provider + + +class FileUploadModelTest(TestCase): + @patch("boto3.client") + def test_bulk_delete_objects(self, mock_s3_client): + s3_keys = ["key1", "key2", "key3"] + + with patch( + "care.facility.models.file_upload.settings.FILE_UPLOAD_BUCKET", + "test_bucket", + ): + FileUpload.bulk_delete_objects(s3_keys) + + mock_s3_client.assert_called_once_with("s3", **cs_provider.get_client_config()) + mock_s3_client.return_value.delete_objects.assert_called_once_with( + Bucket="test_bucket", + Delete={"Objects": [{"Key": key} for key in s3_keys], "Quiet": True}, + ) + + def test_batch_iteration(self): + s3_keys = ["key1", "key2", "key3", "key4", "key5"] + max_keys_per_batch = 2 + + expected_num_of_batches = math.ceil(len(s3_keys) / max_keys_per_batch) + expected_batch_keys = [["key1", "key2"], ["key3", "key4"], ["key5"]] + + batches = [] + for batch_index in range(expected_num_of_batches): + start_index = batch_index * max_keys_per_batch + end_index = min(start_index + max_keys_per_batch, len(s3_keys)) + batch_keys = s3_keys[start_index:end_index] + batches.append(batch_keys) + + self.assertEqual(len(batches), expected_num_of_batches) + self.assertEqual(batches, expected_batch_keys) diff --git a/care/facility/tests/test_delete_incomplete_file_uploads_task.py b/care/facility/tests/test_delete_incomplete_file_uploads_task.py new file mode 100644 index 0000000000..9f19520a7b --- /dev/null +++ b/care/facility/tests/test_delete_incomplete_file_uploads_task.py @@ -0,0 +1,52 @@ +from unittest.mock import patch + +from django.test import TestCase +from django.utils import timezone +from freezegun import freeze_time + +from care.facility.models.file_upload import FileUpload +from care.facility.tasks.fileupload.delete_incomplete_file_uploads import ( + delete_incomplete_file_uploads, +) + + +class DeleteIncompleteFileUploadsTest(TestCase): + def test_delete_incomplete_file_uploads_with_mock_s3(self): + yesterday = timezone.now() - timezone.timedelta(days=1) + + # Create dummy FileUpload objects + with freeze_time(yesterday): + file_upload1 = FileUpload.objects.create( + file_type=FileUpload.FileType.PATIENT.value, + internal_name="file1.pdf", + upload_completed=False, + ) + file_upload2 = FileUpload.objects.create( + file_type=FileUpload.FileType.PATIENT.value, + internal_name="file2.jpg", + upload_completed=False, + ) + file_upload3 = FileUpload.objects.create( + file_type=FileUpload.FileType.PATIENT.value, + internal_name="file3.csv", + upload_completed=True, + ) + + # Patch the bulk_delete_objects method + with patch( + "care.facility.models.file_upload.FileUpload.bulk_delete_objects" + ) as mock_bulk_delete_objects: + # Call the Celery task + delete_incomplete_file_uploads() + + # Assert + self.assertFalse(FileUpload.objects.filter(pk=file_upload1.pk).exists()) + self.assertFalse(FileUpload.objects.filter(pk=file_upload2.pk).exists()) + self.assertTrue(FileUpload.objects.filter(pk=file_upload3.pk).exists()) + + mock_bulk_delete_objects.assert_called_once_with( + [ + f"{file_upload1.FileType(file_upload1.file_type).name}/{file_upload1.internal_name}", + f"{file_upload2.FileType(file_upload2.file_type).name}/{file_upload2.internal_name}", + ] + ) From f4c1853dfd1bba3e5496a2fad33bb0169deacf96 Mon Sep 17 00:00:00 2001 From: Aakash Singh Date: Wed, 5 Jul 2023 18:31:16 +0530 Subject: [PATCH 15/15] update task for celery 5.x --- care/facility/tasks/__init__.py | 10 +++++++- care/facility/tasks/cleanup.py | 17 +++++++++++++ .../delete_incomplete_file_uploads.py | 25 ------------------- ...est_delete_incomplete_file_uploads_task.py | 4 +-- 4 files changed, 27 insertions(+), 29 deletions(-) delete mode 100644 care/facility/tasks/fileupload/delete_incomplete_file_uploads.py diff --git a/care/facility/tasks/__init__.py b/care/facility/tasks/__init__.py index 7ebf63cdaa..fddc5ee78f 100644 --- a/care/facility/tasks/__init__.py +++ b/care/facility/tasks/__init__.py @@ -1,7 +1,10 @@ from celery import current_app from celery.schedules import crontab -from care.facility.tasks.cleanup import delete_old_notifications +from care.facility.tasks.cleanup import ( + delete_incomplete_file_uploads, + delete_old_notifications, +) from care.facility.tasks.summarisation import ( summarise_district_patient, summarise_facility_capacity, @@ -18,6 +21,11 @@ def setup_periodic_tasks(sender, **kwargs): delete_old_notifications.s(), name="delete_old_notifications", ) + sender.add_periodic_task( + crontab(hour="0", minute="0"), + delete_incomplete_file_uploads.s(), + name="delete_incomplete_file_uploads", + ) sender.add_periodic_task( crontab(hour="*/4", minute=59), summarise_triage.s(), diff --git a/care/facility/tasks/cleanup.py b/care/facility/tasks/cleanup.py index 3f913142cc..3f3e8e752f 100644 --- a/care/facility/tasks/cleanup.py +++ b/care/facility/tasks/cleanup.py @@ -3,6 +3,7 @@ from celery import shared_task from django.utils import timezone +from care.facility.models.file_upload import FileUpload from care.facility.models.notification import Notification @@ -10,3 +11,19 @@ def delete_old_notifications(): ninety_days_ago = timezone.now() - timedelta(days=90) Notification.objects.filter(created_date__lte=ninety_days_ago).delete() + + +@shared_task +def delete_incomplete_file_uploads(): + yesterday = timezone.now() - timedelta(days=1) + incomplete_uploads = FileUpload.objects.filter( + created_date__lte=yesterday, upload_completed=False + ) + + s3_keys = [ + f"{upload.FileType(upload.file_type).name}/{upload.internal_name}" + for upload in incomplete_uploads + ] + + FileUpload.bulk_delete_objects(s3_keys) + incomplete_uploads.update(deleted=True) diff --git a/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py b/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py deleted file mode 100644 index 6077018506..0000000000 --- a/care/facility/tasks/fileupload/delete_incomplete_file_uploads.py +++ /dev/null @@ -1,25 +0,0 @@ -from datetime import timedelta - -from celery.decorators import periodic_task -from celery.schedules import crontab -from django.utils import timezone - -from care.facility.models.file_upload import FileUpload - - -@periodic_task( - run_every=crontab(minute="0", hour="0") -) # Run the task daily at midnight -def delete_incomplete_file_uploads(): - yesterday = timezone.now() - timedelta(days=1) - incomplete_uploads = FileUpload.objects.filter( - created_date__lte=yesterday, upload_completed=False - ) - - s3_keys = [ - f"{upload.FileType(upload.file_type).name}/{upload.internal_name}" - for upload in incomplete_uploads - ] - - FileUpload.bulk_delete_objects(s3_keys) - incomplete_uploads.update(deleted=True) diff --git a/care/facility/tests/test_delete_incomplete_file_uploads_task.py b/care/facility/tests/test_delete_incomplete_file_uploads_task.py index 9f19520a7b..f73b760b2b 100644 --- a/care/facility/tests/test_delete_incomplete_file_uploads_task.py +++ b/care/facility/tests/test_delete_incomplete_file_uploads_task.py @@ -5,9 +5,7 @@ from freezegun import freeze_time from care.facility.models.file_upload import FileUpload -from care.facility.tasks.fileupload.delete_incomplete_file_uploads import ( - delete_incomplete_file_uploads, -) +from care.facility.tasks.cleanup import delete_incomplete_file_uploads class DeleteIncompleteFileUploadsTest(TestCase):