Skip to content

Commit

Permalink
✨(edx_imports) add management command populate_certificate_signatory
Browse files Browse the repository at this point in the history
For several imported degree certificates, the signatory is not set. We add a
management command to populate this missing information easily.
  • Loading branch information
jbpenrath committed Jul 24, 2024
1 parent df1272e commit ff42caf
Show file tree
Hide file tree
Showing 7 changed files with 616 additions and 30 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ and this project adheres to

## [Unreleased]

### Added

- Add management command to fix imported certificates without signatory

### Fixed

- Fix signatories retrieval logic in edx certificate import
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""populate_certificate_signatory command module"""

import logging

from django.core.management import BaseCommand

from joanie.edx_imports.checks import (
check_import_db_connections,
check_import_env,
check_openedx_host,
)
from joanie.edx_imports.tasks import populate_signatory_certificates_task

logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """
    Schedule the task which fills in the signatory of certificates
    that do not have one yet.
    """

    def add_arguments(self, parser):
        """Register the command line options of the command."""
        parser.add_argument(
            "--skip-check",
            action="store_true",
            default=False,
            help="Skip check the env vars and db connections",
        )
        # Both filters are optional plain string options.
        for flag, description in (
            ("--id", "To populate signatory for a specific certificate"),
            (
                "--course-id",
                "To populate signatory for all certificates of a specific course",
            ),
        ):
            parser.add_argument(flag, type=str, help=description)

    def handle(self, *args, **options):
        """
        Optionally run the import checks, then enqueue the populate task.

        When a check fails, the operator is asked on stdin whether to go on;
        any answer other than "yes" or "y" stops the command before the task
        is enqueued.
        """
        run_checks = not options.get("skip_check")
        certificate_id = options.get("id")
        course_id = options.get("course_id")

        if run_checks:
            logger.info("Checking the environment and database connections...")
            # Build a list so that every check runs (and reports) even when
            # an earlier one has already failed.
            outcomes = [
                check_import_env(self.style),
                check_openedx_host(self.style),
                check_import_db_connections(self.style),
            ]
            if not all(outcomes):
                logger.error(self.style.ERROR("\nCheck failed"))
                answer = input("\nDo you want to continue importing data? (yes/no): ")
                if answer.lower() not in ("yes", "y"):
                    return
                logger.warning(
                    self.style.WARNING("Continuing import despite failed checks")
                )

        populate_signatory_certificates_task.delay(
            certificate_id=certificate_id, course_id=course_id
        )

        logger.info("Populate signatory certificates tasks launched")
5 changes: 4 additions & 1 deletion src/backend/joanie/edx_imports/tasks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
"""Module for importing tasks."""

from .certificates import import_certificates_batch_task
from .certificates import (
import_certificates_batch_task,
populate_signatory_certificates_task,
)
from .course_runs import import_course_runs_batch_task
from .enrollments import import_enrollments_batch_task
from .universities import import_universities_batch_task
Expand Down
154 changes: 125 additions & 29 deletions src/backend/joanie/edx_imports/tasks/certificates.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,29 @@
"""Celery tasks for importing Open edX certificates to Joanie organizations."""

import re

# pylint: disable=too-many-locals,too-many-statements,too-many-branches,broad-exception-caught
# ruff: noqa: SLF001,PLR0915,PLR0912,BLE001

from logging import getLogger

from django.conf import settings
from django.core.files.storage import default_storage

from hashids import Hashids

from joanie.celery_app import app
from joanie.core import models
from joanie.core import enums, models
from joanie.core.enums import CERTIFICATE, DEGREE
from joanie.core.models import DocumentImage
from joanie.core.models import Certificate, DocumentImage
from joanie.core.utils import file_checksum
from joanie.edx_imports import edx_mongodb
from joanie.edx_imports.edx_database import OpenEdxDB
from joanie.edx_imports.utils import (
download_and_store,
download_signature_image,
extract_organization_code,
format_percent,
make_date_aware,
set_certificate_images,
update_context_signatory,
)
from joanie.lms_handler.backends.openedx import OPENEDX_MODE_VERIFIED

Expand Down Expand Up @@ -172,34 +173,16 @@ def import_certificates_batch(
edx_certificate.course_id
)

signature = None
if signatory:
signature_image_path = signatory.get("signature_image_path")
if signature_image_path.startswith("/"):
signature_image_path = signature_image_path[1:]
signature_path = download_and_store(signature_image_path)
if signature_path:
signature_file = default_storage.open(signature_path)
signature_checksum = file_checksum(signature_file)
(signature, _created) = DocumentImage.objects.get_or_create(
checksum=signature_checksum,
defaults={"file": signature_path},
)

signature, _ = download_signature_image(signature_image_path)
if signature:
signatory["signature_id"] = str(signature.id)
certificate_context["signatory"] = signatory

for language, _ in settings.LANGUAGES:
if signatory:
certificate_context[language]["organizations"][0][
"representative"
] = signatory.get("name")
certificate_context[language]["organizations"][0][
"representative_profession"
] = signatory.get("title")
if signature:
certificate_context[language]["organizations"][0][
"signature_id"
] = signature.id
certificate_context = update_context_signatory(
certificate_context, signatory
)

certificates_to_create.append(
models.Certificate(
Expand Down Expand Up @@ -274,3 +257,116 @@ def import_certificates_batch(
report["certificates"]["skipped"],
report["certificates"]["errors"],
)


@app.task(bind=True)
def populate_signatory_certificates_task(self, **kwargs):
    """
    Celery entry point around `populate_signatory_certificates`.

    Keyword arguments are forwarded untouched. Any exception is logged and
    the task is handed back to Celery for a retry.
    """
    try:
        return populate_signatory_certificates(**kwargs)
    except Exception as error:
        logger.exception(error)
        raise self.retry(exc=error) from error


def populate_signatory_certificates(certificate_id=None, course_id=None):
    """
    Populate the signatory of existing degree certificates that lack one.

    For each certificate, the signatory is first looked up on the OpenEdX
    instance from the course id; when none is found there, the signatory
    information of the certificate's organization is used instead.

    Args:
        certificate_id: when set, only the certificate with this id is processed.
        course_id: when set, only certificates whose course run resource link
            contains this value are processed, and it is used as the lookup
            key for the OpenEdX signatory.

    Returns:
        A human readable summary string with the number of certificates
        processed, populated, skipped and in error.
    """
    report = {
        "total": 0,
        "populated": 0,
        "errors": 0,
        "skipped": 0,
    }

    filters = {
        "certificate_definition__template": enums.DEGREE,
        "enrollment__isnull": False,
    }
    if certificate_id:
        filters["id"] = certificate_id
    if course_id:
        filters["enrollment__course_run__resource_link__icontains"] = course_id

    # The course run is read for every certificate: fetch it with the same
    # query instead of issuing extra queries per row.
    certificates = Certificate.objects.filter(**filters).select_related(
        "organization", "enrollment__course_run"
    )

    report["total"] = certificates.count()

    for certificate in certificates.iterator():
        localized_context = certificate.localized_context.copy()

        key = course_id
        if not key:
            resource_link = certificate.enrollment.course_run.resource_link
            # `re.match` returns None when the link has an unexpected shape;
            # count the certificate as an error instead of crashing the task.
            link_match = re.match(
                "^.*/courses/(?P<course_id>.*)/course/?$", resource_link or ""
            )
            key = link_match.group("course_id") if link_match else None

        if not key:
            report["errors"] += 1
            continue

        try:
            context_organization = certificate.localized_context.get(
                settings.LANGUAGE_CODE
            ).get("organizations")[0]
        except (AttributeError, TypeError, IndexError):
            report["errors"] += 1
            continue

        # Nothing to do when both the signature and the representative
        # are already set in the context.
        if context_organization.get(
            "signature_id"
        ) is not None and context_organization.get("representative"):
            report["skipped"] += 1
            continue

        if signatory := edx_mongodb.get_signatory_from_course_id(key):
            signature = None
            signature_image_path = signatory.get("signature_image_path")
            # A signatory without an image path cannot be downloaded.
            if signature_image_path:
                signature, _ = download_signature_image(signature_image_path)
            if signature:
                signatory["signature_id"] = str(signature.id)
            localized_context["signatory"] = signatory
        else:
            organization = certificate.organization
            signature = None
            # NOTE(review): assumes `organization.signature` is a Django file
            # field value, falsy when no file is attached. Confirm on the model.
            if organization.signature:
                signature_checksum = file_checksum(organization.signature)
                signature, _ = DocumentImage.objects.get_or_create(
                    checksum=signature_checksum,
                    defaults={"file": organization.signature},
                )
            signatory = {
                "name": organization.signatory_representative
                or organization.representative,
                "title": organization.signatory_representative_profession
                if organization.signatory_representative
                else organization.representative_profession,
                "signature_id": str(signature.id) if signature else None,
            }

        # Both a name and a signature are required to populate the context.
        if not signatory.get("name") or not signatory.get("signature_id"):
            report["errors"] += 1
            continue

        certificate.localized_context = update_context_signatory(
            localized_context, signatory
        )
        certificate.save()
        report["populated"] += 1

    report_string = "%s certificates processed, %s populated, %s skipped, %s errors"
    logger.info(
        report_string,
        report["total"],
        report["populated"],
        report["skipped"],
        report["errors"],
    )

    return report_string % (
        report["total"],
        report["populated"],
        report["skipped"],
        report["errors"],
    )
37 changes: 37 additions & 0 deletions src/backend/joanie/edx_imports/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

from joanie.core import enums, utils
from joanie.core.models import DocumentImage
from joanie.core.utils import file_checksum
from joanie.lms_handler.backends.openedx import split_course_key

logger = getLogger(__name__)
Expand Down Expand Up @@ -105,3 +106,39 @@ def set_certificate_images(certificate):
images_set.add(image)

certificate.images.set(images_set)


def download_signature_image(path):
    """
    Download a signature image from OpenEdX, store it, and return its
    matching `DocumentImage`.

    Args:
        path: path of the signature image on the OpenEdX side. A single
            leading "/" is dropped before the download. `None` or an empty
            string means there is no image to fetch.

    Returns:
        A `(signature, created)` tuple. `signature` is the `DocumentImage`
        matching the checksum of the stored file, or `None` when no path was
        given or nothing was stored. `created` tells whether the
        `DocumentImage` was created by this call.
    """
    # Signatories may come without any image path: nothing to download.
    if not path:
        return None, False

    signature_image_path = path[1:] if path.startswith("/") else path

    signature_path = download_and_store(signature_image_path)
    if not signature_path:
        return None, False

    # Close the storage file once its checksum has been computed.
    with default_storage.open(signature_path) as signature_file:
        signature_checksum = file_checksum(signature_file)

    signature, created = DocumentImage.objects.get_or_create(
        checksum=signature_checksum,
        defaults={"file": signature_path},
    )

    return signature, created


def update_context_signatory(context, signatory):
    """
    Write the signatory information into the first organization of the
    certificate context, for every configured language.

    The representative name and profession are written only when the
    signatory has a name; the signature id only when it has one.
    The context is modified in place and also returned.
    """
    for language_code, _label in settings.LANGUAGES:
        representative = signatory.get("name")
        if representative:
            main_organization = context[language_code]["organizations"][0]
            main_organization["representative"] = representative
            main_organization["representative_profession"] = signatory.get("title")

        signature_id = signatory.get("signature_id")
        if signature_id:
            context[language_code]["organizations"][0]["signature_id"] = signature_id

    return context
Loading

0 comments on commit ff42caf

Please sign in to comment.