From 868810de33a10bf4770995d211ee3eac499e7f5a Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 25 Oct 2024 23:07:27 +0530 Subject: [PATCH] Optimize data migration - Avoid loading all records at once in memory Signed-off-by: Keshav Priyadarshi --- .../migrations/0071_auto_20241007_1044.py | 36 ++++++++++++++++--- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/vulnerabilities/migrations/0071_auto_20241007_1044.py b/vulnerabilities/migrations/0071_auto_20241007_1044.py index c55c06a8b..3b52dd640 100644 --- a/vulnerabilities/migrations/0071_auto_20241007_1044.py +++ b/vulnerabilities/migrations/0071_auto_20241007_1044.py @@ -1,14 +1,22 @@ from django.db import migrations, models import django.db.models.deletion -from django.core.validators import MaxValueValidator, MinValueValidator -from vulnerabilities.improver import MAX_CONFIDENCE +from aboutcode.pipeline import LoopProgress def split_packagerelatedvulnerability(apps, schema_editor): PackageRelatedVulnerability = apps.get_model('vulnerabilities', 'PackageRelatedVulnerability') FixingPackageRelatedVulnerability = apps.get_model('vulnerabilities', 'FixingPackageRelatedVulnerability') AffectedByPackageRelatedVulnerability = apps.get_model('vulnerabilities', 'AffectedByPackageRelatedVulnerability') - for prv in PackageRelatedVulnerability.objects.all(): + obsolete_package_relation_query = PackageRelatedVulnerability.objects.all() + obsolete_package_relation_query_count = obsolete_package_relation_query.count() + print(f"\nMigrating {obsolete_package_relation_query_count:,d} old package vulnerability relationship.") + + progress = LoopProgress( + total_iterations=obsolete_package_relation_query_count, + progress_step=1, + logger=print, + ) + for prv in progress.iter(obsolete_package_relation_query.iterator(chunk_size=10000)): if prv.fix: FixingPackageRelatedVulnerability.objects.create( package=prv.package, @@ -29,7 +37,16 @@ def reverse_migration(apps, schema_editor): AffectedByPackageRelatedVulnerability = apps.get_model('vulnerabilities', 'AffectedByPackageRelatedVulnerability') PackageRelatedVulnerability = apps.get_model('vulnerabilities', 'PackageRelatedVulnerability') - for fpv in FixingPackageRelatedVulnerability.objects.all(): + fixing_package_relation_query = FixingPackageRelatedVulnerability.objects.all() + fixing_package_relation_query_count = fixing_package_relation_query.count() + print(f"\nMigrating {fixing_package_relation_query_count:,d} FixingPackage to old relationship.") + + progress = LoopProgress( + total_iterations=fixing_package_relation_query_count, + progress_step=1, + logger=print, + ) + for fpv in progress.iter(fixing_package_relation_query.iterator(chunk_size=10000)): PackageRelatedVulnerability.objects.create( package=fpv.package, vulnerability=fpv.vulnerability, @@ -38,7 +55,16 @@ def reverse_migration(apps, schema_editor): fix=True, ) - for apv in AffectedByPackageRelatedVulnerability.objects.all(): + affected_package_relation_query = AffectedByPackageRelatedVulnerability.objects.all() + affected_package_relation_query_count = affected_package_relation_query.count() + print(f"\nMigrating {affected_package_relation_query_count:,d} AffectedPackage to old relationship.") + + progress = LoopProgress( + total_iterations=affected_package_relation_query_count, + progress_step=1, + logger=print, + ) + for apv in progress.iter(affected_package_relation_query.iterator(chunk_size=10000)): PackageRelatedVulnerability.objects.create( package=apv.package, vulnerability=apv.vulnerability,