quality_data: overview: Fix incorrect field summing passing grants

When calculating the overview stats for /api/dashboard/overview?mode=grants the logic of fail/pass was being used to count the total number of grants which passed by just using the total number of grants that are in the source file. This is only correct some of the time because it just so happens that if a publisher is likely to be missing a field (such as company number) then they will miss it out of all of their datasets so the calculation is coincidentally correct. Some publishers however do add such fields for _some_ of their grants which makes this calculation completely incorrect for certain metrics. This change makes sure we use the total count from the DQT (library) rather than the aggregated data. Updates basic test data to corrected value. Fixes: #246
ThreeSixtyGiving · Dec 16, 2024 · 8f6cdb0 · 8f6cdb0
1 parent 34a2c1b
commit 8f6cdb0
Show file tree

Hide file tree

Showing 2 changed files with 24 additions and 12 deletions.
diff --git a/datastore/data_quality/quality_data.py b/datastore/data_quality/quality_data.py
@@ -324,21 +324,33 @@ def get_pc_quality_grants(self):
         )
 
         for metric, query in self.quality_query_parameters.items():
-            # Aggregate total number of errors for the metric
-            ret[metric] = (
-                self.source_file_set.filter(**query)
-                .annotate(total=RawSQL("((aggregate->>%s)::int)", ("count",)))
-                .aggregate(Sum("total"))["total__sum"]
-            )
+            # Aggregate total count of errors for the metrics
+
+            # We need to drop down to SQL to do the casting/aggregates of the number of grants that are
+            # affected. Extract column/key from django orm syntax
+            # e.g. for hasRecipientIndividualsCodelists:
+            # {'quality__IndividualsCodeListsNotPresent__fail': False} extract "IndividualsCodeListsNotPresent"
+            metric_dqt_name = list(query.keys())[0].split("__")[1]
+
+            metric_total_errors_count = self.source_file_set.annotate(
+                total=RawSQL("((quality->%s->>'count')::numeric)", (metric_dqt_name,))
+            ).aggregate(Sum("total"))["total__sum"]
 
-            if ret[metric] == None:
-                ret[metric] = 0
+            # Guard as the query can technically return None
+            if metric_total_errors_count == None:
+                metric_total_errors_count = 0
 
-            # Workout percentage of total errors / all grants in the relevant recipient set
+            # Workout percentage of total which don't have errors / all grants in the relevant recipient set
             if "Org" in metric:
-                ret[metric] = round(ret[metric] / total_recipient_org_grants * 100)
+                ret[metric] = round(
+                    (total_recipient_org_grants - metric_total_errors_count)
+                    / total_recipient_org_grants
+                    * 100
+                )
             else:
-                ret[metric] = round(ret[metric] / total_grants * 100)
+                ret[metric] = round(
+                    (total_grants - metric_total_errors_count) / total_grants * 100
+                )
 
         return ret
 

diff --git a/datastore/tests/test_api.py b/datastore/tests/test_api.py
@@ -46,7 +46,7 @@ def test_dashboard_api_overview_grants(self):
                 "hasGrantClassification": 0,
                 "hasBeneficiaryLocationGeoCode": 100,
                 "hasRecipientOrgCompanyOrCharityNumber": 0,
-                "has50pcExternalOrgId": 0,
+                "has50pcExternalOrgId": 100,
                 "hasRecipientIndividualsCodelists": 100,
             },
         }