From a86f9c7c359e615622d0fc5e4cee7b76aba6abab Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Fri, 23 Aug 2024 09:14:25 -0400 Subject: [PATCH] allow monthly metrics reports on multiple subjects replace `DailyReport.DAILY_UNIQUE_FIELD` with `UNIQUE_TOGETHER_FIELDS` on both `DailyReport` and `MonthlyReport`, so we can have (for example) monthly reports for each institution or each institutional user account --- osf/metrics/reports.py | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/osf/metrics/reports.py b/osf/metrics/reports.py index 609e79fc324b..07fd240184ec 100644 --- a/osf/metrics/reports.py +++ b/osf/metrics/reports.py @@ -20,7 +20,7 @@ class DailyReport(metrics.Metric): There's something we'd like to know about every so often, so let's regularly run a report and stash the results here. """ - DAILY_UNIQUE_FIELD = None # set in subclasses that expect multiple reports per day + UNIQUE_TOGETHER_FIELDS = ('report_date',) # override in subclasses for multiple reports per day report_date = metrics.Date(format='strict_date', required=True) @@ -58,6 +58,7 @@ def serialize(self, data): class MonthlyReport(metrics.Metric): """MonthlyReport (abstract base for report-based metrics that run monthly) """ + UNIQUE_TOGETHER_FIELDS = ('report_yearmonth',) # override in subclasses for multiple reports per month report_yearmonth = YearmonthField() @@ -69,23 +70,17 @@ class Meta: @receiver(metrics_pre_save) def set_report_id(sender, instance, **kwargs): - # Set the document id to a hash of "unique together" - # values (just `report_date` by default) to get - # "ON CONFLICT UPDATE" behavior -- if the document + # Set the document id to a hash of "unique together" fields + # for "ON CONFLICT UPDATE" behavior -- if the document # already exists, it will be updated rather than duplicated. # Cannot detect/avoid conflicts this way, but that's ok. 
- - if issubclass(sender, DailyReport): - duf_name = instance.DAILY_UNIQUE_FIELD - if duf_name is None: - instance.meta.id = stable_key(instance.report_date) - else: - duf_value = getattr(instance, duf_name) - if not duf_value or not isinstance(duf_value, str): - raise ReportInvalid(f'{sender.__name__}.{duf_name} MUST have a non-empty string value (got {duf_value})') - instance.meta.id = stable_key(instance.report_date, duf_value) - elif issubclass(sender, MonthlyReport): - instance.meta.id = stable_key(instance.report_yearmonth) + _key_values = [] + for _field_name in instance.UNIQUE_TOGETHER_FIELDS: + _field_value = getattr(instance, _field_name) + if not _field_value or not isinstance(_field_value, str): + raise ReportInvalid(f'{sender.__name__}.{_field_name} MUST have a non-empty string value (got {_field_value})') + _key_values.append(_field_value) + instance.meta.id = stable_key(*_key_values) #### BEGIN reusable inner objects ##### @@ -157,7 +152,7 @@ class DownloadCountReport(DailyReport): class InstitutionSummaryReport(DailyReport): - DAILY_UNIQUE_FIELD = 'institution_id' + UNIQUE_TOGETHER_FIELDS = ('report_date', 'institution_id',) institution_id = metrics.Keyword() institution_name = metrics.Keyword() @@ -169,7 +164,7 @@ class InstitutionSummaryReport(DailyReport): class NewUserDomainReport(DailyReport): - DAILY_UNIQUE_FIELD = 'domain_name' + UNIQUE_TOGETHER_FIELDS = ('report_date', 'domain_name',) domain_name = metrics.Keyword() new_user_count = metrics.Integer() @@ -187,7 +182,7 @@ class OsfstorageFileCountReport(DailyReport): class PreprintSummaryReport(DailyReport): - DAILY_UNIQUE_FIELD = 'provider_key' + UNIQUE_TOGETHER_FIELDS = ('report_date', 'provider_key',) provider_key = metrics.Keyword() preprint_count = metrics.Integer()