From 771e63b766173788a3212ab01297c9d4c91bebd5 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Wed, 16 Oct 2024 17:22:40 -0400 Subject: [PATCH] add yearmonth as init arg to MonthlyReporter --- .../commands/monthly_reporters_go.py | 4 +-- osf/metrics/reporters/_base.py | 17 +++++++----- .../reporters/institution_summary_monthly.py | 15 +++++------ osf/metrics/reporters/institutional_users.py | 6 ++--- osf/metrics/reporters/public_item_usage.py | 27 +++++++++---------- osf/metrics/reporters/spam_count.py | 6 ++--- .../test_institutional_summary_reporter.py | 12 ++++----- .../test_institutional_users_reporter.py | 14 +++++----- .../test_public_item_usage_reporter.py | 11 ++++---- 9 files changed, 56 insertions(+), 56 deletions(-) diff --git a/osf/management/commands/monthly_reporters_go.py b/osf/management/commands/monthly_reporters_go.py index 5734a692d3a..6dda9bac4a0 100644 --- a/osf/management/commands/monthly_reporters_go.py +++ b/osf/management/commands/monthly_reporters_go.py @@ -44,8 +44,8 @@ def monthly_reporters_go(report_year=None, report_month=None): ) def monthly_reporter_go(task, reporter_key: str, yearmonth: str): _reporter_class = AllMonthlyReporters[reporter_key].value - _parsed_yearmonth = YearMonth.from_str(yearmonth) - _reporter_class().run_and_record_for_month(_parsed_yearmonth) + _reporter = _reporter_class(YearMonth.from_str(yearmonth)) + _reporter.run_and_record_for_month() class Command(BaseCommand): diff --git a/osf/metrics/reporters/_base.py b/osf/metrics/reporters/_base.py index bd02ee881cf..beacd30ae30 100644 --- a/osf/metrics/reporters/_base.py +++ b/osf/metrics/reporters/_base.py @@ -1,6 +1,9 @@ from collections import abc +import dataclasses import logging +import celery + from osf.metrics.reports import MonthlyReport from osf.metrics.utils import YearMonth @@ -8,19 +11,19 @@ logger = logging.getLogger(__name__) +@dataclasses.dataclass class MonthlyReporter: - def report( - self, - report_yearmonth: YearMonth, - ) -> abc.Iterable[MonthlyReport] | abc.Iterator[MonthlyReport]: + yearmonth: YearMonth + + def report(self) -> abc.Iterable[MonthlyReport] | abc.Iterator[MonthlyReport]: """build a report for the given month """ raise NotImplementedError(f'{self.__name__} must implement `report`') - def run_and_record_for_month(self, report_yearmonth: YearMonth) -> None: - reports = self.report(report_yearmonth) + def run_and_record_for_month(self) -> None: + reports = self.report() for report in reports: - report.report_yearmonth = report_yearmonth + report.report_yearmonth = self.yearmonth report.save() diff --git a/osf/metrics/reporters/institution_summary_monthly.py b/osf/metrics/reporters/institution_summary_monthly.py index a7622e4eacd..998cc056298 100644 --- a/osf/metrics/reporters/institution_summary_monthly.py +++ b/osf/metrics/reporters/institution_summary_monthly.py @@ -5,26 +5,25 @@ from osf.models.spam import SpamStatus from addons.osfstorage.models import OsfStorageFile from osf.metrics.reports import InstitutionMonthlySummaryReport -from osf.metrics.utils import YearMonth from ._base import MonthlyReporter class InstitutionalSummaryMonthlyReporter(MonthlyReporter): """Generate an InstitutionMonthlySummaryReport for each institution.""" - def report(self, yearmonth: YearMonth): + def report(self): for institution in Institution.objects.all(): - yield self.generate_report(institution, yearmonth) + yield self.generate_report(institution) - def generate_report(self, institution, yearmonth): + def generate_report(self, institution): node_queryset = institution.nodes.filter( deleted__isnull=True, - created__lt=yearmonth.month_end() + created__lt=self.yearmonth.month_end() ).exclude( spam_status=SpamStatus.SPAM, ) - preprint_queryset = self.get_published_preprints(institution, yearmonth) + preprint_queryset = self.get_published_preprints(institution, self.yearmonth) return InstitutionMonthlySummaryReport( institution_id=institution._id, @@ -36,8 +35,8 @@ def generate_report(self, institution, yearmonth): published_preprint_count=preprint_queryset.count(), storage_byte_count=self.get_storage_size(node_queryset, preprint_queryset), public_file_count=self.get_files(node_queryset, preprint_queryset, is_public=True).count(), - monthly_logged_in_user_count=self.get_monthly_logged_in_user_count(institution, yearmonth), - monthly_active_user_count=self.get_monthly_active_user_count(institution, yearmonth), + monthly_logged_in_user_count=self.get_monthly_logged_in_user_count(institution, self.yearmonth), + monthly_active_user_count=self.get_monthly_active_user_count(institution, self.yearmonth), ) def _get_count(self, node_queryset, node_type, is_public): diff --git a/osf/metrics/reporters/institutional_users.py b/osf/metrics/reporters/institutional_users.py index 6b4db457834..780811be743 100644 --- a/osf/metrics/reporters/institutional_users.py +++ b/osf/metrics/reporters/institutional_users.py @@ -22,12 +22,12 @@ class InstitutionalUsersReporter(MonthlyReporter): which offers institutional admins insight into how people at their institution are using osf, based on their explicitly-affiliated osf objects ''' - def report(self, yearmonth: YearMonth): - _before_datetime = yearmonth.month_end() + def report(self): + _before_datetime = self.yearmonth.month_end() for _institution in osfdb.Institution.objects.filter(created__lt=_before_datetime): _user_qs = _institution.get_institution_users().filter(created__lt=_before_datetime) for _user in _user_qs.iterator(chunk_size=_CHUNK_SIZE): - _helper = _InstiUserReportHelper(_institution, _user, yearmonth, _before_datetime) + _helper = _InstiUserReportHelper(_institution, _user, self.yearmonth, _before_datetime) yield _helper.report diff --git a/osf/metrics/reporters/public_item_usage.py b/osf/metrics/reporters/public_item_usage.py index 7aa61723d8b..1fbb85752ad 100644 --- a/osf/metrics/reporters/public_item_usage.py +++ b/osf/metrics/reporters/public_item_usage.py @@ -10,7 +10,6 @@ get_provider_id, ) from osf.metrics.reports import PublicItemUsageReport -from osf.metrics.utils import YearMonth from osf import models as osfdb from website import settings as website_settings from ._base import MonthlyReporter @@ -31,18 +30,18 @@ class PublicItemUsageReporter(MonthlyReporter): includes projects, project components, registrations, registration components, and preprints ''' - def report(self, yearmonth: YearMonth): + def report(self): # use two composite aggregations in parallel to page thru every # public item viewed or downloaded this month, counting: # - views and downloads for each item (using `CountedAuthUsage.item_guid`) # - views for each item's components and files (using `CountedAuthUsage.surrounding_guids`) for _exact_bucket, _contained_views_bucket in _zip_composite_aggs( - self._exact_item_search(yearmonth), 'agg_osfid', - self._contained_item_views_search(yearmonth), 'agg_surrounding_osfid', + self._exact_item_search(), 'agg_osfid', + self._contained_item_views_search(), 'agg_surrounding_osfid', ): try: _report = self._report_from_buckets(_exact_bucket, _contained_views_bucket) - _report.view_session_count = self._get_view_session_count(yearmonth, _report.item_osfid) + _report.view_session_count = self._get_view_session_count(_report.item_osfid) yield _report except _SkipItem: pass @@ -102,20 +101,20 @@ def _init_report_from_osfid(self, osfid: str) -> PublicItemUsageReport: download_session_count=0, ) - def _base_usage_search(self, yearmonth): + def _base_usage_search(self): return ( CountedAuthUsage.search() .filter('term', item_public=True) .filter('range', timestamp={ - 'gte': yearmonth.month_start(), - 'lt': yearmonth.month_end(), + 'gte': self.yearmonth.month_start(), + 'lt': self.yearmonth.month_end(), }) .update_from_dict({'size': 0}) # only aggregations, no hits ) - def _exact_item_search(self, yearmonth) -> edsl.Search: + def _exact_item_search(self) -> edsl.Search: '''aggregate views and downloads on each osfid (not including components/files)''' - _search = self._base_usage_search(yearmonth) + _search = self._base_usage_search() # the main agg: use a composite aggregation to page thru *every* item _agg_osfid = _search.aggs.bucket( 'agg_osfid', @@ -148,10 +147,10 @@ def _exact_item_search(self, yearmonth) -> edsl.Search: ) return _search - def _contained_item_views_search(self, yearmonth) -> edsl.Search: + def _contained_item_views_search(self) -> edsl.Search: '''aggregate views (but not downloads) on components and files contained within each osfid''' _search = ( - self._base_usage_search(yearmonth) + self._base_usage_search() .filter('term', action_labels=CountedAuthUsage.ActionLabel.VIEW.value) ) # the main agg: use a composite aggregation to page thru *every* item @@ -163,14 +162,14 @@ def _contained_item_views_search(self, yearmonth) -> edsl.Search: ) return _search - def _get_view_session_count(self, yearmonth, osfid: str): + def _get_view_session_count(self, osfid: str): '''compute view_session_count separately to avoid double-counting (the same session may be represented in both the composite agg on `item_guid` and that on `surrounding_guids`) ''' _search = ( - self._base_usage_search(yearmonth) + self._base_usage_search() .query( 'bool', filter=[ diff --git a/osf/metrics/reporters/spam_count.py b/osf/metrics/reporters/spam_count.py index c9499efa416..94290f96203 100644 --- a/osf/metrics/reporters/spam_count.py +++ b/osf/metrics/reporters/spam_count.py @@ -8,9 +8,9 @@ class SpamCountReporter(MonthlyReporter): - def report(self, report_yearmonth): - target_month = report_yearmonth.month_start() - next_month = report_yearmonth.month_end() + def report(self): + target_month = self.yearmonth.month_start() + next_month = self.yearmonth.month_end() report = SpamSummaryReport( # Node Log entries diff --git a/osf_tests/metrics/reporters/test_institutional_summary_reporter.py b/osf_tests/metrics/reporters/test_institutional_summary_reporter.py index 4228beb1c47..715a2cd1553 100644 --- a/osf_tests/metrics/reporters/test_institutional_summary_reporter.py +++ b/osf_tests/metrics/reporters/test_institutional_summary_reporter.py @@ -77,8 +77,8 @@ def _create_active_user(cls, institution, date_confirmed): return user def test_report_generation(self): - reporter = InstitutionalSummaryMonthlyReporter() - reports = list(reporter.report(self._yearmonth)) + reporter = InstitutionalSummaryMonthlyReporter(self._yearmonth) + reports = list(reporter.report()) self.assertEqual(len(reports), 1) report = reports[0] @@ -113,8 +113,8 @@ def test_report_generation_multiple_institutions(self): self._create_active_user(institution3, date_confirmed=last_month) # Run the reporter for the current month (February 2018) - reporter = InstitutionalSummaryMonthlyReporter() - reports = list(reporter.report(self._yearmonth)) + reporter = InstitutionalSummaryMonthlyReporter(self._yearmonth) + reports = list(reporter.report()) self.assertEqual(len(reports), 3) # Reports for self._institution, institution2, institution3 # Extract reports by institution @@ -262,8 +262,8 @@ def test_high_counts_multiple_institutions(self): # Run the reporter if enable_benchmarking: reporter_start_time = time.time() - reporter = InstitutionalSummaryMonthlyReporter() - reports = list(reporter.report(self._yearmonth)) + reporter = InstitutionalSummaryMonthlyReporter(self._yearmonth) + reports = list(reporter.report()) assert len(reports) == additional_institution_count + 1 if enable_benchmarking: diff --git a/osf_tests/metrics/reporters/test_institutional_users_reporter.py b/osf_tests/metrics/reporters/test_institutional_users_reporter.py index 8fbb873083f..1a09dec7cec 100644 --- a/osf_tests/metrics/reporters/test_institutional_users_reporter.py +++ b/osf_tests/metrics/reporters/test_institutional_users_reporter.py @@ -62,24 +62,24 @@ def _assert_report_matches_setup(self, report: InstitutionalUserReport, setup: _ self.assertEqual(report.published_preprint_count, setup.published_preprint_count) def test_no_users(self): - _actual_reports = list(InstitutionalUsersReporter().report(self._yearmonth)) + _actual_reports = list(InstitutionalUsersReporter(self._yearmonth).report()) self.assertEqual(_actual_reports, []) def test_one_user_with_nothing(self): self._user_setup_with_nothing.affiliate_user() - _reports = list(InstitutionalUsersReporter().report(self._yearmonth)) + _reports = list(InstitutionalUsersReporter(self._yearmonth).report()) self.assertEqual(len(_reports), 1) self._assert_report_matches_setup(_reports[0], self._user_setup_with_nothing) def test_one_user_with_ones(self): self._user_setup_with_ones.affiliate_user() - _reports = list(InstitutionalUsersReporter().report(self._yearmonth)) + _reports = list(InstitutionalUsersReporter(self._yearmonth).report()) self.assertEqual(len(_reports), 1) self._assert_report_matches_setup(_reports[0], self._user_setup_with_ones) def test_one_user_with_stuff_and_no_files(self): self._user_setup_with_stuff.affiliate_user() - _reports = list(InstitutionalUsersReporter().report(self._yearmonth)) + _reports = list(InstitutionalUsersReporter(self._yearmonth).report()) self.assertEqual(len(_reports), 1) self._assert_report_matches_setup(_reports[0], self._user_setup_with_stuff) self.assertEqual(_reports[0].public_file_count, 2) # preprint 2 files @@ -91,7 +91,7 @@ def test_one_user_with_stuff_and_a_file(self): _project = _user.nodes.first() with _patch_now(self._now): create_test_file(target=_project, user=_user, size=37) - (_report,) = InstitutionalUsersReporter().report(self._yearmonth) + (_report,) = InstitutionalUsersReporter(self._yearmonth).report() self._assert_report_matches_setup(_report, self._user_setup_with_stuff) self.assertEqual(_report.public_file_count, 3) # 2 preprint files self.assertEqual(_report.storage_byte_count, 2711) # 2 preprint files @@ -108,7 +108,7 @@ def test_one_user_with_stuff_and_multiple_files(self): create_test_file(target=_component, user=_user, size=53, filename='bla') create_test_file(target=_component, user=_user, size=51, filename='blar') create_test_file(target=_component, user=_user, size=47, filename='blarg') - (_report,) = InstitutionalUsersReporter().report(self._yearmonth) + (_report,) = InstitutionalUsersReporter(self._yearmonth).report() self._assert_report_matches_setup(_report, self._user_setup_with_stuff) self.assertEqual(_report.public_file_count, 7) # 2 preprint files self.assertEqual(_report.storage_byte_count, 2935) # 2 preprint files + 37 + 73 + 53 + 51 + 47 @@ -125,7 +125,7 @@ def test_several_users(self): _setup.user._id: _setup for _setup in _setups } - _reports = list(InstitutionalUsersReporter().report(self._yearmonth)) + _reports = list(InstitutionalUsersReporter(self._yearmonth).report()) self.assertEqual(len(_reports), len(_setup_by_userid)) for _actual_report in _reports: _setup = _setup_by_userid[_actual_report.user_id] diff --git a/osf_tests/metrics/reporters/test_public_item_usage_reporter.py b/osf_tests/metrics/reporters/test_public_item_usage_reporter.py index 3abcd290048..454b8d6700d 100644 --- a/osf_tests/metrics/reporters/test_public_item_usage_reporter.py +++ b/osf_tests/metrics/reporters/test_public_item_usage_reporter.py @@ -153,15 +153,14 @@ def busy_month_item2(self, ym_busy): ) def test_no_data(self, ym_empty): - _reporter = PublicItemUsageReporter() - _empty = list(_reporter.report(ym_empty)) + _reporter = PublicItemUsageReporter(ym_empty) + _empty = list(_reporter.report()) assert _empty == [] def test_reporter(self, ym_empty, ym_sparse, ym_busy, sparse_month_usage, busy_month_item0, busy_month_item1, busy_month_item2): - _reporter = PublicItemUsageReporter() - _empty = list(_reporter.report(ym_empty)) - _sparse = list(_reporter.report(ym_sparse)) - _busy = list(_reporter.report(ym_busy)) + _empty = list(PublicItemUsageReporter(ym_empty).report()) + _sparse = list(PublicItemUsageReporter(ym_sparse).report()) + _busy = list(PublicItemUsageReporter(ym_busy).report()) # empty month: assert _empty == []