From 68ae13e2084aed302b99f0fe7484dd8e6f9b2c01 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Thu, 17 Oct 2024 15:02:43 -0400 Subject: [PATCH] respond to review - reduce redundance of view count computation - let "last month" be two months ago if last month isn't there yet --- osf/metrics/reporters/public_item_usage.py | 23 ++++++++--------- osf/metrics/reports.py | 2 +- osf_tests/metrics/test_monthly_report.py | 29 ++++++++++++++-------- 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/osf/metrics/reporters/public_item_usage.py b/osf/metrics/reporters/public_item_usage.py index 45e92d1aeb3..ecc34a5d9c7 100644 --- a/osf/metrics/reporters/public_item_usage.py +++ b/osf/metrics/reporters/public_item_usage.py @@ -43,7 +43,6 @@ def report(self): ): try: _report = self._report_from_buckets(_exact_bucket, _contained_views_bucket) - _report.view_session_count = self._get_view_session_count(_report.item_osfid) yield _report except _SkipItem: pass @@ -63,8 +62,7 @@ def _report_from_buckets(self, exact_bucket, contained_views_bucket): else self._init_report_from_osfid(contained_views_bucket.key.osfid) ) # view counts include views on contained items (components, files) - if contained_views_bucket is not None: - _report.view_count += contained_views_bucket.doc_count + _report.view_count, _report.view_session_count = self._get_view_counts(_report.item_osfid) return _report def _init_report_from_exact_bucket(self, exact_bucket) -> PublicItemUsageReport: @@ -82,10 +80,8 @@ def _init_report_from_exact_bucket(self, exact_bucket) -> PublicItemUsageReport: download_session_count=0, ) for _actionbucket in exact_bucket.agg_action: - if _actionbucket.key == CountedAuthUsage.ActionLabel.VIEW.value: - _report.view_count = _actionbucket.doc_count - # note: view_session_count computed separately to avoid double-counting - elif _actionbucket.key == CountedAuthUsage.ActionLabel.DOWNLOAD.value: + # note: view counts computed separately to avoid double-counting + if _actionbucket.key == CountedAuthUsage.ActionLabel.DOWNLOAD.value: _report.download_count = _actionbucket.doc_count _report.download_session_count = _actionbucket.agg_session_count.value return _report @@ -136,17 +132,16 @@ def _exact_item_search(self) -> edsl.Search: _agg_osfid.bucket('agg_provider_id', 'terms', field='provider_id') # nested agg: for each item, get item_type values _agg_osfid.bucket('agg_item_type', 'terms', field='item_type') - # nested agg: for each item, get view and download count + # nested agg: for each item, get download count _agg_action = _agg_osfid.bucket( 'agg_action', 'terms', field='action_labels', include=[ CountedAuthUsage.ActionLabel.DOWNLOAD.value, - CountedAuthUsage.ActionLabel.VIEW.value, ], ) - # nested nested agg: for each item-action pair, get a session count + # nested nested agg: get download session count _agg_action.metric( 'agg_session_count', 'cardinality', @@ -156,7 +151,7 @@ def _exact_item_search(self) -> edsl.Search: return _search def _contained_item_views_search(self) -> edsl.Search: - '''aggregate views (but not downloads) on components and files contained within each osfid''' + '''iterate osfids with views on contained components and files''' _search = ( self._base_usage_search() .filter('term', action_labels=CountedAuthUsage.ActionLabel.VIEW.value) @@ -170,7 +165,7 @@ def _contained_item_views_search(self) -> edsl.Search: ) return _search - def _get_view_session_count(self, osfid: str): + def _get_view_counts(self, osfid: str) -> tuple[int, int]: '''compute view_session_count separately to avoid double-counting (the same session may be represented in both the composite agg on `item_guid` @@ -197,7 +192,9 @@ def _get_view_session_count(self, osfid: str): precision_threshold=_MAX_CARDINALITY_PRECISION, ) _response = _search.execute() - return _response.aggregations.agg_session_count.value + _view_count = _response.hits.total + _view_session_count = _response.aggregations.agg_session_count.value + return (_view_count, _view_session_count) ### diff --git a/osf/metrics/reports.py b/osf/metrics/reports.py index d61e627fae8..8d0c2f96c13 100644 --- a/osf/metrics/reports.py +++ b/osf/metrics/reports.py @@ -314,7 +314,7 @@ def for_last_month(cls, item_osfid: str) -> PublicItemUsageReport | None: .filter('term', item_osfid=item_osfid) # only last month's report .filter('range', report_yearmonth={ - 'gte': 'now-1M/M', + 'gte': 'now-2M/M', 'lt': 'now/M', }) .sort('-report_yearmonth') diff --git a/osf_tests/metrics/test_monthly_report.py b/osf_tests/metrics/test_monthly_report.py index 5687c9a7b02..23546eb1fb3 100644 --- a/osf_tests/metrics/test_monthly_report.py +++ b/osf_tests/metrics/test_monthly_report.py @@ -88,9 +88,13 @@ def last_month(self, this_month): return _prior_yearmonth(this_month) @pytest.fixture - def prior_month(self, last_month): + def two_months_back(self, last_month): return _prior_yearmonth(last_month) + @pytest.fixture + def three_months_back(self, two_months_back): + return _prior_yearmonth(two_months_back) + @pytest.fixture def this_month_report(self, osfid, this_month): return _item_usage_report(this_month, osfid, view_count=77) @@ -104,19 +108,24 @@ def diff_last_month_report(self, last_month): return _item_usage_report(last_month, 'zyxvt', view_count=17) @pytest.fixture - def prior_month_report(self, osfid, prior_month): - return _item_usage_report(prior_month, osfid, view_count=37) + def two_months_back_report(self, osfid, two_months_back): + return _item_usage_report(two_months_back, osfid, view_count=27) + + @pytest.fixture + def three_months_back_report(self, osfid, three_months_back): + return _item_usage_report(three_months_back, osfid, view_count=37) def test_with_none(self, osfid): - assert PublicItemUsageReport().for_last_month(osfid) is None + assert PublicItemUsageReport.for_last_month(osfid) is None + + def test_with_others(self, osfid, this_month_report, three_months_back_report, diff_last_month_report): + assert PublicItemUsageReport.for_last_month(osfid) is None - def test_with_others(self, osfid, this_month_report, prior_month_report, diff_last_month_report): - assert PublicItemUsageReport().for_last_month(osfid) is None + def test_with_prior_month(self, osfid, this_month_report, two_months_back_report, three_months_back_report, diff_last_month_report): + assert PublicItemUsageReport.for_last_month(osfid) == two_months_back_report - def test_with_last_month(self, osfid, this_month_report, last_month_report, diff_last_month_report, prior_month_report): - _report = PublicItemUsageReport().for_last_month(osfid) - assert _report is not None - assert _report.view_count == 57 + def test_with_last_month(self, osfid, this_month_report, last_month_report, two_months_back_report, three_months_back_report, diff_last_month_report): + assert PublicItemUsageReport.for_last_month(osfid) == last_month_report def _prior_yearmonth(ym: YearMonth) -> YearMonth: