Skip to content

Commit

Permalink
respond to review
Browse files Browse the repository at this point in the history
- reduce redundance of view count computation
- let "last month" be two months ago if last month isn't there yet
  • Loading branch information
aaxelb committed Oct 17, 2024
1 parent 5f70268 commit 68ae13e
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 24 deletions.
23 changes: 10 additions & 13 deletions osf/metrics/reporters/public_item_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ def report(self):
):
try:
_report = self._report_from_buckets(_exact_bucket, _contained_views_bucket)
_report.view_session_count = self._get_view_session_count(_report.item_osfid)
yield _report
except _SkipItem:
pass
Expand All @@ -63,8 +62,7 @@ def _report_from_buckets(self, exact_bucket, contained_views_bucket):
else self._init_report_from_osfid(contained_views_bucket.key.osfid)
)
# view counts include views on contained items (components, files)
if contained_views_bucket is not None:
_report.view_count += contained_views_bucket.doc_count
_report.view_count, _report.view_session_count = self._get_view_counts(_report.item_osfid)
return _report

def _init_report_from_exact_bucket(self, exact_bucket) -> PublicItemUsageReport:
Expand All @@ -82,10 +80,8 @@ def _init_report_from_exact_bucket(self, exact_bucket) -> PublicItemUsageReport:
download_session_count=0,
)
for _actionbucket in exact_bucket.agg_action:
if _actionbucket.key == CountedAuthUsage.ActionLabel.VIEW.value:
_report.view_count = _actionbucket.doc_count
# note: view_session_count computed separately to avoid double-counting
elif _actionbucket.key == CountedAuthUsage.ActionLabel.DOWNLOAD.value:
# note: view counts computed separately to avoid double-counting
if _actionbucket.key == CountedAuthUsage.ActionLabel.DOWNLOAD.value:
_report.download_count = _actionbucket.doc_count
_report.download_session_count = _actionbucket.agg_session_count.value
return _report
Expand Down Expand Up @@ -136,17 +132,16 @@ def _exact_item_search(self) -> edsl.Search:
_agg_osfid.bucket('agg_provider_id', 'terms', field='provider_id')
# nested agg: for each item, get item_type values
_agg_osfid.bucket('agg_item_type', 'terms', field='item_type')
# nested agg: for each item, get view and download count
# nested agg: for each item, get download count
_agg_action = _agg_osfid.bucket(
'agg_action',
'terms',
field='action_labels',
include=[
CountedAuthUsage.ActionLabel.DOWNLOAD.value,
CountedAuthUsage.ActionLabel.VIEW.value,
],
)
# nested nested agg: for each item-action pair, get a session count
# nested nested agg: get download session count
_agg_action.metric(
'agg_session_count',
'cardinality',
Expand All @@ -156,7 +151,7 @@ def _exact_item_search(self) -> edsl.Search:
return _search

def _contained_item_views_search(self) -> edsl.Search:
'''aggregate views (but not downloads) on components and files contained within each osfid'''
'''iterate osfids with views on contained components and files'''
_search = (
self._base_usage_search()
.filter('term', action_labels=CountedAuthUsage.ActionLabel.VIEW.value)
Expand All @@ -170,7 +165,7 @@ def _contained_item_views_search(self) -> edsl.Search:
)
return _search

def _get_view_session_count(self, osfid: str):
def _get_view_counts(self, osfid: str) -> tuple[int, int]:
'''compute view_session_count separately to avoid double-counting
(the same session may be represented in both the composite agg on `item_guid`
Expand All @@ -197,7 +192,9 @@ def _get_view_session_count(self, osfid: str):
precision_threshold=_MAX_CARDINALITY_PRECISION,
)
_response = _search.execute()
return _response.aggregations.agg_session_count.value
_view_count = _response.hits.total
_view_session_count = _response.aggregations.agg_session_count.value
return (_view_count, _view_session_count)


###
Expand Down
2 changes: 1 addition & 1 deletion osf/metrics/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ def for_last_month(cls, item_osfid: str) -> PublicItemUsageReport | None:
.filter('term', item_osfid=item_osfid)
# only last month's report
.filter('range', report_yearmonth={
'gte': 'now-1M/M',
'gte': 'now-2M/M',
'lt': 'now/M',
})
.sort('-report_yearmonth')
Expand Down
29 changes: 19 additions & 10 deletions osf_tests/metrics/test_monthly_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,13 @@ def last_month(self, this_month):
return _prior_yearmonth(this_month)

@pytest.fixture
def prior_month(self, last_month):
def two_months_back(self, last_month):
return _prior_yearmonth(last_month)

@pytest.fixture
def three_months_back(self, two_months_back):
return _prior_yearmonth(two_months_back)

@pytest.fixture
def this_month_report(self, osfid, this_month):
return _item_usage_report(this_month, osfid, view_count=77)
Expand All @@ -104,19 +108,24 @@ def diff_last_month_report(self, last_month):
return _item_usage_report(last_month, 'zyxvt', view_count=17)

@pytest.fixture
def prior_month_report(self, osfid, prior_month):
return _item_usage_report(prior_month, osfid, view_count=37)
def two_months_back_report(self, osfid, two_months_back):
return _item_usage_report(two_months_back, osfid, view_count=27)

@pytest.fixture
def three_months_back_report(self, osfid, three_months_back):
return _item_usage_report(three_months_back, osfid, view_count=37)

def test_with_none(self, osfid):
assert PublicItemUsageReport().for_last_month(osfid) is None
assert PublicItemUsageReport.for_last_month(osfid) is None

def test_with_others(self, osfid, this_month_report, three_months_back_report, diff_last_month_report):
assert PublicItemUsageReport.for_last_month(osfid) is None

def test_with_others(self, osfid, this_month_report, prior_month_report, diff_last_month_report):
assert PublicItemUsageReport().for_last_month(osfid) is None
def test_with_prior_month(self, osfid, this_month_report, two_months_back_report, three_months_back_report, diff_last_month_report):
assert PublicItemUsageReport.for_last_month(osfid) == two_months_back_report

def test_with_last_month(self, osfid, this_month_report, last_month_report, diff_last_month_report, prior_month_report):
_report = PublicItemUsageReport().for_last_month(osfid)
assert _report is not None
assert _report.view_count == 57
def test_with_last_month(self, osfid, this_month_report, last_month_report, two_months_back_report, three_months_back_report, diff_last_month_report):
assert PublicItemUsageReport.for_last_month(osfid) == last_month_report


def _prior_yearmonth(ym: YearMonth) -> YearMonth:
Expand Down

0 comments on commit 68ae13e

Please sign in to comment.