Skip to content

Commit

Permalink
add yearmonth as init arg to MonthlyReporter
Browse files Browse the repository at this point in the history
  • Loading branch information
aaxelb committed Oct 16, 2024
1 parent 488013b commit 771e63b
Show file tree
Hide file tree
Showing 9 changed files with 56 additions and 56 deletions.
4 changes: 2 additions & 2 deletions osf/management/commands/monthly_reporters_go.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ def monthly_reporters_go(report_year=None, report_month=None):
)
def monthly_reporter_go(task, reporter_key: str, yearmonth: str):
_reporter_class = AllMonthlyReporters[reporter_key].value
_parsed_yearmonth = YearMonth.from_str(yearmonth)
_reporter_class().run_and_record_for_month(_parsed_yearmonth)
_reporter = _reporter_class(YearMonth.from_str(yearmonth))
_reporter.run_and_record_for_month()


class Command(BaseCommand):
Expand Down
17 changes: 10 additions & 7 deletions osf/metrics/reporters/_base.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,29 @@
from collections import abc
import dataclasses
import logging

import celery

from osf.metrics.reports import MonthlyReport
from osf.metrics.utils import YearMonth


logger = logging.getLogger(__name__)


@dataclasses.dataclass
class MonthlyReporter:
def report(
self,
report_yearmonth: YearMonth,
) -> abc.Iterable[MonthlyReport] | abc.Iterator[MonthlyReport]:
yearmonth: YearMonth

def report(self) -> abc.Iterable[MonthlyReport] | abc.Iterator[MonthlyReport]:
"""build a report for the given month
"""
raise NotImplementedError(f'{self.__name__} must implement `report`')

def run_and_record_for_month(self, report_yearmonth: YearMonth) -> None:
reports = self.report(report_yearmonth)
def run_and_record_for_month(self) -> None:
reports = self.report()
for report in reports:
report.report_yearmonth = report_yearmonth
report.report_yearmonth = self.yearmonth
report.save()


Expand Down
15 changes: 7 additions & 8 deletions osf/metrics/reporters/institution_summary_monthly.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,25 @@
from osf.models.spam import SpamStatus
from addons.osfstorage.models import OsfStorageFile
from osf.metrics.reports import InstitutionMonthlySummaryReport
from osf.metrics.utils import YearMonth
from ._base import MonthlyReporter


class InstitutionalSummaryMonthlyReporter(MonthlyReporter):
"""Generate an InstitutionMonthlySummaryReport for each institution."""

def report(self, yearmonth: YearMonth):
def report(self):
for institution in Institution.objects.all():
yield self.generate_report(institution, yearmonth)
yield self.generate_report(institution)

def generate_report(self, institution, yearmonth):
def generate_report(self, institution):
node_queryset = institution.nodes.filter(
deleted__isnull=True,
created__lt=yearmonth.month_end()
created__lt=self.yearmonth.month_end()
).exclude(
spam_status=SpamStatus.SPAM,
)

preprint_queryset = self.get_published_preprints(institution, yearmonth)
preprint_queryset = self.get_published_preprints(institution, self.yearmonth)

return InstitutionMonthlySummaryReport(
institution_id=institution._id,
Expand All @@ -36,8 +35,8 @@ def generate_report(self, institution, yearmonth):
published_preprint_count=preprint_queryset.count(),
storage_byte_count=self.get_storage_size(node_queryset, preprint_queryset),
public_file_count=self.get_files(node_queryset, preprint_queryset, is_public=True).count(),
monthly_logged_in_user_count=self.get_monthly_logged_in_user_count(institution, yearmonth),
monthly_active_user_count=self.get_monthly_active_user_count(institution, yearmonth),
monthly_logged_in_user_count=self.get_monthly_logged_in_user_count(institution, self.yearmonth),
monthly_active_user_count=self.get_monthly_active_user_count(institution, self.yearmonth),
)

def _get_count(self, node_queryset, node_type, is_public):
Expand Down
6 changes: 3 additions & 3 deletions osf/metrics/reporters/institutional_users.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@ class InstitutionalUsersReporter(MonthlyReporter):
which offers institutional admins insight into how people at their institution are
using osf, based on their explicitly-affiliated osf objects
'''
def report(self, yearmonth: YearMonth):
_before_datetime = yearmonth.month_end()
def report(self):
_before_datetime = self.yearmonth.month_end()
for _institution in osfdb.Institution.objects.filter(created__lt=_before_datetime):
_user_qs = _institution.get_institution_users().filter(created__lt=_before_datetime)
for _user in _user_qs.iterator(chunk_size=_CHUNK_SIZE):
_helper = _InstiUserReportHelper(_institution, _user, yearmonth, _before_datetime)
_helper = _InstiUserReportHelper(_institution, _user, self.yearmonth, _before_datetime)
yield _helper.report


Expand Down
27 changes: 13 additions & 14 deletions osf/metrics/reporters/public_item_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
get_provider_id,
)
from osf.metrics.reports import PublicItemUsageReport
from osf.metrics.utils import YearMonth
from osf import models as osfdb
from website import settings as website_settings
from ._base import MonthlyReporter
Expand All @@ -31,18 +30,18 @@ class PublicItemUsageReporter(MonthlyReporter):
includes projects, project components, registrations, registration components, and preprints
'''

def report(self, yearmonth: YearMonth):
def report(self):
# use two composite aggregations in parallel to page thru every
# public item viewed or downloaded this month, counting:
# - views and downloads for each item (using `CountedAuthUsage.item_guid`)
# - views for each item's components and files (using `CountedAuthUsage.surrounding_guids`)
for _exact_bucket, _contained_views_bucket in _zip_composite_aggs(
self._exact_item_search(yearmonth), 'agg_osfid',
self._contained_item_views_search(yearmonth), 'agg_surrounding_osfid',
self._exact_item_search(), 'agg_osfid',
self._contained_item_views_search(), 'agg_surrounding_osfid',
):
try:
_report = self._report_from_buckets(_exact_bucket, _contained_views_bucket)
_report.view_session_count = self._get_view_session_count(yearmonth, _report.item_osfid)
_report.view_session_count = self._get_view_session_count(_report.item_osfid)
yield _report
except _SkipItem:
pass
Expand Down Expand Up @@ -102,20 +101,20 @@ def _init_report_from_osfid(self, osfid: str) -> PublicItemUsageReport:
download_session_count=0,
)

def _base_usage_search(self, yearmonth):
def _base_usage_search(self):
return (
CountedAuthUsage.search()
.filter('term', item_public=True)
.filter('range', timestamp={
'gte': yearmonth.month_start(),
'lt': yearmonth.month_end(),
'gte': self.yearmonth.month_start(),
'lt': self.yearmonth.month_end(),
})
.update_from_dict({'size': 0}) # only aggregations, no hits
)

def _exact_item_search(self, yearmonth) -> edsl.Search:
def _exact_item_search(self) -> edsl.Search:
'''aggregate views and downloads on each osfid (not including components/files)'''
_search = self._base_usage_search(yearmonth)
_search = self._base_usage_search()
# the main agg: use a composite aggregation to page thru *every* item
_agg_osfid = _search.aggs.bucket(
'agg_osfid',
Expand Down Expand Up @@ -148,10 +147,10 @@ def _exact_item_search(self, yearmonth) -> edsl.Search:
)
return _search

def _contained_item_views_search(self, yearmonth) -> edsl.Search:
def _contained_item_views_search(self) -> edsl.Search:
'''aggregate views (but not downloads) on components and files contained within each osfid'''
_search = (
self._base_usage_search(yearmonth)
self._base_usage_search()
.filter('term', action_labels=CountedAuthUsage.ActionLabel.VIEW.value)
)
# the main agg: use a composite aggregation to page thru *every* item
Expand All @@ -163,14 +162,14 @@ def _contained_item_views_search(self, yearmonth) -> edsl.Search:
)
return _search

def _get_view_session_count(self, yearmonth, osfid: str):
def _get_view_session_count(self, osfid: str):
'''compute view_session_count separately to avoid double-counting
(the same session may be represented in both the composite agg on `item_guid`
and that on `surrounding_guids`)
'''
_search = (
self._base_usage_search(yearmonth)
self._base_usage_search()
.query(
'bool',
filter=[
Expand Down
6 changes: 3 additions & 3 deletions osf/metrics/reporters/spam_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@

class SpamCountReporter(MonthlyReporter):

def report(self, report_yearmonth):
target_month = report_yearmonth.month_start()
next_month = report_yearmonth.month_end()
def report(self):
target_month = self.yearmonth.month_start()
next_month = self.yearmonth.month_end()

report = SpamSummaryReport(
# Node Log entries
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ def _create_active_user(cls, institution, date_confirmed):
return user

def test_report_generation(self):
reporter = InstitutionalSummaryMonthlyReporter()
reports = list(reporter.report(self._yearmonth))
reporter = InstitutionalSummaryMonthlyReporter(self._yearmonth)
reports = list(reporter.report())
self.assertEqual(len(reports), 1)

report = reports[0]
Expand Down Expand Up @@ -113,8 +113,8 @@ def test_report_generation_multiple_institutions(self):
self._create_active_user(institution3, date_confirmed=last_month)

# Run the reporter for the current month (February 2018)
reporter = InstitutionalSummaryMonthlyReporter()
reports = list(reporter.report(self._yearmonth))
reporter = InstitutionalSummaryMonthlyReporter(self._yearmonth)
reports = list(reporter.report())
self.assertEqual(len(reports), 3) # Reports for self._institution, institution2, institution3

# Extract reports by institution
Expand Down Expand Up @@ -262,8 +262,8 @@ def test_high_counts_multiple_institutions(self):
# Run the reporter
if enable_benchmarking:
reporter_start_time = time.time()
reporter = InstitutionalSummaryMonthlyReporter()
reports = list(reporter.report(self._yearmonth))
reporter = InstitutionalSummaryMonthlyReporter(self._yearmonth)
reports = list(reporter.report())
assert len(reports) == additional_institution_count + 1

if enable_benchmarking:
Expand Down
14 changes: 7 additions & 7 deletions osf_tests/metrics/reporters/test_institutional_users_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,24 +62,24 @@ def _assert_report_matches_setup(self, report: InstitutionalUserReport, setup: _
self.assertEqual(report.published_preprint_count, setup.published_preprint_count)

def test_no_users(self):
_actual_reports = list(InstitutionalUsersReporter().report(self._yearmonth))
_actual_reports = list(InstitutionalUsersReporter(self._yearmonth).report())
self.assertEqual(_actual_reports, [])

def test_one_user_with_nothing(self):
self._user_setup_with_nothing.affiliate_user()
_reports = list(InstitutionalUsersReporter().report(self._yearmonth))
_reports = list(InstitutionalUsersReporter(self._yearmonth).report())
self.assertEqual(len(_reports), 1)
self._assert_report_matches_setup(_reports[0], self._user_setup_with_nothing)

def test_one_user_with_ones(self):
self._user_setup_with_ones.affiliate_user()
_reports = list(InstitutionalUsersReporter().report(self._yearmonth))
_reports = list(InstitutionalUsersReporter(self._yearmonth).report())
self.assertEqual(len(_reports), 1)
self._assert_report_matches_setup(_reports[0], self._user_setup_with_ones)

def test_one_user_with_stuff_and_no_files(self):
self._user_setup_with_stuff.affiliate_user()
_reports = list(InstitutionalUsersReporter().report(self._yearmonth))
_reports = list(InstitutionalUsersReporter(self._yearmonth).report())
self.assertEqual(len(_reports), 1)
self._assert_report_matches_setup(_reports[0], self._user_setup_with_stuff)
self.assertEqual(_reports[0].public_file_count, 2) # preprint 2 files
Expand All @@ -91,7 +91,7 @@ def test_one_user_with_stuff_and_a_file(self):
_project = _user.nodes.first()
with _patch_now(self._now):
create_test_file(target=_project, user=_user, size=37)
(_report,) = InstitutionalUsersReporter().report(self._yearmonth)
(_report,) = InstitutionalUsersReporter(self._yearmonth).report()
self._assert_report_matches_setup(_report, self._user_setup_with_stuff)
self.assertEqual(_report.public_file_count, 3) # 2 preprint files
self.assertEqual(_report.storage_byte_count, 2711) # 2 preprint files
Expand All @@ -108,7 +108,7 @@ def test_one_user_with_stuff_and_multiple_files(self):
create_test_file(target=_component, user=_user, size=53, filename='bla')
create_test_file(target=_component, user=_user, size=51, filename='blar')
create_test_file(target=_component, user=_user, size=47, filename='blarg')
(_report,) = InstitutionalUsersReporter().report(self._yearmonth)
(_report,) = InstitutionalUsersReporter(self._yearmonth).report()
self._assert_report_matches_setup(_report, self._user_setup_with_stuff)
self.assertEqual(_report.public_file_count, 7) # 2 preprint files
self.assertEqual(_report.storage_byte_count, 2935) # 2 preprint files + 37 + 73 + 53 + 51 + 47
Expand All @@ -125,7 +125,7 @@ def test_several_users(self):
_setup.user._id: _setup
for _setup in _setups
}
_reports = list(InstitutionalUsersReporter().report(self._yearmonth))
_reports = list(InstitutionalUsersReporter(self._yearmonth).report())
self.assertEqual(len(_reports), len(_setup_by_userid))
for _actual_report in _reports:
_setup = _setup_by_userid[_actual_report.user_id]
Expand Down
11 changes: 5 additions & 6 deletions osf_tests/metrics/reporters/test_public_item_usage_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,15 +153,14 @@ def busy_month_item2(self, ym_busy):
)

def test_no_data(self, ym_empty):
_reporter = PublicItemUsageReporter()
_empty = list(_reporter.report(ym_empty))
_reporter = PublicItemUsageReporter(ym_empty)
_empty = list(_reporter.report())
assert _empty == []

def test_reporter(self, ym_empty, ym_sparse, ym_busy, sparse_month_usage, busy_month_item0, busy_month_item1, busy_month_item2):
_reporter = PublicItemUsageReporter()
_empty = list(_reporter.report(ym_empty))
_sparse = list(_reporter.report(ym_sparse))
_busy = list(_reporter.report(ym_busy))
_empty = list(PublicItemUsageReporter(ym_empty).report())
_sparse = list(PublicItemUsageReporter(ym_sparse).report())
_busy = list(PublicItemUsageReporter(ym_busy).report())

# empty month:
assert _empty == []
Expand Down

0 comments on commit 771e63b

Please sign in to comment.