Skip to content

Commit

Permalink
osf:usage supplementary metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
aaxelb committed Oct 1, 2024
1 parent e18dfff commit 954dd0a
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 4 deletions.
33 changes: 33 additions & 0 deletions osf/metadata/osf_gathering.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
without_namespace,
smells_like_iri,
)
from osf.metrics.reports import PublicItemUsageReport
from osf.utils import workflows as osfworkflows
from osf.utils.outcomes import ArtifactTypes
from website import settings as website_settings
Expand Down Expand Up @@ -220,16 +221,22 @@ def osfmap_supplement_for_type(rdftype_iri: str):

# Supplementary-metadata shape, keyed by osfmap type.
# Each listed type currently gets its own (separate) dict holding a single
# `OSF.usage` key mapped to None.
OSFMAP_SUPPLEMENT = {
    _rdftype: {OSF.usage: None}
    for _rdftype in (
        OSF.Project,
        OSF.ProjectComponent,
        OSF.Registration,
        OSF.RegistrationComponent,
        OSF.Preprint,
        OSF.File,
    )
}

Expand Down Expand Up @@ -1051,3 +1058,29 @@ def gather_cedar_templates(focus):
template_iri = rdflib.URIRef(record.get_template_semantic_iri())
yield (OSF.hasCedarTemplate, template_iri)
yield (template_iri, DCTERMS.title, record.get_template_name())


@gather.er(OSF.usage)
def gather_last_month_usage(focus):
    """Yield triples describing the focus item's most recent usage report.

    Searches `PublicItemUsageReport` for the osfid taken from `focus.iri`,
    keeping reports whose `report_yearmonth` is at or after the start of
    last month, and uses the newest one. When a report is found, it is
    described on a fresh blank node linked from the focus via `OSF.usage`;
    when none is found, nothing is yielded.
    """
    _item_osfid = osfguid_from_iri(focus.iri)
    _newest_first = (
        PublicItemUsageReport.search()
        .filter('term', item_osfid=_item_osfid)
        # lower bound only (start of last month) -- the range has no upper
        # bound, so a report for the current month also matches
        .filter('range', report_yearmonth={'gte': 'now-1M/M'})
        .sort('-report_yearmonth')
    )
    # at most one hit: the newest matching report
    _hits = list(_newest_first[:1].execute())
    if not _hits:
        return
    _report = _hits[0]
    _report_ref = rdflib.BNode()
    yield (OSF.usage, _report_ref)
    _service_iri = rdflib.URIRef(website_settings.DOMAIN.rstrip('/'))
    yield (_report_ref, DCAT.accessService, _service_iri)
    _yearmonth_literal = rdflib.Literal(
        str(_report.report_yearmonth),
        datatype=rdflib.XSD.gYearMonth,
    )
    yield (_report_ref, DCTERMS.temporal, _yearmonth_literal)
    for _count_predicate, _count in (
        (OSF.viewCount, _report.view_count),
        (OSF.viewSessionCount, _report.view_session_count),
        (OSF.downloadCount, _report.download_count),
        (OSF.downloadSessionCount, _report.download_session_count),
    ):
        yield (_report_ref, _count_predicate, _count)
2 changes: 1 addition & 1 deletion osf/metrics/counted_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import pytz

from osf.metrics.utils import stable_key
from osf.models import Guid


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -87,6 +86,7 @@ def _autofill_fields(sender, instance, **kwargs):
_fill_pageview_info(instance)
item_guid = getattr(instance, 'item_guid', None)
if item_guid:
from osf.models import Guid
guid_instance = Guid.load(item_guid)
if guid_instance and guid_instance.referent:
_fill_osfguid_info(instance, guid_instance.referent)
Expand Down
17 changes: 16 additions & 1 deletion osf/metrics/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@
from django.dispatch import receiver
from elasticsearch6_dsl import InnerDoc
from elasticsearch_metrics import metrics
from elasticsearch_metrics.signals import pre_save as metrics_pre_save
from elasticsearch_metrics.signals import (
pre_save as metrics_pre_save,
post_save as metrics_post_save,
)

from osf.metrics.utils import stable_key, YearMonth
from website import settings as website_settings


class ReportInvalid(Exception):
Expand Down Expand Up @@ -304,3 +308,14 @@ class PublicItemUsageReport(MonthlyReport):
# download counts of this item only (not including contained components or files)
download_count = metrics.Long() # counter:Total_Item_Requests
download_session_count = metrics.Long() # counter:Unique_Item_Requests


@receiver(metrics_post_save, sender=PublicItemUsageReport)
def update_supplementary_metadata(sender, instance, **kwargs):
    """After a `PublicItemUsageReport` is saved, queue a SHARE update for its item.

    Does nothing unless `website_settings.SHARE_ENABLED` is truthy.
    """
    if not website_settings.SHARE_ENABLED:
        return
    # function-scope import -- NOTE(review): presumably avoids a circular
    # import between osf.metrics and api.share; confirm before hoisting
    from api.share.utils import task__update_share
    _task_kwargs = {'is_supplementary': True, 'is_backfill': True}
    task__update_share.apply_async(
        args=(instance.item_osfid,),
        kwargs=_task_kwargs,
        countdown=30,  # delay 30 seconds; plenty of time for index refresh
    )
3 changes: 2 additions & 1 deletion osf/models/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@
from api.caching.tasks import update_storage_usage
from api.caching import settings as cache_settings
from api.caching.utils import storage_usage_cache
from api.share.utils import update_share


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -711,6 +710,7 @@ def should_request_identifiers(self):

@classmethod
def bulk_update_search(cls, nodes, index=None):
from api.share.utils import update_share
for _node in nodes:
update_share(_node)
from website import search
Expand All @@ -722,6 +722,7 @@ def bulk_update_search(cls, nodes, index=None):
log_exception(e)

def update_search(self):
from api.share.utils import update_share
update_share(self)
from website import search
try:
Expand Down
2 changes: 1 addition & 1 deletion osf/models/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
MergeConflictError)
from framework.exceptions import PermissionsError
from framework.sessions.utils import remove_sessions_for_user
from api.share.utils import update_share
from osf.utils.requests import get_current_request
from osf.exceptions import reraise_django_validation_errors, UserStateError
from .base import BaseModel, GuidMixin, GuidMixinQuerySet
Expand Down Expand Up @@ -1451,6 +1450,7 @@ def is_assumed_ham(self):
return user_has_trusted_email

def update_search(self):
from api.share.utils import update_share
update_share(self)
from website.search.search import update_user
update_user(self)
Expand Down
29 changes: 29 additions & 0 deletions osf_tests/metadata/test_osf_gathering.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
FOAF,
OSF,
OSFIO,
DCAT,
DCTERMS,
DCMITYPE,
DOI,
Expand All @@ -20,11 +21,14 @@
checksum_iri,
)
from osf import models as osfdb
from osf.metrics.reports import PublicItemUsageReport
from osf.metrics.utils import YearMonth
from osf.utils import permissions, workflows
from osf_tests import factories
from website import settings as website_settings
from website.project import new_bookmark_collection
from osf_tests.metadata._utils import assert_triples
from osf_tests.metrics.utils_for_tests import es_metrics_temps


class TestOsfGathering(TestCase):
Expand Down Expand Up @@ -750,3 +754,28 @@ def test_gather_cedar_templates(self):
(self.filefocus.iri, OSF.hasCedarTemplate, cedar_template_iri),
(cedar_template_iri, DCTERMS.title, Literal(self.cedar_template.schema_name))
})

@es_metrics_temps()
def test_gather_last_month_usage(self):
    """Check `gather_last_month_usage` with and without a saved usage report."""
    # before any report is saved, nothing is gathered
    assert_triples(osf_gathering.gather_last_month_usage(self.projectfocus), set())
    # save a report for the current (UTC) month, refreshing on save
    _yearmonth = YearMonth.from_date(datetime.datetime.now(tz=datetime.UTC))
    _report = PublicItemUsageReport(
        item_osfid=self.project._id,
        report_yearmonth=_yearmonth,
        view_count=71,
        view_session_count=13,
        download_count=43,
        download_session_count=11,
    )
    _report.save(refresh=True)
    # the gathered report hangs off a blank node linked from the project focus
    _usage_ref = rdflib.BNode()
    _service_iri = rdflib.URIRef(website_settings.DOMAIN.rstrip('/'))
    _expected_triples = {
        (self.projectfocus.iri, OSF.usage, _usage_ref),
        (_usage_ref, DCTERMS.temporal, Literal(str(_yearmonth), datatype=rdflib.XSD.gYearMonth)),
        (_usage_ref, DCAT.accessService, _service_iri),
        (_usage_ref, OSF.viewCount, Literal(71)),
        (_usage_ref, OSF.viewSessionCount, Literal(13)),
        (_usage_ref, OSF.downloadCount, Literal(43)),
        (_usage_ref, OSF.downloadSessionCount, Literal(11)),
    }
    assert_triples(
        osf_gathering.gather_last_month_usage(self.projectfocus),
        _expected_triples,
    )

0 comments on commit 954dd0a

Please sign in to comment.