From 68742fbd25b3808593d3d6ca405e63029660ff22 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Mon, 21 Oct 2024 11:45:58 -0400 Subject: [PATCH] prov:qualifiedAttribution metadata [ENG-6185] --- osf/metadata/osf_gathering.py | 25 ++++++++- osf/metadata/rdfutils.py | 2 + .../preprint_basic.turtle | 5 +- .../preprint_full.turtle | 5 +- .../project_basic.turtle | 5 +- .../project_full.turtle | 5 +- .../registration_basic.turtle | 5 +- .../registration_full.turtle | 5 +- osf_tests/metadata/test_osf_gathering.py | 56 ++++++++++++++++++- 9 files changed, 103 insertions(+), 10 deletions(-) diff --git a/osf/metadata/osf_gathering.py b/osf/metadata/osf_gathering.py index 2456b412c687..617e22b237d3 100644 --- a/osf/metadata/osf_gathering.py +++ b/osf/metadata/osf_gathering.py @@ -22,6 +22,7 @@ OSF, OSFIO, OWL, + PROV, RDF, ROR, SKOS, @@ -32,7 +33,10 @@ ) from osf.metrics.reports import PublicItemUsageReport from osf.metrics.utils import YearMonth -from osf.utils import workflows as osfworkflows +from osf.utils import ( + workflows as osfworkflows, + permissions as osfpermissions, +) from osf.utils.outcomes import ArtifactTypes from website import settings as website_settings @@ -140,6 +144,7 @@ def pls_get_magic_metadata_basket(osf_item) -> gather.Basket: DCTERMS.creator: OSF_AGENT_REFERENCE, }, OWL.sameAs: None, + PROV.qualifiedAttribution: None, } OSFMAP = { @@ -268,6 +273,11 @@ def pls_get_magic_metadata_basket(osf_item) -> gather.Basket: ArtifactTypes.PAPERS: OSF.hasPapersResource, ArtifactTypes.SUPPLEMENTS: OSF.hasSupplementalResource, } +OSF_CONTRIBUTOR_ROLES = { + osfpermissions.READ: OSF['readonly-contributor'], + osfpermissions.WRITE: OSF['write-contributor'], + osfpermissions.ADMIN: OSF['admin-contributor'], +} BEPRESS_SUBJECT_SCHEME_URI = 'https://bepress.com/reference_guide_dc/disciplines/' BEPRESS_SUBJECT_SCHEME_TITLE = 'bepress Digital Commons Three-Tiered Taxonomy' @@ -902,6 +912,19 @@ def gather_agents(focus): # TODO: preserve order via rdflib.Seq +@gather.er(PROV.qualifiedAttribution) +def gather_qualified_attributions(focus): + _contributor_set = getattr(focus.dbmodel, 'contributor_set', None) + if _contributor_set is not None: + for _contributor in _contributor_set.filter(visible=True).select_related('user'): + _osfrole_ref = OSF_CONTRIBUTOR_ROLES.get(_contributor.permission) + if _osfrole_ref is not None: + _attribution_ref = rdflib.BNode() + yield (PROV.qualifiedAttribution, _attribution_ref) + yield (_attribution_ref, PROV.agent, OsfFocus(_contributor.user)) + yield (_attribution_ref, DCAT.hadRole, _osfrole_ref) + + @gather.er(OSF.affiliation) def gather_affiliated_institutions(focus): if hasattr(focus.dbmodel, 'get_affiliated_institutions'): # like OSFUser diff --git a/osf/metadata/rdfutils.py b/osf/metadata/rdfutils.py index cd944169e205..d2596ad344e2 100644 --- a/osf/metadata/rdfutils.py +++ b/osf/metadata/rdfutils.py @@ -23,6 +23,7 @@ RDF = rdflib.Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#') # "resource description framework" SKOS = rdflib.Namespace('http://www.w3.org/2004/02/skos/core#') # "simple knowledge organization system" DCAT = rdflib.Namespace('http://www.w3.org/ns/dcat#') # "data catalog (vocabulary)" +PROV = rdflib.Namespace('http://www.w3.org/ns/prov#') # "provenance" # non-standard namespace for datacite terms (resolves to datacite docs) DATACITE = rdflib.Namespace('https://schema.datacite.org/meta/kernel-4/#') @@ -38,6 +39,7 @@ 'skos': SKOS, 'dcmitype': DCMITYPE, 'dcat': DCAT, + 'prov': PROV, } diff --git a/osf_tests/metadata/expected_metadata_files/preprint_basic.turtle b/osf_tests/metadata/expected_metadata_files/preprint_basic.turtle index f6db59e6e249..ee7e866827b3 100644 --- a/osf_tests/metadata/expected_metadata_files/preprint_basic.turtle +++ b/osf_tests/metadata/expected_metadata_files/preprint_basic.turtle @@ -3,6 +3,7 @@ @prefix foaf: . @prefix osf: . @prefix owl: . +@prefix prov: . @prefix rdfs: . @prefix skos: . @@ -25,7 +26,9 @@ dcat:accessService ; osf:hostingInstitution ; osf:isSupplementedBy ; - osf:statedConflictOfInterest osf:no-conflict-of-interest . + osf:statedConflictOfInterest osf:no-conflict-of-interest ; + prov:qualifiedAttribution [ dcat:hadRole osf:admin-contributor ; + prov:agent ] . a dcterms:Agent, foaf:Organization ; diff --git a/osf_tests/metadata/expected_metadata_files/preprint_full.turtle b/osf_tests/metadata/expected_metadata_files/preprint_full.turtle index 93c69fa4e8cf..cdf665fd5fec 100644 --- a/osf_tests/metadata/expected_metadata_files/preprint_full.turtle +++ b/osf_tests/metadata/expected_metadata_files/preprint_full.turtle @@ -3,6 +3,7 @@ @prefix foaf: . @prefix osf: . @prefix owl: . +@prefix prov: . @prefix rdfs: . @prefix skos: . @@ -25,7 +26,9 @@ dcat:accessService ; osf:hostingInstitution ; osf:isSupplementedBy ; - osf:statedConflictOfInterest osf:no-conflict-of-interest . + osf:statedConflictOfInterest osf:no-conflict-of-interest ; + prov:qualifiedAttribution [ dcat:hadRole osf:admin-contributor ; + prov:agent ] . a dcterms:Agent, foaf:Organization ; diff --git a/osf_tests/metadata/expected_metadata_files/project_basic.turtle b/osf_tests/metadata/expected_metadata_files/project_basic.turtle index 252bfeac4abe..2d771d1fdfd7 100644 --- a/osf_tests/metadata/expected_metadata_files/project_basic.turtle +++ b/osf_tests/metadata/expected_metadata_files/project_basic.turtle @@ -3,6 +3,7 @@ @prefix foaf: . @prefix osf: . @prefix owl: . +@prefix prov: . @prefix rdfs: . a osf:Project ; @@ -23,7 +24,9 @@ dcat:accessService ; osf:contains ; osf:hostingInstitution ; - osf:supplements . + osf:supplements ; + prov:qualifiedAttribution [ dcat:hadRole osf:admin-contributor ; + prov:agent ] . a osf:Preprint ; dcterms:created "2123-05-04" ; diff --git a/osf_tests/metadata/expected_metadata_files/project_full.turtle b/osf_tests/metadata/expected_metadata_files/project_full.turtle index 329bb734f1ca..f3e3c340b3a0 100644 --- a/osf_tests/metadata/expected_metadata_files/project_full.turtle +++ b/osf_tests/metadata/expected_metadata_files/project_full.turtle @@ -3,6 +3,7 @@ @prefix foaf: . @prefix osf: . @prefix owl: . +@prefix prov: . @prefix rdfs: . a osf:Project ; @@ -29,7 +30,9 @@ osf:hasFunding , ; osf:hostingInstitution ; - osf:supplements . + osf:supplements ; + prov:qualifiedAttribution [ dcat:hadRole osf:admin-contributor ; + prov:agent ] . a osf:Preprint ; dcterms:created "2123-05-04" ; diff --git a/osf_tests/metadata/expected_metadata_files/registration_basic.turtle b/osf_tests/metadata/expected_metadata_files/registration_basic.turtle index 693ed41c0cb1..97672d4b0e5c 100644 --- a/osf_tests/metadata/expected_metadata_files/registration_basic.turtle +++ b/osf_tests/metadata/expected_metadata_files/registration_basic.turtle @@ -3,6 +3,7 @@ @prefix foaf: . @prefix osf: . @prefix owl: . +@prefix prov: . @prefix rdfs: . a osf:Registration ; @@ -22,7 +23,9 @@ dcterms:type ; dcat:accessService ; osf:contains ; - osf:hostingInstitution . + osf:hostingInstitution ; + prov:qualifiedAttribution [ dcat:hadRole osf:admin-contributor ; + prov:agent ] . a osf:Project ; dcterms:created "2123-05-04" ; diff --git a/osf_tests/metadata/expected_metadata_files/registration_full.turtle b/osf_tests/metadata/expected_metadata_files/registration_full.turtle index 6db9fd83ca3a..0efab405b341 100644 --- a/osf_tests/metadata/expected_metadata_files/registration_full.turtle +++ b/osf_tests/metadata/expected_metadata_files/registration_full.turtle @@ -3,6 +3,7 @@ @prefix foaf: . @prefix osf: . @prefix owl: . +@prefix prov: . @prefix rdfs: . a osf:Registration ; @@ -22,7 +23,9 @@ dcterms:type ; dcat:accessService ; osf:contains ; - osf:hostingInstitution . + osf:hostingInstitution ; + prov:qualifiedAttribution [ dcat:hadRole osf:admin-contributor ; + prov:agent ] . a osf:Project ; dcterms:created "2123-05-04" ; diff --git a/osf_tests/metadata/test_osf_gathering.py b/osf_tests/metadata/test_osf_gathering.py index afc73c179476..021ed9c5096e 100644 --- a/osf_tests/metadata/test_osf_gathering.py +++ b/osf_tests/metadata/test_osf_gathering.py @@ -17,6 +17,7 @@ DCMITYPE, DOI, OWL, + PROV, RDF, SKOS, checksum_iri, @@ -40,12 +41,13 @@ def setUpTestData(cls): external_identity={'ORCID': {'1234-4321-5678-8765': 'VERIFIED'}}, ) cls.user__readonly = factories.UserFactory( - external_identity={'ORCID': {'1234-4321-6789-9876': 'CREATE'}}, + external_identity={'ORCID': {'1234-4321-6789-9876': 'CREATE'}}, # unverified orcid social={ 'profileWebsites': ['http://mysite.example', 'http://myothersite.example/foo'], 'baiduScholar': 'blarg', }, ) + cls.user__invisible = factories.UserFactory() # cedar metadata template cls.cedar_template = factories.CedarMetadataTemplateFactory( cedar_id='https://repo.metadatacenter.org/templates/this-is-a-cedar-id', @@ -58,7 +60,8 @@ def setUpTestData(cls): cls.project.add_addon('box', auth=None) cls.project.add_addon('gitlab', auth=None) cls.project.add_contributor(cls.user__readwrite, permissions=permissions.WRITE) - cls.project.add_contributor(cls.user__readonly, permissions=permissions.READ, visible=False) + cls.project.add_contributor(cls.user__readonly, permissions=permissions.READ) + cls.project.add_contributor(cls.user__invisible, permissions=permissions.WRITE, visible=False) cls.component = factories.ProjectFactory(parent=cls.project, creator=cls.user__admin, is_public=True) cls.sibcomponent = factories.ProjectFactory(parent=cls.project, creator=cls.user__admin, is_public=True) cls.subcomponent = factories.ProjectFactory(parent=cls.component, creator=cls.user__admin, is_public=True) @@ -95,7 +98,8 @@ def setUpTestData(cls): is_public=True, ) cls.preprint.add_contributor(cls.user__readwrite, permissions=permissions.WRITE) - cls.preprint.add_contributor(cls.user__readonly, permissions=permissions.READ, visible=False) + cls.preprint.add_contributor(cls.user__readonly, permissions=permissions.READ) + cls.preprint.add_contributor(cls.user__invisible, permissions=permissions.WRITE, visible=False) cls.registration_cedar_record = factories.CedarMetadataRecordFactory( template=cls.cedar_template, is_published=True, @@ -528,11 +532,19 @@ def test_gather_agents(self): assert_triples(osf_gathering.gather_agents(self.projectfocus), { (self.projectfocus.iri, DCTERMS.creator, self.userfocus__admin), (self.projectfocus.iri, DCTERMS.creator, self.userfocus__readwrite), + (self.projectfocus.iri, DCTERMS.creator, self.userfocus__readonly), }) # focus: registration assert_triples(osf_gathering.gather_agents(self.registrationfocus), { (self.registrationfocus.iri, DCTERMS.creator, self.userfocus__admin), (self.registrationfocus.iri, DCTERMS.creator, self.userfocus__readwrite), + (self.registrationfocus.iri, DCTERMS.creator, self.userfocus__readonly), + }) + # focus: preprint + assert_triples(osf_gathering.gather_agents(self.preprintfocus), { + (self.preprintfocus.iri, DCTERMS.creator, self.userfocus__admin), + (self.preprintfocus.iri, DCTERMS.creator, self.userfocus__readwrite), + (self.preprintfocus.iri, DCTERMS.creator, self.userfocus__readonly), }) # focus: file assert_triples(osf_gathering.gather_agents(self.filefocus), set()) @@ -832,3 +844,41 @@ def test_gather_storage_byte_count(self): assert_triples(osf_gathering.gather_storage_byte_count(self.preprintfocus), { (self.preprintfocus.iri, OSF.storageByteCount, Literal(1337)), }) + + def test_gather_qualified_attributions(self): + _attribution_admin = rdflib.BNode() + _attribution_readwrite = rdflib.BNode() + _attribution_readonly = rdflib.BNode() + assert_triples(osf_gathering.gather_qualified_attributions(self.projectfocus), { + (self.projectfocus.iri, PROV.qualifiedAttribution, _attribution_admin), + (_attribution_admin, PROV.agent, self.userfocus__admin), + (_attribution_admin, DCAT.hadRole, OSF['admin-contributor']), + (self.projectfocus.iri, PROV.qualifiedAttribution, _attribution_readwrite), + (_attribution_readwrite, PROV.agent, self.userfocus__readwrite), + (_attribution_readwrite, DCAT.hadRole, OSF['write-contributor']), + (self.projectfocus.iri, PROV.qualifiedAttribution, _attribution_readonly), + (_attribution_readonly, PROV.agent, self.userfocus__readonly), + (_attribution_readonly, DCAT.hadRole, OSF['readonly-contributor']), + }) + assert_triples(osf_gathering.gather_qualified_attributions(self.registrationfocus), { + (self.registrationfocus.iri, PROV.qualifiedAttribution, _attribution_admin), + (_attribution_admin, PROV.agent, self.userfocus__admin), + (_attribution_admin, DCAT.hadRole, OSF['admin-contributor']), + (self.registrationfocus.iri, PROV.qualifiedAttribution, _attribution_readwrite), + (_attribution_readwrite, PROV.agent, self.userfocus__readwrite), + (_attribution_readwrite, DCAT.hadRole, OSF['write-contributor']), + (self.registrationfocus.iri, PROV.qualifiedAttribution, _attribution_readonly), + (_attribution_readonly, PROV.agent, self.userfocus__readonly), + (_attribution_readonly, DCAT.hadRole, OSF['readonly-contributor']), + }) + assert_triples(osf_gathering.gather_qualified_attributions(self.preprintfocus), { + (self.preprintfocus.iri, PROV.qualifiedAttribution, _attribution_admin), + (_attribution_admin, PROV.agent, self.userfocus__admin), + (_attribution_admin, DCAT.hadRole, OSF['admin-contributor']), + (self.preprintfocus.iri, PROV.qualifiedAttribution, _attribution_readwrite), + (_attribution_readwrite, PROV.agent, self.userfocus__readwrite), + (_attribution_readwrite, DCAT.hadRole, OSF['write-contributor']), + (self.preprintfocus.iri, PROV.qualifiedAttribution, _attribution_readonly), + (_attribution_readonly, PROV.agent, self.userfocus__readonly), + (_attribution_readonly, DCAT.hadRole, OSF['readonly-contributor']), + })