From 600bc6d64aee5d999829f2cc00f8fbfe958fb645 Mon Sep 17 00:00:00 2001 From: Abram Booth Date: Wed, 10 Jan 2024 08:33:15 -0500 Subject: [PATCH] fix: multiple funding awards from the same funder would previously generate invalid xml for datacite, now should not. --- .../datacite/datacite_tree_walker.py | 53 +++++++++++-------- .../expected_metadata_files/file_full.turtle | 16 +++++- .../preprint_full.turtle | 16 +++++- .../project_full.datacite.json | 19 +++++++ .../project_full.datacite.xml | 10 ++++ .../project_full.turtle | 16 +++++- .../registration_full.turtle | 16 +++++- .../metadata/test_serialized_metadata.py | 16 +++++- 8 files changed, 131 insertions(+), 31 deletions(-) diff --git a/osf/metadata/serializers/datacite/datacite_tree_walker.py b/osf/metadata/serializers/datacite/datacite_tree_walker.py index 08990d466a9..bddc64fac6a 100644 --- a/osf/metadata/serializers/datacite/datacite_tree_walker.py +++ b/osf/metadata/serializers/datacite/datacite_tree_walker.py @@ -268,33 +268,42 @@ def _visit_dates(self, parent_el): def _visit_funding_references(self, parent_el): fundrefs_el = self.visit(parent_el, 'fundingReferences', is_list=True) + _visited_funders = set() + for _funding_award in sorted(self.basket[OSF.hasFunding]): + # datacite allows at most one funder per funding reference + _funder = next(self.basket[_funding_award:DCTERMS.contributor]) + self._funding_reference(fundrefs_el, _funder, _funding_award) + _visited_funders.add(_funder) for _funder in self.basket[OSF.funder]: - fundref_el = self.visit(fundrefs_el, 'fundingReference') - self.visit(fundref_el, 'funderName', text=next(self.basket[_funder:FOAF.name], '')) - funder_identifier = next(self.basket[_funder:DCTERMS.identifier], '') + if _funder not in _visited_funders: + self._funding_reference(fundrefs_el, _funder) + + def _funding_reference(self, fundrefs_el, funder, funding_award=None): + _fundref_el = self.visit(fundrefs_el, 'fundingReference') + self.visit(_fundref_el, 'funderName', text=next(self.basket[funder:FOAF.name], '')) + _funder_identifier = next(self.basket[funder:DCTERMS.identifier], '') + self.visit( + _fundref_el, + 'funderIdentifier', + text=_funder_identifier, + attrib={ + 'funderIdentifierType': self._funder_identifier_type(_funder_identifier), + }, + ) + if funding_award is not None: self.visit( - fundref_el, - 'funderIdentifier', - text=funder_identifier, + _fundref_el, + 'awardNumber', + text=next(self.basket[funding_award:OSF.awardNumber], ''), attrib={ - 'funderIdentifierType': self._funder_identifier_type(funder_identifier), + 'awardURI': ( + str(funding_award) + if isinstance(funding_award, rdflib.URIRef) + else '' + ) }, ) - for _funding_award in self.basket[OSF.hasFunding]: - if _funder in self.basket[_funding_award:DCTERMS.contributor]: - self.visit( - fundref_el, - 'awardNumber', - text=next(self.basket[_funding_award:OSF.awardNumber], ''), - attrib={ - 'awardURI': ( - str(_funding_award) - if isinstance(_funding_award, rdflib.URIRef) - else '' - ) - }, - ) - self.visit(fundref_el, 'awardTitle', text=next(self.basket[_funding_award:DCTERMS.title], '')) + self.visit(_fundref_el, 'awardTitle', text=next(self.basket[funding_award:DCTERMS.title], '')) def _visit_publication_year(self, parent_el, focus_iri): year_copyrighted = next(self.basket[focus_iri:DCTERMS.dateCopyrighted], None) diff --git a/osf_tests/metadata/expected_metadata_files/file_full.turtle b/osf_tests/metadata/expected_metadata_files/file_full.turtle index d04eca39e8e..4859b2bf84a 100644 --- a/osf_tests/metadata/expected_metadata_files/file_full.turtle +++ b/osf_tests/metadata/expected_metadata_files/file_full.turtle @@ -28,8 +28,10 @@ dcterms:title "this is a project title!"@en ; dcterms:type ; owl:sameAs ; - osf:funder ; - osf:hasFunding . + osf:funder , + ; + osf:hasFunding , + . a osf:FileVersion ; dcterms:created "2123-05-04" ; @@ -46,6 +48,12 @@ dcterms:title "because reasons" ; osf:awardNumber "10000000" . + a osf:FundingAward ; + dcterms:contributor ; + dcterms:identifier "https://moneypockets.example/millions-more" ; + dcterms:title "because reasons!" ; + osf:awardNumber "2000000" . + dcterms:identifier "https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode" ; foaf:name "CC-By Attribution-NonCommercial-NoDerivatives 4.0 International" . @@ -53,6 +61,10 @@ dcterms:identifier "https://doi.org/10.$$$$" ; foaf:name "Mx. Moneypockets" . + a dcterms:Agent ; + dcterms:identifier "https://doi.org/10.$" ; + foaf:name "Caring Fan" . + a dcterms:Agent, foaf:Person ; dcterms:identifier "http://localhost:5000/w1ibb" ; diff --git a/osf_tests/metadata/expected_metadata_files/preprint_full.turtle b/osf_tests/metadata/expected_metadata_files/preprint_full.turtle index 59943430882..10ae10a7741 100644 --- a/osf_tests/metadata/expected_metadata_files/preprint_full.turtle +++ b/osf_tests/metadata/expected_metadata_files/preprint_full.turtle @@ -50,8 +50,10 @@ dcterms:title "this is a project title!"@en ; dcterms:type ; owl:sameAs ; - osf:funder ; - osf:hasFunding . + osf:funder , + ; + osf:hasFunding , + . a skos:Concept ; skos:broader ; @@ -77,6 +79,12 @@ dcterms:title "because reasons" ; osf:awardNumber "10000000" . + a osf:FundingAward ; + dcterms:contributor ; + dcterms:identifier "https://moneypockets.example/millions-more" ; + dcterms:title "because reasons!" ; + osf:awardNumber "2000000" . + rdfs:label "Dataset"@en . rdfs:label "Preprint"@en . @@ -106,6 +114,10 @@ dcterms:identifier "https://doi.org/10.$$$$" ; foaf:name "Mx. Moneypockets" . + a dcterms:Agent ; + dcterms:identifier "https://doi.org/10.$" ; + foaf:name "Caring Fan" . + a skos:Concept ; skos:inScheme ; skos:prefLabel "wibbble" . diff --git a/osf_tests/metadata/expected_metadata_files/project_full.datacite.json b/osf_tests/metadata/expected_metadata_files/project_full.datacite.json index d77541c609e..43d3373c9f1 100644 --- a/osf_tests/metadata/expected_metadata_files/project_full.datacite.json +++ b/osf_tests/metadata/expected_metadata_files/project_full.datacite.json @@ -59,6 +59,25 @@ "funderIdentifierType": "Crossref Funder ID" }, "funderName": "Mx. Moneypockets" + }, + { + "awardNumber": { + "awardNumber": "2000000", + "awardURI": "https://moneypockets.example/millions-more" + }, + "awardTitle": "because reasons!", + "funderIdentifier": { + "funderIdentifier": "https://doi.org/10.$$$$", + "funderIdentifierType": "Crossref Funder ID" + }, + "funderName": "Mx. Moneypockets" + }, + { + "funderIdentifier": { + "funderIdentifier": "https://doi.org/10.$", + "funderIdentifierType": "Crossref Funder ID" + }, + "funderName": "Caring Fan" } ], "identifier": { diff --git a/osf_tests/metadata/expected_metadata_files/project_full.datacite.xml b/osf_tests/metadata/expected_metadata_files/project_full.datacite.xml index 95f15129b3f..8cf7efb1221 100644 --- a/osf_tests/metadata/expected_metadata_files/project_full.datacite.xml +++ b/osf_tests/metadata/expected_metadata_files/project_full.datacite.xml @@ -42,6 +42,16 @@ 10000000 because reasons + + Mx. Moneypockets + https://doi.org/10.$$$$ + 2000000 + because reasons! + + + Caring Fan + https://doi.org/10.$ + http://localhost:5000/w5ibb diff --git a/osf_tests/metadata/expected_metadata_files/project_full.turtle b/osf_tests/metadata/expected_metadata_files/project_full.turtle index 5fa0dad1229..4a601897f11 100644 --- a/osf_tests/metadata/expected_metadata_files/project_full.turtle +++ b/osf_tests/metadata/expected_metadata_files/project_full.turtle @@ -24,8 +24,10 @@ owl:sameAs ; dcat:accessService ; osf:contains ; - osf:funder ; - osf:hasFunding ; + osf:funder , + ; + osf:hasFunding , + ; osf:hostingInstitution ; osf:supplements . @@ -64,6 +66,12 @@ dcterms:title "because reasons" ; osf:awardNumber "10000000" . + a osf:FundingAward ; + dcterms:contributor ; + dcterms:identifier "https://moneypockets.example/millions-more" ; + dcterms:title "because reasons!" ; + osf:awardNumber "2000000" . + a dcterms:Agent, foaf:Organization ; dcterms:identifier "https://cos.io/", @@ -88,6 +96,10 @@ dcterms:identifier "https://doi.org/10.$$$$" ; foaf:name "Mx. Moneypockets" . + a dcterms:Agent ; + dcterms:identifier "https://doi.org/10.$" ; + foaf:name "Caring Fan" . + a dcterms:Agent, foaf:Person ; dcterms:identifier "http://localhost:5000/w1ibb" ; diff --git a/osf_tests/metadata/expected_metadata_files/registration_full.turtle b/osf_tests/metadata/expected_metadata_files/registration_full.turtle index 9101e9f64b3..2fe48ce7fae 100644 --- a/osf_tests/metadata/expected_metadata_files/registration_full.turtle +++ b/osf_tests/metadata/expected_metadata_files/registration_full.turtle @@ -35,8 +35,10 @@ dcterms:title "this is a project title!"@en ; dcterms:type ; owl:sameAs ; - osf:funder ; - osf:hasFunding . + osf:funder , + ; + osf:hasFunding , + . a osf:FundingAward ; dcterms:contributor ; @@ -44,6 +46,12 @@ dcterms:title "because reasons" ; osf:awardNumber "10000000" . + a osf:FundingAward ; + dcterms:contributor ; + dcterms:identifier "https://moneypockets.example/millions-more" ; + dcterms:title "because reasons!" ; + osf:awardNumber "2000000" . + a dcterms:Agent, foaf:Organization ; dcterms:identifier "https://cos.io/", @@ -63,6 +71,10 @@ dcterms:identifier "https://doi.org/10.$$$$" ; foaf:name "Mx. Moneypockets" . + a dcterms:Agent ; + dcterms:identifier "https://doi.org/10.$" ; + foaf:name "Caring Fan" . + a dcterms:Agent, foaf:Person ; dcterms:identifier "http://localhost:5000/w1ibb" ; diff --git a/osf_tests/metadata/test_serialized_metadata.py b/osf_tests/metadata/test_serialized_metadata.py index ec9eb6b4af2..bc6b1387c60 100644 --- a/osf_tests/metadata/test_serialized_metadata.py +++ b/osf_tests/metadata/test_serialized_metadata.py @@ -222,13 +222,27 @@ def _setUp_full(self): 'language': 'en', 'resource_type_general': 'Dataset', 'funding_info': [ - { + { # full funding reference: 'funder_name': 'Mx. Moneypockets', 'funder_identifier': 'https://doi.org/10.$$$$', 'funder_identifier_type': 'Crossref Funder ID', 'award_number': '10000000', 'award_uri': 'https://moneypockets.example/millions', 'award_title': 'because reasons', + }, { # second funding award from the same funder: + 'funder_name': 'Mx. Moneypockets', + 'funder_identifier': 'https://doi.org/10.$$$$', + 'funder_identifier_type': 'Crossref Funder ID', + 'award_number': '2000000', + 'award_uri': 'https://moneypockets.example/millions-more', + 'award_title': 'because reasons!', + }, { # no award info, just a funder: + 'funder_name': 'Caring Fan', + 'funder_identifier': 'https://doi.org/10.$', + 'funder_identifier_type': 'Crossref Funder ID', + 'award_number': '', + 'award_uri': '', + 'award_title': '', }, ], }, auth=self.user)