diff --git a/invenio_rdm_records/resources/serializers/datacite/schema.py b/invenio_rdm_records/resources/serializers/datacite/schema.py
index 5ea842917..cb483c7ed 100644
--- a/invenio_rdm_records/resources/serializers/datacite/schema.py
+++ b/invenio_rdm_records/resources/serializers/datacite/schema.py
@@ -27,6 +27,33 @@
 from ...serializers.ui.schema import current_default_locale
 from ..utils import get_preferred_identifier, get_vocabulary_props
 
+RELATED_IDENTIFIER_SCHEMES = {
+    "ark",
+    "arxiv",
+    "bibcode",
+    "doi",
+    "ean13",
+    "eissn",
+    "handle",
+    "igsn",
+    "isbn",
+    "issn",
+    "istc",
+    "lissn",
+    "lsid",
+    "pmid",
+    "purl",
+    "upc",
+    "url",
+    "urn",
+    "w3id",
+}
+"""Allowed related identifier schemes for DataCite.
+
+Vocabulary taken from the DataCite 4.3 schema definition. Entries are
+lower-case because candidate schemes are lower-cased before the lookup.
+"""
+
 
 def get_scheme_datacite(scheme, config_name, default=None):
     """Returns the datacite equivalent of a scheme."""
@@ -362,7 +389,8 @@ def get_related_identifiers(self, obj):
                 default=scheme,
             )
 
-            if id_scheme:
+            # Only serialize related identifiers with a valid scheme for DataCite.
+            if id_scheme and id_scheme.lower() in RELATED_IDENTIFIER_SCHEMES:
                 serialized_identifier = {
                     "relatedIdentifier": rel_id["identifier"],
                     "relationType": props.get("datacite", ""),
@@ -423,13 +451,14 @@ def get_related_identifiers(self, obj):
                     "RDM_RECORDS_IDENTIFIERS_SCHEMES",
                     default="doi",
                 )
-                serialized_identifiers.append(
-                    {
-                        "relatedIdentifier": parent_doi["identifier"],
-                        "relationType": "IsVersionOf",
-                        "relatedIdentifierType": id_scheme,
-                    }
-                )
+                if id_scheme.lower() in RELATED_IDENTIFIER_SCHEMES:
+                    serialized_identifiers.append(
+                        {
+                            "relatedIdentifier": parent_doi["identifier"],
+                            "relationType": "IsVersionOf",
+                            "relatedIdentifierType": id_scheme,
+                        }
+                    )
 
         # adding communities
         communities = obj.get("parent", {}).get("communities", {}).get("ids", [])
diff --git a/tests/resources/serializers/test_datacite_serializer.py b/tests/resources/serializers/test_datacite_serializer.py
index 80ef5115a..14ac1a92b 100644
--- a/tests/resources/serializers/test_datacite_serializer.py
+++ b/tests/resources/serializers/test_datacite_serializer.py
@@ -365,8 +365,8 @@ def test_datacite43_serializer_with_unknown_id_schemes(
     assert expected_pid_id_2 in serialized_record["identifiers"]
     assert len(serialized_record["identifiers"]) == 5
 
-    assert expected_related_id in serialized_record["relatedIdentifiers"]
-    assert len(serialized_record["relatedIdentifiers"]) == 2
+    assert expected_related_id not in serialized_record["relatedIdentifiers"]
+    assert len(serialized_record["relatedIdentifiers"]) == 1
 
     creator_ids = serialized_record["creators"][0]["nameIdentifiers"]
     assert expected_creator_id in creator_ids
@@ -386,5 +386,5 @@ def test_datacite43_xml_serializer_with_unknown_id_schemes(
 
     assert expected_pid_id in serialized_record
     assert expected_pid_id_2 in serialized_record
-    assert expected_related_id in serialized_record
+    assert expected_related_id not in serialized_record
     assert expected_creator_id in serialized_record