-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
KP-8714 Make publisher parsing work for persons
CMDI has separate distributionRightsHolderPerson and distributionRightsHolderOrganization elements, which is a bit tricky for the current parsing. At the moment, sole organizations are not added as actors at all though: ``` $ git show b815790 commit b815790 Author: Anni Järvenpää <[email protected]> Date: Tue Feb 27 11:37:54 2024 +0000 KP-7427 Skip empty actors Because the organizations have not been implemented yet, some of the actors had just a role and an empty person dict. Metax does not like this, so those are skipped altogether now. ``` Thus it's less bad to read the person element than to read the organization element. This needs to be made to work with both organizations and persons, though, if (when?) the person data requirement is lifted.
- Loading branch information
Showing
3 changed files
with
211 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
192 changes: 192 additions & 0 deletions
192
tests/test_data/kielipankki_record_sample_with_publisher_person.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,192 @@ | ||
<record xmlns="http://www.openarchives.org/OAI/2.0/"> | ||
<header> | ||
<identifier>oai:clarino.uib.no:lb-20140730134</identifier> | ||
<datestamp>2024-06-19T07:39:34Z</datestamp> | ||
<setSpec>FIN-CLARIN</setSpec> | ||
</header> | ||
<metadata> | ||
<CMD xmlns="http://www.clarin.eu/cmd/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" CMDVersion="1.1" xsi:schemaLocation="http://www.clarin.eu/cmd/ http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1361876010571/xsd"> | ||
<Header> | ||
<MdCreator>metashareToCmdi.xsl remove_metashare_namespace.xsl</MdCreator> | ||
<MdCreationDate>2023-03-22</MdCreationDate> | ||
<MdSelfLink>urn:nbn:fi:lb-20140730134</MdSelfLink> | ||
<MdProfile>clarin.eu:cr1:p_1361876010571</MdProfile> | ||
</Header> | ||
<Resources> | ||
<ResourceProxyList/> | ||
<JournalFileProxyList/> | ||
<ResourceRelationList/> | ||
<IsPartOfList/> | ||
</Resources> | ||
<Components> | ||
<resourceInfo> | ||
<identificationInfo ComponentId="clarin.eu:cr1:c_1349361150743"> | ||
<resourceName xml:lang="en"> | ||
The Finnish Broadcasting Company Corpus of Subtitles | ||
</resourceName> | ||
<description xml:lang="fi"> | ||
Kuvaus tekstityskorpukselle vuosilta 1992-2004. | ||
</description> | ||
<description xml:lang="en"> | ||
Description for subtitle corpus from 1992-2004. | ||
</description> | ||
<resourceShortName xml:lang="fi">YLE-korpus</resourceShortName> | ||
<metaShareId>NOT_DEFINED_FOR_V2</metaShareId> | ||
<identifier>http://urn.fi/urn:nbn:fi:lb-20140730134</identifier> | ||
</identificationInfo> | ||
<distributionInfo ComponentId="clarin.eu:cr1:c_1352813745459"> | ||
<availability>underNegotiation</availability> | ||
<licenceInfo ComponentId="clarin.eu:cr1:c_1352813745464"> | ||
<licence>underNegotiation</licence> | ||
<distributionAccessMedium>CD-ROM</distributionAccessMedium> | ||
<userNature>academic</userNature> | ||
<licensorPerson ComponentId="clarin.eu:cr1:c_1352813745465"> | ||
<role>licensor</role> | ||
<personInfo ComponentId="clarin.eu:cr1:c_1349361150746"> | ||
<surname xml:lang="en">Lisensoija</surname> | ||
<givenName xml:lang="en">Late</givenName> | ||
<sex>male</sex> | ||
<communicationInfo ComponentId="clarin.eu:cr1:c_1352813745460"> | ||
<email>[email protected]</email> | ||
</communicationInfo> | ||
<affiliation ComponentId="clarin.eu:cr1:c_1352813745462"> | ||
<role>affiliation</role> | ||
<organizationInfo ComponentId="clarin.eu:cr1:c_1352813745461"> | ||
<organizationName xml:lang="fi">Itä-Suomen yliopisto</organizationName> | ||
<organizationName xml:lang="en">University of Eastern Finland</organizationName> | ||
<organizationShortName xml:lang="en">UEF</organizationShortName> | ||
<communicationInfo ComponentId="clarin.eu:cr1:c_1352813745460"> | ||
<email>[email protected]</email> | ||
<url>http://www.uef.fi/uef/english</url> | ||
<url>https://ror.org/00cyydd11</url> | ||
<country>Finland</country> | ||
</communicationInfo> | ||
</organizationInfo> | ||
</affiliation> | ||
</personInfo> | ||
</licensorPerson> | ||
<distributionRightsHolderPerson ComponentId="clarin.eu:cr1:c_1352813745466"> | ||
<role>distributionRightsHolder</role> | ||
<personInfo ComponentId="clarin.eu:cr1:c_1349361150746"> | ||
<surname xml:lang="en">Lisensoija</surname> | ||
<givenName xml:lang="en">Late</givenName> | ||
<sex>male</sex> | ||
<communicationInfo ComponentId="clarin.eu:cr1:c_1352813745460"> | ||
<email>[email protected]</email> | ||
</communicationInfo> | ||
<affiliation ComponentId="clarin.eu:cr1:c_1352813745462"> | ||
<role>affiliation</role> | ||
<organizationInfo ComponentId="clarin.eu:cr1:c_1352813745461"> | ||
<organizationName xml:lang="fi">Itä-Suomen yliopisto</organizationName> | ||
<organizationName xml:lang="en">University of Eastern Finland</organizationName> | ||
<organizationShortName xml:lang="en">UEF</organizationShortName> | ||
<communicationInfo ComponentId="clarin.eu:cr1:c_1352813745460"> | ||
<email>[email protected]</email> | ||
<url>http://www.uef.fi/uef/english</url> | ||
<url>https://ror.org/00cyydd11</url> | ||
<country>Finland</country> | ||
</communicationInfo> | ||
</organizationInfo> | ||
</affiliation> | ||
</personInfo> | ||
</distributionRightsHolderPerson> | ||
</licenceInfo> | ||
<iprHolderPerson ComponentId="clarin.eu:cr1:c_1352813745463"> | ||
<role>iprHolder</role> | ||
<personInfo ComponentId="clarin.eu:cr1:c_1349361150746"> | ||
<surname xml:lang="en">Lisensoija</surname> | ||
<givenName xml:lang="en">Late</givenName> | ||
<sex>male</sex> | ||
<communicationInfo ComponentId="clarin.eu:cr1:c_1352813745460"> | ||
<email>[email protected]</email> | ||
</communicationInfo> | ||
<affiliation ComponentId="clarin.eu:cr1:c_1352813745462"> | ||
<role>affiliation</role> | ||
<organizationInfo ComponentId="clarin.eu:cr1:c_1352813745461"> | ||
<organizationName xml:lang="fi">Itä-Suomen yliopisto</organizationName> | ||
<organizationName xml:lang="en">University of Eastern Finland</organizationName> | ||
<organizationShortName xml:lang="en">UEF</organizationShortName> | ||
<communicationInfo ComponentId="clarin.eu:cr1:c_1352813745460"> | ||
<email>[email protected]</email> | ||
<url>http://www.uef.fi/uef/english</url> | ||
<url>https://ror.org/00cyydd11</url> | ||
<country>Finland</country> | ||
</communicationInfo> | ||
</organizationInfo> | ||
</affiliation> | ||
</personInfo> | ||
</iprHolderPerson> | ||
</distributionInfo> | ||
<contactPerson ComponentId="clarin.eu:cr1:c_1352813745468"> | ||
<role>contactPerson</role> | ||
<personInfo ComponentId="clarin.eu:cr1:c_1349361150746"> | ||
<surname xml:lang="en">Lisensoija</surname> | ||
<givenName xml:lang="en">Late</givenName> | ||
<sex>male</sex> | ||
<communicationInfo ComponentId="clarin.eu:cr1:c_1352813745460"> | ||
<email>[email protected]</email> | ||
</communicationInfo> | ||
<affiliation ComponentId="clarin.eu:cr1:c_1352813745462"> | ||
<role>affiliation</role> | ||
<organizationInfo ComponentId="clarin.eu:cr1:c_1352813745461"> | ||
<organizationName xml:lang="fi">Itä-Suomen yliopisto</organizationName> | ||
<organizationName xml:lang="en">University of Eastern Finland</organizationName> | ||
<organizationShortName xml:lang="en">UEF</organizationShortName> | ||
<communicationInfo ComponentId="clarin.eu:cr1:c_1352813745460"> | ||
<email>[email protected]</email> | ||
<url>http://www.uef.fi/uef/english</url> | ||
<url>https://ror.org/00cyydd11</url> | ||
<country>Finland</country> | ||
</communicationInfo> | ||
</organizationInfo> | ||
</affiliation> | ||
</personInfo> | ||
</contactPerson> | ||
<metadataInfo ComponentId="clarin.eu:cr1:c_1349361150745"> | ||
<metadataCreationDate>2012-08-02</metadataCreationDate> | ||
<metadataLanguageName>Finnish</metadataLanguageName> | ||
<metadataLanguageName>English</metadataLanguageName> | ||
<metadataLanguageId>Fi</metadataLanguageId> | ||
<metadataLanguageId>en</metadataLanguageId> | ||
<metadataLastDateUpdated>2014-04-24</metadataLastDateUpdated> | ||
</metadataInfo> | ||
<corpusInfo ComponentId="clarin.eu:cr1:c_1355150532309"> | ||
<resourceType>corpus</resourceType> | ||
<corpusMediaType ComponentId="clarin.eu:cr1:c_1355150532310"> | ||
<corpusTextInfo ComponentId="clarin.eu:cr1:c_1355150532311"> | ||
<mediaType>text</mediaType> | ||
<lingualityInfo ComponentId="clarin.eu:cr1:c_1355150532313"> | ||
<lingualityType>multilingual</lingualityType> | ||
<multilingualityType>multilingualSingleText</multilingualityType> | ||
</lingualityInfo> | ||
<languageInfo ComponentId="clarin.eu:cr1:c_1355150532314"> | ||
<languageId>Fi</languageId> | ||
<languageName>Finnish</languageName> | ||
</languageInfo> | ||
<languageInfo ComponentId="clarin.eu:cr1:c_1355150532314"> | ||
<languageId>En</languageId> | ||
<languageName>English</languageName> | ||
</languageInfo> | ||
<languageInfo ComponentId="clarin.eu:cr1:c_1355150532314"> | ||
<languageId>Sv</languageId> | ||
<languageName>Swedish</languageName> | ||
</languageInfo> | ||
<languageInfo ComponentId="clarin.eu:cr1:c_1355150532314"> | ||
<languageId>smi</languageId> | ||
<languageName>Sami languages</languageName> | ||
</languageInfo> | ||
<modalityInfo ComponentId="clarin.eu:cr1:c_1355150532318"> | ||
<modalityType>spokenLanguage</modalityType> | ||
</modalityInfo> | ||
<sizeInfo ComponentId="clarin.eu:cr1:c_1353678848785"> | ||
<size>100000000</size> | ||
<sizeUnit>words</sizeUnit> | ||
</sizeInfo> | ||
</corpusTextInfo> | ||
</corpusMediaType> | ||
</corpusInfo> | ||
</resourceInfo> | ||
</Components> | ||
</CMD> | ||
</metadata> | ||
</record> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters