Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add example import configuration for DSpace xoai format #3942

Open
wants to merge 3 commits into
base: dev
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 164 additions & 0 deletions import/xsl/dspace-xoai.xsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
<!-- available fields are defined in solr/biblio/conf/schema.xml -->
<!-- This document was written for Catalyst IT
by Alex Buckley ([email protected]).
It takes metadata directly from xoai to index in Solr. It is based on dspace.xsl.

The choices of fields were made based on which metadata is most interesting
for the user of the library and which Solr fields in schema.xml are close to
the metadata of choice.
-->
<xsl:stylesheet xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0" xmlns:php="http://php.net/xsl" xsi:schemaLocation="http://www.w3.org/2005/Atom http://www.kbcafe.com/rss/atom.xsd.xml">
<!-- Serialise the generated Solr update document as indented UTF-8 XML. -->
<xsl:output encoding="utf-8" indent="yes" method="xml"/>

<!-- Values copied into the "institution" and "collection" fields. -->
<xsl:param name="institution">My University</xsl:param>
<xsl:param name="collection">DSpace</xsl:param>

<!-- Name of the source element whose text becomes the record id. -->
<xsl:param name="id_tag_name">identifier</xsl:param>

<!-- Change tracking: first_indexed/last_indexed are only written when
     change_tracking_date_tag_name is non-empty (it is empty by default). -->
<xsl:param name="change_tracking_core">biblio</xsl:param>
<xsl:param name="change_tracking_date_tag_name"/>

<!-- NOTE(review): the parameters below are declared but not referenced by the
     template in this stylesheet; presumably they exist so that callers can
     pass them without error. Confirm before removing any of them. -->
<xsl:param name="urlPrefix">http</xsl:param>
<xsl:param name="geographic">false</xsl:param>
<xsl:param name="workKey_include_regEx"/>
<xsl:param name="workKey_exclude_regEx"/>
<xsl:param name="workKey_transliterator_rules">:: NFD; :: lower; :: Latin; :: [^[:letter:] [:number:]] Remove; :: NFKC;</xsl:param>
<xsl:template match="/">
<add>
<doc>
<!-- RECORD ID: text of the first element in the document whose name equals
     $id_tag_name. Whitespace is normalised so that the indexed id is the
     same string that the change-tracking lookups build with
     normalize-space(). -->
<field name="id">
  <xsl:value-of select="normalize-space(//*[name()=$id_tag_name])"/>
</field>
<!-- RECORD FORMAT: constant for every record produced by this stylesheet. -->
<field name="record_format">dspace</field>
<!-- INSTITUTION: taken from the $institution parameter. -->
<field name="institution">
  <xsl:value-of select="$institution"/>
</field>
<!-- COLLECTION: taken from the $collection parameter. -->
<field name="collection">
  <xsl:value-of select="$collection"/>
</field>
<!-- TITLE: all four title fields are derived from the same node, the first
     title value that is not blank. Iterating over that single node (instead
     of testing for any title element) means no empty title fields are
     written when a title element exists without a usable value, and "title"
     can no longer differ from title_short/title_full/title_sort. -->
<xsl:for-each select="(//*[@name='title']/*/*[@name='value'][normalize-space()])[1]">
  <field name="title">
    <xsl:value-of select="."/>
  </field>
  <field name="title_short">
    <xsl:value-of select="."/>
  </field>
  <field name="title_full">
    <xsl:value-of select="."/>
  </field>
  <!-- Sort key: the title passed through the VuFind::stripArticles helper. -->
  <field name="title_sort">
    <xsl:value-of select="php:function('VuFind::stripArticles', string(.))"/>
  </field>
</xsl:for-each>
<!-- AUTHOR: one "author" field per non-blank dc/contributor/author value.
     Blank values are filtered in the select expression rather than inside
     the loop, so that position() counts only usable values; otherwise a
     blank first value would mean author_sort is never written. -->
<xsl:for-each select="//*[@name='dc']/*[@name='contributor']/*[@name='author']/*/*[@name='value'][normalize-space()]">
  <field name="author">
    <xsl:value-of select="normalize-space()"/>
  </field>
  <!-- use the first non-blank author value for sorting -->
  <xsl:if test="position()=1">
    <field name="author_sort">
      <xsl:value-of select="normalize-space()"/>
    </field>
  </xsl:if>
</xsl:for-each>
<!-- CO AUTHOR -->
<!-- Each value found under contributor/advisor/none is written, with
     whitespace normalised, to its own "author2" field. Only the "advisor"
     contributor element is read here. -->
<xsl:for-each select="//*[@name='contributor']/*[@name='advisor']/*[@name='none']/*[@name='value']">
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is "advisor" the only possible value here that could apply to author2?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @demiankatz I'm trying to find the answer to this. On the AUT DSpace instance it does look like 'advisor' is the only possible value here. I am checking for a wider DSpace context if that is the case.

<field name="author2">
<xsl:value-of select="normalize-space(.)"/>
</field>
</xsl:for-each>
<!-- PUBLISHDATE: the first four characters of the first non-blank
     date/accessioned/none value are used as the year for both publishDate
     and publishDateSort. The loop runs at most once and only when such a
     value exists, so records that have some other date element but no
     accessioned date no longer receive empty date fields. The text is
     whitespace-normalised before the substring is taken so that leading
     whitespace cannot end up in the year. -->
<xsl:for-each select="(//*[@name='date']/*[@name='accessioned']/*[@name='none']/*[@name='value'][normalize-space()])[1]">
  <xsl:variable name="accessionYear" select="substring(normalize-space(.), 1, 4)"/>
  <field name="publishDate">
    <xsl:value-of select="$accessionYear"/>
  </field>
  <field name="publishDateSort">
    <xsl:value-of select="$accessionYear"/>
  </field>
</xsl:for-each>
<!-- PUBLISHER: written whenever any element named "publisher" exists. The
     field holds the first publisher value and, when a dc/relation/uri
     element is present, a line break followed by "# " and that URI. -->
<xsl:if test="//*[@name='publisher']">
  <xsl:variable name="relationUri" select="//*[@name='dc']/*[@name='relation']/*[@name='uri']"/>
  <field name="publisher">
    <xsl:value-of select="//*[@name='publisher']/*/*[@name='value']"/>
    <xsl:if test="$relationUri">
      <!-- explicit separator: newline, hash, space -->
      <xsl:text>&#10;# </xsl:text>
      <xsl:value-of select="$relationUri"/>
    </xsl:if>
  </field>
</xsl:if>
<!-- DESCRIPTION: all description/abstract values are joined into a single
     field, each value followed by a line feed. -->
<xsl:if test="//*[@name='description']/*[@name='abstract']/*/*[@name='value']">
  <field name="description">
    <xsl:for-each select="//*[@name='description']/*[@name='abstract']/*/*[@name='value']">
      <xsl:value-of select="."/>
      <xsl:text>&#10;</xsl:text>
    </xsl:for-each>
  </field>
</xsl:if>
<!-- LANGUAGE: one "language" field per non-blank language/iso value, mapped
     through VuFind::mapString with language_map_iso639-1.properties.
     The loop iterates the individual value fields (the same .../*/*[@name='value']
     shape used for authors and subjects) instead of the "iso" container, so
     several values, or sibling fields next to the value, are not concatenated
     into one code. Blank values are skipped with normalize-space(), which,
     unlike string-length(), is also false for whitespace-only content. -->
<xsl:for-each select="//*[@name='language']/*[@name='iso']/*/*[@name='value'][normalize-space()]">
  <field name="language">
    <xsl:value-of select="php:function('VuFind::mapString', normalize-space(.), 'language_map_iso639-1.properties')"/>
  </field>
</xsl:for-each>
<!-- SUBJECT: every subject value is written twice, once as "topic" and once
     as "topic_facet", with whitespace normalised. -->
<xsl:for-each select="//*[@name='subject']/*/*[@name='value']">
  <xsl:variable name="subjectText" select="normalize-space(.)"/>
  <field name="topic">
    <xsl:value-of select="$subjectText"/>
  </field>
  <field name="topic_facet">
    <xsl:value-of select="$subjectText"/>
  </field>
</xsl:for-each>
<!-- TYPE: the first non-blank type value becomes the "format" field.
     The value field is selected explicitly (the same .../*/*[@name='value']
     shape used by the other blocks) and whitespace-normalised, rather than
     taking the string value of the whole "type" container, which would
     include any inter-element whitespace and the text of every descendant.
     NOTE(review): assumes type values sit two levels below the "type"
     element, as the title/publisher/subject selectors in this stylesheet
     assume for their elements. Confirm against sample records. -->
<xsl:for-each select="(//*[@name='type']/*/*[@name='value'][normalize-space()])[1]">
  <field name="format">
    <xsl:value-of select="normalize-space(.)"/>
  </field>
</xsl:for-each>
<!-- RIGHTS: one field per dc/rights_str element. The value is read from the
     current node; an absolute path inside the loop would repeat the first
     match on every iteration.
     NOTE(review): the original comment said the value is stored in the
     rights_str dynamic Solr field, but the field written here is "edition",
     and the source element selected is itself named "rights_str". Confirm
     which source element and which Solr field are intended. -->
<xsl:for-each select="//*[@name='dc']/*[@name='rights_str']">
  <field name="edition">
    <xsl:value-of select="."/>
  </field>
</xsl:for-each>
<!-- ORIGINAL URL: for each bitstream listed after an element whose text is
     "ORIGINAL", write a "url" field made of the bitstream url, the separator
     " # ", and the bitstream name. -->
<xsl:for-each select="//*[text()='ORIGINAL']/following-sibling::*[@name='bitstreams']/*[@name='bitstream']">
  <field name="url">
    <xsl:value-of select="concat(*[@name='url'], ' # ', *[@name='name'])"/>
  </field>
</xsl:for-each>
<!-- THUMBNAIL: when any element has the text "THUMBNAIL", write the url of
     the first bitstream that follows it into the "thumbnail" field. -->
<xsl:if test="//*[text()='THUMBNAIL']">
  <xsl:variable name="thumbnailUrls" select="//*[text()='THUMBNAIL']/following-sibling::*[@name='bitstreams']/*[@name='bitstream']/*[@name='url']"/>
  <field name="thumbnail">
    <xsl:value-of select="$thumbnailUrls"/>
  </field>
</xsl:if>
<!-- FULLTEXT: when a bitstream url follows an element whose text is "TEXT",
     pass the first such url to VuFind::harvestWithParser and store the
     result in "fulltext". The loop body runs at most once. -->
<xsl:for-each select="(//*[text()='TEXT']/following-sibling::*[@name='bitstreams']/*[@name='bitstream']/*[@name='url'])[1]">
  <field name="fulltext">
    <xsl:value-of select="php:function('VuFind::harvestWithParser', string(.))"/>
  </field>
</xsl:for-each>
<!-- LICENSE: when a bitstream url follows an element whose text is
     "LICENSE", pass the first such url to VuFind::harvestWithParser and
     store the result in the "physical" field. The loop body runs at most
     once. -->
<xsl:for-each select="(//*[text()='LICENSE']/following-sibling::*[@name='bitstreams']/*[@name='bitstream']/*[@name='url'])[1]">
  <field name="physical">
    <xsl:value-of select="php:function('VuFind::harvestWithParser', string(.))"/>
  </field>
</xsl:for-each>
<!-- CHANGE TRACKING: written only when $change_tracking_date_tag_name is
     non-empty. The identifier passed to the helpers must match the id field,
     so it is located with the same //*[name()=$id_tag_name] expression.
     Because this template matches the document root, a relative
     *[name()=...] step would only ever inspect the document element, so both
     the identifier and the date are looked up anywhere in the document. -->
<xsl:if test="$change_tracking_date_tag_name">
  <xsl:variable name="trackedId" select="normalize-space(string(//*[name()=$id_tag_name]))"/>
  <xsl:variable name="trackedDate" select="normalize-space(//*[name()=$change_tracking_date_tag_name])"/>
  <field name="first_indexed">
    <xsl:value-of select="php:function('VuFind::getFirstIndexed', $change_tracking_core, $trackedId, $trackedDate)"/>
  </field>
  <field name="last_indexed">
    <xsl:value-of select="php:function('VuFind::getLastIndexed', $change_tracking_core, $trackedId, $trackedDate)"/>
  </field>
</xsl:if>
</doc>
</add>
</xsl:template>
</xsl:stylesheet>
Loading