Skip to content

Commit

Permalink
Harvester / Add support for Zenodo / Clean up HTML from abstract.
Browse files Browse the repository at this point in the history
  • Loading branch information
fxprunayre committed Jan 6, 2025
1 parent bc00549 commit 2510e0e
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 9 deletions.
5 changes: 5 additions & 0 deletions schemas/iso19115-3.2018/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@
<groupId>org.xmlunit</groupId>
<artifactId>xmlunit-matchers</artifactId>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
xmlns:gcx="http://standards.iso.org/iso/19115/-3/gcx/1.0"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:gml="http://www.opengis.net/gml/3.2"
xmlns:md="http://www.pangaea.de/MetaData"
xmlns:util="java:org.fao.geonet.util.XslUtil"
exclude-result-prefixes="#all">

<!--
Expand Down Expand Up @@ -176,7 +176,7 @@

<mri:abstract>
<gco:CharacterString>
<xsl:value-of select="metadata/description"/>
<xsl:value-of select="util:html2text(metadata/description, true())"/>
</gco:CharacterString>
</mri:abstract>

Expand Down Expand Up @@ -296,7 +296,7 @@
<mri:associationType>
<mri:DS_AssociationTypeCode
codeList="http://standards.iso.org/iso/19115/resources/Codelists/cat/codelists.xml#DS_AssociationTypeCode"
codeListValue="largerWorkCitation"/>
codeListValue="partOfSeamlessDatabase"/>
</mri:associationType>
<mri:metadataReference xlink:href="{.}" xlink:title="{.}"/>
</mri:MD_AssociatedResource>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,29 @@
*/
package org.fao.geonet.util;

import org.jsoup.Jsoup;

/**
 * Copy of some utility functions required for testing XSL files.
 * Dependency to XslUtil in core would create a circular dependency.
 *
 * <p>NOTE(review): the core implementation is not visible from this module;
 * any behavior change made here should be checked against it so that tests
 * keep exercising the same logic as production.
 */
public class XslUtil {
    /**
     * Returns the first two characters of the given language code.
     *
     * @param iso3code language code to shorten
     * @return the first two characters of {@code iso3code}
     * @throws StringIndexOutOfBoundsException if the code has fewer than two characters
     * @throws NullPointerException if {@code iso3code} is {@code null}
     */
    public static String twoCharLangCode(String iso3code) {
        return iso3code.substring(0, 2);
    }

    /**
     * Test stub: ignores its argument and always returns {@code "fre"}.
     *
     * @param iso2code ignored
     * @return the constant {@code "fre"}
     */
    public static String threeCharLangCode(String iso2code) {
        return "fre";
    }

    /**
     * Replaces a few HTML layout elements by a plain-text equivalent before
     * the remaining markup is stripped: {@code <br>} variants become a line
     * separator and each {@code <li>...</li>} becomes a new line starting
     * with {@code "* "}.
     *
     * <p>Only attribute-less {@code <li>} elements whose content sits on a
     * single line are rewritten ({@code .} does not match line terminators).
     *
     * @param html HTML fragment to pre-process
     * @return the fragment with line breaks and list items substituted
     * @throws NullPointerException if {@code html} is {@code null}
     */
    public static String htmlElement2textReplacer(String html) {
        String newLine = System.lineSeparator();
        return html
            .replaceAll("<br */?>", newLine)
            // Reluctant quantifier: a greedy ".*" would span from the first
            // <li> to the last </li> on a line and merge sibling items into
            // one bullet.
            .replaceAll("<li>(.*?)</li>", newLine + "* $1");
    }

    /**
     * Strips the markup from an HTML fragment by parsing it with Jsoup and
     * returning the document's whole text.
     *
     * @param html HTML fragment to convert
     * @return the text content of the parsed fragment
     */
    public static String html2text(String html) {
        return Jsoup.parse(html).wholeText();
    }

    /**
     * Converts an HTML fragment to text, optionally substituting layout
     * elements first (see {@link #htmlElement2textReplacer(String)}).
     *
     * @param html HTML fragment to convert
     * @param substituteHtmlToTextLayoutElement when {@code true}, line breaks
     *        and list items are turned into text layout before stripping tags
     * @return the text content of the fragment
     */
    public static String html2text(String html, boolean substituteHtmlToTextLayoutElement) {
        String source = substituteHtmlToTextLayoutElement
            ? htmlElement2textReplacer(html)
            : html;
        return html2text(source);
    }
}
13 changes: 7 additions & 6 deletions schemas/iso19115-3.2018/src/test/resources/zenodo.xml
Original file line number Diff line number Diff line change
Expand Up @@ -154,14 +154,15 @@
</cit:CI_Citation>
</mri:citation>
<mri:abstract>
<gco:CharacterString>&lt;p&gt;Argo is a real-time global ocean in situ observing system. It provides thousands
<gco:CharacterString>Argo is a real-time global ocean in situ observing system. It provides thousands
of highly accurate ocean measurements every day. The Argo dataset has now accumulated more than 2.3 million
vertical ocean profiles and accessing it for scientific analysis remains a challenge.&lt;/p&gt; &lt;p&gt;The
vertical ocean profiles and accessing it for scientific analysis remains a challenge.
The
Argo expert community, focused on delivering a curated dataset of the best scientific quality possible, has
never provided its user base with a Python software package to easily access and manipulate Argo measurements:
the argopy software aims to fill this gap. &lt;strong&gt;The argopy software can be used to easily fetch and
manipulate measurements from Argo floats&lt;/strong&gt;. It is dedicated to scientists without knowledge of
the Argo data management system but is also designed to accommodate expert requirements.&lt;/p&gt;
the argopy software aims to fill this gap. The argopy software can be used to easily fetch and
manipulate measurements from Argo floats. It is dedicated to scientists without knowledge of
the Argo data management system but is also designed to accommodate expert requirements.
</gco:CharacterString>
</mri:abstract>
<mri:pointOfContact>
Expand Down Expand Up @@ -331,7 +332,7 @@
<mri:associationType>
<mri:DS_AssociationTypeCode
codeList="http://standards.iso.org/iso/19115/resources/Codelists/cat/codelists.xml#DS_AssociationTypeCode"
codeListValue="largerWorkCitation"/>
codeListValue="partOfSeamlessDatabase"/>
</mri:associationType>
<mri:metadataReference xlink:href="https://doi.org/10.5281/zenodo.4009263"
xlink:title="https://doi.org/10.5281/zenodo.4009263"/>
Expand Down

0 comments on commit 2510e0e

Please sign in to comment.