Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEATURE] Use MODS reader library #1194

Merged
merged 2 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 83 additions & 58 deletions Classes/Format/Mods.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
use Kitodo\Dlf\Api\Orcid\Profile as OrcidProfile;
use Kitodo\Dlf\Api\Viaf\Profile as ViafProfile;
use Kitodo\Dlf\Common\MetadataInterface;
use Slub\Mods\Element\Name;
use Slub\Mods\ModsReader;

/**
* Metadata MODS format class for the 'dlf' extension
Expand All @@ -32,6 +34,12 @@ class Mods implements MetadataInterface
**/
private $xml;

/**
* @access private
* @var ModsReader The MODS reader wrapping the metadata XML
**/
private $modsReader;

/**
* @access private
* @var array The metadata array
Expand Down Expand Up @@ -61,7 +69,7 @@ public function extractMetadata(\SimpleXMLElement $xml, array &$metadata, bool $
$this->metadata = $metadata;
$this->useExternalApis = $useExternalApis;

$this->xml->registerXPathNamespace('mods', 'http://www.loc.gov/mods/v3');
$this->modsReader = new ModsReader($this->xml);

$this->getAuthors();
$this->getHolders();
Expand All @@ -80,20 +88,17 @@ public function extractMetadata(\SimpleXMLElement $xml, array &$metadata, bool $
*/
private function getAuthors(): void
{
$authors = $this->xml->xpath('./mods:name[./mods:role/mods:roleTerm[@type="code" and @authority="marcrelator"]="aut"]');

$authors = $this->modsReader->getNames('[./mods:role/mods:roleTerm[@type="code" and @authority="marcrelator"]="aut"]');
// Get "author" and "author_sorting" again if that was too sophisticated.
if (empty($authors)) {
// Get all names which do not have any role term assigned and assume these are authors.
$authors = $this->xml->xpath('./mods:name[not(./mods:role)]');
$authors = $this->modsReader->getNames('[not(./mods:role)]');
}
if (!empty($authors)) {
for ($i = 0, $j = count($authors); $i < $j; $i++) {
$authors[$i]->registerXPathNamespace('mods', 'http://www.loc.gov/mods/v3');

$identifier = $authors[$i]->xpath('./mods:name/mods:nameIdentifier[@type="orcid"]');
if ($this->useExternalApis && !empty((string) $identifier[0])) {
$this->getAuthorFromOrcidApi((string) $identifier[0], $authors, $i);
$identifiers = $authors[$i]->getNameIdentifiers('[@type="orcid"]');
if ($this->useExternalApis && !empty($identifiers)) {
$this->getAuthorFromOrcidApi($identifiers[0]->getValue(), $authors, $i);
} else {
$this->getAuthorFromXml($authors, $i);
}
Expand Down Expand Up @@ -141,43 +146,42 @@ private function getAuthorFromXml(array $authors, int $i): void
{
$this->getAuthorFromXmlDisplayForm($authors, $i);

$nameParts = $authors[$i]->xpath('./mods:namePart');

$nameParts = $authors[$i]->getNameParts();
if (empty($this->metadata['author'][$i]) && $nameParts) {
$name = [];
$k = 4;
foreach ($nameParts as $namePart) {
if (
isset($namePart['type'])
&& (string) $namePart['type'] == 'family'
!empty($namePart->getType())
&& $namePart->getType() == 'family'
) {
$name[0] = (string) $namePart;
$name[0] = $namePart->getValue();
} elseif (
isset($namePart['type'])
&& (string) $namePart['type'] == 'given'
!empty($namePart->getType())
&& $namePart->getType() == 'given'
) {
$name[1] = (string) $namePart;
$name[1] = $namePart->getValue();
} elseif (
isset($namePart['type'])
&& (string) $namePart['type'] == 'termsOfAddress'
!empty($namePart->getType())
&& $namePart->getType() == 'termsOfAddress'
) {
$name[2] = (string) $namePart;
$name[2] = $namePart->getValue();
} elseif (
isset($namePart['type'])
&& (string) $namePart['type'] == 'date'
!empty($namePart->getType())
&& $namePart->getType() == 'date'
) {
$name[3] = (string) $namePart;
$name[3] = $namePart->getValue();
} else {
$name[$k] = (string) $namePart;
$name[$k] = $namePart->getValue();
}
$k++;
}
ksort($name);
$this->metadata['author'][$i] = trim(implode(', ', $name));
}
// Append "valueURI" to name using Unicode unit separator.
if (isset($authors[$i]['valueURI'])) {
$this->metadata['author'][$i] .= pack('C', 31) . (string) $authors[$i]['valueURI'];
if (!empty($authors[$i]->getValueURI())) {
$this->metadata['author'][$i] .= pack('C', 31) . $authors[$i]->getValueURI();
}
}

Expand All @@ -186,16 +190,16 @@ private function getAuthorFromXml(array $authors, int $i): void
*
* @access private
*
* @param array $authors
* @param Name[] $authors
* @param int $i
*
* @return void
*/
private function getAuthorFromXmlDisplayForm(array $authors, int $i): void
{
// NOTE(review): this hunk is rendered without +/- markers, so two variants are interleaved.
// The xpath('./mods:displayForm') lookup with the "(string)" cast appears to be the removed code;
// the getDisplayForms() / getValue() calls appear to be its replacement — confirm against the merged file.
$displayForm = $authors[$i]->xpath('./mods:displayForm');
if ($displayForm) {
$this->metadata['author'][$i] = (string) $displayForm[0];
// When at least one display form exists, the first one is stored as the author entry at index $i.
$displayForms = $authors[$i]->getDisplayForms();
if ($displayForms) {
$this->metadata['author'][$i] = $displayForms[0]->getValue();
}
}

Expand All @@ -208,15 +212,13 @@ private function getAuthorFromXmlDisplayForm(array $authors, int $i): void
*/
private function getHolders(): void
{
$holders = $this->xml->xpath('./mods:name[./mods:role/mods:roleTerm[@type="code" and @authority="marcrelator"]="prv"]');
$holders = $this->modsReader->getNames('[./mods:role/mods:roleTerm[@type="code" and @authority="marcrelator"]="prv"]');

if (!empty($holders)) {
for ($i = 0, $j = count($holders); $i < $j; $i++) {
$holders[$i]->registerXPathNamespace('mods', 'http://www.loc.gov/mods/v3');

$identifier = $holders[$i]->xpath('./mods:name/mods:nameIdentifier[@type="viaf"]');
if ($this->useExternalApis && !empty((string) $identifier[0])) {
$this->getHolderFromViafApi((string) $identifier[0], $holders, $i);
$identifiers = $holders[$i]->getNameIdentifiers('[@type="viaf"]');
if ($this->useExternalApis && !empty($identifiers)) {
$this->getHolderFromViafApi($identifiers[0]->getValue(), $holders, $i);
} else {
$this->getHolderFromXml($holders, $i);
}
Expand Down Expand Up @@ -264,8 +266,8 @@ private function getHolderFromXml(array $holders, int $i): void
{
$this->getHolderFromXmlDisplayForm($holders, $i);
// Append "valueURI" to name using Unicode unit separator.
if (isset($holders[$i]['valueURI'])) {
$this->metadata['holder'][$i] .= pack('C', 31) . (string) $holders[$i]['valueURI'];
if (!empty($holders[$i]->getValueURI())) {
$this->metadata['holder'][$i] .= pack('C', 31) . $holders[$i]->getValueURI();
}
}

Expand All @@ -282,9 +284,9 @@ private function getHolderFromXml(array $holders, int $i): void
private function getHolderFromXmlDisplayForm(array $holders, int $i): void
{
// Check if there is a display form.
// NOTE(review): this hunk is rendered without +/- markers, so two variants are interleaved.
// The xpath('./mods:displayForm') lookup with the "(string)" cast appears to be the removed code;
// the getDisplayForm() / getValue() calls appear to be its replacement — confirm against the merged file.
$displayForm = $holders[$i]->xpath('./mods:displayForm');
if ($displayForm) {
$this->metadata['holder'][$i] = (string) $displayForm[0];
// NOTE(review): getAuthorFromXmlDisplayForm() calls getDisplayForms() (plural) on its name objects,
// while this call uses the singular getDisplayForm() and then indexes the result with [0].
// This looks like a typo — verify the method name against Slub\Mods\Element\Name.
$displayForms = $holders[$i]->getDisplayForm();
if ($displayForms) {
$this->metadata['holder'][$i] = $displayForms[0]->getValue();
}
}

Expand All @@ -297,17 +299,34 @@ private function getHolderFromXmlDisplayForm(array $holders, int $i): void
*/
private function getPlaces(): void
{
$places = $this->xml->xpath('./mods:originInfo[not(./mods:edition="[Electronic ed.]")]/mods:place/mods:placeTerm');
$places = [];
$originInfos = $this->modsReader->getOriginInfos('[not(./mods:edition="[Electronic ed.]")]');
foreach ($originInfos as $originInfo) {
foreach ($originInfo->getPlaces() as $place) {
foreach ($place->getPlaceTerms() as $placeTerm) {
$places[] = $placeTerm->getValue();
}
}
}

// Get "place" and "place_sorting" again if that was too sophisticated.
if (empty($places)) {
// Get all places and assume these are places of publication.
$places = $this->xml->xpath('./mods:originInfo/mods:place/mods:placeTerm');
$originInfos = $this->modsReader->getOriginInfos();
foreach ($originInfos as $originInfo) {
foreach ($originInfo->getPlaces() as $place) {
foreach ($place->getPlaceTerms() as $placeTerm) {
$places[] = $placeTerm->getValue();
}
}
}
}

if (!empty($places)) {
foreach ($places as $place) {
$this->metadata['place'][] = (string) $place;
$this->metadata['place'][] = $place;
if (!$this->metadata['place_sorting'][0]) {
$this->metadata['place_sorting'][0] = preg_replace('/[[:punct:]]/', '', (string) $place);
$this->metadata['place_sorting'][0] = preg_replace('/[[:punct:]]/', '', $place);
}
}
}
Expand All @@ -323,31 +342,37 @@ private function getPlaces(): void
private function getYears(): void
{
// Get "year_sorting".
$yearsSorting = $this->xml->xpath('./mods:originInfo[not(./mods:edition="[Electronic ed.]")]/mods:dateOther[@type="order" and @encoding="w3cdtf"]');
$yearsSorting = $this->modsReader->getOriginInfos('[not(./mods:edition="[Electronic ed.]")]/mods:dateOther[@type="order" and @encoding="w3cdtf"]');
if ($yearsSorting) {
foreach ($yearsSorting as $yearSorting) {
$this->metadata['year_sorting'][0] = (int) $yearSorting;
$otherDates = $yearSorting->getOtherDates();
if (!empty($otherDates)) {
$this->metadata['year_sorting'][0] = $otherDates[0]->getValue();
}
}
}
// Get "year" and "year_sorting" if not specified separately.
$years = $this->xml->xpath('./mods:originInfo[not(./mods:edition="[Electronic ed.]")]/mods:dateIssued[@keyDate="yes"]');
$years = $this->modsReader->getOriginInfos('./mods:originInfo[not(./mods:edition="[Electronic ed.]")]/mods:dateIssued[@keyDate="yes"]');
// Get "year" and "year_sorting" again if that was too sophisticated.
if (empty($years)) {
// Get all dates and assume these are dates of publication.
$years = $this->xml->xpath('./mods:originInfo/mods:dateIssued');
$years = $this->modsReader->getOriginInfos();
}
if (!empty($years)) {
foreach ($years as $year) {
$this->metadata['year'][] = (string) $year;
if (!$this->metadata['year_sorting'][0]) {
$yearSorting = str_ireplace('x', '5', preg_replace('/[^\d.x]/i', '', (string) $year));
if (
strpos($yearSorting, '.')
|| strlen($yearSorting) < 3
) {
$yearSorting = (((int) trim($yearSorting, '.') - 1) * 100) + 50;
$issued = $year->getIssuedDates();
if (!empty($issued)) {
$this->metadata['year'][] = $issued[0]->getValue();
if (!$this->metadata['year_sorting'][0]) {
$yearSorting = str_ireplace('x', '5', preg_replace('/[^\d.x]/i', '', $issued[0]->getValue()));
if (
strpos($yearSorting, '.')
|| strlen($yearSorting) < 3
) {
$yearSorting = (((int) trim($yearSorting, '.') - 1) * 100) + 50;
}
$this->metadata['year_sorting'][0] = (int) $yearSorting;
}
$this->metadata['year_sorting'][0] = (int) $yearSorting;
}
}
}
Expand Down
1 change: 1 addition & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
"typo3/cms-extbase": "^10.4.37|^11.5.37",
"typo3/cms-tstemplate": "^10.4.37|^11.5.37",
"caseyamcl/phpoaipmh": "^3.3.1",
"slub/php-mods-reader": "^0.3.0",
"ubl/php-iiif-prezi-reader": "0.3.0",
"solarium/solarium": "5.2 - 6.3"
},
Expand Down