From cc122d8dc0b82647c1e6320a5a682161153e9512 Mon Sep 17 00:00:00 2001 From: Beatrycze Volk Date: Wed, 29 May 2024 09:08:49 +0200 Subject: [PATCH 1/2] [FEATURE] Use MODS reader library (#1194) Co-authored-by: Sebastian Meyer --- Classes/Format/Mods.php | 141 +++++++++++++++++++++++----------------- composer.json | 1 + 2 files changed, 84 insertions(+), 58 deletions(-) diff --git a/Classes/Format/Mods.php b/Classes/Format/Mods.php index c62f3605c..9a4ced0cd 100644 --- a/Classes/Format/Mods.php +++ b/Classes/Format/Mods.php @@ -15,6 +15,8 @@ use Kitodo\Dlf\Api\Orcid\Profile as OrcidProfile; use Kitodo\Dlf\Api\Viaf\Profile as ViafProfile; use Kitodo\Dlf\Common\MetadataInterface; +use Slub\Mods\Element\Name; +use Slub\Mods\ModsReader; /** * Metadata MODS format class for the 'dlf' extension @@ -32,6 +34,12 @@ class Mods implements MetadataInterface **/ private $xml; + /** + * @access private + * @var ModsReader The MODS reader wrapping the metadata XML + **/ + private $modsReader; + /** * @access private * @var array The metadata array @@ -61,7 +69,7 @@ public function extractMetadata(\SimpleXMLElement $xml, array &$metadata, bool $ $this->metadata = $metadata; $this->useExternalApis = $useExternalApis; - $this->xml->registerXPathNamespace('mods', 'http://www.loc.gov/mods/v3'); + $this->modsReader = new ModsReader($this->xml); $this->getAuthors(); $this->getHolders(); @@ -80,20 +88,17 @@ public function extractMetadata(\SimpleXMLElement $xml, array &$metadata, bool $ */ private function getAuthors(): void { - $authors = $this->xml->xpath('./mods:name[./mods:role/mods:roleTerm[@type="code" and @authority="marcrelator"]="aut"]'); - + $authors = $this->modsReader->getNames('[./mods:role/mods:roleTerm[@type="code" and @authority="marcrelator"]="aut"]'); // Get "author" and "author_sorting" again if that was too sophisticated. if (empty($authors)) { // Get all names which do not have any role term assigned and assume these are authors. 
- $authors = $this->xml->xpath('./mods:name[not(./mods:role)]'); + $authors = $this->modsReader->getNames('[not(./mods:role)]'); } if (!empty($authors)) { for ($i = 0, $j = count($authors); $i < $j; $i++) { - $authors[$i]->registerXPathNamespace('mods', 'http://www.loc.gov/mods/v3'); - - $identifier = $authors[$i]->xpath('./mods:name/mods:nameIdentifier[@type="orcid"]'); - if ($this->useExternalApis && !empty((string) $identifier[0])) { - $this->getAuthorFromOrcidApi((string) $identifier[0], $authors, $i); + $identifiers = $authors[$i]->getNameIdentifiers('[@type="orcid"]'); + if ($this->useExternalApis && !empty($identifiers)) { + $this->getAuthorFromOrcidApi($identifiers[0]->getValue(), $authors, $i); } else { $this->getAuthorFromXml($authors, $i); } @@ -141,34 +146,33 @@ private function getAuthorFromXml(array $authors, int $i): void { $this->getAuthorFromXmlDisplayForm($authors, $i); - $nameParts = $authors[$i]->xpath('./mods:namePart'); - + $nameParts = $authors[$i]->getNameParts(); if (empty($this->metadata['author'][$i]) && $nameParts) { $name = []; $k = 4; foreach ($nameParts as $namePart) { if ( - isset($namePart['type']) - && (string) $namePart['type'] == 'family' + !empty($namePart->getType()) + && $namePart->getType() == 'family' ) { - $name[0] = (string) $namePart; + $name[0] = $namePart->getValue(); } elseif ( - isset($namePart['type']) - && (string) $namePart['type'] == 'given' + !empty($namePart->getType()) + && $namePart->getType() == 'given' ) { - $name[1] = (string) $namePart; + $name[1] = $namePart->getValue(); } elseif ( - isset($namePart['type']) - && (string) $namePart['type'] == 'termsOfAddress' + !empty($namePart->getType()) + && $namePart->getType() == 'termsOfAddress' ) { - $name[2] = (string) $namePart; + $name[2] = $namePart->getValue(); } elseif ( - isset($namePart['type']) - && (string) $namePart['type'] == 'date' + !empty($namePart->getType()) + && $namePart->getType() == 'date' ) { - $name[3] = (string) $namePart; + $name[3] = 
$namePart->getValue(); } else { - $name[$k] = (string) $namePart; + $name[$k] = $namePart->getValue(); } $k++; } @@ -176,8 +180,8 @@ private function getAuthorFromXml(array $authors, int $i): void $this->metadata['author'][$i] = trim(implode(', ', $name)); } // Append "valueURI" to name using Unicode unit separator. - if (isset($authors[$i]['valueURI'])) { - $this->metadata['author'][$i] .= pack('C', 31) . (string) $authors[$i]['valueURI']; + if (!empty($authors[$i]->getValueURI())) { + $this->metadata['author'][$i] .= pack('C', 31) . $authors[$i]->getValueURI(); } } @@ -186,16 +190,16 @@ private function getAuthorFromXml(array $authors, int $i): void * * @access private * - * @param array $authors + * @param Name[] $authors * @param int $i * * @return void */ private function getAuthorFromXmlDisplayForm(array $authors, int $i): void { - $displayForm = $authors[$i]->xpath('./mods:displayForm'); - if ($displayForm) { - $this->metadata['author'][$i] = (string) $displayForm[0]; + $displayForms = $authors[$i]->getDisplayForms(); + if ($displayForms) { + $this->metadata['author'][$i] = $displayForms[0]->getValue(); } } @@ -208,15 +212,13 @@ private function getAuthorFromXmlDisplayForm(array $authors, int $i): void */ private function getHolders(): void { - $holders = $this->xml->xpath('./mods:name[./mods:role/mods:roleTerm[@type="code" and @authority="marcrelator"]="prv"]'); + $holders = $this->modsReader->getNames('[./mods:role/mods:roleTerm[@type="code" and @authority="marcrelator"]="prv"]'); if (!empty($holders)) { for ($i = 0, $j = count($holders); $i < $j; $i++) { - $holders[$i]->registerXPathNamespace('mods', 'http://www.loc.gov/mods/v3'); - - $identifier = $holders[$i]->xpath('./mods:name/mods:nameIdentifier[@type="viaf"]'); - if ($this->useExternalApis && !empty((string) $identifier[0])) { - $this->getHolderFromViafApi((string) $identifier[0], $holders, $i); + $identifiers = $holders[$i]->getNameIdentifiers('[@type="viaf"]'); + if ($this->useExternalApis && 
!empty($identifiers)) { + $this->getHolderFromViafApi($identifiers[0]->getValue(), $holders, $i); } else { $this->getHolderFromXml($holders, $i); } @@ -264,8 +266,8 @@ private function getHolderFromXml(array $holders, int $i): void { $this->getHolderFromXmlDisplayForm($holders, $i); // Append "valueURI" to name using Unicode unit separator. - if (isset($holders[$i]['valueURI'])) { - $this->metadata['holder'][$i] .= pack('C', 31) . (string) $holders[$i]['valueURI']; + if (!empty($holders[$i]->getValueURI())) { + $this->metadata['holder'][$i] .= pack('C', 31) . $holders[$i]->getValueURI(); } } @@ -282,9 +284,9 @@ private function getHolderFromXml(array $holders, int $i): void private function getHolderFromXmlDisplayForm(array $holders, int $i): void { // Check if there is a display form. - $displayForm = $holders[$i]->xpath('./mods:displayForm'); - if ($displayForm) { - $this->metadata['holder'][$i] = (string) $displayForm[0]; + $displayForms = $holders[$i]->getDisplayForms(); + if ($displayForms) { + $this->metadata['holder'][$i] = $displayForms[0]->getValue(); } } @@ -297,17 +299,34 @@ private function getHolderFromXmlDisplayForm(array $holders, int $i): void */ private function getPlaces(): void { - $places = $this->xml->xpath('./mods:originInfo[not(./mods:edition="[Electronic ed.]")]/mods:place/mods:placeTerm'); + $places = []; + $originInfos = $this->modsReader->getOriginInfos('[not(./mods:edition="[Electronic ed.]")]'); + foreach ($originInfos as $originInfo) { + foreach ($originInfo->getPlaces() as $place) { + foreach ($place->getPlaceTerms() as $placeTerm) { + $places[] = $placeTerm->getValue(); + } + } + } + // Get "place" and "place_sorting" again if that was to sophisticated. if (empty($places)) { // Get all places and assume these are places of publication. 
- $places = $this->xml->xpath('./mods:originInfo/mods:place/mods:placeTerm'); + $originInfos = $this->modsReader->getOriginInfos(); + foreach ($originInfos as $originInfo) { + foreach ($originInfo->getPlaces() as $place) { + foreach ($place->getPlaceTerms() as $placeTerm) { + $places[] = $placeTerm->getValue(); + } + } + } } + if (!empty($places)) { foreach ($places as $place) { - $this->metadata['place'][] = (string) $place; + $this->metadata['place'][] = $place; if (!$this->metadata['place_sorting'][0]) { - $this->metadata['place_sorting'][0] = preg_replace('/[[:punct:]]/', '', (string) $place); + $this->metadata['place_sorting'][0] = preg_replace('/[[:punct:]]/', '', $place); } } } @@ -323,31 +342,37 @@ private function getPlaces(): void private function getYears(): void { // Get "year_sorting". - $yearsSorting = $this->xml->xpath('./mods:originInfo[not(./mods:edition="[Electronic ed.]")]/mods:dateOther[@type="order" and @encoding="w3cdtf"]'); + $yearsSorting = $this->modsReader->getOriginInfos('[not(./mods:edition="[Electronic ed.]") and ./mods:dateOther[@type="order" and @encoding="w3cdtf"]]'); if ($yearsSorting) { foreach ($yearsSorting as $yearSorting) { - $this->metadata['year_sorting'][0] = (int) $yearSorting; + $otherDates = $yearSorting->getOtherDates(); + if (!empty($otherDates)) { + $this->metadata['year_sorting'][0] = (int) $otherDates[0]->getValue(); + } } } // Get "year" and "year_sorting" if not specified separately. - $years = $this->xml->xpath('./mods:originInfo[not(./mods:edition="[Electronic ed.]")]/mods:dateIssued[@keyDate="yes"]'); + $years = $this->modsReader->getOriginInfos('[not(./mods:edition="[Electronic ed.]") and ./mods:dateIssued[@keyDate="yes"]]'); // Get "year" and "year_sorting" again if that was to sophisticated. if (empty($years)) { // Get all dates and assume these are dates of publication. 
- $years = $this->xml->xpath('./mods:originInfo/mods:dateIssued'); + $years = $this->modsReader->getOriginInfos(); } if (!empty($years)) { foreach ($years as $year) { - $this->metadata['year'][] = (string) $year; - if (!$this->metadata['year_sorting'][0]) { - $yearSorting = str_ireplace('x', '5', preg_replace('/[^\d.x]/i', '', (string) $year)); - if ( - strpos($yearSorting, '.') - || strlen($yearSorting) < 3 - ) { - $yearSorting = (((int) trim($yearSorting, '.') - 1) * 100) + 50; + $issued = $year->getIssuedDates(); + if (!empty($issued)) { + $this->metadata['year'][] = $issued[0]->getValue(); + if (!$this->metadata['year_sorting'][0]) { + $yearSorting = str_ireplace('x', '5', preg_replace('/[^\d.x]/i', '', $issued[0]->getValue())); + if ( + strpos($yearSorting, '.') + || strlen($yearSorting) < 3 + ) { + $yearSorting = (((int) trim($yearSorting, '.') - 1) * 100) + 50; + } + $this->metadata['year_sorting'][0] = (int) $yearSorting; } - $this->metadata['year_sorting'][0] = (int) $yearSorting; } } } diff --git a/composer.json b/composer.json index 61c3a9697..3b4543ed0 100644 --- a/composer.json +++ b/composer.json @@ -36,6 +36,7 @@ "typo3/cms-extbase": "^10.4.37|^11.5.37", "typo3/cms-tstemplate": "^10.4.37|^11.5.37", "caseyamcl/phpoaipmh": "^3.3.1", + "slub/php-mods-reader": "^0.3.0", "ubl/php-iiif-prezi-reader": "0.3.0", "solarium/solarium": "5.2 - 6.3" }, From 05155847f2b7e389ccdb364dfc677aa513b98708 Mon Sep 17 00:00:00 2001 From: Sebastian Meyer Date: Wed, 29 May 2024 09:35:11 +0200 Subject: [PATCH 2/2] [MAINTENANCE] Update GitHub workflows (#1217) --- .../task-for-the-development-fund.md | 2 +- .github/codeql.yml | 26 ++++---- .github/phpstan.neon | 60 +++++++++---------- .github/pull.yml | 3 + .github/workflows/codeql.yml | 6 +- .github/workflows/documentation.yml | 4 +- .github/workflows/phpstan.yml | 1 + .github/workflows/tests.yml | 1 + 8 files changed, 54 insertions(+), 49 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/task-for-the-development-fund.md 
b/.github/ISSUE_TEMPLATE/task-for-the-development-fund.md index f67eef870..d798baeac 100644 --- a/.github/ISSUE_TEMPLATE/task-for-the-development-fund.md +++ b/.github/ISSUE_TEMPLATE/task-for-the-development-fund.md @@ -2,7 +2,7 @@ name: Task for the development fund about: A working package which may be sponsored by the Kitodo e.V. development fund. title: "[FUND] " -labels: ⭐ development fund 2024 +labels: ⭐ development fund 2025 assignees: '' --- diff --git a/.github/codeql.yml b/.github/codeql.yml index 56085d7bf..4e2fa3a6d 100644 --- a/.github/codeql.yml +++ b/.github/codeql.yml @@ -1,16 +1,16 @@ - name: "CodeQL Configuration" +name: CodeQL Configuration - queries: - - uses: security-and-quality +queries: + - uses: security-and-quality - query-filters: - - exclude: - problem.severity: - - note +query-filters: + - exclude: + problem.severity: + - note - paths-ignore: - - Resources/Public/JavaScript/jPlayer - - Resources/Public/JavaScript/jQuery - - Resources/Public/JavaScript/jQueryUI - - Resources/Public/JavaScript/OpenLayers - - Resources/Public/JavaScript/Toastify +paths-ignore: + - Resources/Public/JavaScript/jPlayer + - Resources/Public/JavaScript/jQuery + - Resources/Public/JavaScript/jQueryUI + - Resources/Public/JavaScript/OpenLayers + - Resources/Public/JavaScript/Toastify diff --git a/.github/phpstan.neon b/.github/phpstan.neon index 4f2fb950c..7307b130f 100644 --- a/.github/phpstan.neon +++ b/.github/phpstan.neon @@ -1,31 +1,31 @@ parameters: - ignoreErrors: - - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::countByPid\(\)\.#' - - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findByIsListed\(\)\.#' - - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findByIsSortable\(\)\.#' - - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByFeUserId\(\)\.#' - - '#Call to an undefined method 
Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByIndexName\(\)\.#' - - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByLocation\(\)\.#' - - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByPid\(\)\.#' - - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByRecordId\(\)\.#' - - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneBySessionId\(\)\.#' - - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByType\(\)\.#' - - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByUid\(\)\.#' - - '#Call to an undefined method Psr\\Http\\Message\\RequestFactoryInterface::request\(\)\.#' - - '#Call to an undefined method Solarium\\Core\\Query\\DocumentInterface::setField\(\)\.#' - - '#Call to an undefined method Ubl\\Iiif\\Presentation\\Common\\Model\\Resources\\IiifResourceInterface::getHeight\(\)\.#' - - '#Call to an undefined method Ubl\\Iiif\\Presentation\\Common\\Model\\Resources\\IiifResourceInterface::getWidth\(\)\.#' - - '#Call to an undefined method Ubl\\Iiif\\Presentation\\Common\\Model\\Resources\\IiifResourceInterface::getPossibleTextAnnotationContainers\(\)\.#' - - '#Call to an undefined method Ubl\\Iiif\\Presentation\\Common\\Model\\Resources\\IiifResourceInterface::getTextAnnotations\(\)\.#' - - '#Call to an undefined method Ubl\\Iiif\\Presentation\\Common\\Model\\Resources\\ManifestInterface::getOriginalJsonArray\(\)\.#' - - '#Call to an undefined method Ubl\\Iiif\\Presentation\\Common\\Model\\Resources\\RangeInterface::getMemberRangesAndRanges\(\)\.#' - - '#Constant LOG_SEVERITY_ERROR not found\.#' - - '#Constant LOG_SEVERITY_NOTICE not found\.#' - - '#Constant LOG_SEVERITY_WARNING not found\.#' - - '#Constant TYPO3_MODE not found\.#' - level: 5 - paths: - - ../Classes/ - excludePaths: - - 
../Classes/Controller/OaiPmhController.php - treatPhpDocTypesAsCertain: false + ignoreErrors: + - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::countByPid\(\)\.#' + - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findByIsListed\(\)\.#' + - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findByIsSortable\(\)\.#' + - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByFeUserId\(\)\.#' + - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByIndexName\(\)\.#' + - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByLocation\(\)\.#' + - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByPid\(\)\.#' + - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByRecordId\(\)\.#' + - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneBySessionId\(\)\.#' + - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByType\(\)\.#' + - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByUid\(\)\.#' + - '#Call to an undefined method Psr\\Http\\Message\\RequestFactoryInterface::request\(\)\.#' + - '#Call to an undefined method Solarium\\Core\\Query\\DocumentInterface::setField\(\)\.#' + - '#Call to an undefined method Ubl\\Iiif\\Presentation\\Common\\Model\\Resources\\IiifResourceInterface::getHeight\(\)\.#' + - '#Call to an undefined method Ubl\\Iiif\\Presentation\\Common\\Model\\Resources\\IiifResourceInterface::getWidth\(\)\.#' + - '#Call to an undefined method Ubl\\Iiif\\Presentation\\Common\\Model\\Resources\\IiifResourceInterface::getPossibleTextAnnotationContainers\(\)\.#' + - '#Call to an undefined method 
Ubl\\Iiif\\Presentation\\Common\\Model\\Resources\\IiifResourceInterface::getTextAnnotations\(\)\.#' + - '#Call to an undefined method Ubl\\Iiif\\Presentation\\Common\\Model\\Resources\\ManifestInterface::getOriginalJsonArray\(\)\.#' + - '#Call to an undefined method Ubl\\Iiif\\Presentation\\Common\\Model\\Resources\\RangeInterface::getMemberRangesAndRanges\(\)\.#' + - '#Constant LOG_SEVERITY_ERROR not found\.#' + - '#Constant LOG_SEVERITY_NOTICE not found\.#' + - '#Constant LOG_SEVERITY_WARNING not found\.#' + - '#Constant TYPO3_MODE not found\.#' + level: 5 + paths: + - ../Classes/ + excludePaths: + - ../Classes/Controller/OaiPmhController.php + treatPhpDocTypesAsCertain: false diff --git a/.github/pull.yml b/.github/pull.yml index e00fbe74a..8ef8ba932 100644 --- a/.github/pull.yml +++ b/.github/pull.yml @@ -18,3 +18,6 @@ rules: - base: 4.x upstream: kitodo:4.x mergeMethod: hardreset + - base: 5.x + upstream: kitodo:5.x + mergeMethod: hardreset diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index c17f1b765..12969e1e1 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -1,14 +1,14 @@ -name: "CodeQL" +name: CodeQL on: push: - branches: [ "master", "1.x", "2.x", "3.2.x", "3.3.x", "4.x" ] + branches: [ "master", "1.x", "2.x", "3.2.x", "3.3.x", "4.x", "5.x" ] pull_request: branches: [ "master" ] jobs: analyze: - name: Analyze + name: Static Code Analysis runs-on: ubuntu-latest permissions: actions: read diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 8180669aa..769245370 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -1,10 +1,10 @@ -name: Build Documentation +name: Documentation on: [ push, pull_request ] jobs: tests: - name: Documentation + name: Build Test runs-on: ubuntu-latest steps: - name: Checkout diff --git a/.github/workflows/phpstan.yml b/.github/workflows/phpstan.yml index 35c5266de..e998d5de5 100644 --- 
a/.github/workflows/phpstan.yml +++ b/.github/workflows/phpstan.yml @@ -8,6 +8,7 @@ on: jobs: phpstan: + name: Static Code Analysis runs-on: ubuntu-latest steps: - name: Checkout code diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 49233f11d..52e4a0e4a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,6 +8,7 @@ on: jobs: test: + name: TYPO3 runs-on: ubuntu-latest strategy: matrix: