From ab67d4633078610f947674d034efd82147d9024c Mon Sep 17 00:00:00 2001 From: Ere Maijala Date: Wed, 17 Dec 2025 12:57:50 +0200 Subject: [PATCH] Unify methods used in record classes to build the Solr array. --- CHANGELOG.md | 4 +- .../Base/Deduplication/DedupHandler.php | 16 +- .../Base/Enrichment/MusicBrainzEnrichment.php | 2 +- .../Base/Enrichment/SkosmosEnrichment.php | 2 +- .../Base/Record/AbstractRecord.php | 230 +++++++++++++++++- src/RecordManager/Base/Record/Dc.php | 151 +++++------- src/RecordManager/Base/Record/Doaj.php | 120 ++++----- src/RecordManager/Base/Record/Eaccpf.php | 38 +-- src/RecordManager/Base/Record/Ead.php | 179 ++++++-------- src/RecordManager/Base/Record/Ead3.php | 103 +++----- src/RecordManager/Base/Record/Ese.php | 119 +++------ src/RecordManager/Base/Record/Forward.php | 108 ++------ .../Base/Record/ForwardAuthority.php | 38 +-- .../Base/Record/FullTextTrait.php | 2 +- src/RecordManager/Base/Record/Lido.php | 187 +++++++------- src/RecordManager/Base/Record/Marc.php | 156 +++++------- .../Base/Record/MarcAuthority.php | 39 +-- src/RecordManager/Base/Record/Qdc.php | 188 +++++--------- .../RecordManagerTest/Base/Record/DcTest.php | 6 +- .../Base/Record/DoajTest.php | 8 +- .../Base/Record/ForwardTest.php | 13 +- .../Base/Record/LidoTest.php | 10 +- .../Base/Record/LrmiTest.php | 7 +- .../RecordManagerTest/Base/Record/QdcTest.php | 7 +- 24 files changed, 779 insertions(+), 954 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b702495f..8579aed6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,12 +26,14 @@ Anything marked with [**BC**] is known to affect backward compatibility with pre - Added support for defining additional HTTP options for Solr index requests. This could be useful e.g. when using SSL with self-signed certificates. - When multi-process support is enabled (i.e. 
record_workers and/or solr_update_workers set in recordmanager.ini), a few additional worker processes are initialized on startup to take the place of any worker that stops unexpectedly (typically due to an issue with a PHP or one of its extensions causing a segmentation fault). - Regular expression support for suppressOnField setting did not work properly. A new suppressOnFieldRegEx was introduced to make this option explicit. +- Record classes can now implement methods preProcessRecordForIndexing and postProcessRecordForIndexing for additional preparation around the toSolrArray method. ### Changed - [**BC**] The HTTP client library has been changed from HTTP_Request2 to Guzzle. This has required some changes to how the HTTP client is used. See e.g. src/RecordManager/Base/Harvest/SierraApi.php for usage examples. This also affects the settings in HTTP section of recordmanager.ini. Only the most commonly used legacy settings are automatically mapped to Guzzle's equivalents. - [**BC**] MARC: Subfields containing record identifiers for linking between records (subfield w in fields 760-787) are no longer updated to use the indexed record ID by default. Instead of the built-in list of fields there is now an option in recordmanager.ini (MarcRecord/linking_id_fields) that can be used to define the linking ID fields if this functionality is desired. -- [**BC**] All Record classes must now implement the getRecordFormat method and call AbtractRecord's toSolrArray in their overridden toSolrArray methods. +- [**BC**] Record classes have been unified to use a common set of methods for building the Solr array (see $solrAuthorityRecordSpecs and $solrBiblioRecordSpecs in AbstractRecord). +- [**BC**] All Record classes must now implement the getRecordFormat method and call AbstractRecord's toSolrArray in any overridden toSolrArray methods. 
- [**BC**] FullTextTrait's getFullTextFields was renamed to getFullTextField and refactored to return the fulltext field contents instead of a full data array. - [**BC**] Several methods in Record classes have been renamed to improve unity between the classes and to better reflect their nature. Also typing of return values has been added in many places. - [**BC**] All format-specific Skosmos and authority enrichments have been consolidated to format-agnostic SkosmosEnrichment and AuthEnrichment. diff --git a/src/RecordManager/Base/Deduplication/DedupHandler.php b/src/RecordManager/Base/Deduplication/DedupHandler.php index 45fb1c2d..04bf1644 100644 --- a/src/RecordManager/Base/Deduplication/DedupHandler.php +++ b/src/RecordManager/Base/Deduplication/DedupHandler.php @@ -345,7 +345,7 @@ public function updateDedupCandidateKeys(&$record, $metadataRecord) unset($record['title_keys']); } - $keys = $metadataRecord->getISBNs(); + $keys = $metadataRecord->getISBNsForDedup(); $oldKeys = (array)($record['isbn_keys'] ?? 
[]); if (count($oldKeys) !== count($keys) || array_diff($oldKeys, $keys)) { $record['isbn_keys'] = $keys; @@ -863,8 +863,8 @@ function () use ($candidateDbRecord) { } // Check for common ISBN - $origISBNs = $this->filterIds($origRecord->getISBNs(), $origDbRecord); - $candidateISBNs = $this->filterIds($candidateRecord->getISBNs(), $candidateDbRecord); + $origISBNs = $this->filterIds($origRecord->getISBNsForDedup(), $origDbRecord); + $candidateISBNs = $this->filterIds($candidateRecord->getISBNsForDedup(), $candidateDbRecord); $isect = array_intersect($origISBNs, $candidateISBNs); if (!empty($isect)) { // Shared ISBN -> match @@ -908,8 +908,8 @@ function () use ( return true; } - $origISSNs = $this->filterIds($origRecord->getISSNs(), $origDbRecord); - $candidateISSNs = $candidateRecord->getISSNs(); + $origISSNs = $this->filterIds($origRecord->getISSNsForDedup(), $origDbRecord); + $candidateISSNs = $candidateRecord->getISSNsForDedup(); $commonISSNs = array_intersect($origISSNs, $candidateISSNs); if (!empty($origISSNs) && !empty($candidateISSNs) && empty($commonISSNs)) { // Both have ISSNs but none match @@ -952,11 +952,11 @@ function () use ( return false; } - if ($origRecord->getSeriesISSN() != $candidateRecord->getSeriesISSN()) { + if ($origRecord->getSeriesISSNForDedup() != $candidateRecord->getSeriesISSNForDedup()) { return false; } - $candidateNumbering = $candidateRecord->getSeriesNumbering(); - if ($origRecord->getSeriesNumbering() != $candidateNumbering) { + $candidateNumbering = $candidateRecord->getSeriesNumberingForDedup(); + if ($origRecord->getSeriesNumberingForDedup() != $candidateNumbering) { return false; } diff --git a/src/RecordManager/Base/Enrichment/MusicBrainzEnrichment.php b/src/RecordManager/Base/Enrichment/MusicBrainzEnrichment.php index f72fca78..36fd72e1 100644 --- a/src/RecordManager/Base/Enrichment/MusicBrainzEnrichment.php +++ b/src/RecordManager/Base/Enrichment/MusicBrainzEnrichment.php @@ -128,7 +128,7 @@ public function enrich($sourceId, 
$record, &$solrArray) // Use publisher ids only if barcodes or musicbrainz id did not yield any results if (!$mbIds) { - $shortTitle = $record->getShortTitle(); + $shortTitle = $record->getShortTitleForEnrichment(); foreach ($record->getPublisherNumbers(['0']) as $number) { if ($id = trim($number['id'])) { $newIds = $this->getFromReleaseIndex(self::CATNO, $id, $shortTitle); diff --git a/src/RecordManager/Base/Enrichment/SkosmosEnrichment.php b/src/RecordManager/Base/Enrichment/SkosmosEnrichment.php index bcdbc235..46938304 100644 --- a/src/RecordManager/Base/Enrichment/SkosmosEnrichment.php +++ b/src/RecordManager/Base/Enrichment/SkosmosEnrichment.php @@ -150,7 +150,7 @@ class SkosmosEnrichment extends AbstractEnrichment 'alt' => 'author_variant', 'check' => 'author_corporate', ], - 'getAuthorIds' => [ + 'getPrimaryAuthorIds' => [ 'pref' => 'author', 'alt' => 'author_variant', 'check' => 'author', diff --git a/src/RecordManager/Base/Record/AbstractRecord.php b/src/RecordManager/Base/Record/AbstractRecord.php index 3f8f417e..cfd83328 100644 --- a/src/RecordManager/Base/Record/AbstractRecord.php +++ b/src/RecordManager/Base/Record/AbstractRecord.php @@ -5,7 +5,7 @@ * * PHP version 8 * - * Copyright (C) The National Library of Finland 2011-2022. + * Copyright (C) The National Library of Finland 2011-2025. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, @@ -34,6 +34,7 @@ use RecordManager\Base\Utils\MetadataUtils; use function in_array; +use function is_callable; /** * Base class for record drivers @@ -112,6 +113,103 @@ abstract class AbstractRecord */ protected $resultCache = []; + /** + * Is this an authority record? + * + * @var bool + */ + protected bool $isAuthorityRecord = false; + + /** + * Specifications for building a Solr authority record. 
+ * + * @var array + */ + protected $solrAuthorityRecordSpecs = [ + 'allfields' => ['method' => 'getAllFields'], + 'birth_date' => ['method' => 'getBirthDate'], + 'birth_place' => ['method' => 'getBirthPlace'], + 'country' => ['method' => 'getCountry'], + 'death_date' => ['method' => 'getDeathDate'], + 'death_place' => ['method' => 'getDeathPlace'], + 'field_of_activity' => ['method' => 'getFieldsOfActivity'], + 'fullrecord' => ['method' => 'getFullRecord'], + 'heading' => ['method' => 'getHeading'], + 'heading_keywords' => ['method' => 'getHeadingKeywords'], + 'language' => ['method' => 'getHeadingLanguage'], + 'occupation' => ['method' => 'getOccupations'], + 'record_type' => ['method' => 'getRecordType'], + 'related_place' => ['method' => 'getRelatedPlaces'], + 'source' => ['method' => 'getRecordSource'], + 'use_for' => ['method' => 'getUseForHeadings'], + 'use_for_keywords' => ['method' => 'getUseForHeadingKeywords'], + ]; + + /** + * Specifications for building a Solr biblio record. 
+ * + * @var array + */ + protected $solrBiblioRecordSpecs = [ + 'allfields' => ['method' => 'getAllFields'], + 'author' => ['method' => 'getPrimaryAuthors'], + 'author_role' => ['method' => 'getPrimaryAuthorRoles'], + 'author_sort' => ['method' => 'getAuthorSort'], + 'author2' => ['method' => 'getSecondaryAuthors'], + 'author2_role' => ['method' => 'getSecondaryAuthorRoles'], + 'author_corporate' => ['method' => 'getCorporateAuthors'], + 'author_corporate_role' => ['method' => 'getCorporateAuthorRoles'], + 'building' => ['method' => 'getBuilding'], + 'callnumber-first' => ['method' => 'getCallNumberFirst'], + 'callnumber-label' => ['method' => 'getCallNumberLabels'], + 'callnumber-raw' => ['method' => 'getCallNumbersRaw'], + 'callnumber-subject' => ['method' => 'getCallNumberSubject'], + 'collection' => ['method' => 'getCollection'], + 'contents' => ['method' => 'getContents'], + 'ctrlnum' => ['method' => 'getControlNumbers'], + 'dateSpan' => ['method' => 'getDateSpans'], + 'description' => ['method' => 'getDescription'], + 'doi_str_mv' => ['method' => 'getDOIs'], + 'edition' => ['method' => 'getEdition'], + 'era_facet' => ['method' => 'getEraFacets'], + 'era' => ['method' => 'getEras'], + 'format' => ['method' => 'getFormat'], + 'fullrecord' => ['method' => 'getFullRecord'], + 'fulltext' => ['method' => 'getFullTextField'], + 'genre_facet' => ['method' => 'getGenreFacets'], + 'genre' => ['method' => 'getGenres'], + 'geographic_facet' => ['method' => 'getGeographicFacets'], + 'geographic' => ['method' => 'getGeographicTopics'], + 'illustrated' => ['method' => 'getIllustrated'], + 'institution' => ['method' => 'getInstitution'], + 'isbn' => ['method' => 'getISBNs'], + 'issn' => ['method' => 'getISSNs'], + 'language' => ['method' => 'getLanguages'], + 'lccn' => ['method' => 'getLCCN'], + 'oclc_num' => ['method' => 'getOclcNumbers'], + 'physical' => ['method' => 'getPhysicalDescriptions'], + 'publishDate' => ['method' => 'getPublicationYears'], + 'publishDateRange' 
=> ['method' => 'getPublicationDateRanges'], + 'publishDateSort' => ['method' => 'getPublicationYear'], + 'publisher' => ['method' => 'getPublishers'], + 'series' => ['method' => 'getSeries'], + 'series2' => ['method' => 'getSeries2'], + 'thumbnail' => ['method' => 'getThumbnailUrl'], + 'title_alt' => ['method' => 'getAltTitles'], + 'title_full' => ['method' => 'getFullTitle'], + 'title_new' => ['method' => 'getNewTitles'], + 'title_old' => ['method' => 'getOldTitles'], + 'title_short' => ['method' => 'getShortTitle'], + 'title_sort' => ['method' => 'getTitleSort'], + 'title_sub' => ['method' => 'getTitleSub'], + 'title' => ['method' => 'getTitle'], + 'topic_browse' => ['method' => 'getTopicsForBrowse'], + 'topic_facet' => ['method' => 'getTopicFacets'], + 'topic' => ['method' => 'getTopics'], + 'url' => ['method' => 'getUrls'], + 'uuid_str_mv' => ['method' => 'getUUIDs'], + ]; + /** * Constructor * @@ -222,9 +320,22 @@ public function getHostRecordIDs(): array */ public function toSolrArray(?Database $db = null) { - return [ + $this->preProcessRecordForIndexing($db); + $data = [ 'record_format' => $this->getRecordFormat(), ]; + $specs = $this->isAuthorityRecord ? $this->solrAuthorityRecordSpecs : $this->solrBiblioRecordSpecs; + foreach ($specs as $field => $specs) { + if (($method = $specs['method'] ?? null) && is_callable([$this, $method])) { + if ($specs['useData'] ?? false) { + $this->$method($data); + } else { + $data[$field] = $this->$method(); + } + } + } + $this->postProcessRecordForIndexing($db, $data); + return $data; } /** @@ -256,6 +367,28 @@ public function getTitle($forFiling = false) return ''; } + /** + * Get record title for sorting. + * + * @return string + * + * @SuppressWarnings(PHPMD.UnusedFormalParameter) + */ + public function getTitleSort() + { + return $this->getTitle(true); + } + + /** + * Get short title for enrichment. 
+ * + * @return string + */ + public function getShortTitleForEnrichment(): string + { + return $this->getTitle(); + } + /** * Return format(s) from predefined values * @@ -347,41 +480,41 @@ public function getUniqueIDs() } /** - * Dedup: Return (unique) ISBNs in ISBN-13 format without dashes + * Get (unique) ISBNs in ISBN-13 format without dashes. * * @return array */ - public function getISBNs() + public function getISBNsForDedup(): array { - return []; + return $this->getISBNs(); } /** - * Dedup: Return ISSNs + * Dedup: Get ISSNs. * * @return array */ - public function getISSNs() + public function getISSNsForDedup(): array { - return []; + return $this->getISSNs(); } /** - * Dedup: Return series ISSN + * Dedup: Get series ISSN. * * @return string */ - public function getSeriesISSN() + public function getSeriesISSNForDedup(): string { return ''; } /** - * Dedup: Return series numbering + * Dedup: Get series numbering. * * @return string */ - public function getSeriesNumbering() + public function getSeriesNumberingForDedup(): string { return ''; } @@ -550,6 +683,49 @@ public function getdataSourceConfig() return $this->dataSourceConfig[$this->source]; } + /** + * Get (unique) ISBNs in ISBN-13 format without dashes. + * + * @return array + */ + protected function getISBNs(): array + { + return []; + } + + /** + * Get ISSNs. + * + * @return array + */ + protected function getISSNs(): array + { + return []; + } + + /** + * Do any pre-processing for the record before the conversion to Solr array. + * + * @param ?Database $db Database connection, if available + * + * @return void + */ + protected function preProcessRecordForIndexing(?Database $db): void + { + } + + /** + * Do any post-processing for the record after the main conversion to Solr array. 
+ * + * @param ?Database $db Database connection, if available + * @param array $data Array of Solr fields + * + * @return void + */ + protected function postProcessRecordForIndexing(?Database $db, &$data): void + { + } + /** * Get record format. * @@ -623,4 +799,34 @@ protected function validateDate($dateString) } return ''; } + + /** + * Get publication years. + * + * @return array + */ + protected function getPublicationYears(): array + { + return []; + } + + /** + * Get publication date ranges. + * + * @return array + */ + protected function getPublicationDateRanges(): array + { + return $this->getPublicationYears(); + } + + /** + * Get full title. + * + * @return string + */ + protected function getFullTitle(): string + { + return $this->getTitle(); + } } diff --git a/src/RecordManager/Base/Record/Dc.php b/src/RecordManager/Base/Record/Dc.php index d1041734..7363c085 100644 --- a/src/RecordManager/Base/Record/Dc.php +++ b/src/RecordManager/Base/Record/Dc.php @@ -142,43 +142,6 @@ public function getID() return $id; } - /** - * Return fields to be indexed in Solr - * - * @param ?Database $db Database connection. Omit to avoid database lookups for related records. 
- * - * @return array - */ - public function toSolrArray(?Database $db = null) - { - $data = parent::toSolrArray($db); - - $data['ctrlnum'] = $this->getControlNumbers(); - $data['fullrecord'] = $this->getFullRecord(); - $data['allfields'] = $this->getAllFields(); - $data['language'] = $this->getLanguages(); - $data['format'] = $this->getFormat(); - $data['author'] = $this->getPrimaryAuthors(); - $data['author2'] = $this->getSecondaryAuthors(); - $data['author_sort'] = $this->getAuthorSort($data['author']); - $data['title'] = $data['title_full'] = $this->getTitle(); - $data['title_short'] = $this->getShortTitle($data['title']); - $data['title_sub'] = $this->getTitleSub($data['title']); - $data['title_sort'] = $this->getTitle(true); - $data['publisher'] = $this->getPublishers(); - $data['publishDate'] = $this->getPublicationYear(); - $data['publishDateRange'] = $this->getPublicationYears(); - $data['isbn'] = $this->getISBNs(); - $data['doi_str_mv'] = $this->getDOIs(); - $data['topic'] = $this->getTopics(); - $data['topic_facet'] = $this->getTopicFacets(); - $data['url'] = $this->getUrls(); - $data['contents'] = $this->getContents(); - $data['fulltext'] = $this->getFullTextField($this->doc); - - return $data; - } - /** * Dedup: Return full title (for debugging purposes only) * @@ -199,65 +162,39 @@ public function getFullTitleForDebugging() */ public function getTitle($forFiling = false) { + $key = __METHOD__ . ($forFiling ? '1' : '0'); + if (isset($this->resultCache[$key])) { + return $this->resultCache[$key]; + } + $title = trim((string)$this->doc->title); if ($forFiling) { $title = $this->metadataUtils->createSortTitle($title); } else { - $title - = $this->metadataUtils->stripTrailingPunctuation($title, '', true); + $title = $this->metadataUtils->stripTrailingPunctuation($title, '', true); } - return $title; + return $this->resultCache[$key] = $title; } /** - * Return main author (format: Last, First) + * Get short title for enrichment. 
* * @return string */ - public function getMainAuthor() - { - return trim((string)$this->doc->creator); - } - - /** - * Dedup: Return ISBNs in ISBN-13 format without dashes - * - * @return array - */ - public function getISBNs() + public function getShortTitleForEnrichment(): string { - $arr = []; - foreach ($this->doc->identifier as $identifier) { - $identifier = str_replace('-', '', trim($identifier)); - if ('' === $identifier || !preg_match('{([0-9]{9,12}[0-9xX])}', $identifier, $matches)) { - continue; - } - $isbn = $this->metadataUtils->normalizeISBN($matches[1]); - if ($isbn) { - $arr[] = $isbn; - } - } - return array_values(array_unique($arr)); - } - - /** - * Dedup: Return series ISSN - * - * @return string - */ - public function getSeriesISSN() - { - return ''; + $titleParts = explode(' : ', $this->getTitle(), 2); + return $titleParts[0]; } /** - * Dedup: Return series numbering + * Return main author (format: Last, First) * * @return string */ - public function getSeriesNumbering() + public function getMainAuthor() { - return ''; + return trim((string)$this->doc->creator); } /** @@ -296,6 +233,37 @@ public function getPageCount() return ''; } + /** + * Get ISBNs in ISBN-13 format without dashes. + * + * @return array + */ + protected function getISBNs(): array + { + $arr = []; + foreach ($this->doc->identifier as $identifier) { + $identifier = str_replace('-', '', trim($identifier)); + if ('' === $identifier || !preg_match('{([0-9]{9,12}[0-9xX])}', $identifier, $matches)) { + continue; + } + $isbn = $this->metadataUtils->normalizeISBN($matches[1]); + if ($isbn) { + $arr[] = $isbn; + } + } + return array_values(array_unique($arr)); + } + + /** + * Get full title. + * + * @return string + */ + protected function getFullTitle(): string + { + return $this->getTitle(); + } + /** * Get DOIs * @@ -416,38 +384,33 @@ protected function getSecondaryAuthors(): array /** * Get author sort field. 
* - * @param array $authors Primary authors - * * @return string */ - protected function getAuthorSort(array $authors): string + protected function getAuthorSort(): string { + $authors = $this->getPrimaryAuthors(); return $authors[0] ?? ''; } /** * Get short title. * - * @param string $fullTitle Full title - * * @return string */ - protected function getShortTitle(string $fullTitle): string + protected function getShortTitle(): string { - $titleParts = explode(' : ', $fullTitle, 2); + $titleParts = explode(' : ', $this->getFullTitle(), 2); return $titleParts[0]; } /** * Get subtitle. * - * @param string $fullTitle Full title - * * @return string */ - protected function getTitleSub(string $fullTitle): string + protected function getTitleSub(): string { - $titleParts = explode(' : ', $fullTitle, 2); + $titleParts = explode(' : ', $this->getFullTitle(), 2); return $titleParts[1] ?? ''; } @@ -533,7 +496,7 @@ protected function getFullRecord(): string } /** - * Return publication years + * Get publication years. * * @return array */ @@ -548,4 +511,14 @@ protected function getPublicationYears(): array } return $result; } + + /** + * Get full text field for a given document + * + * @return string + */ + protected function getFullTextField(): string + { + return $this->getFullTextFieldForDocument($this->doc); + } } diff --git a/src/RecordManager/Base/Record/Doaj.php b/src/RecordManager/Base/Record/Doaj.php index b711fe08..afbd9f19 100644 --- a/src/RecordManager/Base/Record/Doaj.php +++ b/src/RecordManager/Base/Record/Doaj.php @@ -151,40 +151,6 @@ public function getID() return $id; } - /** - * Return fields to be indexed in Solr - * - * @param ?Database $db Database connection. Omit to avoid database lookups for related records. 
- * - * @return array - */ - public function toSolrArray(?Database $db = null) - { - $data = parent::toSolrArray($db); - - $this->recordDoc = $this->doc->children($this->recordNs); - $data['ctrlnum'] = $this->getControlNumbers(); - $data['fullrecord'] = $this->getFullRecord(); - $data['allfields'] = $this->getAllFields(); - $data['language'] = $this->getLanguages(); - $data['format'] = $this->getFormat(); - $data['author'] = $this->getPrimaryAuthors(); - $data['title'] = $this->getTitle(); - $data['title_full'] = $this->getFullTitle(); - $data['title_short'] = $this->getShortTitle($data['title']); - $data['title_sub'] = $this->getTitleSub($data['title']); - $data['title_sort'] = $this->getTitle(true); - $data['publisher'] = $this->getPublishers(); - $data['publishDate'] = $this->getPublicationYear(); - $data['publishDateRange'] = $this->getPublicationYears(); - $data['topic'] = $this->getTopics(); - $data['topic_facet'] = $this->getTopicFacets(); - $data['url'] = $this->getUrls(); - $data['fulltext'] = $this->getFullTextField($this->recordDoc); - - return $data; - } - /** * Dedup: Return full title (for debugging purposes only) * @@ -205,14 +171,18 @@ public function getFullTitleForDebugging() */ public function getTitle($forFiling = false) { + $key = __METHOD__ . ($forFiling ? '1' : '0'); + if (isset($this->resultCache[$key])) { + return $this->resultCache[$key]; + } + $title = trim((string)$this->doc->children($this->recordNs)->title); if ($forFiling) { $title = $this->metadataUtils->createSortTitle($title); } else { - $title - = $this->metadataUtils->stripTrailingPunctuation($title, '', true); + $title = $this->metadataUtils->stripTrailingPunctuation($title, '', true); } - return $title; + return $this->resultCache[$key] = $title; } /** @@ -225,36 +195,6 @@ public function getMainAuthor() return trim((string)($this->doc->children($this->recordNs)->authors->author->name ?? 
'')); } - /** - * Dedup: Return ISBNs in ISBN-13 format without dashes - * - * @return array - */ - public function getISBNs() - { - return []; - } - - /** - * Dedup: Return series ISSN - * - * @return string - */ - public function getSeriesISSN() - { - return ''; - } - - /** - * Dedup: Return series numbering - * - * @return string - */ - public function getSeriesNumbering() - { - return ''; - } - /** * Dedup: Return format from predefined values * @@ -290,6 +230,28 @@ public function getPageCount() return ''; } + /** + * Get ISBNs in ISBN-13 format without dashes. + * + * @return array + */ + protected function getISBNs(): array + { + return []; + } + + /** + * Do any pre-processing for the record before the conversion to Solr array. + * + * @param ?Database $db Database connection, if available + * + * @return void + */ + protected function preProcessRecordForIndexing(?Database $db): void + { + $this->recordDoc = $this->doc->children($this->recordNs); + } + /** * Get record format. * @@ -390,26 +352,22 @@ protected function getFullTitle(): string /** * Get short title. * - * @param string $fullTitle Full title - * * @return string */ - protected function getShortTitle(string $fullTitle): string + protected function getShortTitle(): string { - $titleParts = explode(' : ', $fullTitle, 2); + $titleParts = explode(' : ', $this->getFullTitle(), 2); return $titleParts[0]; } /** * Get subtitle. * - * @param string $fullTitle Full title - * * @return string */ - protected function getTitleSub(string $fullTitle): string + protected function getTitleSub(): string { - $titleParts = explode(' : ', $fullTitle, 2); + $titleParts = explode(' : ', $this->getFullTitle(), 2); return $titleParts[1] ?? ''; } @@ -469,7 +427,7 @@ protected function getUrls() } /** - * Return publication years + * Get publication years. 
* * @return array */ @@ -482,4 +440,14 @@ protected function getPublicationYears(): array } return []; } + + /** + * Get full text field for a given document + * + * @return string + */ + protected function getFullTextField(): string + { + return $this->getFullTextFieldForDocument($this->recordDoc); + } } diff --git a/src/RecordManager/Base/Record/Eaccpf.php b/src/RecordManager/Base/Record/Eaccpf.php index 5bf12a13..fcc97981 100644 --- a/src/RecordManager/Base/Record/Eaccpf.php +++ b/src/RecordManager/Base/Record/Eaccpf.php @@ -30,8 +30,6 @@ namespace RecordManager\Base\Record; -use RecordManager\Base\Database\DatabaseInterface as Database; - /** * EAC-CPF Record Class * @@ -48,6 +46,13 @@ class Eaccpf extends AbstractRecord { use XmlRecordTrait; + /** + * Is this an authority record? + * + * @var bool + */ + protected bool $isAuthorityRecord = true; + /** * Return record ID (local) * @@ -62,35 +67,6 @@ public function getID() return urlencode($id); } - /** - * Return fields to be indexed in Solr - * - * @param ?Database $db Database connection. Omit to avoid database lookups for related records. - * - * @return array - */ - public function toSolrArray(?Database $db = null) - { - $data = parent::toSolrArray($db); - - $data['fullrecord'] = $this->getFullRecord(); - $data['allfields'] = $this->getAllFields(); - $data['source'] = $this->getRecordSource(); - $data['record_type'] = $this->getRecordType(); - $data['heading'] = $this->getHeading(); - $data['use_for'] = $this->getUseForHeadings(); - $data['birth_date'] = $this->getBirthDate(); - $data['death_date'] = $this->getDeathDate(); - $data['birth_place'] = $this->getBirthPlace(); - $data['death_place'] = $this->getDeathPlace(); - $data['related_place'] = $this->getRelatedPlaces(); - $data['field_of_activity'] = $this->getFieldsOfActivity(); - $data['occupation'] = $this->getOccupations(); - $data['language'] = $this->getHeadingLanguage(); - - return $data; - } - /** * Get record format. 
* diff --git a/src/RecordManager/Base/Record/Ead.php b/src/RecordManager/Base/Record/Ead.php index 86d33f96..33ada60d 100644 --- a/src/RecordManager/Base/Record/Ead.php +++ b/src/RecordManager/Base/Record/Ead.php @@ -148,47 +148,6 @@ public function toXML() return (string)$xml; } - /** - * Return fields to be indexed in Solr - * - * @param Database $db Database connection. Omit to avoid database lookups for - * related records. - * - * @return array - */ - public function toSolrArray(?Database $db = null) - { - $data = parent::toSolrArray($db); - - $doc = $this->doc; - $data['ctrlnum'] = (string)$this->doc->attributes()->{'id'}; - $data['title_sub'] = $this->getTitleSub(); - $data['title_short'] = $this->getShortTitle(); - $data['title'] = $this->getTitleField(); - $data['title_full'] = $this->getFullTitle(); - $data['title_sort'] = $this->getTitleSort(); - $data['description'] = $this->getDescription(); - $data['author'] = $this->getPrimaryAuthors(); - $data['author2'] = $this->getSecondaryAuthors(); - $data['author_sort'] = $this->getAuthorSort($data['author']); - $data['author_corporate'] = $this->getCorporateAuthors(); - $data['topic'] = $this->getTopics(); - $data['topic_facet'] = $this->getTopicFacets(); - $data['format'] = $this->getFormat(); - $data['institution'] = $this->getInstitution(); - $data['series'] = $this->getSeries(); - $data['language'] = $this->getLanguages(); - $data['physical'] = $this->getPhysicalExtent(); - $data['thumbnail'] = $this->getThumbnailUrl(); - $data['allfields'] = $this->getAllFields($doc); - $data['fullrecord'] = $this->getFullRecord(); - - $this->addGeographicData($data); - $this->addHierarchyFields($data); - - return $data; - } - /** * Return format from predefined values * @@ -249,12 +208,30 @@ public function getMainAuthor() */ public function getTitle($forFiling = false) { - $title = (string)($this->doc->did->unittitle ?? 
''); - if ($forFiling) { - $title = $this->metadataUtils->createSortTitle($title); + if (isset($this->resultCache[__METHOD__])) { + return $this->resultCache[__METHOD__]; + } + + $titleSub = $this->getTitleSub(); + $shortTitle = $this->getShortTitle(); + + $title = ''; + // Ini handling returns true as '1': + $prependTitle = $this->getDriverParam('prependTitleWithSubtitle', '1'); + if ( + '1' === $prependTitle + || ('children' === $prependTitle && $this->doc->{'add-data'}->{'parent'}) + ) { + if ( + '' !== $titleSub + && $titleSub !== $shortTitle + ) { + $title = $titleSub . ' '; + } } + $title .= $shortTitle; - return $title; + return $this->resultCache[__METHOD__] = $title; } /** @@ -264,7 +241,31 @@ public function getTitle($forFiling = false) */ public function getShortTitle(): string { - return $this->getTitle(); + return (string)($this->doc->did->unittitle ?? ''); + } + + /** + * Get sort title. + * + * @return string + */ + public function getTitleSort(): string + { + return mb_strtolower($this->metadataUtils->stripPunctuation($this->getShortTitle()), 'UTF-8'); + } + + /** + * Do any post-processing for the record after the main conversion to Solr array. 
+ * + * @param ?Database $db Database connection, if available + * @param array $data Array of Solr fields + * + * @return void + */ + protected function postProcessRecordForIndexing(?Database $db, &$data): void + { + $this->addGeographicData($data); + $this->addHierarchyFields($data); } /** @@ -404,12 +405,13 @@ protected function getUnitId() /** * Get all XML fields * - * @param \SimpleXMLElement $xml The XML document + * @param ?\SimpleXMLElement $xml XML fragment to process, or null to process whole document * * @return array */ - protected function getAllFields($xml) + protected function getAllFields($xml = null) { + $xml ??= $this->doc; $allFields = []; foreach ($xml->children() as $field) { $s = trim((string)$field); @@ -529,6 +531,10 @@ protected function getDescription(): string */ protected function getPrimaryAuthors(): array { + if (isset($this->resultCache[__METHOD__])) { + return $this->resultCache[__METHOD__]; + } + $result = []; if ($names = $this->doc->xpath('controlaccess/persname')) { foreach ($names as $name) { @@ -537,7 +543,7 @@ protected function getPrimaryAuthors(): array } } } - return $result; + return $this->resultCache[__METHOD__] = $result; } /** @@ -578,12 +584,11 @@ protected function getCorporateAuthors(): array /** * Get author sort field. * - * @param array $authors Primary authors - * * @return string */ - protected function getAuthorSort(array $authors): string + protected function getAuthorSort(): string { + $authors = $this->getPrimaryAuthors(); return $authors[0] ?? ''; } @@ -600,59 +605,6 @@ protected function getInstitution(): string return ''; } - /** - * Get title field. 
- * - * @return string - */ - protected function getTitleField(): string - { - if (isset($this->resultCache[__METHOD__])) { - return $this->resultCache[__METHOD__]; - } - - $titleSub = $this->getTitleSub(); - $shortTitle = $this->getShortTitle(); - - $title = ''; - // Ini handling returns true as '1': - $prependTitle = $this->getDriverParam('prependTitleWithSubtitle', '1'); - if ( - '1' === $prependTitle - || ('children' === $prependTitle && $this->doc->{'add-data'}->{'parent'}) - ) { - if ( - '' !== $titleSub - && $titleSub !== $shortTitle - ) { - $title = $titleSub . ' '; - } - } - $title .= $shortTitle; - - return $this->resultCache[__METHOD__] = $title; - } - - /** - * Get full title. - * - * @return string - */ - protected function getFullTitle(): string - { - return $this->getTitleField(); - } - - /** - * Get sort title. - * - * @return string - */ - protected function getTitleSort(): string - { - return mb_strtolower($this->metadataUtils->stripPunctuation($this->getTitle()), 'UTF-8'); - } - /** * Get languages * @@ -671,11 +623,11 @@ protected function getLanguages() } /** - * Get physical extent + * Get physical descriptions. * * @return array */ - protected function getPhysicalExtent() + protected function getPhysicalDescriptions(): array { $result = []; if ($extents = $this->doc->did->xpath('physdesc/extent')) { @@ -747,4 +699,17 @@ protected function addHierarchyFields(array &$data): void = trim($this->getUnitId() . ' ' . $data['title']); } } + + /** + * Get control numbers. 
+ * + * @return array + */ + protected function getControlNumbers(): array + { + if ($id = (string)$this->doc->attributes()->{'id'}) { + return [$id]; + } + return []; + } } diff --git a/src/RecordManager/Base/Record/Ead3.php b/src/RecordManager/Base/Record/Ead3.php index 5b5025ba..cd2640d2 100644 --- a/src/RecordManager/Base/Record/Ead3.php +++ b/src/RecordManager/Base/Record/Ead3.php @@ -114,46 +114,6 @@ public function toXML() return (string)$xml; } - /** - * Return fields to be indexed in Solr - * - * @param ?Database $db Database connection. Omit to avoid database lookups for related records. - * - * @return array - */ - public function toSolrArray(?Database $db = null) - { - $data = parent::toSolrArray($db); - - $doc = $this->doc; - $data['ctrlnum'] = $this->getOldIdentifier(); - $data['title_sub'] = $this->getTitleSub(); - $data['title_short'] = $this->getShortTitle(); - $data['title'] = $this->getTitleField(); - $data['title_full'] = $this->getFullTitle(); - $data['title_sort'] = $this->getTitleSort(); - $data['description'] = $this->getDescription(); - $data['author'] = $this->getAuthors(); - $data['author_sort'] = $this->getAuthorSort($data['author']); - $data['author_corporate'] = $this->getCorporateAuthors(); - $data['geographic'] = $this->getGeographicTopics(); - $data['geographic_facet'] = $this->getGeographicFacets(); - $data['topic'] = $this->getTopics(); - $data['topic_facet'] = $this->getTopicFacets(); - $data['format'] = $this->getFormat(); - $data['institution'] = $this->getInstitution(); - $data['series'] = $this->getSeries(); - $data['language'] = $this->getLanguages(); - $data['physical'] = $this->getPhysicalExtent(); - $data['thumbnail'] = $this->getThumbnailUrl(); - $data['fullrecord'] = $this->getFullRecord(); - $data['allfields'] = $this->getAllFields($doc); - - $this->addHierarchyFields($data); - - return $data; - } - /** * Return format from predefined values * @@ -194,7 +154,7 @@ public function getTitle($forFiling = false) */ public 
function getMainAuthor() { - $authors = $this->getAuthors(); + $authors = $this->getPrimaryAuthors(); return $authors[0] ?? ''; } @@ -219,11 +179,11 @@ public function getRawGeographicTopicIds(): array } /** - * Get author identifiers + * Get primary author identifiers * * @return array */ - public function getAuthorIds(): array + public function getPrimaryAuthorIds(): array { return []; } @@ -258,6 +218,29 @@ public function getShortTitle(): string return $this->getTitle(); } + /** + * Get sort title. + * + * @return string + */ + public function getTitleSort(): string + { + return mb_strtolower($this->metadataUtils->stripPunctuation($this->getTitleField()), 'UTF-8'); + } + + /** + * Do any post-processing for the record after the main conversion to Solr array. + * + * @param ?Database $db Database connection, if available + * @param array $data Array of Solr fields + * + * @return void + */ + protected function postProcessRecordForIndexing(?Database $db, &$data): void + { + $this->addHierarchyFields($data); + } + /** * Get record format. * @@ -300,12 +283,16 @@ protected function getDescription(): string } /** - * Get authors + * Get primary authors. * * @return array */ - protected function getAuthors(): array + protected function getPrimaryAuthors(): array { + if (isset($this->resultCache[__METHOD__])) { + return $this->resultCache[__METHOD__]; + } + $result = []; foreach ($this->getAuthorElements() as $name) { foreach ($name->part as $part) { @@ -314,7 +301,7 @@ protected function getAuthors(): array } } } - return $result; + return $this->resultCache[__METHOD__] = $result; } /** @@ -487,11 +474,11 @@ protected function getLanguages() } /** - * Get physical extent + * Get physical descriptions. * * @return array */ - protected function getPhysicalExtent() + protected function getPhysicalDescriptions(): array { $result = []; foreach ($this->doc->did->physdesc->extent ?? 
[] as $extent) { @@ -610,12 +597,13 @@ protected function addHierarchyFields(array &$data): void /** * Get all XML fields * - * @param \SimpleXMLElement $xml The XML document + * @param ?\SimpleXMLElement $xml XML fragment to process, or null to process whole document * * @return array */ - protected function getAllFields($xml) + protected function getAllFields($xml = null) { + $xml ??= $this->doc; $allFields = []; foreach ($xml->children() as $field) { $s = trim((string)$field); @@ -643,12 +631,11 @@ protected function getFullRecord(): string /** * Get author sort field. * - * @param array $authors Primary authors - * * @return string */ - protected function getAuthorSort(array $authors): string + protected function getAuthorSort(): string { + $authors = $this->getPrimaryAuthors(); return $authors[0] ?? ''; } @@ -694,14 +681,4 @@ protected function getFullTitle(): string { return $this->getTitleField(); } - - /** - * Get sort title. - * - * @return string - */ - protected function getTitleSort(): string - { - return mb_strtolower($this->metadataUtils->stripPunctuation($this->getTitleField()), 'UTF-8'); - } } diff --git a/src/RecordManager/Base/Record/Ese.php b/src/RecordManager/Base/Record/Ese.php index 7c0862f3..b2d03ab3 100644 --- a/src/RecordManager/Base/Record/Ese.php +++ b/src/RecordManager/Base/Record/Ese.php @@ -29,8 +29,6 @@ namespace RecordManager\Base\Record; -use RecordManager\Base\Database\DatabaseInterface as Database; - /** * Ese record class * @@ -56,40 +54,6 @@ public function getID() return ''; } - /** - * Return fields to be indexed in Solr - * - * @param ?Database $db Database connection. Omit to avoid database lookups for related records. 
- * - * @return array - */ - public function toSolrArray(?Database $db = null) - { - $data = parent::toSolrArray($db); - - $data['ctrlnum'] = $this->getControlNumbers(); - $data['fullrecord'] = $this->getFullRecord(); - $data['allfields'] = $this->getAllFields(); - $data['language'] = $this->getLanguages(); - $data['format'] = $this->getFormat(); - $data['author'] = $this->getPrimaryAuthors(); - $data['author2'] = $this->getSecondaryAuthors(); - $data['title'] = $this->getTitle(); - $data['title_full'] = $this->getFullTitle(); - $data['title_short'] = $this->getShortTitle($data['title']); - $data['title_sub'] = $this->getTitleSub($data['title']); - $data['title_sort'] = $this->getTitle(true); - $data['publisher'] = $this->getPublishers(); - $data['publishDate'] = $this->getPublicationYear(); - $data['publishDateRange'] = $this->getPublicationYears(); - $data['isbn'] = $this->getISBNs(); - $data['topic'] = $this->getTopics(); - $data['topic_facet'] = $this->getTopicFacets(); - $data['url'] = $this->getUrls(); - - return $data; - } - /** * Dedup: Return full title (for debugging purposes only) * @@ -110,11 +74,16 @@ public function getFullTitleForDebugging() */ public function getTitle($forFiling = false) { + $key = __METHOD__ . ($forFiling ? 
'1' : '0'); + if (isset($this->resultCache[$key])) { + return $this->resultCache[$key]; + } + $title = trim((string)$this->doc->title); if ($forFiling) { $title = $this->metadataUtils->createSortTitle($title); } - return $title; + return $this->resultCache[$key] = $title; } /** @@ -127,47 +96,6 @@ public function getMainAuthor() return (string)$this->doc->creator; } - /** - * Dedup: Return ISBNs in ISBN-13 format without dashes - * - * @return array - */ - public function getISBNs() - { - $arr = []; - foreach ($this->doc->identifier as $identifier) { - $identifier = str_replace('-', '', $identifier); - if (!preg_match('{([0-9]{9,12}[0-9xX])}', $identifier, $matches)) { - continue; - } - $isbn = $this->metadataUtils->normalizeISBN($matches[1]); - if ($isbn) { - $arr[] = $isbn; - } - } - return array_values(array_unique($arr)); - } - - /** - * Dedup: Return series ISSN - * - * @return string - */ - public function getSeriesISSN() - { - return ''; - } - - /** - * Dedup: Return series numbering - * - * @return string - */ - public function getSeriesNumbering() - { - return ''; - } - /** * Dedup: Return format from predefined values * @@ -203,6 +131,27 @@ public function getPageCount() return ''; } + /** + * Get ISBNs in ISBN-13 format without dashes. + * + * @return array + */ + protected function getISBNs(): array + { + $arr = []; + foreach ($this->doc->identifier as $identifier) { + $identifier = str_replace('-', '', $identifier); + if (!preg_match('{([0-9]{9,12}[0-9xX])}', $identifier, $matches)) { + continue; + } + $isbn = $this->metadataUtils->normalizeISBN($matches[1]); + if ($isbn) { + $arr[] = $isbn; + } + } + return array_values(array_unique($arr)); + } + /** * Get record format. * @@ -315,26 +264,22 @@ protected function getFullTitle(): string /** * Get short title. 
* - * @param string $fullTitle Full title - * * @return string */ - protected function getShortTitle(string $fullTitle): string + protected function getShortTitle(): string { - $titleParts = explode(' : ', $fullTitle, 2); + $titleParts = explode(' : ', $this->getFullTitle(), 2); return $titleParts[0]; } /** * Get subtitle. * - * @param string $fullTitle Full title - * * @return string */ - protected function getTitleSub(string $fullTitle): string + protected function getTitleSub(): string { - $titleParts = explode(' : ', $fullTitle, 2); + $titleParts = explode(' : ', $this->getFullTitle(), 2); return $titleParts[1] ?? ''; } @@ -390,7 +335,7 @@ protected function getUrls() } /** - * Return publication years + * Get publication years. * * @return array */ diff --git a/src/RecordManager/Base/Record/Forward.php b/src/RecordManager/Base/Record/Forward.php index a75f1e9d..0fda4929 100644 --- a/src/RecordManager/Base/Record/Forward.php +++ b/src/RecordManager/Base/Record/Forward.php @@ -29,7 +29,6 @@ namespace RecordManager\Base\Record; -use RecordManager\Base\Database\DatabaseInterface as Database; use RecordManager\Base\Utils\Logger; use RecordManager\Base\Utils\MetadataUtils; @@ -147,52 +146,6 @@ public function getID() return $id; } - /** - * Return fields to be indexed in Solr - * - * @param ?Database $db Database connection. Omit to avoid database lookups for related records. 
- * - * @return array - */ - public function toSolrArray(?Database $db = null) - { - $data = parent::toSolrArray($db); - - $data['ctrlnum'] = $this->getControlNumbers(); - $data['fullrecord'] = $this->getFullRecord(); - $data['publishDate'] = $this->getPublicationYear(); - $data['publishDateRange'] = $this->getPublicationYears(); - $data['title'] = $this->getTitle(); - $data['title_full'] = $this->getFullTitle(); - $data['title_short'] = $this->getShortTitle(); - $data['title_sort'] = $this->getTitle(true); - $data['title_alt'] = $this->getAltTitles(); - $data['description'] = $this->getDescription(); - $data['topic'] = $this->getTopics(); - $data['topic_facet'] = $this->getTopicFacets(); - $data['url'] = $this->getUrls(); - $data['thumbnail'] = $this->getThumbnailUrl(); - $data['author'] = $this->getPrimaryAuthorNamesSorted(); - // Support for author_variant is currently not implemented - $data['author_role'] = $this->getPrimaryAuthorRolesSorted(); - $data['author_sort'] = $this->getAuthorSort($data['author']); - $data['author2'] = $this->getSecondaryAuthorNames(); - // Support for author2_variant is currently not implemented - $data['author2_role'] = $this->getSecondaryAuthorRoles(); - $data['author_corporate'] = $this->getCorporateAuthorNames(); - $data['author_corporate_role'] = $this->getCorporateAuthorRoles(); - $data['geographic'] = $this->getGeographicTopics(); - $data['geographic_facet'] = $this->getGeographicFacets(); - $data['genre'] = $this->getGenres(); - $data['genre_facet'] = $this->getGenreFacets(); - $data['url'] = $this->getUrls(); - $data['format'] = $this->getFormat(); - $data['publisher'] = $this->getPublishers(); - $data['allfields'] = $this->getAllFields(); - - return $data; - } - /** * Return main author (format: Last, First) * @@ -315,7 +268,7 @@ protected function getAllFields($fields = null) */ protected function getAuthorsByRelator($relators = []) { - $key = md5(__METHOD__ . json_encode($relators)); + $key = md5(__METHOD__ . 
implode(',', $relators)); if (isset($this->resultCache[$key])) { return $this->resultCache[$key]; } @@ -355,27 +308,17 @@ protected function getRelator($agent) * * @return array */ - protected function getPrimaryAuthors(): array + protected function getPrimaryAuthorsUnsorted(): array { return $this->getAuthorsByRelator($this->primaryAuthorRelators); } - /** - * Get secondary authors - * - * @return array - */ - protected function getSecondaryAuthors(): array - { - return $this->getAuthorsByRelator($this->secondaryAuthorRelators); - } - /** * Get secondary author names. * * @return array */ - protected function getSecondaryAuthorNames(): array + protected function getSecondaryAuthors(): array { return $this->getAuthorsByRelator($this->secondaryAuthorRelators)['names']; } @@ -396,16 +339,6 @@ protected function getSecondaryAuthorRoles(): array * @return array */ protected function getCorporateAuthors(): array - { - return $this->getAuthorsByRelator($this->corporateAuthorRelators); - } - - /** - * Get corporate author names. - * - * @return array - */ - protected function getCorporateAuthorNames(): array { return $this->getAuthorsByRelator($this->corporateAuthorRelators)['names']; } @@ -425,13 +358,13 @@ protected function getCorporateAuthorRoles(): array * * @return array */ - protected function getPrimaryAuthorsSorted() + protected function getPrimaryAuthorsSorted(): array { if (isset($this->resultCache[__METHOD__])) { return $this->resultCache[__METHOD__]; } - $unsortedPrimaryAuthors = $this->getPrimaryAuthors(); + $unsortedPrimaryAuthors = $this->getPrimaryAuthorsUnsorted(); // Make sure directors are first of the primary authors $directors = $others = [ 'names' => [], @@ -453,11 +386,11 @@ protected function getPrimaryAuthorsSorted() } /** - * Get sorted primary author names. + * Get primary author names. 
* * @return array */ - protected function getPrimaryAuthorNamesSorted(): array + protected function getPrimaryAuthors(): array { return $this->getPrimaryAuthorsSorted()['names']; } @@ -467,7 +400,7 @@ protected function getPrimaryAuthorNamesSorted(): array * * @return array */ - protected function getPrimaryAuthorRolesSorted(): array + protected function getPrimaryAuthorRoles(): array { return $this->getPrimaryAuthorsSorted()['relators']; } @@ -475,25 +408,25 @@ protected function getPrimaryAuthorRolesSorted(): array /** * Get contents * - * @param string $language Optionally take only description in the given language - * * @return array */ - protected function getContents($language = null) + protected function getContents() { $results = []; + $primaryLanguageResults = []; foreach ($this->getMainElement()->ContentDescription as $description) { - if (null !== $language && (string)$description->Language !== $language) { - continue; - } if ( (string)$description->DescriptionType == 'Content description' && !empty($description->DescriptionText) ) { - $results[] = (string)$description->DescriptionText; + $descriptionText = (string)$description->DescriptionText; + $results[] = $descriptionText; + if ((string)$description->Language === $this->primaryLanguage) { + $primaryLanguageResults[] = $descriptionText; + } } } - return $results; + return $primaryLanguageResults ?: $results; } /** @@ -639,7 +572,7 @@ protected function getDescription(): string if (!$descriptions) { $descriptions = $this->getDescriptions(); } - $contents = $this->getContents($this->primaryLanguage); + $contents = $this->getContents(); if (!$contents) { $contents = $this->getContents(); } @@ -680,12 +613,11 @@ protected function getTopicFacets(): array /** * Get author sort field. 
* - * @param array $authors Primary authors - * * @return string */ - protected function getAuthorSort(array $authors): string + protected function getAuthorSort(): string { + $authors = $this->getPrimaryAuthors(); return $authors[0] ?? ''; } @@ -719,7 +651,7 @@ protected function getGeographicFacets(): array } /** - * Return publication years + * Get publication years. * * @return array */ diff --git a/src/RecordManager/Base/Record/ForwardAuthority.php b/src/RecordManager/Base/Record/ForwardAuthority.php index 1ddbf5b9..c4042a9d 100644 --- a/src/RecordManager/Base/Record/ForwardAuthority.php +++ b/src/RecordManager/Base/Record/ForwardAuthority.php @@ -29,8 +29,6 @@ namespace RecordManager\Base\Record; -use RecordManager\Base\Database\DatabaseInterface as Database; - use function assert; use function is_array; @@ -49,6 +47,13 @@ class ForwardAuthority extends AbstractRecord { use XmlRecordTrait; + /** + * Is this an authority record? + * + * @var bool + */ + protected bool $isAuthorityRecord = true; + /** * Return record ID (local) * @@ -61,35 +66,6 @@ public function getID() . (string)$doc->AgentIdentifier->IDValue; } - /** - * Return fields to be indexed in Solr - * - * @param ?Database $db Database connection. Omit to avoid database lookups for related records. 
- * - * @return array - */ - public function toSolrArray(?Database $db = null) - { - $data = parent::toSolrArray($db); - - $data['fullrecord'] = $this->getFullRecord(); - $data['allfields'] = $this->getAllFields(); - $data['source'] = $this->getRecordSource(); - $data['record_type'] = $this->getRecordType(); - $data['heading'] = $this->getHeading(); - $data['use_for'] = $this->getUseForHeadings(); - $data['birth_date'] = $this->getBirthDate(); - $data['death_date'] = $this->getDeathDate(); - $data['birth_place'] = $this->getBirthPlace(); - $data['death_place'] = $this->getDeathPlace(); - $data['related_place'] = $this->getRelatedPlaces(); - $data['field_of_activity'] = $this->getFieldsOfActivity(); - $data['occupation'] = $this->getOccupations(); - $data['language'] = $this->getHeadingLanguage(); - - return $data; - } - /** * Get record format. * diff --git a/src/RecordManager/Base/Record/FullTextTrait.php b/src/RecordManager/Base/Record/FullTextTrait.php index 3101db22..30165986 100644 --- a/src/RecordManager/Base/Record/FullTextTrait.php +++ b/src/RecordManager/Base/Record/FullTextTrait.php @@ -85,7 +85,7 @@ trait FullTextTrait * * @return string */ - protected function getFullTextField($doc): string + protected function getFullTextFieldForDocument($doc): string { $fulltext = []; $xpaths = $this->getDriverParam('fullTextXpaths', []); diff --git a/src/RecordManager/Base/Record/Lido.php b/src/RecordManager/Base/Record/Lido.php index 4bec6a74..e9324999 100644 --- a/src/RecordManager/Base/Record/Lido.php +++ b/src/RecordManager/Base/Record/Lido.php @@ -5,7 +5,7 @@ * * PHP version 8 * - * Copyright (C) The National Library of Finland 2011-2022. + * Copyright (C) The National Library of Finland 2011-2025. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, @@ -134,55 +134,24 @@ class Lido extends AbstractRecord protected $excludedLocationAppellationValueLabels = []; /** - * Return record ID (local) + * LIDO elements excluded from allfields. * - * @return string + * @var array */ - public function getID() - { - return (string)$this->doc->lido->lidoRecID; - } + protected $excludeFromAllFields = [ + 'conceptID', 'eventType', 'legalBodyWeblink', 'linkResource', + 'objectMeasurementsWrap', 'recordMetadataDate', 'recordType', + 'resourceWrap', 'relatedWorksWrap', 'rightsType', 'roleActor', + ]; /** - * Return fields to be indexed in Solr - * - * @param ?Database $db Database connection. Omit to avoid database lookups for related records. + * Return record ID (local) * - * @return array + * @return string */ - public function toSolrArray(?Database $db = null) + public function getID() { - $data = parent::toSolrArray($db); - - $data['title'] = $this->getTitle(false); - $data['title_short'] = $this->getShortTitle(); - $data['title_full'] = $this->getFullTitle(); - $data['title_sort'] = $this->getTitle(true); - $data['title_alt'] = $this->getAltTitles(); - $data['description'] = $this->getDescription(); - $data['format'] = $this->getObjectWorkType(); - $data['institution'] = $this->getLegalBodyName(); - $data['author'] = $this->getAuthors(); - $data['author_sort'] = $this->getAuthorSort($data['author']); - $data['author2'] = $this->getSecondaryAuthors(); - $data['topic'] = $this->getTopics(); - $data['topic_facet'] = $this->getTopics(); - $data['material_str_mv'] = $this->getMaterials(); - $data['era'] = $this->getEras(); - $data['era_facet'] = $this->getEraFacets(); - $data['geographic'] = $this->getGeographicTopics(); - $data['geographic_facet'] = $this->getGeographicFacets(); - $data['collection'] = $this->getCollection(); - $data['url'] = $this->getURLs(); - $data['thumbnail'] = 
$this->getThumbnailUrl(); - $data['ctrlnum'] = $this->getControlNumbers(); - $data['isbn'] = $this->getISBNs(); - $data['issn'] = $this->getISSNs(); - $data['allfields'] = $this->getAllFields($this->doc); - - $this->addHierarchyFields($data); - - return $data; + return (string)$this->doc->lido->lidoRecID; } /** @@ -264,7 +233,7 @@ public function getLocations() */ public function getMainAuthor() { - $authors = $this->getAuthors(); + $authors = $this->getPrimaryAuthors(); return $authors ? $authors[0] : ''; } @@ -324,31 +293,12 @@ public function getWorkIdentificationData() return [compact('titles', 'authors', 'titlesAltScript', 'authorsAltScript')]; } - /** - * Dedup: Return ISBNs in ISBN-13 format without dashes - * - * @return array - */ - public function getISBNs() - { - $arr = []; - foreach ($this->getIdentifiersByType(['isbn'], []) as $identifier) { - if ($isbn = $this->metadataUtils->normalizeISBN($this->checkISBN((string)$identifier))) { - $arr[] = $isbn; - } else { - $this->storeWarning("Invalid ISBN '$identifier'"); - } - } - - return array_unique($arr); - } - /** * Dedup: Return ISSNs * * @return array */ - public function getISSNs() + public function getISSNs(): array { return $this->getIdentifiersByType(['issn'], []); } @@ -374,11 +324,11 @@ public function getRawGeographicTopicIds(): array } /** - * Get author identifiers + * Get primary author identifiers * * @return array */ - public function getAuthorIds(): array + public function getPrimaryAuthorIds(): array { return []; } @@ -413,6 +363,50 @@ public function getFullTitle(): string return $this->getTitle(); } + /** + * Get format. + * + * @link http://www.lido-schema.org/schema/v1.0/lido-v1.0-schema-listing.html + * #objectWorkTypeWrap + * @return string + */ + public function getFormat() + { + return $this->getObjectWorkType(); + } + + /** + * Do any post-processing for the record after the main conversion to Solr array. 
+ * + * @param ?Database $db Database connection, if available + * @param array $data Array of Solr fields + * + * @return void + */ + protected function postProcessRecordForIndexing(?Database $db, &$data): void + { + $this->addHierarchyFields($data); + } + + /** + * Get ISBNs in ISBN-13 format without dashes. + * + * @return array + */ + protected function getISBNs(): array + { + $arr = []; + foreach ($this->getIdentifiersByType(['isbn'], []) as $identifier) { + if ($isbn = $this->metadataUtils->normalizeISBN($this->checkISBN((string)$identifier))) { + $arr[] = $isbn; + } else { + $this->storeWarning("Invalid ISBN '$identifier'"); + } + } + + return array_unique($arr); + } + /** * Get record format. * @@ -635,6 +629,16 @@ protected function getSubLocation($place, $isSub = false) ? (string)$place->namePlaceSet->appellationValue : ''; } + /** + * Get institution. + * + * @return string + */ + protected function getInstitution(): string + { + return $this->getLegalBodyName(); + } + /** * Return the legal body name. * @@ -715,7 +719,7 @@ protected function getObjectWorkType() * * @return array */ - protected function getURLs() + protected function getUrls() { $results = []; foreach ($this->getResourceSetNodes() as $set) { @@ -734,15 +738,20 @@ protected function getURLs() /** * Return names of actors associated with specified event * - * @param string|array $event Event type(s) allowed (null = all types) - * @param string|array $role Roles allowed (null = all roles) - * @param bool $includeRoles Whether to include actor roles in the - * results + * @param string|array|null $event Event type(s) allowed (null = all types) + * @param string|array|null $role Roles allowed (null = all roles) + * @param bool $includeRoles Whether to include actor roles in the results * * @return array */ protected function getActors($event = null, $role = null, $includeRoles = false) { + $key = md5(__METHOD__ . ($event ? implode(',', (array)$event) : 'null') . '|' + . ($role ? 
implode(',', (array)$role) : 'null') . '|' . ($includeRoles ? '1' : '0')); + if (isset($this->resultCache[$key])) { + return $this->resultCache[$key]; + } + $result = []; foreach ($this->getEventNodes($event) as $eventNode) { foreach ($eventNode->eventActor as $actorNode) { @@ -767,7 +776,7 @@ protected function getActors($event = null, $role = null, $includeRoles = false) } } - return $result; + return $this->resultCache[$key] = $result; } /** @@ -999,21 +1008,16 @@ protected function getEventMaterials($eventType) * * A recursive method for fetching all relevant fields * - * @param \SimpleXMLElement $xml The XML document + * @param ?\SimpleXMLElement $xml XML fragment to process, or null to process whole document * * @return array */ - protected function getAllFields($xml) + protected function getAllFields($xml = null) { - $ignoredFields = [ - 'conceptID', 'eventType', 'legalBodyWeblink', 'linkResource', - 'objectMeasurementsWrap', 'recordMetadataDate', 'recordType', - 'resourceWrap', 'relatedWorksWrap', 'rightsType', 'roleActor', - ]; - + $xml ??= $this->doc; $allFields = []; foreach ($xml->children() as $tag => $field) { - if (in_array($tag, $ignoredFields)) { + if (in_array($tag, $this->excludeFromAllFields)) { continue; } $s = trim((string)$field); @@ -1328,17 +1332,17 @@ protected function getPlaceEvents(): array } /** - * Get authors + * Get primary authors. * * @return array */ - protected function getAuthors(): array + protected function getPrimaryAuthors(): array { return $this->getActors($this->getMainEvents()); } /** - * Get secondary authors + * Get secondary authors. * * @return array */ @@ -1349,16 +1353,6 @@ protected function getSecondaryAuthors(): array : []; } - /** - * Get materials - * - * @return array - */ - protected function getMaterials(): array - { - return $this->getEventMaterials($this->getMainEvents()); - } - /** * Get Display dates * @@ -1527,12 +1521,11 @@ protected function checkISBN($identifier = ''): string /** * Get author sort field. 
* - * @param array $authors Primary authors - * * @return string */ - protected function getAuthorSort(array $authors): string + protected function getAuthorSort(): string { + $authors = $this->getPrimaryAuthors(); return $authors[0] ?? ''; } diff --git a/src/RecordManager/Base/Record/Marc.php b/src/RecordManager/Base/Record/Marc.php index 28ac0f26..d66c0335 100644 --- a/src/RecordManager/Base/Record/Marc.php +++ b/src/RecordManager/Base/Record/Marc.php @@ -273,75 +273,6 @@ public function toXML() return substr($collection, $startPos, $endPos + 9 - $startPos); } - /** - * Return fields to be indexed in Solr - * - * @param ?Database $db Database connection. Omit to avoid database lookups for related records. - * - * @return array - */ - public function toSolrArray(?Database $db = null) - { - $this->processLinkingFields($db); - - $data = parent::toSolrArray($db); - - $data['building'] = $this->getBuilding(); - $this->addGeographicLocationFields($data); - $data['lccn'] = $this->getLCCN(); - $data['ctrlnum'] = $this->getControlNumbers(); - $data['fullrecord'] = $this->getFullRecord(); - $data['allfields'] = $this->getAllFields(); - $data['language'] = $this->getLanguages(); - $data['format'] = $this->getFormat(); - $this->addAuthorFields($data); - $data['title'] = $this->getTitle(); - $data['title_sub'] = $this->getTitleSub(); - $data['title_short'] = $this->getShortTitle(true); - $data['title_full'] = $this->getFullTitle(true); - $data['title_alt'] = $this->getAltTitles(); - $data['title_old'] = $this->getOldTitles(); - $data['title_new'] = $this->getNewTitles(); - $data['title_sort'] = $this->getTitle(true); - $data['series'] = $this->getSeries(); - $data['series2'] = $this->getSeries2(); - $data['publisher'] = $this->getPublishers(); - $data['publishDateSort'] = $this->getPublicationYear(); - $data['publishDate'] = $this->getPublicationYears(); - $data['publishDateRange'] = $this->getPublicationDateRanges(); - $data['physical'] = $this->getPhysicalDescriptions(); - 
$data['dateSpan'] = $this->getDateSpans(); - $data['edition'] = $this->getEdition(); - $data['contents'] = $this->getContents(); - $data['isbn'] = $this->getISBNFields(); - $data['issn'] = $this->getISSNFields(); - $data['doi_str_mv'] = $this->getDOIs(); - $data['callnumber-first'] = $this->getCallNumberFirst(); - $data['callnumber-subject'] = $this->getCallNumberSubject(); - $data['callnumber-raw'] = $this->getCallNumbersRaw(); - $data['callnumber-label'] = $this->getCallNumberLabels(); - $this->augmentCallNumberFields($data); - $data['topic'] = $this->getTopics(); - $data['topic_facet'] = $this->getTopicFacets(); - $data['topic_browse'] = $this->getTopicsForBrowse(); - $data['genre'] = $this->getGenres(); - $data['genre_facet'] = $this->getGenreFacets(); - $data['geographic'] = $this->getGeographicTopics(); - $data['geographic_facet'] = $this->getGeographicFacets(); - $data['era'] = $this->getEras(); - $data['era_facet'] = $this->getEraFacets(); - $data['url'] = $this->getUrls(); - $data['illustrated'] = $this->getIllustrated(); - $this->addDeweyFields($data); - $data['oclc_num'] = $this->getOclcNumbers(); - $data['uuid_str_mv'] = $this->getUUIDs(); - - // Get warnings from the MARC handler last: - $this->storeWarnings($this->record->getWarnings()); - - return $data; - } - /** * Return record ID (local) * @@ -623,7 +554,14 @@ public function getMainAuthor() */ public function getFullTitleForDebugging() { - return $this->getFullTitle(); + $title = $this->getFieldSubfields( + '245', + ['a', 'b', 'c', 'f', 'g', 'h', 'k', 'n', 'p', 's'], + false + ); + // Try to clean up the title but return original if it only contains + // punctuation: + return $this->metadataUtils->stripTrailingPunctuation($title, '', true); } /** @@ -753,11 +691,11 @@ public function getUUIDs(): array } /** - * Dedup: Return (unique) ISBNs in ISBN-13 format without dashes + * Dedup: Get (unique) ISBNs in ISBN-13 format without dashes * * @return array */ - public function getISBNs() + public 
function getISBNsForDedup(): array { $arr = []; $fields = $this->record->getFields('020'); @@ -778,11 +716,11 @@ public function getISBNs() } /** - * Dedup: Return ISSNs + * Dedup: Get ISSNs. * * @return array */ - public function getISSNs() + public function getISSNsForDedup(): array { $arr = []; $fields = $this->record->getFields('022'); @@ -797,21 +735,21 @@ public function getISSNs() } /** - * Dedup: Return series ISSN + * Dedup: Get series ISSN. * * @return string */ - public function getSeriesISSN() + public function getSeriesISSNForDedup(): string { return $this->getFieldSubfield('490', 'x'); } /** - * Dedup: Return series numbering + * Dedup: Get series numbering. * * @return string */ - public function getSeriesNumbering() + public function getSeriesNumberingForDedup(): string { return $this->getFieldSubfield('490', 'v'); } @@ -1311,14 +1249,12 @@ public function getPublisherNumbers(array $includeInd1 = []): array /** * Get short title * - * @param bool $allowUniformTitle Return uniform title if title does not exist - * * @return string */ - public function getShortTitle(bool $allowUniformTitle = false): string + public function getShortTitle(): string { $title = $this->getFieldSubfields('245', ['a'], false); - if ($allowUniformTitle && '' === $title) { + if ('' === $title) { $title = $this->getFieldSubfields('240', ['a', 'n', 'p']); } // Try to clean up the title but return original if it only contains @@ -1327,7 +1263,51 @@ public function getShortTitle(bool $allowUniformTitle = false): string } /** - * Return publication years + * Get short title for enrichment. + * + * @return string + */ + public function getShortTitleForEnrichment(): string + { + $title = $this->getFieldSubfields('245', ['a'], false); + // Try to clean up the title but return original if it only contains + // punctuation: + return $this->metadataUtils->stripTrailingPunctuation($title, '', true); + } + + /** + * Do any pre-processing for the record before the conversion to Solr array. 
+ * + * @param ?Database $db Database connection, if available + * + * @return void + */ + protected function preProcessRecordForIndexing(?Database $db): void + { + $this->processLinkingFields($db); + } + + /** + * Do any post-processing for the record after the main conversion to Solr array. + * + * @param ?Database $db Database connection, if available + * @param array $data Array of Solr fields + * + * @return void + */ + protected function postProcessRecordForIndexing(?Database $db, &$data): void + { + $this->addGeographicLocationFields($data); + $this->addAuthorFields($data); + $this->augmentCallNumberFields($data); + $this->addDeweyFields($data); + + // Get warnings from the MARC handler last: + $this->storeWarnings($this->record->getWarnings()); + } + + /** + * Get publication years. * * @return array */ @@ -1474,18 +1454,16 @@ protected function getControlNumbers(): array /** * Get full title * - * @param bool $allowUniformTitle Return uniform title if title does not exist - * * @return string */ - protected function getFullTitle(bool $allowUniformTitle = false): string + protected function getFullTitle(): string { $title = $this->getFieldSubfields( '245', ['a', 'b', 'c', 'f', 'g', 'h', 'k', 'n', 'p', 's'], false ); - if ($allowUniformTitle && '' === $title) { + if ('' === $title) { $title = $this->getFieldSubfields('240', ['a', 'd', 'f', 'g', 'h', 'k', 'n', 'o', 'p', 'r', 's']); } // Try to clean up the title but return original if it only contains @@ -1796,7 +1774,7 @@ protected function getContents(): array * * @return array */ - protected function getISBNFields(): array + protected function getISBNs(): array { $result = []; foreach ($this->isbnFields as $fieldSpec) { @@ -2752,7 +2730,7 @@ protected function augmentCallNumberFields(array &$data): void { $useHILCC = $this->getDriverParam('useHILCC', false); $sortKey = ''; - foreach ($data['callnumber-raw'] as $callnumber) { + foreach ($data['callnumber-raw'] ?? 
[] as $callnumber) { $cn = new LcCallNumber($callnumber); // Store sort key even from an invalid CN in case we don't find a valid // one: diff --git a/src/RecordManager/Base/Record/MarcAuthority.php b/src/RecordManager/Base/Record/MarcAuthority.php index 08c4710b..fb2ca228 100644 --- a/src/RecordManager/Base/Record/MarcAuthority.php +++ b/src/RecordManager/Base/Record/MarcAuthority.php @@ -29,8 +29,6 @@ namespace RecordManager\Base\Record; -use RecordManager\Base\Database\DatabaseInterface as Database; - /** * Forward authority Record Class * @@ -44,6 +42,13 @@ */ class MarcAuthority extends Marc { + /** + * Is this an authority record? + * + * @var bool + */ + protected bool $isAuthorityRecord = true; + /** * Delimiter for separating name related subfields. * @@ -61,36 +66,6 @@ public function getID() return $this->getFieldSubfield('035', 'a'); } - /** - * Return fields to be indexed in Solr - * - * @param ?Database $db Database connection. Omit to avoid database lookups for related records. 
- * - * @return array - */ - public function toSolrArray(?Database $db = null) - { - $data = parent::toSolrArray($db); - - $data['fullrecord'] = $this->getFullRecord(); - $data['allfields'] = $this->getAllFields(); - $data['source'] = $this->getRecordSource(); - $data['heading'] = $this->getHeading(); - $data['heading_keywords'] = $this->getHeadingKeywords(); - $data['use_for'] = $this->getUseForHeadings(); - $data['use_for_keywords'] = $this->getUseForHeadingKeywords(); - $data['record_type'] = $this->getRecordType(); - $data['birth_date'] = $this->getBirthDate(); - $data['death_date'] = $this->getDeathDate(); - $data['birth_place'] = $this->getBirthPlace(); - $data['death_place'] = $this->getDeathPlace(); - $data['country'] = $this->getCountry(); - $data['field_of_activity'] = $this->getFieldsOfActivity(); - $data['occupation'] = $this->getOccupations(); - - return $data; - } - /** * Get fields of activity * diff --git a/src/RecordManager/Base/Record/Qdc.php b/src/RecordManager/Base/Record/Qdc.php index 8be835be..c69fa4fb 100644 --- a/src/RecordManager/Base/Record/Qdc.php +++ b/src/RecordManager/Base/Record/Qdc.php @@ -144,50 +144,6 @@ public function getID() return trim($id); } - /** - * Return fields to be indexed in Solr - * - * @param ?Database $db Database connection. Omit to avoid database lookups for related records. 
- * - * @return array - */ - public function toSolrArray(?Database $db = null) - { - $data = parent::toSolrArray($db); - - $data['ctrlnum'] = $this->getControlNumbers(); - $data['fullrecord'] = $this->getFullRecord(); - $data['allfields'] = $this->getAllFields(); - $data['language'] = $this->getLanguages(); - $data['format'] = $this->getFormat(); - $data['author'] = $this->getPrimaryAuthors(); - $data['author2'] = $this->getSecondaryAuthors(); - $data['author_corporate'] = $this->getCorporateAuthors(); - $data['author_sort'] = $this->getAuthorSort($data['author']); - $data['title'] = $this->getTitle(); - $data['title_full'] = $this->getFullTitle(); - $data['title_short'] = $this->getShortTitle($data['title']); - $data['title_sub'] = $this->getTitleSub($data['title']); - $data['title_sort'] = $this->getTitle(true); - $data['title_alt'] = $this->getAltTitles(); - $data['publisher'] = $this->getPublishers(); - $data['publishDate'] = $this->getPublicationYear(); - $data['publishDateRange'] = $this->getPublicationYears(); - $data['isbn'] = $this->getISBNs(); - $data['issn'] = $this->getISSNs(); - $data['doi_str_mv'] = $this->getDOIs(); - $data['topic'] = $this->getTopics(); - $data['topic_facet'] = $this->getTopicFacets(); - $data['url'] = $this->getUrls(); - $data['contents'] = $this->getContents(); - $data['description'] = $this->getDescription(); - $data['series'] = $this->getSeries(); - $data['fulltext'] = $this->getFullTextField($this->doc); - $this->addHierarchyFields($data); - - return $data; - } - /** * Dedup: Return full title (for debugging purposes only) * @@ -261,68 +217,6 @@ public function getUniqueIDs() return array_unique($arr); } - /** - * Dedup: Return ISBNs in ISBN-13 format without dashes - * - * @return array - */ - public function getISBNs() - { - $arr = []; - foreach ([$this->doc->identifier, $this->doc->isFormatOf] as $field) { - foreach ($field as $identifier) { - $identifier = str_replace('-', '', trim($identifier)); - if ('' === $identifier || 
!preg_match('{^([0-9]{9,12}[0-9xX])}', $identifier, $matches)) { - continue; - } - $isbn = $this->metadataUtils->normalizeISBN($matches[1]); - if ($isbn) { - $arr[] = $isbn; - } - } - } - - return array_unique($arr); - } - - /** - * Dedup: Return ISSNs - * - * @return array - */ - public function getISSNs() - { - $result = []; - foreach ([$this->doc->relation, $this->doc->identifier] as $fields) { - foreach ($fields as $current) { - if ((string)$current->attributes()->{'type'} === 'issn') { - $result[] = trim((string)$current); - } - } - } - return $result; - } - - /** - * Dedup: Return series ISSN - * - * @return string - */ - public function getSeriesISSN() - { - return ''; - } - - /** - * Dedup: Return series numbering - * - * @return string - */ - public function getSeriesNumbering() - { - return ''; - } - /** * Dedup: Return format from predefined values * @@ -400,6 +294,48 @@ public function getSeries() return []; } + /** + * Get ISBNs in ISBN-13 format without dashes. + * + * @return array + */ + protected function getISBNs(): array + { + $arr = []; + foreach ([$this->doc->identifier, $this->doc->isFormatOf] as $field) { + foreach ($field as $identifier) { + $identifier = str_replace('-', '', trim($identifier)); + if ('' === $identifier || !preg_match('{^([0-9]{9,12}[0-9xX])}', $identifier, $matches)) { + continue; + } + $isbn = $this->metadataUtils->normalizeISBN($matches[1]); + if ($isbn) { + $arr[] = $isbn; + } + } + } + + return array_unique($arr); + } + + /** + * Get ISSNs. + * + * @return array + */ + protected function getISSNs(): array + { + $result = []; + foreach ([$this->doc->relation, $this->doc->identifier] as $fields) { + foreach ($fields as $current) { + if ((string)$current->attributes()->{'type'} === 'issn') { + $result[] = trim((string)$current); + } + } + } + return $result; + } + /** * Get topics. * @@ -595,17 +531,6 @@ protected function getValues($tag, array $attributes = []) return $result; } - /** - * Add hierarchy fields. 
Must be called after title is present in the array. - * - * @param array $data Reference to the target array - * - * @return void - */ - protected function addHierarchyFields(array &$data): void - { - } - /** * Get record format. * @@ -630,12 +555,11 @@ protected function getControlNumbers(): array /** * Get author sort field. * - * @param array $authors Primary authors - * * @return string */ - protected function getAuthorSort(array $authors): string + protected function getAuthorSort(): string { + $authors = $this->getPrimaryAuthors(); return $authors[0] ?? ''; } @@ -652,26 +576,22 @@ protected function getFullTitle(): string /** * Get short title. * - * @param string $fullTitle Full title - * * @return string */ - protected function getShortTitle(string $fullTitle): string + protected function getShortTitle(): string { - $titleParts = explode(' : ', $fullTitle, 2); + $titleParts = explode(' : ', $this->getFullTitle(), 2); return $titleParts[0]; } /** * Get subtitle. * - * @param string $fullTitle Full title - * * @return string */ - protected function getTitleSub(string $fullTitle): string + protected function getTitleSub(): string { - $titleParts = explode(' : ', $fullTitle, 2); + $titleParts = explode(' : ', $this->getFullTitle(), 2); return $titleParts[1] ?? ''; } @@ -737,7 +657,7 @@ protected function getFullRecord(): string } /** - * Return publication years + * Get publication years. 
* * @return array */ @@ -762,4 +682,14 @@ protected function getPublicationYears(): array } return $result; } + + /** + * Get full text field for this record's document + * + * @return string + */ + protected function getFullTextField(): string + { + return $this->getFullTextFieldForDocument($this->doc); + } } diff --git a/tests/RecordManagerTest/Base/Record/DcTest.php b/tests/RecordManagerTest/Base/Record/DcTest.php index ac635b50..b8a2a506 100644 --- a/tests/RecordManagerTest/Base/Record/DcTest.php +++ b/tests/RecordManagerTest/Base/Record/DcTest.php @@ -106,11 +106,15 @@ public function testDc1() 'publisher' => [ 'Publisher', ], - 'publishDate' => '2025', + 'publishDate' => [ + '2025', + ], + 'publishDateSort' => '2025', 'publishDateRange' => [ '2025', ], 'isbn' => [], + 'issn' => [], 'doi_str_mv' => [], 'topic_facet' => [ 'Topic', diff --git a/tests/RecordManagerTest/Base/Record/DoajTest.php b/tests/RecordManagerTest/Base/Record/DoajTest.php index 7b5e2225..c352d1fd 100644 --- a/tests/RecordManagerTest/Base/Record/DoajTest.php +++ b/tests/RecordManagerTest/Base/Record/DoajTest.php @@ -95,7 +95,10 @@ public function testDoaj1() 'publisher' => [ 'Verlag Krause und Pachernegg GmbH', ], - 'publishDate' => '1998', + 'publishDate' => [ + '1998', + ], + 'publishDateSort' => '1998', 'publishDateRange' => [ '1998', ], @@ -109,6 +112,9 @@ public function testDoaj1() 'http://www.kup.at/kup/pdf/648.pdf', ], 'fulltext' => '', + 'doi_str_mv' => [], + 'isbn' => [], + 'issn' => [], ]; $this->compareArray($expected, $fields, 'toSolrArray'); diff --git a/tests/RecordManagerTest/Base/Record/ForwardTest.php b/tests/RecordManagerTest/Base/Record/ForwardTest.php index 78463e7e..4293065e 100644 --- a/tests/RecordManagerTest/Base/Record/ForwardTest.php +++ b/tests/RecordManagerTest/Base/Record/ForwardTest.php @@ -5,7 +5,7 @@ * * PHP version 8 * - * Copyright (C) The National Library of Finland 2020. + * Copyright (C) The National Library of Finland 2020-2025.
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, @@ -58,7 +58,10 @@ public function testForward1() 'ctrlnum' => [ 'elonet_elokuva_1511500', ], - 'publishDate' => '2011', + 'publishDate' => [ + '2011', + ], + 'publishDateSort' => '2011', 'publishDateRange' => [ '2011', ], @@ -1434,6 +1437,12 @@ public function testForward1() 'Marcelin asuinkorttelit aiottiin purkaa kesken elokuvan kuvausten.' . ' Talot purettiin heti kun elokuva oli kuvattu[...]', ], + 'contents' => [ + 'Marcel Marx, entinen kirjailija, elää Ranskassa Le Havressa vaatimattomasti kengänkiillottajana' + . ' vaimonsa Arlettyn kanssa[...]', + ], + 'isbn' => [], + 'issn' => [], ]; $this->compareArray($expected, $fields, 'toSolrArray'); diff --git a/tests/RecordManagerTest/Base/Record/LidoTest.php b/tests/RecordManagerTest/Base/Record/LidoTest.php index 941ac6b7..a5beb5c2 100644 --- a/tests/RecordManagerTest/Base/Record/LidoTest.php +++ b/tests/RecordManagerTest/Base/Record/LidoTest.php @@ -5,7 +5,7 @@ * * PHP version 8 * - * Copyright (C) The National Library of Finland 2020-2022. + * Copyright (C) The National Library of Finland 2020-2025. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, @@ -81,7 +81,6 @@ public function testLido1() 'retkeily', 'ulkoilu', ], - 'material_str_mv' => [], 'geographic_facet' => [], 'geographic' => [], 'era' => [], @@ -119,6 +118,9 @@ public function testLido1() 'Test Institution', '247394', ], + 'publishDate' => [], + 'publishDateRange' => [], + 'publishDateSort' => '', ]; $this->compareArray($expected, $fields, 'toSolrArray'); @@ -199,7 +201,6 @@ public function testLido1NonMergedTitle() 'retkeily', 'ulkoilu', ], - 'material_str_mv' => [], 'geographic_facet' => [], 'geographic' => [], 'era' => [], @@ -237,6 +238,9 @@ public function testLido1NonMergedTitle() 'Test Institution', '247394', ], + 'publishDate' => [], + 'publishDateRange' => [], + 'publishDateSort' => '', ]; $this->compareArray($expected, $fields, 'toSolrArray'); diff --git a/tests/RecordManagerTest/Base/Record/LrmiTest.php b/tests/RecordManagerTest/Base/Record/LrmiTest.php index 36da8cfd..4ef86550 100644 --- a/tests/RecordManagerTest/Base/Record/LrmiTest.php +++ b/tests/RecordManagerTest/Base/Record/LrmiTest.php @@ -5,7 +5,7 @@ * * PHP version 8 * - * Copyright (C) The National Library of Finland 2022. + * Copyright (C) The National Library of Finland 2022-2025. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, @@ -163,7 +163,10 @@ public function testLrmi1() ], 'title_sort' => 'opetuksen ja oppimisen suunnittelu learning design', 'publisher' => [], - 'publishDate' => '2019', + 'publishDate' => [ + '2019', + ], + 'publishDateSort' => '2019', 'publishDateRange' => [ '2019', ], diff --git a/tests/RecordManagerTest/Base/Record/QdcTest.php b/tests/RecordManagerTest/Base/Record/QdcTest.php index e4c53c0b..b6dfd67f 100644 --- a/tests/RecordManagerTest/Base/Record/QdcTest.php +++ b/tests/RecordManagerTest/Base/Record/QdcTest.php @@ -5,7 +5,7 @@ * * PHP version 8 * - * Copyright (C) The National Library of Finland 2023. + * Copyright (C) The National Library of Finland 2023-2025. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, @@ -119,7 +119,10 @@ public function testQdc1() 'publisher' => [ 'Sanitation Project, Research Institute for Humanity and Nature', ], - 'publishDate' => '2021', + 'publishDate' => [ + '2021', + ], + 'publishDateSort' => '2021', 'publishDateRange' => [ '2021', ],