Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,14 @@ Anything marked with [**BC**] is known to affect backward compatibility with pre
- Added support for defining additional HTTP options for Solr index requests. This could be useful e.g. when using SSL with self-signed certificates.
- When multi-process support is enabled (i.e. record_workers and/or solr_update_workers set in recordmanager.ini), a few additional worker processes are initialized on startup to take the place of any worker that stops unexpectedly (typically due to an issue with PHP or one of its extensions causing a segmentation fault).
- Regular expression support for suppressOnField setting did not work properly. A new suppressOnFieldRegEx was introduced to make this option explicit.
- Record classes can now implement methods preProcessRecordForIndexing and postProcessRecordForIndexing for additional preparation around the toSolrArray method.

### Changed

- [**BC**] The HTTP client library has been changed from HTTP_Request2 to Guzzle. This has required some changes to how the HTTP client is used. See e.g. src/RecordManager/Base/Harvest/SierraApi.php for usage examples. This also affects the settings in HTTP section of recordmanager.ini. Only the most commonly used legacy settings are automatically mapped to Guzzle's equivalents.
- [**BC**] MARC: Subfields containing record identifiers for linking between records (subfield w in fields 760-787) are no longer updated to use the indexed record ID by default. Instead of the built-in list of fields there is now an option in recordmanager.ini (MarcRecord/linking_id_fields) that can be used to define the linking ID fields if this functionality is desired.
- [**BC**] All Record classes must now implement the getRecordFormat method and call AbstractRecord's toSolrArray in their overridden toSolrArray methods.
- [**BC**] Record classes have been unified to use a common set of methods for building the Solr array (see $solrAuthorityRecordSpecs and $solrBiblioRecordSpecs in AbstractRecord).
- [**BC**] All Record classes must now implement the getRecordFormat method and call AbstractRecord's toSolrArray in any overridden toSolrArray methods.
- [**BC**] FullTextTrait's getFullTextFields was renamed to getFullTextField and refactored to return the fulltext field contents instead of a full data array.
- [**BC**] Several methods in Record classes have been renamed to improve unity between the classes and to better reflect their nature. Also typing of return values has been added in many places.
- [**BC**] All format-specific Skosmos and authority enrichments have been consolidated to format-agnostic SkosmosEnrichment and AuthEnrichment.
Expand Down
16 changes: 8 additions & 8 deletions src/RecordManager/Base/Deduplication/DedupHandler.php
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ public function updateDedupCandidateKeys(&$record, $metadataRecord)
unset($record['title_keys']);
}

$keys = $metadataRecord->getISBNs();
$keys = $metadataRecord->getISBNsForDedup();
$oldKeys = (array)($record['isbn_keys'] ?? []);
if (count($oldKeys) !== count($keys) || array_diff($oldKeys, $keys)) {
$record['isbn_keys'] = $keys;
Expand Down Expand Up @@ -863,8 +863,8 @@ function () use ($candidateDbRecord) {
}

// Check for common ISBN
$origISBNs = $this->filterIds($origRecord->getISBNs(), $origDbRecord);
$candidateISBNs = $this->filterIds($candidateRecord->getISBNs(), $candidateDbRecord);
$origISBNs = $this->filterIds($origRecord->getISBNsForDedup(), $origDbRecord);
$candidateISBNs = $this->filterIds($candidateRecord->getISBNsForDedup(), $candidateDbRecord);
$isect = array_intersect($origISBNs, $candidateISBNs);
if (!empty($isect)) {
// Shared ISBN -> match
Expand Down Expand Up @@ -908,8 +908,8 @@ function () use (
return true;
}

$origISSNs = $this->filterIds($origRecord->getISSNs(), $origDbRecord);
$candidateISSNs = $candidateRecord->getISSNs();
$origISSNs = $this->filterIds($origRecord->getISSNsForDedup(), $origDbRecord);
$candidateISSNs = $candidateRecord->getISSNsForDedup();
$commonISSNs = array_intersect($origISSNs, $candidateISSNs);
if (!empty($origISSNs) && !empty($candidateISSNs) && empty($commonISSNs)) {
// Both have ISSNs but none match
Expand Down Expand Up @@ -952,11 +952,11 @@ function () use (
return false;
}

if ($origRecord->getSeriesISSN() != $candidateRecord->getSeriesISSN()) {
if ($origRecord->getSeriesISSNForDedup() != $candidateRecord->getSeriesISSNForDedup()) {
return false;
}
$candidateNumbering = $candidateRecord->getSeriesNumbering();
if ($origRecord->getSeriesNumbering() != $candidateNumbering) {
$candidateNumbering = $candidateRecord->getSeriesNumberingForDedup();
if ($origRecord->getSeriesNumberingForDedup() != $candidateNumbering) {
return false;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ public function enrich($sourceId, $record, &$solrArray)

// Use publisher ids only if barcodes or musicbrainz id did not yield any results
if (!$mbIds) {
$shortTitle = $record->getShortTitle();
$shortTitle = $record->getShortTitleForEnrichment();
foreach ($record->getPublisherNumbers(['0']) as $number) {
if ($id = trim($number['id'])) {
$newIds = $this->getFromReleaseIndex(self::CATNO, $id, $shortTitle);
Expand Down
2 changes: 1 addition & 1 deletion src/RecordManager/Base/Enrichment/SkosmosEnrichment.php
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ class SkosmosEnrichment extends AbstractEnrichment
'alt' => 'author_variant',
'check' => 'author_corporate',
],
'getAuthorIds' => [
'getPrimaryAuthorIds' => [
'pref' => 'author',
'alt' => 'author_variant',
'check' => 'author',
Expand Down
230 changes: 218 additions & 12 deletions src/RecordManager/Base/Record/AbstractRecord.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
*
* PHP version 8
*
* Copyright (C) The National Library of Finland 2011-2022.
* Copyright (C) The National Library of Finland 2011-2025.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2,
Expand Down Expand Up @@ -34,6 +34,7 @@
use RecordManager\Base\Utils\MetadataUtils;

use function in_array;
use function is_callable;

/**
* Base class for record drivers
Expand Down Expand Up @@ -112,6 +113,103 @@ abstract class AbstractRecord
*/
protected $resultCache = [];

/**
 * Is this an authority record?
 *
 * When true, toSolrArray() builds the Solr array from
 * $solrAuthorityRecordSpecs; otherwise it uses $solrBiblioRecordSpecs.
 *
 * @var bool
 */
protected bool $isAuthorityRecord = false;

/**
 * Specifications for building a Solr authority record.
 *
 * Used by toSolrArray() when $isAuthorityRecord is true. Each key is a Solr
 * field name and each value is an array with the following elements:
 * - method:  name of the method of this record to call. Entries whose method
 *            is not callable on the record are skipped.
 * - useData: (optional) if true, the method is called with the Solr data
 *            array as its argument and its return value is not stored;
 *            otherwise the return value is stored in the field.
 *
 * @var array
 */
protected $solrAuthorityRecordSpecs = [
'allfields' => ['method' => 'getAllFields'],
'birth_date' => ['method' => 'getBirthDate'],
'birth_place' => ['method' => 'getBirthPlace'],
'country' => ['method' => 'getCountry'],
'death_date' => ['method' => 'getDeathDate'],
'death_place' => ['method' => 'getDeathPlace'],
'field_of_activity' => ['method' => 'getFieldsOfActivity'],
'fullrecord' => ['method' => 'getFullRecord'],
'heading' => ['method' => 'getHeading'],
'heading_keywords' => ['method' => 'getHeadingKeywords'],
'language' => ['method' => 'getHeadingLanguage'],
'occupation' => ['method' => 'getOccupations'],
'record_type' => ['method' => 'getRecordType'],
'related_place' => ['method' => 'getRelatedPlaces'],
'source' => ['method' => 'getRecordSource'],
'use_for' => ['method' => 'getUseForHeadings'],
'use_for_keywords' => ['method' => 'getUseForHeadingKeywords'],
];

/**
 * Specifications for building a Solr biblio record.
 *
 * Used by toSolrArray() when $isAuthorityRecord is false. Each key is a Solr
 * field name and each value is an array with the following elements:
 * - method:  name of the method of this record to call. Entries whose method
 *            is not callable on the record are skipped.
 * - useData: (optional) if true, the method is called with the Solr data
 *            array as its argument and its return value is not stored;
 *            otherwise the return value is stored in the field.
 *
 * @var array
 */
protected $solrBiblioRecordSpecs = [
'allfields' => ['method' => 'getAllFields'],
'author' => ['method' => 'getPrimaryAuthors'],
'author_role' => ['method' => 'getPrimaryAuthorRoles'],
'author_sort' => ['method' => 'getAuthorSort'],
'author2' => ['method' => 'getSecondaryAuthors'],
'author2_role' => ['method' => 'getSecondaryAuthorRoles'],
'author_corporate' => ['method' => 'getCorporateAuthors'],
'author_corporate_role' => ['method' => 'getCorporateAuthorRoles'],
'building' => ['method' => 'getBuilding'],
'callnumber-first' => ['method' => 'getCallNumberFirst'],
'callnumber-label' => ['method' => 'getCallNumberLabels'],
'callnumber-raw' => ['method' => 'getCallNumbersRaw'],
'callnumber-subject' => ['method' => 'getCallNumberSubject'],
'collection' => ['method' => 'getCollection'],
'contents' => ['method' => 'getContents'],
'ctrlnum' => ['method' => 'getControlNumbers'],
'dateSpan' => ['method' => 'getDateSpans'],
'description' => ['method' => 'getDescription'],
'doi_str_mv' => ['method' => 'getDOIs'],
'edition' => ['method' => 'getEdition'],
'era_facet' => ['method' => 'getEraFacets'],
'era' => ['method' => 'getEras'],
'format' => ['method' => 'getFormat'],
'fullrecord' => ['method' => 'getFullRecord'],
'fulltext' => ['method' => 'getFullTextField'],
'genre_facet' => ['method' => 'getGenreFacets'],
'genre' => ['method' => 'getGenres'],
'geographic_facet' => ['method' => 'getGeographicFacets'],
'geographic' => ['method' => 'getGeographicTopics'],
'illustrated' => ['method' => 'getIllustrated'],
'institution' => ['method' => 'getInstitution'],
'isbn' => ['method' => 'getISBNs'],
'issn' => ['method' => 'getISSNs'],
'language' => ['method' => 'getLanguages'],
'lccn' => ['method' => 'getLCCN'],
'oclc_num' => ['method' => 'getOclcNumbers'],
'physical' => ['method' => 'getPhysicalDescriptions'],
'publishDate' => ['method' => 'getPublicationYears'],
'publishDateRange' => ['method' => 'getPublicationDateRanges'],
'publishDateSort' => ['method' => 'getPublicationYear'],
'publisher' => ['method' => 'getPublishers'],
'series' => ['method' => 'getSeries'],
'series2' => ['method' => 'getSeries2'],
'thumbnail' => ['method' => 'getThumbnailUrl'],
'title_alt' => ['method' => 'getAltTitles'],
'title_full' => ['method' => 'getFullTitle'],
'title_new' => ['method' => 'getNewTitles'],
'title_old' => ['method' => 'getOldTitles'],
'title_short' => ['method' => 'getShortTitle'],
'title_sort' => ['method' => 'getTitleSort'],
'title_sub' => ['method' => 'getTitleSub'],
'title' => ['method' => 'getTitle'],
'topic_browse' => ['method' => 'getTopicsForBrowse'],
'topic_facet' => ['method' => 'getTopicFacets'],
'topic' => ['method' => 'getTopics'],
'url' => ['method' => 'getUrls'],
'uuid_str_mv' => ['method' => 'getUUIDs'],
];

/**
* Constructor
*
Expand Down Expand Up @@ -222,9 +320,22 @@ public function getHostRecordIDs(): array
*/
public function toSolrArray(?Database $db = null)
{
    // Let the record prepare itself before any field is built.
    $this->preProcessRecordForIndexing($db);

    $data = [
        'record_format' => $this->getRecordFormat(),
    ];

    // Pick the field specifications that match the record type.
    $fieldSpecs = $this->isAuthorityRecord
        ? $this->solrAuthorityRecordSpecs
        : $this->solrBiblioRecordSpecs;

    // Use distinct names for the spec list and the current spec so that the
    // loop does not overwrite the array it is iterating.
    foreach ($fieldSpecs as $field => $spec) {
        $method = $spec['method'] ?? null;
        // Skip fields without a method or with a method this record does not
        // provide.
        if (!$method || !is_callable([$this, $method])) {
            continue;
        }
        if ($spec['useData'] ?? false) {
            // The method receives the Solr array collected so far; its return
            // value is not stored.
            $this->$method($data);
        } else {
            $data[$field] = $this->$method();
        }
    }

    // Let the record adjust the complete array before it is returned.
    $this->postProcessRecordForIndexing($db, $data);

    return $data;
}

/**
Expand Down Expand Up @@ -256,6 +367,28 @@ public function getTitle($forFiling = false)
return '';
}

/**
 * Get record title for sorting.
 *
 * Returns the result of getTitle() called with its $forFiling argument set
 * to true.
 *
 * @return string
 */
public function getTitleSort()
{
    // Named local instead of a bare boolean literal so the call is readable.
    $forFiling = true;
    return $this->getTitle($forFiling);
}

/**
 * Get the short title to use for enrichment.
 *
 * The default implementation returns the result of getTitle() called
 * without arguments.
 *
 * @return string
 */
public function getShortTitleForEnrichment(): string
{
    $title = $this->getTitle();
    return $title;
}

/**
* Return format(s) from predefined values
*
Expand Down Expand Up @@ -347,41 +480,41 @@ public function getUniqueIDs()
}

/**
 * Dedup: Get (unique) ISBNs in ISBN-13 format without dashes.
 *
 * The default implementation returns the result of getISBNs().
 *
 * @return array
 */
public function getISBNsForDedup(): array
{
    return $this->getISBNs();
}

/**
 * Dedup: Get ISSNs.
 *
 * The default implementation returns the result of getISSNs().
 *
 * @return array
 */
public function getISSNsForDedup(): array
{
    return $this->getISSNs();
}

/**
 * Dedup: Get series ISSN.
 *
 * The default implementation returns an empty string.
 *
 * @return string
 */
public function getSeriesISSNForDedup(): string
{
    return '';
}

/**
 * Dedup: Get series numbering.
 *
 * The default implementation returns an empty string.
 *
 * @return string
 */
public function getSeriesNumberingForDedup(): string
{
    return '';
}
Expand Down Expand Up @@ -550,6 +683,49 @@ public function getdataSourceConfig()
return $this->dataSourceConfig[$this->source];
}

/**
 * Get (unique) ISBNs in ISBN-13 format without dashes.
 *
 * Referenced as the method for the 'isbn' field in $solrBiblioRecordSpecs.
 * The default implementation returns an empty array.
 *
 * @return array
 */
protected function getISBNs(): array
{
return [];
}

/**
 * Get ISSNs.
 *
 * Referenced as the method for the 'issn' field in $solrBiblioRecordSpecs.
 * The default implementation returns an empty array.
 *
 * @return array
 */
protected function getISSNs(): array
{
return [];
}

/**
 * Do any pre-processing for the record before the conversion to Solr array.
 *
 * Called by toSolrArray() as its first step, before any Solr field is
 * built. The default implementation does nothing.
 *
 * @param ?Database $db Database connection, if available
 *
 * @return void
 */
protected function preProcessRecordForIndexing(?Database $db): void
{
}

/**
 * Do any post-processing for the record after the main conversion to Solr array.
 *
 * Called by toSolrArray() after the fields from the record specifications
 * have been collected and just before the array is returned. $data is
 * passed by reference, so changes made here end up in the returned array.
 * The default implementation does nothing.
 *
 * @param ?Database $db   Database connection, if available
 * @param array     $data Array of Solr fields
 *
 * @return void
 */
protected function postProcessRecordForIndexing(?Database $db, &$data): void
{
}

/**
* Get record format.
*
Expand Down Expand Up @@ -623,4 +799,34 @@ protected function validateDate($dateString)
}
return '';
}

/**
 * Get publication years.
 *
 * Referenced as the method for the 'publishDate' field in
 * $solrBiblioRecordSpecs and used as the default result of
 * getPublicationDateRanges(). The default implementation returns an empty
 * array.
 *
 * @return array
 */
protected function getPublicationYears(): array
{
return [];
}

/**
 * Get publication date ranges.
 *
 * Referenced as the method for the 'publishDateRange' field in
 * $solrBiblioRecordSpecs. The default implementation returns the result of
 * getPublicationYears() unchanged.
 *
 * @return array
 */
protected function getPublicationDateRanges(): array
{
    $years = $this->getPublicationYears();
    return $years;
}

/**
 * Get full title.
 *
 * Referenced as the method for the 'title_full' field in
 * $solrBiblioRecordSpecs. The default implementation returns the result of
 * getTitle() called without arguments.
 *
 * @return string
 */
protected function getFullTitle(): string
{
    $title = $this->getTitle();
    return $title;
}
}
Loading