From dbb6f5c2a5ab94f18fc4f0c9cedcbc71227ad240 Mon Sep 17 00:00:00 2001 From: Graham Hukill Date: Mon, 8 Dec 2025 13:48:35 -0500 Subject: [PATCH] Add fulltext field to TimdexRecord model Why these changes are being introduced: Decisions have been made to move incrementally into supporting full-text search in TIMDEX. The first known use case is for the mitlibwebsite source, where we'll store the full text of websites for searching. While it's possible a single record may have multiple full texts associated with it, our initial requirements are only for a single, simple, root-level field. How this addresses that need: Adds a new simple, root-level field called 'fulltext' to the TimdexRecord model class. This field is not yet used by any sources. Side effects of this change: * Eventually, sources will begin to populate this. Relevant ticket(s): * https://mitlibraries.atlassian.net/browse/USE-256 --- tests/conftest.py | 1 + tests/test_models.py | 7 +++++++ transmogrifier/models.py | 1 + 3 files changed, 9 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 132721a..8a8e424 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -261,6 +261,7 @@ def timdex_record_all_fields_and_subfields(): ], subjects=[timdex.Subject(value=["Stuff"], kind="LCSH")], summary=["This is data."], + fulltext="This is the full text of the resource.", ) diff --git a/tests/test_models.py b/tests/test_models.py index 20d6d4d..f15f432 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -33,6 +33,7 @@ def test_timdex_record_required_fields_only(timdex_record_required_fields): assert timdex_record_required_fields.rights is None assert timdex_record_required_fields.subjects is None assert timdex_record_required_fields.summary is None + assert timdex_record_required_fields.fulltext is None def test_timdex_record_required_subfields_only(timdex_record_required_fields): @@ -77,6 +78,7 @@ def test_timdex_record_required_subfields_only(timdex_record_required_fields): assert
timdex_record_required_fields.rights is None assert timdex_record_required_fields.subjects[0].value == ["Stuff"] assert timdex_record_required_fields.summary is None + assert timdex_record_required_fields.fulltext is None def test_timdex_record_all_fields_and_subfields(timdex_record_all_fields_and_subfields): @@ -212,6 +214,10 @@ def test_timdex_record_all_fields_and_subfields(timdex_record_all_fields_and_sub assert timdex_record_all_fields_and_subfields.subjects[0].value == ["Stuff"] assert timdex_record_all_fields_and_subfields.subjects[0].kind == "LCSH" assert timdex_record_all_fields_and_subfields.summary[0] == "This is data." + assert ( + timdex_record_all_fields_and_subfields.fulltext + == "This is the full text of the resource." + ) def test_record_asdict_filters_empty_fields( @@ -319,6 +325,7 @@ def test_record_asdict_includes_all_fields(timdex_record_all_fields_and_subfield ], "subjects": [{"kind": "LCSH", "value": ["Stuff"]}], "summary": ["This is data."], + "fulltext": "This is the full text of the resource.", } diff --git a/transmogrifier/models.py b/transmogrifier/models.py index 2a1fdea..451b5c1 100644 --- a/transmogrifier/models.py +++ b/transmogrifier/models.py @@ -255,6 +255,7 @@ class TimdexRecord: default=None, converter=dedupe, validator=optional(list_of(str)) ) format: str | None = field(default=None, validator=optional(instance_of(str))) + fulltext: str | None = field(default=None, validator=optional(instance_of(str))) funding_information: list[Funder] | None = field( default=None, converter=dedupe, validator=optional(list_of(Funder)) )