From 8904b766fec99fdaa37aa2cfd06aff0b4e9f8b61 Mon Sep 17 00:00:00 2001 From: Denis Bezykornov Date: Tue, 25 Nov 2025 02:06:47 +0300 Subject: [PATCH 1/3] Add backend capabilities to search in TM from a document --- backend/app/routers/document.py | 53 ++++++ backend/app/translation_memory/query.py | 19 +- backend/tests/routers/test_tm_search.py | 238 ++++++++++++++++++++++++ 3 files changed, 306 insertions(+), 4 deletions(-) create mode 100644 backend/tests/routers/test_tm_search.py diff --git a/backend/app/routers/document.py b/backend/app/routers/document.py index ce2a946..7536093 100644 --- a/backend/app/routers/document.py +++ b/backend/app/routers/document.py @@ -23,6 +23,8 @@ from app.translation_memory.schema import ( MemorySubstitution, TranslationMemory, + TranslationMemoryListResponse, + TranslationMemoryListSimilarResponse, ) from app.user.depends import get_current_user_id, has_user_role @@ -244,6 +246,57 @@ def find_memory(id_: int, memories): return models.StatusMessage(message="Memory list updated") +@router.get("/{doc_id}/tm/exact") +def search_tm_exact( + doc_id: int, + db: Annotated[Session, Depends(get_db)], + source: Annotated[str, Query(description="Source text to search for")], +) -> TranslationMemoryListResponse: + doc = get_doc_by_id(db, doc_id) + tm_ids = [tm.id for tm in doc.memories] + + if not tm_ids: + return TranslationMemoryListResponse(records=[], page=0, total_records=0) + + records, count = TranslationMemoryQuery(db).get_memory_records_paged( + memory_ids=tm_ids, + page=0, + page_records=20, + query=source, + ) + + return TranslationMemoryListResponse( + records=records, + page=0, + total_records=count, + ) + + +@router.get("/{doc_id}/tm/similar") +def search_tm_similar( + doc_id: int, + db: Annotated[Session, Depends(get_db)], + source: Annotated[str, Query(description="Source text to search for")], +) -> TranslationMemoryListSimilarResponse: + doc = get_doc_by_id(db, doc_id) + tm_ids = [tm.id for tm in doc.memories] + + if not tm_ids: + return TranslationMemoryListSimilarResponse(records=[], page=0, total_records=0) + + records = TranslationMemoryQuery(db).get_memory_records_paged_similar( + memory_ids=tm_ids, + page_records=20, + query=source, + ) + + return TranslationMemoryListSimilarResponse( + records=records, + page=0, + total_records=len(records), + ) + + @router.get("/{doc_id}/glossaries") def get_glossaries( doc_id: int, db: Annotated[Session, Depends(get_db)] diff --git a/backend/app/translation_memory/query.py b/backend/app/translation_memory/query.py index 16f2c54..a247ccf 100644 --- a/backend/app/translation_memory/query.py +++ b/backend/app/translation_memory/query.py @@ -37,12 +37,17 @@ def get_memory_records_count(self, memory_id: int) -> int: def get_memory_records_paged( self, - memory_id: int, + memory_ids: int | list[int], page: int, page_records: int, query: str | None, ) -> tuple[list[schema.TranslationMemoryRecord], int]: - filters = [TranslationMemoryRecord.document_id == memory_id] + # Handle both single int and list of ints + if isinstance(memory_ids, int): + filters = [TranslationMemoryRecord.document_id == memory_ids] + else: + filters = [TranslationMemoryRecord.document_id.in_(memory_ids)] + if query: filters.append(TranslationMemoryRecord.source.ilike(f"%{query}%")) @@ -67,7 +72,7 @@ def get_memory_records_paged( def get_memory_records_paged_similar( self, - memory_id: int, + memory_ids: int | list[int], page_records: int, query: str, ) -> list[schema.TranslationMemoryRecordWithSimilarity]: @@ -80,6 +85,12 @@ def get_memory_records_paged_similar( {"threshold": 0.25}, ) + # Handle both single int and list of ints for filtering + if isinstance(memory_ids, int): + filter_condition = TranslationMemoryRecord.document_id == memory_ids + else: + filter_condition = TranslationMemoryRecord.document_id.in_(memory_ids) + return [ schema.TranslationMemoryRecordWithSimilarity( id=scalar.id, @@ -95,7 +106,7 @@ def get_memory_records_paged_similar( similarity_func, ) .filter( - TranslationMemoryRecord.document_id == memory_id, + filter_condition, TranslationMemoryRecord.source.op("%")(query), ) .order_by(similarity_func.desc()) diff --git a/backend/tests/routers/test_tm_search.py b/backend/tests/routers/test_tm_search.py new file mode 100644 index 0000000..3c04eb0 --- /dev/null +++ b/backend/tests/routers/test_tm_search.py @@ -0,0 +1,238 @@ +from fastapi.testclient import TestClient +from sqlalchemy.orm import Session + +from app.documents.models import ( + DocMemoryAssociation, + Document, + DocumentType, + TmMode, +) +from app.translation_memory.models import TranslationMemory, TranslationMemoryRecord + + +def test_search_tm_exact_with_no_linked_memories( + user_logged_client: TestClient, session: Session +): + """Test exact search returns empty response when document has no linked TMs""" + with session as s: + s.add( + Document( + name="test_doc.txt", + type=DocumentType.txt, + processing_status="pending", + created_by=1, + ) + ) + s.commit() + + response = user_logged_client.get("/document/1/tm/exact?source=Hello") + assert response.status_code == 200 + response_json = response.json() + assert response_json["records"] == [] + assert response_json["page"] == 0 + assert response_json["total_records"] == 0 + + +def test_search_tm_exact_with_linked_memories( + user_logged_client: TestClient, session: Session +): + """Test exact search finds matches in linked translation memories""" + with session as s: + # Create document + s.add( + Document( + name="test_doc.txt", + type=DocumentType.txt, + processing_status="pending", + created_by=1, + ) + ) + + # Create translation memory with records + tm = TranslationMemory(name="test_memory.tmx", created_by=1) + s.add(tm) + s.flush() + + # Add some records to TM + s.add_all( + [ + TranslationMemoryRecord( + document_id=tm.id, + source="Hello World", + target="Привет Мир", + ), + TranslationMemoryRecord( + document_id=tm.id, + source="Goodbye", + target="Пока", + ), + TranslationMemoryRecord( + document_id=tm.id, + source="Hello Again", + target="Привет снова", + ), + ] + ) + + # Link TM to document + s.add(DocMemoryAssociation(doc_id=1, tm_id=tm.id, mode=TmMode.read)) + s.commit() + + # Test exact search for "Hello" + response = user_logged_client.get("/document/1/tm/exact?source=Hello") + assert response.status_code == 200 + response_json = response.json() + assert ( + len(response_json["records"]) == 2 + ) # Should find "Hello World" and "Hello Again" + assert response_json["page"] == 0 + assert response_json["total_records"] == 2 + + # Check the returned records + sources = [record["source"] for record in response_json["records"]] + assert "Hello World" in sources + assert "Hello Again" in sources + + +def test_search_tm_exact_with_multiple_linked_memories( + user_logged_client: TestClient, session: Session +): + """Test exact search across multiple linked translation memories""" + with session as s: + # Create document + s.add( + Document( + name="test_doc.txt", + type=DocumentType.txt, + processing_status="pending", + created_by=1, + ) + ) + + # Create two translation memories + tm1 = TranslationMemory(name="memory1.tmx", created_by=1) + tm2 = TranslationMemory(name="memory2.tmx", created_by=1) + s.add_all([tm1, tm2]) + s.flush() + + # Add records to both TMs + s.add_all( + [ + TranslationMemoryRecord( + document_id=tm1.id, + source="Hello World", + target="Привет Мир", + ), + TranslationMemoryRecord( + document_id=tm2.id, + source="Hello Again", + target="Привет снова", + ), + TranslationMemoryRecord( + document_id=tm2.id, + source="Goodbye", + target="Пока", + ), + ] + ) + + # Link both TMs to document + s.add(DocMemoryAssociation(doc_id=1, tm_id=tm1.id, mode=TmMode.read)) + s.add(DocMemoryAssociation(doc_id=1, tm_id=tm2.id, mode=TmMode.read)) + s.commit() + + # Test exact search for "Hello" + response = user_logged_client.get("/document/1/tm/exact?source=Hello") + assert response.status_code == 200 + response_json = response.json() + assert len(response_json["records"]) == 2 # Should find from both TMs + assert response_json["page"] == 0 + assert response_json["total_records"] == 2 + + +def test_search_tm_exact_returns_404_for_nonexistent_document( + user_logged_client: TestClient, +): + """Test exact search returns 404 for non-existent document""" + response = user_logged_client.get("/document/999/tm/exact?source=Hello") + assert response.status_code == 404 + + +def test_search_tm_exact_no_results(user_logged_client: TestClient, session: Session): + """Test exact search returns no results when no matches found""" + with session as s: + # Create document + s.add( + Document( + name="test_doc.txt", + type=DocumentType.txt, + processing_status="pending", + created_by=1, + ) + ) + + # Create TM with different records + tm = TranslationMemory(name="test_memory.tmx", created_by=1) + s.add(tm) + s.flush() + + s.add( + TranslationMemoryRecord( + document_id=tm.id, + source="Goodbye World", + target="Пока Мир", + ) + ) + + # Link TM to document + s.add(DocMemoryAssociation(doc_id=1, tm_id=tm.id, mode=TmMode.read)) + s.commit() + + # Test exact search for "Hello" (should not find anything) + response = user_logged_client.get("/document/1/tm/exact?source=Hello") + assert response.status_code == 200 + response_json = response.json() + assert response_json["records"] == [] + assert response_json["page"] == 0 + assert response_json["total_records"] == 0 + + +def test_search_tm_limit_20_results(user_logged_client: TestClient, session: Session): + """Test that search endpoints limit results to 20 records""" + with session as s: + # Create document + s.add( + Document( + name="test_doc.txt", + type=DocumentType.txt, + processing_status="pending", + created_by=1, + ) + ) + + # Create TM + tm = TranslationMemory(name="test_memory.tmx", created_by=1) + s.add(tm) + s.flush() + + # Add 25 records starting with "Hello" + s.add_all( + [ + TranslationMemoryRecord( + document_id=tm.id, + source=f"Hello World {i}", + target=f"Привет Мир {i}", + ) + for i in range(25) + ] + ) + + # Link TM to document + s.add(DocMemoryAssociation(doc_id=1, tm_id=tm.id, mode=TmMode.read)) + s.commit() + + # Test exact search - should only return 20 results + response = user_logged_client.get("/document/1/tm/exact?source=Hello") + assert response.status_code == 200 + response_json = response.json() + assert len(response_json["records"]) == 20 # Limited to 20 From 93f0accf7ed91741b41f13031fe9862d45427dc7 Mon Sep 17 00:00:00 2001 From: Denis Bezykornov Date: Wed, 26 Nov 2025 00:11:39 +0300 Subject: [PATCH 2/3] Generate client --- frontend/src/client/services/DocumentService.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/frontend/src/client/services/DocumentService.ts b/frontend/src/client/services/DocumentService.ts index 67f64d5..0b9812d 100644 --- a/frontend/src/client/services/DocumentService.ts +++ b/frontend/src/client/services/DocumentService.ts @@ -13,6 +13,8 @@ import {DocumentRecordUpdateResponse} from '../schemas/DocumentRecordUpdateRespo import {DocumentRecordUpdate} from '../schemas/DocumentRecordUpdate' import {DocTranslationMemory} from '../schemas/DocTranslationMemory' import {DocTranslationMemoryUpdate} from '../schemas/DocTranslationMemoryUpdate' +import {TranslationMemoryListResponse} from '../schemas/TranslationMemoryListResponse' +import {TranslationMemoryListSimilarResponse} from '../schemas/TranslationMemoryListSimilarResponse' import {DocGlossary} from '../schemas/DocGlossary' import {DocGlossaryUpdate} from '../schemas/DocGlossaryUpdate' import {DocumentProcessingSettings} from '../schemas/DocumentProcessingSettings' @@ -49,6 +51,12 @@ export const getTranslationMemories = async (doc_id: number): Promise => { return await api.post(`/document/${doc_id}/memories`, content) } +export const searchTmExact = async (doc_id: number, source: string): Promise => { + return await api.get(`/document/${doc_id}/tm/exact`, {query: {source}}) +} +export const searchTmSimilar = async (doc_id: number, source: string): Promise => { + return await api.get(`/document/${doc_id}/tm/similar`, {query: {source}}) +} export const getGlossaries = async (doc_id: number): Promise => { return await api.get(`/document/${doc_id}/glossaries`) } From e7cba0f4d676fb892e4da100d7290ea6c6592828 Mon Sep 17 00:00:00 2001 From: Denis Bezykornov Date: Wed, 26 Nov 2025 00:29:32 +0300 Subject: [PATCH 3/3] Add searching for memory records in frontend --- frontend/src/components/TmSearchModal.vue | 158 ++++++++++++++++++ .../src/components/document/FilterPanel.vue | 11 +- frontend/src/views/DocView.vue | 9 + 3 files changed, 175 insertions(+), 3 deletions(-) create mode 100644 frontend/src/components/TmSearchModal.vue diff --git a/frontend/src/components/TmSearchModal.vue b/frontend/src/components/TmSearchModal.vue new file mode 100644 index 0000000..fa4dc8f --- /dev/null +++ b/frontend/src/components/TmSearchModal.vue @@ -0,0 +1,158 @@ + + + diff --git a/frontend/src/components/document/FilterPanel.vue b/frontend/src/components/document/FilterPanel.vue index a16a44a..3db4112 100644 --- a/frontend/src/components/document/FilterPanel.vue +++ b/frontend/src/components/document/FilterPanel.vue @@ -7,6 +7,7 @@ import {debounce} from '../../utilities/utils' const emit = defineEmits<{ sourceFilterUpdate: [string] targetFilterUpdate: [string] + openTmSearch: [] }>() const sourceFilter = ref('') @@ -26,9 +27,6 @@ watch(targetFilter, (newVal) => updateTargetFilter(newVal)) diff --git a/frontend/src/views/DocView.vue b/frontend/src/views/DocView.vue index 221fcd5..49256a0 100644 --- a/frontend/src/views/DocView.vue +++ b/frontend/src/views/DocView.vue @@ -13,6 +13,7 @@ import ProcessingErrorMessage from '../components/document/ProcessingErrorMessag import RoutingLink from '../components/RoutingLink.vue' import DocumentSkeleton from '../components/document/DocumentSkeleton.vue' import FilterPanel from '../components/document/FilterPanel.vue' +import TmSearchModal from '../components/TmSearchModal.vue' import { getDoc, getDocRecords, @@ -189,6 +190,8 @@ const currentSegmentId = computed(() => { return undefined return recordsData.value.records[focusedSegmentIdx.value].id }) + +const showTmSearchModal = ref(false)