Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions backend/app/routers/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
from app.translation_memory.schema import (
MemorySubstitution,
TranslationMemory,
TranslationMemoryListResponse,
TranslationMemoryListSimilarResponse,
)
from app.user.depends import get_current_user_id, has_user_role

Expand Down Expand Up @@ -244,6 +246,57 @@ def find_memory(id_: int, memories):
return models.StatusMessage(message="Memory list updated")


@router.get("/{doc_id}/tm/exact")
def search_tm_exact(
    doc_id: int,
    db: Annotated[Session, Depends(get_db)],
    source: Annotated[str, Query(description="Source text to search for")],
) -> TranslationMemoryListResponse:
    """Search the translation memories linked to a document for ``source``.

    Only the first page (page 0) of at most 20 records is returned;
    ``total_records`` carries the count reported by the query layer.
    A document with no linked memories yields an empty response without
    touching the record table.

    NOTE(review): matching is delegated to ``get_memory_records_paged``,
    which appears to apply a case-insensitive substring filter rather than
    strict equality - confirm that "exact" is the intended name.
    """
    document = get_doc_by_id(db, doc_id)
    linked_memory_ids = [memory.id for memory in document.memories]

    # Nothing linked: answer with an empty page instead of running a query.
    if not linked_memory_ids:
        return TranslationMemoryListResponse(records=[], page=0, total_records=0)

    found, total = TranslationMemoryQuery(db).get_memory_records_paged(
        memory_ids=linked_memory_ids,
        page=0,
        page_records=20,
        query=source,
    )
    return TranslationMemoryListResponse(records=found, page=0, total_records=total)


@router.get("/{doc_id}/tm/similar")
def search_tm_similar(
    doc_id: int,
    db: Annotated[Session, Depends(get_db)],
    source: Annotated[str, Query(description="Source text to search for")],
) -> TranslationMemoryListSimilarResponse:
    """Find records similar to ``source`` in the document's linked memories.

    Similarity matching and ordering are delegated to
    ``get_memory_records_paged_similar``, which is asked for at most 20
    records. The response always reports page 0, and ``total_records`` is
    the number of records actually returned (not a global match count).
    A document with no linked memories yields an empty response.
    """
    document = get_doc_by_id(db, doc_id)
    linked_memory_ids = [memory.id for memory in document.memories]

    # Nothing linked: answer with an empty page instead of running a query.
    if not linked_memory_ids:
        return TranslationMemoryListSimilarResponse(
            records=[], page=0, total_records=0
        )

    similar = TranslationMemoryQuery(db).get_memory_records_paged_similar(
        memory_ids=linked_memory_ids,
        page_records=20,
        query=source,
    )
    return TranslationMemoryListSimilarResponse(
        records=similar,
        page=0,
        total_records=len(similar),
    )


@router.get("/{doc_id}/glossaries")
def get_glossaries(
doc_id: int, db: Annotated[Session, Depends(get_db)]
Expand Down
19 changes: 15 additions & 4 deletions backend/app/translation_memory/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,17 @@ def get_memory_records_count(self, memory_id: int) -> int:

def get_memory_records_paged(
self,
memory_id: int,
memory_ids: int | list[int],
page: int,
page_records: int,
query: str | None,
) -> tuple[list[schema.TranslationMemoryRecord], int]:
filters = [TranslationMemoryRecord.document_id == memory_id]
# Handle both single int and list of ints
if isinstance(memory_ids, int):
filters = [TranslationMemoryRecord.document_id == memory_ids]
else:
filters = [TranslationMemoryRecord.document_id.in_(memory_ids)]

if query:
filters.append(TranslationMemoryRecord.source.ilike(f"%{query}%"))

Expand All @@ -67,7 +72,7 @@ def get_memory_records_paged(

def get_memory_records_paged_similar(
self,
memory_id: int,
memory_ids: int | list[int],
page_records: int,
query: str,
) -> list[schema.TranslationMemoryRecordWithSimilarity]:
Expand All @@ -80,6 +85,12 @@ def get_memory_records_paged_similar(
{"threshold": 0.25},
)

# Handle both single int and list of ints for filtering
if isinstance(memory_ids, int):
filter_condition = TranslationMemoryRecord.document_id == memory_ids
else:
filter_condition = TranslationMemoryRecord.document_id.in_(memory_ids)

return [
schema.TranslationMemoryRecordWithSimilarity(
id=scalar.id,
Expand All @@ -95,7 +106,7 @@ def get_memory_records_paged_similar(
similarity_func,
)
.filter(
TranslationMemoryRecord.document_id == memory_id,
filter_condition,
TranslationMemoryRecord.source.op("%")(query),
)
.order_by(similarity_func.desc())
Expand Down
238 changes: 238 additions & 0 deletions backend/tests/routers/test_tm_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
from fastapi.testclient import TestClient
from sqlalchemy.orm import Session

from app.documents.models import (
DocMemoryAssociation,
Document,
DocumentType,
TmMode,
)
from app.translation_memory.models import TranslationMemory, TranslationMemoryRecord


def test_search_tm_exact_with_no_linked_memories(
    user_logged_client: TestClient, session: Session
):
    """Exact search on a document without linked TMs returns an empty page."""
    document = Document(
        name="test_doc.txt",
        type=DocumentType.txt,
        processing_status="pending",
        created_by=1,
    )
    with session as db:
        db.add(document)
        db.commit()

    reply = user_logged_client.get("/document/1/tm/exact?source=Hello")
    assert reply.status_code == 200

    payload = reply.json()
    assert payload["records"] == []
    assert payload["page"] == 0
    assert payload["total_records"] == 0


def test_search_tm_exact_with_linked_memories(
    user_logged_client: TestClient, session: Session
):
    """Exact search returns the matching records of a linked translation memory."""
    segment_pairs = [
        ("Hello World", "Привет Мир"),
        ("Goodbye", "Пока"),
        ("Hello Again", "Привет снова"),
    ]

    with session as db:
        # The document under test.
        db.add(
            Document(
                name="test_doc.txt",
                type=DocumentType.txt,
                processing_status="pending",
                created_by=1,
            )
        )

        # Flush so the memory gets its primary key before records reference it.
        memory = TranslationMemory(name="test_memory.tmx", created_by=1)
        db.add(memory)
        db.flush()

        db.add_all(
            [
                TranslationMemoryRecord(
                    document_id=memory.id,
                    source=src,
                    target=tgt,
                )
                for src, tgt in segment_pairs
            ]
        )

        # Attach the memory to the document in read mode.
        db.add(DocMemoryAssociation(doc_id=1, tm_id=memory.id, mode=TmMode.read))
        db.commit()

    reply = user_logged_client.get("/document/1/tm/exact?source=Hello")
    assert reply.status_code == 200

    payload = reply.json()
    # "Hello World" and "Hello Again" match; "Goodbye" does not.
    assert len(payload["records"]) == 2
    assert payload["page"] == 0
    assert payload["total_records"] == 2

    returned_sources = [item["source"] for item in payload["records"]]
    assert "Hello World" in returned_sources
    assert "Hello Again" in returned_sources


def test_search_tm_exact_with_multiple_linked_memories(
    user_logged_client: TestClient, session: Session
):
    """Exact search aggregates matches from every memory linked to the document."""
    with session as db:
        db.add(
            Document(
                name="test_doc.txt",
                type=DocumentType.txt,
                processing_status="pending",
                created_by=1,
            )
        )

        # Flush so both memories get primary keys before records reference them.
        first_tm = TranslationMemory(name="memory1.tmx", created_by=1)
        second_tm = TranslationMemory(name="memory2.tmx", created_by=1)
        db.add_all([first_tm, second_tm])
        db.flush()

        # One matching record per memory, plus a non-matching one.
        seeded = [
            (first_tm.id, "Hello World", "Привет Мир"),
            (second_tm.id, "Hello Again", "Привет снова"),
            (second_tm.id, "Goodbye", "Пока"),
        ]
        db.add_all(
            [
                TranslationMemoryRecord(document_id=tm_id, source=src, target=tgt)
                for tm_id, src, tgt in seeded
            ]
        )

        # Attach both memories to the document in read mode.
        db.add(DocMemoryAssociation(doc_id=1, tm_id=first_tm.id, mode=TmMode.read))
        db.add(DocMemoryAssociation(doc_id=1, tm_id=second_tm.id, mode=TmMode.read))
        db.commit()

    reply = user_logged_client.get("/document/1/tm/exact?source=Hello")
    assert reply.status_code == 200

    payload = reply.json()
    # One hit is expected from each linked memory.
    assert len(payload["records"]) == 2
    assert payload["page"] == 0
    assert payload["total_records"] == 2


def test_search_tm_exact_returns_404_for_nonexistent_document(
    user_logged_client: TestClient,
):
    """Exact search against an unknown document id answers with HTTP 404."""
    missing_document_url = "/document/999/tm/exact?source=Hello"
    reply = user_logged_client.get(missing_document_url)
    assert reply.status_code == 404


def test_search_tm_exact_no_results(user_logged_client: TestClient, session: Session):
    """Exact search returns an empty page when no linked record matches."""
    with session as db:
        db.add(
            Document(
                name="test_doc.txt",
                type=DocumentType.txt,
                processing_status="pending",
                created_by=1,
            )
        )

        # Flush so the memory gets its primary key before the record references it.
        memory = TranslationMemory(name="test_memory.tmx", created_by=1)
        db.add(memory)
        db.flush()

        # The only stored segment does not contain the searched text.
        unrelated = TranslationMemoryRecord(
            document_id=memory.id,
            source="Goodbye World",
            target="Пока Мир",
        )
        db.add(unrelated)

        # Attach the memory to the document in read mode.
        db.add(DocMemoryAssociation(doc_id=1, tm_id=memory.id, mode=TmMode.read))
        db.commit()

    reply = user_logged_client.get("/document/1/tm/exact?source=Hello")
    assert reply.status_code == 200

    payload = reply.json()
    assert payload["records"] == []
    assert payload["page"] == 0
    assert payload["total_records"] == 0


def test_search_tm_limit_20_results(user_logged_client: TestClient, session: Session):
    """The exact search endpoint returns at most 20 records per response.

    Only ``/tm/exact`` is exercised here; 25 matching records are stored and
    the response is expected to carry 20 of them.
    """
    with session as db:
        db.add(
            Document(
                name="test_doc.txt",
                type=DocumentType.txt,
                processing_status="pending",
                created_by=1,
            )
        )

        # Flush so the memory gets its primary key before records reference it.
        memory = TranslationMemory(name="test_memory.tmx", created_by=1)
        db.add(memory)
        db.flush()

        # Store more matching segments (25) than the endpoint's page size (20).
        matching_records = [
            TranslationMemoryRecord(
                document_id=memory.id,
                source=f"Hello World {i}",
                target=f"Привет Мир {i}",
            )
            for i in range(25)
        ]
        db.add_all(matching_records)

        # Attach the memory to the document in read mode.
        db.add(DocMemoryAssociation(doc_id=1, tm_id=memory.id, mode=TmMode.read))
        db.commit()

    reply = user_logged_client.get("/document/1/tm/exact?source=Hello")
    assert reply.status_code == 200

    payload = reply.json()
    assert len(payload["records"]) == 20  # Limited to 20
8 changes: 8 additions & 0 deletions frontend/src/client/services/DocumentService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import {DocumentRecordUpdateResponse} from '../schemas/DocumentRecordUpdateRespo
import {DocumentRecordUpdate} from '../schemas/DocumentRecordUpdate'
import {DocTranslationMemory} from '../schemas/DocTranslationMemory'
import {DocTranslationMemoryUpdate} from '../schemas/DocTranslationMemoryUpdate'
import {TranslationMemoryListResponse} from '../schemas/TranslationMemoryListResponse'
import {TranslationMemoryListSimilarResponse} from '../schemas/TranslationMemoryListSimilarResponse'
import {DocGlossary} from '../schemas/DocGlossary'
import {DocGlossaryUpdate} from '../schemas/DocGlossaryUpdate'
import {DocumentProcessingSettings} from '../schemas/DocumentProcessingSettings'
Expand Down Expand Up @@ -49,6 +51,12 @@ export const getTranslationMemories = async (doc_id: number): Promise<DocTransla
/**
 * POST the given translation-memory selection to `/document/{doc_id}/memories`
 * and resolve with the status message returned by the API client.
 */
export const setTranslationMemories = async (doc_id: number, content: DocTranslationMemoryUpdate): Promise<StatusMessage> => {
  const url = `/document/${doc_id}/memories`
  return await api.post<StatusMessage>(url, content)
}
/**
 * GET `/document/{doc_id}/tm/exact`, passing `source` through the client's
 * `query` option, and resolve with the translation-memory list response.
 */
export const searchTmExact = async (doc_id: number, source: string): Promise<TranslationMemoryListResponse> => {
  const url = `/document/${doc_id}/tm/exact`
  return await api.get<TranslationMemoryListResponse>(url, {query: {source}})
}
/**
 * GET `/document/{doc_id}/tm/similar`, passing `source` through the client's
 * `query` option, and resolve with the similar-records list response.
 */
export const searchTmSimilar = async (doc_id: number, source: string): Promise<TranslationMemoryListSimilarResponse> => {
  const url = `/document/${doc_id}/tm/similar`
  return await api.get<TranslationMemoryListSimilarResponse>(url, {query: {source}})
}
/**
 * GET `/document/{doc_id}/glossaries` and resolve with the list of glossaries
 * returned by the API client.
 */
export const getGlossaries = async (doc_id: number): Promise<DocGlossary[]> => {
  const url = `/document/${doc_id}/glossaries`
  return await api.get<DocGlossary[]>(url)
}
Expand Down
Loading