From 5c6597423098f4e60c7cc1e3c04fbe5f6a5761f9 Mon Sep 17 00:00:00 2001 From: Denis Bezykornov Date: Sat, 22 Nov 2025 18:17:33 +0300 Subject: [PATCH 1/8] Preserve some translation results if LLM returns incorrect results --- backend/app/translators/llm.py | 18 ++++++++++++------ backend/tests/test_llm.py | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/backend/app/translators/llm.py b/backend/app/translators/llm.py index 8ed72fe..f76c8e7 100644 --- a/backend/app/translators/llm.py +++ b/backend/app/translators/llm.py @@ -14,7 +14,7 @@ def generate_prompt_prologue() -> str: if not settings.llm_prompt: - logging.error('No LLM prompt configured') + logging.error("No LLM prompt configured") return settings.llm_prompt @@ -55,22 +55,28 @@ def generate_prompt( return "\n\n".join(parts), len(task_lines) +SEG_MATCHER = re.compile(r"(.*)") + + def parse_lines(network_out: str, expected_size: int) -> tuple[list[str], bool]: - output = [] + output: list[str] = [] split = network_out.strip().splitlines() if len(split) != expected_size: logging.warning("Unexpected LLM output, not enough lines returned %s", split) return [], False + failed = False for line in split: - m = re.match(r"(.*)", line) + m = re.match(SEG_MATCHER, line) if not m: - logging.warning("Unexpected LLM output, not match found in %s", line) - return [], False + logging.warning("Unexpected LLM output, no match found in %s", line) + output.append("") + failed = True + continue output.append(m.group(1)) - return output, True + return output, not failed def translate_lines( diff --git a/backend/tests/test_llm.py b/backend/tests/test_llm.py index 4f515a9..b54d9d5 100644 --- a/backend/tests/test_llm.py +++ b/backend/tests/test_llm.py @@ -194,7 +194,7 @@ def test_parse_lines_invalid_format(): result, success = llm.parse_lines(network_out, expected_size) assert not success - assert result == [] + assert result == ["", "translation2"] def test_parse_lines_empty_content(): From
046addac22051bf44c8b87ea8475d7f4e65fded0 Mon Sep 17 00:00:00 2001 From: Denis Bezykornov Date: Sun, 23 Nov 2025 14:03:51 +0300 Subject: [PATCH 2/8] Add endpoint to search in TM --- backend/app/routers/translation_memory.py | 17 ++++- backend/app/translation_memory/query.py | 78 ++++++++++++++++++++- backend/app/translation_memory/schema.py | 9 +++ backend/tests/routers/test_routes_tms.py | 82 +++++++++++++++++++++++ 4 files changed, 184 insertions(+), 2 deletions(-) diff --git a/backend/app/routers/translation_memory.py b/backend/app/routers/translation_memory.py index 5ac66f5..05bea25 100644 --- a/backend/app/routers/translation_memory.py +++ b/backend/app/routers/translation_memory.py @@ -20,7 +20,7 @@ def get_memory_by_id(db: Session, memory_id: int): doc = TranslationMemoryQuery(db).get_memory(memory_id) if not doc: raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, detail="Document not found" + status_code=status.HTTP_404_NOT_FOUND, detail="Memory not found" ) return doc @@ -116,6 +116,21 @@ def delete_memory(tm_id: int, db: Annotated[Session, Depends(get_db)]) -> Status return StatusMessage(message="Deleted") +@router.get("/{tm_id}/search") +def search_translation_memory( + tm_id: int, + query: Annotated[str, Query(min_length=1)], + mode: schema.TranslationMemorySearchMode, + db: Annotated[Session, Depends(get_db)], +) -> list[schema.TranslationMemorySearchResult]: + get_memory_by_id(db, tm_id) + return TranslationMemoryQuery(db).search_memory_records( + query=query, + mode=mode, + tm_id=tm_id, + ) + + @router.get( "/{tm_id}/download", response_class=StreamingResponse, diff --git a/backend/app/translation_memory/query.py b/backend/app/translation_memory/query.py index 5a0ff5f..21698a1 100644 --- a/backend/app/translation_memory/query.py +++ b/backend/app/translation_memory/query.py @@ -5,7 +5,11 @@ from sqlalchemy.orm import Session from .models import TranslationMemory, TranslationMemoryRecord -from .schema import MemorySubstitution +from .schema 
import ( + MemorySubstitution, + TranslationMemorySearchMode, + TranslationMemorySearchResult, +) class TranslationMemoryQuery: @@ -90,6 +94,78 @@ def delete_memory(self, memory: TranslationMemory): self.__db.delete(memory) self.__db.commit() + def search_memory_records( + self, + query: str, + mode: TranslationMemorySearchMode, + tm_id: int, + ) -> list[TranslationMemorySearchResult]: + if mode == TranslationMemorySearchMode.EXACT: + return self._search_exact(query, tm_id) + elif mode == TranslationMemorySearchMode.SIMILAR: + return self._search_similar(query, tm_id) + else: + raise ValueError(f"Unsupported search mode: {mode}") + + def _search_exact( + self, query: str, tm_id: int + ) -> list[TranslationMemorySearchResult]: + query_filter = TranslationMemoryRecord.source.ilike(f"%{query}%") + + records = self.__db.execute( + select(TranslationMemoryRecord) + .filter(query_filter, TranslationMemoryRecord.document_id == tm_id) + .order_by(TranslationMemoryRecord.id) + .limit(20) + ).scalars() + + return [ + TranslationMemorySearchResult( + id=record.id, + source=record.source, + target=record.target, + similarity=None, + ) + for record in records + ] + + def _search_similar( + self, query: str, tm_id: int + ) -> list[TranslationMemorySearchResult]: + # Use the same approach as get_substitutions but with different parameters + similarity_func = func.similarity(TranslationMemoryRecord.source, query) + + # Set similarity threshold to 0.25 (25%) as required + self.__db.execute( + text("SET pg_trgm.similarity_threshold TO :threshold"), + {"threshold": 0.25}, + ) + + records = self.__db.execute( + select( + TranslationMemoryRecord.id, + TranslationMemoryRecord.source, + TranslationMemoryRecord.target, + similarity_func, + ) + .filter( + TranslationMemoryRecord.source.op("%")(query), + TranslationMemoryRecord.document_id == tm_id, + ) + .order_by(similarity_func.desc()) + .limit(20) + ).all() + + return [ + TranslationMemorySearchResult( + id=record.id, + 
source=record.source, + target=record.target, + similarity=record.similarity, + ) + for record in records + ] + def add_or_update_record(self, document_id: int, source: str, target: str): record = self.__db.execute( select(TranslationMemoryRecord) diff --git a/backend/app/translation_memory/schema.py b/backend/app/translation_memory/schema.py index 3c15d01..9387a3a 100644 --- a/backend/app/translation_memory/schema.py +++ b/backend/app/translation_memory/schema.py @@ -32,3 +32,12 @@ class TranslationMemoryRecord(Identified): class TranslationMemoryCreationSettings(BaseModel): name: str = Field(min_length=1) + + +class TranslationMemorySearchMode(str, Enum): + EXACT = "exact" + SIMILAR = "similar" + + +class TranslationMemorySearchResult(TranslationMemoryRecord): + similarity: float | None = None diff --git a/backend/tests/routers/test_routes_tms.py b/backend/tests/routers/test_routes_tms.py index 6ab0a7d..7212b99 100644 --- a/backend/tests/routers/test_routes_tms.py +++ b/backend/tests/routers/test_routes_tms.py @@ -206,3 +206,85 @@ def test_can_download_document(user_logged_client: TestClient, session: Session) def test_download_returns_404_for_non_existing_tm(user_logged_client: TestClient): response = user_logged_client.get("/translation_memory/999/download") assert response.status_code == 404 + + +def test_search_exact_match(user_logged_client: TestClient, session: Session): + tm_records = [ + TranslationMemoryRecord(source="Hello world", target="Hola mundo"), + TranslationMemoryRecord(source="Goodbye world", target="Adiós mundo"), + TranslationMemoryRecord(source="Welcome home", target="Bienvenido a casa"), + ] + with session as s: + s.add(TranslationMemory(name="test_doc.tmx", records=tm_records, created_by=1)) + s.commit() + + # Test exact search for "world" + response = user_logged_client.get( + "/translation_memory/1/search", params={"query": "world", "mode": "exact"} + ) + assert response.status_code == 200 + + results = response.json() + assert 
len(results) == 2 + # Should return records containing "world" in source + sources = [result["source"] for result in results] + assert "Hello world" in sources + assert "Goodbye world" in sources + # Similarity should be None for exact search + assert all(result["similarity"] is None for result in results) + + +def test_search_in_specific_tm(user_logged_client: TestClient, session: Session): + # Create a TM with test records + tm_records = [ + TranslationMemoryRecord(source="Hello world", target="Hola mundo"), + TranslationMemoryRecord(source="Goodbye world", target="Adiós mundo"), + ] + with session as s: + s.add(TranslationMemory(name="test.tmx", records=tm_records, created_by=1)) + s.commit() + + # Search in specific TM + response = user_logged_client.get( + "/translation_memory/1/search", params={"query": "world", "mode": "exact"} + ) + assert response.status_code == 200 + + results = response.json() + assert len(results) == 2 + sources = [result["source"] for result in results] + assert "Hello world" in sources + assert "Goodbye world" in sources + + +def test_search_in_nonexistent_tm(user_logged_client: TestClient): + response = user_logged_client.get( + "/translation_memory/999/search", params={"query": "test", "mode": "exact"} + ) + assert response.status_code == 404 + assert "Memory not found" in response.json()["detail"] + + +def test_search_empty_query(user_logged_client: TestClient): + response = user_logged_client.get( + "/translation_memory/1/search", params={"query": "", "mode": "exact"} + ) + assert response.status_code == 422 # Validation error for empty query + + +def test_search_no_results(user_logged_client: TestClient, session: Session): + tm_records = [ + TranslationMemoryRecord(source="Hello world", target="Hola mundo"), + ] + with session as s: + s.add(TranslationMemory(name="test_doc.tmx", records=tm_records, created_by=1)) + s.commit() + + # Search for something that doesn't exist + response = user_logged_client.get( + 
"/translation_memory/1/search", params={"query": "nonexistent", "mode": "exact"} + ) + assert response.status_code == 200 + + results = response.json() + assert len(results) == 0 From 44565f886209732e44ce7e56c936f3beb91e3246 Mon Sep 17 00:00:00 2001 From: Denis Bezykornov Date: Sun, 23 Nov 2025 22:17:50 +0300 Subject: [PATCH 3/8] Refactor TM view to use Pinia Colada --- frontend/src/views/TmView.vue | 41 ++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/frontend/src/views/TmView.vue b/frontend/src/views/TmView.vue index bbfca2f..70b1af7 100644 --- a/frontend/src/views/TmView.vue +++ b/frontend/src/views/TmView.vue @@ -1,14 +1,13 @@