diff --git a/backend/alembic/versions/dcd734dfe9c4_convert_documentrecordhistorychangetype_enum.py b/backend/alembic/versions/dcd734dfe9c4_convert_documentrecordhistorychangetype_enum.py
new file mode 100644
index 0000000..9a641c3
--- /dev/null
+++ b/backend/alembic/versions/dcd734dfe9c4_convert_documentrecordhistorychangetype_enum.py
@@ -0,0 +1,71 @@
+"""Convert DocumentRecordHistoryChangeType enum to string
+
+Revision ID: dcd734dfe9c4
+Revises: 71a5eef94341
+Create Date: 2026-02-17 02:32:03.411573
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# pylint: disable=E1101
+
+# revision identifiers, used by Alembic.
+revision: str = 'dcd734dfe9c4'
+down_revision: Union[str, None] = '71a5eef94341'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+# Native enum as it existed before this revision; 'translation_update' is
+# deliberately absent because it was introduced together with the text column.
+change_type_enum = sa.Enum(
+    'initial_import',
+    'machine_translation',
+    'tm_substitution',
+    'glossary_substitution',
+    'repetition',
+    'manual_edit',
+    name='documentrecordhistorychangetype'
+)
+
+
+def upgrade() -> None:
+    """Turn change_type into a text column and drop the native enum type."""
+    # Convert enum column to text/string for flexibility
+    op.alter_column(
+        'document_record_history',
+        'change_type',
+        type_=sa.Text(),
+    )
+    change_type_enum.drop(op.get_bind(), checkfirst=True)
+
+
+def downgrade() -> None:
+    """Restore the native enum column, folding unknown values into manual_edit."""
+    # The text column accepts values the old enum never had (translation_update
+    # and anything added later); remap all of them, otherwise the cast below
+    # fails on the first row holding such a value.
+    history = sa.table(
+        'document_record_history',
+        sa.column('change_type', sa.Text()),
+    )
+    op.execute(
+        history.update()
+        .where(~history.c.change_type.in_(change_type_enum.enums))
+        .values(change_type='manual_edit')
+    )
+
+    # Recreate enum type (without translation_update)
+    change_type_enum.create(op.get_bind(), checkfirst=True)
+
+    # Convert text column back to enum
+    op.execute(
+        sa.text("""
+            ALTER TABLE document_record_history
+            ALTER COLUMN change_type TYPE documentrecordhistorychangetype
+            USING change_type::text::documentrecordhistorychangetype;
+        """)
+    )
diff --git a/backend/app/documents/models.py b/backend/app/documents/models.py index 
dfe8a80..21d9afc 100644
--- a/backend/app/documents/models.py
+++ b/backend/app/documents/models.py
@@ -28,6 +28,7 @@ class DocumentRecordHistoryChangeType(Enum):
     glossary_substitution = "glossary_substitution"
     repetition = "repetition"
     manual_edit = "manual_edit"
+    translation_update = "translation_update"
 
 
 class TmMode(Enum):
@@ -203,7 +204,9 @@ class DocumentRecordHistory(Base):
     diff: Mapped[str] = mapped_column()
     author_id: Mapped[int | None] = mapped_column(ForeignKey("user.id"), nullable=True)
     timestamp: Mapped[datetime] = mapped_column(default=utc_time)
-    change_type: Mapped[DocumentRecordHistoryChangeType] = mapped_column()
+    change_type: Mapped[DocumentRecordHistoryChangeType] = mapped_column(
+        SqlEnum(DocumentRecordHistoryChangeType, native_enum=False)
+    )
 
     record: Mapped["DocumentRecord"] = relationship(back_populates="history")
     author: Mapped["User"] = relationship()
diff --git a/backend/app/documents/schema.py b/backend/app/documents/schema.py
index 68b5b37..306ec55 100644
--- a/backend/app/documents/schema.py
+++ b/backend/app/documents/schema.py
@@ -124,3 +124,12 @@ class DocumentUpdateResponse(BaseModel):
     project_id: int | None
 
     model_config = ConfigDict(from_attributes=True)
+
+
+# Options accepted by the XLIFF upload endpoint; the router builds this from
+# the request's form fields and hands it to the document service.
+class XliffUploadOptions(BaseModel):
+    update_approved: bool = Field(
+        default=False,
+        description="If True, forcefully update approved records. If False (default), skip approved records.",
+    )
diff --git a/backend/app/formats/xliff.py b/backend/app/formats/xliff.py
index 7ac5df0..6a3267a 100644
--- a/backend/app/formats/xliff.py
+++ b/backend/app/formats/xliff.py
@@ -211,6 +211,8 @@ def extract_xliff_content(content: bytes) -> XliffData:
     root: etree._Element = etree.fromstring(
         content, parser=etree.XMLParser(recover=True)
     )
+    if root is None:
+        raise RuntimeError("Error: Invalid XML file")
 
     version = root.attrib.get("version")
     if not version or version != "1.2":
diff --git a/backend/app/routers/document.py b/backend/app/routers/document.py
index 5544b1c..24f64dc 100644
--- a/backend/app/routers/document.py
+++ b/backend/app/routers/document.py
@@ -1,6 +1,15 @@
 from typing import Annotated
 
-from fastapi import APIRouter, Depends, File, HTTPException, Query, UploadFile, status
+from fastapi import (
+    APIRouter,
+    Depends,
+    File,
+    Form,
+    HTTPException,
+    Query,
+    UploadFile,
+    status,
+)
 from fastapi.responses import StreamingResponse
 from sqlalchemy.orm import Session
 
@@ -260,3 +269,31 @@ def update_document(
             status_code=status.HTTP_403_FORBIDDEN,
             detail=str(e),
         )
+
+
+# Accepts a multipart upload (file plus optional update_approved form field)
+# and delegates to DocumentService.upload_xliff. Service errors are mapped to
+# HTTP codes: EntityNotFound -> 404, BusinessLogicError -> 400.
+@router.post("/upload_xliff")
+async def upload_xliff(
+    service: Annotated[DocumentService, Depends(get_service)],
+    current_user: Annotated[int, Depends(get_current_user_id)],
+    file: Annotated[UploadFile, File()],
+    update_approved: Annotated[bool, Form()] = False,
+) -> models.StatusMessage:
+    try:
+        return await service.upload_xliff(
+            file,
+            doc_schema.XliffUploadOptions(update_approved=update_approved),
+            current_user,
+        )
+    except EntityNotFound as e:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=str(e),
+        )
+    except BusinessLogicError as e:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=str(e),
+        )
diff --git a/backend/app/services/document_service.py b/backend/app/services/document_service.py index f6ba0da..da9d9f4 100644 --- a/backend/app/services/document_service.py +++ 
b/backend/app/services/document_service.py
@@ -13,13 +13,17 @@
 from app.documents import schema as doc_schema
 from app.documents.models import (
     Document,
+    DocumentRecord,
+    DocumentRecordHistoryChangeType,
     DocumentType,
     TmMode,
     XliffRecord,
 )
 from app.documents.query import (
+    DocumentRecordHistoryQuery,
     GenericDocsQuery,
 )
+from app.documents.utils import compute_diff
 from app.formats.txt import extract_txt_content
 from app.formats.xliff import (
     SegmentState,
@@ -54,6 +58,7 @@ def __init__(self, db: Session):
         self.__query = GenericDocsQuery(db)
         self.__glossary_query = GlossaryQuery(db)
         self.__tm_query = TranslationMemoryQuery(db)
+        self.__history_query = DocumentRecordHistoryQuery(db)
 
     def get_document(self, doc_id: int) -> doc_schema.DocumentWithRecordsCount:
         """
@@ -653,3 +658,116 @@ def encode_to_latin_1(self, original: str):
         for c in original:
             output += c if (c.isalnum() or c in "'().[] -") else "_"
         return output
+
+    async def upload_xliff(
+        self,
+        file: UploadFile,
+        options: doc_schema.XliffUploadOptions,
+        current_user: int,
+    ) -> models.StatusMessage:
+        """
+        Upload XLIFF file and update document records.
+
+        The target document is identified by the ``original`` attribute of the
+        first ``file`` element. A record is updated only when the uploaded
+        translation is non-empty and differs from the stored target; approved
+        records are skipped unless ``options.update_approved`` is set. Every
+        update is recorded in history as ``translation_update``.
+
+        Args:
+            file: Uploaded XLIFF file
+            options: Upload options including update_approved flag
+            current_user: ID of user performing the upload
+
+        Returns:
+            StatusMessage reporting how many records were updated
+
+        Raises:
+            EntityNotFound: If document not found
+            BusinessLogicError: If the upload is empty, is not valid XLIFF, or
+                does not carry a numeric document ID
+        """
+        # Hand the raw bytes to the parser: forcing a UTF-8 decode here would
+        # turn an upload in any other encoding into an unhandled
+        # UnicodeDecodeError instead of a BusinessLogicError.
+        file_data = await file.read()
+        if not file_data:
+            raise BusinessLogicError("Invalid XLIFF format: empty file")
+
+        # Parse XLIFF
+        try:
+            xliff_data = extract_xliff_content(file_data)
+        except RuntimeError as exc:
+            raise BusinessLogicError("Invalid XLIFF format") from exc
+
+        # Extract document ID from first file element
+        file_element = xliff_data.xliff_file.find(
+            ".//{urn:oasis:names:tc:xliff:document:1.2}file",
+            namespaces=xliff_data.xliff_file.nsmap,
+        )
+        if file_element is None:
+            raise BusinessLogicError("Invalid XLIFF format: no file element found")
+
+        doc_id_str = file_element.get("original")
+        if not doc_id_str:
+            raise BusinessLogicError(
+                "Invalid XLIFF format: file element missing original attribute"
+            )
+
+        try:
+            doc_id = int(doc_id_str)
+        except ValueError as exc:
+            raise BusinessLogicError(
+                f"Invalid document ID in XLIFF: {doc_id_str}"
+            ) from exc
+
+        # Validate document exists
+        self._get_document_by_id(doc_id)
+
+        # Prepare history entries for bulk creation
+        history_entries = []
+
+        # Update records
+        # NOTE(review): this issues one SELECT per segment; consider loading
+        # the document's records in a single query if uploads get large.
+        for segment in xliff_data.segments:
+            record = (
+                self.__db.query(DocumentRecord)
+                .filter_by(document_id=doc_id, id=segment.id_)
+                .first()
+            )
+
+            if not record:
+                continue
+
+            # Check if we should update this record
+            should_update = options.update_approved or not record.approved
+
+            if not should_update:
+                continue
+
+            old_target = record.target
+            new_target = segment.translation or ""
+            # Only update if the new translation is different and not empty
+            if old_target != new_target and new_target:
+                record.target = new_target
+                record.approved = segment.approved
+
+                # Prepare history entry
+                history_entries.append(
+                    (record.id, compute_diff(old_target, new_target))
+                )
+
+        # Bulk create history entries
+        if history_entries:
+            self.__history_query.bulk_create_history_entry(
+                history_entries,
+                current_user,
+                DocumentRecordHistoryChangeType.translation_update,
+            )
+
+        self.__db.commit()
+        updated_count = len(history_entries)
+        return models.StatusMessage(
+            message=f"Successfully updated {updated_count} record(s)"
+        )
diff --git a/backend/tests/fixtures/upload_test.xliff b/backend/tests/fixtures/upload_test.xliff new file mode 100644 index 0000000..cccd0ae --- /dev/null +++ b/backend/tests/fixtures/upload_test.xliff @@ -0,0 +1,32 @@ + + + + 
+ +
+ + + Regional Effects + Региональные эффекты + + + User Interface + Пользовательский интерфейс + + + Approved Segment + Утвержденный сегмент + + + 123456789 + + + + Something else + Что-то еще + + +
+
diff --git a/backend/tests/routers/test_routes_documents.py b/backend/tests/routers/test_routes_documents.py
index 7d3b229..9b83681 100644
--- a/backend/tests/routers/test_routes_documents.py
+++ b/backend/tests/routers/test_routes_documents.py
@@ -8,6 +8,8 @@
     DocMemoryAssociation,
     Document,
     DocumentRecord,
+    DocumentRecordHistory,
+    DocumentRecordHistoryChangeType,
     DocumentType,
     TmMode,
     TxtDocument,
@@ -1308,3 +1310,221 @@ def test_download_xliff_shows_404_for_unknown_doc(user_logged_client: TestClient
     """Test 404 when downloading XLIFF for non-existent document."""
     response = user_logged_client.get("/document/1/download_xliff")
     assert response.status_code == 404
+
+
+def test_upload_xliff_success(user_logged_client: TestClient, session: Session):
+    """Test successful XLIFF upload with record updates."""
+    with session as s:
+        records = [
+            DocumentRecord(
+                source="Regional Effects",
+                target="",
+                approved=False,
+            ),
+            DocumentRecord(
+                source="User Interface",
+                target="",
+                approved=False,
+            ),
+            DocumentRecord(
+                source="Approved Segment",
+                target="Old approved text",
+                approved=True,
+            ),
+            DocumentRecord(
+                source="123456789",
+                target="",
+                approved=False,
+            ),
+            DocumentRecord(
+                source="Something else",
+                target="",
+                approved=False,
+            ),
+        ]
+        s.add(
+            Document(
+                name="test_doc",
+                type=DocumentType.txt,
+                records=records,
+                processing_status="done",
+                created_by=1,
+            )
+        )
+        s.commit()
+
+    with open("tests/fixtures/upload_test.xliff", "rb") as fp:
+        response = user_logged_client.post(
+            "/document/upload_xliff", files={"file": fp}, data={}
+        )
+    assert response.status_code == 200
+    # Three updates: "Regional Effects", "User Interface" and "Something else";
+    # the approved record and the segment with an empty target are skipped.
+    assert response.json() == {"message": "Successfully updated 3 record(s)"}
+
+    with session as s:
+        updated_records = s.query(DocumentRecord).filter_by(document_id=1).all()
+        # Check that records with non-empty targets were updated
+        regional_effects = next(
+            (r for r in updated_records if r.source == "Regional Effects"), None
+        )
+        assert regional_effects
+        assert regional_effects.target == "Региональные эффекты"
+        assert regional_effects.approved is False
+
+        user_interface = next(
+            (r for r in updated_records if r.source == "User Interface"), None
+        )
+        assert user_interface
+        assert user_interface.target == "Пользовательский интерфейс"
+        assert user_interface.approved is False
+
+        # Check that approved record was NOT updated
+        approved_segment = next(
+            (r for r in updated_records if r.source == "Approved Segment"), None
+        )
+        assert approved_segment
+        assert approved_segment.target == "Old approved text"
+        assert approved_segment.approved is True
+
+        # Check that records with empty targets were NOT updated
+        record_123456789 = next(
+            (r for r in updated_records if r.source == "123456789"), None
+        )
+        assert record_123456789
+        assert record_123456789.target == ""
+        assert record_123456789.approved is False
+
+        # Check that a record following the skipped ones is still updated
+        something_else = next(
+            (r for r in updated_records if r.source == "Something else"), None
+        )
+        assert something_else
+        assert something_else.target == "Что-то еще"
+        assert something_else.approved is False
+
+
+def test_upload_xliff_with_update_approved(
+    user_logged_client: TestClient, session: Session
+):
+    """Test XLIFF upload with update_approved=True to update approved records."""
+    with session as s:
+        records = [
+            DocumentRecord(
+                source="Regional Effects",
+                target="Old text",
+                approved=True,
+            ),
+            DocumentRecord(
+                source="User Interface",
+                target="Old text",
+                approved=False,
+            ),
+        ]
+        s.add(
+            Document(
+                name="test_doc.xliff",
+                type=DocumentType.xliff,
+                records=records,
+                processing_status="done",
+                created_by=1,
+            )
+        )
+        s.commit()
+
+    with open("tests/fixtures/upload_test.xliff", "rb") as fp:
+        response = user_logged_client.post(
+            "/document/upload_xliff",
+            files={"file": fp},
+            data={"update_approved": "true"},
+        )
+    assert response.status_code == 200
+    assert response.json() == {"message": "Successfully updated 2 record(s)"}
+
+    with session as s:
+        updated_records = s.query(DocumentRecord).filter_by(document_id=1).all()
+        # Check that approved record WAS updated
+        regional_effects = next(
+            (r for r in updated_records if r.source == "Regional Effects"), None
+        )
+        assert regional_effects
+        assert regional_effects.target == "Региональные эффекты"
+        assert regional_effects.approved is False  # it is False in XLIFF
+
+
+def test_upload_xliff_document_not_found(user_logged_client: TestClient):
+    """Test XLIFF upload with non-existent document ID."""
+    with open("tests/fixtures/upload_test.xliff", "rb") as fp:
+        response = user_logged_client.post(
+            "/document/upload_xliff", files={"file": fp}, data={}
+        )
+    assert response.status_code == 404
+    assert "Document not found" in response.json()["detail"]
+
+
+def test_upload_xliff_invalid_format(user_logged_client: TestClient):
+    """Test XLIFF upload with invalid XLIFF format."""
+    with open("tests/fixtures/small.txt", "rb") as fp:
+        response = user_logged_client.post("/document/upload_xliff", files={"file": fp})
+    assert response.status_code == 400
+    assert "Invalid XLIFF format" in response.json()["detail"]
+
+
+def test_upload_xliff_history_tracking(
+    user_logged_client: TestClient, session: Session
+):
+    """Test that history entries are created for XLIFF upload."""
+    with session as s:
+        records = [
+            DocumentRecord(
+                id=1,
+                source="Regional Effects",
+                target="Old text",
+                approved=False,
+            ),
+            DocumentRecord(
+                id=2,
+                source="User Interface",
+                target="Old text",
+                approved=False,
+            ),
+        ]
+        s.add(
+            Document(
+                name="test_doc",
+                type=DocumentType.txt,
+                records=records,
+                processing_status="done",
+                created_by=1,
+            )
+        )
+        s.commit()
+
+    with open("tests/fixtures/upload_test.xliff", "rb") as fp:
+        response = user_logged_client.post(
+            "/document/upload_xliff", files={"file": fp}, data={}
+        )
+    assert response.status_code == 200
+
+    with session as s:
+        history_entries = (
+            s.query(DocumentRecordHistory)
+            .filter(DocumentRecordHistory.record_id.in_([1, 2]))
+            .all()
+        )
+        assert len(history_entries) == 2
+        assert all(
+            h.change_type == DocumentRecordHistoryChangeType.translation_update
+            for h in history_entries
+        )
+        assert all(h.author_id == 1 for h in history_entries)
+
+
+def test_upload_xliff_unauthenticated(fastapi_client: TestClient):
+    """Test that unauthenticated requests are rejected."""
+    # Clear any existing cookies to ensure clean state
+    fastapi_client.cookies.clear()
+
+    with open("tests/fixtures/upload_test.xliff", "rb") as fp:
+        response = fastapi_client.post("/document/upload_xliff", files={"file": fp})
+    assert response.status_code == 401
diff --git a/frontend/src/client/schemas/Body_upload_xliff_document_upload_xliff_post.ts b/frontend/src/client/schemas/Body_upload_xliff_document_upload_xliff_post.ts new file mode 100644 index 0000000..4ebea6c --- /dev/null +++ b/frontend/src/client/schemas/Body_upload_xliff_document_upload_xliff_post.ts @@ -0,0 +1,6 @@ +// This file is autogenerated, do not edit directly. + +export interface Body_upload_xliff_document_upload_xliff_post { + file: Blob + update_approved?: boolean +} diff --git a/frontend/src/client/schemas/DocumentRecordHistoryChangeType.ts b/frontend/src/client/schemas/DocumentRecordHistoryChangeType.ts index ec5f294..be999fb 100644 --- a/frontend/src/client/schemas/DocumentRecordHistoryChangeType.ts +++ b/frontend/src/client/schemas/DocumentRecordHistoryChangeType.ts @@ -1,3 +1,3 @@ // This file is autogenerated, do not edit directly. 
-export type DocumentRecordHistoryChangeType = 'initial_import' | 'machine_translation' | 'tm_substitution' | 'glossary_substitution' | 'repetition' | 'manual_edit' +export type DocumentRecordHistoryChangeType = 'initial_import' | 'machine_translation' | 'tm_substitution' | 'glossary_substitution' | 'repetition' | 'manual_edit' | 'translation_update' diff --git a/frontend/src/client/services/DocumentService.ts b/frontend/src/client/services/DocumentService.ts index 594fb9d..21b9298 100644 --- a/frontend/src/client/services/DocumentService.ts +++ b/frontend/src/client/services/DocumentService.ts @@ -17,6 +17,7 @@ import {DocGlossaryUpdate} from '../schemas/DocGlossaryUpdate' import {Document} from '../schemas/Document' import {Body_create_doc_document__post} from '../schemas/Body_create_doc_document__post' import {DocumentProcessingSettings} from '../schemas/DocumentProcessingSettings' +import {Body_upload_xliff_document_upload_xliff_post} from '../schemas/Body_upload_xliff_document_upload_xliff_post' export const getDoc = async (doc_id: number): Promise => { return await api.get(`/document/${doc_id}`) @@ -53,7 +54,10 @@ export const setGlossaries = async (doc_id: number, content: DocGlossaryUpdate): } export const createDoc = async (data: Body_create_doc_document__post): Promise => { const formData = new FormData() - formData.append('file', data.file) + for (const key of Object.keys(data) as Array) { + const val = data[key]; + if (val !== undefined) formData.append(key, val instanceof Blob ? 
val : String(val)) + } return await api.post(`/document/`, formData) } export const processDoc = async (doc_id: number, content: DocumentProcessingSettings): Promise => { @@ -65,3 +69,14 @@ export const getDownloadDocLink = (doc_id: number): string => { export const getDownloadOriginalDocLink = (doc_id: number): string => { return getApiBase() + `/document/${doc_id}/download_original` } +export const getDownloadXliffLink = (doc_id: number): string => { + return getApiBase() + `/document/${doc_id}/download_xliff` +} +export const uploadXliff = async (data: Body_upload_xliff_document_upload_xliff_post): Promise => { + const formData = new FormData() + for (const key of Object.keys(data) as Array) { + const val = data[key]; + if (val !== undefined) formData.append(key, val instanceof Blob ? val : String(val)) + } + return await api.post(`/document/upload_xliff`, formData) +} diff --git a/frontend/src/client/services/GlossaryService.ts b/frontend/src/client/services/GlossaryService.ts index fa40b36..06e68d7 100644 --- a/frontend/src/client/services/GlossaryService.ts +++ b/frontend/src/client/services/GlossaryService.ts @@ -41,6 +41,9 @@ export const deleteGlossaryRecord = async (record_id: number): Promise => { const formData = new FormData() - formData.append('file', data.file) + for (const key of Object.keys(data) as Array) { + const val = data[key]; + if (val !== undefined) formData.append(key, val instanceof Blob ? 
val : String(val)) + } return await api.post(`/glossary/load_file`, formData, {query: {glossary_name}}) } diff --git a/frontend/src/client/services/TmsService.ts b/frontend/src/client/services/TmsService.ts index bfb6a3c..5865700 100644 --- a/frontend/src/client/services/TmsService.ts +++ b/frontend/src/client/services/TmsService.ts @@ -30,7 +30,10 @@ export const getMemoryRecordsSimilar = async (tm_id: number, query: string): Pro } export const createMemoryFromFile = async (data: Body_create_memory_from_file_translation_memory_upload_post): Promise => { const formData = new FormData() - formData.append('file', data.file) + for (const key of Object.keys(data) as Array) { + const val = data[key]; + if (val !== undefined) formData.append(key, val instanceof Blob ? val : String(val)) + } return await api.post(`/translation_memory/upload`, formData) } export const getDownloadMemoryLink = (tm_id: number): string => { diff --git a/frontend/src/components/AddDocumentModal.vue b/frontend/src/components/AddDocumentModal.vue new file mode 100644 index 0000000..547738a --- /dev/null +++ b/frontend/src/components/AddDocumentModal.vue @@ -0,0 +1,26 @@ + + + diff --git a/frontend/src/components/UploadXliffModal.vue b/frontend/src/components/UploadXliffModal.vue new file mode 100644 index 0000000..60b02ce --- /dev/null +++ b/frontend/src/components/UploadXliffModal.vue @@ -0,0 +1,19 @@ + + + diff --git a/frontend/src/components/XliffUploadingDialog.vue b/frontend/src/components/XliffUploadingDialog.vue new file mode 100644 index 0000000..265b51b --- /dev/null +++ b/frontend/src/components/XliffUploadingDialog.vue @@ -0,0 +1,81 @@ + + + diff --git a/frontend/src/components/document/SegmentHistoryModal.vue b/frontend/src/components/document/SegmentHistoryModal.vue index 83bfdf7..0caf023 100644 --- a/frontend/src/components/document/SegmentHistoryModal.vue +++ b/frontend/src/components/document/SegmentHistoryModal.vue @@ -38,6 +38,7 @@ const changeTypeColors: Record = { 
glossary_substitution: '#10B981', repetition: '#14B8A6', manual_edit: '#F59E0B', + translation_update: '#EC4899', } const changeTypeLabels: Record = { @@ -47,6 +48,7 @@ const changeTypeLabels: Record = { glossary_substitution: 'Glossary Substitution', repetition: 'Repetition', manual_edit: 'Manual Edit', + translation_update: 'Translation Update', } const {data: history, status: historyStatus} = useQuery({ diff --git a/frontend/src/views/IndexView.vue b/frontend/src/views/IndexView.vue index ea1c903..a2133d6 100644 --- a/frontend/src/views/IndexView.vue +++ b/frontend/src/views/IndexView.vue @@ -1,12 +1,10 @@