Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Convert DocumentRecordHistoryChangeType enum to string

Revision ID: dcd734dfe9c4
Revises: 71a5eef94341
Create Date: 2026-02-17 02:32:03.411573

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# pylint: disable=E1101

# revision identifiers, used by Alembic.
revision: str = 'dcd734dfe9c4'
down_revision: Union[str, None] = '71a5eef94341'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None

# Native enum type that backed ``document_record_history.change_type`` before
# this revision. 'translation_update' is deliberately absent from the labels.
_CHANGE_TYPE_LABELS = (
    'initial_import',
    'machine_translation',
    'tm_substitution',
    'glossary_substitution',
    'repetition',
    'manual_edit',
)
change_type_enum = sa.Enum(
    *_CHANGE_TYPE_LABELS,
    name='documentrecordhistorychangetype',
)


def upgrade() -> None:
    """Turn ``document_record_history.change_type`` into a text column.

    Once the column no longer uses the enum, the enum type itself is dropped
    (only if it exists).
    """
    # A plain text column is used for flexibility instead of a database enum.
    new_column_type = sa.Text()
    op.alter_column(
        'document_record_history',
        'change_type',
        type_=new_column_type,
    )

    bind = op.get_bind()
    change_type_enum.drop(bind, checkfirst=True)


def downgrade() -> None:
    """Restore the enum-typed ``change_type`` column.

    Steps, in order:
      1. Rewrite 'translation_update' rows to 'manual_edit', because the
         recreated enum has no 'translation_update' label.
      2. Recreate the enum type if it does not exist.
      3. Cast the text column back to the enum type.
    """
    remap_sql = (
        "\n"
        "UPDATE document_record_history\n"
        "SET change_type = 'manual_edit'\n"
        "WHERE change_type = 'translation_update';\n"
    )
    retype_sql = (
        "\n"
        "ALTER TABLE document_record_history\n"
        "ALTER COLUMN change_type TYPE documentrecordhistorychangetype\n"
        "USING change_type::text::documentrecordhistorychangetype;\n"
    )

    # Step 1: remove values the enum cannot represent.
    op.execute(sa.text(remap_sql))

    # Step 2: bring back the enum type (without translation_update).
    bind = op.get_bind()
    change_type_enum.create(bind, checkfirst=True)

    # Step 3: convert the column from text to the enum.
    op.execute(sa.text(retype_sql))
5 changes: 4 additions & 1 deletion backend/app/documents/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class DocumentRecordHistoryChangeType(Enum):
glossary_substitution = "glossary_substitution"
repetition = "repetition"
manual_edit = "manual_edit"
translation_update = "translation_update"


class TmMode(Enum):
Expand Down Expand Up @@ -203,7 +204,9 @@ class DocumentRecordHistory(Base):
diff: Mapped[str] = mapped_column()
author_id: Mapped[int | None] = mapped_column(ForeignKey("user.id"), nullable=True)
timestamp: Mapped[datetime] = mapped_column(default=utc_time)
change_type: Mapped[DocumentRecordHistoryChangeType] = mapped_column()
change_type: Mapped[DocumentRecordHistoryChangeType] = mapped_column(
SqlEnum(DocumentRecordHistoryChangeType, native_enum=False)
)

record: Mapped["DocumentRecord"] = relationship(back_populates="history")
author: Mapped["User"] = relationship()
Expand Down
7 changes: 7 additions & 0 deletions backend/app/documents/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,10 @@ class DocumentUpdateResponse(BaseModel):
project_id: int | None

model_config = ConfigDict(from_attributes=True)


# Options accepted by the XLIFF upload operation.
class XliffUploadOptions(BaseModel):
    # Controls whether records that are already approved get overwritten;
    # off by default, so approved records are left alone.
    update_approved: bool = Field(
        False,
        description=(
            "If True, forcefully update approved records. "
            "If False (default), skip approved records."
        ),
    )
2 changes: 2 additions & 0 deletions backend/app/formats/xliff.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,8 @@ def extract_xliff_content(content: bytes) -> XliffData:
root: etree._Element = etree.fromstring(
content, parser=etree.XMLParser(recover=True)
)
if root is None:
raise RuntimeError("Error: Invalid XML file")

version = root.attrib.get("version")
if not version or version != "1.2":
Expand Down
36 changes: 35 additions & 1 deletion backend/app/routers/document.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
from typing import Annotated

from fastapi import APIRouter, Depends, File, HTTPException, Query, UploadFile, status
from fastapi import (
APIRouter,
Depends,
File,
Form,
HTTPException,
Query,
UploadFile,
status,
)
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session

Expand Down Expand Up @@ -260,3 +269,28 @@ def update_document(
status_code=status.HTTP_403_FORBIDDEN,
detail=str(e),
)


# Endpoint: accept an XLIFF file plus the ``update_approved`` form flag and
# delegate the update to DocumentService.upload_xliff. Service errors are
# translated to HTTP errors: EntityNotFound -> 404, BusinessLogicError -> 400.
@router.post("/upload_xliff")
async def upload_xliff(
    service: Annotated[DocumentService, Depends(get_service)],
    current_user: Annotated[int, Depends(get_current_user_id)],
    file: Annotated[UploadFile, File()],
    update_approved: Annotated[bool, Form()] = False,
) -> models.StatusMessage:
    upload_options = doc_schema.XliffUploadOptions(update_approved=update_approved)
    try:
        result = await service.upload_xliff(file, upload_options, current_user)
    except EntityNotFound as not_found:
        raise HTTPException(
            detail=str(not_found),
            status_code=status.HTTP_404_NOT_FOUND,
        )
    except BusinessLogicError as bad_request:
        raise HTTPException(
            detail=str(bad_request),
            status_code=status.HTTP_400_BAD_REQUEST,
        )
    return result
103 changes: 103 additions & 0 deletions backend/app/services/document_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,17 @@
from app.documents import schema as doc_schema
from app.documents.models import (
Document,
DocumentRecord,
DocumentRecordHistoryChangeType,
DocumentType,
TmMode,
XliffRecord,
)
from app.documents.query import (
DocumentRecordHistoryQuery,
GenericDocsQuery,
)
from app.documents.utils import compute_diff
from app.formats.txt import extract_txt_content
from app.formats.xliff import (
SegmentState,
Expand Down Expand Up @@ -54,6 +58,7 @@ def __init__(self, db: Session):
self.__query = GenericDocsQuery(db)
self.__glossary_query = GlossaryQuery(db)
self.__tm_query = TranslationMemoryQuery(db)
self.__history_query = DocumentRecordHistoryQuery(db)

def get_document(self, doc_id: int) -> doc_schema.DocumentWithRecordsCount:
"""
Expand Down Expand Up @@ -653,3 +658,101 @@ def encode_to_latin_1(self, original: str):
for c in original:
output += c if (c.isalnum() or c in "'().[] -") else "_"
return output

async def upload_xliff(
    self,
    file: UploadFile,
    options: doc_schema.XliffUploadOptions,
    current_user: int,
) -> models.StatusMessage:
    """
    Upload XLIFF file and update document records.

    The target document is identified by the ``original`` attribute of the
    first XLIFF ``file`` element. For every segment in the upload, the
    matching record of that document is updated unless one of these holds:

    - no record with the segment's ID exists in the document;
    - the record is approved and ``options.update_approved`` is False;
    - the uploaded translation is empty or equal to the current target.

    Each updated record gets a ``translation_update`` history entry.

    Args:
        file: Uploaded XLIFF file
        options: Upload options including update_approved flag
        current_user: ID of user performing the upload

    Returns:
        StatusMessage reporting how many records were updated

    Raises:
        EntityNotFound: If document not found
        BusinessLogicError: If the upload is empty, cannot be parsed as
            XLIFF, or does not carry a valid document ID
    """
    file_data = await file.read()
    if not file_data:
        raise BusinessLogicError("Invalid XLIFF format")

    # Pass the raw bytes straight to the parser. Decoding to str and
    # re-encoding added nothing for UTF-8 input and crashed with an
    # unhandled UnicodeDecodeError for any other encoding.
    try:
        xliff_data = extract_xliff_content(file_data)
    except RuntimeError as exc:
        raise BusinessLogicError("Invalid XLIFF format") from exc

    # Extract document ID from first file element
    file_element = xliff_data.xliff_file.find(
        ".//{urn:oasis:names:tc:xliff:document:1.2}file",
        namespaces=xliff_data.xliff_file.nsmap,
    )
    if file_element is None:
        raise BusinessLogicError("Invalid XLIFF format: no file element found")

    doc_id_str = file_element.get("original")
    if not doc_id_str:
        raise BusinessLogicError(
            "Invalid XLIFF format: file element missing original attribute"
        )

    try:
        doc_id = int(doc_id_str)
    except ValueError as exc:
        raise BusinessLogicError(
            f"Invalid document ID in XLIFF: {doc_id_str}"
        ) from exc

    # Validate document exists; the returned document itself is not needed.
    self._get_document_by_id(doc_id)

    # (record_id, diff) pairs, written in one bulk call after the loop.
    history_entries = []

    for segment in xliff_data.segments:
        record = (
            self.__db.query(DocumentRecord)
            .filter_by(document_id=doc_id, id=segment.id_)
            .first()
        )
        if record is None:
            continue

        # Approved records are only touched when explicitly requested.
        if record.approved and not options.update_approved:
            continue

        old_target = record.target
        new_target = segment.translation or ""
        # Only update if the new translation is different and not empty
        if not new_target or new_target == old_target:
            continue

        record.target = new_target
        record.approved = segment.approved
        history_entries.append(
            (record.id, compute_diff(old_target, new_target))
        )

    if history_entries:
        self.__history_query.bulk_create_history_entry(
            history_entries,
            current_user,
            DocumentRecordHistoryChangeType.translation_update,
        )

    self.__db.commit()
    updated_count = len(history_entries)
    return models.StatusMessage(
        message=f"Successfully updated {updated_count} record(s)"
    )
32 changes: 32 additions & 0 deletions backend/tests/fixtures/upload_test.xliff
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
XLIFF 1.2 fixture with five trans-units (ids 1 to 5); the file element's
"original" attribute is "1". Unit 3 is marked approved="yes" and unit 4 has
an empty target; the remaining units are unapproved with non-empty targets.
-->
<xliff version="1.2"
xmlns:sc="SmartcatXliff"
xmlns="urn:oasis:names:tc:xliff:document:1.2">
<file datatype="plaintext" date="2023-11-07T20:56:05.0017382Z" original="1" source-language="en" target-language="ru">
<header>
<tool tool-name="Smartcat.ai" tool-id="40c7d5b2-da26-4b36-84f1-8305b3aadb03" tool-version="1.0.11.0" />
</header>
<body>
<trans-unit id="1" approved="no" sc:locked="false" sc:last-modified-date="2023-10-19T18:29:22.711Z" sc:last-modified-user="Person 1">
<source xml:space="preserve">Regional Effects</source>
<target state="translated" xml:space="preserve">Региональные эффекты</target>
</trans-unit>
<trans-unit id="2" approved="no" sc:locked="false" sc:last-modified-date="2023-10-19T18:29:22.911Z" sc:last-modified-user="Person 2">
<source xml:space="preserve">User Interface</source>
<target state="needs-translation" xml:space="preserve">Пользовательский интерфейс</target>
</trans-unit>
<trans-unit id="3" approved="yes" sc:locked="false" sc:last-modified-date="2023-10-19T18:29:22.967Z" sc:last-modified-user="Person 3">
<source xml:space="preserve">Approved Segment</source>
<target state="final" xml:space="preserve">Утвержденный сегмент</target>
</trans-unit>
<trans-unit id="4" approved="no" sc:locked="false" sc:last-modified-date="2023-10-19T18:29:22.968Z" sc:last-modified-user="Person 4">
<source xml:space="preserve">123456789</source>
<target state="needs-translation" xml:space="preserve"></target>
</trans-unit>
<trans-unit id="5" approved="no" sc:locked="false" sc:last-modified-date="2023-10-19T18:29:23.968Z" sc:last-modified-user="Person 5">
<source xml:space="preserve">Something else</source>
<target state="translated" xml:space="preserve">Что-то еще</target>
</trans-unit>
</body>
</file>
</xliff>
Loading