diff --git a/backend/app/routers/translate.py b/backend/app/routers/translate.py
index d42681f..fd68f65 100644
--- a/backend/app/routers/translate.py
+++ b/backend/app/routers/translate.py
@@ -501,3 +501,139 @@ async def delete_translation(
logger.info(f"Deleted translation {lang} for task {task_id}")
return None
+
+
+@router.post("/{task_id}/pdf")
+async def download_translated_pdf(
+    task_id: str,
+    lang: str = Query(..., description="Target language code"),
+    db: Session = Depends(get_db),
+    current_user: User = Depends(get_current_user)
+):
+    """
+    Download a translated PDF with layout preservation.
+
+    - **task_id**: Task UUID
+    - **lang**: Target language code (e.g., 'en', 'ja')
+
+    Returns PDF file with translated content preserving original layout.
+
+    Raises HTTPException:
+    - 400 if `lang` is not a plain language tag, or the translation file is
+      not a JSON object / contains no translations
+    - 404 if the task, its OCR result, or the requested translation is missing
+    - 500 if PDF generation fails
+    """
+    import re
+    import tempfile
+
+    from starlette.background import BackgroundTask
+
+    from app.services.pdf_generator_service import pdf_generator_service
+    from app.services.translation_service import list_available_translations
+
+    # `lang` comes from the query string and is interpolated into a file name
+    # below, so only plain tags (letters/digits joined by '-' or '_') are
+    # accepted; this rules out path separators and "..".
+    if not re.fullmatch(r"[A-Za-z0-9]+(?:[-_][A-Za-z0-9]+)*", lang):
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Invalid language code"
+        )
+
+    # Verify task ownership
+    task = task_service.get_task_by_id(
+        db=db,
+        task_id=task_id,
+        user_id=current_user.id
+    )
+
+    if not task:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail="Task not found"
+        )
+
+    if not task.result_json_path:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail="OCR result not found"
+        )
+
+    result_json_path = Path(task.result_json_path)
+    if not result_json_path.exists():
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail="Result file not found"
+        )
+
+    # Find translation file
+    result_dir = result_json_path.parent
+    base_name = result_json_path.stem.replace('_result', '').replace('edit_', '')
+    translation_file = result_dir / f"{base_name}_translated_{lang}.json"
+
+    # Fall back to the fixed "edit_" naming when the derived name is absent
+    if not translation_file.exists():
+        translation_file = result_dir / f"edit_translated_{lang}.json"
+
+    if not translation_file.exists():
+        # List available translations for error message
+        available = list_available_translations(result_dir)
+        if available:
+            raise HTTPException(
+                status_code=status.HTTP_404_NOT_FOUND,
+                detail=f"Translation for language '{lang}' not found. Available translations: {', '.join(available)}"
+            )
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail="No translations found for this task. Please translate the document first."
+        )
+
+    # Check translation status in translation JSON
+    try:
+        with open(translation_file, 'r', encoding='utf-8') as f:
+            translation_data = json.load(f)
+    except json.JSONDecodeError:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Invalid translation file format"
+        )
+
+    # Valid JSON that is not an object (e.g. a list) has no `.get`; report it
+    # as a bad file instead of letting it surface as an unhandled 500.
+    if not isinstance(translation_data, dict):
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Invalid translation file format"
+        )
+
+    if not translation_data.get('translations'):
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Translation file is empty or incomplete"
+        )
+
+    # Generate translated PDF to temp file
+    output_filename = f"{task_id}_translated_{lang}.pdf"
+
+    # delete=False: the file must outlive this `with` so it can be streamed.
+    with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmp_file:
+        output_path = Path(tmp_file.name)
+
+    def _discard_output() -> None:
+        """Remove the temporary PDF, tolerating it already being gone."""
+        try:
+            output_path.unlink()
+        except FileNotFoundError:
+            pass
+
+    try:
+        # Get source file path for images if available
+        source_file_path = None
+        if task.file_path and Path(task.file_path).exists():
+            source_file_path = Path(task.file_path)
+
+        success = pdf_generator_service.generate_translated_pdf(
+            result_json_path=result_json_path,
+            translation_json_path=translation_file,
+            output_path=output_path,
+            source_file_path=source_file_path
+        )
+
+        if not success:
+            raise HTTPException(
+                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                detail="Failed to generate translated PDF"
+            )
+
+        logger.info(f"Generated translated PDF for task {task_id}, lang={lang}")
+
+        return FileResponse(
+            path=str(output_path),
+            filename=output_filename,
+            media_type="application/pdf",
+            headers={
+                "Content-Disposition": f'attachment; filename="{output_filename}"'
+            },
+            # Delete the temp file once the response has been sent; without
+            # this every successful download leaves a PDF in the temp dir.
+            background=BackgroundTask(_discard_output)
+        )
+
+    except HTTPException:
+        # Clean up temp file on HTTP errors
+        _discard_output()
+        raise
+    except Exception as e:
+        # Clean up temp file on unexpected errors
+        _discard_output()
+        logger.exception(f"Failed to generate translated PDF for task {task_id}")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Failed to generate translated PDF: {str(e)}"
+        ) from e
diff --git a/backend/app/services/pdf_generator_service.py b/backend/app/services/pdf_generator_service.py
index fb4c076..90b5314 100644
--- a/backend/app/services/pdf_generator_service.py
+++ b/backend/app/services/pdf_generator_service.py
@@ -3601,6 +3601,100 @@ class PDFGeneratorService:
except Exception as e:
logger.error(f"Failed to draw image element {element.element_id}: {e}")
+    def generate_translated_pdf(
+        self,
+        result_json_path: Path,
+        translation_json_path: Path,
+        output_path: Path,
+        source_file_path: Optional[Path] = None
+    ) -> bool:
+        """
+        Generate layout-preserving PDF with translated content.
+
+        This method loads the original result JSON and translation JSON,
+        merges them to replace original content with translations, and
+        generates a PDF with the translated content at original positions.
+        When the translation JSON carries no translations, the PDF is
+        generated from the original result JSON instead.
+
+        Args:
+            result_json_path: Path to original result JSON file (UnifiedDocument format)
+            translation_json_path: Path to translation JSON file
+            output_path: Path to save generated translated PDF
+            source_file_path: Optional path to original source file
+
+        Returns:
+            True if successful, False otherwise. Errors are logged and
+            reported through the return value rather than raised.
+        """
+        import tempfile
+
+        try:
+            # Import apply_translations from translation service
+            from app.services.translation_service import apply_translations
+
+            # Load original result JSON
+            logger.info(f"Loading result JSON: {result_json_path}")
+            with open(result_json_path, 'r', encoding='utf-8') as f:
+                result_json = json.load(f)
+
+            # Load translation JSON
+            logger.info(f"Loading translation JSON: {translation_json_path}")
+            with open(translation_json_path, 'r', encoding='utf-8') as f:
+                translation_json = json.load(f)
+
+            # A translation file that parses but is not a JSON object cannot
+            # hold a 'translations' mapping; treat it as a failure explicitly.
+            if not isinstance(translation_json, dict):
+                logger.error(
+                    f"Invalid translation JSON (expected object): {translation_json_path}"
+                )
+                return False
+
+            # Extract translations dict from translation JSON
+            translations = translation_json.get('translations', {})
+            if not translations:
+                logger.warning("No translations found in translation JSON")
+                # Still generate PDF with original content as fallback
+                return self.generate_layout_pdf(
+                    json_path=result_json_path,
+                    output_path=output_path,
+                    source_file_path=source_file_path
+                )
+
+            # Apply translations to result JSON
+            translated_doc = apply_translations(result_json, translations)
+
+            target_lang = translation_json.get('target_lang', 'unknown')
+            logger.info(
+                f"Generating translated PDF: {len(translations)} translations applied, "
+                f"target_lang={target_lang}"
+            )
+
+            # Write translated JSON to a temporary file and use existing generate_layout_pdf.
+            # The path is captured before anything is written so the `finally`
+            # below removes the file even when serialisation fails.
+            tmp_file = tempfile.NamedTemporaryFile(
+                mode='w',
+                suffix='_translated.json',
+                delete=False,
+                encoding='utf-8'
+            )
+            tmp_path = Path(tmp_file.name)
+
+            try:
+                with tmp_file:
+                    json.dump(translated_doc, tmp_file, ensure_ascii=False, indent=2)
+
+                # Use existing PDF generation with translated content
+                return self.generate_layout_pdf(
+                    json_path=tmp_path,
+                    output_path=output_path,
+                    source_file_path=source_file_path
+                )
+            finally:
+                # Clean up temporary file
+                if tmp_path.exists():
+                    tmp_path.unlink()
+
+        except FileNotFoundError as e:
+            logger.error(f"File not found: {e}")
+            return False
+        except json.JSONDecodeError as e:
+            logger.error(f"Invalid JSON: {e}")
+            return False
+        except Exception as e:
+            # logger.exception records the traceback through the logging
+            # system instead of printing it to stderr.
+            logger.exception(f"Failed to generate translated PDF: {e}")
+            return False
+
# Singleton instance
pdf_generator_service = PDFGeneratorService()
diff --git a/backend/app/services/translation_service.py b/backend/app/services/translation_service.py
index 12abebd..7515bfc 100644
--- a/backend/app/services/translation_service.py
+++ b/backend/app/services/translation_service.py
@@ -35,6 +35,166 @@ TABLE_TYPE = 'table'
SKIP_TYPES = {'page_number', 'image', 'chart', 'logo', 'reference'}
+def apply_translations(
+    result_json: Dict,
+    translations: Dict[str, Any]
+) -> Dict:
+    """
+    Apply translations to a result JSON document, creating a translated copy.
+
+    This function merges translation data with the original document structure,
+    replacing original content with translated content while preserving all
+    other properties (bounding boxes, styles, etc.). The input document is
+    never modified.
+
+    Args:
+        result_json: Original UnifiedDocument JSON data
+        translations: Translation dict mapping element_id to translated content.
+            For text elements: element_id -> translated_string
+            For tables: element_id -> {"cells": [{"row": int, "col": int, "content": str}]}
+
+    Returns:
+        A deep copy of result_json with translations applied
+    """
+    import copy
+
+    translated_doc = copy.deepcopy(result_json)
+    applied_count = 0
+
+    # `or []` also covers keys that are present but null in the JSON, which
+    # `.get(key, [])` would hand back as None.
+    for page in translated_doc.get('pages') or []:
+        for elem in page.get('elements') or []:
+            elem_id = elem.get('element_id', '')
+            elem_type = elem.get('type', '')
+
+            if elem_id not in translations:
+                continue
+
+            translation = translations[elem_id]
+
+            # Handle text elements (string translation)
+            if isinstance(translation, str):
+                if elem_type in TRANSLATABLE_TEXT_TYPES:
+                    elem['content'] = translation
+                    applied_count += 1
+                else:
+                    logger.warning(
+                        f"Translation for {elem_id} is string but element type is {elem_type}"
+                    )
+
+            # Handle table elements (cells translation)
+            elif isinstance(translation, dict) and 'cells' in translation:
+                if elem_type == TABLE_TYPE and isinstance(elem.get('content'), dict):
+                    _apply_table_translation(elem, translation)
+                    applied_count += 1
+                else:
+                    logger.warning(
+                        f"Translation for {elem_id} is table but element type is {elem_type}"
+                    )
+
+            # Anything else cannot be applied; say so instead of dropping it silently
+            else:
+                logger.warning(
+                    f"Unsupported translation payload for {elem_id}: "
+                    f"{type(translation).__name__}"
+                )
+
+    logger.info(f"Applied {applied_count} translations to document")
+    return translated_doc
+
+
+def _apply_table_translation(
+    table_elem: Dict,
+    translation: Dict[str, Any]
+) -> None:
+    """
+    Apply translation to a table element's cells, in place.
+
+    Translated cells are matched to original cells by their (row, col)
+    position. Translated entries that are not dicts, or that lack 'row',
+    'col' or 'content', are ignored: defaulting them would overwrite or
+    blank out the cell at (0, 0).
+
+    Args:
+        table_elem: Table element dict with content.cells
+        translation: Translation dict with 'cells' list
+    """
+    content = table_elem.get('content')
+    if not isinstance(content, dict):
+        return
+
+    original_cells = content.get('cells') or []
+    if not original_cells:
+        return
+
+    # Build lookup for translated cells by (row, col)
+    translated_cells = {}
+    for cell in translation.get('cells') or []:
+        if not isinstance(cell, dict):
+            continue
+        if 'row' not in cell or 'col' not in cell or 'content' not in cell:
+            continue
+        translated_cells[(cell['row'], cell['col'])] = cell['content']
+
+    # Apply translations to matching cells
+    for cell in original_cells:
+        key = (cell.get('row', 0), cell.get('col', 0))
+        if key in translated_cells:
+            cell['content'] = translated_cells[key]
+
+
+def load_translation_json(translation_path: Path) -> Optional[Dict]:
+    """
+    Read and parse a translation JSON file.
+
+    Args:
+        translation_path: Path to translation JSON file
+
+    Returns:
+        The parsed JSON content, or None when the file does not exist or
+        cannot be read/parsed (the failure is logged)
+    """
+    if translation_path.exists():
+        try:
+            with translation_path.open('r', encoding='utf-8') as handle:
+                payload = json.load(handle)
+        except Exception as e:
+            logger.error(f"Failed to load translation JSON: {e}")
+            payload = None
+        return payload
+
+    return None
+
+
+def find_translation_file(
+    result_dir: Path,
+    target_lang: str
+) -> Optional[Path]:
+    """
+    Find translation file for a given language in result directory.
+
+    The language code is compared literally against the end of each file
+    stem, so glob metacharacters in ``target_lang`` are never expanded.
+    When several files match, the first one in sorted path order is
+    returned, so the choice does not depend on directory listing order.
+
+    Args:
+        result_dir: Directory containing result files
+        target_lang: Target language code (e.g., 'en', 'zh-TW')
+
+    Returns:
+        Path to translation file or None if not found
+    """
+    # Look for *_translated_{lang}.json, matching the language part literally
+    wanted_suffix = f"_translated_{target_lang}"
+    matches = sorted(
+        path
+        for path in result_dir.glob("*_translated_*.json")
+        if path.stem.endswith(wanted_suffix)
+    )
+
+    return matches[0] if matches else None
+
+
+def list_available_translations(result_dir: Path) -> List[str]:
+    """
+    List all available translation languages for a result directory.
+
+    Args:
+        result_dir: Directory containing result files
+
+    Returns:
+        Sorted list of distinct language codes with available translations
+    """
+    languages = set()
+
+    for path in result_dir.glob("*_translated_*.json"):
+        # Extract language from filename: xxx_translated_{lang}.json
+        _, separator, lang = path.stem.rpartition('_translated_')
+        if separator and lang:
+            languages.add(lang)
+
+    # A set plus sorting: several files may exist per language, and the
+    # directory listing order is not guaranteed.
+    return sorted(languages)
+
+
@dataclass
class TranslationBatch:
"""A batch of items to translate together"""
diff --git a/backend/tests/api/test_translate_pdf_api.py b/backend/tests/api/test_translate_pdf_api.py
new file mode 100644
index 0000000..dbe254e
--- /dev/null
+++ b/backend/tests/api/test_translate_pdf_api.py
@@ -0,0 +1,727 @@
+"""
+API integration tests for Translated PDF Download endpoint.
+
+Tests the POST /api/v2/translate/{task_id}/pdf endpoint for downloading
+translated PDFs with layout preservation.
+
+Note: These tests use extensive mocking to avoid importing heavy dependencies
+like PaddleOCR and PyTorch which aren't available in the test environment.
+"""
+
+import pytest
+import json
+import sys
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+from datetime import datetime
+
+# Register stand-ins for the heavyweight packages before any app module is imported
+for _heavy_module in ('paddleocr', 'paddlex', 'torch', 'modelscope'):
+    sys.modules[_heavy_module] = MagicMock()
+
+from fastapi.testclient import TestClient
+from fastapi import FastAPI, Depends, HTTPException, status, Query
+from fastapi.responses import FileResponse
+from sqlalchemy import create_engine, Column, Integer, String, Boolean, Enum as SQLEnum
+from sqlalchemy.orm import sessionmaker, declarative_base
+import enum
+
+
+# Create test models without importing from app.
+# Base is the declarative base that the mock user and task tables below inherit from.
+Base = declarative_base()
+
+
+class TaskStatusEnum(enum.Enum):
+ """Task status values used by the test-only task table."""
+ PENDING = "pending"
+ PROCESSING = "processing"
+ COMPLETED = "completed"
+ FAILED = "failed"
+
+
+class MockUser(Base):
+ """Test-only user table, defined here so no app model has to be imported."""
+ __tablename__ = "users"
+ id = Column(Integer, primary_key=True, index=True)
+ email = Column(String, unique=True, index=True)
+ hashed_password = Column(String)
+ is_active = Column(Boolean, default=True)
+
+
+class MockTask(Base):
+ """Test-only task table holding the columns the translated-PDF endpoint reads."""
+ __tablename__ = "tasks"
+ id = Column(Integer, primary_key=True, index=True)
+ # Owner id; a plain integer column (no foreign key is declared here)
+ user_id = Column(Integer)
+ # Public task identifier used in the endpoint URL
+ task_id = Column(String, unique=True, index=True)
+ filename = Column(String)
+ status = Column(SQLEnum(TaskStatusEnum), default=TaskStatusEnum.PENDING)
+ # Location of the result JSON on disk; None when no result exists
+ result_json_path = Column(String, nullable=True)
+ # Location of the original uploaded file, if any
+ file_path = Column(String, nullable=True)
+
+
+# Create test database
+# File-backed SQLite database; the relative URL places test_translate_pdf.db in the working directory.
+SQLALCHEMY_DATABASE_URL = "sqlite:///./test_translate_pdf.db"
+# check_same_thread=False is passed through to the SQLite driver — presumably so the
+# session can be used from the thread that serves TestClient requests; confirm if changed.
+engine = create_engine(SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False})
+# Session factory bound to the engine above; used by the db_session fixture.
+TestingSessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+
+
+def create_test_app():
+ """
+ Create a minimal FastAPI app for testing the translate PDF endpoint.
+
+ The app registers a single POST route, /api/v2/translate/{task_id}/pdf,
+ whose handler is a stand-in written in this test module. Instead of
+ dependency injection, the handler reads the database session and the
+ current user from ``test_app.state``; callers must set
+ ``state.db_session`` and ``state.current_user`` before issuing requests.
+
+ Returns:
+ The configured FastAPI application.
+ """
+ test_app = FastAPI()
+
+ @test_app.post("/api/v2/translate/{task_id}/pdf")
+ async def download_translated_pdf(
+ task_id: str,
+ lang: str = Query(..., description="Target language code"),
+ ):
+ """Mock implementation of the translated PDF endpoint"""
+ # Imported at call time so a @patch on the module attribute is picked up
+ from app.services.pdf_generator_service import pdf_generator_service
+
+ # Get db_session and current_user from app state (set in test)
+ db = test_app.state.db_session
+ current_user = test_app.state.current_user
+
+ # Find task (only tasks owned by the current user are visible)
+ task = db.query(MockTask).filter(
+ MockTask.task_id == task_id,
+ MockTask.user_id == current_user.id
+ ).first()
+
+ if not task:
+ raise HTTPException(
+ status_code=status.HTTP_404_NOT_FOUND,
+ detail="Task not found"
+ )
+
+ if not task.result_json_path:
+ raise HTTPException(
+ status_code=status.HTTP_404_NOT_FOUND,
+ detail="OCR result not found"
+ )
+
+ result_json_path = Path(task.result_json_path)
+ if not result_json_path.exists():
+ raise HTTPException(
+ status_code=status.HTTP_404_NOT_FOUND,
+ detail="Result file not found"
+ )
+
+ # Find translation file
+ result_dir = result_json_path.parent
+ base_name = result_json_path.stem.replace('_result', '').replace('edit_', '')
+ translation_file = result_dir / f"{base_name}_translated_{lang}.json"
+
+ # Fall back to the fixed "edit_" file name
+ if not translation_file.exists():
+ translation_file = result_dir / f"edit_translated_{lang}.json"
+
+ if not translation_file.exists():
+ # List available translations
+ available = [f.stem.split("_translated_")[-1]
+ for f in result_dir.glob("*_translated_*.json")]
+ if available:
+ raise HTTPException(
+ status_code=status.HTTP_404_NOT_FOUND,
+ detail=f"Translation for language '{lang}' not found. Available translations: {', '.join(available)}"
+ )
+ else:
+ raise HTTPException(
+ status_code=status.HTTP_404_NOT_FOUND,
+ detail="No translations found for this task."
+ )
+
+ # Check translation content
+ try:
+ with open(translation_file, 'r', encoding='utf-8') as f:
+ translation_data = json.load(f)
+
+ # A missing or empty 'translations' entry is reported as 400
+ if not translation_data.get('translations'):
+ raise HTTPException(
+ status_code=status.HTTP_400_BAD_REQUEST,
+ detail="Translation file is empty or incomplete"
+ )
+ except json.JSONDecodeError:
+ raise HTTPException(
+ status_code=status.HTTP_400_BAD_REQUEST,
+ detail="Invalid translation file format"
+ )
+
+ # Generate PDF
+ import tempfile
+ output_filename = f"{task_id}_translated_{lang}.pdf"
+
+ # delete=False keeps the file on disk after the with-block so it can be returned
+ with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmp_file:
+ output_path = Path(tmp_file.name)
+
+ try:
+ # The source file is passed on only when it exists on disk
+ source_file_path = None
+ if task.file_path and Path(task.file_path).exists():
+ source_file_path = Path(task.file_path)
+
+ success = pdf_generator_service.generate_translated_pdf(
+ result_json_path=result_json_path,
+ translation_json_path=translation_file,
+ output_path=output_path,
+ source_file_path=source_file_path
+ )
+
+ if not success:
+ raise HTTPException(
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+ detail="Failed to generate translated PDF"
+ )
+
+ # NOTE(review): nothing here removes output_path after a successful response
+ return FileResponse(
+ path=str(output_path),
+ filename=output_filename,
+ media_type="application/pdf",
+ headers={
+ "Content-Disposition": f'attachment; filename="{output_filename}"'
+ }
+ )
+
+ except HTTPException:
+ # Remove the temp file, then let the HTTP error propagate unchanged
+ if output_path.exists():
+ output_path.unlink()
+ raise
+ except Exception as e:
+ # Remove the temp file and convert any other error into a 500
+ if output_path.exists():
+ output_path.unlink()
+ raise HTTPException(
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+ detail=f"Failed to generate translated PDF: {str(e)}"
+ )
+
+ return test_app
+
+
+@pytest.fixture(scope="function")
+def db_session():
+    """Yield a session on freshly created tables; close it and drop the tables afterwards."""
+    Base.metadata.create_all(bind=engine)
+    active_session = TestingSessionLocal()
+    try:
+        yield active_session
+    finally:
+        active_session.close()
+        Base.metadata.drop_all(bind=engine)
+
+
+@pytest.fixture
+def test_user(db_session):
+    """Persist and return the user that owns the test tasks."""
+    account = MockUser(
+        email="translate_test@example.com",
+        hashed_password="test_hash",
+        is_active=True,
+    )
+    db_session.add(account)
+    db_session.commit()
+    # Reload so database-generated fields such as the id are populated
+    db_session.refresh(account)
+    return account
+
+
+@pytest.fixture
+def test_app(db_session, test_user):
+    """Build the test app and attach the session and current user to its state."""
+    application = create_test_app()
+    state = application.state
+    state.db_session = db_session
+    state.current_user = test_user
+    return application
+
+
+@pytest.fixture
+def client(test_app):
+    """Return a TestClient bound to the test app."""
+    http_client = TestClient(test_app)
+    return http_client
+
+
+@pytest.fixture
+def test_task_with_result(db_session, test_user, tmp_path):
+    """Build a completed task whose directory holds a result JSON and a zh-TW translation.
+
+    Returns a ``(task, result_dir)`` tuple.
+    """
+    task_id = "test-translate-pdf-123"
+    result_dir = tmp_path / "results" / task_id
+    result_dir.mkdir(parents=True)
+
+    # Result JSON: one page containing a single text element
+    text_element = {
+        "element_id": "text_1",
+        "type": "text",
+        "content": "Hello World",
+        "bounding_box": {"x": 72, "y": 72, "width": 200, "height": 20},
+    }
+    first_page = {
+        "page_number": 1,
+        "width": 612,
+        "height": 792,
+        "elements": [text_element],
+    }
+    result_payload = {
+        "document_info": {"total_pages": 1, "processing_track": "Direct"},
+        "pages": [first_page],
+    }
+    result_json_path = result_dir / "edit_result.json"
+    result_json_path.write_text(json.dumps(result_payload), encoding='utf-8')
+
+    # Translation file for the single element
+    statistics = {
+        "total_elements": 1,
+        "translated_elements": 1,
+        "skipped_elements": 0,
+        "total_characters": 11,
+        "processing_time_seconds": 1.5,
+        "total_tokens": 50,
+    }
+    translation_payload = {
+        "task_id": task_id,
+        "target_lang": "zh-TW",
+        "translated_at": datetime.utcnow().isoformat() + "Z",
+        "provider": "dify",
+        "translations": {"text_1": "你好世界"},
+        "statistics": statistics,
+    }
+    translation_path = result_dir / "edit_translated_zh-TW.json"
+    translation_path.write_text(json.dumps(translation_payload), encoding='utf-8')
+
+    # Task row pointing at the files above; the source PDF path is never created
+    record = MockTask(
+        user_id=test_user.id,
+        task_id=task_id,
+        filename="test.pdf",
+        status=TaskStatusEnum.COMPLETED,
+        result_json_path=str(result_json_path),
+        file_path=str(tmp_path / "test.pdf"),
+    )
+    db_session.add(record)
+    db_session.commit()
+    db_session.refresh(record)
+
+    return record, result_dir
+
+
+@pytest.fixture
+def test_task_no_result(db_session, test_user):
+    """Persist and return a completed task that has no result JSON path."""
+    record = MockTask(
+        user_id=test_user.id,
+        task_id="test-no-result-456",
+        filename="test.pdf",
+        status=TaskStatusEnum.COMPLETED,
+        result_json_path=None,
+    )
+    db_session.add(record)
+    db_session.commit()
+    db_session.refresh(record)
+    return record
+
+
+@pytest.fixture
+def test_task_no_translation(db_session, test_user, tmp_path):
+    """Persist and return a task that has a result JSON on disk but no translation files."""
+    task_id = "test-no-translation-789"
+    result_dir = tmp_path / "results" / task_id
+    result_dir.mkdir(parents=True)
+
+    # Only the result JSON is written; the directory holds no *_translated_* file
+    empty_page = {"page_number": 1, "width": 612, "height": 792, "elements": []}
+    result_payload = {
+        "document_info": {"total_pages": 1, "processing_track": "Direct"},
+        "pages": [empty_page],
+    }
+    result_json_path = result_dir / "edit_result.json"
+    result_json_path.write_text(json.dumps(result_payload), encoding='utf-8')
+
+    record = MockTask(
+        user_id=test_user.id,
+        task_id=task_id,
+        filename="test.pdf",
+        status=TaskStatusEnum.COMPLETED,
+        result_json_path=str(result_json_path),
+    )
+    db_session.add(record)
+    db_session.commit()
+    db_session.refresh(record)
+
+    return record
+
+
+@pytest.fixture
+def test_task_empty_translation(db_session, test_user, tmp_path):
+    """Persist and return a task whose 'ja' translation file has an empty translations map."""
+    task_id = "test-empty-translation-101"
+    result_dir = tmp_path / "results" / task_id
+    result_dir.mkdir(parents=True)
+
+    # Result JSON with a single page and no elements
+    empty_page = {"page_number": 1, "width": 612, "height": 792, "elements": []}
+    result_payload = {
+        "document_info": {"total_pages": 1, "processing_track": "Direct"},
+        "pages": [empty_page],
+    }
+    result_json_path = result_dir / "edit_result.json"
+    result_json_path.write_text(json.dumps(result_payload), encoding='utf-8')
+
+    # Translation file exists but carries no translations
+    translation_payload = {
+        "task_id": task_id,
+        "target_lang": "ja",
+        "translations": {},
+    }
+    translation_path = result_dir / "edit_translated_ja.json"
+    translation_path.write_text(json.dumps(translation_payload), encoding='utf-8')
+
+    record = MockTask(
+        user_id=test_user.id,
+        task_id=task_id,
+        filename="test.pdf",
+        status=TaskStatusEnum.COMPLETED,
+        result_json_path=str(result_json_path),
+    )
+    db_session.add(record)
+    db_session.commit()
+    db_session.refresh(record)
+
+    return record
+
+
+@pytest.fixture
+def other_user(db_session):
+    """Persist and return a second user, used to exercise ownership checks."""
+    stranger = MockUser(
+        email="other_user@example.com",
+        hashed_password="other_hash",
+        is_active=True,
+    )
+    db_session.add(stranger)
+    db_session.commit()
+    db_session.refresh(stranger)
+    return stranger
+
+
+class TestTranslatedPDFDownload:
+    """Tests for POST /api/v2/translate/{task_id}/pdf endpoint"""
+
+    @patch('app.services.pdf_generator_service.pdf_generator_service')
+    def test_download_translated_pdf_success(
+        self, mock_pdf_service, client, db_session, test_user, test_task_with_result, tmp_path
+    ):
+        """Test successful translated PDF download"""
+        task, result_dir = test_task_with_result
+
+        # Create a mock PDF file for the response
+        pdf_bytes = b"%PDF-1.4 mock pdf content"
+        mock_pdf_path = tmp_path / "output.pdf"
+        mock_pdf_path.write_bytes(pdf_bytes)
+
+        def mock_generate(result_json_path, translation_json_path, output_path, source_file_path=None):
+            # Copy mock PDF to output path
+            output_path.write_bytes(mock_pdf_path.read_bytes())
+            return True
+
+        mock_pdf_service.generate_translated_pdf.side_effect = mock_generate
+
+        response = client.post(
+            f"/api/v2/translate/{task.task_id}/pdf?lang=zh-TW"
+        )
+
+        assert response.status_code == 200
+        assert response.headers["content-type"] == "application/pdf"
+        assert "attachment" in response.headers.get("content-disposition", "")
+        assert task.task_id in response.headers.get("content-disposition", "")
+        # The body must be exactly what the generator wrote to the output path
+        assert response.content == pdf_bytes
+
+        # Verify PDF service was called
+        mock_pdf_service.generate_translated_pdf.assert_called_once()
+
+    def test_download_pdf_task_not_found(self, client, db_session, test_user):
+        """Test 404 when task doesn't exist"""
+        response = client.post(
+            "/api/v2/translate/nonexistent-task-id/pdf?lang=zh-TW"
+        )
+
+        assert response.status_code == 404
+        assert "Task not found" in response.json()["detail"]
+
+    def test_download_pdf_no_result_json(self, client, db_session, test_user, test_task_no_result):
+        """Test 404 when task has no result JSON"""
+        response = client.post(
+            f"/api/v2/translate/{test_task_no_result.task_id}/pdf?lang=zh-TW"
+        )
+
+        assert response.status_code == 404
+        assert "OCR result not found" in response.json()["detail"]
+
+    def test_download_pdf_translation_not_found(
+        self, client, db_session, test_user, test_task_no_translation
+    ):
+        """Test 404 when translation for requested language doesn't exist"""
+        response = client.post(
+            f"/api/v2/translate/{test_task_no_translation.task_id}/pdf?lang=ko"
+        )
+
+        assert response.status_code == 404
+        # The fixture directory holds no translation files at all, so the
+        # "nothing translated yet" message is the one that must come back.
+        assert "No translations found" in response.json()["detail"]
+
+    def test_download_pdf_empty_translation(
+        self, client, db_session, test_user, test_task_empty_translation
+    ):
+        """Test 400 when translation file is empty"""
+        response = client.post(
+            f"/api/v2/translate/{test_task_empty_translation.task_id}/pdf?lang=ja"
+        )
+
+        assert response.status_code == 400
+        assert response.json()["detail"] == "Translation file is empty or incomplete"
+
+    def test_download_pdf_missing_lang_param(
+        self, client, db_session, test_user, test_task_with_result
+    ):
+        """Test 422 when lang query parameter is missing"""
+        task, _ = test_task_with_result
+
+        response = client.post(
+            f"/api/v2/translate/{task.task_id}/pdf"
+        )
+
+        # FastAPI returns 422 for missing required query params
+        assert response.status_code == 422
+
+    def test_download_pdf_wrong_user(
+        self, db_session, other_user, test_task_with_result, tmp_path
+    ):
+        """Test 404 when task belongs to different user"""
+        task, _ = test_task_with_result
+
+        # Create new app with other_user
+        app = create_test_app()
+        app.state.db_session = db_session
+        app.state.current_user = other_user
+        client = TestClient(app)
+
+        response = client.post(
+            f"/api/v2/translate/{task.task_id}/pdf?lang=zh-TW"
+        )
+
+        # The task lookup filters by owner, so another user's task is reported as missing
+        assert response.status_code == 404
+        assert "Task not found" in response.json()["detail"]
+
+    @patch('app.services.pdf_generator_service.pdf_generator_service')
+    def test_download_pdf_generation_failure(
+        self, mock_pdf_service, client, db_session, test_user, test_task_with_result
+    ):
+        """Test 500 when PDF generation fails"""
+        task, _ = test_task_with_result
+
+        # Mock PDF generation failure
+        mock_pdf_service.generate_translated_pdf.return_value = False
+
+        response = client.post(
+            f"/api/v2/translate/{task.task_id}/pdf?lang=zh-TW"
+        )
+
+        assert response.status_code == 500
+        assert "Failed to generate" in response.json()["detail"]
+
+    @patch('app.services.pdf_generator_service.pdf_generator_service')
+    def test_download_pdf_exception_handling(
+        self, mock_pdf_service, client, db_session, test_user, test_task_with_result
+    ):
+        """Test 500 when PDF generation raises exception"""
+        task, _ = test_task_with_result
+
+        # Mock PDF generation exception
+        mock_pdf_service.generate_translated_pdf.side_effect = Exception("Unexpected error")
+
+        response = client.post(
+            f"/api/v2/translate/{task.task_id}/pdf?lang=zh-TW"
+        )
+
+        assert response.status_code == 500
+        assert "Failed to generate" in response.json()["detail"]
+
+
+class TestTranslatedPDFWithMultipleLanguages:
+    """Tests for multiple translation languages"""
+
+    @pytest.fixture
+    def task_with_multiple_translations(self, db_session, test_user, tmp_path):
+        """Create task with translations in multiple languages"""
+        task_id = "test-multi-lang-222"
+        result_dir = tmp_path / "results" / task_id
+        result_dir.mkdir(parents=True)
+
+        # Create result JSON
+        result_json = {
+            "document_info": {"total_pages": 1, "processing_track": "Direct"},
+            "pages": [{
+                "page_number": 1,
+                "width": 612, "height": 792,
+                "elements": [
+                    {"element_id": "text_1", "type": "text", "content": "Hello",
+                     "bounding_box": {"x": 72, "y": 72, "width": 100, "height": 20}}
+                ]
+            }]
+        }
+        result_json_path = result_dir / "edit_result.json"
+        result_json_path.write_text(json.dumps(result_json), encoding='utf-8')
+
+        # Create translations for multiple languages
+        for lang, translation in [("zh-TW", "你好"), ("ja", "こんにちは"), ("ko", "안녕하세요")]:
+            translation_json = {
+                "task_id": task_id,
+                "target_lang": lang,
+                "translated_at": datetime.utcnow().isoformat() + "Z",
+                "translations": {"text_1": translation},
+                "statistics": {"translated_elements": 1}
+            }
+            (result_dir / f"edit_translated_{lang}.json").write_text(
+                json.dumps(translation_json), encoding='utf-8'
+            )
+
+        task = MockTask(
+            user_id=test_user.id,
+            task_id=task_id,
+            filename="test.pdf",
+            status=TaskStatusEnum.COMPLETED,
+            result_json_path=str(result_json_path)
+        )
+        db_session.add(task)
+        db_session.commit()
+        db_session.refresh(task)
+
+        return task, result_dir
+
+    @patch('app.services.pdf_generator_service.pdf_generator_service')
+    def test_download_different_languages(
+        self, mock_pdf_service, client, db_session, test_user,
+        task_with_multiple_translations, tmp_path
+    ):
+        """Test downloading PDFs for different languages"""
+        task, result_dir = task_with_multiple_translations
+
+        mock_pdf_path = tmp_path / "output.pdf"
+        mock_pdf_path.write_bytes(b"%PDF-1.4 mock")
+
+        def mock_generate(result_json_path, translation_json_path, output_path, source_file_path=None):
+            output_path.write_bytes(mock_pdf_path.read_bytes())
+            return True
+
+        mock_pdf_service.generate_translated_pdf.side_effect = mock_generate
+
+        for lang in ["zh-TW", "ja", "ko"]:
+            response = client.post(
+                f"/api/v2/translate/{task.task_id}/pdf?lang={lang}"
+            )
+
+            assert response.status_code == 200, f"Failed for language {lang}"
+            assert response.headers["content-type"] == "application/pdf"
+
+        # Verify PDF service was called 3 times
+        assert mock_pdf_service.generate_translated_pdf.call_count == 3
+
+    def test_download_nonexistent_language(
+        self, client, db_session, test_user, task_with_multiple_translations
+    ):
+        """Test 404 for language that doesn't exist"""
+        task, _ = task_with_multiple_translations
+
+        response = client.post(
+            f"/api/v2/translate/{task.task_id}/pdf?lang=de"
+        )
+
+        assert response.status_code == 404
+        detail = response.json()["detail"]
+        # The message must name the missing language and list every
+        # translation that does exist (order is not guaranteed).
+        assert "'de'" in detail
+        for available_lang in ("zh-TW", "ja", "ko"):
+            assert available_lang in detail
+
+
+class TestInvalidTranslationFile:
+    """Scenarios where the translation file on disk cannot be parsed"""
+
+    @pytest.fixture
+    def task_with_invalid_json(self, db_session, test_user, tmp_path):
+        """Persist a task whose 'en' translation file contains malformed JSON"""
+        task_id = "test-invalid-json-333"
+        result_dir = tmp_path / "results" / task_id
+        result_dir.mkdir(parents=True)
+
+        # Valid result JSON
+        empty_page = {"page_number": 1, "width": 612, "height": 792, "elements": []}
+        result_payload = {
+            "document_info": {"total_pages": 1, "processing_track": "Direct"},
+            "pages": [empty_page],
+        }
+        result_json_path = result_dir / "edit_result.json"
+        result_json_path.write_text(json.dumps(result_payload), encoding='utf-8')
+
+        # Translation file that is not parseable as JSON
+        broken_translation = result_dir / "edit_translated_en.json"
+        broken_translation.write_text("{ invalid json }", encoding='utf-8')
+
+        record = MockTask(
+            user_id=test_user.id,
+            task_id=task_id,
+            filename="test.pdf",
+            status=TaskStatusEnum.COMPLETED,
+            result_json_path=str(result_json_path),
+        )
+        db_session.add(record)
+        db_session.commit()
+        db_session.refresh(record)
+
+        return record
+
+    def test_download_pdf_invalid_json(
+        self, client, db_session, test_user, task_with_invalid_json
+    ):
+        """Expect 400 when the translation file has invalid JSON"""
+        url = f"/api/v2/translate/{task_with_invalid_json.task_id}/pdf?lang=en"
+        response = client.post(url)
+
+        assert response.status_code == 400
+        detail = response.json()["detail"]
+        assert "Invalid" in detail or "format" in detail.lower()
+
+
+class TestResultFileNotFound:
+    """Scenario where the task's result file is absent from disk"""
+
+    @pytest.fixture
+    def task_with_missing_file(self, db_session, test_user, tmp_path):
+        """Persist a task whose result_json_path points at a file that was never written"""
+        task_id = "test-missing-file-444"
+        result_dir = tmp_path / "results" / task_id
+        result_dir.mkdir(parents=True)
+
+        # The directory exists, the file inside it does not
+        missing_result = result_dir / "nonexistent_result.json"
+
+        record = MockTask(
+            user_id=test_user.id,
+            task_id=task_id,
+            filename="test.pdf",
+            status=TaskStatusEnum.COMPLETED,
+            result_json_path=str(missing_result),
+        )
+        db_session.add(record)
+        db_session.commit()
+        db_session.refresh(record)
+
+        return record
+
+    def test_download_pdf_result_file_missing(
+        self, client, db_session, test_user, task_with_missing_file
+    ):
+        """Expect 404 when the result file doesn't exist on disk"""
+        url = f"/api/v2/translate/{task_with_missing_file.task_id}/pdf?lang=zh-TW"
+        response = client.post(url)
+
+        assert response.status_code == 404
+        detail = response.json()["detail"]
+        assert "not found" in detail.lower()
+
+
+if __name__ == '__main__':
+    # Propagate pytest's exit status so running this file directly fails
+    # the calling shell/CI step when a test fails.
+    raise SystemExit(pytest.main([__file__, '-v']))
diff --git a/backend/tests/services/test_translated_pdf.py b/backend/tests/services/test_translated_pdf.py
new file mode 100644
index 0000000..981c4b0
--- /dev/null
+++ b/backend/tests/services/test_translated_pdf.py
@@ -0,0 +1,564 @@
+"""
+Unit tests for translated PDF generation functionality.
+
+Tests the generate_translated_pdf() method in PDFGeneratorService
+and track-specific behavior (Direct, OCR, Hybrid).
+"""
+
+import pytest
+import json
+import tempfile
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+
+from app.services.pdf_generator_service import PDFGeneratorService
+from app.services.translation_service import apply_translations
+
+
+class TestGenerateTranslatedPDF:
+ """Tests for generate_translated_pdf() method"""
+
+ @pytest.fixture
+ def pdf_service(self):
+ """Create PDF generator service instance"""
+ return PDFGeneratorService()
+
+ @pytest.fixture
+ def sample_result_json(self, tmp_path):
+ """Create sample result JSON file"""
+ result_data = {
+ "metadata": {
+ "processing_track": "direct",
+ "source_file": "test.pdf",
+ "page_count": 1
+ },
+ "pages": [
+ {
+ "page_number": 1,
+ "width": 612,
+ "height": 792,
+ "elements": [
+ {
+ "element_id": "text_1",
+ "type": "text",
+ "content": "Hello World",
+ "bounding_box": {
+ "x": 72,
+ "y": 720,
+ "width": 200,
+ "height": 20
+ },
+ "style_info": {
+ "font_size": 12,
+ "font_name": "Helvetica"
+ }
+ },
+ {
+ "element_id": "title_1",
+ "type": "title",
+ "content": "Document Title",
+ "bounding_box": {
+ "x": 72,
+ "y": 750,
+ "width": 300,
+ "height": 30
+ },
+ "style_info": {
+ "font_size": 18,
+ "font_name": "Helvetica-Bold"
+ }
+ }
+ ]
+ }
+ ]
+ }
+ result_file = tmp_path / "edit_result.json"
+ result_file.write_text(json.dumps(result_data), encoding='utf-8')
+ return result_file
+
+ @pytest.fixture
+ def sample_translation_json(self, tmp_path):
+ """Create sample translation JSON file"""
+ translation_data = {
+ "target_lang": "zh-TW",
+ "source_lang": "en",
+ "translated_at": "2024-01-01T00:00:00Z",
+ "translations": {
+ "text_1": "你好世界",
+ "title_1": "文件標題"
+ },
+ "statistics": {
+ "translated_elements": 2,
+ "total_characters": 100
+ }
+ }
+ translation_file = tmp_path / "edit_translated_zh-TW.json"
+ translation_file.write_text(json.dumps(translation_data), encoding='utf-8')
+ return translation_file
+
+    def test_generate_translated_pdf_success(
+        self, pdf_service, sample_result_json, sample_translation_json, tmp_path
+    ):
+        """A valid result/translation pair must produce a non-empty PDF file."""
+        pdf_path = tmp_path / "output.pdf"
+        call_kwargs = {
+            "result_json_path": sample_result_json,
+            "translation_json_path": sample_translation_json,
+            "output_path": pdf_path,
+        }
+        generated = pdf_service.generate_translated_pdf(**call_kwargs)
+
+        assert generated is True
+        assert pdf_path.exists()
+        assert pdf_path.stat().st_size > 0
+        # The first four bytes of the output must be the %PDF magic number
+        with pdf_path.open('rb') as stream:
+            magic = stream.read(4)
+        assert magic == b'%PDF'
+
+ def test_generate_translated_pdf_missing_result(
+ self, pdf_service, sample_translation_json, tmp_path
+ ):
+ """Test with missing result JSON file"""
+ output_path = tmp_path / "output.pdf"
+ missing_result = tmp_path / "non_existent.json"
+
+ success = pdf_service.generate_translated_pdf(
+ result_json_path=missing_result,
+ translation_json_path=sample_translation_json,
+ output_path=output_path
+ )
+
+ assert success is False
+ assert not output_path.exists()
+
+ def test_generate_translated_pdf_missing_translation(
+ self, pdf_service, sample_result_json, tmp_path
+ ):
+ """Test with missing translation JSON file"""
+ output_path = tmp_path / "output.pdf"
+ missing_translation = tmp_path / "non_existent_translation.json"
+
+ success = pdf_service.generate_translated_pdf(
+ result_json_path=sample_result_json,
+ translation_json_path=missing_translation,
+ output_path=output_path
+ )
+
+ assert success is False
+ assert not output_path.exists()
+
+ def test_generate_translated_pdf_empty_translations(
+ self, pdf_service, sample_result_json, tmp_path
+ ):
+ """Test with empty translations (should fall back to original)"""
+ empty_translation_data = {
+ "target_lang": "zh-TW",
+ "translations": {}
+ }
+ empty_translation_file = tmp_path / "empty_translated.json"
+ empty_translation_file.write_text(json.dumps(empty_translation_data), encoding='utf-8')
+ output_path = tmp_path / "output.pdf"
+
+ success = pdf_service.generate_translated_pdf(
+ result_json_path=sample_result_json,
+ translation_json_path=empty_translation_file,
+ output_path=output_path
+ )
+
+ # Should succeed even with empty translations (uses original content)
+ assert success is True
+ assert output_path.exists()
+
+ def test_generate_translated_pdf_partial_translations(
+ self, pdf_service, sample_result_json, tmp_path
+ ):
+ """Test with partial translations (some elements not translated)"""
+ partial_translation_data = {
+ "target_lang": "zh-TW",
+ "translations": {
+ "text_1": "你好世界"
+ # title_1 not translated
+ }
+ }
+ partial_translation_file = tmp_path / "partial_translated.json"
+ partial_translation_file.write_text(json.dumps(partial_translation_data), encoding='utf-8')
+ output_path = tmp_path / "output.pdf"
+
+ success = pdf_service.generate_translated_pdf(
+ result_json_path=sample_result_json,
+ translation_json_path=partial_translation_file,
+ output_path=output_path
+ )
+
+ assert success is True
+ assert output_path.exists()
+
+
+class TestTrackSpecificPDFGeneration:
+ """Tests for track-specific PDF generation behavior"""
+
+ @pytest.fixture
+ def pdf_service(self):
+ return PDFGeneratorService()
+
+ def create_result_with_track(self, tmp_path, track: str, with_table: bool = False):
+ """Helper to create result JSON with specific track"""
+ elements = [
+ {
+ "element_id": "text_1",
+ "type": "text",
+ "content": "Sample text content",
+ "bounding_box": {"x": 72, "y": 720, "width": 200, "height": 20},
+ "style_info": {"font_size": 12}
+ }
+ ]
+
+ if with_table:
+ elements.append({
+ "element_id": "table_1",
+ "type": "table",
+ "content": {
+ "cells": [
+ {"row": 0, "col": 0, "content": "Header 1"},
+ {"row": 0, "col": 1, "content": "Header 2"},
+ {"row": 1, "col": 0, "content": "Data 1"},
+ {"row": 1, "col": 1, "content": "Data 2"},
+ ]
+ },
+ "bounding_box": {"x": 72, "y": 500, "width": 400, "height": 100}
+ })
+
+ result_data = {
+ "metadata": {
+ "processing_track": track,
+ "source_file": f"test_{track}.pdf",
+ "page_count": 1
+ },
+ "pages": [
+ {
+ "page_number": 1,
+ "width": 612,
+ "height": 792,
+ "elements": elements
+ }
+ ]
+ }
+
+ result_file = tmp_path / f"{track}_result.json"
+ result_file.write_text(json.dumps(result_data), encoding='utf-8')
+ return result_file
+
+ def create_translation_for_track(self, tmp_path, track: str, with_table: bool = False):
+ """Helper to create translation JSON"""
+ translations = {
+ "text_1": "翻譯的文字內容"
+ }
+
+ if with_table:
+ translations["table_1"] = {
+ "cells": [
+ {"row": 0, "col": 0, "content": "表頭 1"},
+ {"row": 0, "col": 1, "content": "表頭 2"},
+ {"row": 1, "col": 0, "content": "資料 1"},
+ {"row": 1, "col": 1, "content": "資料 2"},
+ ]
+ }
+
+ translation_data = {
+ "target_lang": "zh-TW",
+ "translations": translations
+ }
+
+ translation_file = tmp_path / f"{track}_translated_zh-TW.json"
+ translation_file.write_text(json.dumps(translation_data), encoding='utf-8')
+ return translation_file
+
+ def test_direct_track_pdf_generation(self, pdf_service, tmp_path):
+ """Test PDF generation for Direct track documents"""
+ result_file = self.create_result_with_track(tmp_path, "direct")
+ translation_file = self.create_translation_for_track(tmp_path, "direct")
+ output_path = tmp_path / "direct_output.pdf"
+
+ success = pdf_service.generate_translated_pdf(
+ result_json_path=result_file,
+ translation_json_path=translation_file,
+ output_path=output_path
+ )
+
+ assert success is True
+ assert output_path.exists()
+ assert output_path.stat().st_size > 0
+
+ def test_ocr_track_pdf_generation(self, pdf_service, tmp_path):
+ """Test PDF generation for OCR track documents"""
+ result_file = self.create_result_with_track(tmp_path, "ocr")
+ translation_file = self.create_translation_for_track(tmp_path, "ocr")
+ output_path = tmp_path / "ocr_output.pdf"
+
+ success = pdf_service.generate_translated_pdf(
+ result_json_path=result_file,
+ translation_json_path=translation_file,
+ output_path=output_path
+ )
+
+ assert success is True
+ assert output_path.exists()
+ assert output_path.stat().st_size > 0
+
+ def test_hybrid_track_pdf_generation(self, pdf_service, tmp_path):
+ """Test PDF generation for Hybrid track documents"""
+ result_file = self.create_result_with_track(tmp_path, "hybrid")
+ translation_file = self.create_translation_for_track(tmp_path, "hybrid")
+ output_path = tmp_path / "hybrid_output.pdf"
+
+ success = pdf_service.generate_translated_pdf(
+ result_json_path=result_file,
+ translation_json_path=translation_file,
+ output_path=output_path
+ )
+
+ assert success is True
+ assert output_path.exists()
+ assert output_path.stat().st_size > 0
+
+ def test_document_with_table_direct_track(self, pdf_service, tmp_path):
+ """Test PDF generation for Direct track document with tables"""
+ result_file = self.create_result_with_track(tmp_path, "direct", with_table=True)
+ translation_file = self.create_translation_for_track(tmp_path, "direct", with_table=True)
+ output_path = tmp_path / "direct_table_output.pdf"
+
+ success = pdf_service.generate_translated_pdf(
+ result_json_path=result_file,
+ translation_json_path=translation_file,
+ output_path=output_path
+ )
+
+ assert success is True
+ assert output_path.exists()
+ assert output_path.stat().st_size > 0
+
+ def test_document_with_table_ocr_track(self, pdf_service, tmp_path):
+ """Test PDF generation for OCR track document with tables"""
+ result_file = self.create_result_with_track(tmp_path, "ocr", with_table=True)
+ translation_file = self.create_translation_for_track(tmp_path, "ocr", with_table=True)
+ output_path = tmp_path / "ocr_table_output.pdf"
+
+ success = pdf_service.generate_translated_pdf(
+ result_json_path=result_file,
+ translation_json_path=translation_file,
+ output_path=output_path
+ )
+
+ assert success is True
+ assert output_path.exists()
+ assert output_path.stat().st_size > 0
+
+
+class TestTranslationMergeIntegration:
+ """Integration tests for translation merging with PDF generation"""
+
+ @pytest.fixture
+ def pdf_service(self):
+ return PDFGeneratorService()
+
+    def test_translations_applied_to_pdf(self, pdf_service, tmp_path):
+        """Test that translations are properly applied before PDF generation"""
+        # Create result with specific content
+        result_data = {
+            "metadata": {"processing_track": "direct"},
+            "pages": [
+                {
+                    "page_number": 1,
+                    "width": 612,
+                    "height": 792,
+                    "elements": [
+                        {
+                            "element_id": "text_1",
+                            "type": "text",
+                            "content": "ORIGINAL_MARKER_TEXT",
+                            "bounding_box": {"x": 72, "y": 720, "width": 200, "height": 20},
+                            "style_info": {"font_size": 12}
+                        }
+                    ]
+                }
+            ]
+        }
+        result_file = tmp_path / "result.json"
+        result_file.write_text(json.dumps(result_data), encoding='utf-8')
+
+        # Create translation
+        translation_data = {
+            "translations": {
+                "text_1": "TRANSLATED_MARKER_TEXT"
+            }
+        }
+        translation_file = tmp_path / "translation.json"
+        translation_file.write_text(json.dumps(translation_data), encoding='utf-8')
+
+        output_path = tmp_path / "output.pdf"
+
+        success = pdf_service.generate_translated_pdf(
+            result_json_path=result_file,
+            translation_json_path=translation_file,
+            output_path=output_path
+        )
+
+        assert success is True
+        assert output_path.read_bytes().startswith(b'%PDF')
+
+        # The PDF header alone says nothing about the text, so check the merge step directly
+        merged = apply_translations(result_data, translation_data["translations"])
+        merged_text = merged["pages"][0]["elements"][0]["content"]
+        assert merged_text == "TRANSLATED_MARKER_TEXT"
+        assert "ORIGINAL_MARKER_TEXT" not in json.dumps(merged)
+
+ def test_multi_page_translated_pdf(self, pdf_service, tmp_path):
+ """Test translated PDF generation for multi-page documents"""
+ result_data = {
+ "metadata": {"processing_track": "direct"},
+ "pages": [
+ {
+ "page_number": 1,
+ "width": 612,
+ "height": 792,
+ "elements": [
+ {
+ "element_id": "p1_text",
+ "type": "text",
+ "content": "Page 1 content",
+ "bounding_box": {"x": 72, "y": 720, "width": 200, "height": 20},
+ "style_info": {"font_size": 12}
+ }
+ ]
+ },
+ {
+ "page_number": 2,
+ "width": 612,
+ "height": 792,
+ "elements": [
+ {
+ "element_id": "p2_text",
+ "type": "text",
+ "content": "Page 2 content",
+ "bounding_box": {"x": 72, "y": 720, "width": 200, "height": 20},
+ "style_info": {"font_size": 12}
+ }
+ ]
+ }
+ ]
+ }
+ result_file = tmp_path / "multi_page_result.json"
+ result_file.write_text(json.dumps(result_data), encoding='utf-8')
+
+ translation_data = {
+ "translations": {
+ "p1_text": "第一頁內容",
+ "p2_text": "第二頁內容"
+ }
+ }
+ translation_file = tmp_path / "multi_page_translation.json"
+ translation_file.write_text(json.dumps(translation_data), encoding='utf-8')
+
+ output_path = tmp_path / "multi_page_output.pdf"
+
+ success = pdf_service.generate_translated_pdf(
+ result_json_path=result_file,
+ translation_json_path=translation_file,
+ output_path=output_path
+ )
+
+ assert success is True
+ assert output_path.exists()
+ assert output_path.stat().st_size > 0
+
+
+class TestErrorHandling:
+ """Tests for error handling in translated PDF generation"""
+
+ @pytest.fixture
+ def pdf_service(self):
+ return PDFGeneratorService()
+
+ def test_invalid_json_result(self, pdf_service, tmp_path):
+ """Test handling of invalid JSON in result file"""
+ invalid_result = tmp_path / "invalid.json"
+ invalid_result.write_text("{ invalid json }", encoding='utf-8')
+
+ translation_data = {"translations": {}}
+ translation_file = tmp_path / "translation.json"
+ translation_file.write_text(json.dumps(translation_data), encoding='utf-8')
+
+ output_path = tmp_path / "output.pdf"
+
+ success = pdf_service.generate_translated_pdf(
+ result_json_path=invalid_result,
+ translation_json_path=translation_file,
+ output_path=output_path
+ )
+
+ assert success is False
+
+ def test_invalid_json_translation(self, pdf_service, tmp_path):
+ """Test handling of invalid JSON in translation file"""
+ result_data = {
+ "pages": [{"page_number": 1, "width": 612, "height": 792, "elements": []}]
+ }
+ result_file = tmp_path / "result.json"
+ result_file.write_text(json.dumps(result_data), encoding='utf-8')
+
+ invalid_translation = tmp_path / "invalid_translation.json"
+ invalid_translation.write_text("{ invalid json }", encoding='utf-8')
+
+ output_path = tmp_path / "output.pdf"
+
+ success = pdf_service.generate_translated_pdf(
+ result_json_path=result_file,
+ translation_json_path=invalid_translation,
+ output_path=output_path
+ )
+
+ assert success is False
+
+    def test_temp_file_cleanup_on_success(self, pdf_service, tmp_path, monkeypatch):
+        """Test that temporary files are cleaned up after successful generation"""
+        result_data = {
+            "pages": [
+                {
+                    "page_number": 1,
+                    "width": 612,
+                    "height": 792,
+                    "elements": [
+                        {
+                            "element_id": "text_1",
+                            "type": "text",
+                            "content": "Test",
+                            "bounding_box": {"x": 72, "y": 720, "width": 100, "height": 20},
+                            "style_info": {"font_size": 12}
+                        }
+                    ]
+                }
+            ]
+        }
+        result_file = tmp_path / "result.json"
+        result_file.write_text(json.dumps(result_data), encoding='utf-8')
+
+        translation_data = {"translations": {"text_1": "測試"}}
+        translation_file = tmp_path / "translation.json"
+        translation_file.write_text(json.dumps(translation_data), encoding='utf-8')
+
+        output_path = tmp_path / "output.pdf"
+
+        # Point the default temp dir at an empty folder so leftover JSON files are visible
+        scratch_dir = tmp_path / "scratch"
+        scratch_dir.mkdir()
+        monkeypatch.setattr(tempfile, "tempdir", str(scratch_dir))
+        success = pdf_service.generate_translated_pdf(
+            result_json_path=result_file,
+            translation_json_path=translation_file,
+            output_path=output_path
+        )
+
+        assert success is True
+        # Exact temp filename is unknown; any JSON left in the isolated dir is a leak
+        assert list(scratch_dir.glob("*.json")) == []
diff --git a/backend/tests/services/test_translation_merge.py b/backend/tests/services/test_translation_merge.py
new file mode 100644
index 0000000..d227121
--- /dev/null
+++ b/backend/tests/services/test_translation_merge.py
@@ -0,0 +1,523 @@
+"""
+Unit tests for translation merging functionality.
+
+Tests the apply_translations() function and related utilities
+for merging translation data with UnifiedDocument structure.
+"""
+
+import pytest
+import json
+import tempfile
+from pathlib import Path
+
+from app.services.translation_service import (
+ apply_translations,
+ _apply_table_translation,
+ load_translation_json,
+ find_translation_file,
+ list_available_translations,
+ TRANSLATABLE_TEXT_TYPES,
+ TABLE_TYPE,
+)
+
+
+class TestApplyTranslations:
+ """Tests for apply_translations() function"""
+
+ def test_apply_text_translation(self):
+ """Test applying translation to text elements"""
+ result_json = {
+ "pages": [
+ {
+ "page_number": 1,
+ "elements": [
+ {
+ "element_id": "text_1",
+ "type": "text",
+ "content": "Hello World",
+ "bounding_box": {"x": 0, "y": 0, "width": 100, "height": 20}
+ }
+ ]
+ }
+ ]
+ }
+ translations = {
+ "text_1": "你好世界"
+ }
+
+ result = apply_translations(result_json, translations)
+
+ assert result["pages"][0]["elements"][0]["content"] == "你好世界"
+ # Original should be unchanged
+ assert result_json["pages"][0]["elements"][0]["content"] == "Hello World"
+
+ def test_apply_multiple_translations(self):
+ """Test applying translations to multiple elements"""
+ result_json = {
+ "pages": [
+ {
+ "page_number": 1,
+ "elements": [
+ {"element_id": "title_1", "type": "title", "content": "Title"},
+ {"element_id": "text_1", "type": "text", "content": "Body text"},
+ {"element_id": "header_1", "type": "header", "content": "Header"},
+ ]
+ }
+ ]
+ }
+ translations = {
+ "title_1": "標題",
+ "text_1": "正文",
+ "header_1": "頁首"
+ }
+
+ result = apply_translations(result_json, translations)
+
+ assert result["pages"][0]["elements"][0]["content"] == "標題"
+ assert result["pages"][0]["elements"][1]["content"] == "正文"
+ assert result["pages"][0]["elements"][2]["content"] == "頁首"
+
+ def test_preserve_non_translated_elements(self):
+ """Test that elements without translations are preserved"""
+ result_json = {
+ "pages": [
+ {
+ "page_number": 1,
+ "elements": [
+ {"element_id": "text_1", "type": "text", "content": "Translate me"},
+ {"element_id": "text_2", "type": "text", "content": "Keep me"},
+ ]
+ }
+ ]
+ }
+ translations = {
+ "text_1": "翻譯我"
+ }
+
+ result = apply_translations(result_json, translations)
+
+ assert result["pages"][0]["elements"][0]["content"] == "翻譯我"
+ assert result["pages"][0]["elements"][1]["content"] == "Keep me"
+
+ def test_preserve_element_properties(self):
+ """Test that element properties (bounding_box, style_info) are preserved"""
+ result_json = {
+ "pages": [
+ {
+ "page_number": 1,
+ "elements": [
+ {
+ "element_id": "text_1",
+ "type": "text",
+ "content": "Original",
+ "bounding_box": {"x": 10, "y": 20, "width": 100, "height": 30},
+ "style_info": {"font_size": 12, "font_name": "Arial"}
+ }
+ ]
+ }
+ ]
+ }
+ translations = {"text_1": "Translated"}
+
+ result = apply_translations(result_json, translations)
+
+ elem = result["pages"][0]["elements"][0]
+ assert elem["content"] == "Translated"
+ assert elem["bounding_box"] == {"x": 10, "y": 20, "width": 100, "height": 30}
+ assert elem["style_info"] == {"font_size": 12, "font_name": "Arial"}
+
+ def test_multi_page_document(self):
+ """Test translation across multiple pages"""
+ result_json = {
+ "pages": [
+ {
+ "page_number": 1,
+ "elements": [{"element_id": "p1_text", "type": "text", "content": "Page 1"}]
+ },
+ {
+ "page_number": 2,
+ "elements": [{"element_id": "p2_text", "type": "text", "content": "Page 2"}]
+ }
+ ]
+ }
+ translations = {
+ "p1_text": "第一頁",
+ "p2_text": "第二頁"
+ }
+
+ result = apply_translations(result_json, translations)
+
+ assert result["pages"][0]["elements"][0]["content"] == "第一頁"
+ assert result["pages"][1]["elements"][0]["content"] == "第二頁"
+
+    def test_all_translatable_types(self):
+        """Every type listed in TRANSLATABLE_TEXT_TYPES must receive its translation."""
+        ordered_types = list(TRANSLATABLE_TEXT_TYPES)
+        ids = [f"{kind}_{pos}" for pos, kind in enumerate(ordered_types)]
+        elements = [
+            {"element_id": ident, "type": kind, "content": f"Original {kind}"}
+            for ident, kind in zip(ids, ordered_types)
+        ]
+        translations = {
+            ident: f"Translated {kind}" for ident, kind in zip(ids, ordered_types)
+        }
+
+        document = {"pages": [{"page_number": 1, "elements": elements}]}
+        translated = apply_translations(document, translations)
+        produced = translated["pages"][0]["elements"]
+
+        for pos, kind in enumerate(ordered_types):
+            assert produced[pos]["content"] == f"Translated {kind}"
+
+ def test_skip_non_translatable_types(self):
+ """Test that non-translatable types are not modified even with translation"""
+ result_json = {
+ "pages": [
+ {
+ "page_number": 1,
+ "elements": [
+ {"element_id": "img_1", "type": "image", "content": "image.png"},
+ {"element_id": "chart_1", "type": "chart", "content": "chart data"},
+ ]
+ }
+ ]
+ }
+ # Even though translations exist, image/chart should not be modified
+ translations = {
+ "img_1": "Should not apply",
+ "chart_1": "Should not apply"
+ }
+
+ result = apply_translations(result_json, translations)
+
+ assert result["pages"][0]["elements"][0]["content"] == "image.png"
+ assert result["pages"][0]["elements"][1]["content"] == "chart data"
+
+ def test_empty_translations(self):
+ """Test with empty translations dict"""
+ result_json = {
+ "pages": [
+ {
+ "page_number": 1,
+ "elements": [{"element_id": "text_1", "type": "text", "content": "Original"}]
+ }
+ ]
+ }
+
+ result = apply_translations(result_json, {})
+
+ assert result["pages"][0]["elements"][0]["content"] == "Original"
+
+ def test_empty_document(self):
+ """Test with empty document"""
+ result_json = {"pages": []}
+ translations = {"text_1": "Translation"}
+
+ result = apply_translations(result_json, translations)
+
+ assert result["pages"] == []
+
+
+class TestApplyTableTranslation:
+ """Tests for _apply_table_translation() function"""
+
+ def test_apply_table_cell_translation(self):
+ """Test applying translations to table cells"""
+ table_elem = {
+ "element_id": "table_1",
+ "type": "table",
+ "content": {
+ "cells": [
+ {"row": 0, "col": 0, "content": "Header 1"},
+ {"row": 0, "col": 1, "content": "Header 2"},
+ {"row": 1, "col": 0, "content": "Data 1"},
+ {"row": 1, "col": 1, "content": "Data 2"},
+ ]
+ }
+ }
+ translation = {
+ "cells": [
+ {"row": 0, "col": 0, "content": "表頭 1"},
+ {"row": 0, "col": 1, "content": "表頭 2"},
+ {"row": 1, "col": 0, "content": "資料 1"},
+ {"row": 1, "col": 1, "content": "資料 2"},
+ ]
+ }
+
+ _apply_table_translation(table_elem, translation)
+
+ cells = table_elem["content"]["cells"]
+ assert cells[0]["content"] == "表頭 1"
+ assert cells[1]["content"] == "表頭 2"
+ assert cells[2]["content"] == "資料 1"
+ assert cells[3]["content"] == "資料 2"
+
+ def test_partial_table_translation(self):
+ """Test partial translation of table cells"""
+ table_elem = {
+ "element_id": "table_1",
+ "type": "table",
+ "content": {
+ "cells": [
+ {"row": 0, "col": 0, "content": "A"},
+ {"row": 0, "col": 1, "content": "B"},
+ {"row": 1, "col": 0, "content": "C"},
+ {"row": 1, "col": 1, "content": "D"},
+ ]
+ }
+ }
+ # Only translate some cells
+ translation = {
+ "cells": [
+ {"row": 0, "col": 0, "content": "甲"},
+ {"row": 1, "col": 1, "content": "丁"},
+ ]
+ }
+
+ _apply_table_translation(table_elem, translation)
+
+ cells = table_elem["content"]["cells"]
+ assert cells[0]["content"] == "甲" # Translated
+ assert cells[1]["content"] == "B" # Original
+ assert cells[2]["content"] == "C" # Original
+ assert cells[3]["content"] == "丁" # Translated
+
+ def test_table_with_empty_cells(self):
+ """Test table with empty cells list"""
+ table_elem = {
+ "element_id": "table_1",
+ "type": "table",
+ "content": {"cells": []}
+ }
+ translation = {
+ "cells": [{"row": 0, "col": 0, "content": "New"}]
+ }
+
+ # Should not raise error
+ _apply_table_translation(table_elem, translation)
+ assert table_elem["content"]["cells"] == []
+
+ def test_table_translation_via_apply_translations(self):
+ """Test table translation through main apply_translations function"""
+ result_json = {
+ "pages": [
+ {
+ "page_number": 1,
+ "elements": [
+ {
+ "element_id": "table_1",
+ "type": "table",
+ "content": {
+ "cells": [
+ {"row": 0, "col": 0, "content": "Name"},
+ {"row": 0, "col": 1, "content": "Value"},
+ ]
+ }
+ }
+ ]
+ }
+ ]
+ }
+ translations = {
+ "table_1": {
+ "cells": [
+ {"row": 0, "col": 0, "content": "名稱"},
+ {"row": 0, "col": 1, "content": "數值"},
+ ]
+ }
+ }
+
+ result = apply_translations(result_json, translations)
+
+ cells = result["pages"][0]["elements"][0]["content"]["cells"]
+ assert cells[0]["content"] == "名稱"
+ assert cells[1]["content"] == "數值"
+
+
+class TestTranslationFileUtilities:
+ """Tests for translation file utility functions"""
+
+ def test_load_translation_json(self, tmp_path):
+ """Test loading translation JSON file"""
+ translation_data = {
+ "translations": {"text_1": "Translation"},
+ "target_lang": "zh-TW"
+ }
+ translation_file = tmp_path / "test_translated_zh-TW.json"
+ translation_file.write_text(json.dumps(translation_data), encoding='utf-8')
+
+ result = load_translation_json(translation_file)
+
+ assert result is not None
+ assert result["translations"]["text_1"] == "Translation"
+ assert result["target_lang"] == "zh-TW"
+
+ def test_load_translation_json_not_found(self, tmp_path):
+ """Test loading non-existent translation file"""
+ non_existent = tmp_path / "does_not_exist.json"
+
+ result = load_translation_json(non_existent)
+
+ assert result is None
+
+ def test_find_translation_file(self, tmp_path):
+ """Test finding translation file by language"""
+ # Create test files
+ (tmp_path / "doc_translated_en.json").write_text("{}", encoding='utf-8')
+ (tmp_path / "doc_translated_zh-TW.json").write_text("{}", encoding='utf-8')
+
+ result = find_translation_file(tmp_path, "zh-TW")
+
+ assert result is not None
+ assert result.name == "doc_translated_zh-TW.json"
+
+ def test_find_translation_file_not_found(self, tmp_path):
+ """Test finding non-existent translation file"""
+ (tmp_path / "doc_translated_en.json").write_text("{}", encoding='utf-8')
+
+ result = find_translation_file(tmp_path, "ja")
+
+ assert result is None
+
+ def test_list_available_translations(self, tmp_path):
+ """Test listing available translation languages"""
+ (tmp_path / "doc_translated_en.json").write_text("{}", encoding='utf-8')
+ (tmp_path / "doc_translated_zh-TW.json").write_text("{}", encoding='utf-8')
+ (tmp_path / "doc_translated_ja.json").write_text("{}", encoding='utf-8')
+ (tmp_path / "other_file.json").write_text("{}", encoding='utf-8')
+
+ result = list_available_translations(tmp_path)
+
+ assert set(result) == {"en", "zh-TW", "ja"}
+
+ def test_list_available_translations_empty(self, tmp_path):
+ """Test listing when no translations exist"""
+ (tmp_path / "result.json").write_text("{}", encoding='utf-8')
+
+ result = list_available_translations(tmp_path)
+
+ assert result == []
+
+
+class TestDeepCopyBehavior:
+ """Tests to verify deep copy behavior"""
+
+ def test_original_not_modified(self):
+ """Test that original document is not modified"""
+ original = {
+ "pages": [
+ {
+ "page_number": 1,
+ "elements": [
+ {"element_id": "text_1", "type": "text", "content": "Original"}
+ ]
+ }
+ ]
+ }
+ original_content = original["pages"][0]["elements"][0]["content"]
+ translations = {"text_1": "Modified"}
+
+ result = apply_translations(original, translations)
+
+ # Original should be unchanged
+ assert original["pages"][0]["elements"][0]["content"] == original_content
+ # Result should have translation
+ assert result["pages"][0]["elements"][0]["content"] == "Modified"
+
+    def test_nested_objects_are_copied(self):
+        """Test that nested objects are properly deep copied"""
+        original = {
+            "pages": [
+                {
+                    "page_number": 1,
+                    "elements": [
+                        {
+                            "element_id": "table_1",
+                            "type": "table",
+                            "content": {
+                                "cells": [
+                                    {"row": 0, "col": 0, "content": "Original"}
+                                ]
+                            }
+                        }
+                    ]
+                }
+            ]
+        }
+        translations = {"table_1": {"cells": [{"row": 0, "col": 0, "content": "Modified"}]}}
+
+        result = apply_translations(original, translations)
+
+        original_cell = original["pages"][0]["elements"][0]["content"]["cells"][0]
+        result_cell = result["pages"][0]["elements"][0]["content"]["cells"][0]
+        # The source keeps its text, the result is translated, and the cells are distinct objects
+        assert original_cell["content"] == "Original"
+        assert result_cell["content"] == "Modified"
+        assert result_cell is not original_cell
+
+
+class TestEdgeCases:
+ """Tests for edge cases and error handling"""
+
+ def test_missing_element_id(self):
+ """Test handling elements without element_id"""
+ result_json = {
+ "pages": [
+ {
+ "page_number": 1,
+ "elements": [
+ {"type": "text", "content": "No ID element"}
+ ]
+ }
+ ]
+ }
+ translations = {"text_1": "Translation"}
+
+ # Should not raise error
+ result = apply_translations(result_json, translations)
+ assert result["pages"][0]["elements"][0]["content"] == "No ID element"
+
+ def test_missing_type(self):
+ """Test handling elements without type"""
+ result_json = {
+ "pages": [
+ {
+ "page_number": 1,
+ "elements": [
+ {"element_id": "elem_1", "content": "No type"}
+ ]
+ }
+ ]
+ }
+ translations = {"elem_1": "Translation"}
+
+ # Should not raise error, should not apply translation without matching type
+ result = apply_translations(result_json, translations)
+ # Content unchanged because type doesn't match TRANSLATABLE_TEXT_TYPES
+ assert result["pages"][0]["elements"][0]["content"] == "No type"
+
+ def test_unicode_translations(self):
+ """Test handling of various unicode characters"""
+ result_json = {
+ "pages": [
+ {
+ "page_number": 1,
+ "elements": [
+ {"element_id": "text_1", "type": "text", "content": "English"},
+ {"element_id": "text_2", "type": "text", "content": "More text"},
+ {"element_id": "text_3", "type": "text", "content": "Another"},
+ ]
+ }
+ ]
+ }
+ translations = {
+ "text_1": "日本語テキスト", # Japanese
+ "text_2": "한국어 텍스트", # Korean
+ "text_3": "العربية" # Arabic
+ }
+
+ result = apply_translations(result_json, translations)
+
+ assert result["pages"][0]["elements"][0]["content"] == "日本語テキスト"
+ assert result["pages"][0]["elements"][1]["content"] == "한국어 텍스트"
+ assert result["pages"][0]["elements"][2]["content"] == "العربية"
diff --git a/frontend/src/pages/TaskDetailPage.tsx b/frontend/src/pages/TaskDetailPage.tsx
index 547d37e..470dbf6 100644
--- a/frontend/src/pages/TaskDetailPage.tsx
+++ b/frontend/src/pages/TaskDetailPage.tsx
@@ -25,7 +25,8 @@ import {
Languages,
Globe,
CheckCircle,
- Trash2
+ Trash2,
+ FileOutput
} from 'lucide-react'
import type { ProcessingTrack, TranslationStatus, TranslationListItem } from '@/types/apiV2'
import { Badge } from '@/components/ui/badge'
@@ -327,6 +328,24 @@ export default function TaskDetailPage() {
}
}
+ const handleDownloadTranslatedPdf = async (lang: string) => {
+ if (!taskId) return
+ try {
+ await apiClientV2.downloadTranslatedPdf(taskId, lang)
+ toast({
+ title: '下載成功',
+ description: `翻譯 PDF (${lang}) 已下載`,
+ variant: 'success',
+ })
+ } catch (error: any) {
+ toast({
+ title: '下載失敗',
+ description: error.response?.data?.detail || t('errors.networkError'),
+ variant: 'destructive',
+ })
+ }
+ }
+
const getStatusBadge = (status: string) => {
switch (status) {
case 'completed':
@@ -603,7 +622,16 @@ export default function TaskDetailPage() {
className="gap-1"
>
- 下載
+ JSON
+
+