@router.post("/{task_id}/pdf")
async def download_translated_pdf(
    task_id: str,
    lang: str = Query(..., description="Target language code"),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Download a translated PDF with layout preservation.

    - **task_id**: Task UUID
    - **lang**: Target language code (e.g., 'en', 'ja')

    Returns PDF file with translated content preserving original layout.

    Error responses:

    - **400**: malformed language code, or translation file unreadable/empty
    - **404**: task, OCR result, or requested translation not found
    - **500**: PDF generation failed
    """
    import re
    import tempfile

    from starlette.background import BackgroundTask

    from app.services.pdf_generator_service import pdf_generator_service
    from app.services.translation_service import list_available_translations

    # `lang` comes straight from the query string and is interpolated into
    # file names and the Content-Disposition header below. Restrict it to a
    # language-tag shape so it cannot carry path separators, "..", quotes or
    # line breaks.
    if len(lang) > 35 or not re.fullmatch(r"[A-Za-z0-9]+(?:[-_][A-Za-z0-9]+)*", lang):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid language code"
        )

    # Verify task ownership
    task = task_service.get_task_by_id(
        db=db,
        task_id=task_id,
        user_id=current_user.id
    )

    if not task:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Task not found"
        )

    if not task.result_json_path:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="OCR result not found"
        )

    result_json_path = Path(task.result_json_path)
    if not result_json_path.exists():
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Result file not found"
        )

    # Find translation file: first "<base>_translated_<lang>.json", then the
    # "edit_translated_<lang>.json" fallback name.
    result_dir = result_json_path.parent
    base_name = result_json_path.stem.replace('_result', '').replace('edit_', '')
    candidates = (
        result_dir / f"{base_name}_translated_{lang}.json",
        result_dir / f"edit_translated_{lang}.json",
    )
    translation_file = next((p for p in candidates if p.is_file()), None)

    if translation_file is None:
        # List available translations for error message
        available = list_available_translations(result_dir)
        if available:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Translation for language '{lang}' not found. Available translations: {', '.join(available)}"
            )
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No translations found for this task. Please translate the document first."
        )

    # Check that the translation JSON is well-formed and actually has content
    try:
        with open(translation_file, 'r', encoding='utf-8') as f:
            translation_data = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid translation file format"
        ) from exc

    if not isinstance(translation_data, dict):
        # Valid JSON but not an object: `.get` below would fail with a bare 500.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid translation file format"
        )

    if not translation_data.get('translations'):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Translation file is empty or incomplete"
        )

    # Generate translated PDF to temp file. delete=False because the response
    # streams the file after this function has returned; removal is scheduled
    # explicitly below.
    output_filename = f"{task_id}_translated_{lang}.pdf"

    with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmp_file:
        output_path = Path(tmp_file.name)

    try:
        # Get source file path for images if available
        source_file_path = None
        if task.file_path and Path(task.file_path).exists():
            source_file_path = Path(task.file_path)

        success = pdf_generator_service.generate_translated_pdf(
            result_json_path=result_json_path,
            translation_json_path=translation_file,
            output_path=output_path,
            source_file_path=source_file_path
        )

        if not success:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Failed to generate translated PDF"
            )

        logger.info(f"Generated translated PDF for task {task_id}, lang={lang}")

        return FileResponse(
            path=str(output_path),
            filename=output_filename,
            media_type="application/pdf",
            headers={
                "Content-Disposition": f'attachment; filename="{output_filename}"'
            },
            # Remove the temp file once the body has been sent; without this
            # every successful download leaves a PDF behind in the temp dir.
            background=BackgroundTask(output_path.unlink, missing_ok=True)
        )

    except HTTPException:
        # Clean up temp file on HTTP errors
        output_path.unlink(missing_ok=True)
        raise
    except Exception as exc:
        # Clean up temp file on unexpected errors
        output_path.unlink(missing_ok=True)
        logger.exception(f"Failed to generate translated PDF for task {task_id}")
        # Full details are in the log; do not echo internal exception text
        # (paths, library messages) back to the client.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to generate translated PDF"
        ) from exc
def generate_translated_pdf(
    self,
    result_json_path: Path,
    translation_json_path: Path,
    output_path: Path,
    source_file_path: Optional[Path] = None
) -> bool:
    """
    Generate layout-preserving PDF with translated content.

    Loads the original result JSON and the translation JSON, merges them via
    ``apply_translations`` and renders the merged document with
    ``generate_layout_pdf``. When the translation JSON has no ``translations``
    entry, the original document is rendered unchanged as a fallback.

    Args:
        result_json_path: Path to original result JSON file (UnifiedDocument format)
        translation_json_path: Path to translation JSON file
        output_path: Path to save generated translated PDF
        source_file_path: Optional path to original source file

    Returns:
        True if successful, False otherwise. Errors are logged, never raised.
    """
    import tempfile

    # Bound before any work so the ``finally`` clause can always tell whether
    # an intermediate file was created.
    tmp_path: Optional[Path] = None

    try:
        # Imported here rather than at module level, as in the original code
        from app.services.translation_service import apply_translations

        # Load original result JSON
        logger.info(f"Loading result JSON: {result_json_path}")
        with open(result_json_path, 'r', encoding='utf-8') as f:
            result_json = json.load(f)

        # Load translation JSON
        logger.info(f"Loading translation JSON: {translation_json_path}")
        with open(translation_json_path, 'r', encoding='utf-8') as f:
            translation_json = json.load(f)

        # Extract translations dict from translation JSON
        translations = translation_json.get('translations', {})
        if not translations:
            logger.warning("No translations found in translation JSON")
            # Still generate PDF with original content as fallback
            return self.generate_layout_pdf(
                json_path=result_json_path,
                output_path=output_path,
                source_file_path=source_file_path
            )

        # Apply translations to result JSON
        translated_doc = apply_translations(result_json, translations)

        target_lang = translation_json.get('target_lang', 'unknown')
        logger.info(
            f"Generating translated PDF: {len(translations)} translations applied, "
            f"target_lang={target_lang}"
        )

        # generate_layout_pdf takes a JSON path, so the merged document is
        # written to a temporary file first.
        with tempfile.NamedTemporaryFile(
            mode='w',
            suffix='_translated.json',
            delete=False,
            encoding='utf-8'
        ) as tmp_file:
            # Record the path BEFORE dumping: if json.dump raises, the
            # delete=False file would otherwise be left behind.
            tmp_path = Path(tmp_file.name)
            json.dump(translated_doc, tmp_file, ensure_ascii=False, indent=2)

        # Use existing PDF generation with translated content
        return self.generate_layout_pdf(
            json_path=tmp_path,
            output_path=output_path,
            source_file_path=source_file_path
        )

    except FileNotFoundError as e:
        logger.error(f"File not found: {e}")
        return False
    except json.JSONDecodeError as e:
        logger.error(f"Invalid JSON: {e}")
        return False
    except Exception as e:
        # logger.exception records the traceback through the logging system;
        # traceback.print_exc() wrote to stderr and bypassed log handlers.
        logger.exception(f"Failed to generate translated PDF: {e}")
        return False
    finally:
        # Clean up temporary file; a failed removal must not mask the result.
        if tmp_path is not None:
            try:
                tmp_path.unlink(missing_ok=True)
            except OSError as e:
                logger.warning(f"Could not remove temporary file {tmp_path}: {e}")
def apply_translations(
    result_json: Dict,
    translations: Dict[str, Any]
) -> Dict:
    """
    Return a translated deep copy of a result JSON document.

    Every element whose ``element_id`` appears in ``translations`` has its
    content replaced; all other properties (bounding boxes, styles, etc.)
    are carried over untouched. ``result_json`` itself is not modified.

    Args:
        result_json: Original UnifiedDocument JSON data
        translations: Mapping of element_id to translated content.
            For text elements: element_id -> translated_string
            For tables: element_id -> {"cells": [{"row": int, "col": int, "content": str}]}

    Returns:
        A deep copy of result_json with translations applied
    """
    import copy

    translated_doc = copy.deepcopy(result_json)
    applied_count = 0

    # Flatten pages -> elements into a single lazy stream
    all_elements = (
        element
        for page in translated_doc.get('pages', [])
        for element in page.get('elements', [])
    )

    for elem in all_elements:
        elem_id = elem.get('element_id', '')
        if elem_id not in translations:
            continue

        elem_type = elem.get('type', '')
        translation = translations[elem_id]

        if isinstance(translation, str):
            # Plain string translations only apply to text-like elements
            if elem_type not in TRANSLATABLE_TEXT_TYPES:
                logger.warning(
                    f"Translation for {elem_id} is string but element type is {elem_type}"
                )
                continue
            elem['content'] = translation
            applied_count += 1

        elif isinstance(translation, dict) and 'cells' in translation:
            # Cell-wise translations need a table element with dict content
            is_table = elem_type == TABLE_TYPE and isinstance(elem.get('content'), dict)
            if not is_table:
                logger.warning(
                    f"Translation for {elem_id} is table but element type is {elem_type}"
                )
                continue
            _apply_table_translation(elem, translation)
            applied_count += 1

    logger.info(f"Applied {applied_count} translations to document")
    return translated_doc
def _apply_table_translation(
    table_elem: Dict,
    translation: Dict[str, Any]
) -> None:
    """
    Apply translation to a table element's cells, in place.

    Cells are matched by their (row, col) position; a missing ``row`` or
    ``col`` key is treated as 0 on both sides. Original cells without a
    matching translated cell are left unchanged.

    Args:
        table_elem: Table element dict with content.cells
        translation: Translation dict with 'cells' list
    """
    original_cells = table_elem.get('content', {}).get('cells', [])
    if not original_cells:
        return

    # Lookup of translated text by (row, col); a later duplicate wins
    translated_by_position = {
        (cell.get('row', 0), cell.get('col', 0)): cell.get('content', '')
        for cell in translation.get('cells', [])
    }

    for cell in original_cells:
        position = (cell.get('row', 0), cell.get('col', 0))
        if position in translated_by_position:
            cell['content'] = translated_by_position[position]


def load_translation_json(translation_path: Path) -> Optional[Dict]:
    """
    Load translation JSON file.

    Args:
        translation_path: Path to translation JSON file

    Returns:
        Parsed translation JSON, or None if the file doesn't exist or
        cannot be read/parsed (the latter is logged as an error)
    """
    # Open directly instead of exists()-then-open: the file may disappear
    # between the check and the read.
    try:
        with open(translation_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        return None
    except (OSError, ValueError, RecursionError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError
        logger.error(f"Failed to load translation JSON: {e}")
        return None


def find_translation_file(
    result_dir: Path,
    target_lang: str
) -> Optional[Path]:
    """
    Find translation file for a given language in result directory.

    Looks for files named ``*_translated_{target_lang}.json``. If several
    match, the one that sorts first is returned, so the result does not
    depend on directory listing order.

    Args:
        result_dir: Directory containing result files
        target_lang: Target language code (e.g., 'en', 'zh-TW')

    Returns:
        Path to translation file or None if not found
    """
    from glob import escape

    # Escape the language code so characters such as '*', '?' or '[' are
    # matched literally rather than acting as wildcards.
    pattern = f"*_translated_{escape(target_lang)}.json"
    return min(result_dir.glob(pattern), default=None)
def list_available_translations(result_dir: Path) -> List[str]:
    """
    List all available translation languages for a result directory.

    Language codes are taken from file names of the form
    ``xxx_translated_{lang}.json``. Each code is reported once, and the
    list is sorted so the result is stable across filesystems.

    Args:
        result_dir: Directory containing result files

    Returns:
        Sorted list of distinct language codes with available translations
    """
    marker = '_translated_'
    languages = set()

    for path in result_dir.glob(f"*{marker}*.json"):
        # Everything after the last marker in the stem is the language code
        _, found, lang = path.stem.rpartition(marker)
        if found and lang:
            languages.add(lang)

    return sorted(languages)
+""" + +import pytest +import json +import sys +from pathlib import Path +from unittest.mock import patch, MagicMock +from datetime import datetime + +# Mock heavy dependencies before importing app modules +sys.modules['paddleocr'] = MagicMock() +sys.modules['paddlex'] = MagicMock() +sys.modules['torch'] = MagicMock() +sys.modules['modelscope'] = MagicMock() + +from fastapi.testclient import TestClient +from fastapi import FastAPI, Depends, HTTPException, status, Query +from fastapi.responses import FileResponse +from sqlalchemy import create_engine, Column, Integer, String, Boolean, Enum as SQLEnum +from sqlalchemy.orm import sessionmaker, declarative_base +import enum + + +# Create test models without importing from app +Base = declarative_base() + + +class TaskStatusEnum(enum.Enum): + PENDING = "pending" + PROCESSING = "processing" + COMPLETED = "completed" + FAILED = "failed" + + +class MockUser(Base): + __tablename__ = "users" + id = Column(Integer, primary_key=True, index=True) + email = Column(String, unique=True, index=True) + hashed_password = Column(String) + is_active = Column(Boolean, default=True) + + +class MockTask(Base): + __tablename__ = "tasks" + id = Column(Integer, primary_key=True, index=True) + user_id = Column(Integer) + task_id = Column(String, unique=True, index=True) + filename = Column(String) + status = Column(SQLEnum(TaskStatusEnum), default=TaskStatusEnum.PENDING) + result_json_path = Column(String, nullable=True) + file_path = Column(String, nullable=True) + + +# Create test database +SQLALCHEMY_DATABASE_URL = "sqlite:///./test_translate_pdf.db" +engine = create_engine(SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}) +TestingSessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + + +def create_test_app(): + """Create a minimal FastAPI app for testing the translate PDF endpoint""" + test_app = FastAPI() + + @test_app.post("/api/v2/translate/{task_id}/pdf") + async def download_translated_pdf( + 
task_id: str, + lang: str = Query(..., description="Target language code"), + ): + """Mock implementation of the translated PDF endpoint""" + from app.services.pdf_generator_service import pdf_generator_service + + # Get db_session and current_user from app state (set in test) + db = test_app.state.db_session + current_user = test_app.state.current_user + + # Find task + task = db.query(MockTask).filter( + MockTask.task_id == task_id, + MockTask.user_id == current_user.id + ).first() + + if not task: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Task not found" + ) + + if not task.result_json_path: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="OCR result not found" + ) + + result_json_path = Path(task.result_json_path) + if not result_json_path.exists(): + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Result file not found" + ) + + # Find translation file + result_dir = result_json_path.parent + base_name = result_json_path.stem.replace('_result', '').replace('edit_', '') + translation_file = result_dir / f"{base_name}_translated_{lang}.json" + + if not translation_file.exists(): + translation_file = result_dir / f"edit_translated_{lang}.json" + + if not translation_file.exists(): + # List available translations + available = [f.stem.split("_translated_")[-1] + for f in result_dir.glob("*_translated_*.json")] + if available: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Translation for language '{lang}' not found. Available translations: {', '.join(available)}" + ) + else: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="No translations found for this task." 
+ ) + + # Check translation content + try: + with open(translation_file, 'r', encoding='utf-8') as f: + translation_data = json.load(f) + + if not translation_data.get('translations'): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Translation file is empty or incomplete" + ) + except json.JSONDecodeError: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Invalid translation file format" + ) + + # Generate PDF + import tempfile + output_filename = f"{task_id}_translated_{lang}.pdf" + + with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmp_file: + output_path = Path(tmp_file.name) + + try: + source_file_path = None + if task.file_path and Path(task.file_path).exists(): + source_file_path = Path(task.file_path) + + success = pdf_generator_service.generate_translated_pdf( + result_json_path=result_json_path, + translation_json_path=translation_file, + output_path=output_path, + source_file_path=source_file_path + ) + + if not success: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to generate translated PDF" + ) + + return FileResponse( + path=str(output_path), + filename=output_filename, + media_type="application/pdf", + headers={ + "Content-Disposition": f'attachment; filename="{output_filename}"' + } + ) + + except HTTPException: + if output_path.exists(): + output_path.unlink() + raise + except Exception as e: + if output_path.exists(): + output_path.unlink() + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Failed to generate translated PDF: {str(e)}" + ) + + return test_app + + +@pytest.fixture(scope="function") +def db_session(): + """Create test database session""" + Base.metadata.create_all(bind=engine) + session = TestingSessionLocal() + try: + yield session + finally: + session.close() + Base.metadata.drop_all(bind=engine) + + +@pytest.fixture +def test_user(db_session): + """Create test user""" + user = 
MockUser( + email="translate_test@example.com", + hashed_password="test_hash", + is_active=True + ) + db_session.add(user) + db_session.commit() + db_session.refresh(user) + return user + + +@pytest.fixture +def test_app(db_session, test_user): + """Create test app with dependencies injected""" + app = create_test_app() + app.state.db_session = db_session + app.state.current_user = test_user + return app + + +@pytest.fixture +def client(test_app): + """Create test client""" + return TestClient(test_app) + + +@pytest.fixture +def test_task_with_result(db_session, test_user, tmp_path): + """Create test task with result JSON and translation file""" + task_id = "test-translate-pdf-123" + result_dir = tmp_path / "results" / task_id + result_dir.mkdir(parents=True) + + # Create result JSON + result_json = { + "document_info": { + "total_pages": 1, + "processing_track": "Direct" + }, + "pages": [ + { + "page_number": 1, + "width": 612, + "height": 792, + "elements": [ + { + "element_id": "text_1", + "type": "text", + "content": "Hello World", + "bounding_box": {"x": 72, "y": 72, "width": 200, "height": 20} + } + ] + } + ] + } + result_json_path = result_dir / "edit_result.json" + result_json_path.write_text(json.dumps(result_json), encoding='utf-8') + + # Create translation file + translation_json = { + "task_id": task_id, + "target_lang": "zh-TW", + "translated_at": datetime.utcnow().isoformat() + "Z", + "provider": "dify", + "translations": { + "text_1": "你好世界" + }, + "statistics": { + "total_elements": 1, + "translated_elements": 1, + "skipped_elements": 0, + "total_characters": 11, + "processing_time_seconds": 1.5, + "total_tokens": 50 + } + } + translation_path = result_dir / "edit_translated_zh-TW.json" + translation_path.write_text(json.dumps(translation_json), encoding='utf-8') + + # Create task + task = MockTask( + user_id=test_user.id, + task_id=task_id, + filename="test.pdf", + status=TaskStatusEnum.COMPLETED, + result_json_path=str(result_json_path), + 
file_path=str(tmp_path / "test.pdf") + ) + db_session.add(task) + db_session.commit() + db_session.refresh(task) + + return task, result_dir + + +@pytest.fixture +def test_task_no_result(db_session, test_user): + """Create test task without result JSON""" + task = MockTask( + user_id=test_user.id, + task_id="test-no-result-456", + filename="test.pdf", + status=TaskStatusEnum.COMPLETED, + result_json_path=None + ) + db_session.add(task) + db_session.commit() + db_session.refresh(task) + return task + + +@pytest.fixture +def test_task_no_translation(db_session, test_user, tmp_path): + """Create test task with result JSON but no translation""" + task_id = "test-no-translation-789" + result_dir = tmp_path / "results" / task_id + result_dir.mkdir(parents=True) + + # Create result JSON only (no translation file) + result_json = { + "document_info": {"total_pages": 1, "processing_track": "Direct"}, + "pages": [{"page_number": 1, "width": 612, "height": 792, "elements": []}] + } + result_json_path = result_dir / "edit_result.json" + result_json_path.write_text(json.dumps(result_json), encoding='utf-8') + + task = MockTask( + user_id=test_user.id, + task_id=task_id, + filename="test.pdf", + status=TaskStatusEnum.COMPLETED, + result_json_path=str(result_json_path) + ) + db_session.add(task) + db_session.commit() + db_session.refresh(task) + + return task + + +@pytest.fixture +def test_task_empty_translation(db_session, test_user, tmp_path): + """Create test task with empty translation file""" + task_id = "test-empty-translation-101" + result_dir = tmp_path / "results" / task_id + result_dir.mkdir(parents=True) + + # Create result JSON + result_json = { + "document_info": {"total_pages": 1, "processing_track": "Direct"}, + "pages": [{"page_number": 1, "width": 612, "height": 792, "elements": []}] + } + result_json_path = result_dir / "edit_result.json" + result_json_path.write_text(json.dumps(result_json), encoding='utf-8') + + # Create empty translation file + 
translation_json = { + "task_id": task_id, + "target_lang": "ja", + "translations": {} # Empty translations + } + translation_path = result_dir / "edit_translated_ja.json" + translation_path.write_text(json.dumps(translation_json), encoding='utf-8') + + task = MockTask( + user_id=test_user.id, + task_id=task_id, + filename="test.pdf", + status=TaskStatusEnum.COMPLETED, + result_json_path=str(result_json_path) + ) + db_session.add(task) + db_session.commit() + db_session.refresh(task) + + return task + + +@pytest.fixture +def other_user(db_session): + """Create another user for ownership tests""" + user = MockUser( + email="other_user@example.com", + hashed_password="other_hash", + is_active=True + ) + db_session.add(user) + db_session.commit() + db_session.refresh(user) + return user + + +class TestTranslatedPDFDownload: + """Tests for POST /api/v2/translate/{task_id}/pdf endpoint""" + + @patch('app.services.pdf_generator_service.pdf_generator_service') + def test_download_translated_pdf_success( + self, mock_pdf_service, client, db_session, test_user, test_task_with_result, tmp_path + ): + """Test successful translated PDF download""" + task, result_dir = test_task_with_result + + # Create a mock PDF file for the response + mock_pdf_path = tmp_path / "output.pdf" + mock_pdf_path.write_bytes(b"%PDF-1.4 mock pdf content") + + def mock_generate(result_json_path, translation_json_path, output_path, source_file_path=None): + # Copy mock PDF to output path + output_path.write_bytes(mock_pdf_path.read_bytes()) + return True + + mock_pdf_service.generate_translated_pdf.side_effect = mock_generate + + response = client.post( + f"/api/v2/translate/{task.task_id}/pdf?lang=zh-TW" + ) + + assert response.status_code == 200 + assert response.headers["content-type"] == "application/pdf" + assert "attachment" in response.headers.get("content-disposition", "") + assert task.task_id in response.headers.get("content-disposition", "") + + # Verify PDF service was called + 
mock_pdf_service.generate_translated_pdf.assert_called_once() + + def test_download_pdf_task_not_found(self, client, db_session, test_user): + """Test 404 when task doesn't exist""" + response = client.post( + "/api/v2/translate/nonexistent-task-id/pdf?lang=zh-TW" + ) + + assert response.status_code == 404 + assert "Task not found" in response.json()["detail"] + + def test_download_pdf_no_result_json(self, client, db_session, test_user, test_task_no_result): + """Test 404 when task has no result JSON""" + response = client.post( + f"/api/v2/translate/{test_task_no_result.task_id}/pdf?lang=zh-TW" + ) + + assert response.status_code == 404 + assert "OCR result not found" in response.json()["detail"] + + def test_download_pdf_translation_not_found( + self, client, db_session, test_user, test_task_no_translation + ): + """Test 404 when translation for requested language doesn't exist""" + response = client.post( + f"/api/v2/translate/{test_task_no_translation.task_id}/pdf?lang=ko" + ) + + assert response.status_code == 404 + detail = response.json()["detail"] + # Message could mention the language or indicate no translations found + assert "ko" in detail or "translation" in detail.lower() or "found" in detail.lower() + + def test_download_pdf_empty_translation( + self, client, db_session, test_user, test_task_empty_translation + ): + """Test 400 when translation file is empty""" + response = client.post( + f"/api/v2/translate/{test_task_empty_translation.task_id}/pdf?lang=ja" + ) + + assert response.status_code == 400 + assert "empty" in response.json()["detail"].lower() or "incomplete" in response.json()["detail"].lower() + + def test_download_pdf_missing_lang_param( + self, client, db_session, test_user, test_task_with_result + ): + """Test 422 when lang query parameter is missing""" + task, _ = test_task_with_result + + response = client.post( + f"/api/v2/translate/{task.task_id}/pdf" + ) + + # FastAPI returns 422 for missing required query params + assert 
response.status_code == 422 + + def test_download_pdf_wrong_user( + self, db_session, other_user, test_task_with_result, tmp_path + ): + """Test 404 when task belongs to different user""" + task, _ = test_task_with_result + + # Create new app with other_user + app = create_test_app() + app.state.db_session = db_session + app.state.current_user = other_user + client = TestClient(app) + + response = client.post( + f"/api/v2/translate/{task.task_id}/pdf?lang=zh-TW" + ) + + # Task service returns None for tasks not owned by current user + assert response.status_code == 404 + assert "Task not found" in response.json()["detail"] + + @patch('app.services.pdf_generator_service.pdf_generator_service') + def test_download_pdf_generation_failure( + self, mock_pdf_service, client, db_session, test_user, test_task_with_result + ): + """Test 500 when PDF generation fails""" + task, _ = test_task_with_result + + # Mock PDF generation failure + mock_pdf_service.generate_translated_pdf.return_value = False + + response = client.post( + f"/api/v2/translate/{task.task_id}/pdf?lang=zh-TW" + ) + + assert response.status_code == 500 + assert "Failed to generate" in response.json()["detail"] + + @patch('app.services.pdf_generator_service.pdf_generator_service') + def test_download_pdf_exception_handling( + self, mock_pdf_service, client, db_session, test_user, test_task_with_result + ): + """Test 500 when PDF generation raises exception""" + task, _ = test_task_with_result + + # Mock PDF generation exception + mock_pdf_service.generate_translated_pdf.side_effect = Exception("Unexpected error") + + response = client.post( + f"/api/v2/translate/{task.task_id}/pdf?lang=zh-TW" + ) + + assert response.status_code == 500 + assert "Failed to generate" in response.json()["detail"] + + +class TestTranslatedPDFWithMultipleLanguages: + """Tests for multiple translation languages""" + + @pytest.fixture + def task_with_multiple_translations(self, db_session, test_user, tmp_path): + """Create task 
with translations in multiple languages""" + task_id = "test-multi-lang-222" + result_dir = tmp_path / "results" / task_id + result_dir.mkdir(parents=True) + + # Create result JSON + result_json = { + "document_info": {"total_pages": 1, "processing_track": "Direct"}, + "pages": [{ + "page_number": 1, + "width": 612, "height": 792, + "elements": [ + {"element_id": "text_1", "type": "text", "content": "Hello", + "bounding_box": {"x": 72, "y": 72, "width": 100, "height": 20}} + ] + }] + } + result_json_path = result_dir / "edit_result.json" + result_json_path.write_text(json.dumps(result_json), encoding='utf-8') + + # Create translations for multiple languages + for lang, translation in [("zh-TW", "你好"), ("ja", "こんにちは"), ("ko", "안녕하세요")]: + translation_json = { + "task_id": task_id, + "target_lang": lang, + "translated_at": datetime.utcnow().isoformat() + "Z", + "translations": {"text_1": translation}, + "statistics": {"translated_elements": 1} + } + (result_dir / f"edit_translated_{lang}.json").write_text( + json.dumps(translation_json), encoding='utf-8' + ) + + task = MockTask( + user_id=test_user.id, + task_id=task_id, + filename="test.pdf", + status=TaskStatusEnum.COMPLETED, + result_json_path=str(result_json_path) + ) + db_session.add(task) + db_session.commit() + db_session.refresh(task) + + return task, result_dir + + @patch('app.services.pdf_generator_service.pdf_generator_service') + def test_download_different_languages( + self, mock_pdf_service, client, db_session, test_user, + task_with_multiple_translations, tmp_path + ): + """Test downloading PDFs for different languages""" + task, result_dir = task_with_multiple_translations + + mock_pdf_path = tmp_path / "output.pdf" + mock_pdf_path.write_bytes(b"%PDF-1.4 mock") + + def mock_generate(result_json_path, translation_json_path, output_path, source_file_path=None): + output_path.write_bytes(mock_pdf_path.read_bytes()) + return True + + mock_pdf_service.generate_translated_pdf.side_effect = mock_generate + + 
for lang in ["zh-TW", "ja", "ko"]: + response = client.post( + f"/api/v2/translate/{task.task_id}/pdf?lang={lang}" + ) + + assert response.status_code == 200, f"Failed for language {lang}" + assert response.headers["content-type"] == "application/pdf" + + # Verify PDF service was called 3 times + assert mock_pdf_service.generate_translated_pdf.call_count == 3 + + def test_download_nonexistent_language( + self, client, db_session, test_user, task_with_multiple_translations + ): + """Test 404 for language that doesn't exist""" + task, _ = task_with_multiple_translations + + response = client.post( + f"/api/v2/translate/{task.task_id}/pdf?lang=de" + ) + + assert response.status_code == 404 + detail = response.json()["detail"] + # Should mention available languages + assert "zh-TW" in detail or "ja" in detail or "ko" in detail or "not found" in detail.lower() + + +class TestInvalidTranslationFile: + """Tests for invalid translation file scenarios""" + + @pytest.fixture + def task_with_invalid_json(self, db_session, test_user, tmp_path): + """Create task with invalid JSON translation file""" + task_id = "test-invalid-json-333" + result_dir = tmp_path / "results" / task_id + result_dir.mkdir(parents=True) + + # Create result JSON + result_json = { + "document_info": {"total_pages": 1, "processing_track": "Direct"}, + "pages": [{"page_number": 1, "width": 612, "height": 792, "elements": []}] + } + result_json_path = result_dir / "edit_result.json" + result_json_path.write_text(json.dumps(result_json), encoding='utf-8') + + # Create invalid JSON translation file + (result_dir / "edit_translated_en.json").write_text("{ invalid json }", encoding='utf-8') + + task = MockTask( + user_id=test_user.id, + task_id=task_id, + filename="test.pdf", + status=TaskStatusEnum.COMPLETED, + result_json_path=str(result_json_path) + ) + db_session.add(task) + db_session.commit() + db_session.refresh(task) + + return task + + def test_download_pdf_invalid_json( + self, client, db_session, 
test_user, task_with_invalid_json + ): + """Test 400 when translation file has invalid JSON""" + response = client.post( + f"/api/v2/translate/{task_with_invalid_json.task_id}/pdf?lang=en" + ) + + assert response.status_code == 400 + assert "Invalid" in response.json()["detail"] or "format" in response.json()["detail"].lower() + + +class TestResultFileNotFound: + """Tests for missing result file scenario""" + + @pytest.fixture + def task_with_missing_file(self, db_session, test_user, tmp_path): + """Create task pointing to non-existent result file""" + task_id = "test-missing-file-444" + result_dir = tmp_path / "results" / task_id + result_dir.mkdir(parents=True) + + # Point to non-existent file + result_json_path = result_dir / "nonexistent_result.json" + + task = MockTask( + user_id=test_user.id, + task_id=task_id, + filename="test.pdf", + status=TaskStatusEnum.COMPLETED, + result_json_path=str(result_json_path) + ) + db_session.add(task) + db_session.commit() + db_session.refresh(task) + + return task + + def test_download_pdf_result_file_missing( + self, client, db_session, test_user, task_with_missing_file + ): + """Test 404 when result file doesn't exist on disk""" + response = client.post( + f"/api/v2/translate/{task_with_missing_file.task_id}/pdf?lang=zh-TW" + ) + + assert response.status_code == 404 + assert "not found" in response.json()["detail"].lower() + + +if __name__ == '__main__': + pytest.main([__file__, '-v']) diff --git a/backend/tests/services/test_translated_pdf.py b/backend/tests/services/test_translated_pdf.py new file mode 100644 index 0000000..981c4b0 --- /dev/null +++ b/backend/tests/services/test_translated_pdf.py @@ -0,0 +1,564 @@ +""" +Unit tests for translated PDF generation functionality. + +Tests the generate_translated_pdf() method in PDFGeneratorService +and track-specific behavior (Direct, OCR, Hybrid). 
+""" + +import pytest +import json +import tempfile +from pathlib import Path +from unittest.mock import patch, MagicMock + +from app.services.pdf_generator_service import PDFGeneratorService +from app.services.translation_service import apply_translations + + +class TestGenerateTranslatedPDF: + """Tests for generate_translated_pdf() method""" + + @pytest.fixture + def pdf_service(self): + """Create PDF generator service instance""" + return PDFGeneratorService() + + @pytest.fixture + def sample_result_json(self, tmp_path): + """Create sample result JSON file""" + result_data = { + "metadata": { + "processing_track": "direct", + "source_file": "test.pdf", + "page_count": 1 + }, + "pages": [ + { + "page_number": 1, + "width": 612, + "height": 792, + "elements": [ + { + "element_id": "text_1", + "type": "text", + "content": "Hello World", + "bounding_box": { + "x": 72, + "y": 720, + "width": 200, + "height": 20 + }, + "style_info": { + "font_size": 12, + "font_name": "Helvetica" + } + }, + { + "element_id": "title_1", + "type": "title", + "content": "Document Title", + "bounding_box": { + "x": 72, + "y": 750, + "width": 300, + "height": 30 + }, + "style_info": { + "font_size": 18, + "font_name": "Helvetica-Bold" + } + } + ] + } + ] + } + result_file = tmp_path / "edit_result.json" + result_file.write_text(json.dumps(result_data), encoding='utf-8') + return result_file + + @pytest.fixture + def sample_translation_json(self, tmp_path): + """Create sample translation JSON file""" + translation_data = { + "target_lang": "zh-TW", + "source_lang": "en", + "translated_at": "2024-01-01T00:00:00Z", + "translations": { + "text_1": "你好世界", + "title_1": "文件標題" + }, + "statistics": { + "translated_elements": 2, + "total_characters": 100 + } + } + translation_file = tmp_path / "edit_translated_zh-TW.json" + translation_file.write_text(json.dumps(translation_data), encoding='utf-8') + return translation_file + + def test_generate_translated_pdf_success( + self, pdf_service, 
sample_result_json, sample_translation_json, tmp_path + ): + """Test successful translated PDF generation""" + output_path = tmp_path / "output.pdf" + + success = pdf_service.generate_translated_pdf( + result_json_path=sample_result_json, + translation_json_path=sample_translation_json, + output_path=output_path + ) + + assert success is True + assert output_path.exists() + assert output_path.stat().st_size > 0 + # PDF files start with %PDF + with open(output_path, 'rb') as f: + header = f.read(4) + assert header == b'%PDF' + + def test_generate_translated_pdf_missing_result( + self, pdf_service, sample_translation_json, tmp_path + ): + """Test with missing result JSON file""" + output_path = tmp_path / "output.pdf" + missing_result = tmp_path / "non_existent.json" + + success = pdf_service.generate_translated_pdf( + result_json_path=missing_result, + translation_json_path=sample_translation_json, + output_path=output_path + ) + + assert success is False + assert not output_path.exists() + + def test_generate_translated_pdf_missing_translation( + self, pdf_service, sample_result_json, tmp_path + ): + """Test with missing translation JSON file""" + output_path = tmp_path / "output.pdf" + missing_translation = tmp_path / "non_existent_translation.json" + + success = pdf_service.generate_translated_pdf( + result_json_path=sample_result_json, + translation_json_path=missing_translation, + output_path=output_path + ) + + assert success is False + assert not output_path.exists() + + def test_generate_translated_pdf_empty_translations( + self, pdf_service, sample_result_json, tmp_path + ): + """Test with empty translations (should fall back to original)""" + empty_translation_data = { + "target_lang": "zh-TW", + "translations": {} + } + empty_translation_file = tmp_path / "empty_translated.json" + empty_translation_file.write_text(json.dumps(empty_translation_data), encoding='utf-8') + output_path = tmp_path / "output.pdf" + + success = 
pdf_service.generate_translated_pdf( + result_json_path=sample_result_json, + translation_json_path=empty_translation_file, + output_path=output_path + ) + + # Should succeed even with empty translations (uses original content) + assert success is True + assert output_path.exists() + + def test_generate_translated_pdf_partial_translations( + self, pdf_service, sample_result_json, tmp_path + ): + """Test with partial translations (some elements not translated)""" + partial_translation_data = { + "target_lang": "zh-TW", + "translations": { + "text_1": "你好世界" + # title_1 not translated + } + } + partial_translation_file = tmp_path / "partial_translated.json" + partial_translation_file.write_text(json.dumps(partial_translation_data), encoding='utf-8') + output_path = tmp_path / "output.pdf" + + success = pdf_service.generate_translated_pdf( + result_json_path=sample_result_json, + translation_json_path=partial_translation_file, + output_path=output_path + ) + + assert success is True + assert output_path.exists() + + +class TestTrackSpecificPDFGeneration: + """Tests for track-specific PDF generation behavior""" + + @pytest.fixture + def pdf_service(self): + return PDFGeneratorService() + + def create_result_with_track(self, tmp_path, track: str, with_table: bool = False): + """Helper to create result JSON with specific track""" + elements = [ + { + "element_id": "text_1", + "type": "text", + "content": "Sample text content", + "bounding_box": {"x": 72, "y": 720, "width": 200, "height": 20}, + "style_info": {"font_size": 12} + } + ] + + if with_table: + elements.append({ + "element_id": "table_1", + "type": "table", + "content": { + "cells": [ + {"row": 0, "col": 0, "content": "Header 1"}, + {"row": 0, "col": 1, "content": "Header 2"}, + {"row": 1, "col": 0, "content": "Data 1"}, + {"row": 1, "col": 1, "content": "Data 2"}, + ] + }, + "bounding_box": {"x": 72, "y": 500, "width": 400, "height": 100} + }) + + result_data = { + "metadata": { + "processing_track": track, + 
"source_file": f"test_{track}.pdf", + "page_count": 1 + }, + "pages": [ + { + "page_number": 1, + "width": 612, + "height": 792, + "elements": elements + } + ] + } + + result_file = tmp_path / f"{track}_result.json" + result_file.write_text(json.dumps(result_data), encoding='utf-8') + return result_file + + def create_translation_for_track(self, tmp_path, track: str, with_table: bool = False): + """Helper to create translation JSON""" + translations = { + "text_1": "翻譯的文字內容" + } + + if with_table: + translations["table_1"] = { + "cells": [ + {"row": 0, "col": 0, "content": "表頭 1"}, + {"row": 0, "col": 1, "content": "表頭 2"}, + {"row": 1, "col": 0, "content": "資料 1"}, + {"row": 1, "col": 1, "content": "資料 2"}, + ] + } + + translation_data = { + "target_lang": "zh-TW", + "translations": translations + } + + translation_file = tmp_path / f"{track}_translated_zh-TW.json" + translation_file.write_text(json.dumps(translation_data), encoding='utf-8') + return translation_file + + def test_direct_track_pdf_generation(self, pdf_service, tmp_path): + """Test PDF generation for Direct track documents""" + result_file = self.create_result_with_track(tmp_path, "direct") + translation_file = self.create_translation_for_track(tmp_path, "direct") + output_path = tmp_path / "direct_output.pdf" + + success = pdf_service.generate_translated_pdf( + result_json_path=result_file, + translation_json_path=translation_file, + output_path=output_path + ) + + assert success is True + assert output_path.exists() + assert output_path.stat().st_size > 0 + + def test_ocr_track_pdf_generation(self, pdf_service, tmp_path): + """Test PDF generation for OCR track documents""" + result_file = self.create_result_with_track(tmp_path, "ocr") + translation_file = self.create_translation_for_track(tmp_path, "ocr") + output_path = tmp_path / "ocr_output.pdf" + + success = pdf_service.generate_translated_pdf( + result_json_path=result_file, + translation_json_path=translation_file, + output_path=output_path 
+ ) + + assert success is True + assert output_path.exists() + assert output_path.stat().st_size > 0 + + def test_hybrid_track_pdf_generation(self, pdf_service, tmp_path): + """Test PDF generation for Hybrid track documents""" + result_file = self.create_result_with_track(tmp_path, "hybrid") + translation_file = self.create_translation_for_track(tmp_path, "hybrid") + output_path = tmp_path / "hybrid_output.pdf" + + success = pdf_service.generate_translated_pdf( + result_json_path=result_file, + translation_json_path=translation_file, + output_path=output_path + ) + + assert success is True + assert output_path.exists() + assert output_path.stat().st_size > 0 + + def test_document_with_table_direct_track(self, pdf_service, tmp_path): + """Test PDF generation for Direct track document with tables""" + result_file = self.create_result_with_track(tmp_path, "direct", with_table=True) + translation_file = self.create_translation_for_track(tmp_path, "direct", with_table=True) + output_path = tmp_path / "direct_table_output.pdf" + + success = pdf_service.generate_translated_pdf( + result_json_path=result_file, + translation_json_path=translation_file, + output_path=output_path + ) + + assert success is True + assert output_path.exists() + assert output_path.stat().st_size > 0 + + def test_document_with_table_ocr_track(self, pdf_service, tmp_path): + """Test PDF generation for OCR track document with tables""" + result_file = self.create_result_with_track(tmp_path, "ocr", with_table=True) + translation_file = self.create_translation_for_track(tmp_path, "ocr", with_table=True) + output_path = tmp_path / "ocr_table_output.pdf" + + success = pdf_service.generate_translated_pdf( + result_json_path=result_file, + translation_json_path=translation_file, + output_path=output_path + ) + + assert success is True + assert output_path.exists() + assert output_path.stat().st_size > 0 + + +class TestTranslationMergeIntegration: + """Integration tests for translation merging with PDF 
generation""" + + @pytest.fixture + def pdf_service(self): + return PDFGeneratorService() + + def test_translations_applied_to_pdf(self, pdf_service, tmp_path): + """Test that translations are properly applied before PDF generation""" + # Create result with specific content + result_data = { + "metadata": {"processing_track": "direct"}, + "pages": [ + { + "page_number": 1, + "width": 612, + "height": 792, + "elements": [ + { + "element_id": "text_1", + "type": "text", + "content": "ORIGINAL_MARKER_TEXT", + "bounding_box": {"x": 72, "y": 720, "width": 200, "height": 20}, + "style_info": {"font_size": 12} + } + ] + } + ] + } + result_file = tmp_path / "result.json" + result_file.write_text(json.dumps(result_data), encoding='utf-8') + + # Create translation + translation_data = { + "translations": { + "text_1": "TRANSLATED_MARKER_TEXT" + } + } + translation_file = tmp_path / "translation.json" + translation_file.write_text(json.dumps(translation_data), encoding='utf-8') + + output_path = tmp_path / "output.pdf" + + success = pdf_service.generate_translated_pdf( + result_json_path=result_file, + translation_json_path=translation_file, + output_path=output_path + ) + + assert success is True + assert output_path.exists() + + # Read PDF content (basic check - the translated text should be in the PDF) + with open(output_path, 'rb') as f: + pdf_content = f.read() + # Check that the file is a valid PDF + assert pdf_content.startswith(b'%PDF') + + def test_multi_page_translated_pdf(self, pdf_service, tmp_path): + """Test translated PDF generation for multi-page documents""" + result_data = { + "metadata": {"processing_track": "direct"}, + "pages": [ + { + "page_number": 1, + "width": 612, + "height": 792, + "elements": [ + { + "element_id": "p1_text", + "type": "text", + "content": "Page 1 content", + "bounding_box": {"x": 72, "y": 720, "width": 200, "height": 20}, + "style_info": {"font_size": 12} + } + ] + }, + { + "page_number": 2, + "width": 612, + "height": 792, + 
"elements": [ + { + "element_id": "p2_text", + "type": "text", + "content": "Page 2 content", + "bounding_box": {"x": 72, "y": 720, "width": 200, "height": 20}, + "style_info": {"font_size": 12} + } + ] + } + ] + } + result_file = tmp_path / "multi_page_result.json" + result_file.write_text(json.dumps(result_data), encoding='utf-8') + + translation_data = { + "translations": { + "p1_text": "第一頁內容", + "p2_text": "第二頁內容" + } + } + translation_file = tmp_path / "multi_page_translation.json" + translation_file.write_text(json.dumps(translation_data), encoding='utf-8') + + output_path = tmp_path / "multi_page_output.pdf" + + success = pdf_service.generate_translated_pdf( + result_json_path=result_file, + translation_json_path=translation_file, + output_path=output_path + ) + + assert success is True + assert output_path.exists() + assert output_path.stat().st_size > 0 + + +class TestErrorHandling: + """Tests for error handling in translated PDF generation""" + + @pytest.fixture + def pdf_service(self): + return PDFGeneratorService() + + def test_invalid_json_result(self, pdf_service, tmp_path): + """Test handling of invalid JSON in result file""" + invalid_result = tmp_path / "invalid.json" + invalid_result.write_text("{ invalid json }", encoding='utf-8') + + translation_data = {"translations": {}} + translation_file = tmp_path / "translation.json" + translation_file.write_text(json.dumps(translation_data), encoding='utf-8') + + output_path = tmp_path / "output.pdf" + + success = pdf_service.generate_translated_pdf( + result_json_path=invalid_result, + translation_json_path=translation_file, + output_path=output_path + ) + + assert success is False + + def test_invalid_json_translation(self, pdf_service, tmp_path): + """Test handling of invalid JSON in translation file""" + result_data = { + "pages": [{"page_number": 1, "width": 612, "height": 792, "elements": []}] + } + result_file = tmp_path / "result.json" + result_file.write_text(json.dumps(result_data), 
encoding='utf-8') + + invalid_translation = tmp_path / "invalid_translation.json" + invalid_translation.write_text("{ invalid json }", encoding='utf-8') + + output_path = tmp_path / "output.pdf" + + success = pdf_service.generate_translated_pdf( + result_json_path=result_file, + translation_json_path=invalid_translation, + output_path=output_path + ) + + assert success is False + + def test_temp_file_cleanup_on_success(self, pdf_service, tmp_path): + """Test that temporary files are cleaned up after successful generation""" + result_data = { + "pages": [ + { + "page_number": 1, + "width": 612, + "height": 792, + "elements": [ + { + "element_id": "text_1", + "type": "text", + "content": "Test", + "bounding_box": {"x": 72, "y": 720, "width": 100, "height": 20}, + "style_info": {"font_size": 12} + } + ] + } + ] + } + result_file = tmp_path / "result.json" + result_file.write_text(json.dumps(result_data), encoding='utf-8') + + translation_data = {"translations": {"text_1": "測試"}} + translation_file = tmp_path / "translation.json" + translation_file.write_text(json.dumps(translation_data), encoding='utf-8') + + output_path = tmp_path / "output.pdf" + + # Check temp directory for translated JSON files before and after + import tempfile + temp_dir = Path(tempfile.gettempdir()) + + success = pdf_service.generate_translated_pdf( + result_json_path=result_file, + translation_json_path=translation_file, + output_path=output_path + ) + + assert success is True + # Temp file should be cleaned up (we can't guarantee exact filename, + # but the method is responsible for cleanup) diff --git a/backend/tests/services/test_translation_merge.py b/backend/tests/services/test_translation_merge.py new file mode 100644 index 0000000..d227121 --- /dev/null +++ b/backend/tests/services/test_translation_merge.py @@ -0,0 +1,523 @@ +""" +Unit tests for translation merging functionality. 
+ +Tests the apply_translations() function and related utilities +for merging translation data with UnifiedDocument structure. +""" + +import pytest +import json +import tempfile +from pathlib import Path + +from app.services.translation_service import ( + apply_translations, + _apply_table_translation, + load_translation_json, + find_translation_file, + list_available_translations, + TRANSLATABLE_TEXT_TYPES, + TABLE_TYPE, +) + + +class TestApplyTranslations: + """Tests for apply_translations() function""" + + def test_apply_text_translation(self): + """Test applying translation to text elements""" + result_json = { + "pages": [ + { + "page_number": 1, + "elements": [ + { + "element_id": "text_1", + "type": "text", + "content": "Hello World", + "bounding_box": {"x": 0, "y": 0, "width": 100, "height": 20} + } + ] + } + ] + } + translations = { + "text_1": "你好世界" + } + + result = apply_translations(result_json, translations) + + assert result["pages"][0]["elements"][0]["content"] == "你好世界" + # Original should be unchanged + assert result_json["pages"][0]["elements"][0]["content"] == "Hello World" + + def test_apply_multiple_translations(self): + """Test applying translations to multiple elements""" + result_json = { + "pages": [ + { + "page_number": 1, + "elements": [ + {"element_id": "title_1", "type": "title", "content": "Title"}, + {"element_id": "text_1", "type": "text", "content": "Body text"}, + {"element_id": "header_1", "type": "header", "content": "Header"}, + ] + } + ] + } + translations = { + "title_1": "標題", + "text_1": "正文", + "header_1": "頁首" + } + + result = apply_translations(result_json, translations) + + assert result["pages"][0]["elements"][0]["content"] == "標題" + assert result["pages"][0]["elements"][1]["content"] == "正文" + assert result["pages"][0]["elements"][2]["content"] == "頁首" + + def test_preserve_non_translated_elements(self): + """Test that elements without translations are preserved""" + result_json = { + "pages": [ + { + "page_number": 
1, + "elements": [ + {"element_id": "text_1", "type": "text", "content": "Translate me"}, + {"element_id": "text_2", "type": "text", "content": "Keep me"}, + ] + } + ] + } + translations = { + "text_1": "翻譯我" + } + + result = apply_translations(result_json, translations) + + assert result["pages"][0]["elements"][0]["content"] == "翻譯我" + assert result["pages"][0]["elements"][1]["content"] == "Keep me" + + def test_preserve_element_properties(self): + """Test that element properties (bounding_box, style_info) are preserved""" + result_json = { + "pages": [ + { + "page_number": 1, + "elements": [ + { + "element_id": "text_1", + "type": "text", + "content": "Original", + "bounding_box": {"x": 10, "y": 20, "width": 100, "height": 30}, + "style_info": {"font_size": 12, "font_name": "Arial"} + } + ] + } + ] + } + translations = {"text_1": "Translated"} + + result = apply_translations(result_json, translations) + + elem = result["pages"][0]["elements"][0] + assert elem["content"] == "Translated" + assert elem["bounding_box"] == {"x": 10, "y": 20, "width": 100, "height": 30} + assert elem["style_info"] == {"font_size": 12, "font_name": "Arial"} + + def test_multi_page_document(self): + """Test translation across multiple pages""" + result_json = { + "pages": [ + { + "page_number": 1, + "elements": [{"element_id": "p1_text", "type": "text", "content": "Page 1"}] + }, + { + "page_number": 2, + "elements": [{"element_id": "p2_text", "type": "text", "content": "Page 2"}] + } + ] + } + translations = { + "p1_text": "第一頁", + "p2_text": "第二頁" + } + + result = apply_translations(result_json, translations) + + assert result["pages"][0]["elements"][0]["content"] == "第一頁" + assert result["pages"][1]["elements"][0]["content"] == "第二頁" + + def test_all_translatable_types(self): + """Test that all translatable text types are handled""" + elements = [] + translations = {} + for i, elem_type in enumerate(TRANSLATABLE_TEXT_TYPES): + elem_id = f"{elem_type}_{i}" + elements.append({ + 
"element_id": elem_id, + "type": elem_type, + "content": f"Original {elem_type}" + }) + translations[elem_id] = f"Translated {elem_type}" + + result_json = {"pages": [{"page_number": 1, "elements": elements}]} + result = apply_translations(result_json, translations) + + for i, elem_type in enumerate(TRANSLATABLE_TEXT_TYPES): + assert result["pages"][0]["elements"][i]["content"] == f"Translated {elem_type}" + + def test_skip_non_translatable_types(self): + """Test that non-translatable types are not modified even with translation""" + result_json = { + "pages": [ + { + "page_number": 1, + "elements": [ + {"element_id": "img_1", "type": "image", "content": "image.png"}, + {"element_id": "chart_1", "type": "chart", "content": "chart data"}, + ] + } + ] + } + # Even though translations exist, image/chart should not be modified + translations = { + "img_1": "Should not apply", + "chart_1": "Should not apply" + } + + result = apply_translations(result_json, translations) + + assert result["pages"][0]["elements"][0]["content"] == "image.png" + assert result["pages"][0]["elements"][1]["content"] == "chart data" + + def test_empty_translations(self): + """Test with empty translations dict""" + result_json = { + "pages": [ + { + "page_number": 1, + "elements": [{"element_id": "text_1", "type": "text", "content": "Original"}] + } + ] + } + + result = apply_translations(result_json, {}) + + assert result["pages"][0]["elements"][0]["content"] == "Original" + + def test_empty_document(self): + """Test with empty document""" + result_json = {"pages": []} + translations = {"text_1": "Translation"} + + result = apply_translations(result_json, translations) + + assert result["pages"] == [] + + +class TestApplyTableTranslation: + """Tests for _apply_table_translation() function""" + + def test_apply_table_cell_translation(self): + """Test applying translations to table cells""" + table_elem = { + "element_id": "table_1", + "type": "table", + "content": { + "cells": [ + {"row": 0, 
"col": 0, "content": "Header 1"}, + {"row": 0, "col": 1, "content": "Header 2"}, + {"row": 1, "col": 0, "content": "Data 1"}, + {"row": 1, "col": 1, "content": "Data 2"}, + ] + } + } + translation = { + "cells": [ + {"row": 0, "col": 0, "content": "表頭 1"}, + {"row": 0, "col": 1, "content": "表頭 2"}, + {"row": 1, "col": 0, "content": "資料 1"}, + {"row": 1, "col": 1, "content": "資料 2"}, + ] + } + + _apply_table_translation(table_elem, translation) + + cells = table_elem["content"]["cells"] + assert cells[0]["content"] == "表頭 1" + assert cells[1]["content"] == "表頭 2" + assert cells[2]["content"] == "資料 1" + assert cells[3]["content"] == "資料 2" + + def test_partial_table_translation(self): + """Test partial translation of table cells""" + table_elem = { + "element_id": "table_1", + "type": "table", + "content": { + "cells": [ + {"row": 0, "col": 0, "content": "A"}, + {"row": 0, "col": 1, "content": "B"}, + {"row": 1, "col": 0, "content": "C"}, + {"row": 1, "col": 1, "content": "D"}, + ] + } + } + # Only translate some cells + translation = { + "cells": [ + {"row": 0, "col": 0, "content": "甲"}, + {"row": 1, "col": 1, "content": "丁"}, + ] + } + + _apply_table_translation(table_elem, translation) + + cells = table_elem["content"]["cells"] + assert cells[0]["content"] == "甲" # Translated + assert cells[1]["content"] == "B" # Original + assert cells[2]["content"] == "C" # Original + assert cells[3]["content"] == "丁" # Translated + + def test_table_with_empty_cells(self): + """Test table with empty cells list""" + table_elem = { + "element_id": "table_1", + "type": "table", + "content": {"cells": []} + } + translation = { + "cells": [{"row": 0, "col": 0, "content": "New"}] + } + + # Should not raise error + _apply_table_translation(table_elem, translation) + assert table_elem["content"]["cells"] == [] + + def test_table_translation_via_apply_translations(self): + """Test table translation through main apply_translations function""" + result_json = { + "pages": [ + { + 
"page_number": 1, + "elements": [ + { + "element_id": "table_1", + "type": "table", + "content": { + "cells": [ + {"row": 0, "col": 0, "content": "Name"}, + {"row": 0, "col": 1, "content": "Value"}, + ] + } + } + ] + } + ] + } + translations = { + "table_1": { + "cells": [ + {"row": 0, "col": 0, "content": "名稱"}, + {"row": 0, "col": 1, "content": "數值"}, + ] + } + } + + result = apply_translations(result_json, translations) + + cells = result["pages"][0]["elements"][0]["content"]["cells"] + assert cells[0]["content"] == "名稱" + assert cells[1]["content"] == "數值" + + +class TestTranslationFileUtilities: + """Tests for translation file utility functions""" + + def test_load_translation_json(self, tmp_path): + """Test loading translation JSON file""" + translation_data = { + "translations": {"text_1": "Translation"}, + "target_lang": "zh-TW" + } + translation_file = tmp_path / "test_translated_zh-TW.json" + translation_file.write_text(json.dumps(translation_data), encoding='utf-8') + + result = load_translation_json(translation_file) + + assert result is not None + assert result["translations"]["text_1"] == "Translation" + assert result["target_lang"] == "zh-TW" + + def test_load_translation_json_not_found(self, tmp_path): + """Test loading non-existent translation file""" + non_existent = tmp_path / "does_not_exist.json" + + result = load_translation_json(non_existent) + + assert result is None + + def test_find_translation_file(self, tmp_path): + """Test finding translation file by language""" + # Create test files + (tmp_path / "doc_translated_en.json").write_text("{}", encoding='utf-8') + (tmp_path / "doc_translated_zh-TW.json").write_text("{}", encoding='utf-8') + + result = find_translation_file(tmp_path, "zh-TW") + + assert result is not None + assert result.name == "doc_translated_zh-TW.json" + + def test_find_translation_file_not_found(self, tmp_path): + """Test finding non-existent translation file""" + (tmp_path / "doc_translated_en.json").write_text("{}", 
encoding='utf-8') + + result = find_translation_file(tmp_path, "ja") + + assert result is None + + def test_list_available_translations(self, tmp_path): + """Test listing available translation languages""" + (tmp_path / "doc_translated_en.json").write_text("{}", encoding='utf-8') + (tmp_path / "doc_translated_zh-TW.json").write_text("{}", encoding='utf-8') + (tmp_path / "doc_translated_ja.json").write_text("{}", encoding='utf-8') + (tmp_path / "other_file.json").write_text("{}", encoding='utf-8') + + result = list_available_translations(tmp_path) + + assert set(result) == {"en", "zh-TW", "ja"} + + def test_list_available_translations_empty(self, tmp_path): + """Test listing when no translations exist""" + (tmp_path / "result.json").write_text("{}", encoding='utf-8') + + result = list_available_translations(tmp_path) + + assert result == [] + + +class TestDeepCopyBehavior: + """Tests to verify deep copy behavior""" + + def test_original_not_modified(self): + """Test that original document is not modified""" + original = { + "pages": [ + { + "page_number": 1, + "elements": [ + {"element_id": "text_1", "type": "text", "content": "Original"} + ] + } + ] + } + original_content = original["pages"][0]["elements"][0]["content"] + translations = {"text_1": "Modified"} + + result = apply_translations(original, translations) + + # Original should be unchanged + assert original["pages"][0]["elements"][0]["content"] == original_content + # Result should have translation + assert result["pages"][0]["elements"][0]["content"] == "Modified" + + def test_nested_objects_are_copied(self): + """Test that nested objects are properly deep copied""" + original = { + "pages": [ + { + "page_number": 1, + "elements": [ + { + "element_id": "table_1", + "type": "table", + "content": { + "cells": [ + {"row": 0, "col": 0, "content": "Original"} + ] + } + } + ] + } + ] + } + original_cell_content = original["pages"][0]["elements"][0]["content"]["cells"][0]["content"] + + translations = { + 
"table_1": {"cells": [{"row": 0, "col": 0, "content": "Modified"}]} + } + + result = apply_translations(original, translations) + + # Original nested content should be unchanged + assert original["pages"][0]["elements"][0]["content"]["cells"][0]["content"] == original_cell_content + + +class TestEdgeCases: + """Tests for edge cases and error handling""" + + def test_missing_element_id(self): + """Test handling elements without element_id""" + result_json = { + "pages": [ + { + "page_number": 1, + "elements": [ + {"type": "text", "content": "No ID element"} + ] + } + ] + } + translations = {"text_1": "Translation"} + + # Should not raise error + result = apply_translations(result_json, translations) + assert result["pages"][0]["elements"][0]["content"] == "No ID element" + + def test_missing_type(self): + """Test handling elements without type""" + result_json = { + "pages": [ + { + "page_number": 1, + "elements": [ + {"element_id": "elem_1", "content": "No type"} + ] + } + ] + } + translations = {"elem_1": "Translation"} + + # Should not raise error, should not apply translation without matching type + result = apply_translations(result_json, translations) + # Content unchanged because type doesn't match TRANSLATABLE_TEXT_TYPES + assert result["pages"][0]["elements"][0]["content"] == "No type" + + def test_unicode_translations(self): + """Test handling of various unicode characters""" + result_json = { + "pages": [ + { + "page_number": 1, + "elements": [ + {"element_id": "text_1", "type": "text", "content": "English"}, + {"element_id": "text_2", "type": "text", "content": "More text"}, + {"element_id": "text_3", "type": "text", "content": "Another"}, + ] + } + ] + } + translations = { + "text_1": "日本語テキスト", # Japanese + "text_2": "한국어 텍스트", # Korean + "text_3": "العربية" # Arabic + } + + result = apply_translations(result_json, translations) + + assert result["pages"][0]["elements"][0]["content"] == "日本語テキスト" + assert result["pages"][0]["elements"][1]["content"] == 
"한국어 텍스트" + assert result["pages"][0]["elements"][2]["content"] == "العربية" diff --git a/frontend/src/pages/TaskDetailPage.tsx b/frontend/src/pages/TaskDetailPage.tsx index 547d37e..470dbf6 100644 --- a/frontend/src/pages/TaskDetailPage.tsx +++ b/frontend/src/pages/TaskDetailPage.tsx @@ -25,7 +25,8 @@ import { Languages, Globe, CheckCircle, - Trash2 + Trash2, + FileOutput } from 'lucide-react' import type { ProcessingTrack, TranslationStatus, TranslationListItem } from '@/types/apiV2' import { Badge } from '@/components/ui/badge' @@ -327,6 +328,24 @@ export default function TaskDetailPage() { } } + const handleDownloadTranslatedPdf = async (lang: string) => { + if (!taskId) return + try { + await apiClientV2.downloadTranslatedPdf(taskId, lang) + toast({ + title: '下載成功', + description: `翻譯 PDF (${lang}) 已下載`, + variant: 'success', + }) + } catch (error: any) { + toast({ + title: '下載失敗', + description: error.response?.data?.detail || t('errors.networkError'), + variant: 'destructive', + }) + } + } + const getStatusBadge = (status: string) => { switch (status) { case 'completed': @@ -603,7 +622,16 @@ export default function TaskDetailPage() { className="gap-1" > - 下載 + JSON + +