Adds the ability to download translated documents as PDF files while
preserving the original document layout. Key changes:
- Add apply_translations() function to merge translation JSON with UnifiedDocument
- Add generate_translated_pdf() method to PDFGeneratorService
- Add POST /api/v2/translate/{task_id}/pdf endpoint
- Add downloadTranslatedPdf() method and PDF button in frontend
- Add comprehensive unit tests (52 tests: merge, PDF generation, API endpoints)
- Archive add-translated-pdf-export proposal
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
565 lines
20 KiB
Python
565 lines
20 KiB
Python
"""
|
|
Unit tests for translated PDF generation functionality.
|
|
|
|
Tests the generate_translated_pdf() method in PDFGeneratorService
|
|
and track-specific behavior (Direct, OCR, Hybrid).
|
|
"""
|
|
|
|
import pytest
|
|
import json
|
|
import tempfile
|
|
from pathlib import Path
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
from app.services.pdf_generator_service import PDFGeneratorService
|
|
from app.services.translation_service import apply_translations
|
|
|
|
|
|
class TestGenerateTranslatedPDF:
    """Exercise PDFGeneratorService.generate_translated_pdf().

    Every test writes a result JSON and (where applicable) a translation
    JSON below ``tmp_path``, calls the method with keyword arguments and
    asserts on the boolean it returns and on the output file.
    """

    @staticmethod
    def _dump(target, payload):
        """Write ``payload`` as UTF-8 JSON to ``target`` and return ``target``."""
        target.write_text(json.dumps(payload), encoding='utf-8')
        return target

    @pytest.fixture
    def pdf_service(self):
        """Provide a PDFGeneratorService instance."""
        service = PDFGeneratorService()
        return service

    @pytest.fixture
    def sample_result_json(self, tmp_path):
        """Write a one-page result JSON (text + title element) and return its path."""
        body_text = {
            "element_id": "text_1",
            "type": "text",
            "content": "Hello World",
            "bounding_box": {"x": 72, "y": 720, "width": 200, "height": 20},
            "style_info": {"font_size": 12, "font_name": "Helvetica"},
        }
        heading = {
            "element_id": "title_1",
            "type": "title",
            "content": "Document Title",
            "bounding_box": {"x": 72, "y": 750, "width": 300, "height": 30},
            "style_info": {"font_size": 18, "font_name": "Helvetica-Bold"},
        }
        page = {
            "page_number": 1,
            "width": 612,
            "height": 792,
            "elements": [body_text, heading],
        }
        document = {
            "metadata": {
                "processing_track": "direct",
                "source_file": "test.pdf",
                "page_count": 1,
            },
            "pages": [page],
        }
        return self._dump(tmp_path / "edit_result.json", document)

    @pytest.fixture
    def sample_translation_json(self, tmp_path):
        """Write a translation JSON covering both sample elements and return its path."""
        translated = {
            "text_1": "你好世界",
            "title_1": "文件標題",
        }
        stats = {
            "translated_elements": 2,
            "total_characters": 100,
        }
        payload = {
            "target_lang": "zh-TW",
            "source_lang": "en",
            "translated_at": "2024-01-01T00:00:00Z",
            "translations": translated,
            "statistics": stats,
        }
        return self._dump(tmp_path / "edit_translated_zh-TW.json", payload)

    def test_generate_translated_pdf_success(
        self, pdf_service, sample_result_json, sample_translation_json, tmp_path
    ):
        """Both inputs present: the call returns True and writes a PDF."""
        destination = tmp_path / "output.pdf"

        ok = pdf_service.generate_translated_pdf(
            result_json_path=sample_result_json,
            translation_json_path=sample_translation_json,
            output_path=destination,
        )

        assert ok is True
        assert destination.exists()
        assert destination.stat().st_size > 0
        # A PDF file begins with the bytes "%PDF".
        assert destination.read_bytes()[:4] == b'%PDF'

    def test_generate_translated_pdf_missing_result(
        self, pdf_service, sample_translation_json, tmp_path
    ):
        """Result JSON path does not exist: returns False, no output file."""
        destination = tmp_path / "output.pdf"
        absent_result = tmp_path / "non_existent.json"

        ok = pdf_service.generate_translated_pdf(
            result_json_path=absent_result,
            translation_json_path=sample_translation_json,
            output_path=destination,
        )

        assert ok is False
        assert not destination.exists()

    def test_generate_translated_pdf_missing_translation(
        self, pdf_service, sample_result_json, tmp_path
    ):
        """Translation JSON path does not exist: returns False, no output file."""
        destination = tmp_path / "output.pdf"
        absent_translation = tmp_path / "non_existent_translation.json"

        ok = pdf_service.generate_translated_pdf(
            result_json_path=sample_result_json,
            translation_json_path=absent_translation,
            output_path=destination,
        )

        assert ok is False
        assert not destination.exists()

    def test_generate_translated_pdf_empty_translations(
        self, pdf_service, sample_result_json, tmp_path
    ):
        """An empty ``translations`` mapping is expected to still yield a PDF."""
        translation_path = self._dump(
            tmp_path / "empty_translated.json",
            {"target_lang": "zh-TW", "translations": {}},
        )
        destination = tmp_path / "output.pdf"

        ok = pdf_service.generate_translated_pdf(
            result_json_path=sample_result_json,
            translation_json_path=translation_path,
            output_path=destination,
        )

        # Nothing to translate is not an error: the original content is used.
        assert ok is True
        assert destination.exists()

    def test_generate_translated_pdf_partial_translations(
        self, pdf_service, sample_result_json, tmp_path
    ):
        """Only ``text_1`` is translated; ``title_1`` has no translation entry."""
        translation_path = self._dump(
            tmp_path / "partial_translated.json",
            {"target_lang": "zh-TW", "translations": {"text_1": "你好世界"}},
        )
        destination = tmp_path / "output.pdf"

        ok = pdf_service.generate_translated_pdf(
            result_json_path=sample_result_json,
            translation_json_path=translation_path,
            output_path=destination,
        )

        assert ok is True
        assert destination.exists()
|
|
|
|
|
|
class TestTrackSpecificPDFGeneration:
    """Tests for track-specific PDF generation behavior.

    The result JSON's ``metadata.processing_track`` is set to "direct",
    "ocr" or "hybrid"; some cases additionally include a table element.
    All tests share one generate-and-assert helper so the expectations
    stay identical across tracks.
    """

    @pytest.fixture
    def pdf_service(self):
        """Create PDF generator service instance."""
        return PDFGeneratorService()

    def create_result_with_track(self, tmp_path, track: str, with_table: bool = False):
        """Write a one-page result JSON for ``track`` and return its path.

        Args:
            tmp_path: Directory the file is written into.
            track: Value stored in ``metadata.processing_track``; also used
                in the source file name and the output file name.
            with_table: When true, a 2x2 table element is appended after
                the text element.

        Returns:
            Path of the written ``{track}_result.json`` file.
        """
        elements = [
            {
                "element_id": "text_1",
                "type": "text",
                "content": "Sample text content",
                "bounding_box": {"x": 72, "y": 720, "width": 200, "height": 20},
                "style_info": {"font_size": 12},
            }
        ]

        if with_table:
            elements.append({
                "element_id": "table_1",
                "type": "table",
                "content": {
                    "cells": [
                        {"row": 0, "col": 0, "content": "Header 1"},
                        {"row": 0, "col": 1, "content": "Header 2"},
                        {"row": 1, "col": 0, "content": "Data 1"},
                        {"row": 1, "col": 1, "content": "Data 2"},
                    ]
                },
                "bounding_box": {"x": 72, "y": 500, "width": 400, "height": 100},
            })

        result_data = {
            "metadata": {
                "processing_track": track,
                "source_file": f"test_{track}.pdf",
                "page_count": 1,
            },
            "pages": [
                {
                    "page_number": 1,
                    "width": 612,
                    "height": 792,
                    "elements": elements,
                }
            ],
        }

        result_file = tmp_path / f"{track}_result.json"
        result_file.write_text(json.dumps(result_data), encoding='utf-8')
        return result_file

    def create_translation_for_track(self, tmp_path, track: str, with_table: bool = False):
        """Write a zh-TW translation JSON for ``track`` and return its path.

        Args:
            tmp_path: Directory the file is written into.
            track: Used only to build the output file name.
            with_table: When true, a translated ``table_1`` entry with four
                cells is added next to the ``text_1`` translation.

        Returns:
            Path of the written ``{track}_translated_zh-TW.json`` file.
        """
        translations = {
            "text_1": "翻譯的文字內容"
        }

        if with_table:
            translations["table_1"] = {
                "cells": [
                    {"row": 0, "col": 0, "content": "表頭 1"},
                    {"row": 0, "col": 1, "content": "表頭 2"},
                    {"row": 1, "col": 0, "content": "資料 1"},
                    {"row": 1, "col": 1, "content": "資料 2"},
                ]
            }

        translation_data = {
            "target_lang": "zh-TW",
            "translations": translations,
        }

        translation_file = tmp_path / f"{track}_translated_zh-TW.json"
        translation_file.write_text(json.dumps(translation_data), encoding='utf-8')
        return translation_file

    def _run_and_check(self, pdf_service, tmp_path, track, output_name, with_table=False):
        """Generate a translated PDF for ``track`` and assert it was written.

        Builds both input files with the ``create_*`` helpers, calls
        ``generate_translated_pdf`` and asserts that it returned True and
        that a non-empty file exists at ``tmp_path / output_name``.
        """
        result_file = self.create_result_with_track(tmp_path, track, with_table=with_table)
        translation_file = self.create_translation_for_track(tmp_path, track, with_table=with_table)
        output_path = tmp_path / output_name

        success = pdf_service.generate_translated_pdf(
            result_json_path=result_file,
            translation_json_path=translation_file,
            output_path=output_path,
        )

        assert success is True
        assert output_path.exists()
        assert output_path.stat().st_size > 0

    def test_direct_track_pdf_generation(self, pdf_service, tmp_path):
        """Test PDF generation for Direct track documents"""
        self._run_and_check(pdf_service, tmp_path, "direct", "direct_output.pdf")

    def test_ocr_track_pdf_generation(self, pdf_service, tmp_path):
        """Test PDF generation for OCR track documents"""
        self._run_and_check(pdf_service, tmp_path, "ocr", "ocr_output.pdf")

    def test_hybrid_track_pdf_generation(self, pdf_service, tmp_path):
        """Test PDF generation for Hybrid track documents"""
        self._run_and_check(pdf_service, tmp_path, "hybrid", "hybrid_output.pdf")

    def test_document_with_table_direct_track(self, pdf_service, tmp_path):
        """Test PDF generation for Direct track document with tables"""
        self._run_and_check(
            pdf_service, tmp_path, "direct", "direct_table_output.pdf", with_table=True
        )

    def test_document_with_table_ocr_track(self, pdf_service, tmp_path):
        """Test PDF generation for OCR track document with tables"""
        self._run_and_check(
            pdf_service, tmp_path, "ocr", "ocr_table_output.pdf", with_table=True
        )
|
|
|
|
|
|
class TestTranslationMergeIntegration:
    """Integration-style checks: translation JSON plus result JSON in, PDF out."""

    @staticmethod
    def _text_page(page_number, element_id, content):
        """Build a 612x792 page dict holding a single text element."""
        element = {
            "element_id": element_id,
            "type": "text",
            "content": content,
            "bounding_box": {"x": 72, "y": 720, "width": 200, "height": 20},
            "style_info": {"font_size": 12},
        }
        return {
            "page_number": page_number,
            "width": 612,
            "height": 792,
            "elements": [element],
        }

    @staticmethod
    def _dump(target, payload):
        """Write ``payload`` as UTF-8 JSON to ``target`` and return ``target``."""
        target.write_text(json.dumps(payload), encoding='utf-8')
        return target

    @pytest.fixture
    def pdf_service(self):
        """Provide a PDFGeneratorService instance."""
        service = PDFGeneratorService()
        return service

    def test_translations_applied_to_pdf(self, pdf_service, tmp_path):
        """Generate a PDF from a result whose single element has a translation.

        Only the return value, the file's existence and its ``%PDF`` header
        are asserted; the rendered text itself is not inspected.
        """
        # Source document with an easily recognizable marker string.
        source_doc = {
            "metadata": {"processing_track": "direct"},
            "pages": [self._text_page(1, "text_1", "ORIGINAL_MARKER_TEXT")],
        }
        result_path = self._dump(tmp_path / "result.json", source_doc)

        # Translation replacing that marker.
        translation_path = self._dump(
            tmp_path / "translation.json",
            {"translations": {"text_1": "TRANSLATED_MARKER_TEXT"}},
        )

        destination = tmp_path / "output.pdf"

        ok = pdf_service.generate_translated_pdf(
            result_json_path=result_path,
            translation_json_path=translation_path,
            output_path=destination,
        )

        assert ok is True
        assert destination.exists()

        # Basic validity check: the file must carry the PDF magic header.
        assert destination.read_bytes().startswith(b'%PDF')

    def test_multi_page_translated_pdf(self, pdf_service, tmp_path):
        """A two-page result with one translated element per page yields a PDF."""
        source_doc = {
            "metadata": {"processing_track": "direct"},
            "pages": [
                self._text_page(1, "p1_text", "Page 1 content"),
                self._text_page(2, "p2_text", "Page 2 content"),
            ],
        }
        result_path = self._dump(tmp_path / "multi_page_result.json", source_doc)

        translation_path = self._dump(
            tmp_path / "multi_page_translation.json",
            {
                "translations": {
                    "p1_text": "第一頁內容",
                    "p2_text": "第二頁內容",
                }
            },
        )

        destination = tmp_path / "multi_page_output.pdf"

        ok = pdf_service.generate_translated_pdf(
            result_json_path=result_path,
            translation_json_path=translation_path,
            output_path=destination,
        )

        assert ok is True
        assert destination.exists()
        assert destination.stat().st_size > 0
|
|
|
|
|
|
class TestErrorHandling:
|
|
"""Tests for error handling in translated PDF generation"""
|
|
|
|
@pytest.fixture
def pdf_service(self):
    """Provide a PDFGeneratorService instance for the error-handling tests."""
    service = PDFGeneratorService()
    return service
|
|
|
|
def test_invalid_json_result(self, pdf_service, tmp_path):
    """A result file that is not valid JSON makes the call return False."""
    broken_result = tmp_path / "invalid.json"
    broken_result.write_text("{ invalid json }", encoding='utf-8')

    # The translation side is well-formed (and empty) so only the result
    # file can be the cause of the failure.
    translation_path = tmp_path / "translation.json"
    translation_path.write_text(json.dumps({"translations": {}}), encoding='utf-8')

    destination = tmp_path / "output.pdf"

    outcome = pdf_service.generate_translated_pdf(
        result_json_path=broken_result,
        translation_json_path=translation_path,
        output_path=destination,
    )

    assert outcome is False
|
|
|
|
def test_invalid_json_translation(self, pdf_service, tmp_path):
    """A translation file that is not valid JSON makes the call return False."""
    # Well-formed result: a single page without elements.
    empty_page = {"page_number": 1, "width": 612, "height": 792, "elements": []}
    result_path = tmp_path / "result.json"
    result_path.write_text(json.dumps({"pages": [empty_page]}), encoding='utf-8')

    broken_translation = tmp_path / "invalid_translation.json"
    broken_translation.write_text("{ invalid json }", encoding='utf-8')

    destination = tmp_path / "output.pdf"

    outcome = pdf_service.generate_translated_pdf(
        result_json_path=result_path,
        translation_json_path=broken_translation,
        output_path=destination,
    )

    assert outcome is False
|
|
|
|
def test_temp_file_cleanup_on_success(self, pdf_service, tmp_path):
|
|
"""Test that temporary files are cleaned up after successful generation"""
|
|
result_data = {
|
|
"pages": [
|
|
{
|
|
"page_number": 1,
|
|
"width": 612,
|
|
"height": 792,
|
|
"elements": [
|
|
{
|
|
"element_id": "text_1",
|
|
"type": "text",
|
|
"content": "Test",
|
|
"bounding_box": {"x": 72, "y": 720, "width": 100, "height": 20},
|
|
"style_info": {"font_size": 12}
|
|
}
|
|
]
|
|
}
|
|
]
|
|
}
|
|
result_file = tmp_path / "result.json"
|
|
result_file.write_text(json.dumps(result_data), encoding='utf-8')
|
|
|
|
translation_data = {"translations": {"text_1": "測試"}}
|
|
translation_file = tmp_path / "translation.json"
|
|
translation_file.write_text(json.dumps(translation_data), encoding='utf-8')
|
|
|
|
output_path = tmp_path / "output.pdf"
|
|
|
|
# Check temp directory for translated JSON files before and after
|
|
import tempfile
|
|
temp_dir = Path(tempfile.gettempdir())
|
|
|
|
success = pdf_service.generate_translated_pdf(
|
|
result_json_path=result_file,
|
|
translation_json_path=translation_file,
|
|
output_path=output_path
|
|
)
|
|
|
|
assert success is True
|
|
# Temp file should be cleaned up (we can't guarantee exact filename,
|
|
# but the method is responsible for cleanup)
|