Adds the ability to download translated documents as PDF files while
preserving the original document layout. Key changes:
- Add apply_translations() function to merge translation JSON with UnifiedDocument
- Add generate_translated_pdf() method to PDFGeneratorService
- Add POST /api/v2/translate/{task_id}/pdf endpoint
- Add downloadTranslatedPdf() method and PDF button in frontend
- Add comprehensive unit tests (52 tests: merge, PDF generation, API endpoints)
- Archive add-translated-pdf-export proposal
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
524 lines
18 KiB
Python
524 lines
18 KiB
Python
"""
Unit tests for translation merging functionality.

Tests the apply_translations() function and related utilities
for merging translation data with UnifiedDocument structure.
"""
|
|
|
|
import pytest
|
|
import json
|
|
import tempfile
|
|
from pathlib import Path
|
|
|
|
from app.services.translation_service import (
|
|
apply_translations,
|
|
_apply_table_translation,
|
|
load_translation_json,
|
|
find_translation_file,
|
|
list_available_translations,
|
|
TRANSLATABLE_TEXT_TYPES,
|
|
TABLE_TYPE,
|
|
)
|
|
|
|
|
|
class TestApplyTranslations:
    """Behavioural checks for apply_translations() on page/element documents."""

    @staticmethod
    def _single_page(elements):
        """Wrap *elements* in a document dict holding exactly one page."""
        return {"pages": [{"page_number": 1, "elements": elements}]}

    def test_apply_text_translation(self):
        """A text element's content is swapped for its translation."""
        source_doc = self._single_page([
            {
                "element_id": "text_1",
                "type": "text",
                "content": "Hello World",
                "bounding_box": {"x": 0, "y": 0, "width": 100, "height": 20},
            },
        ])

        translated_doc = apply_translations(source_doc, {"text_1": "你好世界"})

        assert translated_doc["pages"][0]["elements"][0]["content"] == "你好世界"
        # The caller's document must come back untouched.
        assert source_doc["pages"][0]["elements"][0]["content"] == "Hello World"

    def test_apply_multiple_translations(self):
        """Several elements on one page each receive their own translation."""
        source_doc = self._single_page([
            {"element_id": "title_1", "type": "title", "content": "Title"},
            {"element_id": "text_1", "type": "text", "content": "Body text"},
            {"element_id": "header_1", "type": "header", "content": "Header"},
        ])
        mapping = {
            "title_1": "標題",
            "text_1": "正文",
            "header_1": "頁首",
        }

        translated_elements = apply_translations(source_doc, mapping)["pages"][0]["elements"]

        assert translated_elements[0]["content"] == "標題"
        assert translated_elements[1]["content"] == "正文"
        assert translated_elements[2]["content"] == "頁首"

    def test_preserve_non_translated_elements(self):
        """Elements with no entry in the translation mapping keep their content."""
        source_doc = self._single_page([
            {"element_id": "text_1", "type": "text", "content": "Translate me"},
            {"element_id": "text_2", "type": "text", "content": "Keep me"},
        ])

        translated_elements = apply_translations(
            source_doc, {"text_1": "翻譯我"}
        )["pages"][0]["elements"]

        assert translated_elements[0]["content"] == "翻譯我"
        assert translated_elements[1]["content"] == "Keep me"

    def test_preserve_element_properties(self):
        """bounding_box and style_info survive translation of the content."""
        source_doc = self._single_page([
            {
                "element_id": "text_1",
                "type": "text",
                "content": "Original",
                "bounding_box": {"x": 10, "y": 20, "width": 100, "height": 30},
                "style_info": {"font_size": 12, "font_name": "Arial"},
            },
        ])

        translated_doc = apply_translations(source_doc, {"text_1": "Translated"})

        element = translated_doc["pages"][0]["elements"][0]
        assert element["content"] == "Translated"
        assert element["bounding_box"] == {"x": 10, "y": 20, "width": 100, "height": 30}
        assert element["style_info"] == {"font_size": 12, "font_name": "Arial"}

    def test_multi_page_document(self):
        """Translations are applied on every page, not only the first."""
        first_page = {
            "page_number": 1,
            "elements": [{"element_id": "p1_text", "type": "text", "content": "Page 1"}],
        }
        second_page = {
            "page_number": 2,
            "elements": [{"element_id": "p2_text", "type": "text", "content": "Page 2"}],
        }
        source_doc = {"pages": [first_page, second_page]}
        mapping = {
            "p1_text": "第一頁",
            "p2_text": "第二頁",
        }

        translated_pages = apply_translations(source_doc, mapping)["pages"]

        assert translated_pages[0]["elements"][0]["content"] == "第一頁"
        assert translated_pages[1]["elements"][0]["content"] == "第二頁"

    def test_all_translatable_types(self):
        """Every type listed in TRANSLATABLE_TEXT_TYPES gets translated."""
        # One element per translatable type, with an id derived from type + position.
        elements = [
            {
                "element_id": f"{kind}_{position}",
                "type": kind,
                "content": f"Original {kind}",
            }
            for position, kind in enumerate(TRANSLATABLE_TEXT_TYPES)
        ]
        mapping = {
            f"{kind}_{position}": f"Translated {kind}"
            for position, kind in enumerate(TRANSLATABLE_TEXT_TYPES)
        }

        source_doc = {"pages": [{"page_number": 1, "elements": elements}]}
        translated_doc = apply_translations(source_doc, mapping)

        for position, kind in enumerate(TRANSLATABLE_TEXT_TYPES):
            translated_element = translated_doc["pages"][0]["elements"][position]
            assert translated_element["content"] == f"Translated {kind}"

    def test_skip_non_translatable_types(self):
        """image/chart elements stay as they are even when a translation is supplied."""
        source_doc = self._single_page([
            {"element_id": "img_1", "type": "image", "content": "image.png"},
            {"element_id": "chart_1", "type": "chart", "content": "chart data"},
        ])
        # Entries exist for both ids, yet neither type is expected to be rewritten.
        mapping = {
            "img_1": "Should not apply",
            "chart_1": "Should not apply",
        }

        translated_elements = apply_translations(source_doc, mapping)["pages"][0]["elements"]

        assert translated_elements[0]["content"] == "image.png"
        assert translated_elements[1]["content"] == "chart data"

    def test_empty_translations(self):
        """An empty mapping leaves the document content as it was."""
        source_doc = self._single_page(
            [{"element_id": "text_1", "type": "text", "content": "Original"}]
        )

        translated_doc = apply_translations(source_doc, {})

        assert translated_doc["pages"][0]["elements"][0]["content"] == "Original"

    def test_empty_document(self):
        """A document without pages comes back with an empty page list."""
        source_doc = {"pages": []}

        translated_doc = apply_translations(source_doc, {"text_1": "Translation"})

        assert translated_doc["pages"] == []
|
|
|
|
|
|
class TestApplyTableTranslation:
    """Behavioural checks for _apply_table_translation(), which edits the table in place."""

    @staticmethod
    def _cells(*triples):
        """Build a list of cell dicts from (row, col, content) triples."""
        return [
            {"row": row, "col": col, "content": text}
            for row, col, text in triples
        ]

    @classmethod
    def _table(cls, *triples):
        """Build a table element with id ``table_1`` holding the given cells."""
        return {
            "element_id": "table_1",
            "type": "table",
            "content": {"cells": cls._cells(*triples)},
        }

    def test_apply_table_cell_translation(self):
        """Every cell with a matching (row, col) translation is rewritten."""
        table = self._table(
            (0, 0, "Header 1"),
            (0, 1, "Header 2"),
            (1, 0, "Data 1"),
            (1, 1, "Data 2"),
        )
        cell_translations = {
            "cells": self._cells(
                (0, 0, "表頭 1"),
                (0, 1, "表頭 2"),
                (1, 0, "資料 1"),
                (1, 1, "資料 2"),
            )
        }

        _apply_table_translation(table, cell_translations)

        translated_cells = table["content"]["cells"]
        assert translated_cells[0]["content"] == "表頭 1"
        assert translated_cells[1]["content"] == "表頭 2"
        assert translated_cells[2]["content"] == "資料 1"
        assert translated_cells[3]["content"] == "資料 2"

    def test_partial_table_translation(self):
        """Cells without a translation entry keep their original content."""
        table = self._table(
            (0, 0, "A"),
            (0, 1, "B"),
            (1, 0, "C"),
            (1, 1, "D"),
        )
        # Translations are supplied for two of the four cells only.
        cell_translations = {
            "cells": self._cells(
                (0, 0, "甲"),
                (1, 1, "丁"),
            )
        }

        _apply_table_translation(table, cell_translations)

        translated_cells = table["content"]["cells"]
        assert translated_cells[0]["content"] == "甲"  # translated
        assert translated_cells[1]["content"] == "B"  # untouched
        assert translated_cells[2]["content"] == "C"  # untouched
        assert translated_cells[3]["content"] == "丁"  # translated

    def test_table_with_empty_cells(self):
        """A table with no cells stays empty and the call does not raise."""
        table = self._table()
        cell_translations = {"cells": self._cells((0, 0, "New"))}

        # The call itself must succeed; no cell may be added.
        _apply_table_translation(table, cell_translations)
        assert table["content"]["cells"] == []

    def test_table_translation_via_apply_translations(self):
        """Table cells are translated when going through apply_translations()."""
        source_doc = {
            "pages": [
                {
                    "page_number": 1,
                    "elements": [
                        self._table((0, 0, "Name"), (0, 1, "Value")),
                    ],
                }
            ]
        }
        mapping = {
            "table_1": {
                "cells": self._cells((0, 0, "名稱"), (0, 1, "數值")),
            }
        }

        translated_doc = apply_translations(source_doc, mapping)

        translated_cells = translated_doc["pages"][0]["elements"][0]["content"]["cells"]
        assert translated_cells[0]["content"] == "名稱"
        assert translated_cells[1]["content"] == "數值"
|
|
|
|
|
|
class TestTranslationFileUtilities:
    """Checks for the helpers that load, locate and list translation files."""

    def test_load_translation_json(self, tmp_path):
        """A translation file on disk is loaded back as the same data."""
        payload = json.dumps({
            "translations": {"text_1": "Translation"},
            "target_lang": "zh-TW",
        })
        json_path = tmp_path / "test_translated_zh-TW.json"
        json_path.write_text(payload, encoding='utf-8')

        loaded = load_translation_json(json_path)

        assert loaded is not None
        assert loaded["translations"]["text_1"] == "Translation"
        assert loaded["target_lang"] == "zh-TW"

    def test_load_translation_json_not_found(self, tmp_path):
        """Loading a path that does not exist yields None."""
        missing_path = tmp_path / "does_not_exist.json"

        loaded = load_translation_json(missing_path)

        assert loaded is None

    def test_find_translation_file(self, tmp_path):
        """The file matching the requested language is picked from the directory."""
        # Two candidate files; only the zh-TW one should be returned.
        for file_name in ("doc_translated_en.json", "doc_translated_zh-TW.json"):
            (tmp_path / file_name).write_text("{}", encoding='utf-8')

        found = find_translation_file(tmp_path, "zh-TW")

        assert found is not None
        assert found.name == "doc_translated_zh-TW.json"

    def test_find_translation_file_not_found(self, tmp_path):
        """Asking for a language with no file yields None."""
        (tmp_path / "doc_translated_en.json").write_text("{}", encoding='utf-8')

        found = find_translation_file(tmp_path, "ja")

        assert found is None

    def test_list_available_translations(self, tmp_path):
        """Only languages of translation files are listed; other JSON files are ignored."""
        file_names = (
            "doc_translated_en.json",
            "doc_translated_zh-TW.json",
            "doc_translated_ja.json",
            "other_file.json",
        )
        for file_name in file_names:
            (tmp_path / file_name).write_text("{}", encoding='utf-8')

        languages = list_available_translations(tmp_path)

        assert set(languages) == {"en", "zh-TW", "ja"}

    def test_list_available_translations_empty(self, tmp_path):
        """A directory without translation files gives an empty list."""
        (tmp_path / "result.json").write_text("{}", encoding='utf-8')

        languages = list_available_translations(tmp_path)

        assert languages == []
|
|
|
|
|
|
class TestDeepCopyBehavior:
    """Tests to verify that apply_translations() leaves its input untouched.

    Each test passes a document to apply_translations(), then checks both
    sides: the caller's document still holds the original text and the
    returned document holds the translated text.
    """

    def test_original_not_modified(self):
        """Test that the original document is not modified for a text element."""
        original = {
            "pages": [
                {
                    "page_number": 1,
                    "elements": [
                        {"element_id": "text_1", "type": "text", "content": "Original"}
                    ]
                }
            ]
        }
        # Snapshot the value before the call so the comparison is against the
        # pre-call state rather than a literal repeated by hand.
        original_content = original["pages"][0]["elements"][0]["content"]
        translations = {"text_1": "Modified"}

        result = apply_translations(original, translations)

        # Original should be unchanged
        assert original["pages"][0]["elements"][0]["content"] == original_content
        # Result should have translation
        assert result["pages"][0]["elements"][0]["content"] == "Modified"

    def test_nested_objects_are_copied(self):
        """Test that nested objects (table cells) are copied, not shared."""
        original = {
            "pages": [
                {
                    "page_number": 1,
                    "elements": [
                        {
                            "element_id": "table_1",
                            "type": "table",
                            "content": {
                                "cells": [
                                    {"row": 0, "col": 0, "content": "Original"}
                                ]
                            }
                        }
                    ]
                }
            ]
        }
        original_cell_content = original["pages"][0]["elements"][0]["content"]["cells"][0]["content"]

        translations = {
            "table_1": {"cells": [{"row": 0, "col": 0, "content": "Modified"}]}
        }

        result = apply_translations(original, translations)

        # Original nested content should be unchanged
        assert original["pages"][0]["elements"][0]["content"]["cells"][0]["content"] == original_cell_content
        # The translation must actually land in the returned copy; without this
        # check the test would also pass if the table translation were dropped.
        assert result["pages"][0]["elements"][0]["content"]["cells"][0]["content"] == "Modified"
|
|
|
|
|
|
class TestEdgeCases:
    """Checks for malformed elements and non-Latin translation text."""

    @staticmethod
    def _single_page(elements):
        """Wrap *elements* in a document dict holding exactly one page."""
        return {"pages": [{"page_number": 1, "elements": elements}]}

    def test_missing_element_id(self):
        """An element lacking element_id is passed through without error."""
        source_doc = self._single_page(
            [{"type": "text", "content": "No ID element"}]
        )

        # The call must succeed and leave the content alone.
        translated_doc = apply_translations(source_doc, {"text_1": "Translation"})
        assert translated_doc["pages"][0]["elements"][0]["content"] == "No ID element"

    def test_missing_type(self):
        """An element lacking a type is not translated and does not raise."""
        source_doc = self._single_page(
            [{"element_id": "elem_1", "content": "No type"}]
        )

        # A translation exists for the id, but with no type there is nothing
        # to match against TRANSLATABLE_TEXT_TYPES, so content stays as-is.
        translated_doc = apply_translations(source_doc, {"elem_1": "Translation"})
        assert translated_doc["pages"][0]["elements"][0]["content"] == "No type"

    def test_unicode_translations(self):
        """Japanese, Korean and Arabic translation strings are applied verbatim."""
        source_doc = self._single_page([
            {"element_id": "text_1", "type": "text", "content": "English"},
            {"element_id": "text_2", "type": "text", "content": "More text"},
            {"element_id": "text_3", "type": "text", "content": "Another"},
        ])
        mapping = {
            "text_1": "日本語テキスト",  # Japanese
            "text_2": "한국어 텍스트",  # Korean
            "text_3": "العربية",  # Arabic
        }

        translated_elements = apply_translations(source_doc, mapping)["pages"][0]["elements"]

        assert translated_elements[0]["content"] == "日本語テキスト"
        assert translated_elements[1]["content"] == "한국어 텍스트"
        assert translated_elements[2]["content"] == "العربية"
|