""" Unit tests for translation merging functionality. Tests the apply_translations() function and related utilities for merging translation data with UnifiedDocument structure. """ import pytest import json import tempfile from pathlib import Path from app.services.translation_service import ( apply_translations, _apply_table_translation, load_translation_json, find_translation_file, list_available_translations, TRANSLATABLE_TEXT_TYPES, TABLE_TYPE, ) class TestApplyTranslations: """Tests for apply_translations() function""" def test_apply_text_translation(self): """Test applying translation to text elements""" result_json = { "pages": [ { "page_number": 1, "elements": [ { "element_id": "text_1", "type": "text", "content": "Hello World", "bounding_box": {"x": 0, "y": 0, "width": 100, "height": 20} } ] } ] } translations = { "text_1": "你好世界" } result = apply_translations(result_json, translations) assert result["pages"][0]["elements"][0]["content"] == "你好世界" # Original should be unchanged assert result_json["pages"][0]["elements"][0]["content"] == "Hello World" def test_apply_multiple_translations(self): """Test applying translations to multiple elements""" result_json = { "pages": [ { "page_number": 1, "elements": [ {"element_id": "title_1", "type": "title", "content": "Title"}, {"element_id": "text_1", "type": "text", "content": "Body text"}, {"element_id": "header_1", "type": "header", "content": "Header"}, ] } ] } translations = { "title_1": "標題", "text_1": "正文", "header_1": "頁首" } result = apply_translations(result_json, translations) assert result["pages"][0]["elements"][0]["content"] == "標題" assert result["pages"][0]["elements"][1]["content"] == "正文" assert result["pages"][0]["elements"][2]["content"] == "頁首" def test_preserve_non_translated_elements(self): """Test that elements without translations are preserved""" result_json = { "pages": [ { "page_number": 1, "elements": [ {"element_id": "text_1", "type": "text", "content": "Translate me"}, {"element_id": "text_2", "type": "text", "content": "Keep me"}, ] } ] } translations = { "text_1": "翻譯我" } result = apply_translations(result_json, translations) assert result["pages"][0]["elements"][0]["content"] == "翻譯我" assert result["pages"][0]["elements"][1]["content"] == "Keep me" def test_preserve_element_properties(self): """Test that element properties (bounding_box, style_info) are preserved""" result_json = { "pages": [ { "page_number": 1, "elements": [ { "element_id": "text_1", "type": "text", "content": "Original", "bounding_box": {"x": 10, "y": 20, "width": 100, "height": 30}, "style_info": {"font_size": 12, "font_name": "Arial"} } ] } ] } translations = {"text_1": "Translated"} result = apply_translations(result_json, translations) elem = result["pages"][0]["elements"][0] assert elem["content"] == "Translated" assert elem["bounding_box"] == {"x": 10, "y": 20, "width": 100, "height": 30} assert elem["style_info"] == {"font_size": 12, "font_name": "Arial"} def test_multi_page_document(self): """Test translation across multiple pages""" result_json = { "pages": [ { "page_number": 1, "elements": [{"element_id": "p1_text", "type": "text", "content": "Page 1"}] }, { "page_number": 2, "elements": [{"element_id": "p2_text", "type": "text", "content": "Page 2"}] } ] } translations = { "p1_text": "第一頁", "p2_text": "第二頁" } result = apply_translations(result_json, translations) assert result["pages"][0]["elements"][0]["content"] == "第一頁" assert result["pages"][1]["elements"][0]["content"] == "第二頁" def test_all_translatable_types(self): """Test that all translatable text types are handled""" elements = [] translations = {} for i, elem_type in enumerate(TRANSLATABLE_TEXT_TYPES): elem_id = f"{elem_type}_{i}" elements.append({ "element_id": elem_id, "type": elem_type, "content": f"Original {elem_type}" }) translations[elem_id] = f"Translated {elem_type}" result_json = {"pages": [{"page_number": 1, "elements": elements}]} result = apply_translations(result_json, translations) for i, elem_type in enumerate(TRANSLATABLE_TEXT_TYPES): assert result["pages"][0]["elements"][i]["content"] == f"Translated {elem_type}" def test_skip_non_translatable_types(self): """Test that non-translatable types are not modified even with translation""" result_json = { "pages": [ { "page_number": 1, "elements": [ {"element_id": "img_1", "type": "image", "content": "image.png"}, {"element_id": "chart_1", "type": "chart", "content": "chart data"}, ] } ] } # Even though translations exist, image/chart should not be modified translations = { "img_1": "Should not apply", "chart_1": "Should not apply" } result = apply_translations(result_json, translations) assert result["pages"][0]["elements"][0]["content"] == "image.png" assert result["pages"][0]["elements"][1]["content"] == "chart data" def test_empty_translations(self): """Test with empty translations dict""" result_json = { "pages": [ { "page_number": 1, "elements": [{"element_id": "text_1", "type": "text", "content": "Original"}] } ] } result = apply_translations(result_json, {}) assert result["pages"][0]["elements"][0]["content"] == "Original" def test_empty_document(self): """Test with empty document""" result_json = {"pages": []} translations = {"text_1": "Translation"} result = apply_translations(result_json, translations) assert result["pages"] == [] class TestApplyTableTranslation: """Tests for _apply_table_translation() function""" def test_apply_table_cell_translation(self): """Test applying translations to table cells""" table_elem = { "element_id": "table_1", "type": "table", "content": { "cells": [ {"row": 0, "col": 0, "content": "Header 1"}, {"row": 0, "col": 1, "content": "Header 2"}, {"row": 1, "col": 0, "content": "Data 1"}, {"row": 1, "col": 1, "content": "Data 2"}, ] } } translation = { "cells": [ {"row": 0, "col": 0, "content": "表頭 1"}, {"row": 0, "col": 1, "content": "表頭 2"}, {"row": 1, "col": 0, "content": "資料 1"}, {"row": 1, "col": 1, "content": "資料 2"}, ] } _apply_table_translation(table_elem, translation) cells = table_elem["content"]["cells"] assert cells[0]["content"] == "表頭 1" assert cells[1]["content"] == "表頭 2" assert cells[2]["content"] == "資料 1" assert cells[3]["content"] == "資料 2" def test_partial_table_translation(self): """Test partial translation of table cells""" table_elem = { "element_id": "table_1", "type": "table", "content": { "cells": [ {"row": 0, "col": 0, "content": "A"}, {"row": 0, "col": 1, "content": "B"}, {"row": 1, "col": 0, "content": "C"}, {"row": 1, "col": 1, "content": "D"}, ] } } # Only translate some cells translation = { "cells": [ {"row": 0, "col": 0, "content": "甲"}, {"row": 1, "col": 1, "content": "丁"}, ] } _apply_table_translation(table_elem, translation) cells = table_elem["content"]["cells"] assert cells[0]["content"] == "甲" # Translated assert cells[1]["content"] == "B" # Original assert cells[2]["content"] == "C" # Original assert cells[3]["content"] == "丁" # Translated def test_table_with_empty_cells(self): """Test table with empty cells list""" table_elem = { "element_id": "table_1", "type": "table", "content": {"cells": []} } translation = { "cells": [{"row": 0, "col": 0, "content": "New"}] } # Should not raise error _apply_table_translation(table_elem, translation) assert table_elem["content"]["cells"] == [] def test_table_translation_via_apply_translations(self): """Test table translation through main apply_translations function""" result_json = { "pages": [ { "page_number": 1, "elements": [ { "element_id": "table_1", "type": "table", "content": { "cells": [ {"row": 0, "col": 0, "content": "Name"}, {"row": 0, "col": 1, "content": "Value"}, ] } } ] } ] } translations = { "table_1": { "cells": [ {"row": 0, "col": 0, "content": "名稱"}, {"row": 0, "col": 1, "content": "數值"}, ] } } result = apply_translations(result_json, translations) cells = result["pages"][0]["elements"][0]["content"]["cells"] assert cells[0]["content"] == "名稱" assert cells[1]["content"] == "數值" class TestTranslationFileUtilities: """Tests for translation file utility functions""" def test_load_translation_json(self, tmp_path): """Test loading translation JSON file""" translation_data = { "translations": {"text_1": "Translation"}, "target_lang": "zh-TW" } translation_file = tmp_path / "test_translated_zh-TW.json" translation_file.write_text(json.dumps(translation_data), encoding='utf-8') result = load_translation_json(translation_file) assert result is not None assert result["translations"]["text_1"] == "Translation" assert result["target_lang"] == "zh-TW" def test_load_translation_json_not_found(self, tmp_path): """Test loading non-existent translation file""" non_existent = tmp_path / "does_not_exist.json" result = load_translation_json(non_existent) assert result is None def test_find_translation_file(self, tmp_path): """Test finding translation file by language""" # Create test files (tmp_path / "doc_translated_en.json").write_text("{}", encoding='utf-8') (tmp_path / "doc_translated_zh-TW.json").write_text("{}", encoding='utf-8') result = find_translation_file(tmp_path, "zh-TW") assert result is not None assert result.name == "doc_translated_zh-TW.json" def test_find_translation_file_not_found(self, tmp_path): """Test finding non-existent translation file""" (tmp_path / "doc_translated_en.json").write_text("{}", encoding='utf-8') result = find_translation_file(tmp_path, "ja") assert result is None def test_list_available_translations(self, tmp_path): """Test listing available translation languages""" (tmp_path / "doc_translated_en.json").write_text("{}", encoding='utf-8') (tmp_path / "doc_translated_zh-TW.json").write_text("{}", encoding='utf-8') (tmp_path / "doc_translated_ja.json").write_text("{}", encoding='utf-8') (tmp_path / "other_file.json").write_text("{}", encoding='utf-8') result = list_available_translations(tmp_path) assert set(result) == {"en", "zh-TW", "ja"} def test_list_available_translations_empty(self, tmp_path): """Test listing when no translations exist""" (tmp_path / "result.json").write_text("{}", encoding='utf-8') result = list_available_translations(tmp_path) assert result == [] class TestDeepCopyBehavior: """Tests to verify deep copy behavior""" def test_original_not_modified(self): """Test that original document is not modified""" original = { "pages": [ { "page_number": 1, "elements": [ {"element_id": "text_1", "type": "text", "content": "Original"} ] } ] } original_content = original["pages"][0]["elements"][0]["content"] translations = {"text_1": "Modified"} result = apply_translations(original, translations) # Original should be unchanged assert original["pages"][0]["elements"][0]["content"] == original_content # Result should have translation assert result["pages"][0]["elements"][0]["content"] == "Modified" def test_nested_objects_are_copied(self): """Test that nested objects are properly deep copied""" original = { "pages": [ { "page_number": 1, "elements": [ { "element_id": "table_1", "type": "table", "content": { "cells": [ {"row": 0, "col": 0, "content": "Original"} ] } } ] } ] } original_cell_content = original["pages"][0]["elements"][0]["content"]["cells"][0]["content"] translations = { "table_1": {"cells": [{"row": 0, "col": 0, "content": "Modified"}]} } result = apply_translations(original, translations) # Original nested content should be unchanged assert original["pages"][0]["elements"][0]["content"]["cells"][0]["content"] == original_cell_content class TestEdgeCases: """Tests for edge cases and error handling""" def test_missing_element_id(self): """Test handling elements without element_id""" result_json = { "pages": [ { "page_number": 1, "elements": [ {"type": "text", "content": "No ID element"} ] } ] } translations = {"text_1": "Translation"} # Should not raise error result = apply_translations(result_json, translations) assert result["pages"][0]["elements"][0]["content"] == "No ID element" def test_missing_type(self): """Test handling elements without type""" result_json = { "pages": [ { "page_number": 1, "elements": [ {"element_id": "elem_1", "content": "No type"} ] } ] } translations = {"elem_1": "Translation"} # Should not raise error, should not apply translation without matching type result = apply_translations(result_json, translations) # Content unchanged because type doesn't match TRANSLATABLE_TEXT_TYPES assert result["pages"][0]["elements"][0]["content"] == "No type" def test_unicode_translations(self): """Test handling of various unicode characters""" result_json = { "pages": [ { "page_number": 1, "elements": [ {"element_id": "text_1", "type": "text", "content": "English"}, {"element_id": "text_2", "type": "text", "content": "More text"}, {"element_id": "text_3", "type": "text", "content": "Another"}, ] } ] } translations = { "text_1": "日本語テキスト", # Japanese "text_2": "한국어 텍스트", # Korean "text_3": "العربية" # Arabic } result = apply_translations(result_json, translations) assert result["pages"][0]["elements"][0]["content"] == "日本語テキスト" assert result["pages"][0]["elements"][1]["content"] == "한국어 텍스트" assert result["pages"][0]["elements"][2]["content"] == "العربية"