"""
Unit tests for translated PDF generation functionality.

Tests the generate_translated_pdf() method in PDFGeneratorService
and track-specific behavior (Direct, OCR, Hybrid).
"""

import pytest
import json
import tempfile
from pathlib import Path
from unittest.mock import patch, MagicMock

from app.services.pdf_generator_service import PDFGeneratorService
from app.services.translation_service import apply_translations


def _write_json(path: Path, payload: dict) -> Path:
    """Serialize *payload* as JSON into *path* (UTF-8) and return *path*."""
    path.write_text(json.dumps(payload), encoding='utf-8')
    return path


def _text_element(
    element_id: str,
    content: str,
    *,
    element_type: str = "text",
    y: int = 720,
    width: int = 200,
    height: int = 20,
    font_size: int = 12,
    font_name=None,
) -> dict:
    """Build one text-like element dict for a result-JSON page.

    ``font_name`` is only written into ``style_info`` when it is given, so
    elements created without it carry ``{"font_size": ...}`` alone.
    """
    style_info = {"font_size": font_size}
    if font_name is not None:
        style_info["font_name"] = font_name
    return {
        "element_id": element_id,
        "type": element_type,
        "content": content,
        "bounding_box": {"x": 72, "y": y, "width": width, "height": height},
        "style_info": style_info,
    }


def _page(page_number: int, elements: list) -> dict:
    """Build a 612x792 page dict holding *elements*."""
    return {
        "page_number": page_number,
        "width": 612,
        "height": 792,
        "elements": elements,
    }


@pytest.fixture
def pdf_service():
    """Create PDF generator service instance (shared by every class below)."""
    return PDFGeneratorService()


class TestGenerateTranslatedPDF:
    """Tests for generate_translated_pdf() method"""

    @pytest.fixture
    def sample_result_json(self, tmp_path):
        """Create sample result JSON file with one text and one title element."""
        result_data = {
            "metadata": {
                "processing_track": "direct",
                "source_file": "test.pdf",
                "page_count": 1,
            },
            "pages": [
                _page(1, [
                    _text_element(
                        "text_1", "Hello World", font_name="Helvetica"
                    ),
                    _text_element(
                        "title_1",
                        "Document Title",
                        element_type="title",
                        y=750,
                        width=300,
                        height=30,
                        font_size=18,
                        font_name="Helvetica-Bold",
                    ),
                ]),
            ],
        }
        return _write_json(tmp_path / "edit_result.json", result_data)

    @pytest.fixture
    def sample_translation_json(self, tmp_path):
        """Create sample translation JSON file covering both sample elements."""
        translation_data = {
            "target_lang": "zh-TW",
            "source_lang": "en",
            "translated_at": "2024-01-01T00:00:00Z",
            "translations": {
                "text_1": "你好世界",
                "title_1": "文件標題",
            },
            "statistics": {
                "translated_elements": 2,
                "total_characters": 100,
            },
        }
        return _write_json(
            tmp_path / "edit_translated_zh-TW.json", translation_data
        )

    def test_generate_translated_pdf_success(
        self, pdf_service, sample_result_json, sample_translation_json, tmp_path
    ):
        """Test successful translated PDF generation"""
        output_path = tmp_path / "output.pdf"

        success = pdf_service.generate_translated_pdf(
            result_json_path=sample_result_json,
            translation_json_path=sample_translation_json,
            output_path=output_path,
        )

        assert success is True
        assert output_path.exists()
        assert output_path.stat().st_size > 0

        # PDF files start with %PDF
        with open(output_path, 'rb') as f:
            header = f.read(4)
        assert header == b'%PDF'

    def test_generate_translated_pdf_missing_result(
        self, pdf_service, sample_translation_json, tmp_path
    ):
        """Test with missing result JSON file"""
        output_path = tmp_path / "output.pdf"
        missing_result = tmp_path / "non_existent.json"

        success = pdf_service.generate_translated_pdf(
            result_json_path=missing_result,
            translation_json_path=sample_translation_json,
            output_path=output_path,
        )

        assert success is False
        assert not output_path.exists()

    def test_generate_translated_pdf_missing_translation(
        self, pdf_service, sample_result_json, tmp_path
    ):
        """Test with missing translation JSON file"""
        output_path = tmp_path / "output.pdf"
        missing_translation = tmp_path / "non_existent_translation.json"

        success = pdf_service.generate_translated_pdf(
            result_json_path=sample_result_json,
            translation_json_path=missing_translation,
            output_path=output_path,
        )

        assert success is False
        assert not output_path.exists()

    def test_generate_translated_pdf_empty_translations(
        self, pdf_service, sample_result_json, tmp_path
    ):
        """Test with empty translations (should fall back to original)"""
        empty_translation_file = _write_json(
            tmp_path / "empty_translated.json",
            {"target_lang": "zh-TW", "translations": {}},
        )
        output_path = tmp_path / "output.pdf"

        success = pdf_service.generate_translated_pdf(
            result_json_path=sample_result_json,
            translation_json_path=empty_translation_file,
            output_path=output_path,
        )

        # Should succeed even with empty translations (uses original content)
        assert success is True
        assert output_path.exists()

    def test_generate_translated_pdf_partial_translations(
        self, pdf_service, sample_result_json, tmp_path
    ):
        """Test with partial translations (some elements not translated)"""
        partial_translation_file = _write_json(
            tmp_path / "partial_translated.json",
            {
                "target_lang": "zh-TW",
                "translations": {
                    "text_1": "你好世界",
                    # title_1 not translated
                },
            },
        )
        output_path = tmp_path / "output.pdf"

        success = pdf_service.generate_translated_pdf(
            result_json_path=sample_result_json,
            translation_json_path=partial_translation_file,
            output_path=output_path,
        )

        assert success is True
        assert output_path.exists()


class TestTrackSpecificPDFGeneration:
    """Tests for track-specific PDF generation behavior"""

    def create_result_with_track(self, tmp_path, track: str, with_table: bool = False):
        """Helper to create result JSON with specific track.

        The file is written to ``tmp_path / f"{track}_result.json"`` and its
        path is returned. With ``with_table=True`` a 2x2 table element is
        appended after the single text element.
        """
        elements = [_text_element("text_1", "Sample text content")]

        if with_table:
            elements.append({
                "element_id": "table_1",
                "type": "table",
                "content": {
                    "cells": [
                        {"row": 0, "col": 0, "content": "Header 1"},
                        {"row": 0, "col": 1, "content": "Header 2"},
                        {"row": 1, "col": 0, "content": "Data 1"},
                        {"row": 1, "col": 1, "content": "Data 2"},
                    ]
                },
                "bounding_box": {"x": 72, "y": 500, "width": 400, "height": 100},
            })

        result_data = {
            "metadata": {
                "processing_track": track,
                "source_file": f"test_{track}.pdf",
                "page_count": 1,
            },
            "pages": [_page(1, elements)],
        }
        return _write_json(tmp_path / f"{track}_result.json", result_data)

    def create_translation_for_track(self, tmp_path, track: str, with_table: bool = False):
        """Helper to create translation JSON.

        The file is written to ``tmp_path / f"{track}_translated_zh-TW.json"``
        and its path is returned. With ``with_table=True`` a translation for
        ``table_1`` (same cell layout as the result helper) is included.
        """
        translations = {"text_1": "翻譯的文字內容"}

        if with_table:
            translations["table_1"] = {
                "cells": [
                    {"row": 0, "col": 0, "content": "表頭 1"},
                    {"row": 0, "col": 1, "content": "表頭 2"},
                    {"row": 1, "col": 0, "content": "資料 1"},
                    {"row": 1, "col": 1, "content": "資料 2"},
                ]
            }

        translation_data = {
            "target_lang": "zh-TW",
            "translations": translations,
        }
        return _write_json(
            tmp_path / f"{track}_translated_zh-TW.json", translation_data
        )

    def _assert_track_pdf(
        self, pdf_service, tmp_path, track: str, output_name: str,
        with_table: bool = False,
    ):
        """Generate a translated PDF for *track* and assert a non-empty file.

        Not collected by pytest (no ``test_`` prefix); shared by the track
        tests below so each one only states its track and output name.
        """
        result_file = self.create_result_with_track(
            tmp_path, track, with_table=with_table
        )
        translation_file = self.create_translation_for_track(
            tmp_path, track, with_table=with_table
        )
        output_path = tmp_path / output_name

        success = pdf_service.generate_translated_pdf(
            result_json_path=result_file,
            translation_json_path=translation_file,
            output_path=output_path,
        )

        assert success is True
        assert output_path.exists()
        assert output_path.stat().st_size > 0

    def test_direct_track_pdf_generation(self, pdf_service, tmp_path):
        """Test PDF generation for Direct track documents"""
        self._assert_track_pdf(pdf_service, tmp_path, "direct", "direct_output.pdf")

    def test_ocr_track_pdf_generation(self, pdf_service, tmp_path):
        """Test PDF generation for OCR track documents"""
        self._assert_track_pdf(pdf_service, tmp_path, "ocr", "ocr_output.pdf")

    def test_hybrid_track_pdf_generation(self, pdf_service, tmp_path):
        """Test PDF generation for Hybrid track documents"""
        self._assert_track_pdf(pdf_service, tmp_path, "hybrid", "hybrid_output.pdf")

    def test_document_with_table_direct_track(self, pdf_service, tmp_path):
        """Test PDF generation for Direct track document with tables"""
        self._assert_track_pdf(
            pdf_service, tmp_path, "direct", "direct_table_output.pdf",
            with_table=True,
        )

    def test_document_with_table_ocr_track(self, pdf_service, tmp_path):
        """Test PDF generation for OCR track document with tables"""
        self._assert_track_pdf(
            pdf_service, tmp_path, "ocr", "ocr_table_output.pdf",
            with_table=True,
        )


class TestTranslationMergeIntegration:
    """Integration tests for translation merging with PDF generation"""

    def test_translations_applied_to_pdf(self, pdf_service, tmp_path):
        """Test that a PDF is produced from a result plus its translation."""
        # Create result with specific content
        result_file = _write_json(
            tmp_path / "result.json",
            {
                "metadata": {"processing_track": "direct"},
                "pages": [
                    _page(1, [_text_element("text_1", "ORIGINAL_MARKER_TEXT")]),
                ],
            },
        )

        # Create translation
        translation_file = _write_json(
            tmp_path / "translation.json",
            {"translations": {"text_1": "TRANSLATED_MARKER_TEXT"}},
        )

        output_path = tmp_path / "output.pdf"

        success = pdf_service.generate_translated_pdf(
            result_json_path=result_file,
            translation_json_path=translation_file,
            output_path=output_path,
        )

        assert success is True
        assert output_path.exists()

        # NOTE(review): only the PDF header is checked here; the file is not
        # searched for the marker text, so this does not by itself prove the
        # translation reached the rendered output.
        with open(output_path, 'rb') as f:
            pdf_content = f.read()

        # Check that the file is a valid PDF
        assert pdf_content.startswith(b'%PDF')

    def test_multi_page_translated_pdf(self, pdf_service, tmp_path):
        """Test translated PDF generation for multi-page documents"""
        result_file = _write_json(
            tmp_path / "multi_page_result.json",
            {
                "metadata": {"processing_track": "direct"},
                "pages": [
                    _page(1, [_text_element("p1_text", "Page 1 content")]),
                    _page(2, [_text_element("p2_text", "Page 2 content")]),
                ],
            },
        )

        translation_file = _write_json(
            tmp_path / "multi_page_translation.json",
            {
                "translations": {
                    "p1_text": "第一頁內容",
                    "p2_text": "第二頁內容",
                }
            },
        )

        output_path = tmp_path / "multi_page_output.pdf"

        success = pdf_service.generate_translated_pdf(
            result_json_path=result_file,
            translation_json_path=translation_file,
            output_path=output_path,
        )

        assert success is True
        assert output_path.exists()
        assert output_path.stat().st_size > 0


class TestErrorHandling:
    """Tests for error handling in translated PDF generation"""

    def test_invalid_json_result(self, pdf_service, tmp_path):
        """Test handling of invalid JSON in result file"""
        invalid_result = tmp_path / "invalid.json"
        invalid_result.write_text("{ invalid json }", encoding='utf-8')

        translation_file = _write_json(
            tmp_path / "translation.json", {"translations": {}}
        )

        output_path = tmp_path / "output.pdf"

        success = pdf_service.generate_translated_pdf(
            result_json_path=invalid_result,
            translation_json_path=translation_file,
            output_path=output_path,
        )

        assert success is False

    def test_invalid_json_translation(self, pdf_service, tmp_path):
        """Test handling of invalid JSON in translation file"""
        result_file = _write_json(
            tmp_path / "result.json", {"pages": [_page(1, [])]}
        )

        invalid_translation = tmp_path / "invalid_translation.json"
        invalid_translation.write_text("{ invalid json }", encoding='utf-8')

        output_path = tmp_path / "output.pdf"

        success = pdf_service.generate_translated_pdf(
            result_json_path=result_file,
            translation_json_path=invalid_translation,
            output_path=output_path,
        )

        assert success is False

    def test_temp_file_cleanup_on_success(self, pdf_service, tmp_path):
        """Test that generation succeeds on a minimal single-element document.

        NOTE(review): despite the name, nothing here inspects the temp
        directory. The name of any intermediate file is not known to this
        test, so cleanup is left to the method itself and is not asserted.
        """
        result_file = _write_json(
            tmp_path / "result.json",
            {"pages": [_page(1, [_text_element("text_1", "Test", width=100)])]},
        )

        translation_file = _write_json(
            tmp_path / "translation.json",
            {"translations": {"text_1": "測試"}},
        )

        output_path = tmp_path / "output.pdf"

        success = pdf_service.generate_translated_pdf(
            result_json_path=result_file,
            translation_json_path=translation_file,
            output_path=output_path,
        )

        assert success is True
        # Temp file should be cleaned up (we can't guarantee exact filename,
        # but the method is responsible for cleanup)