#!/usr/bin/env python3
"""
Test the translation service against the DIFY API
using real OCR results from storage/results/.

The extraction tests only read local result files; the client and
execution tests call the DIFY API through the project's client/service
factories.
"""

import json
from pathlib import Path

import pytest

from app.services.dify_client import DifyClient, get_dify_client
from app.services.translation_service import TranslationService, get_translation_service

# Real task IDs with their result files
REAL_TASKS = [
    ("ca2b59a3-3362-4678-954f-cf0a9bcc152e", "img3_result.json"),
    ("8ab2f24d-992b-46a2-87dc-2e024e006ac7", "img1_result.json"),
    ("1c94bfbf-9391-444c-bebf-ae22fa3dad32", "edit_result.json"),
    ("c85fff69-9ddb-40b8-8a9b-ebb513c60f05", "scan_result.json"),
    ("0088e960-7b61-4cdf-bfe5-956960b00dd1", "scan2_result.json"),
    ("8eedd9ed-7aad-46d5-93ca-951352c954b9", "ppt_result.json"),
    ("992156c5-72b4-4e3d-8d43-cbb15f23e630", "edit3_result.json"),
    ("1484ba43-7484-4326-95a7-1544b181e9e8", "edit2_result.json"),
    ("e9a16bba-7d37-42f4-84c8-6624cb58fe19", "img2_result.json"),
]

# Result files live at <two directories above this file>/storage/results/<task_id>/
RESULTS_DIR = Path(__file__).parent.parent / "storage" / "results"


def _load_ocr_result(task_id, result_file):
    """Load the OCR result JSON for a task, skipping the test if it is missing.

    Args:
        task_id: Directory name under RESULTS_DIR.
        result_file: JSON file name inside that directory.

    Returns:
        The parsed JSON document.
    """
    result_path = RESULTS_DIR / task_id / result_file
    if not result_path.exists():
        # Result files are local artifacts; their absence is not a failure.
        pytest.skip(f"Result file not found: {result_path}")

    with open(result_path, "r", encoding="utf-8") as f:
        return json.load(f)


def _preview(text, limit):
    """Return text unchanged, or its first `limit` characters plus "..." if longer."""
    return text[:limit] + "..." if len(text) > limit else text


@pytest.fixture
def dify_client():
    """Get DIFY client instance"""
    return get_dify_client()


@pytest.fixture
def translation_service():
    """Get translation service instance"""
    return get_translation_service()


class TestDifyClient:
    """Test DIFY API client"""

    def test_client_initialization(self, dify_client):
        """Test client can be initialized"""
        assert dify_client is not None
        assert dify_client.api_key is not None

    def test_simple_translation(self, dify_client):
        """Test simple translation via DIFY API"""
        text = "Hello, this is a test."

        response = dify_client.translate(text, "zh-TW")

        assert response.translated_text is not None
        assert len(response.translated_text) > 0
        assert response.total_tokens > 0

        print(f"\nOriginal: {text}")
        print(f"Translated: {response.translated_text}")
        print(f"Tokens: {response.total_tokens}, Latency: {response.latency:.2f}s")


class TestTranslationServiceExtraction:
    """Test element extraction"""

    def test_service_initialization(self, translation_service):
        """Test service can be initialized"""
        assert translation_service is not None
        assert translation_service.dify_client is not None

    @pytest.mark.parametrize("task_id,result_file", REAL_TASKS)
    def test_extract_translatable_elements(self, translation_service, task_id, result_file):
        """Test extracting translatable elements from real OCR results"""
        ocr_result = _load_ocr_result(task_id, result_file)

        elements, total_count = translation_service.extract_translatable_elements(ocr_result)

        # Verify extraction
        assert isinstance(elements, list)
        assert isinstance(total_count, int)
        assert total_count >= 0

        print(f"\nTask {task_id}:")
        print(f" Extracted {len(elements)} translatable elements")
        print(f" Total elements in document: {total_count}")

        if elements:
            first = elements[0]
            assert hasattr(first, 'element_id')
            assert hasattr(first, 'content')
            assert hasattr(first, 'element_type')
            print(f" First element type: {first.element_type}")
            # Long content is shown as a 50-character preview; short content in full.
            if len(first.content) > 50:
                print(f" First element preview: {first.content[:50]}...")
            else:
                print(f" First element: {first.content}")


class TestTranslationExecution:
    """Test actual translation via DIFY API"""

    @pytest.mark.parametrize("task_id,result_file", REAL_TASKS[:2])  # Test only first 2
    def test_translate_first_3_elements(self, translation_service, task_id, result_file):
        """Test translating first 3 elements from a real OCR document"""
        ocr_result = _load_ocr_result(task_id, result_file)

        elements, _ = translation_service.extract_translatable_elements(ocr_result)
        if not elements:
            pytest.skip("No translatable elements found")

        # Translate first 3 elements only
        elements_to_translate = elements[:3]

        print(f"\n{task_id} translations:")
        for position, elem in enumerate(elements_to_translate, start=1):
            translated = translation_service.translate_item(elem, "en", f"test-{task_id}")

            assert translated.translated_content is not None
            assert len(translated.translated_content) > 0

            orig_preview = _preview(elem.content, 30)
            trans_preview = _preview(translated.translated_content, 30)
            print(f" [{position}] {orig_preview} -> {trans_preview}")

        print(f" Total tokens: {translation_service._total_tokens}")


if __name__ == "__main__":
    # To run the extraction tests only (no API calls), use instead:
    # pytest.main([__file__, "-v", "-k", "extraction", "--tb=short"])

    # Run all tests including API calls, and propagate pytest's exit code
    # so a failing run is visible to the calling shell or CI job.
    raise SystemExit(pytest.main([__file__, "-v", "--tb=short"]))