- Add debug_font_path, demo_docs_dir, e2e_api_base_url to config.py - Fix hardcoded paths in pp_structure_debug.py, create_demo_images.py - Fix hardcoded paths in test files - Update .env.example with new configuration options - Update .gitignore to exclude AI development files (.claude/, openspec/, AGENTS.md, CLAUDE.md) - Add production startup script (start-prod.sh) - Add README.md with project documentation - Add 1panel Docker deployment files (docker-compose.yml, Dockerfiles, nginx.conf) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
144 lines
5.5 KiB
Python
144 lines
5.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Test translation service with DIFY API using real OCR results from storage/results/
|
|
"""
|
|
import json
|
|
import os
|
|
import pytest
|
|
from pathlib import Path
|
|
|
|
from app.services.dify_client import DifyClient, get_dify_client
|
|
from app.services.translation_service import TranslationService, get_translation_service
|
|
|
|
# Module-level pytest marker: applies the `integration` mark to every test
# collected from this file, so they can be selected or deselected with `-m`.
pytestmark = pytest.mark.integration
|
|
|
|
# (task_id, result_file) pairs for OCR results expected under RESULTS_DIR.
# Each result is looked up as RESULTS_DIR / task_id / result_file.
REAL_TASKS = [
    ("ca2b59a3-3362-4678-954f-cf0a9bcc152e", "img3_result.json"),
    ("8ab2f24d-992b-46a2-87dc-2e024e006ac7", "img1_result.json"),
    ("1c94bfbf-9391-444c-bebf-ae22fa3dad32", "edit_result.json"),
    ("c85fff69-9ddb-40b8-8a9b-ebb513c60f05", "scan_result.json"),
    ("0088e960-7b61-4cdf-bfe5-956960b00dd1", "scan2_result.json"),
    ("8eedd9ed-7aad-46d5-93ca-951352c954b9", "ppt_result.json"),
    ("992156c5-72b4-4e3d-8d43-cbb15f23e630", "edit3_result.json"),
    ("1484ba43-7484-4326-95a7-1544b181e9e8", "edit2_result.json"),
    ("e9a16bba-7d37-42f4-84c8-6624cb58fe19", "img2_result.json"),
]

# storage/results directory located two levels above this file.
RESULTS_DIR = Path(__file__).parent.parent.joinpath("storage", "results")
|
|
|
|
|
|
@pytest.fixture
def dify_client():
    """Provide the DIFY client returned by ``get_dify_client()``.

    The requesting test is skipped when the ``DIFY_API_KEY`` environment
    variable is unset or empty.
    """
    has_api_key = bool(os.getenv("DIFY_API_KEY"))
    if has_api_key:
        return get_dify_client()
    # pytest.skip raises, so nothing runs past this point.
    pytest.skip("Set DIFY_API_KEY to run real translation integration tests")
|
|
|
|
|
|
@pytest.fixture
def translation_service():
    """Provide the translation service returned by ``get_translation_service()``."""
    service = get_translation_service()
    return service
|
|
|
|
|
|
class TestDifyClient:
    """Checks against the DIFY API client supplied by the ``dify_client`` fixture."""

    def test_client_initialization(self, dify_client):
        """The client exists and exposes a non-None ``api_key``."""
        client = dify_client
        assert client is not None
        assert client.api_key is not None

    def test_simple_translation(self, dify_client):
        """Translate one short English sentence to zh-TW through the client.

        Passes when the response carries non-empty translated text and a
        positive token count; the result is printed for manual inspection.
        """
        text = "Hello, this is a test."
        response = dify_client.translate(text, "zh-TW")

        translated_text = response.translated_text
        assert translated_text is not None
        assert len(translated_text) > 0
        assert response.total_tokens > 0

        print(f"\nOriginal: {text}")
        print(f"Translated: {response.translated_text}")
        print(f"Tokens: {response.total_tokens}, Latency: {response.latency:.2f}s")
|
|
|
|
|
|
class TestTranslationServiceExtraction:
    """Checks for translatable-element extraction on stored OCR results."""

    def test_service_initialization(self, translation_service):
        """The service exists and holds a non-None ``dify_client``."""
        service = translation_service
        assert service is not None
        assert service.dify_client is not None

    @pytest.mark.parametrize("task_id,result_file", REAL_TASKS)
    def test_extract_translatable_elements(self, translation_service, task_id, result_file):
        """Run extraction on one stored OCR result and check the returned shapes.

        Skipped when the result file for ``task_id`` is not present under
        ``RESULTS_DIR``.
        """
        result_path = RESULTS_DIR / task_id / result_file
        if not result_path.exists():
            pytest.skip(f"Result file not found: {result_path}")

        ocr_result = json.loads(result_path.read_text(encoding='utf-8'))

        extracted = translation_service.extract_translatable_elements(ocr_result)
        elements, total_count = extracted

        # The call must yield a list of elements and a non-negative int count.
        assert isinstance(elements, list)
        assert isinstance(total_count, int)
        assert total_count >= 0

        print(f"\nTask {task_id}:")
        print(f" Extracted {len(elements)} translatable elements")
        print(f" Total elements in document: {total_count}")

        if not elements:
            return

        # Spot-check the attributes expected on the first extracted element.
        first = elements[0]
        for attribute in ('element_id', 'content', 'element_type'):
            assert hasattr(first, attribute)

        print(f" First element type: {first.element_type}")
        if len(first.content) > 50:
            print(f" First element preview: {first.content[:50]}...")
        else:
            print(f" First element: {first.content}")
|
|
|
|
|
|
class TestTranslationExecution:
    """Test actual translation via DIFY API"""

    @pytest.mark.parametrize("task_id,result_file", REAL_TASKS[:2])  # Test only first 2
    def test_translate_first_3_elements(self, translation_service, task_id, result_file):
        """Translate the first 3 elements of a stored OCR document to English.

        Skipped when ``DIFY_API_KEY`` is unset or empty, when the result file
        for ``task_id`` is missing under ``RESULTS_DIR``, or when extraction
        yields no translatable elements. Each translated element must have
        non-empty ``translated_content``.
        """
        # This test calls the real API through translation_service, so apply
        # the same guard the dify_client fixture uses instead of failing
        # when no key is configured.
        if not os.getenv("DIFY_API_KEY"):
            pytest.skip("Set DIFY_API_KEY to run real translation integration tests")

        result_path = RESULTS_DIR / task_id / result_file
        if not result_path.exists():
            pytest.skip(f"Result file not found: {result_path}")

        with open(result_path, 'r', encoding='utf-8') as f:
            ocr_result = json.load(f)

        elements, _ = translation_service.extract_translatable_elements(ocr_result)
        if not elements:
            pytest.skip("No translatable elements found")

        def preview(text, limit=30):
            """Return ``text`` cut to ``limit`` characters plus "..." when longer."""
            return text[:limit] + "..." if len(text) > limit else text

        print(f"\n{task_id} translations:")
        # Translate first 3 elements only to keep API usage small.
        for index, elem in enumerate(elements[:3], start=1):
            translated = translation_service.translate_item(elem, "en", f"test-{task_id}")

            assert translated.translated_content is not None
            assert len(translated.translated_content) > 0

            orig_preview = preview(elem.content)
            trans_preview = preview(translated.translated_content)
            print(f" [{index}] {orig_preview} -> {trans_preview}")

        # NOTE(review): reads a private attribute of the service for reporting only.
        print(f" Total tokens: {translation_service._total_tokens}")
|
|
|
|
|
|
if __name__ == "__main__":
    # Runs all tests in this file, including those that make real API calls.
    # To run the extraction tests only (no API calls), add "-k", "extraction"
    # to the argument list below.
    # Exit with pytest's return code so a failing run is visible to the
    # calling shell or CI job instead of always exiting 0.
    raise SystemExit(pytest.main([__file__, "-v", "--tb=short"]))
|