feat: add document translation via DIFY AI API

Implement document translation feature using DIFY AI API with batch processing: Backend: - Add DIFY client with batch translation support (5000 chars, 20 items per batch) - Add translation service with element extraction and result building - Add translation router with start/status/result/list/delete endpoints - Add translation schemas (TranslationRequest, TranslationStatus, etc.) Frontend: - Enable translation UI in TaskDetailPage - Add translation API methods to apiV2.ts - Add translation types Features: - Batch translation with numbered markers [1], [2], [3]... - Support for text, title, header, footer, paragraph, footnote, table cells - Translation result JSON with statistics (tokens, latency, batch_count) - Background task processing with progress tracking 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-02 11:57:02 +08:00
parent 87dc97d951
commit 8d9b69ba93
18 changed files with 2970 additions and 26 deletions
--- a/backend/tests/test_translation_real.py
+++ b/backend/tests/test_translation_real.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python3
+"""
+Test translation service with DIFY API using real OCR results from storage/results/
+"""
+import json
+import pytest
+from pathlib import Path
+
+from app.services.dify_client import DifyClient, get_dify_client
+from app.services.translation_service import TranslationService, get_translation_service
+
+# (task_id, result_filename) pairs; each file is looked up at RESULTS_DIR / task_id / result_filename
+REAL_TASKS = [
+    ("ca2b59a3-3362-4678-954f-cf0a9bcc152e", "img3_result.json"),
+    ("8ab2f24d-992b-46a2-87dc-2e024e006ac7", "img1_result.json"),
+    ("1c94bfbf-9391-444c-bebf-ae22fa3dad32", "edit_result.json"),
+    ("c85fff69-9ddb-40b8-8a9b-ebb513c60f05", "scan_result.json"),
+    ("0088e960-7b61-4cdf-bfe5-956960b00dd1", "scan2_result.json"),
+    ("8eedd9ed-7aad-46d5-93ca-951352c954b9", "ppt_result.json"),
+    ("992156c5-72b4-4e3d-8d43-cbb15f23e630", "edit3_result.json"),
+    ("1484ba43-7484-4326-95a7-1544b181e9e8", "edit2_result.json"),
+    ("e9a16bba-7d37-42f4-84c8-6624cb58fe19", "img2_result.json"),
+]
+# Result folders live under <parent of this file's directory>/storage/results (one sub-folder per task ID)
+RESULTS_DIR = Path(__file__).parent.parent / "storage" / "results"
+
+
+@pytest.fixture
+def dify_client():
+    """Provide the DIFY client object returned by ``get_dify_client()``."""
+    return get_dify_client()
+
+
+@pytest.fixture
+def translation_service():
+    """Provide the translation service object returned by ``get_translation_service()``."""
+    return get_translation_service()
+
+
+class TestDifyClient:
+    """Checks against the DIFY client supplied by the ``dify_client`` fixture."""
+
+    def test_client_initialization(self, dify_client):
+        """The fixture provides a client object that carries an API key."""
+        assert dify_client is not None
+        assert dify_client.api_key is not None
+
+    def test_simple_translation(self, dify_client):
+        """Translate one English sentence to zh-TW and check text and token count."""
+        source_text = "Hello, this is a test."
+        result = dify_client.translate(source_text, "zh-TW")
+        translated = result.translated_text
+
+        assert translated is not None
+        assert len(translated) > 0
+        assert result.total_tokens > 0
+        # Informational output only; nothing below is asserted.
+        print(f"\nOriginal: {source_text}\nTranslated: {translated}")
+        print(f"Tokens: {result.total_tokens}, Latency: {result.latency:.2f}s")
+
+
+class TestTranslationServiceExtraction:
+    """Tests for element extraction by the translation service."""
+
+    def test_service_initialization(self, translation_service):
+        """The fixture provides a service object that holds a DIFY client."""
+        assert translation_service is not None
+        assert translation_service.dify_client is not None
+
+    @pytest.mark.parametrize("task_id,result_file", REAL_TASKS)
+    def test_extract_translatable_elements(self, translation_service, task_id, result_file):
+        """Extract elements from one stored OCR result; skip when the file is absent."""
+        ocr_json_path = RESULTS_DIR / task_id / result_file
+        if not ocr_json_path.exists():
+            pytest.skip(f"Result file not found: {ocr_json_path}")
+        ocr_result = json.loads(ocr_json_path.read_text(encoding='utf-8'))
+
+        elements, total_count = translation_service.extract_translatable_elements(ocr_result)
+        # The call must return a list together with a non-negative integer count.
+        assert isinstance(elements, list)
+        assert isinstance(total_count, int)
+        assert total_count >= 0
+
+        print(f"\nTask {task_id}:")
+        print(f"  Extracted {len(elements)} translatable elements")
+        print(f"  Total elements in document: {total_count}")
+        if not elements:
+            return
+
+        first = elements[0]
+        for attr_name in ('element_id', 'content', 'element_type'):
+            assert hasattr(first, attr_name)
+        print(f"  First element type: {first.element_type}")
+        if len(first.content) > 50:
+            print(f"  First element preview: {first.content[:50]}...")
+        else:
+            print(f"  First element: {first.content}")
+
+
+class TestTranslationExecution:
+    """Tests that send stored OCR text through the translation service."""
+
+    @pytest.mark.parametrize("task_id,result_file", REAL_TASKS[:2])  # first two REAL_TASKS entries only
+    def test_translate_first_3_elements(self, translation_service, task_id, result_file):
+        """Translate up to three leading elements of a stored OCR result into "en"."""
+        ocr_json_path = RESULTS_DIR / task_id / result_file
+        if not ocr_json_path.exists():
+            pytest.skip(f"Result file not found: {ocr_json_path}")
+        ocr_result = json.loads(ocr_json_path.read_text(encoding='utf-8'))
+
+        elements, _unused_total = translation_service.extract_translatable_elements(ocr_result)
+        if not elements:
+            pytest.skip("No translatable elements found")
+
+        def preview(value):
+            # Shorten to 30 characters plus "..." when longer; otherwise unchanged.
+            return value[:30] + "..." if len(value) > 30 else value
+
+        print(f"\n{task_id} translations:")
+        # At most three elements are passed to translate_item.
+        for position, elem in enumerate(elements[:3], start=1):
+            translated = translation_service.translate_item(elem, "en", f"test-{task_id}")
+            output_text = translated.translated_content
+
+            assert output_text is not None
+            assert len(output_text) > 0
+            shown = f"{preview(elem.content)} -> {preview(output_text)}"
+            print(f"  [{position}] {shown}")
+
+        # Reads the service's private counter. NOTE(review): whether it is reset between tests is not visible here.
+        print(f"  Total tokens: {translation_service._total_tokens}")
+
+
+if __name__ == "__main__":
+    # Runs every test in this file, including the ones that call the DIFY API.
+    # For the extraction tests only, add "-k", "extraction" to the argument list.
+    # Exit with pytest's return code so a failed run is reported to the caller.
+    raise SystemExit(pytest.main([__file__, "-v", "--tb=short"]))