Files
OCR/backend/app/services/dify_client.py
egg 8d9b69ba93 feat: add document translation via DIFY AI API
Implement document translation feature using DIFY AI API with batch processing:

Backend:
- Add DIFY client with batch translation support (5000 chars, 20 items per batch)
- Add translation service with element extraction and result building
- Add translation router with start/status/result/list/delete endpoints
- Add translation schemas (TranslationRequest, TranslationStatus, etc.)

Frontend:
- Enable translation UI in TaskDetailPage
- Add translation API methods to apiV2.ts
- Add translation types

Features:
- Batch translation with numbered markers [1], [2], [3]...
- Support for text, title, header, footer, paragraph, footnote, table cells
- Translation result JSON with statistics (tokens, latency, batch_count)
- Background task processing with progress tracking

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-02 11:57:02 +08:00

333 lines
9.8 KiB
Python

"""
Tool_OCR - DIFY AI Client
HTTP client for DIFY translation API with batch support
"""
import asyncio
import logging
import os
import re
import time
from dataclasses import dataclass, field
from typing import Dict, List, Optional

import httpx
logger = logging.getLogger(__name__)

# DIFY API configuration.
# The endpoint and credential can be overridden with the DIFY_BASE_URL and
# DIFY_API_KEY environment variables (an empty value counts as unset). The
# literals below are fallbacks kept only so existing deployments keep working.
# NOTE(review): a key committed to source control should be treated as leaked;
# rotate it and supply the replacement through DIFY_API_KEY.
DIFY_BASE_URL = os.environ.get("DIFY_BASE_URL") or "https://dify.theaken.com/v1"
DIFY_API_KEY = os.environ.get("DIFY_API_KEY") or "app-YOPrF2ro5fshzMkCZviIuUJd"
DIFY_TIMEOUT = 120.0  # seconds (increased for batch)
DIFY_MAX_RETRIES = 3

# Batch translation limits.
# Conservative values to avoid gateway timeouts; the DIFY server may have
# processing time limits.
MAX_BATCH_CHARS = 5000
MAX_BATCH_ITEMS = 20

# Language code -> English language name used when building prompts.
LANGUAGE_NAMES = {
    "en": "English",
    "zh-TW": "Traditional Chinese",
    "zh-CN": "Simplified Chinese",
    "ja": "Japanese",
    "ko": "Korean",
    "de": "German",
    "fr": "French",
    "es": "Spanish",
    "pt": "Portuguese",
    "it": "Italian",
    "ru": "Russian",
    "vi": "Vietnamese",
    "th": "Thai",
}
@dataclass
class TranslationResponse:
    """Result of translating a single text through the DIFY API."""

    # Translated text returned by the API.
    translated_text: str
    # Token count reported for the request.
    total_tokens: int
    # Latency value reported for the request.
    latency: float
    # Conversation identifier returned by the API.
    conversation_id: str
@dataclass
class BatchTranslationResponse:
    """Result of translating several texts in one DIFY API call."""

    # Maps marker number to the translated text for that marker.
    translations: Dict[int, str]
    # Token count reported for the request.
    total_tokens: int
    # Latency value reported for the request.
    latency: float
    # Conversation identifier returned by the API.
    conversation_id: str
    # Marker numbers for which no translation was found in the answer.
    missing_markers: List[int] = field(default_factory=list)
class DifyTranslationError(Exception):
    """Raised when a DIFY API translation request fails."""
class DifyClient:
    """
    Client for the DIFY AI translation API.

    Features:
    - Single and batch translation
    - Blocking mode API calls
    - Automatic retry with exponential backoff
    - Token and request counting across calls
    """

    # 4xx statuses that are still worth retrying (request timeout, rate limit).
    _RETRYABLE_CLIENT_STATUSES = (408, 429)

    def __init__(
        self,
        base_url: str = DIFY_BASE_URL,
        api_key: str = DIFY_API_KEY,
        timeout: float = DIFY_TIMEOUT,
        max_retries: int = DIFY_MAX_RETRIES
    ):
        """
        Store connection settings and reset the usage counters.

        Args:
            base_url: API root; ``/chat-messages`` is appended for requests
            api_key: Token sent as ``Authorization: Bearer <api_key>``
            timeout: Timeout passed to the HTTP client for each request
            max_retries: Maximum number of attempts per API call
        """
        self.base_url = base_url
        self.api_key = api_key
        self.timeout = timeout
        self.max_retries = max_retries
        # Running totals updated by translate() and translate_batch().
        self._total_tokens = 0
        self._total_requests = 0

    def _get_language_name(self, lang_code: str) -> str:
        """Convert language code to full name for prompt (unknown codes pass through)."""
        return LANGUAGE_NAMES.get(lang_code, lang_code)

    def _build_prompt(self, text: str, target_lang: str) -> str:
        """Build translation prompt for single text"""
        lang_name = self._get_language_name(target_lang)
        return (
            f"Translate the following text to {lang_name}.\n"
            f"Return ONLY the translated text, no explanations.\n\n"
            f"{text}"
        )

    def _build_batch_prompt(self, texts: List[str], target_lang: str) -> str:
        """
        Build batch translation prompt with numbered markers.

        Format:
            Translate the following texts to {Language}.
            Each text is marked with [N]. Return translations in the same format.
            Return ONLY the translations with their markers, no explanations.

            [1] First text
            [2] Second text
            ...
        """
        lang_name = self._get_language_name(target_lang)
        # Collapse all whitespace (including newlines) inside each item so
        # every item occupies exactly one line; this keeps the numbered
        # answer easy to parse.
        numbered_texts = [
            f"[{index}] {' '.join(text.split())}"
            for index, text in enumerate(texts, start=1)
        ]
        texts_block = "\n".join(numbered_texts)
        return (
            f"Translate the following texts to {lang_name}.\n"
            f"Each text is marked with [N]. Return translations in the same format.\n"
            f"Return ONLY the translations with their markers, no explanations.\n\n"
            f"{texts_block}"
        )

    def _parse_batch_response(self, response_text: str, expected_count: int) -> Dict[int, str]:
        """
        Parse batch translation response with numbered markers.

        Expected format:
            [1] translated text one
            [2] translated text two
            ...

        Markers are first looked for at the start of a line, so a bracketed
        number inside a translation (a citation, a year) does not split it.
        If that does not recover all ``expected_count`` items, markers are
        also accepted anywhere in the text (for answers returned on a single
        line or wrapped in list/markdown syntax), and that result is used only
        when it recovers more items.

        When ``expected_count`` is positive, only markers in
        ``1..expected_count`` are treated as markers; other bracketed numbers
        stay part of the surrounding text. Items whose text is empty are
        omitted. If a marker occurs more than once, the last occurrence wins.

        Args:
            response_text: Raw answer text from the API
            expected_count: Number of items that were sent for translation

        Returns:
            Dict mapping marker number to translated text
        """
        def collect(marker_pattern: str) -> Dict[int, str]:
            # Keep only markers that can belong to this batch; everything
            # else is ordinary text.
            markers = [
                m for m in re.finditer(marker_pattern, response_text, re.MULTILINE)
                if expected_count <= 0 or 1 <= int(m.group(1)) <= expected_count
            ]
            found: Dict[int, str] = {}
            for position, marker in enumerate(markers):
                # An item's text runs from the end of its marker up to the
                # start of the next marker (or the end of the answer), so an
                # empty item can never swallow the following marker.
                if position + 1 < len(markers):
                    stop = markers[position + 1].start()
                else:
                    stop = len(response_text)
                text = response_text[marker.end():stop].strip()
                if text:
                    found[int(marker.group(1))] = text
            return found

        line_anchored = collect(r'^[ \t]*\[(\d+)\]')
        if expected_count > 0 and len(line_anchored) >= expected_count:
            return line_anchored

        anywhere = collect(r'\[(\d+)\]')
        return anywhere if len(anywhere) > len(line_anchored) else line_anchored

    def _call_api(self, prompt: str, user_id: str) -> dict:
        """
        Make API call to DIFY with retry logic.

        Sends ``prompt`` as a blocking chat message. Timeouts, request errors,
        unparsable response bodies and retryable HTTP statuses are retried up
        to ``max_retries`` attempts with exponential backoff (1s, 2s, 4s, ...).
        A 4xx status other than 408/429 is raised immediately, since repeating
        the same request cannot succeed.

        Args:
            prompt: Text sent as the chat ``query``
            user_id: Value sent as the ``user`` field

        Returns:
            Decoded JSON body of the 200 response

        Raises:
            DifyTranslationError: On a non-retryable status, or when all
                attempts fail (the last underlying error is chained as the
                cause).
        """
        payload = {
            "inputs": {},
            "query": prompt,
            "response_mode": "blocking",
            "conversation_id": "",
            "user": user_id
        }
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        last_error: Optional[Exception] = None
        for attempt in range(self.max_retries):
            try:
                with httpx.Client(timeout=self.timeout) as client:
                    response = client.post(
                        f"{self.base_url}/chat-messages",
                        json=payload,
                        headers=headers
                    )
                if response.status_code == 200:
                    return response.json()
            except httpx.TimeoutException as e:
                last_error = e
                logger.warning(f"DIFY API timeout (attempt {attempt + 1}/{self.max_retries})")
            except httpx.RequestError as e:
                last_error = e
                logger.warning(f"DIFY API request error (attempt {attempt + 1}/{self.max_retries}): {e}")
            except Exception as e:
                # e.g. a 200 response whose body is not valid JSON
                last_error = e
                logger.warning(f"DIFY API error (attempt {attempt + 1}/{self.max_retries}): {e}")
            else:
                # Reached only for a non-200 response. Exceptions raised here
                # are not caught by the handlers above, so the fail-fast path
                # really does leave the retry loop.
                status = response.status_code
                error = DifyTranslationError(
                    f"API returned status {status}: {response.text}"
                )
                if 400 <= status < 500 and status not in self._RETRYABLE_CLIENT_STATUSES:
                    raise error
                last_error = error
                logger.warning(f"DIFY API error (attempt {attempt + 1}/{self.max_retries}): {error}")

            # Exponential backoff (skipped after the final attempt)
            if attempt < self.max_retries - 1:
                wait_time = 2 ** attempt
                logger.info(f"Retrying in {wait_time}s...")
                time.sleep(wait_time)

        raise DifyTranslationError(
            f"API call failed after {self.max_retries} attempts: {last_error}"
        ) from last_error

    @staticmethod
    def _extract_usage(data: dict) -> tuple:
        """Return ``(total_tokens, latency)`` from a response, tolerating missing or null fields."""
        usage = (data.get("metadata") or {}).get("usage") or {}
        return usage.get("total_tokens") or 0, usage.get("latency") or 0.0

    def translate(
        self,
        text: str,
        target_lang: str,
        user_id: str = "tool-ocr"
    ) -> TranslationResponse:
        """
        Translate single text using DIFY API.

        Args:
            text: Text to translate
            target_lang: Target language code (e.g., 'en', 'zh-TW')
            user_id: User identifier for tracking

        Returns:
            TranslationResponse with translated text and metadata

        Raises:
            DifyTranslationError: If the API call fails
        """
        prompt = self._build_prompt(text, target_lang)
        data = self._call_api(prompt, user_id)

        total_tokens, latency = self._extract_usage(data)
        self._total_tokens += total_tokens
        self._total_requests += 1

        return TranslationResponse(
            translated_text=data.get("answer") or "",
            total_tokens=total_tokens,
            latency=latency,
            conversation_id=data.get("conversation_id") or ""
        )

    def translate_batch(
        self,
        texts: List[str],
        target_lang: str,
        user_id: str = "tool-ocr"
    ) -> BatchTranslationResponse:
        """
        Translate multiple texts in a single API call.

        Args:
            texts: List of texts to translate
            target_lang: Target language code
            user_id: User identifier for tracking

        Returns:
            BatchTranslationResponse with translations dict and metadata.
            ``missing_markers`` lists the 1-based positions in ``texts`` for
            which no translation was found in the answer.

        Raises:
            DifyTranslationError: If the API call fails
        """
        # Nothing to translate: skip the API call entirely.
        if not texts:
            return BatchTranslationResponse(
                translations={},
                total_tokens=0,
                latency=0.0,
                conversation_id=""
            )

        prompt = self._build_batch_prompt(texts, target_lang)
        logger.debug(f"Batch translation: {len(texts)} items, ~{len(prompt)} chars")
        data = self._call_api(prompt, user_id)

        # Extract and parse response
        answer = data.get("answer") or ""
        total_tokens, latency = self._extract_usage(data)
        translations = self._parse_batch_response(answer, len(texts))

        # Check for missing markers
        missing_markers = [
            marker for marker in range(1, len(texts) + 1)
            if marker not in translations
        ]
        for marker in missing_markers:
            logger.warning(f"Missing translation for marker [{marker}]")

        self._total_tokens += total_tokens
        self._total_requests += 1

        return BatchTranslationResponse(
            translations=translations,
            total_tokens=total_tokens,
            latency=latency,
            conversation_id=data.get("conversation_id") or "",
            missing_markers=missing_markers
        )

    def get_stats(self) -> dict:
        """Get client statistics: cumulative tokens, request count and base URL."""
        return {
            "total_tokens": self._total_tokens,
            "total_requests": self._total_requests,
            "base_url": self.base_url,
        }
# Module-level client instance, created on first request
_dify_client: Optional[DifyClient] = None


def get_dify_client() -> DifyClient:
    """Return the shared DifyClient, constructing it with defaults on first use."""
    global _dify_client
    if _dify_client is not None:
        return _dify_client
    _dify_client = DifyClient()
    return _dify_client