""" Tool_OCR - DIFY AI Client HTTP client for DIFY translation API with batch support """ import asyncio import logging import re import time from dataclasses import dataclass, field from typing import Dict, List, Optional import httpx logger = logging.getLogger(__name__) # DIFY API Configuration DIFY_BASE_URL = "https://dify.theaken.com/v1" DIFY_API_KEY = "app-YOPrF2ro5fshzMkCZviIuUJd" DIFY_TIMEOUT = 120.0 # seconds (increased for batch) DIFY_MAX_RETRIES = 3 # Batch translation limits # Conservative limits to avoid gateway timeouts # DIFY server may have processing time limits MAX_BATCH_CHARS = 5000 MAX_BATCH_ITEMS = 20 # Language name mapping LANGUAGE_NAMES = { "en": "English", "zh-TW": "Traditional Chinese", "zh-CN": "Simplified Chinese", "ja": "Japanese", "ko": "Korean", "de": "German", "fr": "French", "es": "Spanish", "pt": "Portuguese", "it": "Italian", "ru": "Russian", "vi": "Vietnamese", "th": "Thai", } @dataclass class TranslationResponse: """Response from DIFY translation API""" translated_text: str total_tokens: int latency: float conversation_id: str @dataclass class BatchTranslationResponse: """Response from DIFY batch translation API""" translations: Dict[int, str] # marker_id -> translated_text total_tokens: int latency: float conversation_id: str missing_markers: List[int] = field(default_factory=list) class DifyTranslationError(Exception): """Error during DIFY API translation""" pass class DifyClient: """ Client for DIFY AI translation API. Features: - Single and batch translation - Blocking mode API calls - Automatic retry with exponential backoff - Token and latency tracking """ def __init__( self, base_url: str = DIFY_BASE_URL, api_key: str = DIFY_API_KEY, timeout: float = DIFY_TIMEOUT, max_retries: int = DIFY_MAX_RETRIES ): self.base_url = base_url self.api_key = api_key self.timeout = timeout self.max_retries = max_retries self._total_tokens = 0 self._total_requests = 0 def _get_language_name(self, lang_code: str) -> str: """Convert language code to full name for prompt""" return LANGUAGE_NAMES.get(lang_code, lang_code) def _build_prompt(self, text: str, target_lang: str) -> str: """Build translation prompt for single text""" lang_name = self._get_language_name(target_lang) return ( f"Translate the following text to {lang_name}.\n" f"Return ONLY the translated text, no explanations.\n\n" f"{text}" ) def _build_batch_prompt(self, texts: List[str], target_lang: str) -> str: """ Build batch translation prompt with numbered markers. Format: Translate the following texts to {Language}. Each text is marked with [N]. Return translations in the same format. Return ONLY the translations with their markers, no explanations. [1] First text [2] Second text ... """ lang_name = self._get_language_name(target_lang) # Build numbered text list numbered_texts = [] for i, text in enumerate(texts, start=1): # Clean text - remove newlines within each item to avoid parsing issues clean_text = ' '.join(text.split()) numbered_texts.append(f"[{i}] {clean_text}") texts_block = "\n".join(numbered_texts) prompt = ( f"Translate the following texts to {lang_name}.\n" f"Each text is marked with [N]. Return translations in the same format.\n" f"Return ONLY the translations with their markers, no explanations.\n\n" f"{texts_block}" ) return prompt def _parse_batch_response(self, response_text: str, expected_count: int) -> Dict[int, str]: """ Parse batch translation response with numbered markers. Expected format: [1] 翻譯文字一 [2] 翻譯文字二 ... Returns: Dict mapping marker number to translated text """ translations = {} # Pattern to match [N] followed by text until next [N] or end # Use DOTALL to match across lines, but be careful with greedy matching pattern = r'\[(\d+)\]\s*(.+?)(?=\[\d+\]|$)' matches = re.findall(pattern, response_text, re.DOTALL) for match in matches: try: marker_id = int(match[0]) text = match[1].strip() if text: translations[marker_id] = text except (ValueError, IndexError): continue return translations def _call_api(self, prompt: str, user_id: str) -> dict: """Make API call to DIFY with retry logic""" payload = { "inputs": {}, "query": prompt, "response_mode": "blocking", "conversation_id": "", "user": user_id } headers = { "Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json" } last_error = None for attempt in range(self.max_retries): try: with httpx.Client(timeout=self.timeout) as client: response = client.post( f"{self.base_url}/chat-messages", json=payload, headers=headers ) if response.status_code != 200: raise DifyTranslationError( f"API returned status {response.status_code}: {response.text}" ) return response.json() except httpx.TimeoutException as e: last_error = e logger.warning(f"DIFY API timeout (attempt {attempt + 1}/{self.max_retries})") except httpx.RequestError as e: last_error = e logger.warning(f"DIFY API request error (attempt {attempt + 1}/{self.max_retries}): {e}") except Exception as e: last_error = e logger.warning(f"DIFY API error (attempt {attempt + 1}/{self.max_retries}): {e}") # Exponential backoff if attempt < self.max_retries - 1: wait_time = 2 ** attempt logger.info(f"Retrying in {wait_time}s...") time.sleep(wait_time) raise DifyTranslationError(f"API call failed after {self.max_retries} attempts: {last_error}") def translate( self, text: str, target_lang: str, user_id: str = "tool-ocr" ) -> TranslationResponse: """ Translate single text using DIFY API. Args: text: Text to translate target_lang: Target language code (e.g., 'en', 'zh-TW') user_id: User identifier for tracking Returns: TranslationResponse with translated text and metadata """ prompt = self._build_prompt(text, target_lang) data = self._call_api(prompt, user_id) # Extract response fields translated_text = data.get("answer", "") usage = data.get("metadata", {}).get("usage", {}) self._total_tokens += usage.get("total_tokens", 0) self._total_requests += 1 return TranslationResponse( translated_text=translated_text, total_tokens=usage.get("total_tokens", 0), latency=usage.get("latency", 0.0), conversation_id=data.get("conversation_id", "") ) def translate_batch( self, texts: List[str], target_lang: str, user_id: str = "tool-ocr" ) -> BatchTranslationResponse: """ Translate multiple texts in a single API call. Args: texts: List of texts to translate target_lang: Target language code user_id: User identifier for tracking Returns: BatchTranslationResponse with translations dict and metadata """ if not texts: return BatchTranslationResponse( translations={}, total_tokens=0, latency=0.0, conversation_id="" ) prompt = self._build_batch_prompt(texts, target_lang) logger.debug(f"Batch translation: {len(texts)} items, ~{len(prompt)} chars") data = self._call_api(prompt, user_id) # Extract and parse response answer = data.get("answer", "") usage = data.get("metadata", {}).get("usage", {}) translations = self._parse_batch_response(answer, len(texts)) # Check for missing markers missing_markers = [] for i in range(1, len(texts) + 1): if i not in translations: missing_markers.append(i) logger.warning(f"Missing translation for marker [{i}]") self._total_tokens += usage.get("total_tokens", 0) self._total_requests += 1 return BatchTranslationResponse( translations=translations, total_tokens=usage.get("total_tokens", 0), latency=usage.get("latency", 0.0), conversation_id=data.get("conversation_id", ""), missing_markers=missing_markers ) def get_stats(self) -> dict: """Get client statistics""" return { "total_tokens": self._total_tokens, "total_requests": self._total_requests, "base_url": self.base_url, } # Global singleton _dify_client: Optional[DifyClient] = None def get_dify_client() -> DifyClient: """Get the global DifyClient instance""" global _dify_client if _dify_client is None: _dify_client = DifyClient() return _dify_client