"""
Tool_OCR - DIFY AI Client
HTTP client for DIFY translation API with batch support
"""

import logging
import re
import time
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple

import httpx

from app.core.config import settings

logger = logging.getLogger(__name__)

# Language name mapping
LANGUAGE_NAMES = {
    "en": "English",
    "zh-TW": "Traditional Chinese",
    "zh-CN": "Simplified Chinese",
    "ja": "Japanese",
    "ko": "Korean",
    "de": "German",
    "fr": "French",
    "es": "Spanish",
    "pt": "Portuguese",
    "it": "Italian",
    "ru": "Russian",
    "vi": "Vietnamese",
    "th": "Thai",
}

# 4xx statuses that are transient by HTTP semantics (request timeout, rate
# limiting). Every other 4xx is a permanent client-side failure that a retry
# cannot fix, so it is raised immediately instead of being retried.
_RETRYABLE_CLIENT_STATUSES = frozenset({408, 429})

# Matches "[N]" followed by text up to the next "[N]" marker or end of input.
# DOTALL lets a single translation span several lines.
_BATCH_ITEM_PATTERN = re.compile(r'\[(\d+)\]\s*(.+?)(?=\[\d+\]|$)', re.DOTALL)


def _coerce_number(value: Any, cast: Any, default: Any) -> Any:
    """Convert *value* with *cast*, returning *default* when it is empty or malformed."""
    if value is None or value == "":
        return default
    try:
        return cast(value)
    except (TypeError, ValueError):
        return default


@dataclass
class TranslationResponse:
    """Response from DIFY translation API"""
    translated_text: str
    total_tokens: int
    latency: float
    conversation_id: str
    total_price: float = 0.0
    currency: str = "USD"


@dataclass
class BatchTranslationResponse:
    """Response from DIFY batch translation API"""
    translations: Dict[int, str]  # marker_id -> translated_text
    total_tokens: int
    latency: float
    conversation_id: str
    missing_markers: List[int] = field(default_factory=list)
    total_price: float = 0.0
    currency: str = "USD"


class DifyTranslationError(Exception):
    """Error during DIFY API translation"""
    pass


class DifyClient:
    """
    Client for DIFY AI translation API.

    Features:
    - Single and batch translation
    - Blocking mode API calls
    - Automatic retry with exponential backoff
    - Token and latency tracking

    Configuration is loaded from settings (config.py / .env.local):
    - DIFY_BASE_URL: API base URL
    - DIFY_API_KEY: API key (required)
    - DIFY_TIMEOUT: Request timeout in seconds
    - DIFY_MAX_RETRIES: Max retry attempts
    - DIFY_MAX_BATCH_CHARS: Max characters per batch
    - DIFY_MAX_BATCH_ITEMS: Max items per batch
    """

    def __init__(
        self,
        base_url: Optional[str] = None,
        api_key: Optional[str] = None,
        timeout: Optional[float] = None,
        max_retries: Optional[int] = None
    ):
        """
        Create a client, falling back to settings for any argument not given.

        Args:
            base_url: API base URL; falls back to settings.dify_base_url
            api_key: API key; falls back to settings.dify_api_key
            timeout: Request timeout; falls back to settings.dify_timeout
            max_retries: Max attempts per call; falls back to settings.dify_max_retries
        """
        # Use settings as defaults when not explicitly provided
        self.base_url = base_url or settings.dify_base_url
        self.api_key = api_key or settings.dify_api_key
        # `is not None` so that an explicit 0 is honoured rather than replaced
        self.timeout = timeout if timeout is not None else settings.dify_timeout
        self.max_retries = max_retries if max_retries is not None else settings.dify_max_retries
        self.max_batch_chars = settings.dify_max_batch_chars
        self.max_batch_items = settings.dify_max_batch_items

        # Running totals reported by get_stats()
        self._total_tokens = 0
        self._total_requests = 0

        # Warn if API key is not configured
        if not self.api_key:
            logger.warning(
                "DIFY_API_KEY not configured. Set DIFY_API_KEY in .env.local for translation to work."
            )

    def _get_language_name(self, lang_code: str) -> str:
        """Convert language code to full name for prompt (unknown codes pass through)"""
        return LANGUAGE_NAMES.get(lang_code, lang_code)

    def _build_prompt(self, text: str, target_lang: str) -> str:
        """Build translation prompt for single text"""
        lang_name = self._get_language_name(target_lang)
        return (
            f"Translate the following text to {lang_name}.\n"
            f"Return ONLY the translated text, no explanations.\n\n"
            f"{text}"
        )

    def _build_batch_prompt(self, texts: List[str], target_lang: str) -> str:
        """
        Build batch translation prompt with numbered markers.

        Format:
        Translate the following texts to {Language}.
        Each text is marked with [N]. Return translations in the same format.
        Return ONLY the translations with their markers, no explanations.

        [1] First text
        [2] Second text
        ...
        """
        lang_name = self._get_language_name(target_lang)

        # Collapse all whitespace (including newlines) inside each item so that
        # every item occupies exactly one line of the prompt.
        texts_block = "\n".join(
            f"[{i}] {' '.join(text.split())}"
            for i, text in enumerate(texts, start=1)
        )

        return (
            f"Translate the following texts to {lang_name}.\n"
            f"Each text is marked with [N]. Return translations in the same format.\n"
            f"Return ONLY the translations with their markers, no explanations.\n\n"
            f"{texts_block}"
        )

    def _parse_batch_response(self, response_text: str, expected_count: int) -> Dict[int, str]:
        """
        Parse batch translation response with numbered markers.

        Expected format:
        [1] Translated text one
        [2] Translated text two
        ...

        Args:
            response_text: Raw answer text returned by the API
            expected_count: Number of items that were sent; when positive,
                markers outside 1..expected_count are discarded because they
                cannot correspond to any input item

        Returns:
            Dict mapping marker number to translated text (empty translations
            are omitted; a repeated marker keeps its last occurrence)
        """
        if not response_text:
            return {}

        translations: Dict[int, str] = {}
        for raw_id, raw_text in _BATCH_ITEM_PATTERN.findall(response_text):
            try:
                marker_id = int(raw_id)
            except ValueError:
                # e.g. a digit run too long for int(); not a usable marker
                continue

            if expected_count > 0 and not 1 <= marker_id <= expected_count:
                logger.debug("Ignoring out-of-range marker [%s]", marker_id)
                continue

            text = raw_text.strip()
            if text:
                translations[marker_id] = text

        return translations

    @staticmethod
    def _extract_usage(data: Dict[str, Any]) -> Tuple[int, float, float, str]:
        """
        Pull usage figures out of an API response, tolerating missing,
        null, or malformed fields.

        Args:
            data: Decoded JSON response body

        Returns:
            Tuple of (total_tokens, latency, total_price, currency)
        """
        metadata = data.get("metadata")
        usage = metadata.get("usage") if isinstance(metadata, dict) else None
        if not isinstance(usage, dict):
            usage = {}

        total_tokens = _coerce_number(usage.get("total_tokens"), int, 0)
        latency = _coerce_number(usage.get("latency"), float, 0.0)
        # Price may arrive as a string or a number
        total_price = _coerce_number(usage.get("total_price"), float, 0.0)
        currency = usage.get("currency", "USD") or "USD"
        return total_tokens, latency, total_price, currency

    def _call_api(self, prompt: str, user_id: str) -> dict:
        """
        Make API call to DIFY with retry logic.

        Timeouts, transport errors, undecodable bodies, and non-200 responses
        other than permanent 4xx errors are retried with exponential backoff
        (1s, 2s, 4s, ...). Permanent 4xx responses are raised immediately.

        Args:
            prompt: Full prompt text sent as the chat query
            user_id: User identifier forwarded to the API

        Returns:
            Decoded JSON response body

        Raises:
            DifyTranslationError: On a permanent client error, or when every
                attempt has failed
        """
        payload = {
            "inputs": {},
            "query": prompt,
            "response_mode": "blocking",
            "conversation_id": "",
            "user": user_id
        }

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        # Avoid "//chat-messages" when the configured base URL ends with "/"
        url = f"{(self.base_url or '').rstrip('/')}/chat-messages"

        # Always make at least one attempt, even if max_retries is set to 0
        attempts = max(1, self.max_retries)
        last_error: Optional[Exception] = None

        for attempt in range(attempts):
            try:
                with httpx.Client(timeout=self.timeout) as client:
                    response = client.post(url, json=payload, headers=headers)
            except httpx.TimeoutException as e:
                last_error = e
                logger.warning("DIFY API timeout (attempt %d/%d)", attempt + 1, attempts)
            except httpx.RequestError as e:
                last_error = e
                logger.warning(
                    "DIFY API request error (attempt %d/%d): %s", attempt + 1, attempts, e
                )
            except Exception as e:
                # Boundary catch: callers only expect DifyTranslationError, so
                # unexpected client failures are retried and wrapped below.
                last_error = e
                logger.warning("DIFY API error (attempt %d/%d): %s", attempt + 1, attempts, e)
            else:
                status = response.status_code
                if status != 200:
                    error = DifyTranslationError(
                        f"API returned status {status}: {response.text}"
                    )
                    if 400 <= status < 500 and status not in _RETRYABLE_CLIENT_STATUSES:
                        # Bad key / bad request: retrying cannot succeed
                        raise error
                    last_error = error
                    logger.warning(
                        "DIFY API error (attempt %d/%d): %s", attempt + 1, attempts, error
                    )
                else:
                    try:
                        data = response.json()
                    except ValueError as e:
                        last_error = e
                        logger.warning(
                            "DIFY API error (attempt %d/%d): %s", attempt + 1, attempts, e
                        )
                    else:
                        if isinstance(data, dict):
                            return data
                        # Callers read fields with .get(); anything else is unusable
                        last_error = DifyTranslationError(
                            "API returned a JSON body that is not an object"
                        )
                        logger.warning(
                            "DIFY API error (attempt %d/%d): %s",
                            attempt + 1, attempts, last_error
                        )

            # Exponential backoff
            if attempt < attempts - 1:
                wait_time = 2 ** attempt
                logger.info("Retrying in %ss...", wait_time)
                time.sleep(wait_time)

        raise DifyTranslationError(
            f"API call failed after {attempts} attempts: {last_error}"
        ) from last_error

    def translate(
        self,
        text: str,
        target_lang: str,
        user_id: str = "tool-ocr"
    ) -> TranslationResponse:
        """
        Translate single text using DIFY API.

        Args:
            text: Text to translate
            target_lang: Target language code (e.g., 'en', 'zh-TW')
            user_id: User identifier for tracking

        Returns:
            TranslationResponse with translated text and metadata

        Raises:
            DifyTranslationError: If the API call fails
        """
        prompt = self._build_prompt(text, target_lang)
        data = self._call_api(prompt, user_id)

        total_tokens, latency, total_price, currency = self._extract_usage(data)

        self._total_tokens += total_tokens
        self._total_requests += 1

        return TranslationResponse(
            # `or ""` also covers an explicit null in the response
            translated_text=data.get("answer") or "",
            total_tokens=total_tokens,
            latency=latency,
            conversation_id=data.get("conversation_id") or "",
            total_price=total_price,
            currency=currency
        )

    def translate_batch(
        self,
        texts: List[str],
        target_lang: str,
        user_id: str = "tool-ocr"
    ) -> BatchTranslationResponse:
        """
        Translate multiple texts in a single API call.

        Args:
            texts: List of texts to translate
            target_lang: Target language code
            user_id: User identifier for tracking

        Returns:
            BatchTranslationResponse with translations dict and metadata;
            markers (1-based positions in `texts`) that came back without a
            translation are listed in `missing_markers`

        Raises:
            DifyTranslationError: If the API call fails
        """
        # Nothing to translate: skip the API call entirely
        if not texts:
            return BatchTranslationResponse(
                translations={},
                total_tokens=0,
                latency=0.0,
                conversation_id=""
            )

        prompt = self._build_batch_prompt(texts, target_lang)

        logger.debug("Batch translation: %d items, ~%d chars", len(texts), len(prompt))

        data = self._call_api(prompt, user_id)

        # Extract and parse response (`or ""` also covers an explicit null)
        answer = data.get("answer") or ""
        total_tokens, latency, total_price, currency = self._extract_usage(data)

        translations = self._parse_batch_response(answer, len(texts))

        # Check for missing markers
        missing_markers = [i for i in range(1, len(texts) + 1) if i not in translations]
        for marker in missing_markers:
            logger.warning("Missing translation for marker [%d]", marker)

        self._total_tokens += total_tokens
        self._total_requests += 1

        return BatchTranslationResponse(
            translations=translations,
            total_tokens=total_tokens,
            latency=latency,
            conversation_id=data.get("conversation_id") or "",
            missing_markers=missing_markers,
            total_price=total_price,
            currency=currency
        )

    def get_stats(self) -> dict:
        """Get client statistics (cumulative tokens, request count, base URL)"""
        return {
            "total_tokens": self._total_tokens,
            "total_requests": self._total_requests,
            "base_url": self.base_url,
        }


# Global singleton
_dify_client: Optional[DifyClient] = None


def get_dify_client() -> DifyClient:
    """Get the global DifyClient instance, creating it on first use"""
    global _dify_client
    if _dify_client is None:
        _dify_client = DifyClient()
    return _dify_client