# OCR/backend/app/services/dify_client.py

"""
Tool_OCR - DIFY AI Client
HTTP client for DIFY translation API with batch support
"""

import asyncio
import logging
import os
import re
import time
from dataclasses import dataclass, field
from typing import Dict, List, Optional

import httpx

logger = logging.getLogger(__name__)

# DIFY API Configuration
# The endpoint and the API key can be overridden with the DIFY_BASE_URL and
# DIFY_API_KEY environment variables; an unset or empty variable falls back to
# the value below, so existing deployments keep working unchanged.
DIFY_BASE_URL = os.environ.get("DIFY_BASE_URL") or "https://dify.theaken.com/v1"
# NOTE(security): the fallback key is committed to source control. Prefer
# supplying DIFY_API_KEY through the environment and rotating this key.
DIFY_API_KEY = os.environ.get("DIFY_API_KEY") or "app-YOPrF2ro5fshzMkCZviIuUJd"
DIFY_TIMEOUT = 120.0  # seconds (increased for batch)
DIFY_MAX_RETRIES = 3  # attempts per API call

# Batch translation limits
# Conservative limits to avoid gateway timeouts
# DIFY server may have processing time limits
MAX_BATCH_CHARS = 5000
MAX_BATCH_ITEMS = 20

# Language name mapping
# Maps a target language code to the English language name that is inserted
# into the translation prompts. A code that is not listed here is used in the
# prompt as-is (see DifyClient._get_language_name).
LANGUAGE_NAMES: Dict[str, str] = {
    "en": "English",
    "zh-TW": "Traditional Chinese",
    "zh-CN": "Simplified Chinese",
    "ja": "Japanese",
    "ko": "Korean",
    "de": "German",
    "fr": "French",
    "es": "Spanish",
    "pt": "Portuguese",
    "it": "Italian",
    "ru": "Russian",
    "vi": "Vietnamese",
    "th": "Thai",
}


@dataclass
class TranslationResponse:
    """
    Response from DIFY translation API

    Built by DifyClient.translate() from the JSON body of a single
    chat-messages call.
    """
    # The "answer" field of the API response ("" when absent)
    translated_text: str
    # "total_tokens" from the response's metadata.usage (0 when absent)
    total_tokens: int
    # "latency" from the response's metadata.usage (0.0 when absent);
    # presumably seconds - TODO confirm against the DIFY API reference
    latency: float
    # "conversation_id" field of the API response ("" when absent)
    conversation_id: str


@dataclass
class BatchTranslationResponse:
    """
    Response from DIFY batch translation API

    Built by DifyClient.translate_batch() from the JSON body of a single
    chat-messages call that carried several numbered texts.
    """
    # marker_id -> translated_text; marker ids are 1-based positions of the
    # input texts, as numbered in the batch prompt
    translations: Dict[int, str]
    # "total_tokens" from the response's metadata.usage (0 when absent)
    total_tokens: int
    # "latency" from the response's metadata.usage (0.0 when absent);
    # presumably seconds - TODO confirm against the DIFY API reference
    latency: float
    # "conversation_id" field of the API response ("" when absent)
    conversation_id: str
    # Marker ids in 1..len(texts) for which no translation was parsed
    missing_markers: List[int] = field(default_factory=list)


class DifyTranslationError(Exception):
    """Raised when a call to the DIFY translation API fails"""


class DifyClient:
    """
    Client for DIFY AI translation API.

    Features:
    - Single and batch translation
    - Blocking mode API calls
    - Automatic retry with exponential backoff
    - Token and latency tracking
    """

    def __init__(
        self,
        base_url: str = DIFY_BASE_URL,
        api_key: str = DIFY_API_KEY,
        timeout: float = DIFY_TIMEOUT,
        max_retries: int = DIFY_MAX_RETRIES
    ):
        """
        Store the connection settings and zero the usage counters.

        Args:
            base_url: Root URL of the DIFY API ("/chat-messages" is appended to it)
            api_key: Key sent as the Bearer token of every request
            timeout: Timeout handed to the HTTP client, in seconds
            max_retries: Number of attempts made for each API call
        """
        # Running totals reported by get_stats()
        self._total_tokens = self._total_requests = 0

        # Connection settings
        self.max_retries = max_retries
        self.timeout = timeout
        self.api_key = api_key
        self.base_url = base_url

    def _get_language_name(self, lang_code: str) -> str:
        """Look up the prompt name for a language code; unknown codes are returned unchanged"""
        if lang_code in LANGUAGE_NAMES:
            return LANGUAGE_NAMES[lang_code]
        return lang_code

    def _build_prompt(self, text: str, target_lang: str) -> str:
        """Compose the single-text prompt: two instruction lines, a blank line, then the text"""
        prompt_lines = [
            f"Translate the following text to {self._get_language_name(target_lang)}.",
            "Return ONLY the translated text, no explanations.",
            "",
            f"{text}",
        ]
        return "\n".join(prompt_lines)

    def _build_batch_prompt(self, texts: List[str], target_lang: str) -> str:
        """
        Compose the batch prompt, tagging every text with a 1-based [N] marker.

        Resulting layout:
        Translate the following texts to {Language}.
        Each text is marked with [N]. Return translations in the same format.
        Return ONLY the translations with their markers, no explanations.

        [1] First text
        [2] Second text
        ...
        """
        language = self._get_language_name(target_lang)

        # Each item is squeezed onto a single line (all whitespace runs,
        # newlines included, become one space) so that a marker always
        # starts its own line in the prompt.
        entries = "\n".join(
            f"[{number}] {' '.join(item.split())}"
            for number, item in enumerate(texts, start=1)
        )

        return (
            f"Translate the following texts to {language}.\n"
            "Each text is marked with [N]. Return translations in the same format.\n"
            "Return ONLY the translations with their markers, no explanations.\n\n"
            + entries
        )

    def _parse_batch_response(self, response_text: str, expected_count: int) -> Dict[int, str]:
        """
        Parse batch translation response with numbered markers.

        Expected format:
        [1] 翻譯文字一
        [2] 翻譯文字二
        ...

        Returns:
            Dict mapping marker number to translated text
        """
        translations = {}

        # Pattern to match [N] followed by text until next [N] or end
        # Use DOTALL to match across lines, but be careful with greedy matching
        pattern = r'\[(\d+)\]\s*(.+?)(?=\[\d+\]|$)'
        matches = re.findall(pattern, response_text, re.DOTALL)

        for match in matches:
            try:
                marker_id = int(match[0])
                text = match[1].strip()
                if text:
                    translations[marker_id] = text
            except (ValueError, IndexError):
                continue

        return translations

    def _call_api(self, prompt: str, user_id: str) -> dict:
        """
        Send one blocking chat-messages request to DIFY, retrying transient failures.

        Timeouts, transport errors, unexpected exceptions and non-200 statuses
        that may be transient are retried, up to ``self.max_retries`` attempts
        in total, sleeping 1s, 2s, 4s, ... between attempts. A 4xx status
        other than 408/429 (bad request, rejected API key, ...) is raised
        immediately, because repeating the identical request cannot succeed.

        Args:
            prompt: Text sent as the "query" field of the request
            user_id: Value sent as the "user" field of the request

        Returns:
            The decoded JSON body of the 200 response

        Raises:
            DifyTranslationError: On a non-retryable status, or once every
                attempt has failed (the last underlying error is chained)
        """
        payload = {
            "inputs": {},
            "query": prompt,
            "response_mode": "blocking",
            "conversation_id": "",
            "user": user_id
        }

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        url = f"{self.base_url}/chat-messages"
        last_error = None

        # One client for all attempts so that retries can reuse its connection pool
        with httpx.Client(timeout=self.timeout) as client:
            for attempt in range(self.max_retries):
                try:
                    response = client.post(url, json=payload, headers=headers)
                    status = response.status_code

                    if status == 200:
                        return response.json()

                    status_error = DifyTranslationError(
                        f"API returned status {status}: {response.text}"
                    )
                    # Client errors are permanent, except request-timeout (408)
                    # and rate-limit (429) responses which may clear up.
                    if 400 <= status < 500 and status not in (408, 429):
                        raise status_error

                    last_error = status_error
                    logger.warning(f"DIFY API error (attempt {attempt + 1}/{self.max_retries}): {status_error}")

                except DifyTranslationError:
                    # Only the non-retryable status error above is raised
                    # inside this try block; let it reach the caller as-is.
                    raise

                except httpx.TimeoutException as e:
                    last_error = e
                    logger.warning(f"DIFY API timeout (attempt {attempt + 1}/{self.max_retries})")

                except httpx.RequestError as e:
                    last_error = e
                    logger.warning(f"DIFY API request error (attempt {attempt + 1}/{self.max_retries}): {e}")

                except Exception as e:
                    # Best effort: anything unexpected (e.g. an undecodable
                    # body) is treated as a failed attempt and retried.
                    last_error = e
                    logger.warning(f"DIFY API error (attempt {attempt + 1}/{self.max_retries}): {e}")

                # Exponential backoff (skipped after the final attempt)
                if attempt < self.max_retries - 1:
                    wait_time = 2 ** attempt
                    logger.info(f"Retrying in {wait_time}s...")
                    time.sleep(wait_time)

        raise DifyTranslationError(
            f"API call failed after {self.max_retries} attempts: {last_error}"
        ) from last_error

    def translate(
        self,
        text: str,
        target_lang: str,
        user_id: str = "tool-ocr"
    ) -> TranslationResponse:
        """
        Send one text to the DIFY API and return its translation.

        Args:
            text: Text to translate
            target_lang: Target language code (e.g., 'en', 'zh-TW')
            user_id: User identifier passed along with the request

        Returns:
            TranslationResponse holding the answer text, the token count,
            the latency and the conversation id reported by the API
        """
        response_data = self._call_api(self._build_prompt(text, target_lang), user_id)

        # Pull the answer and the usage figures out of the response body
        answer = response_data.get("answer", "")
        usage_info = response_data.get("metadata", {}).get("usage", {})
        token_count = usage_info.get("total_tokens", 0)

        # Update the running totals exposed by get_stats()
        self._total_tokens += token_count
        self._total_requests += 1

        return TranslationResponse(
            conversation_id=response_data.get("conversation_id", ""),
            latency=usage_info.get("latency", 0.0),
            total_tokens=token_count,
            translated_text=answer,
        )

    def translate_batch(
        self,
        texts: List[str],
        target_lang: str,
        user_id: str = "tool-ocr"
    ) -> BatchTranslationResponse:
        """
        Translate several texts with one API call.

        The texts are numbered [1]..[N] in a single prompt and the answer is
        split back by those markers. An empty input returns an empty response
        without calling the API.

        Args:
            texts: List of texts to translate
            target_lang: Target language code
            user_id: User identifier passed along with the request

        Returns:
            BatchTranslationResponse with the marker -> translation dict, the
            usage metadata and the list of markers that came back untranslated
        """
        if not texts:
            # Nothing to send
            return BatchTranslationResponse(
                conversation_id="",
                latency=0.0,
                total_tokens=0,
                translations={},
            )

        item_count = len(texts)
        batch_prompt = self._build_batch_prompt(texts, target_lang)
        logger.debug(f"Batch translation: {item_count} items, ~{len(batch_prompt)} chars")

        response_data = self._call_api(batch_prompt, user_id)

        # Pull the answer and the usage figures out of the response body
        raw_answer = response_data.get("answer", "")
        usage_info = response_data.get("metadata", {}).get("usage", {})

        parsed = self._parse_batch_response(raw_answer, item_count)

        # Every marker 1..N that has no parsed translation is reported
        absent = [number for number in range(1, item_count + 1) if number not in parsed]
        for number in absent:
            logger.warning(f"Missing translation for marker [{number}]")

        # Update the running totals exposed by get_stats()
        token_count = usage_info.get("total_tokens", 0)
        self._total_tokens += token_count
        self._total_requests += 1

        return BatchTranslationResponse(
            translations=parsed,
            total_tokens=token_count,
            latency=usage_info.get("latency", 0.0),
            conversation_id=response_data.get("conversation_id", ""),
            missing_markers=absent,
        )

    def get_stats(self) -> dict:
        """Get client statistics"""
        return {
            "total_tokens": self._total_tokens,
            "total_requests": self._total_requests,
            "base_url": self.base_url,
        }


# Module-wide client instance, created on the first get_dify_client() call
_dify_client: Optional[DifyClient] = None


def get_dify_client() -> DifyClient:
    """Return the shared DifyClient, building it with default settings on first use"""
    global _dify_client
    if _dify_client is not None:
        return _dify_client
    _dify_client = DifyClient()
    return _dify_client