""" Tool_OCR - Translation Schemas Pydantic models for document translation feature (DIFY API) """ from typing import Optional, List, Dict, Any, Tuple from datetime import datetime from pydantic import BaseModel, Field from enum import Enum from dataclasses import dataclass class TranslationStatusEnum(str, Enum): """Translation job status enumeration""" PENDING = "pending" TRANSLATING = "translating" COMPLETED = "completed" FAILED = "failed" class TargetLanguageEnum(str, Enum): """Supported target languages for translation.""" ENGLISH = "en" JAPANESE = "ja" KOREAN = "ko" CHINESE_SIMPLIFIED = "zh-CN" CHINESE_TRADITIONAL = "zh-TW" GERMAN = "de" FRENCH = "fr" SPANISH = "es" PORTUGUESE = "pt" ITALIAN = "it" RUSSIAN = "ru" VIETNAMESE = "vi" THAI = "th" class TranslationRequest(BaseModel): """Request model for starting a translation job""" target_lang: str = Field( ..., description="Target language code (e.g., 'en', 'ja', 'zh-TW')" ) source_lang: str = Field( default="auto", description="Source language code, 'auto' for automatic detection" ) class TranslationProgress(BaseModel): """Progress information for ongoing translation""" current_element: int = Field(default=0, description="Current element being translated") total_elements: int = Field(default=0, description="Total elements to translate") percentage: float = Field(default=0.0, description="Progress percentage (0-100)") class TranslationStatusResponse(BaseModel): """Response model for translation status query""" task_id: str = Field(..., description="Task ID") status: TranslationStatusEnum = Field(..., description="Current translation status") target_lang: str = Field(..., description="Target language") progress: Optional[TranslationProgress] = Field( default=None, description="Progress information when translating" ) error_message: Optional[str] = Field( default=None, description="Error message if translation failed" ) started_at: Optional[datetime] = Field(default=None, description="Translation start time") completed_at: Optional[datetime] = Field(default=None, description="Translation completion time") class TranslationStartResponse(BaseModel): """Response model for starting a translation job""" task_id: str = Field(..., description="Task ID") status: TranslationStatusEnum = Field(..., description="Initial status") target_lang: str = Field(..., description="Target language") message: str = Field(..., description="Status message") class TranslationStatistics(BaseModel): """Statistics for completed translation""" total_elements: int = Field(default=0, description="Total elements in document") translated_elements: int = Field(default=0, description="Successfully translated elements") skipped_elements: int = Field(default=0, description="Skipped elements (images, etc.)") total_characters: int = Field(default=0, description="Total characters translated") processing_time_seconds: float = Field(default=0.0, description="Translation duration") total_tokens: int = Field(default=0, description="Total API tokens used") class TranslationResultResponse(BaseModel): """Response model for translation result""" schema_version: str = Field(default="1.0.0", description="Schema version") source_document: str = Field(..., description="Source document filename") source_lang: str = Field(..., description="Source language (detected or specified)") target_lang: str = Field(..., description="Target language") provider: str = Field(default="dify", description="Translation provider") translated_at: datetime = Field(..., description="Translation timestamp") statistics: TranslationStatistics = Field(..., description="Translation statistics") translations: Dict[str, Any] = Field( ..., description="Translations dict mapping element_id to translated content" ) class TranslationListItem(BaseModel): """Item in translation list response""" target_lang: str = Field(..., description="Target language") translated_at: datetime = Field(..., description="Translation timestamp") provider: str = Field(default="dify", description="Translation provider") statistics: TranslationStatistics = Field(..., description="Translation statistics") file_path: str = Field(..., description="Path to translation JSON file") class TranslationListResponse(BaseModel): """Response model for listing available translations""" task_id: str = Field(..., description="Task ID") translations: List[TranslationListItem] = Field( default_factory=list, description="Available translations" ) # Dataclasses for internal use @dataclass class TranslatableItem: """Internal representation of a translatable element""" element_id: str content: str element_type: str # 'text', 'title', 'header', etc. or 'table_cell' page_number: int = 1 cell_position: Optional[Tuple[int, int]] = None # (row, col) for table cells def __post_init__(self): # Clean content - remove excessive whitespace if self.content: self.content = ' '.join(self.content.split()) @dataclass class TranslatedItem: """Internal representation of a translated element""" element_id: str original_content: str translated_content: str element_type: str cell_position: Optional[Tuple[int, int]] = None @dataclass class TranslationJobState: """Internal state for a translation job""" task_id: str target_lang: str source_lang: str status: TranslationStatusEnum progress: TranslationProgress error_message: Optional[str] = None started_at: Optional[datetime] = None completed_at: Optional[datetime] = None result_file_path: Optional[str] = None