Files
OCR/backend/app/schemas/translation.py
egg ee49751c38 fix: add UTC timezone indicator to all datetime serialization
Database stores times in UTC but serialized without timezone info,
causing frontend to misinterpret as local time. Now all datetime
fields include 'Z' suffix to indicate UTC, enabling proper timezone
conversion in the browser.

- Add UTCDatetimeBaseModel base class for Pydantic schemas
- Update model to_dict() methods to append 'Z' suffix
- Affects: tasks, users, sessions, audit logs, translations

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 15:48:17 +08:00

167 lines
6.0 KiB
Python

"""
Tool_OCR - Translation Schemas
Pydantic models for document translation feature (DIFY API)
"""
from typing import Optional, List, Dict, Any, Tuple
from datetime import datetime
from pydantic import BaseModel, Field
from enum import Enum
from dataclasses import dataclass
from app.schemas.base import UTCDatetimeBaseModel
class TranslationStatusEnum(str, Enum):
    """Status values a translation job can report.

    Members subclass ``str``, so each one compares equal to its lowercase
    value (e.g. ``TranslationStatusEnum.PENDING == "pending"``).
    """

    PENDING = "pending"
    TRANSLATING = "translating"
    COMPLETED = "completed"
    FAILED = "failed"
class TargetLanguageEnum(str, Enum):
    """Language codes available as translation targets.

    Members subclass ``str``; the value of each member is the language code
    string itself.
    """

    # Every code is two letters except the Chinese variants, which carry a
    # region suffix.
    ENGLISH = "en"
    JAPANESE = "ja"
    KOREAN = "ko"
    CHINESE_SIMPLIFIED = "zh-CN"
    CHINESE_TRADITIONAL = "zh-TW"
    GERMAN = "de"
    FRENCH = "fr"
    SPANISH = "es"
    PORTUGUESE = "pt"
    ITALIAN = "it"
    RUSSIAN = "ru"
    VIETNAMESE = "vi"
    THAI = "th"
class TranslationRequest(BaseModel):
    """Request body used to start a translation job.

    ``target_lang`` is required; ``source_lang`` defaults to ``"auto"``.
    """

    # NOTE(review): both codes are plain ``str`` fields, so this schema does
    # not itself limit them to TargetLanguageEnum values.
    target_lang: str = Field(..., description="Target language code (e.g., 'en', 'ja', 'zh-TW')")
    source_lang: str = Field(
        default="auto",
        description="Source language code, 'auto' for automatic detection",
    )
class TranslationProgress(BaseModel):
    """Progress counters for a translation that is still running.

    Every field has a zero default, so the model can be built with no
    arguments.
    """

    current_element: int = Field(
        default=0,
        description="Current element being translated",
    )
    total_elements: int = Field(
        default=0,
        description="Total elements to translate",
    )
    # No range constraint is declared here; the 0-100 scale is only stated in
    # the field description.
    percentage: float = Field(
        default=0.0,
        description="Progress percentage (0-100)",
    )
class TranslationStatusResponse(UTCDatetimeBaseModel):
    """Response body for a translation status query.

    ``task_id``, ``status`` and ``target_lang`` are required; progress, error
    message and both timestamps are optional and default to ``None``.
    """

    # NOTE(review): UTCDatetimeBaseModel comes from app.schemas.base and
    # presumably controls how the datetime fields below are serialized -
    # confirm there.
    task_id: str = Field(..., description="Task ID")
    status: TranslationStatusEnum = Field(..., description="Current translation status")
    target_lang: str = Field(..., description="Target language")
    progress: Optional[TranslationProgress] = Field(default=None, description="Progress information when translating")
    error_message: Optional[str] = Field(default=None, description="Error message if translation failed")
    started_at: Optional[datetime] = Field(
        default=None,
        description="Translation start time",
    )
    completed_at: Optional[datetime] = Field(
        default=None,
        description="Translation completion time",
    )
class TranslationStartResponse(BaseModel):
    """Response body returned when a translation job is started.

    All four fields are required.
    """

    task_id: str = Field(
        ...,
        description="Task ID",
    )
    status: TranslationStatusEnum = Field(
        ...,
        description="Initial status",
    )
    target_lang: str = Field(
        ...,
        description="Target language",
    )
    message: str = Field(
        ...,
        description="Status message",
    )
class TranslationStatistics(BaseModel):
    """Summary counters for a completed translation.

    Every field defaults to zero, so the model can be built with no
    arguments.
    """

    # Element counts.
    total_elements: int = Field(
        default=0,
        description="Total elements in document",
    )
    translated_elements: int = Field(
        default=0,
        description="Successfully translated elements",
    )
    skipped_elements: int = Field(
        default=0,
        description="Skipped elements (images, etc.)",
    )

    # Volume and cost figures.
    total_characters: int = Field(
        default=0,
        description="Total characters translated",
    )
    processing_time_seconds: float = Field(
        default=0.0,
        description="Translation duration",
    )
    total_tokens: int = Field(
        default=0,
        description="Total API tokens used",
    )
class TranslationResultResponse(UTCDatetimeBaseModel):
    """Response body carrying a translation result.

    ``schema_version`` defaults to ``"1.0.0"`` and ``provider`` to ``"dify"``;
    all other fields are required.
    """

    # NOTE(review): UTCDatetimeBaseModel comes from app.schemas.base and
    # presumably controls how ``translated_at`` is serialized - confirm there.
    schema_version: str = Field(
        default="1.0.0",
        description="Schema version",
    )
    source_document: str = Field(
        ...,
        description="Source document filename",
    )
    source_lang: str = Field(
        ...,
        description="Source language (detected or specified)",
    )
    target_lang: str = Field(
        ...,
        description="Target language",
    )
    provider: str = Field(
        default="dify",
        description="Translation provider",
    )
    translated_at: datetime = Field(
        ...,
        description="Translation timestamp",
    )
    statistics: TranslationStatistics = Field(
        ...,
        description="Translation statistics",
    )
    # Values are typed as ``Any``; this schema does not constrain their shape.
    translations: Dict[str, Any] = Field(..., description="Translations dict mapping element_id to translated content")
class TranslationListItem(UTCDatetimeBaseModel):
    """One entry in the list of translations available for a task.

    ``provider`` defaults to ``"dify"``; all other fields are required.
    """

    # NOTE(review): UTCDatetimeBaseModel comes from app.schemas.base and
    # presumably controls how ``translated_at`` is serialized - confirm there.
    target_lang: str = Field(
        ...,
        description="Target language",
    )
    translated_at: datetime = Field(
        ...,
        description="Translation timestamp",
    )
    provider: str = Field(
        default="dify",
        description="Translation provider",
    )
    statistics: TranslationStatistics = Field(
        ...,
        description="Translation statistics",
    )
    file_path: str = Field(
        ...,
        description="Path to translation JSON file",
    )
class TranslationListResponse(BaseModel):
    """Response body listing the translations available for a task.

    ``translations`` defaults to an empty list when it is not supplied.
    """

    task_id: str = Field(..., description="Task ID")
    # default_factory builds a new list for each instance.
    translations: List[TranslationListItem] = Field(default_factory=list, description="Available translations")
# Dataclasses for internal use
@dataclass
class TranslatableItem:
    """Internal record for one element whose content is to be translated.

    After construction, non-empty ``content`` is normalized: every run of
    whitespace becomes a single space and leading/trailing whitespace is
    removed. Falsy content (``""`` or ``None``) is left untouched.
    """

    element_id: str
    content: str
    # 'text', 'title', 'header', etc. or 'table_cell'
    element_type: str
    page_number: int = 1
    # (row, col) for table cells
    cell_position: Optional[Tuple[int, int]] = None

    def __post_init__(self):
        """Collapse excessive whitespace in ``content`` once the fields are set."""
        if not self.content:
            return
        words = self.content.split()
        self.content = ' '.join(words)
@dataclass
class TranslatedItem:
    """Internal record pairing an element's original and translated content.

    Field-for-field the same as ``TranslatableItem``, except that ``content``
    is split into ``original_content`` and ``translated_content``. No
    normalization is applied to either string.
    """

    element_id: str
    original_content: str
    translated_content: str
    element_type: str
    # Optional location details; defaults are page 1 and no cell position.
    page_number: int = 1
    cell_position: Optional[Tuple[int, int]] = None
@dataclass
class TranslationJobState:
    """Internal state record for a translation job.

    The first five fields are required. The error message, both timestamps
    and the result file path are optional and default to ``None``.
    """

    # Required fields.
    task_id: str
    target_lang: str
    source_lang: str
    status: TranslationStatusEnum
    progress: TranslationProgress

    # Optional fields, all defaulting to None.
    error_message: Optional[str] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    result_file_path: Optional[str] = None