feat: add document translation via DIFY AI API
Implement document translation feature using DIFY AI API with batch processing:

Backend:
- Add DIFY client with batch translation support (5000 chars, 20 items per batch)
- Add translation service with element extraction and result building
- Add translation router with start/status/result/list/delete endpoints
- Add translation schemas (TranslationRequest, TranslationStatus, etc.)

Frontend:
- Enable translation UI in TaskDetailPage
- Add translation API methods to apiV2.ts
- Add translation types

Features:
- Batch translation with numbered markers [1], [2], [3]...
- Support for text, title, header, footer, paragraph, footnote, table cells
- Translation result JSON with statistics (tokens, latency, batch_count)
- Background task processing with progress tracking

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
163
backend/app/schemas/translation.py
Normal file
163
backend/app/schemas/translation.py
Normal file
@@ -0,0 +1,163 @@
|
||||
"""
|
||||
Tool_OCR - Translation Schemas
|
||||
Pydantic models for document translation feature (DIFY API)
|
||||
"""
|
||||
|
||||
from typing import Optional, List, Dict, Any, Tuple
|
||||
from datetime import datetime
|
||||
from pydantic import BaseModel, Field
|
||||
from enum import Enum
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
class TranslationStatusEnum(str, Enum):
    """Translation job status enumeration.

    The ``str`` mixin means every member is also a ``str`` instance and
    compares equal to its raw value (e.g. ``PENDING == "pending"``).
    """

    PENDING = "pending"
    TRANSLATING = "translating"
    COMPLETED = "completed"
    FAILED = "failed"
|
||||
|
||||
|
||||
class TargetLanguageEnum(str, Enum):
    """Supported target languages for translation.

    Each member's value is the language code string; the ``str`` mixin lets
    members compare equal to that code directly.
    """

    ENGLISH = "en"
    JAPANESE = "ja"
    KOREAN = "ko"
    CHINESE_SIMPLIFIED = "zh-CN"
    CHINESE_TRADITIONAL = "zh-TW"
    GERMAN = "de"
    FRENCH = "fr"
    SPANISH = "es"
    PORTUGUESE = "pt"
    ITALIAN = "it"
    RUSSIAN = "ru"
    VIETNAMESE = "vi"
    THAI = "th"
|
||||
|
||||
|
||||
class TranslationRequest(BaseModel):
    """Request model for starting a translation job.

    NOTE(review): ``target_lang`` is typed as a plain ``str`` and is not
    constrained to ``TargetLanguageEnum`` by this model -- confirm that the
    caller validates the language code.
    """

    # Required: the language to translate into.
    target_lang: str = Field(
        ...,
        description="Target language code (e.g., 'en', 'ja', 'zh-TW')",
    )
    # Optional: defaults to the literal string "auto".
    source_lang: str = Field(
        default="auto",
        description="Source language code, 'auto' for automatic detection",
    )
|
||||
|
||||
|
||||
class TranslationProgress(BaseModel):
    """Progress information for ongoing translation.

    Every field has a zero default, so ``TranslationProgress()`` is a valid
    "nothing done yet" value. The model itself does not enforce the 0-100
    range mentioned in the ``percentage`` description.
    """

    current_element: int = Field(
        default=0,
        description="Current element being translated",
    )
    total_elements: int = Field(
        default=0,
        description="Total elements to translate",
    )
    percentage: float = Field(
        default=0.0,
        description="Progress percentage (0-100)",
    )
|
||||
|
||||
|
||||
class TranslationStatusResponse(BaseModel):
    """Response model for translation status query.

    ``task_id``, ``status`` and ``target_lang`` are required; the remaining
    fields are optional and default to ``None``.
    """

    task_id: str = Field(..., description="Task ID")
    status: TranslationStatusEnum = Field(
        ...,
        description="Current translation status",
    )
    target_lang: str = Field(..., description="Target language")
    progress: Optional[TranslationProgress] = Field(
        default=None,
        description="Progress information when translating",
    )
    error_message: Optional[str] = Field(
        default=None,
        description="Error message if translation failed",
    )
    started_at: Optional[datetime] = Field(
        default=None,
        description="Translation start time",
    )
    completed_at: Optional[datetime] = Field(
        default=None,
        description="Translation completion time",
    )
|
||||
|
||||
|
||||
class TranslationStartResponse(BaseModel):
    """Response model for starting a translation job.

    All four fields are required (no defaults).
    """

    task_id: str = Field(..., description="Task ID")
    status: TranslationStatusEnum = Field(..., description="Initial status")
    target_lang: str = Field(..., description="Target language")
    message: str = Field(..., description="Status message")
|
||||
|
||||
|
||||
class TranslationStatistics(BaseModel):
    """Statistics for completed translation.

    Every counter defaults to zero, so the model can be constructed without
    arguments.
    """

    total_elements: int = Field(
        default=0,
        description="Total elements in document",
    )
    translated_elements: int = Field(
        default=0,
        description="Successfully translated elements",
    )
    skipped_elements: int = Field(
        default=0,
        description="Skipped elements (images, etc.)",
    )
    total_characters: int = Field(
        default=0,
        description="Total characters translated",
    )
    processing_time_seconds: float = Field(
        default=0.0,
        description="Translation duration",
    )
    total_tokens: int = Field(
        default=0,
        description="Total API tokens used",
    )
|
||||
|
||||
|
||||
class TranslationResultResponse(BaseModel):
    """Response model for translation result.

    ``schema_version`` and ``provider`` carry defaults ("1.0.0" and "dify");
    every other field is required.
    """

    schema_version: str = Field(default="1.0.0", description="Schema version")
    source_document: str = Field(..., description="Source document filename")
    source_lang: str = Field(
        ...,
        description="Source language (detected or specified)",
    )
    target_lang: str = Field(..., description="Target language")
    provider: str = Field(default="dify", description="Translation provider")
    translated_at: datetime = Field(..., description="Translation timestamp")
    statistics: TranslationStatistics = Field(
        ...,
        description="Translation statistics",
    )
    # Values are typed ``Any``: this model does not constrain their shape.
    translations: Dict[str, Any] = Field(
        ...,
        description="Translations dict mapping element_id to translated content",
    )
|
||||
|
||||
|
||||
class TranslationListItem(BaseModel):
    """Item in translation list response.

    Only ``provider`` has a default ("dify"); the other fields are required.
    """

    target_lang: str = Field(..., description="Target language")
    translated_at: datetime = Field(..., description="Translation timestamp")
    provider: str = Field(default="dify", description="Translation provider")
    statistics: TranslationStatistics = Field(
        ...,
        description="Translation statistics",
    )
    file_path: str = Field(..., description="Path to translation JSON file")
|
||||
|
||||
|
||||
class TranslationListResponse(BaseModel):
    """Response model for listing available translations.

    ``translations`` uses ``default_factory=list``, so it defaults to a new
    empty list when omitted.
    """

    task_id: str = Field(..., description="Task ID")
    translations: List[TranslationListItem] = Field(
        default_factory=list,
        description="Available translations",
    )
|
||||
|
||||
|
||||
# Dataclasses for internal use
|
||||
|
||||
@dataclass
class TranslatableItem:
    """Internal representation of a translatable element.

    On construction, ``content`` is whitespace-normalized: every run of
    whitespace (spaces, tabs, newlines) collapses to a single space and
    leading/trailing whitespace is dropped.
    """

    element_id: str
    content: str
    element_type: str  # 'text', 'title', 'header', etc. or 'table_cell'
    page_number: int = 1
    cell_position: Optional[Tuple[int, int]] = None  # (row, col) for table cells

    def __post_init__(self) -> None:
        """Collapse excessive whitespace in ``content``.

        Falsy content (e.g. an empty string) is left untouched.
        """
        if self.content:
            # split() with no argument splits on any whitespace run and
            # discards empty pieces, so join() yields single-space separation.
            self.content = ' '.join(self.content.split())
|
||||
|
||||
|
||||
@dataclass
class TranslatedItem:
    """Internal representation of a translated element.

    Holds the original and translated text side by side under one
    ``element_id``. No normalization is applied to either string here.
    """

    element_id: str
    original_content: str
    translated_content: str
    element_type: str
    # Optional pair of ints; defaults to None when not supplied.
    cell_position: Optional[Tuple[int, int]] = None
|
||||
|
||||
|
||||
@dataclass
class TranslationJobState:
    """Internal state for a translation job.

    The first five fields are required; the trailing optional fields all
    default to ``None``.
    """

    task_id: str
    target_lang: str
    source_lang: str
    status: TranslationStatusEnum
    progress: TranslationProgress
    error_message: Optional[str] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    result_file_path: Optional[str] = None
|
||||
Reference in New Issue
Block a user