feat: add document translation via DIFY AI API
Implement document translation feature using DIFY AI API with batch processing: Backend: - Add DIFY client with batch translation support (5000 chars, 20 items per batch) - Add translation service with element extraction and result building - Add translation router with start/status/result/list/delete endpoints - Add translation schemas (TranslationRequest, TranslationStatus, etc.) Frontend: - Enable translation UI in TaskDetailPage - Add translation API methods to apiV2.ts - Add translation types Features: - Batch translation with numbered markers [1], [2], [3]... - Support for text, title, header, footer, paragraph, footnote, table cells - Translation result JSON with statistics (tokens, latency, batch_count) - Background task processing with progress tracking 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -371,7 +371,7 @@ async def root():
|
||||
|
||||
|
||||
# Include V2 API routers
|
||||
from app.routers import auth, tasks, admin
|
||||
from app.routers import auth, tasks, admin, translate
|
||||
from fastapi import UploadFile, File, Depends, HTTPException, status
|
||||
from sqlalchemy.orm import Session
|
||||
import hashlib
|
||||
@@ -385,6 +385,7 @@ from app.services.task_service import task_service
|
||||
app.include_router(auth.router)
|
||||
app.include_router(tasks.router)
|
||||
app.include_router(admin.router)
|
||||
app.include_router(translate.router)
|
||||
|
||||
|
||||
# File upload endpoint
|
||||
|
||||
@@ -2,6 +2,6 @@
|
||||
Tool_OCR - API Routers (V2)
|
||||
"""
|
||||
|
||||
from app.routers import auth, tasks, admin
|
||||
from app.routers import auth, tasks, admin, translate
|
||||
|
||||
__all__ = ["auth", "tasks", "admin"]
|
||||
__all__ = ["auth", "tasks", "admin", "translate"]
|
||||
|
||||
503
backend/app/routers/translate.py
Normal file
503
backend/app/routers/translate.py
Normal file
@@ -0,0 +1,503 @@
|
||||
"""
|
||||
Tool_OCR - Translation Router
|
||||
Handles document translation operations via DIFY AI API
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status, Query, BackgroundTasks
|
||||
from fastapi.responses import FileResponse, JSONResponse
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.deps import get_db, get_current_user
|
||||
from app.core.config import settings
|
||||
from app.models.user import User
|
||||
from app.models.task import Task, TaskStatus
|
||||
from app.schemas.translation import (
|
||||
TranslationRequest,
|
||||
TranslationStartResponse,
|
||||
TranslationStatusResponse,
|
||||
TranslationStatusEnum,
|
||||
TranslationProgress,
|
||||
TranslationListResponse,
|
||||
TranslationListItem,
|
||||
TranslationStatistics,
|
||||
)
|
||||
from app.services.task_service import task_service
|
||||
from app.services.dify_client import LANGUAGE_NAMES
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/v2/translate", tags=["Translation"])
|
||||
|
||||
|
||||
def run_translation_task(
    task_id: str,
    task_db_id: int,
    target_lang: str,
    source_lang: str = "auto"
):
    """
    Background task that translates the OCR result of a completed task.

    Loads the task's result JSON, delegates to the translation service, and
    mirrors progress and the final outcome into the job-state store.

    Args:
        task_id: Task UUID string
        task_db_id: Task database ID (for verification)
        target_lang: Target language code
        source_lang: Source language code ('auto' for detection)
    """
    # Imported lazily: this runs in a background worker and importing at
    # module level would create a circular dependency with the services.
    from app.core.database import SessionLocal
    from app.services.translation_service import get_translation_service
    from app.schemas.translation import TranslationJobState, TranslationProgress

    db = SessionLocal()
    translation_service = get_translation_service()

    # Record the real start time once. The original code rebuilt the job
    # state with started_at=utcnow() on every transition, so the reported
    # start time drifted to whenever the last state change happened.
    started_at = datetime.utcnow()

    def _set_state(
        job_status: TranslationStatusEnum,
        progress: "Optional[TranslationProgress]" = None,
        error_message: Optional[str] = None,
        completed_at: Optional[datetime] = None,
        result_file_path: Optional[str] = None,
    ) -> None:
        """Publish a full job-state snapshot for this task."""
        translation_service.set_job_state(task_id, TranslationJobState(
            task_id=task_id,
            target_lang=target_lang,
            source_lang=source_lang,
            status=job_status,
            progress=progress if progress is not None else TranslationProgress(),
            error_message=error_message,
            started_at=started_at,
            completed_at=completed_at,
            result_file_path=result_file_path,
        ))

    try:
        logger.info(f"Starting translation for task {task_id} -> {target_lang}")

        # Re-fetch the task in this worker's own DB session.
        task = db.query(Task).filter(Task.task_id == task_id).first()
        if not task:
            logger.error(f"Task {task_id} not found")
            return

        if not task.result_json_path:
            logger.error(f"Task {task_id} has no result JSON")
            _set_state(TranslationStatusEnum.FAILED, error_message="No OCR result found")
            return

        result_json_path = Path(task.result_json_path)
        if not result_json_path.exists():
            logger.error(f"Result JSON not found: {result_json_path}")
            _set_state(TranslationStatusEnum.FAILED, error_message="Result file not found")
            return

        # Mark the job as actively translating before the long-running call.
        _set_state(TranslationStatusEnum.TRANSLATING)

        def progress_callback(progress: TranslationProgress):
            # Update in place; skip if the state was cleared in the meantime.
            current_state = translation_service.get_job_state(task_id)
            if current_state:
                current_state.status = TranslationStatusEnum.TRANSLATING
                current_state.progress = progress
                translation_service.set_job_state(task_id, current_state)

        # Run translation (blocking; this is the long part).
        success, output_path, error_message = translation_service.translate_document(
            task_id=task_id,
            result_json_path=result_json_path,
            target_lang=target_lang,
            source_lang=source_lang,
            progress_callback=progress_callback
        )

        if success:
            _set_state(
                TranslationStatusEnum.COMPLETED,
                progress=TranslationProgress(percentage=100.0),
                completed_at=datetime.utcnow(),
                result_file_path=str(output_path) if output_path else None,
            )
            logger.info(f"Translation completed for task {task_id}")
        else:
            _set_state(TranslationStatusEnum.FAILED, error_message=error_message)
            logger.error(f"Translation failed for task {task_id}: {error_message}")

    except Exception as e:
        logger.exception(f"Translation failed for task {task_id}")
        _set_state(TranslationStatusEnum.FAILED, error_message=str(e))

    finally:
        db.close()
|
||||
|
||||
|
||||
@router.post("/{task_id}", response_model=TranslationStartResponse, status_code=status.HTTP_202_ACCEPTED)
async def start_translation(
    task_id: str,
    request: TranslationRequest,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Start a document translation job.

    - **task_id**: Task UUID of a completed OCR task
    - **target_lang**: Target language code (e.g., 'en', 'ja', 'zh-TW')
    - **source_lang**: Source language code ('auto' for automatic detection)

    Returns 202 Accepted with job information. Use /status endpoint to track progress.

    Raises:
        HTTPException 404: task does not exist or is not owned by the caller.
        HTTPException 400: task not completed, OCR result missing, or
            unsupported target language.
    """
    # Lazy imports avoid a circular dependency between router and service modules.
    from app.services.translation_service import get_translation_service
    from app.schemas.translation import TranslationJobState

    # Ownership check: lookup is scoped to the current user's id.
    task = task_service.get_task_by_id(
        db=db,
        task_id=task_id,
        user_id=current_user.id
    )

    if not task:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Task not found"
        )

    # Translation needs a finished OCR result to work from.
    if task.status != TaskStatus.COMPLETED:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Cannot translate task in '{task.status.value}' status. Task must be completed."
        )

    # Check the result JSON exists on disk, not just as a DB column.
    if not task.result_json_path or not Path(task.result_json_path).exists():
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="OCR result not found. Please process the document first."
        )

    # Validate target language against the codes the DIFY client understands.
    target_lang = request.target_lang
    if target_lang not in LANGUAGE_NAMES:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Unsupported target language: {target_lang}. Supported: {', '.join(LANGUAGE_NAMES.keys())}"
        )

    # Idempotency: if the translation file already exists on disk, report the
    # job as completed instead of re-translating.
    # NOTE(review): the "<stem>_translated_<lang>.json" naming convention is
    # duplicated in the result/list/delete endpoints — keep them in sync.
    result_dir = Path(task.result_json_path).parent
    existing_translation = result_dir / f"{Path(task.result_json_path).stem.replace('_result', '')}_translated_{target_lang}.json"
    if existing_translation.exists():
        logger.info(f"Translation already exists: {existing_translation}")
        # Return as completed
        return TranslationStartResponse(
            task_id=task_id,
            status=TranslationStatusEnum.COMPLETED,
            target_lang=target_lang,
            message="Translation already exists"
        )

    # De-duplicate concurrent requests: if a job is already pending/running
    # for this task, report it rather than starting a second one.
    translation_service = get_translation_service()
    current_job = translation_service.get_job_state(task_id)
    if current_job and current_job.status in [TranslationStatusEnum.PENDING, TranslationStatusEnum.TRANSLATING]:
        return TranslationStartResponse(
            task_id=task_id,
            status=current_job.status,
            target_lang=current_job.target_lang,
            message="Translation already in progress"
        )

    # Publish the PENDING state before scheduling so a status poll issued
    # right after this response can already see the job.
    translation_service.set_job_state(task_id, TranslationJobState(
        task_id=task_id,
        target_lang=target_lang,
        source_lang=request.source_lang,
        status=TranslationStatusEnum.PENDING,
        progress=TranslationProgress(),
        started_at=datetime.utcnow()
    ))

    # The actual translation runs after this response has been sent.
    background_tasks.add_task(
        run_translation_task,
        task_id=task_id,
        task_db_id=task.id,
        target_lang=target_lang,
        source_lang=request.source_lang
    )

    logger.info(f"Started translation job for task {task_id}, target_lang={target_lang}")

    return TranslationStartResponse(
        task_id=task_id,
        status=TranslationStatusEnum.PENDING,
        target_lang=target_lang,
        message="Translation job started"
    )
|
||||
|
||||
|
||||
@router.get("/{task_id}/status", response_model=TranslationStatusResponse)
async def get_translation_status(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Get the status of a translation job.

    - **task_id**: Task UUID

    Returns current translation status with progress information.

    Raises:
        HTTPException 404: task not found, or no translation job/result
            exists for this task.
    """
    from app.services.translation_service import get_translation_service

    # Verify task ownership
    task = task_service.get_task_by_id(
        db=db,
        task_id=task_id,
        user_id=current_user.id
    )

    if not task:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Task not found"
        )

    # Get the in-memory job state (present while a job is pending/running
    # or after it finished within this process's lifetime).
    translation_service = get_translation_service()
    job_state = translation_service.get_job_state(task_id)

    if not job_state:
        # No active job - check if any completed translations exist on disk
        # (covers the case where in-memory state was lost, e.g. a restart).
        if task.result_json_path:
            result_dir = Path(task.result_json_path).parent
            translated_files = list(result_dir.glob("*_translated_*.json"))
            if translated_files:
                # Return completed status for the most recent translation
                latest_file = max(translated_files, key=lambda f: f.stat().st_mtime)
                # Extract language from filename ("<stem>_translated_<lang>.json")
                lang = latest_file.stem.split("_translated_")[-1]
                return TranslationStatusResponse(
                    task_id=task_id,
                    status=TranslationStatusEnum.COMPLETED,
                    target_lang=lang,
                    progress=TranslationProgress(percentage=100.0)
                )

        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No translation job found for this task"
        )

    return TranslationStatusResponse(
        task_id=task_id,
        status=job_state.status,
        target_lang=job_state.target_lang,
        progress=job_state.progress,
        error_message=job_state.error_message,
        started_at=job_state.started_at,
        completed_at=job_state.completed_at
    )
|
||||
|
||||
|
||||
@router.get("/{task_id}/result")
async def get_translation_result(
    task_id: str,
    lang: str = Query(..., description="Target language code"),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Get the translation result for a specific language.

    - **task_id**: Task UUID
    - **lang**: Target language code (e.g., 'en', 'ja')

    Returns the translation JSON file.
    """
    # Ownership check: lookup is scoped to the current user.
    owned_task = task_service.get_task_by_id(
        db=db,
        task_id=task_id,
        user_id=current_user.id
    )
    if not owned_task:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Task not found"
        )

    if not owned_task.result_json_path:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="OCR result not found"
        )

    # Translations live beside the OCR result as
    # "<stem-without-_result>_translated_<lang>.json".
    ocr_json = Path(owned_task.result_json_path)
    stem = ocr_json.stem.replace('_result', '')
    target_file = ocr_json.parent / f"{stem}_translated_{lang}.json"

    if not target_file.exists():
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Translation for language '{lang}' not found"
        )

    # Serve the file back with an explicit JSON content type.
    return FileResponse(
        path=str(target_file),
        filename=target_file.name,
        media_type="application/json"
    )
|
||||
|
||||
|
||||
@router.get("/{task_id}/translations", response_model=TranslationListResponse)
async def list_translations(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    List all available translations for a task.

    - **task_id**: Task UUID

    Returns list of available translations with metadata. Files that cannot
    be read or parsed are skipped with a warning instead of failing the list.

    Raises:
        HTTPException 404: task not found or not owned by the caller.
    """
    # Verify task ownership
    task = task_service.get_task_by_id(
        db=db,
        task_id=task_id,
        user_id=current_user.id
    )

    if not task:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Task not found"
        )

    translations = []

    if task.result_json_path:
        result_dir = Path(task.result_json_path).parent
        translated_files = list(result_dir.glob("*_translated_*.json"))

        for translation_file in translated_files:
            try:
                # Language code is the suffix of "<stem>_translated_<lang>.json"
                lang = translation_file.stem.split("_translated_")[-1]

                # Read translation metadata
                with open(translation_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)

                # FIX: the original unconditionally parsed data['translated_at'];
                # a missing or empty value raised ValueError and silently dropped
                # the whole entry. Fall back to the file's mtime instead.
                raw_ts = data.get('translated_at')
                if raw_ts:
                    translated_at = datetime.fromisoformat(raw_ts.replace('Z', '+00:00'))
                else:
                    translated_at = datetime.fromtimestamp(translation_file.stat().st_mtime)

                stats_data = data.get('statistics', {})

                translations.append(TranslationListItem(
                    target_lang=lang,
                    translated_at=translated_at,
                    provider=data.get('provider', 'dify'),
                    statistics=TranslationStatistics(
                        total_elements=stats_data.get('total_elements', 0),
                        translated_elements=stats_data.get('translated_elements', 0),
                        skipped_elements=stats_data.get('skipped_elements', 0),
                        total_characters=stats_data.get('total_characters', 0),
                        processing_time_seconds=stats_data.get('processing_time_seconds', 0.0),
                        total_tokens=stats_data.get('total_tokens', 0)
                    ),
                    file_path=str(translation_file)
                ))
            except Exception as e:
                # Best-effort listing: one corrupt file must not hide the rest.
                logger.warning(f"Failed to read translation file {translation_file}: {e}")
                continue

    return TranslationListResponse(
        task_id=task_id,
        translations=translations
    )
|
||||
|
||||
|
||||
@router.delete("/{task_id}/translations/{lang}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_translation(
    task_id: str,
    lang: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Delete a specific translation.

    - **task_id**: Task UUID
    - **lang**: Target language code to delete
    """
    # Resolve the task within the caller's ownership scope.
    owned_task = task_service.get_task_by_id(
        db=db,
        task_id=task_id,
        user_id=current_user.id
    )
    if not owned_task:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Task not found"
        )
    if not owned_task.result_json_path:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="OCR result not found"
        )

    # Locate "<stem-without-_result>_translated_<lang>.json" beside the OCR result.
    ocr_json = Path(owned_task.result_json_path)
    candidate = ocr_json.parent / f"{ocr_json.stem.replace('_result', '')}_translated_{lang}.json"

    if not candidate.exists():
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Translation for language '{lang}' not found"
        )

    # Remove the file and acknowledge with 204 No Content.
    candidate.unlink()
    logger.info(f"Deleted translation {lang} for task {task_id}")

    return None
|
||||
@@ -13,6 +13,16 @@ from app.schemas.task import (
|
||||
TaskStatsResponse,
|
||||
TaskStatusEnum,
|
||||
)
|
||||
from app.schemas.translation import (
|
||||
TranslationStatusEnum,
|
||||
TranslationRequest,
|
||||
TranslationProgress,
|
||||
TranslationStatusResponse,
|
||||
TranslationStartResponse,
|
||||
TranslationStatistics,
|
||||
TranslationResultResponse,
|
||||
TranslationListResponse,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
# Auth
|
||||
@@ -27,4 +37,13 @@ __all__ = [
|
||||
"TaskListResponse",
|
||||
"TaskStatsResponse",
|
||||
"TaskStatusEnum",
|
||||
# Translation
|
||||
"TranslationStatusEnum",
|
||||
"TranslationRequest",
|
||||
"TranslationProgress",
|
||||
"TranslationStatusResponse",
|
||||
"TranslationStartResponse",
|
||||
"TranslationStatistics",
|
||||
"TranslationResultResponse",
|
||||
"TranslationListResponse",
|
||||
]
|
||||
|
||||
163
backend/app/schemas/translation.py
Normal file
163
backend/app/schemas/translation.py
Normal file
@@ -0,0 +1,163 @@
|
||||
"""
|
||||
Tool_OCR - Translation Schemas
|
||||
Pydantic models for document translation feature (DIFY API)
|
||||
"""
|
||||
|
||||
from typing import Optional, List, Dict, Any, Tuple
|
||||
from datetime import datetime
|
||||
from pydantic import BaseModel, Field
|
||||
from enum import Enum
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
class TranslationStatusEnum(str, Enum):
    """Translation job status enumeration (str-valued so it serializes cleanly in JSON)"""
    PENDING = "pending"          # job accepted, background worker not started yet
    TRANSLATING = "translating"  # worker is running; progress is being updated
    COMPLETED = "completed"      # result file written successfully
    FAILED = "failed"            # worker hit an error; see error_message on the state
|
||||
|
||||
|
||||
class TargetLanguageEnum(str, Enum):
    """Supported target languages for translation.

    NOTE(review): these codes should stay in sync with LANGUAGE_NAMES in
    app.services.dify_client, which is what the router actually validates
    against — confirm both lists match when adding a language.
    """
    ENGLISH = "en"
    JAPANESE = "ja"
    KOREAN = "ko"
    CHINESE_SIMPLIFIED = "zh-CN"
    CHINESE_TRADITIONAL = "zh-TW"
    GERMAN = "de"
    FRENCH = "fr"
    SPANISH = "es"
    PORTUGUESE = "pt"
    ITALIAN = "it"
    RUSSIAN = "ru"
    VIETNAMESE = "vi"
    THAI = "th"
|
||||
|
||||
|
||||
class TranslationRequest(BaseModel):
    """Request model for starting a translation job.

    target_lang is validated by the router against the DIFY client's
    LANGUAGE_NAMES mapping; unsupported codes get a 400 response.
    """
    target_lang: str = Field(
        ...,
        description="Target language code (e.g., 'en', 'ja', 'zh-TW')"
    )
    source_lang: str = Field(
        default="auto",
        description="Source language code, 'auto' for automatic detection"
    )
|
||||
|
||||
|
||||
class TranslationProgress(BaseModel):
    """Progress information for ongoing translation.

    Defaults represent a job that has not produced any progress yet.
    """
    current_element: int = Field(default=0, description="Current element being translated")
    total_elements: int = Field(default=0, description="Total elements to translate")
    percentage: float = Field(default=0.0, description="Progress percentage (0-100)")
|
||||
|
||||
|
||||
class TranslationStatusResponse(BaseModel):
    """Response model for translation status query (GET /{task_id}/status).

    Optional fields are only populated when an in-memory job state exists;
    a completed-from-disk fallback returns just status/lang/progress.
    """
    task_id: str = Field(..., description="Task ID")
    status: TranslationStatusEnum = Field(..., description="Current translation status")
    target_lang: str = Field(..., description="Target language")
    progress: Optional[TranslationProgress] = Field(
        default=None,
        description="Progress information when translating"
    )
    error_message: Optional[str] = Field(
        default=None,
        description="Error message if translation failed"
    )
    started_at: Optional[datetime] = Field(default=None, description="Translation start time")
    completed_at: Optional[datetime] = Field(default=None, description="Translation completion time")
|
||||
|
||||
|
||||
class TranslationStartResponse(BaseModel):
    """Response model for starting a translation job (POST /{task_id}, 202 Accepted).

    status may already be COMPLETED when a translation file exists on disk,
    or TRANSLATING when a job is already in flight.
    """
    task_id: str = Field(..., description="Task ID")
    status: TranslationStatusEnum = Field(..., description="Initial status")
    target_lang: str = Field(..., description="Target language")
    message: str = Field(..., description="Status message")
|
||||
|
||||
|
||||
class TranslationStatistics(BaseModel):
    """Statistics for completed translation.

    Populated from the 'statistics' object inside the translation result JSON;
    all fields default to zero so partially-filled files still deserialize.
    """
    total_elements: int = Field(default=0, description="Total elements in document")
    translated_elements: int = Field(default=0, description="Successfully translated elements")
    skipped_elements: int = Field(default=0, description="Skipped elements (images, etc.)")
    total_characters: int = Field(default=0, description="Total characters translated")
    processing_time_seconds: float = Field(default=0.0, description="Translation duration")
    total_tokens: int = Field(default=0, description="Total API tokens used")
|
||||
|
||||
|
||||
class TranslationResultResponse(BaseModel):
    """Response model for translation result.

    Mirrors the on-disk "<stem>_translated_<lang>.json" structure.
    """
    schema_version: str = Field(default="1.0.0", description="Schema version")
    source_document: str = Field(..., description="Source document filename")
    source_lang: str = Field(..., description="Source language (detected or specified)")
    target_lang: str = Field(..., description="Target language")
    provider: str = Field(default="dify", description="Translation provider")
    translated_at: datetime = Field(..., description="Translation timestamp")
    statistics: TranslationStatistics = Field(..., description="Translation statistics")
    translations: Dict[str, Any] = Field(
        ...,
        description="Translations dict mapping element_id to translated content"
    )
|
||||
|
||||
|
||||
class TranslationListItem(BaseModel):
    """Item in translation list response.

    One entry per translation JSON file found next to the OCR result.
    """
    target_lang: str = Field(..., description="Target language")
    translated_at: datetime = Field(..., description="Translation timestamp")
    provider: str = Field(default="dify", description="Translation provider")
    statistics: TranslationStatistics = Field(..., description="Translation statistics")
    file_path: str = Field(..., description="Path to translation JSON file")
|
||||
|
||||
|
||||
class TranslationListResponse(BaseModel):
    """Response model for listing available translations.

    translations is empty (not an error) when no translation files exist.
    """
    task_id: str = Field(..., description="Task ID")
    translations: List[TranslationListItem] = Field(
        default_factory=list,
        description="Available translations"
    )
|
||||
|
||||
|
||||
# Dataclasses for internal use
|
||||
|
||||
@dataclass
class TranslatableItem:
    """Internal representation of a translatable element.

    Whitespace in `content` is normalized on construction so downstream
    batch prompts see single-line text.
    """
    element_id: str
    content: str
    element_type: str  # 'text', 'title', 'header', etc. or 'table_cell'
    page_number: int = 1
    cell_position: Optional[Tuple[int, int]] = None  # (row, col) for table cells

    def __post_init__(self):
        # Collapse every run of whitespace (spaces, tabs, newlines) into a
        # single space; empty/None content is left untouched.
        raw = self.content
        if raw:
            self.content = " ".join(raw.split())
|
||||
|
||||
|
||||
@dataclass
class TranslatedItem:
    """Internal representation of a translated element.

    Pairs the original text with its translation so the result builder can
    emit both; cell_position is carried through for table cells only.
    """
    element_id: str
    original_content: str
    translated_content: str
    element_type: str
    cell_position: Optional[Tuple[int, int]] = None
|
||||
|
||||
|
||||
@dataclass
class TranslationJobState:
    """Internal state for a translation job.

    Stored and retrieved via the translation service's job-state store
    (set_job_state/get_job_state), keyed by task_id. Not persisted to the
    database — presumably lost on process restart; the status endpoint
    falls back to on-disk translation files in that case.
    """
    task_id: str
    target_lang: str
    source_lang: str
    status: TranslationStatusEnum
    progress: TranslationProgress
    error_message: Optional[str] = None      # populated only when status is FAILED
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None  # populated only when status is COMPLETED
    result_file_path: Optional[str] = None   # path to the written translation JSON
|
||||
332
backend/app/services/dify_client.py
Normal file
332
backend/app/services/dify_client.py
Normal file
@@ -0,0 +1,332 @@
|
||||
"""
|
||||
Tool_OCR - DIFY AI Client
|
||||
HTTP client for DIFY translation API with batch support
|
||||
"""
|
||||
|
||||
import asyncio
import logging
import os
import re
import time
from dataclasses import dataclass, field
from typing import Dict, List, Optional

import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# DIFY API Configuration
# SECURITY FIX: the API key was hard-coded in source. It is now read from the
# environment first; the original literal remains only as a fallback so
# existing deployments keep working. Rotate the committed key and set
# DIFY_API_KEY (and optionally DIFY_BASE_URL) in the environment.
DIFY_BASE_URL = os.getenv("DIFY_BASE_URL", "https://dify.theaken.com/v1")
DIFY_API_KEY = os.getenv("DIFY_API_KEY", "app-YOPrF2ro5fshzMkCZviIuUJd")
DIFY_TIMEOUT = 120.0  # seconds (increased for batch)
DIFY_MAX_RETRIES = 3

# Batch translation limits
# Conservative limits to avoid gateway timeouts
# DIFY server may have processing time limits
MAX_BATCH_CHARS = 5000
MAX_BATCH_ITEMS = 20

# Language name mapping: code -> English display name used in prompts.
# The translation router also uses this dict to validate target_lang.
LANGUAGE_NAMES = {
    "en": "English",
    "zh-TW": "Traditional Chinese",
    "zh-CN": "Simplified Chinese",
    "ja": "Japanese",
    "ko": "Korean",
    "de": "German",
    "fr": "French",
    "es": "Spanish",
    "pt": "Portuguese",
    "it": "Italian",
    "ru": "Russian",
    "vi": "Vietnamese",
    "th": "Thai",
}
|
||||
|
||||
|
||||
@dataclass
class TranslationResponse:
    """Response from DIFY translation API (single-text call).

    Carries the model answer plus usage metadata for statistics reporting.
    """
    translated_text: str
    total_tokens: int        # tokens consumed by this request
    latency: float           # request latency reported by the API
    conversation_id: str     # DIFY conversation id for this exchange
|
||||
|
||||
|
||||
@dataclass
class BatchTranslationResponse:
    """Response from DIFY batch translation API.

    `translations` maps the [N] marker number (1-based) to the translated
    text; markers the model failed to return are listed in missing_markers.
    """
    translations: Dict[int, str]  # marker_id -> translated_text
    total_tokens: int
    latency: float
    conversation_id: str
    missing_markers: List[int] = field(default_factory=list)
|
||||
|
||||
|
||||
class DifyTranslationError(Exception):
    """Error during DIFY API translation.

    Raised for non-200 responses and after all retry attempts are exhausted.
    """
    pass
|
||||
|
||||
|
||||
class DifyClient:
|
||||
"""
|
||||
Client for DIFY AI translation API.
|
||||
|
||||
Features:
|
||||
- Single and batch translation
|
||||
- Blocking mode API calls
|
||||
- Automatic retry with exponential backoff
|
||||
- Token and latency tracking
|
||||
"""
|
||||
|
||||
    def __init__(
        self,
        base_url: str = DIFY_BASE_URL,
        api_key: str = DIFY_API_KEY,
        timeout: float = DIFY_TIMEOUT,
        max_retries: int = DIFY_MAX_RETRIES
    ):
        """Create a DIFY client.

        Args:
            base_url: DIFY API root (ending in /v1).
            api_key: Bearer token for the DIFY app.
            timeout: Per-request timeout in seconds.
            max_retries: Attempts per API call before giving up.
        """
        self.base_url = base_url
        self.api_key = api_key
        self.timeout = timeout
        self.max_retries = max_retries
        # Cumulative usage counters for this client instance.
        self._total_tokens = 0
        self._total_requests = 0
|
||||
|
||||
def _get_language_name(self, lang_code: str) -> str:
|
||||
"""Convert language code to full name for prompt"""
|
||||
return LANGUAGE_NAMES.get(lang_code, lang_code)
|
||||
|
||||
def _build_prompt(self, text: str, target_lang: str) -> str:
|
||||
"""Build translation prompt for single text"""
|
||||
lang_name = self._get_language_name(target_lang)
|
||||
return (
|
||||
f"Translate the following text to {lang_name}.\n"
|
||||
f"Return ONLY the translated text, no explanations.\n\n"
|
||||
f"{text}"
|
||||
)
|
||||
|
||||
def _build_batch_prompt(self, texts: List[str], target_lang: str) -> str:
|
||||
"""
|
||||
Build batch translation prompt with numbered markers.
|
||||
|
||||
Format:
|
||||
Translate the following texts to {Language}.
|
||||
Each text is marked with [N]. Return translations in the same format.
|
||||
Return ONLY the translations with their markers, no explanations.
|
||||
|
||||
[1] First text
|
||||
[2] Second text
|
||||
...
|
||||
"""
|
||||
lang_name = self._get_language_name(target_lang)
|
||||
|
||||
# Build numbered text list
|
||||
numbered_texts = []
|
||||
for i, text in enumerate(texts, start=1):
|
||||
# Clean text - remove newlines within each item to avoid parsing issues
|
||||
clean_text = ' '.join(text.split())
|
||||
numbered_texts.append(f"[{i}] {clean_text}")
|
||||
|
||||
texts_block = "\n".join(numbered_texts)
|
||||
|
||||
prompt = (
|
||||
f"Translate the following texts to {lang_name}.\n"
|
||||
f"Each text is marked with [N]. Return translations in the same format.\n"
|
||||
f"Return ONLY the translations with their markers, no explanations.\n\n"
|
||||
f"{texts_block}"
|
||||
)
|
||||
|
||||
return prompt
|
||||
|
||||
def _parse_batch_response(self, response_text: str, expected_count: int) -> Dict[int, str]:
|
||||
"""
|
||||
Parse batch translation response with numbered markers.
|
||||
|
||||
Expected format:
|
||||
[1] 翻譯文字一
|
||||
[2] 翻譯文字二
|
||||
...
|
||||
|
||||
Returns:
|
||||
Dict mapping marker number to translated text
|
||||
"""
|
||||
translations = {}
|
||||
|
||||
# Pattern to match [N] followed by text until next [N] or end
|
||||
# Use DOTALL to match across lines, but be careful with greedy matching
|
||||
pattern = r'\[(\d+)\]\s*(.+?)(?=\[\d+\]|$)'
|
||||
matches = re.findall(pattern, response_text, re.DOTALL)
|
||||
|
||||
for match in matches:
|
||||
try:
|
||||
marker_id = int(match[0])
|
||||
text = match[1].strip()
|
||||
if text:
|
||||
translations[marker_id] = text
|
||||
except (ValueError, IndexError):
|
||||
continue
|
||||
|
||||
return translations
|
||||
|
||||
def _call_api(self, prompt: str, user_id: str) -> dict:
    """
    POST a blocking chat-message request to DIFY, retrying on failure.

    Retries up to ``self.max_retries`` times with exponential backoff
    (1s, 2s, 4s, ...). A non-200 status raises DifyTranslationError, which
    is caught by the generic handler below and therefore also retried.

    Raises:
        DifyTranslationError: when all attempts fail.
    """
    request_body = {
        "inputs": {},
        "query": prompt,
        "response_mode": "blocking",
        "conversation_id": "",
        "user": user_id
    }
    request_headers = {
        "Authorization": f"Bearer {self.api_key}",
        "Content-Type": "application/json"
    }

    failure = None

    for attempt in range(self.max_retries):
        try:
            with httpx.Client(timeout=self.timeout) as http:
                reply = http.post(
                    f"{self.base_url}/chat-messages",
                    json=request_body,
                    headers=request_headers
                )

                if reply.status_code != 200:
                    raise DifyTranslationError(
                        f"API returned status {reply.status_code}: {reply.text}"
                    )

                return reply.json()

        except httpx.TimeoutException as exc:
            failure = exc
            logger.warning(f"DIFY API timeout (attempt {attempt + 1}/{self.max_retries})")

        except httpx.RequestError as exc:
            failure = exc
            logger.warning(f"DIFY API request error (attempt {attempt + 1}/{self.max_retries}): {exc}")

        except Exception as exc:
            failure = exc
            logger.warning(f"DIFY API error (attempt {attempt + 1}/{self.max_retries}): {exc}")

        # Exponential backoff before the next attempt (skip after the last).
        if attempt < self.max_retries - 1:
            wait_time = 2 ** attempt
            logger.info(f"Retrying in {wait_time}s...")
            time.sleep(wait_time)

    raise DifyTranslationError(f"API call failed after {self.max_retries} attempts: {failure}")
|
||||
|
||||
def translate(
    self,
    text: str,
    target_lang: str,
    user_id: str = "tool-ocr"
) -> TranslationResponse:
    """
    Translate a single text using the DIFY API.

    Args:
        text: Text to translate
        target_lang: Target language code (e.g., 'en', 'zh-TW')
        user_id: User identifier for tracking

    Returns:
        TranslationResponse with translated text and usage metadata
    """
    data = self._call_api(self._build_prompt(text, target_lang), user_id)

    # Token/latency usage lives under metadata.usage in the DIFY response.
    usage = data.get("metadata", {}).get("usage", {})
    tokens = usage.get("total_tokens", 0)

    # Accumulate client-level statistics for get_stats().
    self._total_tokens += tokens
    self._total_requests += 1

    return TranslationResponse(
        translated_text=data.get("answer", ""),
        total_tokens=tokens,
        latency=usage.get("latency", 0.0),
        conversation_id=data.get("conversation_id", "")
    )
|
||||
|
||||
def translate_batch(
    self,
    texts: List[str],
    target_lang: str,
    user_id: str = "tool-ocr"
) -> BatchTranslationResponse:
    """
    Translate multiple texts in a single API call.

    Args:
        texts: List of texts to translate
        target_lang: Target language code
        user_id: User identifier for tracking

    Returns:
        BatchTranslationResponse whose ``translations`` maps 1-based marker
        numbers to translated strings; markers the model failed to return
        are listed in ``missing_markers``.
    """
    # Nothing to do for an empty input - short-circuit without an API call.
    if not texts:
        return BatchTranslationResponse(
            translations={},
            total_tokens=0,
            latency=0.0,
            conversation_id=""
        )

    prompt = self._build_batch_prompt(texts, target_lang)
    logger.debug(f"Batch translation: {len(texts)} items, ~{len(prompt)} chars")

    data = self._call_api(prompt, user_id)

    usage = data.get("metadata", {}).get("usage", {})
    translations = self._parse_batch_response(data.get("answer", ""), len(texts))

    # Every requested marker should come back; warn about each one that didn't.
    missing_markers = []
    for marker in range(1, len(texts) + 1):
        if marker not in translations:
            missing_markers.append(marker)
            logger.warning(f"Missing translation for marker [{marker}]")

    # Accumulate client-level statistics for get_stats().
    self._total_tokens += usage.get("total_tokens", 0)
    self._total_requests += 1

    return BatchTranslationResponse(
        translations=translations,
        total_tokens=usage.get("total_tokens", 0),
        latency=usage.get("latency", 0.0),
        conversation_id=data.get("conversation_id", ""),
        missing_markers=missing_markers
    )
|
||||
|
||||
def get_stats(self) -> dict:
    """Return a snapshot of cumulative client usage plus the configured endpoint."""
    return dict(
        total_tokens=self._total_tokens,
        total_requests=self._total_requests,
        base_url=self.base_url,
    )
|
||||
|
||||
|
||||
# Global singleton -- created lazily on first access so importing this module
# does not require DIFY configuration to be present.
_dify_client: Optional[DifyClient] = None


def get_dify_client() -> DifyClient:
    """Get the global DifyClient instance, creating it on first use.

    NOTE(review): creation is not guarded by a lock; two threads racing the
    first call could each build a client. Harmless if DifyClient() is cheap
    and stateless at construction -- confirm.
    """
    global _dify_client
    if _dify_client is None:
        _dify_client = DifyClient()
    return _dify_client
|
||||
490
backend/app/services/translation_service.py
Normal file
490
backend/app/services/translation_service.py
Normal file
@@ -0,0 +1,490 @@
|
||||
"""
|
||||
Tool_OCR - Translation Service
|
||||
Document translation using DIFY AI API with batch processing
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from app.schemas.translation import (
|
||||
TranslatableItem,
|
||||
TranslatedItem,
|
||||
TranslationJobState,
|
||||
TranslationProgress,
|
||||
TranslationStatusEnum,
|
||||
)
|
||||
from app.services.dify_client import (
|
||||
DifyClient,
|
||||
DifyTranslationError,
|
||||
get_dify_client,
|
||||
MAX_BATCH_CHARS,
|
||||
MAX_BATCH_ITEMS,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Element types whose string content is translated directly.
TRANSLATABLE_TEXT_TYPES = {'text', 'title', 'header', 'footer', 'paragraph', 'footnote'}
# Tables are handled separately: each cell's content is translated individually.
TABLE_TYPE = 'table'
# Element types that carry no translatable text and are skipped outright.
SKIP_TYPES = {'page_number', 'image', 'chart', 'logo', 'reference'}
|
||||
|
||||
|
||||
@dataclass
class TranslationBatch:
    """A group of items sent to DIFY in one batched API call.

    Capacity is bounded by MAX_BATCH_ITEMS (item count) and MAX_BATCH_CHARS
    (sum of item content lengths).
    """
    items: List[TranslatableItem] = field(default_factory=list)
    total_chars: int = 0

    def can_add(self, item: TranslatableItem) -> bool:
        """Return True if ``item`` fits within both batch limits."""
        within_item_limit = len(self.items) < MAX_BATCH_ITEMS
        within_char_limit = self.total_chars + len(item.content) <= MAX_BATCH_CHARS
        return within_item_limit and within_char_limit

    def add(self, item: TranslatableItem):
        """Append ``item`` and grow the running character total."""
        self.items.append(item)
        self.total_chars += len(item.content)
|
||||
|
||||
|
||||
class TranslationService:
    """
    Main translation service for document translation using DIFY AI.

    Features:
    - Extract translatable elements from UnifiedDocument
    - Batch translation via DIFY API (efficient)
    - Fallback to single-item translation for failures
    - Translation JSON generation
    - Progress tracking

    NOTE(review): `_total_tokens` / `_total_latency` are reset per
    translate_document() call on this (shared, singleton) instance, so
    concurrent document translations would interleave their counters.
    Confirm the service is only driven by one job at a time.
    """

    def __init__(self, dify_client: Optional[DifyClient] = None):
        """Create the service; uses the global DifyClient unless one is injected."""
        self.dify_client = dify_client or get_dify_client()
        # Per-task job states; guarded by _jobs_lock because background
        # worker threads and request handlers both touch this dict.
        self._active_jobs: Dict[str, TranslationJobState] = {}
        self._jobs_lock = threading.Lock()
        # Cumulative usage for the current translate_document() run.
        self._total_tokens = 0
        self._total_latency = 0.0

    def extract_translatable_elements(
        self,
        result_json: Dict
    ) -> Tuple[List[TranslatableItem], int]:
        """
        Extract all translatable elements from a result JSON.

        Text-like elements (TRANSLATABLE_TEXT_TYPES) yield one item each;
        table elements yield one item per non-empty cell; SKIP_TYPES and
        empty content are ignored.

        Args:
            result_json: UnifiedDocument JSON data

        Returns:
            Tuple of (list of TranslatableItem, total element count)
        """
        items = []
        total_elements = 0

        for page in result_json.get('pages', []):
            page_number = page.get('page_number', 1)

            for elem in page.get('elements', []):
                total_elements += 1
                elem_type = elem.get('type', '')
                elem_id = elem.get('element_id', '')
                content = elem.get('content')

                # Skip non-translatable types
                if elem_type in SKIP_TYPES:
                    continue

                # Handle text elements
                if elem_type in TRANSLATABLE_TEXT_TYPES and isinstance(content, str):
                    text = content.strip()
                    if text:  # Skip empty content
                        items.append(TranslatableItem(
                            element_id=elem_id,
                            content=text,
                            element_type=elem_type,
                            page_number=page_number
                        ))

                # Handle table elements: one item per non-empty cell, tagged
                # with its (row, col) so translations can be written back.
                elif elem_type == TABLE_TYPE and isinstance(content, dict):
                    cells = content.get('cells', [])
                    for cell in cells:
                        cell_content = cell.get('content', '')
                        if isinstance(cell_content, str) and cell_content.strip():
                            row = cell.get('row', 0)
                            col = cell.get('col', 0)
                            items.append(TranslatableItem(
                                element_id=elem_id,
                                content=cell_content.strip(),
                                element_type='table_cell',
                                page_number=page_number,
                                cell_position=(row, col)
                            ))

        logger.info(
            f"Extracted {len(items)} translatable items from {total_elements} elements"
        )
        return items, total_elements

    def create_batches(self, items: List[TranslatableItem]) -> List[TranslationBatch]:
        """
        Create translation batches from items based on character limits.

        Greedy first-fit in input order: an item that does not fit closes
        the current batch and opens a new one (an oversized single item
        still gets its own batch).

        Args:
            items: List of TranslatableItem

        Returns:
            List of TranslationBatch
        """
        batches = []
        current_batch = TranslationBatch()

        for item in items:
            if current_batch.can_add(item):
                current_batch.add(item)
            else:
                # Save current batch and start new one
                if current_batch.items:
                    batches.append(current_batch)
                current_batch = TranslationBatch()
                current_batch.add(item)

        # Don't forget the last batch
        if current_batch.items:
            batches.append(current_batch)

        logger.info(
            f"Created {len(batches)} batches from {len(items)} items "
            f"(max {MAX_BATCH_CHARS} chars, max {MAX_BATCH_ITEMS} items per batch)"
        )

        return batches

    def translate_batch(
        self,
        batch: TranslationBatch,
        target_lang: str,
        user_id: str
    ) -> List[TranslatedItem]:
        """
        Translate a batch of items using DIFY API.

        On API failure (DifyTranslationError) the whole batch falls back to
        the original content rather than raising.

        Args:
            batch: TranslationBatch to translate
            target_lang: Target language code
            user_id: User identifier for tracking

        Returns:
            List of TranslatedItem (same order as batch.items)
        """
        if not batch.items:
            return []

        # Extract texts in order
        texts = [item.content for item in batch.items]

        try:
            response = self.dify_client.translate_batch(
                texts=texts,
                target_lang=target_lang,
                user_id=user_id
            )

            self._total_tokens += response.total_tokens
            self._total_latency += response.latency

            # Map translations back to items by their 1-based marker number.
            translated_items = []
            for idx, item in enumerate(batch.items):
                marker_id = idx + 1  # Markers are 1-indexed

                if marker_id in response.translations:
                    translated_content = response.translations[marker_id]
                else:
                    # Missing translation - use original
                    logger.warning(f"Missing translation for {item.element_id}, using original")
                    translated_content = item.content

                translated_items.append(TranslatedItem(
                    element_id=item.element_id,
                    original_content=item.content,
                    translated_content=translated_content,
                    element_type=item.element_type,
                    cell_position=item.cell_position
                ))

            return translated_items

        except DifyTranslationError as e:
            logger.error(f"Batch translation failed: {e}")
            # Return items with original content on failure
            return [
                TranslatedItem(
                    element_id=item.element_id,
                    original_content=item.content,
                    translated_content=item.content,  # Keep original
                    element_type=item.element_type,
                    cell_position=item.cell_position
                )
                for item in batch.items
            ]

    def translate_item(
        self,
        item: TranslatableItem,
        target_lang: str,
        user_id: str
    ) -> TranslatedItem:
        """
        Translate a single item using DIFY API (fallback for batch failures).

        Args:
            item: TranslatableItem to translate
            target_lang: Target language code
            user_id: User identifier for tracking

        Returns:
            TranslatedItem with translation result (original content is kept
            if the API call fails)
        """
        try:
            response = self.dify_client.translate(
                text=item.content,
                target_lang=target_lang,
                user_id=user_id
            )

            self._total_tokens += response.total_tokens
            self._total_latency += response.latency

            return TranslatedItem(
                element_id=item.element_id,
                original_content=item.content,
                translated_content=response.translated_text,
                element_type=item.element_type,
                cell_position=item.cell_position
            )

        except DifyTranslationError as e:
            logger.error(f"Translation failed for {item.element_id}: {e}")
            # Return original content on failure
            return TranslatedItem(
                element_id=item.element_id,
                original_content=item.content,
                translated_content=item.content,  # Keep original
                element_type=item.element_type,
                cell_position=item.cell_position
            )

    def build_translation_result(
        self,
        translated_items: List[TranslatedItem],
        source_document: str,
        source_lang: str,
        target_lang: str,
        total_elements: int,
        processing_time: float,
        batch_count: int
    ) -> Dict:
        """
        Build the translation result JSON structure.

        Table cells sharing an element_id are grouped under a single
        ``{'cells': [...]}`` entry; all other items map element_id directly
        to the translated string.

        Args:
            translated_items: List of TranslatedItem
            source_document: Source document filename
            source_lang: Source language
            target_lang: Target language
            total_elements: Total elements in document
            processing_time: Processing time in seconds
            batch_count: Number of batches used

        Returns:
            Translation result dictionary
        """
        # Build translations dict
        translations: Dict[str, Any] = {}
        total_chars = 0

        for item in translated_items:
            total_chars += len(item.translated_content)

            if item.element_type == 'table_cell':
                # Group table cells by element_id
                if item.element_id not in translations:
                    translations[item.element_id] = {'cells': []}

                translations[item.element_id]['cells'].append({
                    'row': item.cell_position[0] if item.cell_position else 0,
                    'col': item.cell_position[1] if item.cell_position else 0,
                    'content': item.translated_content
                })
            else:
                translations[item.element_id] = item.translated_content

        # Build statistics; skipped counts distinct element_ids not translated.
        translated_element_ids = set(item.element_id for item in translated_items)
        skipped = total_elements - len(translated_element_ids)

        result = {
            'schema_version': '1.0.0',
            'source_document': source_document,
            'source_lang': source_lang,
            'target_lang': target_lang,
            'provider': 'dify',
            'translated_at': datetime.utcnow().isoformat() + 'Z',
            'statistics': {
                'total_elements': total_elements,
                'translated_elements': len(translated_element_ids),
                'skipped_elements': skipped,
                'total_characters': total_chars,
                'processing_time_seconds': round(processing_time, 2),
                'total_tokens': self._total_tokens,
                'batch_count': batch_count
            },
            'translations': translations
        }

        return result

    def translate_document(
        self,
        task_id: str,
        result_json_path: Path,
        target_lang: str,
        source_lang: str = 'auto',
        progress_callback: Optional[callable] = None
    ) -> Tuple[bool, Optional[Path], Optional[str]]:
        """
        Translate a document using batch processing and save the result.

        Output is written next to the source as
        ``<stem-without-_result>_translated_<target_lang>.json``.

        Args:
            task_id: Task ID
            result_json_path: Path to source result.json
            target_lang: Target language (e.g., 'en', 'zh-TW')
            source_lang: Source language ('auto' for detection)
            progress_callback: Optional callback(progress: TranslationProgress)

        Returns:
            Tuple of (success, output_path, error_message)
        """
        start_time = time.time()
        # Reset per-run usage counters (see class-level review note).
        self._total_tokens = 0
        self._total_latency = 0.0

        logger.info(
            f"Starting translation: task_id={task_id}, target={target_lang}"
        )

        try:
            # Load source JSON
            with open(result_json_path, 'r', encoding='utf-8') as f:
                result_json = json.load(f)

            source_document = result_json.get('metadata', {}).get('filename', 'unknown')

            # Extract translatable elements
            items, total_elements = self.extract_translatable_elements(result_json)

            if not items:
                logger.warning("No translatable elements found")
                return False, None, "No translatable elements found"

            # Create batches
            batches = self.create_batches(items)

            # Update initial progress (0 of N before any batch runs)
            if progress_callback:
                progress_callback(TranslationProgress(
                    total_elements=len(items)
                ))

            # Translate each batch
            all_translated: List[TranslatedItem] = []
            user_id = f"tool-ocr-{task_id}"
            processed_items = 0

            for batch_idx, batch in enumerate(batches):
                logger.info(
                    f"Translating batch {batch_idx + 1}/{len(batches)} "
                    f"({len(batch.items)} items, {batch.total_chars} chars)"
                )

                translated = self.translate_batch(batch, target_lang, user_id)
                all_translated.extend(translated)
                processed_items += len(batch.items)

                # Update progress after each completed batch
                if progress_callback:
                    progress_callback(TranslationProgress(
                        current_element=processed_items,
                        total_elements=len(items),
                        percentage=(processed_items / len(items)) * 100
                    ))

            # Build result
            processing_time = time.time() - start_time
            result = self.build_translation_result(
                translated_items=all_translated,
                source_document=source_document,
                source_lang=source_lang,
                target_lang=target_lang,
                total_elements=total_elements,
                processing_time=processing_time,
                batch_count=len(batches)
            )

            # Save result alongside the source result.json
            output_filename = result_json_path.stem.replace('_result', '')
            output_path = result_json_path.parent / f"{output_filename}_translated_{target_lang}.json"

            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(result, f, ensure_ascii=False, indent=2)

            logger.info(
                f"Translation completed: {len(all_translated)} items in {len(batches)} batches, "
                f"{processing_time:.2f}s, {self._total_tokens} tokens, "
                f"saved to {output_path}"
            )

            return True, output_path, None

        except Exception as e:
            # Broad catch: this runs as a background task, so any failure is
            # reported to the caller as (False, None, message) rather than raised.
            logger.error(f"Translation failed: {e}")
            import traceback
            traceback.print_exc()
            return False, None, str(e)

    def get_job_state(self, task_id: str) -> Optional[TranslationJobState]:
        """Get the current state of a translation job (None if unknown)."""
        with self._jobs_lock:
            return self._active_jobs.get(task_id)

    def set_job_state(self, task_id: str, state: TranslationJobState):
        """Set the state of a translation job."""
        with self._jobs_lock:
            self._active_jobs[task_id] = state

    def remove_job_state(self, task_id: str):
        """Remove a translation job state (no-op if absent)."""
        with self._jobs_lock:
            self._active_jobs.pop(task_id, None)
|
||||
|
||||
|
||||
# Global singleton -- created lazily on first access.
_translation_service: Optional[TranslationService] = None


def get_translation_service() -> TranslationService:
    """Get the global TranslationService instance, creating it on first use.

    NOTE(review): creation is unsynchronized, mirroring get_dify_client();
    a first-call race could construct two instances.
    """
    global _translation_service
    if _translation_service is None:
        _translation_service = TranslationService()
    return _translation_service
|
||||
138
backend/tests/test_translation_real.py
Normal file
138
backend/tests/test_translation_real.py
Normal file
@@ -0,0 +1,138 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test translation service with DIFY API using real OCR results from storage/results/
|
||||
"""
|
||||
import json
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
|
||||
from app.services.dify_client import DifyClient, get_dify_client
|
||||
from app.services.translation_service import TranslationService, get_translation_service
|
||||
|
||||
# Real task IDs with their result files. These reference OCR outputs that
# must already exist locally; tests skip any entry whose file is missing.
REAL_TASKS = [
    ("ca2b59a3-3362-4678-954f-cf0a9bcc152e", "img3_result.json"),
    ("8ab2f24d-992b-46a2-87dc-2e024e006ac7", "img1_result.json"),
    ("1c94bfbf-9391-444c-bebf-ae22fa3dad32", "edit_result.json"),
    ("c85fff69-9ddb-40b8-8a9b-ebb513c60f05", "scan_result.json"),
    ("0088e960-7b61-4cdf-bfe5-956960b00dd1", "scan2_result.json"),
    ("8eedd9ed-7aad-46d5-93ca-951352c954b9", "ppt_result.json"),
    ("992156c5-72b4-4e3d-8d43-cbb15f23e630", "edit3_result.json"),
    ("1484ba43-7484-4326-95a7-1544b181e9e8", "edit2_result.json"),
    ("e9a16bba-7d37-42f4-84c8-6624cb58fe19", "img2_result.json"),
]

# Results live under backend/storage/results/<task_id>/<result_file>.
RESULTS_DIR = Path(__file__).parent.parent / "storage" / "results"
|
||||
|
||||
|
||||
@pytest.fixture
def dify_client():
    """Get DIFY client instance (the module-level singleton)."""
    return get_dify_client()
|
||||
|
||||
|
||||
@pytest.fixture
def translation_service():
    """Get translation service instance (the module-level singleton)."""
    return get_translation_service()
|
||||
|
||||
|
||||
class TestDifyClient:
    """Test DIFY API client.

    NOTE: test_simple_translation hits the live DIFY endpoint, so it
    requires network access and a valid API key.
    """

    def test_client_initialization(self, dify_client):
        """Test client can be initialized"""
        assert dify_client is not None
        assert dify_client.api_key is not None

    def test_simple_translation(self, dify_client):
        """Test simple translation via DIFY API"""
        text = "Hello, this is a test."
        response = dify_client.translate(text, "zh-TW")

        # A non-empty answer and positive token usage indicate a real reply.
        assert response.translated_text is not None
        assert len(response.translated_text) > 0
        assert response.total_tokens > 0

        print(f"\nOriginal: {text}")
        print(f"Translated: {response.translated_text}")
        print(f"Tokens: {response.total_tokens}, Latency: {response.latency:.2f}s")
|
||||
|
||||
|
||||
class TestTranslationServiceExtraction:
    """Test element extraction (no API calls -- works on local JSON only)."""

    def test_service_initialization(self, translation_service):
        """Test service can be initialized"""
        assert translation_service is not None
        assert translation_service.dify_client is not None

    @pytest.mark.parametrize("task_id,result_file", REAL_TASKS)
    def test_extract_translatable_elements(self, translation_service, task_id, result_file):
        """Test extracting translatable elements from real OCR results"""
        result_path = RESULTS_DIR / task_id / result_file
        # Fixtures are environment-specific; skip rather than fail when absent.
        if not result_path.exists():
            pytest.skip(f"Result file not found: {result_path}")

        with open(result_path, 'r', encoding='utf-8') as f:
            ocr_result = json.load(f)

        elements, total_count = translation_service.extract_translatable_elements(ocr_result)

        # Verify extraction returns the documented (list, int) shape.
        assert isinstance(elements, list)
        assert isinstance(total_count, int)
        assert total_count >= 0

        print(f"\nTask {task_id}:")
        print(f"  Extracted {len(elements)} translatable elements")
        print(f"  Total elements in document: {total_count}")

        if elements:
            # Spot-check the TranslatableItem interface on the first element.
            first = elements[0]
            assert hasattr(first, 'element_id')
            assert hasattr(first, 'content')
            assert hasattr(first, 'element_type')
            print(f"  First element type: {first.element_type}")
            print(f"  First element preview: {first.content[:50]}..." if len(first.content) > 50 else f"  First element: {first.content}")
|
||||
|
||||
|
||||
class TestTranslationExecution:
    """Test actual translation via DIFY API (live network calls)."""

    # Only the first 2 tasks are exercised to keep API usage/cost low.
    @pytest.mark.parametrize("task_id,result_file", REAL_TASKS[:2])  # Test only first 2
    def test_translate_first_3_elements(self, translation_service, task_id, result_file):
        """Test translating first 3 elements from a real OCR document"""
        result_path = RESULTS_DIR / task_id / result_file
        if not result_path.exists():
            pytest.skip(f"Result file not found: {result_path}")

        with open(result_path, 'r', encoding='utf-8') as f:
            ocr_result = json.load(f)

        elements, _ = translation_service.extract_translatable_elements(ocr_result)
        if not elements:
            pytest.skip("No translatable elements found")

        # Translate first 3 elements only (single-item path, one call each)
        elements_to_translate = elements[:3]

        print(f"\n{task_id} translations:")
        for i, elem in enumerate(elements_to_translate):
            translated = translation_service.translate_item(elem, "en", f"test-{task_id}")

            # translate_item falls back to the original on failure, so the
            # result is always non-empty for non-empty input.
            assert translated.translated_content is not None
            assert len(translated.translated_content) > 0

            orig_preview = elem.content[:30] + "..." if len(elem.content) > 30 else elem.content
            trans_preview = translated.translated_content[:30] + "..." if len(translated.translated_content) > 30 else translated.translated_content
            print(f"  [{i+1}] {orig_preview} -> {trans_preview}")

        print(f"  Total tokens: {translation_service._total_tokens}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run extraction tests only (no API calls) by default:
    # pytest.main([__file__, "-v", "-k", "extraction", "--tb=short"])
    # Run all tests including live DIFY API calls (requires network + API key):
    pytest.main([__file__, "-v", "--tb=short"])
|
||||
Reference in New Issue
Block a user