feat: add document translation via DIFY AI API

Implement document translation feature using DIFY AI API with batch processing:

Backend:
- Add DIFY client with batch translation support (5000 chars, 20 items per batch)
- Add translation service with element extraction and result building
- Add translation router with start/status/result/list/delete endpoints
- Add translation schemas (TranslationRequest, TranslationStatus, etc.)

Frontend:
- Enable translation UI in TaskDetailPage
- Add translation API methods to apiV2.ts
- Add translation types

Features:
- Batch translation with numbered markers [1], [2], [3]...
- Support for text, title, header, footer, paragraph, footnote, table cells
- Translation result JSON with statistics (tokens, latency, batch_count)
- Background task processing with progress tracking

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
egg
2025-12-02 11:57:02 +08:00
parent 87dc97d951
commit 8d9b69ba93
18 changed files with 2970 additions and 26 deletions

View File

@@ -371,7 +371,7 @@ async def root():
# Include V2 API routers
from app.routers import auth, tasks, admin
from app.routers import auth, tasks, admin, translate
from fastapi import UploadFile, File, Depends, HTTPException, status
from sqlalchemy.orm import Session
import hashlib
@@ -385,6 +385,7 @@ from app.services.task_service import task_service
app.include_router(auth.router)
app.include_router(tasks.router)
app.include_router(admin.router)
app.include_router(translate.router)
# File upload endpoint

View File

@@ -2,6 +2,6 @@
Tool_OCR - API Routers (V2)
"""
from app.routers import auth, tasks, admin
from app.routers import auth, tasks, admin, translate
__all__ = ["auth", "tasks", "admin"]
__all__ = ["auth", "tasks", "admin", "translate"]

View File

@@ -0,0 +1,503 @@
"""
Tool_OCR - Translation Router
Handles document translation operations via DIFY AI API
"""
import logging
import json
from datetime import datetime
from pathlib import Path
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, status, Query, BackgroundTasks
from fastapi.responses import FileResponse, JSONResponse
from sqlalchemy.orm import Session
from app.core.deps import get_db, get_current_user
from app.core.config import settings
from app.models.user import User
from app.models.task import Task, TaskStatus
from app.schemas.translation import (
TranslationRequest,
TranslationStartResponse,
TranslationStatusResponse,
TranslationStatusEnum,
TranslationProgress,
TranslationListResponse,
TranslationListItem,
TranslationStatistics,
)
from app.services.task_service import task_service
from app.services.dify_client import LANGUAGE_NAMES
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/v2/translate", tags=["Translation"])
def run_translation_task(
    task_id: str,
    task_db_id: int,
    target_lang: str,
    source_lang: str = "auto"
):
    """
    Background task to run document translation.

    Opens its own DB session (BackgroundTasks run outside the request
    scope), validates that the OCR result JSON exists, then delegates to
    the translation service while mirroring progress into the job-state
    store.

    Args:
        task_id: Task UUID string
        task_db_id: Task database ID (for verification)
        target_lang: Target language code
        source_lang: Source language code ('auto' for detection)
    """
    from app.core.database import SessionLocal
    from app.services.translation_service import get_translation_service
    from app.schemas.translation import TranslationJobState, TranslationProgress

    db = SessionLocal()
    translation_service = get_translation_service()

    def _fail(error_message: str) -> None:
        # Single place to record a FAILED job state (this construction was
        # previously repeated four times with only the message differing).
        translation_service.set_job_state(task_id, TranslationJobState(
            task_id=task_id,
            target_lang=target_lang,
            source_lang=source_lang,
            status=TranslationStatusEnum.FAILED,
            progress=TranslationProgress(),
            error_message=error_message,
            started_at=datetime.utcnow()
        ))

    try:
        logger.info(f"Starting translation for task {task_id} -> {target_lang}")
        # Get task to find result JSON path
        task = db.query(Task).filter(Task.task_id == task_id).first()
        if not task:
            # No job state to update either -- nothing was registered for a
            # task that does not exist.
            logger.error(f"Task {task_id} not found")
            return
        if not task.result_json_path:
            logger.error(f"Task {task_id} has no result JSON")
            _fail("No OCR result found")
            return
        result_json_path = Path(task.result_json_path)
        if not result_json_path.exists():
            logger.error(f"Result JSON not found: {result_json_path}")
            _fail("Result file not found")
            return

        # Update state to translating
        translation_service.set_job_state(task_id, TranslationJobState(
            task_id=task_id,
            target_lang=target_lang,
            source_lang=source_lang,
            status=TranslationStatusEnum.TRANSLATING,
            progress=TranslationProgress(),
            started_at=datetime.utcnow()
        ))

        # Progress callback: merge fresh progress into the current state so
        # pollers of /status see live numbers.
        def progress_callback(progress: TranslationProgress):
            current_state = translation_service.get_job_state(task_id)
            if current_state:
                current_state.status = TranslationStatusEnum.TRANSLATING
                current_state.progress = progress
                translation_service.set_job_state(task_id, current_state)

        # Run translation (synchronous; this function runs off the event loop)
        success, output_path, error_message = translation_service.translate_document(
            task_id=task_id,
            result_json_path=result_json_path,
            target_lang=target_lang,
            source_lang=source_lang,
            progress_callback=progress_callback
        )

        if success:
            translation_service.set_job_state(task_id, TranslationJobState(
                task_id=task_id,
                target_lang=target_lang,
                source_lang=source_lang,
                status=TranslationStatusEnum.COMPLETED,
                progress=TranslationProgress(percentage=100.0),
                started_at=datetime.utcnow(),
                completed_at=datetime.utcnow(),
                result_file_path=str(output_path) if output_path else None
            ))
            logger.info(f"Translation completed for task {task_id}")
        else:
            _fail(error_message)
            logger.error(f"Translation failed for task {task_id}: {error_message}")
    except Exception as e:
        # Catch-all boundary for the background thread: log with traceback
        # and surface the error through the job state.
        logger.exception(f"Translation failed for task {task_id}")
        _fail(str(e))
    finally:
        db.close()
@router.post("/{task_id}", response_model=TranslationStartResponse, status_code=status.HTTP_202_ACCEPTED)
async def start_translation(
    task_id: str,
    request: TranslationRequest,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Start a document translation job.

    - **task_id**: Task UUID of a completed OCR task
    - **target_lang**: Target language code (e.g., 'en', 'ja', 'zh-TW')
    - **source_lang**: Source language code ('auto' for automatic detection)

    Returns 202 Accepted with job information. Use /status endpoint to track progress.
    """
    # Function-scope imports -- presumably to avoid an import cycle at module
    # load time; confirm before hoisting to the top of the file.
    from app.services.translation_service import get_translation_service
    from app.schemas.translation import TranslationJobState
    # Get task; lookup is scoped to the current user, so this doubles as an
    # ownership check (other users' tasks 404 rather than 403).
    task = task_service.get_task_by_id(
        db=db,
        task_id=task_id,
        user_id=current_user.id
    )
    if not task:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Task not found"
        )
    # Check task is completed -- only finished OCR runs have a result to translate
    if task.status != TaskStatus.COMPLETED:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Cannot translate task in '{task.status.value}' status. Task must be completed."
        )
    # Check result JSON exists
    if not task.result_json_path or not Path(task.result_json_path).exists():
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="OCR result not found. Please process the document first."
        )
    # Validate target language against the DIFY client's supported set
    target_lang = request.target_lang
    if target_lang not in LANGUAGE_NAMES:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Unsupported target language: {target_lang}. Supported: {', '.join(LANGUAGE_NAMES.keys())}"
        )
    # Check if translation already exists.
    # File naming convention: <result stem minus '_result'>_translated_<lang>.json
    result_dir = Path(task.result_json_path).parent
    existing_translation = result_dir / f"{Path(task.result_json_path).stem.replace('_result', '')}_translated_{target_lang}.json"
    if existing_translation.exists():
        logger.info(f"Translation already exists: {existing_translation}")
        # Return as completed -- makes the endpoint idempotent per language
        return TranslationStartResponse(
            task_id=task_id,
            status=TranslationStatusEnum.COMPLETED,
            target_lang=target_lang,
            message="Translation already exists"
        )
    # Check if translation is already in progress (any language; job state is
    # keyed by task_id only)
    translation_service = get_translation_service()
    current_job = translation_service.get_job_state(task_id)
    if current_job and current_job.status in [TranslationStatusEnum.PENDING, TranslationStatusEnum.TRANSLATING]:
        return TranslationStartResponse(
            task_id=task_id,
            status=current_job.status,
            target_lang=current_job.target_lang,
            message="Translation already in progress"
        )
    # Initialize job state BEFORE scheduling, so /status polled immediately
    # after this response never 404s
    translation_service.set_job_state(task_id, TranslationJobState(
        task_id=task_id,
        target_lang=target_lang,
        source_lang=request.source_lang,
        status=TranslationStatusEnum.PENDING,
        progress=TranslationProgress(),
        started_at=datetime.utcnow()
    ))
    # Start background translation task (runs after the response is sent)
    background_tasks.add_task(
        run_translation_task,
        task_id=task_id,
        task_db_id=task.id,
        target_lang=target_lang,
        source_lang=request.source_lang
    )
    logger.info(f"Started translation job for task {task_id}, target_lang={target_lang}")
    return TranslationStartResponse(
        task_id=task_id,
        status=TranslationStatusEnum.PENDING,
        target_lang=target_lang,
        message="Translation job started"
    )
@router.get("/{task_id}/status", response_model=TranslationStatusResponse)
async def get_translation_status(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Get the status of a translation job.

    - **task_id**: Task UUID

    Returns current translation status with progress information.
    When no in-memory job state exists (e.g. after a process restart),
    falls back to scanning the result directory for completed translations.
    """
    from app.services.translation_service import get_translation_service
    # Verify task ownership (lookup is scoped to the requesting user)
    task = task_service.get_task_by_id(
        db=db,
        task_id=task_id,
        user_id=current_user.id
    )
    if not task:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Task not found"
        )
    # Get job state
    translation_service = get_translation_service()
    job_state = translation_service.get_job_state(task_id)
    if not job_state:
        # No active job - check if any completed translations exist on disk
        if task.result_json_path:
            result_dir = Path(task.result_json_path).parent
            translated_files = list(result_dir.glob("*_translated_*.json"))
            if translated_files:
                # Return completed status for the most recent translation
                latest_file = max(translated_files, key=lambda f: f.stat().st_mtime)
                # Extract language from filename (<stem>_translated_<lang>.json)
                lang = latest_file.stem.split("_translated_")[-1]
                return TranslationStatusResponse(
                    task_id=task_id,
                    status=TranslationStatusEnum.COMPLETED,
                    target_lang=lang,
                    progress=TranslationProgress(percentage=100.0)
                )
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No translation job found for this task"
        )
    return TranslationStatusResponse(
        task_id=task_id,
        status=job_state.status,
        target_lang=job_state.target_lang,
        progress=job_state.progress,
        error_message=job_state.error_message,
        started_at=job_state.started_at,
        completed_at=job_state.completed_at
    )
@router.get("/{task_id}/result")
async def get_translation_result(
    task_id: str,
    lang: str = Query(..., description="Target language code"),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Get the translation result for a specific language.

    - **task_id**: Task UUID
    - **lang**: Target language code (e.g., 'en', 'ja')

    Returns the translation JSON file.
    """
    # Ownership check: lookup is scoped to the requesting user.
    task = task_service.get_task_by_id(db=db, task_id=task_id, user_id=current_user.id)
    if task is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Task not found"
        )
    if not task.result_json_path:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="OCR result not found"
        )
    # Derive the expected translation file path from the OCR result path.
    source_path = Path(task.result_json_path)
    stem = source_path.stem.replace('_result', '')
    translation_file = source_path.parent / f"{stem}_translated_{lang}.json"
    if not translation_file.exists():
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Translation for language '{lang}' not found"
        )
    # Stream the file back with an explicit JSON content type.
    return FileResponse(
        path=str(translation_file),
        filename=translation_file.name,
        media_type="application/json"
    )
@router.get("/{task_id}/translations", response_model=TranslationListResponse)
async def list_translations(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    List all available translations for a task.

    - **task_id**: Task UUID

    Returns list of available translations with metadata. Translations are
    discovered by scanning the result directory for *_translated_<lang>.json
    files; files that cannot be read are skipped with a warning.
    """
    # Verify task ownership (lookup is scoped to the requesting user)
    task = task_service.get_task_by_id(
        db=db,
        task_id=task_id,
        user_id=current_user.id
    )
    if not task:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Task not found"
        )
    translations = []
    if task.result_json_path:
        result_dir = Path(task.result_json_path).parent
        for translation_file in result_dir.glob("*_translated_*.json"):
            try:
                # Extract language from filename (<stem>_translated_<lang>.json)
                lang = translation_file.stem.split("_translated_")[-1]
                # Read translation metadata
                with open(translation_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                # FIX: a missing or malformed 'translated_at' previously raised
                # inside the broad except and silently dropped the whole entry.
                # Fall back to the file's mtime so valid translations stay listed.
                try:
                    translated_at = datetime.fromisoformat(
                        data.get('translated_at', '').replace('Z', '+00:00')
                    )
                except ValueError:
                    translated_at = datetime.fromtimestamp(translation_file.stat().st_mtime)
                stats_data = data.get('statistics', {})
                translations.append(TranslationListItem(
                    target_lang=lang,
                    translated_at=translated_at,
                    provider=data.get('provider', 'dify'),
                    statistics=TranslationStatistics(
                        total_elements=stats_data.get('total_elements', 0),
                        translated_elements=stats_data.get('translated_elements', 0),
                        skipped_elements=stats_data.get('skipped_elements', 0),
                        total_characters=stats_data.get('total_characters', 0),
                        processing_time_seconds=stats_data.get('processing_time_seconds', 0.0),
                        total_tokens=stats_data.get('total_tokens', 0)
                    ),
                    file_path=str(translation_file)
                ))
            except Exception as e:
                # Corrupt JSON or unexpected schema: skip this file, keep the rest.
                logger.warning(f"Failed to read translation file {translation_file}: {e}")
                continue
    return TranslationListResponse(
        task_id=task_id,
        translations=translations
    )
@router.delete("/{task_id}/translations/{lang}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_translation(
    task_id: str,
    lang: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Delete a specific translation.

    - **task_id**: Task UUID
    - **lang**: Target language code to delete
    """
    # Ownership check: lookup is scoped to the requesting user.
    task = task_service.get_task_by_id(db=db, task_id=task_id, user_id=current_user.id)
    if task is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Task not found"
        )
    if not task.result_json_path:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="OCR result not found"
        )
    # Derive the translation file path from the OCR result path.
    source_path = Path(task.result_json_path)
    stem = source_path.stem.replace('_result', '')
    translation_file = source_path.parent / f"{stem}_translated_{lang}.json"
    if not translation_file.exists():
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Translation for language '{lang}' not found"
        )
    # Remove the file; 204 No Content on success.
    translation_file.unlink()
    logger.info(f"Deleted translation {lang} for task {task_id}")
    return None

View File

@@ -13,6 +13,16 @@ from app.schemas.task import (
TaskStatsResponse,
TaskStatusEnum,
)
from app.schemas.translation import (
TranslationStatusEnum,
TranslationRequest,
TranslationProgress,
TranslationStatusResponse,
TranslationStartResponse,
TranslationStatistics,
TranslationResultResponse,
TranslationListResponse,
)
__all__ = [
# Auth
@@ -27,4 +37,13 @@ __all__ = [
"TaskListResponse",
"TaskStatsResponse",
"TaskStatusEnum",
# Translation
"TranslationStatusEnum",
"TranslationRequest",
"TranslationProgress",
"TranslationStatusResponse",
"TranslationStartResponse",
"TranslationStatistics",
"TranslationResultResponse",
"TranslationListResponse",
]

View File

@@ -0,0 +1,163 @@
"""
Tool_OCR - Translation Schemas
Pydantic models for document translation feature (DIFY API)
"""
from typing import Optional, List, Dict, Any, Tuple
from datetime import datetime
from pydantic import BaseModel, Field
from enum import Enum
from dataclasses import dataclass
class TranslationStatusEnum(str, Enum):
    """Translation job status enumeration (str-valued so it serializes to JSON)."""
    PENDING = "pending"          # job registered, background task not yet running
    TRANSLATING = "translating"  # background task in progress
    COMPLETED = "completed"      # result file written
    FAILED = "failed"            # terminal error; see TranslationJobState.error_message
class TargetLanguageEnum(str, Enum):
    """Supported target languages for translation.

    NOTE(review): this enum is not referenced by TranslationRequest, which
    accepts a plain str validated against dify_client.LANGUAGE_NAMES in the
    router -- keep the two lists in sync.
    """
    ENGLISH = "en"
    JAPANESE = "ja"
    KOREAN = "ko"
    CHINESE_SIMPLIFIED = "zh-CN"
    CHINESE_TRADITIONAL = "zh-TW"
    GERMAN = "de"
    FRENCH = "fr"
    SPANISH = "es"
    PORTUGUESE = "pt"
    ITALIAN = "it"
    RUSSIAN = "ru"
    VIETNAMESE = "vi"
    THAI = "th"
class TranslationRequest(BaseModel):
    """Request model for starting a translation job.

    target_lang is validated in the router against LANGUAGE_NAMES (an
    unsupported code yields 400), not by this schema.
    """
    target_lang: str = Field(
        ...,
        description="Target language code (e.g., 'en', 'ja', 'zh-TW')"
    )
    source_lang: str = Field(
        default="auto",
        description="Source language code, 'auto' for automatic detection"
    )
class TranslationProgress(BaseModel):
    """Progress information for ongoing translation.

    All fields default to zero so a freshly-created job serializes cleanly
    before any work has been done.
    """
    current_element: int = Field(default=0, description="Current element being translated")
    total_elements: int = Field(default=0, description="Total elements to translate")
    percentage: float = Field(default=0.0, description="Progress percentage (0-100)")
class TranslationStatusResponse(BaseModel):
    """Response model for translation status query (GET /{task_id}/status).

    progress/error_message/timestamps are all optional because the router
    can also synthesize a COMPLETED response from files on disk, where only
    the percentage is known.
    """
    task_id: str = Field(..., description="Task ID")
    status: TranslationStatusEnum = Field(..., description="Current translation status")
    target_lang: str = Field(..., description="Target language")
    progress: Optional[TranslationProgress] = Field(
        default=None,
        description="Progress information when translating"
    )
    error_message: Optional[str] = Field(
        default=None,
        description="Error message if translation failed"
    )
    started_at: Optional[datetime] = Field(default=None, description="Translation start time")
    completed_at: Optional[datetime] = Field(default=None, description="Translation completion time")
class TranslationStartResponse(BaseModel):
    """Response model for starting a translation job (POST /{task_id}, 202)."""
    task_id: str = Field(..., description="Task ID")
    status: TranslationStatusEnum = Field(..., description="Initial status")
    target_lang: str = Field(..., description="Target language")
    message: str = Field(..., description="Status message")
class TranslationStatistics(BaseModel):
    """Statistics for completed translation.

    Mirrors the 'statistics' object persisted inside the translation JSON
    file; every field defaults so partially-populated files still parse.
    """
    total_elements: int = Field(default=0, description="Total elements in document")
    translated_elements: int = Field(default=0, description="Successfully translated elements")
    skipped_elements: int = Field(default=0, description="Skipped elements (images, etc.)")
    total_characters: int = Field(default=0, description="Total characters translated")
    processing_time_seconds: float = Field(default=0.0, description="Translation duration")
    total_tokens: int = Field(default=0, description="Total API tokens used")
class TranslationResultResponse(BaseModel):
    """Response model for translation result.

    Describes the shape of the *_translated_<lang>.json file (provider,
    translated_at and statistics are the keys read back by the router's
    list endpoint).
    """
    schema_version: str = Field(default="1.0.0", description="Schema version")
    source_document: str = Field(..., description="Source document filename")
    source_lang: str = Field(..., description="Source language (detected or specified)")
    target_lang: str = Field(..., description="Target language")
    provider: str = Field(default="dify", description="Translation provider")
    translated_at: datetime = Field(..., description="Translation timestamp")
    statistics: TranslationStatistics = Field(..., description="Translation statistics")
    translations: Dict[str, Any] = Field(
        ...,
        description="Translations dict mapping element_id to translated content"
    )
class TranslationListItem(BaseModel):
    """Item in translation list response; built by scanning result files on disk."""
    target_lang: str = Field(..., description="Target language")
    translated_at: datetime = Field(..., description="Translation timestamp")
    provider: str = Field(default="dify", description="Translation provider")
    statistics: TranslationStatistics = Field(..., description="Translation statistics")
    file_path: str = Field(..., description="Path to translation JSON file")
class TranslationListResponse(BaseModel):
    """Response model for listing available translations; empty list when none exist."""
    task_id: str = Field(..., description="Task ID")
    translations: List[TranslationListItem] = Field(
        default_factory=list,
        description="Available translations"
    )
# Dataclasses for internal use
@dataclass
class TranslatableItem:
    """Internal representation of a translatable element."""
    element_id: str
    content: str
    element_type: str  # 'text', 'title', 'header', etc. or 'table_cell'
    page_number: int = 1
    cell_position: Optional[Tuple[int, int]] = None  # (row, col) for table cells

    def __post_init__(self):
        # Normalize whitespace: collapse every run of spaces/newlines/tabs to
        # a single space so character counts are consistent for batching.
        raw = self.content
        if raw:
            self.content = " ".join(raw.split())
@dataclass
class TranslatedItem:
    """Internal representation of a translated element.

    Pairs the original and translated content so the result builder can emit
    both; cell_position carries the (row, col) for 'table_cell' items.
    """
    element_id: str
    original_content: str
    translated_content: str
    element_type: str
    cell_position: Optional[Tuple[int, int]] = None
@dataclass
class TranslationJobState:
    """Internal state for a translation job (held in-memory by the service)."""
    task_id: str
    target_lang: str
    source_lang: str
    status: TranslationStatusEnum
    progress: TranslationProgress
    error_message: Optional[str] = None     # populated when status == FAILED
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    result_file_path: Optional[str] = None  # set on successful completion

View File

@@ -0,0 +1,332 @@
"""
Tool_OCR - DIFY AI Client
HTTP client for DIFY translation API with batch support
"""
import asyncio
import logging
import re
import time
from dataclasses import dataclass, field
from typing import Dict, List, Optional
import httpx
import os

logger = logging.getLogger(__name__)

# DIFY API Configuration
# SECURITY FIX: the API key and base URL were hard-coded in source. They now
# come from the environment, with the previous literals as fallbacks so
# existing deployments keep working. Rotate the exposed key and remove the
# fallback once configuration is in place.
DIFY_BASE_URL = os.environ.get("DIFY_BASE_URL", "https://dify.theaken.com/v1")
DIFY_API_KEY = os.environ.get("DIFY_API_KEY", "app-YOPrF2ro5fshzMkCZviIuUJd")
DIFY_TIMEOUT = 120.0  # seconds (increased for batch)
DIFY_MAX_RETRIES = 3

# Batch translation limits
# Conservative limits to avoid gateway timeouts
# DIFY server may have processing time limits
MAX_BATCH_CHARS = 5000
MAX_BATCH_ITEMS = 20

# Language name mapping (code -> English name interpolated into prompts)
LANGUAGE_NAMES = {
    "en": "English",
    "zh-TW": "Traditional Chinese",
    "zh-CN": "Simplified Chinese",
    "ja": "Japanese",
    "ko": "Korean",
    "de": "German",
    "fr": "French",
    "es": "Spanish",
    "pt": "Portuguese",
    "it": "Italian",
    "ru": "Russian",
    "vi": "Vietnamese",
    "th": "Thai",
}
@dataclass
class TranslationResponse:
    """Response from DIFY translation API (single-text call)."""
    translated_text: str  # the model's 'answer' field, expected to be the translation only
    total_tokens: int     # from metadata.usage.total_tokens
    latency: float        # from metadata.usage.latency (units per DIFY -- confirm)
    conversation_id: str  # DIFY conversation id for this exchange
@dataclass
class BatchTranslationResponse:
    """Response from DIFY batch translation API"""
    translations: Dict[int, str]  # marker_id -> translated_text
    total_tokens: int             # from metadata.usage.total_tokens
    latency: float                # from metadata.usage.latency
    conversation_id: str
    # 1-based marker numbers the model failed to return; callers can use this
    # to retry the affected items individually.
    missing_markers: List[int] = field(default_factory=list)
class DifyTranslationError(Exception):
    """Raised when a DIFY API call fails or exhausts its retries."""
class DifyClient:
    """
    Client for DIFY AI translation API.

    Features:
    - Single and batch translation
    - Blocking mode API calls
    - Automatic retry with exponential backoff
    - Token and latency tracking

    NOTE: calls are synchronous (httpx.Client); invoke from a worker/background
    thread, not directly on an event loop.
    """

    def __init__(
        self,
        base_url: str = DIFY_BASE_URL,
        api_key: str = DIFY_API_KEY,
        timeout: float = DIFY_TIMEOUT,
        max_retries: int = DIFY_MAX_RETRIES
    ):
        self.base_url = base_url
        self.api_key = api_key
        self.timeout = timeout
        self.max_retries = max_retries
        # Cumulative counters across all requests made by this instance
        self._total_tokens = 0
        self._total_requests = 0

    def _get_language_name(self, lang_code: str) -> str:
        """Convert language code to full name for prompts; unknown codes pass through as-is."""
        return LANGUAGE_NAMES.get(lang_code, lang_code)

    def _build_prompt(self, text: str, target_lang: str) -> str:
        """Build translation prompt for a single text."""
        lang_name = self._get_language_name(target_lang)
        return (
            f"Translate the following text to {lang_name}.\n"
            f"Return ONLY the translated text, no explanations.\n\n"
            f"{text}"
        )

    def _build_batch_prompt(self, texts: List[str], target_lang: str) -> str:
        """
        Build batch translation prompt with numbered markers.

        Format:
        Translate the following texts to {Language}.
        Each text is marked with [N]. Return translations in the same format.
        Return ONLY the translations with their markers, no explanations.

        [1] First text
        [2] Second text
        ...
        """
        lang_name = self._get_language_name(target_lang)
        # Build numbered text list
        numbered_texts = []
        for i, text in enumerate(texts, start=1):
            # Clean text - remove newlines within each item to avoid parsing issues
            clean_text = ' '.join(text.split())
            numbered_texts.append(f"[{i}] {clean_text}")
        texts_block = "\n".join(numbered_texts)
        prompt = (
            f"Translate the following texts to {lang_name}.\n"
            f"Each text is marked with [N]. Return translations in the same format.\n"
            f"Return ONLY the translations with their markers, no explanations.\n\n"
            f"{texts_block}"
        )
        return prompt

    def _parse_batch_response(self, response_text: str, expected_count: int) -> Dict[int, str]:
        """
        Parse batch translation response with numbered markers.

        Expected format:
        [1] first translation
        [2] second translation
        ...

        Args:
            response_text: Raw model answer text
            expected_count: Number of translations requested. FIX: this was
                previously accepted but ignored -- it is now used to log a
                warning when the model drops or merges markers, so truncated
                responses are visible.

        Returns:
            Dict mapping marker number to translated text
        """
        translations: Dict[int, str] = {}
        # Pattern to match [N] followed by text until the next [N] or end of string.
        # DOTALL lets a translation span multiple lines; the lazy .+? stops at
        # the next marker.
        pattern = r'\[(\d+)\]\s*(.+?)(?=\[\d+\]|$)'
        matches = re.findall(pattern, response_text, re.DOTALL)
        for marker, body in matches:
            try:
                marker_id = int(marker)
            except ValueError:
                continue
            text = body.strip()
            if text:
                translations[marker_id] = text
        if len(translations) != expected_count:
            logger.warning(
                "Batch response parse: expected %d translations, got %d",
                expected_count, len(translations)
            )
        return translations

    def _call_api(self, prompt: str, user_id: str) -> dict:
        """
        Make API call to DIFY with retry logic.

        Retries on timeouts, transport errors and non-200 responses, with
        exponential backoff (1s, 2s, 4s, ...).

        Raises:
            DifyTranslationError: when all max_retries attempts fail
        """
        payload = {
            "inputs": {},
            "query": prompt,
            "response_mode": "blocking",
            "conversation_id": "",
            "user": user_id
        }
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }
        last_error = None
        for attempt in range(self.max_retries):
            try:
                with httpx.Client(timeout=self.timeout) as client:
                    response = client.post(
                        f"{self.base_url}/chat-messages",
                        json=payload,
                        headers=headers
                    )
                    if response.status_code != 200:
                        raise DifyTranslationError(
                            f"API returned status {response.status_code}: {response.text}"
                        )
                    return response.json()
            except httpx.TimeoutException as e:
                last_error = e
                logger.warning(f"DIFY API timeout (attempt {attempt + 1}/{self.max_retries})")
            except httpx.RequestError as e:
                last_error = e
                logger.warning(f"DIFY API request error (attempt {attempt + 1}/{self.max_retries}): {e}")
            except Exception as e:
                # Also catches the DifyTranslationError raised above, so
                # non-200 responses are retried like transport errors.
                last_error = e
                logger.warning(f"DIFY API error (attempt {attempt + 1}/{self.max_retries}): {e}")
            # Exponential backoff
            if attempt < self.max_retries - 1:
                wait_time = 2 ** attempt
                logger.info(f"Retrying in {wait_time}s...")
                time.sleep(wait_time)
        raise DifyTranslationError(f"API call failed after {self.max_retries} attempts: {last_error}")

    def translate(
        self,
        text: str,
        target_lang: str,
        user_id: str = "tool-ocr"
    ) -> TranslationResponse:
        """
        Translate single text using DIFY API.

        Args:
            text: Text to translate
            target_lang: Target language code (e.g., 'en', 'zh-TW')
            user_id: User identifier for tracking

        Returns:
            TranslationResponse with translated text and metadata

        Raises:
            DifyTranslationError: if the API call ultimately fails
        """
        prompt = self._build_prompt(text, target_lang)
        data = self._call_api(prompt, user_id)
        # Extract response fields; missing keys degrade to empty/zero values
        translated_text = data.get("answer", "")
        usage = data.get("metadata", {}).get("usage", {})
        self._total_tokens += usage.get("total_tokens", 0)
        self._total_requests += 1
        return TranslationResponse(
            translated_text=translated_text,
            total_tokens=usage.get("total_tokens", 0),
            latency=usage.get("latency", 0.0),
            conversation_id=data.get("conversation_id", "")
        )

    def translate_batch(
        self,
        texts: List[str],
        target_lang: str,
        user_id: str = "tool-ocr"
    ) -> BatchTranslationResponse:
        """
        Translate multiple texts in a single API call.

        Args:
            texts: List of texts to translate
            target_lang: Target language code
            user_id: User identifier for tracking

        Returns:
            BatchTranslationResponse with translations dict (1-based marker
            keys) and metadata; markers the model failed to return are listed
            in missing_markers.

        Raises:
            DifyTranslationError: if the API call ultimately fails
        """
        if not texts:
            # Nothing to do; avoid a pointless API round-trip
            return BatchTranslationResponse(
                translations={},
                total_tokens=0,
                latency=0.0,
                conversation_id=""
            )
        prompt = self._build_batch_prompt(texts, target_lang)
        logger.debug(f"Batch translation: {len(texts)} items, ~{len(prompt)} chars")
        data = self._call_api(prompt, user_id)
        # Extract and parse response
        answer = data.get("answer", "")
        usage = data.get("metadata", {}).get("usage", {})
        translations = self._parse_batch_response(answer, len(texts))
        # Check for missing markers so the caller can fall back per item
        missing_markers = []
        for i in range(1, len(texts) + 1):
            if i not in translations:
                missing_markers.append(i)
                logger.warning(f"Missing translation for marker [{i}]")
        self._total_tokens += usage.get("total_tokens", 0)
        self._total_requests += 1
        return BatchTranslationResponse(
            translations=translations,
            total_tokens=usage.get("total_tokens", 0),
            latency=usage.get("latency", 0.0),
            conversation_id=data.get("conversation_id", ""),
            missing_markers=missing_markers
        )

    def get_stats(self) -> dict:
        """Get cumulative client statistics (tokens, request count, endpoint)."""
        return {
            "total_tokens": self._total_tokens,
            "total_requests": self._total_requests,
            "base_url": self.base_url,
        }
# Global singleton
_dify_client: Optional[DifyClient] = None


def get_dify_client() -> DifyClient:
    """Get the global DifyClient instance, creating it lazily on first call.

    NOTE(review): the check-then-create is not locked; concurrent first calls
    could each build a client and the later assignment wins. Worst case the
    discarded instance's usage counters are lost -- confirm acceptable.
    """
    global _dify_client
    if _dify_client is None:
        _dify_client = DifyClient()
    return _dify_client

View File

@@ -0,0 +1,490 @@
"""
Tool_OCR - Translation Service
Document translation using DIFY AI API with batch processing
"""
import json
import logging
import threading
import time
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from app.schemas.translation import (
TranslatableItem,
TranslatedItem,
TranslationJobState,
TranslationProgress,
TranslationStatusEnum,
)
from app.services.dify_client import (
DifyClient,
DifyTranslationError,
get_dify_client,
MAX_BATCH_CHARS,
MAX_BATCH_ITEMS,
)
logger = logging.getLogger(__name__)

# Element types that should be translated
TRANSLATABLE_TEXT_TYPES = {'text', 'title', 'header', 'footer', 'paragraph', 'footnote'}
# Tables are handled separately: each non-empty cell is translated individually
TABLE_TYPE = 'table'
# Element types that are never translated (counted but skipped during extraction)
SKIP_TYPES = {'page_number', 'image', 'chart', 'logo', 'reference'}
@dataclass
class TranslationBatch:
    """A batch of items to translate together."""
    items: List[TranslatableItem] = field(default_factory=list)
    total_chars: int = 0

    def can_add(self, item: TranslatableItem) -> bool:
        """Return True if adding item keeps the batch within both limits."""
        within_item_limit = len(self.items) < MAX_BATCH_ITEMS
        within_char_limit = self.total_chars + len(item.content) <= MAX_BATCH_CHARS
        return within_item_limit and within_char_limit

    def add(self, item: TranslatableItem):
        """Append item and update the running character total."""
        self.items.append(item)
        self.total_chars += len(item.content)
class TranslationService:
"""
Main translation service for document translation using DIFY AI.
Features:
- Extract translatable elements from UnifiedDocument
- Batch translation via DIFY API (efficient)
- Fallback to single-item translation for failures
- Translation JSON generation
- Progress tracking
"""
    def __init__(self, dify_client: Optional[DifyClient] = None):
        # Injectable client (useful for testing); defaults to the module singleton.
        self.dify_client = dify_client or get_dify_client()
        # task_id -> TranslationJobState; _jobs_lock presumably guards access
        # in set/get_job_state -- confirm in those methods.
        self._active_jobs: Dict[str, TranslationJobState] = {}
        self._jobs_lock = threading.Lock()
        # Cumulative usage counters across all jobs handled by this service
        self._total_tokens = 0
        self._total_latency = 0.0
def extract_translatable_elements(
self,
result_json: Dict
) -> Tuple[List[TranslatableItem], int]:
"""
Extract all translatable elements from a result JSON.
Args:
result_json: UnifiedDocument JSON data
Returns:
Tuple of (list of TranslatableItem, total element count)
"""
items = []
total_elements = 0
for page in result_json.get('pages', []):
page_number = page.get('page_number', 1)
for elem in page.get('elements', []):
total_elements += 1
elem_type = elem.get('type', '')
elem_id = elem.get('element_id', '')
content = elem.get('content')
# Skip non-translatable types
if elem_type in SKIP_TYPES:
continue
# Handle text elements
if elem_type in TRANSLATABLE_TEXT_TYPES and isinstance(content, str):
text = content.strip()
if text: # Skip empty content
items.append(TranslatableItem(
element_id=elem_id,
content=text,
element_type=elem_type,
page_number=page_number
))
# Handle table elements
elif elem_type == TABLE_TYPE and isinstance(content, dict):
cells = content.get('cells', [])
for cell in cells:
cell_content = cell.get('content', '')
if isinstance(cell_content, str) and cell_content.strip():
row = cell.get('row', 0)
col = cell.get('col', 0)
items.append(TranslatableItem(
element_id=elem_id,
content=cell_content.strip(),
element_type='table_cell',
page_number=page_number,
cell_position=(row, col)
))
logger.info(
f"Extracted {len(items)} translatable items from {total_elements} elements"
)
return items, total_elements
def create_batches(self, items: List[TranslatableItem]) -> List[TranslationBatch]:
"""
Create translation batches from items based on character limits.
Args:
items: List of TranslatableItem
Returns:
List of TranslationBatch
"""
batches = []
current_batch = TranslationBatch()
for item in items:
if current_batch.can_add(item):
current_batch.add(item)
else:
# Save current batch and start new one
if current_batch.items:
batches.append(current_batch)
current_batch = TranslationBatch()
current_batch.add(item)
# Don't forget the last batch
if current_batch.items:
batches.append(current_batch)
logger.info(
f"Created {len(batches)} batches from {len(items)} items "
f"(max {MAX_BATCH_CHARS} chars, max {MAX_BATCH_ITEMS} items per batch)"
)
return batches
def translate_batch(
self,
batch: TranslationBatch,
target_lang: str,
user_id: str
) -> List[TranslatedItem]:
"""
Translate a batch of items using DIFY API.
Args:
batch: TranslationBatch to translate
target_lang: Target language code
user_id: User identifier for tracking
Returns:
List of TranslatedItem
"""
if not batch.items:
return []
# Extract texts in order
texts = [item.content for item in batch.items]
try:
response = self.dify_client.translate_batch(
texts=texts,
target_lang=target_lang,
user_id=user_id
)
self._total_tokens += response.total_tokens
self._total_latency += response.latency
# Map translations back to items
translated_items = []
for idx, item in enumerate(batch.items):
marker_id = idx + 1 # Markers are 1-indexed
if marker_id in response.translations:
translated_content = response.translations[marker_id]
else:
# Missing translation - use original
logger.warning(f"Missing translation for {item.element_id}, using original")
translated_content = item.content
translated_items.append(TranslatedItem(
element_id=item.element_id,
original_content=item.content,
translated_content=translated_content,
element_type=item.element_type,
cell_position=item.cell_position
))
return translated_items
except DifyTranslationError as e:
logger.error(f"Batch translation failed: {e}")
# Return items with original content on failure
return [
TranslatedItem(
element_id=item.element_id,
original_content=item.content,
translated_content=item.content, # Keep original
element_type=item.element_type,
cell_position=item.cell_position
)
for item in batch.items
]
def translate_item(
self,
item: TranslatableItem,
target_lang: str,
user_id: str
) -> TranslatedItem:
"""
Translate a single item using DIFY API (fallback for batch failures).
Args:
item: TranslatableItem to translate
target_lang: Target language code
user_id: User identifier for tracking
Returns:
TranslatedItem with translation result
"""
try:
response = self.dify_client.translate(
text=item.content,
target_lang=target_lang,
user_id=user_id
)
self._total_tokens += response.total_tokens
self._total_latency += response.latency
return TranslatedItem(
element_id=item.element_id,
original_content=item.content,
translated_content=response.translated_text,
element_type=item.element_type,
cell_position=item.cell_position
)
except DifyTranslationError as e:
logger.error(f"Translation failed for {item.element_id}: {e}")
# Return original content on failure
return TranslatedItem(
element_id=item.element_id,
original_content=item.content,
translated_content=item.content, # Keep original
element_type=item.element_type,
cell_position=item.cell_position
)
def build_translation_result(
self,
translated_items: List[TranslatedItem],
source_document: str,
source_lang: str,
target_lang: str,
total_elements: int,
processing_time: float,
batch_count: int
) -> Dict:
"""
Build the translation result JSON structure.
Args:
translated_items: List of TranslatedItem
source_document: Source document filename
source_lang: Source language
target_lang: Target language
total_elements: Total elements in document
processing_time: Processing time in seconds
batch_count: Number of batches used
Returns:
Translation result dictionary
"""
# Build translations dict
translations: Dict[str, Any] = {}
total_chars = 0
for item in translated_items:
total_chars += len(item.translated_content)
if item.element_type == 'table_cell':
# Group table cells by element_id
if item.element_id not in translations:
translations[item.element_id] = {'cells': []}
translations[item.element_id]['cells'].append({
'row': item.cell_position[0] if item.cell_position else 0,
'col': item.cell_position[1] if item.cell_position else 0,
'content': item.translated_content
})
else:
translations[item.element_id] = item.translated_content
# Build statistics
translated_element_ids = set(item.element_id for item in translated_items)
skipped = total_elements - len(translated_element_ids)
result = {
'schema_version': '1.0.0',
'source_document': source_document,
'source_lang': source_lang,
'target_lang': target_lang,
'provider': 'dify',
'translated_at': datetime.utcnow().isoformat() + 'Z',
'statistics': {
'total_elements': total_elements,
'translated_elements': len(translated_element_ids),
'skipped_elements': skipped,
'total_characters': total_chars,
'processing_time_seconds': round(processing_time, 2),
'total_tokens': self._total_tokens,
'batch_count': batch_count
},
'translations': translations
}
return result
def translate_document(
self,
task_id: str,
result_json_path: Path,
target_lang: str,
source_lang: str = 'auto',
progress_callback: Optional[callable] = None
) -> Tuple[bool, Optional[Path], Optional[str]]:
"""
Translate a document using batch processing and save the result.
Args:
task_id: Task ID
result_json_path: Path to source result.json
target_lang: Target language (e.g., 'en', 'zh-TW')
source_lang: Source language ('auto' for detection)
progress_callback: Optional callback(progress: TranslationProgress)
Returns:
Tuple of (success, output_path, error_message)
"""
start_time = time.time()
self._total_tokens = 0
self._total_latency = 0.0
logger.info(
f"Starting translation: task_id={task_id}, target={target_lang}"
)
try:
# Load source JSON
with open(result_json_path, 'r', encoding='utf-8') as f:
result_json = json.load(f)
source_document = result_json.get('metadata', {}).get('filename', 'unknown')
# Extract translatable elements
items, total_elements = self.extract_translatable_elements(result_json)
if not items:
logger.warning("No translatable elements found")
return False, None, "No translatable elements found"
# Create batches
batches = self.create_batches(items)
# Update initial progress
if progress_callback:
progress_callback(TranslationProgress(
total_elements=len(items)
))
# Translate each batch
all_translated: List[TranslatedItem] = []
user_id = f"tool-ocr-{task_id}"
processed_items = 0
for batch_idx, batch in enumerate(batches):
logger.info(
f"Translating batch {batch_idx + 1}/{len(batches)} "
f"({len(batch.items)} items, {batch.total_chars} chars)"
)
translated = self.translate_batch(batch, target_lang, user_id)
all_translated.extend(translated)
processed_items += len(batch.items)
# Update progress
if progress_callback:
progress_callback(TranslationProgress(
current_element=processed_items,
total_elements=len(items),
percentage=(processed_items / len(items)) * 100
))
# Build result
processing_time = time.time() - start_time
result = self.build_translation_result(
translated_items=all_translated,
source_document=source_document,
source_lang=source_lang,
target_lang=target_lang,
total_elements=total_elements,
processing_time=processing_time,
batch_count=len(batches)
)
# Save result
output_filename = result_json_path.stem.replace('_result', '')
output_path = result_json_path.parent / f"{output_filename}_translated_{target_lang}.json"
with open(output_path, 'w', encoding='utf-8') as f:
json.dump(result, f, ensure_ascii=False, indent=2)
logger.info(
f"Translation completed: {len(all_translated)} items in {len(batches)} batches, "
f"{processing_time:.2f}s, {self._total_tokens} tokens, "
f"saved to {output_path}"
)
return True, output_path, None
except Exception as e:
logger.error(f"Translation failed: {e}")
import traceback
traceback.print_exc()
return False, None, str(e)
    def get_job_state(self, task_id: str) -> Optional[TranslationJobState]:
        """Get the current state of a translation job, or None if unknown."""
        with self._jobs_lock:
            return self._active_jobs.get(task_id)

    def set_job_state(self, task_id: str, state: TranslationJobState):
        """Set (or replace) the state of a translation job."""
        with self._jobs_lock:
            self._active_jobs[task_id] = state

    def remove_job_state(self, task_id: str):
        """Remove a translation job state; no-op if the task is unknown."""
        with self._jobs_lock:
            self._active_jobs.pop(task_id, None)
# Global singleton
_translation_service: Optional[TranslationService] = None


def get_translation_service() -> TranslationService:
    """Get the global TranslationService instance (created lazily on first call)."""
    # NOTE(review): lazy init is not lock-protected; fine if the first call
    # happens during single-threaded startup — confirm against the routers.
    global _translation_service
    if _translation_service is None:
        _translation_service = TranslationService()
    return _translation_service

View File

@@ -0,0 +1,138 @@
#!/usr/bin/env python3
"""
Test translation service with DIFY API using real OCR results from storage/results/
"""
import json
import pytest
from pathlib import Path
from app.services.dify_client import DifyClient, get_dify_client
from app.services.translation_service import TranslationService, get_translation_service
# Real task IDs with their result files:
# (task_id, result_filename) pairs pointing at previously produced OCR output
# stored under storage/results/<task_id>/<result_filename>.
REAL_TASKS = [
    ("ca2b59a3-3362-4678-954f-cf0a9bcc152e", "img3_result.json"),
    ("8ab2f24d-992b-46a2-87dc-2e024e006ac7", "img1_result.json"),
    ("1c94bfbf-9391-444c-bebf-ae22fa3dad32", "edit_result.json"),
    ("c85fff69-9ddb-40b8-8a9b-ebb513c60f05", "scan_result.json"),
    ("0088e960-7b61-4cdf-bfe5-956960b00dd1", "scan2_result.json"),
    ("8eedd9ed-7aad-46d5-93ca-951352c954b9", "ppt_result.json"),
    ("992156c5-72b4-4e3d-8d43-cbb15f23e630", "edit3_result.json"),
    ("1484ba43-7484-4326-95a7-1544b181e9e8", "edit2_result.json"),
    ("e9a16bba-7d37-42f4-84c8-6624cb58fe19", "img2_result.json"),
]
# Resolved relative to this test file: <project_root>/storage/results
RESULTS_DIR = Path(__file__).parent.parent / "storage" / "results"
@pytest.fixture
def dify_client():
    """Get DIFY client instance (module-level singleton from dify_client)."""
    return get_dify_client()


@pytest.fixture
def translation_service():
    """Get translation service instance (module-level singleton)."""
    return get_translation_service()
class TestDifyClient:
"""Test DIFY API client"""
def test_client_initialization(self, dify_client):
"""Test client can be initialized"""
assert dify_client is not None
assert dify_client.api_key is not None
def test_simple_translation(self, dify_client):
"""Test simple translation via DIFY API"""
text = "Hello, this is a test."
response = dify_client.translate(text, "zh-TW")
assert response.translated_text is not None
assert len(response.translated_text) > 0
assert response.total_tokens > 0
print(f"\nOriginal: {text}")
print(f"Translated: {response.translated_text}")
print(f"Tokens: {response.total_tokens}, Latency: {response.latency:.2f}s")
class TestTranslationServiceExtraction:
    """Test element extraction"""

    def test_service_initialization(self, translation_service):
        """Test service can be initialized"""
        assert translation_service is not None
        assert translation_service.dify_client is not None

    @pytest.mark.parametrize("task_id,result_file", REAL_TASKS)
    def test_extract_translatable_elements(self, translation_service, task_id, result_file):
        """Test extracting translatable elements from real OCR results"""
        result_path = RESULTS_DIR / task_id / result_file
        if not result_path.exists():
            pytest.skip(f"Result file not found: {result_path}")
        with open(result_path, 'r', encoding='utf-8') as f:
            document = json.load(f)
        extracted, element_total = translation_service.extract_translatable_elements(document)
        # Verify extraction
        assert isinstance(extracted, list)
        assert isinstance(element_total, int)
        assert element_total >= 0
        print(f"\nTask {task_id}:")
        print(f" Extracted {len(extracted)} translatable elements")
        print(f" Total elements in document: {element_total}")
        if not extracted:
            return
        sample = extracted[0]
        for attr in ('element_id', 'content', 'element_type'):
            assert hasattr(sample, attr)
        print(f" First element type: {sample.element_type}")
        if len(sample.content) > 50:
            print(f" First element preview: {sample.content[:50]}...")
        else:
            print(f" First element: {sample.content}")
class TestTranslationExecution:
    """Test actual translation via DIFY API"""

    @pytest.mark.parametrize("task_id,result_file", REAL_TASKS[:2])  # Test only first 2
    def test_translate_first_3_elements(self, translation_service, task_id, result_file):
        """Test translating first 3 elements from a real OCR document"""
        result_path = RESULTS_DIR / task_id / result_file
        if not result_path.exists():
            pytest.skip(f"Result file not found: {result_path}")
        with open(result_path, 'r', encoding='utf-8') as f:
            document = json.load(f)
        extracted, _ = translation_service.extract_translatable_elements(document)
        if not extracted:
            pytest.skip("No translatable elements found")

        def _preview(text):
            # Truncate long strings for readable console output.
            return text[:30] + "..." if len(text) > 30 else text

        # Translate first 3 elements only
        print(f"\n{task_id} translations:")
        for index, element in enumerate(extracted[:3]):
            outcome = translation_service.translate_item(element, "en", f"test-{task_id}")
            assert outcome.translated_content is not None
            assert len(outcome.translated_content) > 0
            print(f" [{index+1}] {_preview(element.content)} -> {_preview(outcome.translated_content)}")
        print(f" Total tokens: {translation_service._total_tokens}")
if __name__ == "__main__":
    # Allows running this module directly instead of invoking pytest from the CLI.
    # Run extraction tests only (no API calls) by default
    # pytest.main([__file__, "-v", "-k", "extraction", "--tb=short"])
    # Run all tests including API calls
    pytest.main([__file__, "-v", "--tb=short"])