feat: add translation billing stats and remove Export/Settings pages

- Add TranslationLog model to track translation API usage per task
- Integrate Dify API actual price (total_price) into translation stats
- Display translation statistics in admin dashboard with per-task costs
- Remove unused Export and Settings pages to simplify frontend
- Add GET /api/v2/admin/translation-stats endpoint

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
egg
2025-12-12 17:38:12 +08:00
parent d20751d56b
commit 65abd51d60
21 changed files with 682 additions and 662 deletions

View File

@@ -0,0 +1,58 @@
"""add_translation_logs_table
Revision ID: g2b3c4d5e6f7
Revises: f1a2b3c4d5e6
Create Date: 2025-12-12 10:00:00.000000
Add translation_logs table to track translation API usage and costs.
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = 'g2b3c4d5e6f7'
down_revision: Union[str, None] = 'f1a2b3c4d5e6'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the tool_ocr_translation_logs table and its lookup indexes."""
    op.create_table(
        'tool_ocr_translation_logs',
        sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
        sa.Column('user_id', sa.Integer(), nullable=False, comment='Foreign key to users table'),
        sa.Column('task_id', sa.String(length=255), nullable=False, comment='Task UUID that was translated'),
        sa.Column('target_lang', sa.String(length=10), nullable=False, comment='Target language code'),
        sa.Column('source_lang', sa.String(length=10), nullable=True, comment='Source language code'),
        sa.Column('input_tokens', sa.Integer(), nullable=False, server_default='0', comment='Number of input tokens used'),
        sa.Column('output_tokens', sa.Integer(), nullable=False, server_default='0', comment='Number of output tokens generated'),
        sa.Column('total_tokens', sa.Integer(), nullable=False, server_default='0', comment='Total tokens (input + output)'),
        sa.Column('total_elements', sa.Integer(), nullable=False, server_default='0', comment='Total elements in document'),
        sa.Column('translated_elements', sa.Integer(), nullable=False, server_default='0', comment='Number of elements translated'),
        sa.Column('total_characters', sa.Integer(), nullable=False, server_default='0', comment='Total characters translated'),
        sa.Column('estimated_cost', sa.Float(), nullable=False, server_default='0.0', comment='Estimated cost in USD'),
        sa.Column('processing_time_seconds', sa.Float(), nullable=False, server_default='0.0', comment='Translation processing time'),
        sa.Column('provider', sa.String(length=50), nullable=False, server_default='dify', comment='Translation provider'),
        sa.Column('created_at', sa.DateTime(), nullable=False, server_default=sa.text('CURRENT_TIMESTAMP')),
        sa.ForeignKeyConstraint(['user_id'], ['tool_ocr_users.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id')
    )
    # One non-unique index per frequently-filtered column; op.f() applies
    # the configured constraint-naming convention to each index name.
    for column in ('id', 'user_id', 'task_id', 'target_lang', 'created_at'):
        op.create_index(
            op.f(f'ix_tool_ocr_translation_logs_{column}'),
            'tool_ocr_translation_logs',
            [column],
            unique=False,
        )
def downgrade() -> None:
    """Drop the tool_ocr_translation_logs table (indexes first)."""
    # Remove indexes in reverse order of creation before dropping the table.
    for column in ('created_at', 'target_lang', 'task_id', 'user_id', 'id'):
        op.drop_index(
            op.f(f'ix_tool_ocr_translation_logs_{column}'),
            table_name='tool_ocr_translation_logs',
        )
    op.drop_table('tool_ocr_translation_logs')

View File

@@ -458,6 +458,14 @@ class Settings(BaseSettings):
dify_max_batch_chars: int = Field(default=5000) # Max characters per batch
dify_max_batch_items: int = Field(default=20) # Max items per batch
# Translation cost calculation (USD per 1M tokens) - FALLBACK only
# Dify API returns actual price (total_price), this is only used as fallback
# when actual price is not available
translation_cost_per_million_tokens: float = Field(
default=3.0,
description="Fallback cost per 1M tokens when Dify doesn't return actual price"
)
# ===== Background Tasks Configuration =====
task_queue_type: str = Field(default="memory")
redis_url: str = Field(default="redis://localhost:6379/0")

View File

@@ -9,6 +9,7 @@ from app.models.user import User
from app.models.task import Task, TaskFile, TaskStatus
from app.models.session import Session
from app.models.audit_log import AuditLog
from app.models.translation_log import TranslationLog
__all__ = [
"User",
@@ -17,4 +18,5 @@ __all__ = [
"TaskStatus",
"Session",
"AuditLog",
"TranslationLog",
]

View File

@@ -0,0 +1,87 @@
"""
Tool_OCR - Translation Log Model
Tracks translation usage statistics for billing and monitoring
"""
from sqlalchemy import Column, Integer, String, DateTime, Float, ForeignKey
from sqlalchemy.orm import relationship
from datetime import datetime
from app.core.database import Base
class TranslationLog(Base):
    """
    Translation log model for tracking API usage and costs.

    Each record represents a single translation job completion,
    storing token usage and estimated costs for billing purposes.
    Column layout mirrors migration g2b3c4d5e6f7
    (table ``tool_ocr_translation_logs``).
    """
    __tablename__ = "tool_ocr_translation_logs"

    # Surrogate primary key.
    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    # Owning user; DB-level ON DELETE CASCADE removes logs with the user.
    user_id = Column(Integer, ForeignKey("tool_ocr_users.id", ondelete="CASCADE"),
                     nullable=False, index=True,
                     comment="Foreign key to users table")
    # Task UUID string (not a DB foreign key to the tasks table).
    task_id = Column(String(255), nullable=False, index=True,
                     comment="Task UUID that was translated")
    target_lang = Column(String(10), nullable=False, index=True,
                         comment="Target language code (e.g., 'en', 'ja', 'zh-TW')")
    source_lang = Column(String(10), nullable=True,
                         comment="Source language code (or 'auto')")
    # Token usage statistics
    # NOTE(review): the Dify provider only reports a combined total, so
    # callers currently store input_tokens/output_tokens as 0.
    input_tokens = Column(Integer, default=0, nullable=False,
                          comment="Number of input tokens used")
    output_tokens = Column(Integer, default=0, nullable=False,
                           comment="Number of output tokens generated")
    total_tokens = Column(Integer, default=0, nullable=False,
                          comment="Total tokens (input + output)")
    # Translation statistics
    total_elements = Column(Integer, default=0, nullable=False,
                            comment="Total elements in document")
    translated_elements = Column(Integer, default=0, nullable=False,
                                 comment="Number of elements translated")
    total_characters = Column(Integer, default=0, nullable=False,
                              comment="Total characters translated")
    # Cost tracking (estimated based on token pricing)
    # Holds the provider-reported price when available, otherwise a
    # token-count fallback estimate computed by the caller.
    estimated_cost = Column(Float, default=0.0, nullable=False,
                            comment="Estimated cost in USD")
    # Processing info
    processing_time_seconds = Column(Float, default=0.0, nullable=False,
                                     comment="Translation processing time")
    provider = Column(String(50), default="dify", nullable=False,
                      comment="Translation provider (e.g., 'dify')")
    # Timestamps
    # Naive UTC via datetime.utcnow — NOTE(review): deprecated since
    # Python 3.12; consider timezone-aware timestamps project-wide.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False, index=True)

    # Relationships
    # Back-populated by User.translation_logs.
    user = relationship("User", back_populates="translation_logs")

    def __repr__(self):
        """Concise identification string for logs and debugging."""
        return f"<TranslationLog(id={self.id}, task_id='{self.task_id}', target_lang='{self.target_lang}', tokens={self.total_tokens})>"

    def to_dict(self):
        """Convert translation log to dictionary"""
        return {
            "id": self.id,
            "user_id": self.user_id,
            "task_id": self.task_id,
            "target_lang": self.target_lang,
            "source_lang": self.source_lang,
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "total_tokens": self.total_tokens,
            "total_elements": self.total_elements,
            "translated_elements": self.translated_elements,
            "total_characters": self.total_characters,
            "estimated_cost": self.estimated_cost,
            "processing_time_seconds": self.processing_time_seconds,
            "provider": self.provider,
            # ISO-8601 string, or None when the row has not been flushed yet.
            "created_at": self.created_at.isoformat() if self.created_at else None
        }

View File

@@ -33,6 +33,7 @@ class User(Base):
tasks = relationship("Task", back_populates="user", cascade="all, delete-orphan")
sessions = relationship("Session", back_populates="user", cascade="all, delete-orphan")
audit_logs = relationship("AuditLog", back_populates="user")
translation_logs = relationship("TranslationLog", back_populates="user", cascade="all, delete-orphan")
def __repr__(self):
    """Readable summary of the user for logs and the interactive shell."""
    return "<User(id={0}, email='{1}', display_name='{2}')>".format(
        self.id, self.email, self.display_name
    )

View File

@@ -186,3 +186,34 @@ async def get_user_activity_summary(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to get user activity summary: {str(e)}"
)
@router.get("/translation-stats", summary="Get translation statistics")
async def get_translation_stats(
    db: Session = Depends(get_db),
    admin_user: User = Depends(get_current_admin_user)
):
    """
    Return translation usage statistics for billing and monitoring.

    The response aggregates all logged translation jobs:
    total_translations, total_tokens, total_characters, estimated_cost,
    a by_language breakdown, the recent_translations list, and a
    last_30_days summary.

    Requires admin privileges (enforced by the dependency).
    """
    try:
        # Delegate all aggregation to the service layer.
        return admin_service.get_translation_statistics(db)
    except Exception as e:
        # Log the full traceback, surface a generic 500 to the client.
        logger.exception("Failed to get translation statistics")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get translation statistics: {str(e)}"
        )

View File

@@ -39,7 +39,8 @@ def run_translation_task(
task_id: str,
task_db_id: int,
target_lang: str,
source_lang: str = "auto"
source_lang: str = "auto",
user_id: int = None
):
"""
Background task to run document translation.
@@ -49,10 +50,12 @@ def run_translation_task(
task_db_id: Task database ID (for verification)
target_lang: Target language code
source_lang: Source language code ('auto' for detection)
user_id: User ID for logging translation statistics
"""
from app.core.database import SessionLocal
from app.services.translation_service import get_translation_service
from app.schemas.translation import TranslationJobState, TranslationProgress
from app.models.translation_log import TranslationLog
db = SessionLocal()
translation_service = get_translation_service()
@@ -132,6 +135,44 @@ def run_translation_task(
result_file_path=str(output_path) if output_path else None
))
logger.info(f"Translation completed for task {task_id}")
# Log translation statistics to database
if user_id and output_path:
try:
with open(output_path, 'r', encoding='utf-8') as f:
translation_result = json.load(f)
stats = translation_result.get('statistics', {})
total_tokens = stats.get('total_tokens', 0)
# Use actual price from Dify API if available, otherwise calculate estimated cost
actual_price = stats.get('total_price', 0.0)
if actual_price > 0:
estimated_cost = actual_price
else:
# Fallback: Calculate estimated cost based on token pricing
estimated_cost = (total_tokens / 1_000_000) * settings.translation_cost_per_million_tokens
translation_log = TranslationLog(
user_id=user_id,
task_id=task_id,
target_lang=target_lang,
source_lang=source_lang,
total_tokens=total_tokens,
input_tokens=0, # Dify doesn't provide separate input/output tokens
output_tokens=0,
total_elements=stats.get('total_elements', 0),
translated_elements=stats.get('translated_elements', 0),
total_characters=stats.get('total_characters', 0),
processing_time_seconds=stats.get('processing_time_seconds', 0.0),
provider=translation_result.get('provider', 'dify'),
estimated_cost=estimated_cost
)
db.add(translation_log)
db.commit()
logger.info(f"Logged translation stats for task {task_id}: {total_tokens} tokens, ${estimated_cost:.6f}")
except Exception as log_error:
logger.error(f"Failed to log translation stats: {log_error}")
else:
translation_service.set_job_state(task_id, TranslationJobState(
task_id=task_id,
@@ -255,7 +296,8 @@ async def start_translation(
task_id=task_id,
task_db_id=task.id,
target_lang=target_lang,
source_lang=request.source_lang
source_lang=request.source_lang,
user_id=current_user.id
)
logger.info(f"Started translation job for task {task_id}, target_lang={target_lang}")

View File

@@ -13,6 +13,7 @@ from app.models.user import User
from app.models.task import Task, TaskStatus
from app.models.session import Session as UserSession
from app.models.audit_log import AuditLog
from app.models.translation_log import TranslationLog
from app.core.config import settings
logger = logging.getLogger(__name__)
@@ -209,6 +210,87 @@ class AdminService:
return top_users
def get_translation_statistics(self, db: Session) -> dict:
    """
    Aggregate translation usage for the admin dashboard.

    Args:
        db: Database session

    Returns:
        dict with grand totals (tokens, characters, cost), a
        per-target-language breakdown, the 20 most recent jobs, and a
        rolling 30-day summary.
    """
    # Overall number of logged translation jobs.
    total_translations = db.query(TranslationLog).count()

    # Grand totals across every log row (SUM returns NULL on empty sets,
    # hence the `or 0` coalescing below).
    totals = db.query(
        func.sum(TranslationLog.total_tokens).label("total_tokens"),
        func.sum(TranslationLog.input_tokens).label("total_input_tokens"),
        func.sum(TranslationLog.output_tokens).label("total_output_tokens"),
        func.sum(TranslationLog.total_characters).label("total_characters"),
        func.sum(TranslationLog.estimated_cost).label("total_cost")
    ).first()

    # Per-target-language usage breakdown.
    language_rows = db.query(
        TranslationLog.target_lang,
        func.count(TranslationLog.id).label("count"),
        func.sum(TranslationLog.total_tokens).label("tokens"),
        func.sum(TranslationLog.total_characters).label("characters")
    ).group_by(TranslationLog.target_lang).all()
    language_breakdown = []
    for lang, count, tokens, chars in language_rows:
        language_breakdown.append({
            "language": lang,
            "count": count,
            "tokens": tokens or 0,
            "characters": chars or 0
        })

    # Twenty most recent jobs, newest first.
    latest_logs = db.query(TranslationLog).order_by(
        TranslationLog.created_at.desc()
    ).limit(20).all()
    recent_translations = []
    for log in latest_logs:
        recent_translations.append({
            "id": log.id,
            "task_id": log.task_id,
            "target_lang": log.target_lang,
            "total_tokens": log.total_tokens,
            "total_characters": log.total_characters,
            "processing_time_seconds": log.processing_time_seconds,
            "estimated_cost": log.estimated_cost,
            "created_at": log.created_at.isoformat() if log.created_at else None
        })

    # Rolling 30-day window. Naive UTC cutoff — assumes created_at is
    # stored as naive UTC too (NOTE(review): datetime.utcnow is
    # deprecated since Python 3.12).
    cutoff = datetime.utcnow() - timedelta(days=30)
    window = db.query(
        func.count(TranslationLog.id).label("count"),
        func.sum(TranslationLog.total_tokens).label("tokens")
    ).filter(TranslationLog.created_at >= cutoff).first()

    return {
        "total_translations": total_translations,
        "total_tokens": totals.total_tokens or 0,
        "total_input_tokens": totals.total_input_tokens or 0,
        "total_output_tokens": totals.total_output_tokens or 0,
        "total_characters": totals.total_characters or 0,
        "estimated_cost": totals.total_cost or 0.0,
        "by_language": language_breakdown,
        "recent_translations": recent_translations,
        "last_30_days": {
            "count": window.count or 0,
            "tokens": window.tokens or 0
        }
    }
# Singleton instance
admin_service = AdminService()

View File

@@ -40,6 +40,8 @@ class TranslationResponse:
total_tokens: int
latency: float
conversation_id: str
total_price: float = 0.0
currency: str = "USD"
@dataclass
@@ -50,6 +52,8 @@ class BatchTranslationResponse:
latency: float
conversation_id: str
missing_markers: List[int] = field(default_factory=list)
total_price: float = 0.0
currency: str = "USD"
class DifyTranslationError(Exception):
@@ -252,6 +256,11 @@ class DifyClient:
translated_text = data.get("answer", "")
usage = data.get("metadata", {}).get("usage", {})
# Extract price info from usage or metadata (may be string or number)
raw_price = usage.get("total_price", 0.0)
total_price = float(raw_price) if raw_price else 0.0
currency = usage.get("currency", "USD") or "USD"
self._total_tokens += usage.get("total_tokens", 0)
self._total_requests += 1
@@ -259,7 +268,9 @@ class DifyClient:
translated_text=translated_text,
total_tokens=usage.get("total_tokens", 0),
latency=usage.get("latency", 0.0),
conversation_id=data.get("conversation_id", "")
conversation_id=data.get("conversation_id", ""),
total_price=total_price,
currency=currency
)
def translate_batch(
@@ -297,6 +308,11 @@ class DifyClient:
answer = data.get("answer", "")
usage = data.get("metadata", {}).get("usage", {})
# Extract price info from usage or metadata (may be string or number)
raw_price = usage.get("total_price", 0.0)
total_price = float(raw_price) if raw_price else 0.0
currency = usage.get("currency", "USD") or "USD"
translations = self._parse_batch_response(answer, len(texts))
# Check for missing markers
@@ -314,7 +330,9 @@ class DifyClient:
total_tokens=usage.get("total_tokens", 0),
latency=usage.get("latency", 0.0),
conversation_id=data.get("conversation_id", ""),
missing_markers=missing_markers
missing_markers=missing_markers,
total_price=total_price,
currency=currency
)
def get_stats(self) -> dict:

View File

@@ -232,6 +232,8 @@ class TranslationService:
self._jobs_lock = threading.Lock()
self._total_tokens = 0
self._total_latency = 0.0
self._total_price = 0.0
self._currency = "USD"
def _load_raw_ocr_regions(
self,
@@ -459,6 +461,9 @@ class TranslationService:
self._total_tokens += response.total_tokens
self._total_latency += response.latency
self._total_price += response.total_price
if response.currency:
self._currency = response.currency
# Map translations back to items
translated_items = []
@@ -524,6 +529,9 @@ class TranslationService:
self._total_tokens += response.total_tokens
self._total_latency += response.latency
self._total_price += response.total_price
if response.currency:
self._currency = response.currency
return TranslatedItem(
element_id=item.element_id,
@@ -555,7 +563,9 @@ class TranslationService:
total_elements: int,
processing_time: float,
batch_count: int,
processing_track: str = 'direct'
processing_track: str = 'direct',
total_price: float = 0.0,
currency: str = 'USD'
) -> Dict:
"""
Build the translation result JSON structure.
@@ -613,6 +623,8 @@ class TranslationService:
'total_characters': total_chars,
'processing_time_seconds': round(processing_time, 2),
'total_tokens': self._total_tokens,
'total_price': round(total_price, 6),
'currency': currency,
'batch_count': batch_count
},
'translations': {}, # Empty for OCR Track
@@ -656,6 +668,8 @@ class TranslationService:
'total_characters': total_chars,
'processing_time_seconds': round(processing_time, 2),
'total_tokens': self._total_tokens,
'total_price': round(total_price, 6),
'currency': currency,
'batch_count': batch_count
},
'translations': translations
@@ -687,6 +701,8 @@ class TranslationService:
start_time = time.time()
self._total_tokens = 0
self._total_latency = 0.0
self._total_price = 0.0
self._currency = "USD"
logger.info(
f"Starting translation: task_id={task_id}, target={target_lang}"
@@ -752,7 +768,9 @@ class TranslationService:
total_elements=total_elements,
processing_time=processing_time,
batch_count=len(batches),
processing_track=processing_track
processing_track=processing_track,
total_price=self._total_price,
currency=self._currency
)
# Save result