feat: add translation billing stats and remove Export/Settings pages

- Add TranslationLog model to track translation API usage per task
- Integrate Dify API actual price (total_price) into translation stats
- Display translation statistics in admin dashboard with per-task costs
- Remove unused Export and Settings pages to simplify frontend
- Add GET /api/v2/admin/translation-stats endpoint

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
egg
2025-12-12 17:38:12 +08:00
parent d20751d56b
commit 65abd51d60
21 changed files with 682 additions and 662 deletions

View File

@@ -0,0 +1,58 @@
"""add_translation_logs_table
Revision ID: g2b3c4d5e6f7
Revises: f1a2b3c4d5e6
Create Date: 2025-12-12 10:00:00.000000
Add translation_logs table to track translation API usage and costs.
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = 'g2b3c4d5e6f7'
down_revision: Union[str, None] = 'f1a2b3c4d5e6'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the tool_ocr_translation_logs table and its lookup indexes."""
    op.create_table(
        'tool_ocr_translation_logs',
        sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
        sa.Column('user_id', sa.Integer(), nullable=False, comment='Foreign key to users table'),
        sa.Column('task_id', sa.String(length=255), nullable=False, comment='Task UUID that was translated'),
        sa.Column('target_lang', sa.String(length=10), nullable=False, comment='Target language code'),
        sa.Column('source_lang', sa.String(length=10), nullable=True, comment='Source language code'),
        sa.Column('input_tokens', sa.Integer(), nullable=False, server_default='0', comment='Number of input tokens used'),
        sa.Column('output_tokens', sa.Integer(), nullable=False, server_default='0', comment='Number of output tokens generated'),
        sa.Column('total_tokens', sa.Integer(), nullable=False, server_default='0', comment='Total tokens (input + output)'),
        sa.Column('total_elements', sa.Integer(), nullable=False, server_default='0', comment='Total elements in document'),
        sa.Column('translated_elements', sa.Integer(), nullable=False, server_default='0', comment='Number of elements translated'),
        sa.Column('total_characters', sa.Integer(), nullable=False, server_default='0', comment='Total characters translated'),
        sa.Column('estimated_cost', sa.Float(), nullable=False, server_default='0.0', comment='Estimated cost in USD'),
        sa.Column('processing_time_seconds', sa.Float(), nullable=False, server_default='0.0', comment='Translation processing time'),
        sa.Column('provider', sa.String(length=50), nullable=False, server_default='dify', comment='Translation provider'),
        sa.Column('created_at', sa.DateTime(), nullable=False, server_default=sa.text('CURRENT_TIMESTAMP')),
        sa.ForeignKeyConstraint(['user_id'], ['tool_ocr_users.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id')
    )
    # One non-unique index per frequently-filtered column; op.f() applies
    # the configured constraint-naming convention to each index name.
    for column in ('id', 'user_id', 'task_id', 'target_lang', 'created_at'):
        op.create_index(
            op.f(f'ix_tool_ocr_translation_logs_{column}'),
            'tool_ocr_translation_logs',
            [column],
            unique=False,
        )
def downgrade() -> None:
    """Drop the tool_ocr_translation_logs table (indexes first)."""
    # Remove indexes in reverse order of creation before dropping the table.
    for column in ('created_at', 'target_lang', 'task_id', 'user_id', 'id'):
        op.drop_index(
            op.f(f'ix_tool_ocr_translation_logs_{column}'),
            table_name='tool_ocr_translation_logs',
        )
    op.drop_table('tool_ocr_translation_logs')

View File

@@ -458,6 +458,14 @@ class Settings(BaseSettings):
dify_max_batch_chars: int = Field(default=5000) # Max characters per batch
dify_max_batch_items: int = Field(default=20) # Max items per batch
# Translation cost calculation (USD per 1M tokens) - FALLBACK only
# Dify API returns actual price (total_price), this is only used as fallback
# when actual price is not available
translation_cost_per_million_tokens: float = Field(
default=3.0,
description="Fallback cost per 1M tokens when Dify doesn't return actual price"
)
# ===== Background Tasks Configuration =====
task_queue_type: str = Field(default="memory")
redis_url: str = Field(default="redis://localhost:6379/0")

View File

@@ -9,6 +9,7 @@ from app.models.user import User
from app.models.task import Task, TaskFile, TaskStatus
from app.models.session import Session
from app.models.audit_log import AuditLog
from app.models.translation_log import TranslationLog
__all__ = [
"User",
@@ -17,4 +18,5 @@ __all__ = [
"TaskStatus",
"Session",
"AuditLog",
"TranslationLog",
]

View File

@@ -0,0 +1,87 @@
"""
Tool_OCR - Translation Log Model
Tracks translation usage statistics for billing and monitoring
"""
from sqlalchemy import Column, Integer, String, DateTime, Float, ForeignKey
from sqlalchemy.orm import relationship
from datetime import datetime
from app.core.database import Base
class TranslationLog(Base):
    """
    Translation log model for tracking API usage and costs.

    Each record represents a single translation job completion,
    storing token usage and estimated costs for billing purposes.
    Column layout mirrors migration g2b3c4d5e6f7
    (table ``tool_ocr_translation_logs``).
    """
    __tablename__ = "tool_ocr_translation_logs"

    # Surrogate primary key.
    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    # Owning user; DB-level ON DELETE CASCADE removes logs with the user.
    user_id = Column(Integer, ForeignKey("tool_ocr_users.id", ondelete="CASCADE"),
                     nullable=False, index=True,
                     comment="Foreign key to users table")
    # Task UUID string (not a DB foreign key to the tasks table).
    task_id = Column(String(255), nullable=False, index=True,
                     comment="Task UUID that was translated")
    target_lang = Column(String(10), nullable=False, index=True,
                         comment="Target language code (e.g., 'en', 'ja', 'zh-TW')")
    source_lang = Column(String(10), nullable=True,
                         comment="Source language code (or 'auto')")
    # Token usage statistics
    # NOTE(review): the Dify provider only reports a combined total, so
    # callers currently store input_tokens/output_tokens as 0.
    input_tokens = Column(Integer, default=0, nullable=False,
                          comment="Number of input tokens used")
    output_tokens = Column(Integer, default=0, nullable=False,
                           comment="Number of output tokens generated")
    total_tokens = Column(Integer, default=0, nullable=False,
                          comment="Total tokens (input + output)")
    # Translation statistics
    total_elements = Column(Integer, default=0, nullable=False,
                            comment="Total elements in document")
    translated_elements = Column(Integer, default=0, nullable=False,
                                 comment="Number of elements translated")
    total_characters = Column(Integer, default=0, nullable=False,
                              comment="Total characters translated")
    # Cost tracking (estimated based on token pricing)
    # Holds the provider-reported price when available, otherwise a
    # token-count fallback estimate computed by the caller.
    estimated_cost = Column(Float, default=0.0, nullable=False,
                            comment="Estimated cost in USD")
    # Processing info
    processing_time_seconds = Column(Float, default=0.0, nullable=False,
                                     comment="Translation processing time")
    provider = Column(String(50), default="dify", nullable=False,
                      comment="Translation provider (e.g., 'dify')")
    # Timestamps
    # Naive UTC via datetime.utcnow — NOTE(review): deprecated since
    # Python 3.12; consider timezone-aware timestamps project-wide.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False, index=True)

    # Relationships
    # Back-populated by User.translation_logs.
    user = relationship("User", back_populates="translation_logs")

    def __repr__(self):
        """Concise identification string for logs and debugging."""
        return f"<TranslationLog(id={self.id}, task_id='{self.task_id}', target_lang='{self.target_lang}', tokens={self.total_tokens})>"

    def to_dict(self):
        """Convert translation log to dictionary"""
        return {
            "id": self.id,
            "user_id": self.user_id,
            "task_id": self.task_id,
            "target_lang": self.target_lang,
            "source_lang": self.source_lang,
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "total_tokens": self.total_tokens,
            "total_elements": self.total_elements,
            "translated_elements": self.translated_elements,
            "total_characters": self.total_characters,
            "estimated_cost": self.estimated_cost,
            "processing_time_seconds": self.processing_time_seconds,
            "provider": self.provider,
            # ISO-8601 string, or None when the row has not been flushed yet.
            "created_at": self.created_at.isoformat() if self.created_at else None
        }

View File

@@ -33,6 +33,7 @@ class User(Base):
tasks = relationship("Task", back_populates="user", cascade="all, delete-orphan")
sessions = relationship("Session", back_populates="user", cascade="all, delete-orphan")
audit_logs = relationship("AuditLog", back_populates="user")
translation_logs = relationship("TranslationLog", back_populates="user", cascade="all, delete-orphan")
def __repr__(self):
    """Readable summary of the user for logs and the interactive shell."""
    return "<User(id={0}, email='{1}', display_name='{2}')>".format(
        self.id, self.email, self.display_name
    )

View File

@@ -186,3 +186,34 @@ async def get_user_activity_summary(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to get user activity summary: {str(e)}"
)
@router.get("/translation-stats", summary="Get translation statistics")
async def get_translation_stats(
    db: Session = Depends(get_db),
    admin_user: User = Depends(get_current_admin_user)
):
    """
    Return translation usage statistics for billing and monitoring.

    The response aggregates all logged translation jobs:
    total_translations, total_tokens, total_characters, estimated_cost,
    a by_language breakdown, the recent_translations list, and a
    last_30_days summary.

    Requires admin privileges (enforced by the dependency).
    """
    try:
        # Delegate all aggregation to the service layer.
        return admin_service.get_translation_statistics(db)
    except Exception as e:
        # Log the full traceback, surface a generic 500 to the client.
        logger.exception("Failed to get translation statistics")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get translation statistics: {str(e)}"
        )

View File

@@ -39,7 +39,8 @@ def run_translation_task(
task_id: str,
task_db_id: int,
target_lang: str,
source_lang: str = "auto"
source_lang: str = "auto",
user_id: int = None
):
"""
Background task to run document translation.
@@ -49,10 +50,12 @@ def run_translation_task(
task_db_id: Task database ID (for verification)
target_lang: Target language code
source_lang: Source language code ('auto' for detection)
user_id: User ID for logging translation statistics
"""
from app.core.database import SessionLocal
from app.services.translation_service import get_translation_service
from app.schemas.translation import TranslationJobState, TranslationProgress
from app.models.translation_log import TranslationLog
db = SessionLocal()
translation_service = get_translation_service()
@@ -132,6 +135,44 @@ def run_translation_task(
result_file_path=str(output_path) if output_path else None
))
logger.info(f"Translation completed for task {task_id}")
# Log translation statistics to database
if user_id and output_path:
try:
with open(output_path, 'r', encoding='utf-8') as f:
translation_result = json.load(f)
stats = translation_result.get('statistics', {})
total_tokens = stats.get('total_tokens', 0)
# Use actual price from Dify API if available, otherwise calculate estimated cost
actual_price = stats.get('total_price', 0.0)
if actual_price > 0:
estimated_cost = actual_price
else:
# Fallback: Calculate estimated cost based on token pricing
estimated_cost = (total_tokens / 1_000_000) * settings.translation_cost_per_million_tokens
translation_log = TranslationLog(
user_id=user_id,
task_id=task_id,
target_lang=target_lang,
source_lang=source_lang,
total_tokens=total_tokens,
input_tokens=0, # Dify doesn't provide separate input/output tokens
output_tokens=0,
total_elements=stats.get('total_elements', 0),
translated_elements=stats.get('translated_elements', 0),
total_characters=stats.get('total_characters', 0),
processing_time_seconds=stats.get('processing_time_seconds', 0.0),
provider=translation_result.get('provider', 'dify'),
estimated_cost=estimated_cost
)
db.add(translation_log)
db.commit()
logger.info(f"Logged translation stats for task {task_id}: {total_tokens} tokens, ${estimated_cost:.6f}")
except Exception as log_error:
logger.error(f"Failed to log translation stats: {log_error}")
else:
translation_service.set_job_state(task_id, TranslationJobState(
task_id=task_id,
@@ -255,7 +296,8 @@ async def start_translation(
task_id=task_id,
task_db_id=task.id,
target_lang=target_lang,
source_lang=request.source_lang
source_lang=request.source_lang,
user_id=current_user.id
)
logger.info(f"Started translation job for task {task_id}, target_lang={target_lang}")

View File

@@ -13,6 +13,7 @@ from app.models.user import User
from app.models.task import Task, TaskStatus
from app.models.session import Session as UserSession
from app.models.audit_log import AuditLog
from app.models.translation_log import TranslationLog
from app.core.config import settings
logger = logging.getLogger(__name__)
@@ -209,6 +210,87 @@ class AdminService:
return top_users
def get_translation_statistics(self, db: Session) -> dict:
    """
    Aggregate translation usage for the admin dashboard.

    Args:
        db: Database session

    Returns:
        dict with grand totals (tokens, characters, cost), a
        per-target-language breakdown, the 20 most recent jobs, and a
        rolling 30-day summary.
    """
    # Overall number of logged translation jobs.
    total_translations = db.query(TranslationLog).count()

    # Grand totals across every log row (SUM returns NULL on empty sets,
    # hence the `or 0` coalescing below).
    totals = db.query(
        func.sum(TranslationLog.total_tokens).label("total_tokens"),
        func.sum(TranslationLog.input_tokens).label("total_input_tokens"),
        func.sum(TranslationLog.output_tokens).label("total_output_tokens"),
        func.sum(TranslationLog.total_characters).label("total_characters"),
        func.sum(TranslationLog.estimated_cost).label("total_cost")
    ).first()

    # Per-target-language usage breakdown.
    language_rows = db.query(
        TranslationLog.target_lang,
        func.count(TranslationLog.id).label("count"),
        func.sum(TranslationLog.total_tokens).label("tokens"),
        func.sum(TranslationLog.total_characters).label("characters")
    ).group_by(TranslationLog.target_lang).all()
    language_breakdown = []
    for lang, count, tokens, chars in language_rows:
        language_breakdown.append({
            "language": lang,
            "count": count,
            "tokens": tokens or 0,
            "characters": chars or 0
        })

    # Twenty most recent jobs, newest first.
    latest_logs = db.query(TranslationLog).order_by(
        TranslationLog.created_at.desc()
    ).limit(20).all()
    recent_translations = []
    for log in latest_logs:
        recent_translations.append({
            "id": log.id,
            "task_id": log.task_id,
            "target_lang": log.target_lang,
            "total_tokens": log.total_tokens,
            "total_characters": log.total_characters,
            "processing_time_seconds": log.processing_time_seconds,
            "estimated_cost": log.estimated_cost,
            "created_at": log.created_at.isoformat() if log.created_at else None
        })

    # Rolling 30-day window. Naive UTC cutoff — assumes created_at is
    # stored as naive UTC too (NOTE(review): datetime.utcnow is
    # deprecated since Python 3.12).
    cutoff = datetime.utcnow() - timedelta(days=30)
    window = db.query(
        func.count(TranslationLog.id).label("count"),
        func.sum(TranslationLog.total_tokens).label("tokens")
    ).filter(TranslationLog.created_at >= cutoff).first()

    return {
        "total_translations": total_translations,
        "total_tokens": totals.total_tokens or 0,
        "total_input_tokens": totals.total_input_tokens or 0,
        "total_output_tokens": totals.total_output_tokens or 0,
        "total_characters": totals.total_characters or 0,
        "estimated_cost": totals.total_cost or 0.0,
        "by_language": language_breakdown,
        "recent_translations": recent_translations,
        "last_30_days": {
            "count": window.count or 0,
            "tokens": window.tokens or 0
        }
    }
# Singleton instance
admin_service = AdminService()

View File

@@ -40,6 +40,8 @@ class TranslationResponse:
total_tokens: int
latency: float
conversation_id: str
total_price: float = 0.0
currency: str = "USD"
@dataclass
@@ -50,6 +52,8 @@ class BatchTranslationResponse:
latency: float
conversation_id: str
missing_markers: List[int] = field(default_factory=list)
total_price: float = 0.0
currency: str = "USD"
class DifyTranslationError(Exception):
@@ -252,6 +256,11 @@ class DifyClient:
translated_text = data.get("answer", "")
usage = data.get("metadata", {}).get("usage", {})
# Extract price info from usage or metadata (may be string or number)
raw_price = usage.get("total_price", 0.0)
total_price = float(raw_price) if raw_price else 0.0
currency = usage.get("currency", "USD") or "USD"
self._total_tokens += usage.get("total_tokens", 0)
self._total_requests += 1
@@ -259,7 +268,9 @@ class DifyClient:
translated_text=translated_text,
total_tokens=usage.get("total_tokens", 0),
latency=usage.get("latency", 0.0),
conversation_id=data.get("conversation_id", "")
conversation_id=data.get("conversation_id", ""),
total_price=total_price,
currency=currency
)
def translate_batch(
@@ -297,6 +308,11 @@ class DifyClient:
answer = data.get("answer", "")
usage = data.get("metadata", {}).get("usage", {})
# Extract price info from usage or metadata (may be string or number)
raw_price = usage.get("total_price", 0.0)
total_price = float(raw_price) if raw_price else 0.0
currency = usage.get("currency", "USD") or "USD"
translations = self._parse_batch_response(answer, len(texts))
# Check for missing markers
@@ -314,7 +330,9 @@ class DifyClient:
total_tokens=usage.get("total_tokens", 0),
latency=usage.get("latency", 0.0),
conversation_id=data.get("conversation_id", ""),
missing_markers=missing_markers
missing_markers=missing_markers,
total_price=total_price,
currency=currency
)
def get_stats(self) -> dict:

View File

@@ -232,6 +232,8 @@ class TranslationService:
self._jobs_lock = threading.Lock()
self._total_tokens = 0
self._total_latency = 0.0
self._total_price = 0.0
self._currency = "USD"
def _load_raw_ocr_regions(
self,
@@ -459,6 +461,9 @@ class TranslationService:
self._total_tokens += response.total_tokens
self._total_latency += response.latency
self._total_price += response.total_price
if response.currency:
self._currency = response.currency
# Map translations back to items
translated_items = []
@@ -524,6 +529,9 @@ class TranslationService:
self._total_tokens += response.total_tokens
self._total_latency += response.latency
self._total_price += response.total_price
if response.currency:
self._currency = response.currency
return TranslatedItem(
element_id=item.element_id,
@@ -555,7 +563,9 @@ class TranslationService:
total_elements: int,
processing_time: float,
batch_count: int,
processing_track: str = 'direct'
processing_track: str = 'direct',
total_price: float = 0.0,
currency: str = 'USD'
) -> Dict:
"""
Build the translation result JSON structure.
@@ -613,6 +623,8 @@ class TranslationService:
'total_characters': total_chars,
'processing_time_seconds': round(processing_time, 2),
'total_tokens': self._total_tokens,
'total_price': round(total_price, 6),
'currency': currency,
'batch_count': batch_count
},
'translations': {}, # Empty for OCR Track
@@ -656,6 +668,8 @@ class TranslationService:
'total_characters': total_chars,
'processing_time_seconds': round(processing_time, 2),
'total_tokens': self._total_tokens,
'total_price': round(total_price, 6),
'currency': currency,
'batch_count': batch_count
},
'translations': translations
@@ -687,6 +701,8 @@ class TranslationService:
start_time = time.time()
self._total_tokens = 0
self._total_latency = 0.0
self._total_price = 0.0
self._currency = "USD"
logger.info(
f"Starting translation: task_id={task_id}, target={target_lang}"
@@ -752,7 +768,9 @@ class TranslationService:
total_elements=total_elements,
processing_time=processing_time,
batch_count=len(batches),
processing_track=processing_track
processing_track=processing_track,
total_price=self._total_price,
currency=self._currency
)
# Save result