Files
Document_translator/app/models/job.py
2025-10-02 17:13:24 +08:00

327 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
翻譯任務資料模型
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
import json
import uuid
from datetime import datetime, timedelta
from sqlalchemy.sql import func
from sqlalchemy import event
from app import db
from app.utils.timezone import format_taiwan_time
class TranslationJob(db.Model):
    """Translation job table (dt_translation_jobs).

    Each row describes one uploaded document, the languages it should be
    translated into, and the processing state of that translation.

    Methods that change state (``update_status``, ``add_*_file``,
    ``increment_retry``, ``soft_delete``, ``restore``) commit ``db.session``
    themselves.

    NOTE(review): ``created_at``/``updated_at`` column defaults use the
    database's ``func.now()`` while the methods below write
    ``datetime.utcnow()``; this is only consistent if the database clock is
    UTC -- confirm against the deployment.
    """
    __tablename__ = 'dt_translation_jobs'

    # Maximum value of ``retry_count`` below which ``can_retry`` allows a retry.
    MAX_RETRY_COUNT = 3

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    job_uuid = db.Column(db.String(36), unique=True, nullable=False, index=True, comment='任務唯一識別碼')
    user_id = db.Column(db.Integer, db.ForeignKey('dt_users.id'), nullable=False, comment='使用者ID')
    original_filename = db.Column(db.String(500), nullable=False, comment='原始檔名')
    file_extension = db.Column(db.String(10), nullable=False, comment='檔案副檔名')
    file_size = db.Column(db.BigInteger, nullable=False, comment='檔案大小(bytes)')
    file_path = db.Column(db.String(1000), nullable=False, comment='檔案路徑')
    source_language = db.Column(db.String(50), default=None, comment='來源語言')
    target_languages = db.Column(db.JSON, nullable=False, comment='目標語言陣列')
    status = db.Column(
        db.Enum('PENDING', 'PROCESSING', 'COMPLETED', 'FAILED', 'RETRY', name='job_status'),
        default='PENDING',
        comment='任務狀態'
    )
    progress = db.Column(db.Numeric(5, 2), default=0.00, comment='處理進度(%)')
    retry_count = db.Column(db.Integer, default=0, comment='重試次數')
    error_message = db.Column(db.Text, comment='錯誤訊息')
    total_tokens = db.Column(db.Integer, default=0, comment='總token數')
    total_cost = db.Column(db.Numeric(10, 4), default=0.0000, comment='總成本')
    conversation_id = db.Column(db.String(100), comment='Dify對話ID用於維持翻譯上下文')
    processing_started_at = db.Column(db.DateTime, comment='開始處理時間')
    completed_at = db.Column(db.DateTime, comment='完成時間')
    created_at = db.Column(db.DateTime, default=func.now(), comment='建立時間')
    updated_at = db.Column(
        db.DateTime,
        default=func.now(),
        onupdate=func.now(),
        comment='更新時間'
    )
    deleted_at = db.Column(db.DateTime, comment='軟刪除時間')

    # Relationships
    files = db.relationship('JobFile', backref='job', lazy='dynamic', cascade='all, delete-orphan')
    api_usage_stats = db.relationship('APIUsageStats', backref='job', lazy='dynamic')

    def __repr__(self):
        return f'<TranslationJob {self.job_uuid}>'

    def __init__(self, **kwargs):
        """Initialise the row and generate ``job_uuid`` when none was supplied."""
        super().__init__(**kwargs)
        if not self.job_uuid:
            self.job_uuid = str(uuid.uuid4())

    @staticmethod
    def _format_timestamp(value):
        """Return ``value`` formatted by ``format_taiwan_time``, or ``None`` if it is falsy."""
        return format_taiwan_time(value, "%Y-%m-%d %H:%M:%S") if value else None

    def to_dict(self, include_files=False):
        """Serialise the job to a plain dictionary.

        Args:
            include_files: When true, add a ``'files'`` key holding
                ``to_dict()`` of every related file record.

        Returns:
            dict: Column values; ``progress`` and ``total_cost`` are converted
            to ``float`` (``0.0`` when falsy) and timestamps to
            ``"%Y-%m-%d %H:%M:%S"`` strings via ``format_taiwan_time``
            (``None`` when unset).
        """
        data = {
            'id': self.id,
            'job_uuid': self.job_uuid,
            'user_id': self.user_id,
            'original_filename': self.original_filename,
            'file_extension': self.file_extension,
            'file_size': self.file_size,
            'file_path': self.file_path,
            'source_language': self.source_language,
            'target_languages': self.target_languages,
            'status': self.status,
            'progress': float(self.progress) if self.progress else 0.0,
            'retry_count': self.retry_count,
            'error_message': self.error_message,
            'total_tokens': self.total_tokens,
            'total_cost': float(self.total_cost) if self.total_cost else 0.0,
            'conversation_id': self.conversation_id,
            'processing_started_at': self._format_timestamp(self.processing_started_at),
            'completed_at': self._format_timestamp(self.completed_at),
            'created_at': self._format_timestamp(self.created_at),
            'updated_at': self._format_timestamp(self.updated_at),
            'deleted_at': self._format_timestamp(self.deleted_at)
        }
        if include_files:
            data['files'] = [f.to_dict() for f in self.files]
        return data

    def update_status(self, status, error_message=None, progress=None):
        """Set the job status (plus optional error/progress) and commit.

        Args:
            status: New status value.
            error_message: Stored only when truthy, so an existing message is
                never cleared by this method.
            progress: Stored when not ``None``; overridden with ``100.00``
                when ``status`` is ``'COMPLETED'``.
        """
        self.status = status
        if error_message:
            self.error_message = error_message
        if progress is not None:
            self.progress = progress
        if status == 'PROCESSING' and not self.processing_started_at:
            # Keep the first start time across repeated PROCESSING updates.
            self.processing_started_at = datetime.utcnow()
        elif status == 'COMPLETED':
            self.completed_at = datetime.utcnow()
            self.progress = 100.00
        self.updated_at = datetime.utcnow()
        db.session.commit()

    def _create_file_record(self, file_type, filename, file_path, file_size, **extra):
        """Create, add and commit a ``JobFile`` row attached to this job.

        ``extra`` is forwarded to the ``JobFile`` constructor unchanged (used
        for ``language_code`` on translated files).
        """
        from pathlib import Path
        record = JobFile(
            job_id=self.id,
            file_type=file_type,
            original_filename=filename,
            # The stored name is the final path component of ``file_path``.
            stored_filename=Path(file_path).name,
            file_path=file_path,
            file_size=file_size,
            mime_type=self._get_mime_type(filename),
            **extra
        )
        db.session.add(record)
        db.session.commit()
        return record

    def add_original_file(self, filename, file_path, file_size):
        """Record the source file of this job and return the new ``JobFile``."""
        return self._create_file_record('source', filename, file_path, file_size)

    def add_translated_file(self, language_code, filename, file_path, file_size):
        """Record a translated output file for ``language_code`` and return the new ``JobFile``."""
        return self._create_file_record(
            'translated', filename, file_path, file_size, language_code=language_code
        )

    def _get_mime_type(self, filename):
        """Return the MIME type for ``filename`` based on its extension.

        A fixed table covers the known document extensions; anything else
        falls back to ``mimetypes.guess_type`` and finally to
        ``'application/octet-stream'``.
        """
        import mimetypes
        from pathlib import Path
        ext = Path(filename).suffix.lower()
        mime_map = {
            '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            '.pdf': 'application/pdf',
            '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
            '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
            '.txt': 'text/plain'
        }
        return mime_map.get(ext, mimetypes.guess_type(filename)[0] or 'application/octet-stream')

    def get_translated_files(self):
        """Return all related file records whose ``file_type`` is ``'translated'``."""
        return self.files.filter_by(file_type='translated').all()

    def get_original_file(self):
        """Return the first related file record whose ``file_type`` is ``'source'`` (or ``None``)."""
        return self.files.filter_by(file_type='source').first()

    def can_retry(self):
        """Return whether the job is FAILED/RETRY and below ``MAX_RETRY_COUNT``.

        ``retry_count`` may still be ``None`` on an instance that has not been
        flushed yet (the column default is applied at INSERT), so ``None`` is
        treated as zero instead of raising ``TypeError``.
        """
        return (
            self.status in ('FAILED', 'RETRY')
            and (self.retry_count or 0) < self.MAX_RETRY_COUNT
        )

    def increment_retry(self):
        """Increase ``retry_count`` by one and commit.

        A ``None`` counter (instance not flushed yet) is treated as zero.
        """
        self.retry_count = (self.retry_count or 0) + 1
        self.updated_at = datetime.utcnow()
        db.session.commit()

    def soft_delete(self):
        """Soft-delete the job by stamping ``deleted_at`` (the row is kept, e.g. for reports) and commit."""
        self.deleted_at = datetime.utcnow()
        self.updated_at = datetime.utcnow()
        db.session.commit()

    def restore(self):
        """Undo a soft delete by clearing ``deleted_at`` and commit."""
        self.deleted_at = None
        self.updated_at = datetime.utcnow()
        db.session.commit()

    def is_deleted(self):
        """Return whether the job has been soft-deleted."""
        return self.deleted_at is not None

    @classmethod
    def get_queue_position(cls, job_uuid):
        """Return the 1-based queue position of the job with ``job_uuid``.

        The position is one plus the number of non-deleted PENDING jobs
        created strictly before it. The job's own status is not checked.

        Returns:
            int | None: ``None`` when no non-deleted job has that UUID.
        """
        job = cls.query.filter_by(job_uuid=job_uuid, deleted_at=None).first()
        if not job:
            return None
        position = cls.query.filter(
            cls.status == 'PENDING',
            cls.deleted_at.is_(None),
            cls.created_at < job.created_at
        ).count()
        return position + 1

    @classmethod
    def get_pending_jobs(cls):
        """Return all non-deleted PENDING jobs, oldest first."""
        return cls.query.filter_by(status='PENDING', deleted_at=None).order_by(cls.created_at.asc()).all()

    @classmethod
    def get_processing_jobs(cls):
        """Return all non-deleted PROCESSING jobs."""
        return cls.query.filter_by(status='PROCESSING', deleted_at=None).all()

    @classmethod
    def get_user_jobs(cls, user_id, status=None, limit=None, offset=None, include_deleted=False):
        """Return a user's jobs, newest first.

        Args:
            user_id: Owner of the jobs.
            status: Optional status filter; upper-cased before comparison.
                ``None``/empty or ``'all'`` disables the filter.
            limit: Maximum number of rows; a falsy value (including ``0``)
                means no limit.
            offset: Rows to skip; a falsy value means no offset.
            include_deleted: Include soft-deleted jobs when true.
        """
        query = cls.query.filter_by(user_id=user_id)
        # Soft-deleted rows are excluded unless explicitly requested.
        if not include_deleted:
            query = query.filter(cls.deleted_at.is_(None))
        if status and status != 'all':
            query = query.filter_by(status=status.upper())
        query = query.order_by(cls.created_at.desc())
        if limit:
            query = query.limit(limit)
        if offset:
            query = query.offset(offset)
        return query.all()

    @classmethod
    def get_statistics(cls, user_id=None, start_date=None, end_date=None, include_deleted=True):
        """Return job counts per status and the completion rate.

        Soft-deleted jobs are included by default so report totals stay
        complete.

        Args:
            user_id: Restrict to one user when truthy.
            start_date: Lower bound (inclusive) on ``created_at`` when truthy.
            end_date: Upper bound (inclusive) on ``created_at`` when truthy.
            include_deleted: Set to false to exclude soft-deleted jobs.

        Returns:
            dict: ``total``, ``completed``, ``failed``, ``processing``,
            ``pending`` counts and ``success_rate`` as a percentage
            (``0`` when there are no jobs).
        """
        query = cls.query
        if not include_deleted:
            query = query.filter(cls.deleted_at.is_(None))
        if user_id:
            query = query.filter_by(user_id=user_id)
        if start_date:
            query = query.filter(cls.created_at >= start_date)
        if end_date:
            query = query.filter(cls.created_at <= end_date)
        # Each count below issues its own query against the same base filter.
        total = query.count()
        completed = query.filter_by(status='COMPLETED').count()
        failed = query.filter_by(status='FAILED').count()
        processing = query.filter_by(status='PROCESSING').count()
        pending = query.filter_by(status='PENDING').count()
        return {
            'total': total,
            'completed': completed,
            'failed': failed,
            'processing': processing,
            'pending': pending,
            'success_rate': (completed / total * 100) if total > 0 else 0
        }
class JobFile(db.Model):
    """File record table (dt_job_files).

    One row per file attached to a translation job; ``file_type`` is either
    ``'source'`` or ``'translated'``.
    """
    __tablename__ = 'dt_job_files'

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    job_id = db.Column(
        db.Integer,
        db.ForeignKey('dt_translation_jobs.id'),
        nullable=False,
        comment='任務ID',
    )
    file_type = db.Column(
        db.Enum('source', 'translated', name='file_type'),
        nullable=False,
        comment='檔案類型',
    )
    language_code = db.Column(db.String(50), comment='語言代碼(翻譯檔案)')
    original_filename = db.Column(db.String(255), nullable=False, comment='原始檔名')
    stored_filename = db.Column(db.String(255), nullable=False, comment='儲存檔名')
    file_path = db.Column(db.String(500), nullable=False, comment='檔案路徑')
    file_size = db.Column(db.BigInteger, default=0, comment='檔案大小')
    mime_type = db.Column(db.String(100), comment='MIME 類型')
    created_at = db.Column(db.DateTime, default=func.now(), comment='建立時間')

    def __repr__(self):
        return f'<JobFile {self.original_filename}>'

    def to_dict(self):
        """Serialise the file record to a plain dictionary.

        ``created_at`` is rendered as a ``"%Y-%m-%d %H:%M:%S"`` string via
        ``format_taiwan_time`` (``None`` when unset); every other column is
        copied as-is.
        """
        plain_fields = (
            'id',
            'job_id',
            'file_type',
            'language_code',
            'original_filename',
            'stored_filename',
            'file_path',
            'file_size',
            'mime_type',
        )
        payload = {field: getattr(self, field) for field in plain_fields}
        if self.created_at:
            payload['created_at'] = format_taiwan_time(self.created_at, "%Y-%m-%d %H:%M:%S")
        else:
            payload['created_at'] = None
        return payload
# 事件監聽器:自動生成 UUID
@event.listens_for(TranslationJob, 'before_insert')
def receive_before_insert(mapper, connection, target):
    """Give ``target`` a freshly generated UUID4 string before INSERT if ``job_uuid`` is unset.

    Registered as a ``before_insert`` listener on ``TranslationJob``;
    ``mapper`` and ``connection`` are part of the listener signature and are
    not used here.
    """
    if target.job_uuid:
        # A UUID is already present; leave it untouched.
        return
    target.job_uuid = str(uuid.uuid4())