1panel
This commit is contained in:
225
app/__init__.py
Normal file
225
app/__init__.py
Normal file
@@ -0,0 +1,225 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Flask 應用程式工廠
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
import os
|
||||
import redis
|
||||
from flask import Flask, request, make_response
|
||||
from flask_sqlalchemy import SQLAlchemy
|
||||
from flask_cors import CORS
|
||||
from flask_jwt_extended import JWTManager
|
||||
from celery import Celery
|
||||
from app.config import config
|
||||
from app.utils.logger import init_logging
|
||||
|
||||
# 初始化擴展
|
||||
db = SQLAlchemy()
|
||||
cors = CORS()
|
||||
jwt = JWTManager()
|
||||
|
||||
|
||||
def make_celery(app):
    """Build a Celery application bound to the given Flask app.

    The result backend and broker URLs are read from ``app.config`` and the
    complete Flask configuration is copied into the Celery configuration.
    The Celery task base class is replaced so that every task body runs
    inside ``app.app_context()``.

    Args:
        app: The Flask application providing configuration and context.

    Returns:
        The configured Celery instance.
    """
    flask_config = app.config
    celery = Celery(
        app.import_name,
        backend=flask_config['CELERY_RESULT_BACKEND'],
        broker=flask_config['CELERY_BROKER_URL'],
    )
    celery.conf.update(flask_config)

    base_task = celery.Task

    class ContextTask(base_task):
        """Task base class that runs the task inside the Flask app context."""

        def __call__(self, *args, **kwargs):
            context = app.app_context()
            with context:
                return self.run(*args, **kwargs)

    celery.Task = ContextTask
    return celery
|
||||
|
||||
|
||||
# Origins allowed to make credentialed cross-origin requests.
_ALLOWED_CORS_ORIGINS = frozenset({
    'http://localhost:3000',
    'http://127.0.0.1:3000',
    'http://localhost:3001',
    'http://127.0.0.1:3001',
    'http://localhost:12010',
    'http://127.0.0.1:12010',
})


def _apply_cors_headers(response):
    """Add the CORS headers to *response* when the request Origin is allowed.

    Responses to requests without an ``Origin`` header, or with an origin
    that is not in ``_ALLOWED_CORS_ORIGINS``, are returned untouched.
    """
    origin = request.headers.get('Origin')
    if origin and origin in _ALLOWED_CORS_ORIGINS:
        response.headers['Access-Control-Allow-Origin'] = origin
        response.headers['Access-Control-Allow-Headers'] = 'Content-Type, Authorization, X-Requested-With'
        response.headers['Access-Control-Allow-Methods'] = 'GET, POST, PUT, DELETE, OPTIONS, PATCH'
        response.headers['Access-Control-Allow-Credentials'] = 'true'
        response.headers['Access-Control-Max-Age'] = '86400'
    return response


def create_app(config_name=None):
    """Application factory: build and configure the Flask application.

    Args:
        config_name: Key into the ``config`` mapping. When omitted, the
            ``FLASK_ENV`` environment variable is used, falling back to
            ``'default'``.

    Returns:
        The configured Flask application, with ``redis_client``, ``celery``
        and ``socketio`` attributes attached.

    Raises:
        KeyError: If ``config_name`` is not a key of ``config``.
    """
    app = Flask(__name__)

    # Load configuration.
    config_name = config_name or os.getenv('FLASK_ENV', 'default')
    app_config = config[config_name]

    # Load the Dify API configuration first, then copy the config into Flask.
    app_config.load_dify_config()
    app.config.from_object(app_config)

    # Create the required directories.
    app_config.init_directories()

    # Initialise extensions.
    db.init_app(app)

    # Flask-CORS is deliberately not initialised; CORS is handled manually
    # by the request hooks registered below.

    # Initialise JWT.
    jwt.init_app(app)

    # Never write any part of the signing secret to the log; only report
    # whether one is configured. (The previous slice-based message also
    # raised TypeError when the key was missing.)
    if app.config.get('JWT_SECRET_KEY'):
        app.logger.info("🔑 [JWT Config] JWT_SECRET_KEY is configured")
    else:
        app.logger.warning("🔑 [JWT Config] JWT_SECRET_KEY is not set")
    app.logger.info(f"🔑 [JWT Config] JWT_ACCESS_TOKEN_EXPIRES: {app.config.get('JWT_ACCESS_TOKEN_EXPIRES')}")
    app.logger.info(f"🔑 [JWT Config] JWT_REFRESH_TOKEN_EXPIRES: {app.config.get('JWT_REFRESH_TOKEN_EXPIRES')}")

    app.logger.info("🔑 [JWT] Using JWT authentication")

    # Set up Redis (used for Celery). Failure is tolerated: the app then
    # runs with ``redis_client`` set to None.
    try:
        app.redis_client = redis.from_url(app.config['REDIS_URL'])
    except Exception as e:
        app.logger.warning(f"Redis initialization failed: {str(e)}")
        app.redis_client = None

    # Initialise logging.
    init_logging(app)

    # Register the API routes.
    from app.api import api_v1
    app.register_blueprint(api_v1)

    # Register the error handlers.
    register_error_handlers(app)

    # Add CORS response headers to every response.
    @app.after_request
    def after_request(response):
        return _apply_cors_headers(response)

    # Answer OPTIONS preflight requests directly with an empty response.
    @app.before_request
    def before_request():
        if request.method == 'OPTIONS':
            return _apply_cors_headers(make_response())

    # Create the database tables.
    with app.app_context():
        # Import the models before create_all() (presumably so their tables
        # are registered with the metadata — confirm against app.models).
        from app.models import User, TranslationJob, JobFile, TranslationCache, APIUsageStats, SystemLog, Notification  # noqa: F401

        db.create_all()

        # Create the default administrator if it does not exist yet.
        create_default_admin()

    # Create the Celery instance.
    app.celery = make_celery(app)

    # WebSocket support is completely disabled.
    app.logger.info("🔌 [WebSocket] WebSocket 服務已禁用")
    app.socketio = None

    # Register the root routes (SPA and basic API info); optional.
    try:
        from app.root import root_bp
        app.register_blueprint(root_bp)
    except Exception as e:
        app.logger.warning(f"Root routes not registered: {e}")

    app.logger.info("Flask application created successfully")
    return app
|
||||
|
||||
|
||||
def register_error_handlers(app):
    """Register JSON error handlers for common HTTP error codes.

    Each handler ignores the error object and returns a ``(body, status)``
    tuple whose body has the keys ``success`` (always False), ``error`` and
    ``message``.

    Args:
        app: Object exposing Flask's ``errorhandler(code)`` decorator.
    """
    # (HTTP status, machine-readable error code, user-facing message)
    error_table = (
        (404, 'NOT_FOUND', '請求的資源不存在'),
        (403, 'FORBIDDEN', '權限不足'),
        (401, 'UNAUTHORIZED', '需要認證'),
        (500, 'INTERNAL_SERVER_ERROR', '系統內部錯誤'),
        (413, 'FILE_TOO_LARGE', '檔案大小超過限制'),
    )

    def build_handler(status, error_code, text):
        # A factory gives each handler its own bound values, avoiding the
        # late-binding closure pitfall inside the loop below.
        def handle(error):
            return {
                'success': False,
                'error': error_code,
                'message': text
            }, status
        return handle

    for status, error_code, text in error_table:
        app.errorhandler(status)(build_handler(status, error_code, text))
|
||||
|
||||
|
||||
def create_default_admin():
    """Create the default administrator user if it does not exist yet.

    The administrator's email is read from the ``ADMIN_EMAIL`` environment
    variable (with a built-in default). The function is best-effort: any
    failure is logged, the database session is rolled back, and no exception
    propagates to the caller.
    """
    import logging

    log = logging.getLogger(__name__)

    try:
        # Imported lazily to avoid a circular import with app.models.
        from app.models import User

        admin_email = os.environ.get('ADMIN_EMAIL', 'ymirliu@panjit.com.tw')

        # Nothing to do when an account with this email already exists.
        if User.query.filter_by(email=admin_email).first():
            return

        # Create the admin user (details are to be completed at LDAP login).
        admin_user = User(
            username=admin_email.split('@')[0],
            display_name='系統管理員',
            email=admin_email,
            department='IT',
            is_admin=True
        )
        db.session.add(admin_user)
        db.session.commit()

        log.info("Created default admin user: %s", admin_email)

    except Exception as e:
        # A failed flush/commit leaves the session unusable until it is
        # rolled back; without this, later queries on the same session fail.
        try:
            db.session.rollback()
        except Exception:
            # Rolling back is itself best-effort (e.g. no active app context).
            pass
        log.error("Failed to create default admin: %s", e)
|
||||
|
||||
|
||||
# 導入模型在需要時才進行,避免循環導入
|
26
app/api/__init__.py
Normal file
26
app/api/__init__.py
Normal file
@@ -0,0 +1,26 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
API 模組
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
from flask import Blueprint
|
||||
|
||||
# 建立 API Blueprint
|
||||
# Parent blueprint for version 1 of the API; served under /api/v1.
api_v1 = Blueprint('api_v1', __name__, url_prefix='/api/v1')

# Import the individual API modules (kept after the blueprint definition,
# as in the original ordering).
from . import auth, jobs, files, admin, health, notification, cache

# Nest each module's blueprint under api_v1, in the original order.
for _child_blueprint in (
    auth.auth_bp,
    jobs.jobs_bp,
    files.files_bp,
    admin.admin_bp,
    health.health_bp,
    notification.notification_bp,
    cache.cache_bp,
):
    api_v1.register_blueprint(_child_blueprint)
|
1071
app/api/admin.py
Normal file
1071
app/api/admin.py
Normal file
File diff suppressed because it is too large
Load Diff
479
app/api/auth.py
Normal file
479
app/api/auth.py
Normal file
@@ -0,0 +1,479 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
JWT 認證 API
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-09-02
|
||||
"""
|
||||
|
||||
from flask import Blueprint, request, jsonify, current_app
|
||||
from flask_jwt_extended import (
|
||||
create_access_token, create_refresh_token,
|
||||
jwt_required, get_jwt_identity, get_jwt
|
||||
)
|
||||
from app.utils.ldap_auth import LDAPAuthService
|
||||
from app.utils.api_auth import APIAuthService
|
||||
from app.utils.decorators import validate_json, rate_limit
|
||||
from app.utils.exceptions import AuthenticationError
|
||||
from app.utils.logger import get_logger
|
||||
from app.models.user import User
|
||||
from app.models.sys_user import SysUser, LoginLog
|
||||
from app.models.log import SystemLog
|
||||
|
||||
auth_bp = Blueprint('auth', __name__, url_prefix='/auth')
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def _find_sys_user(login_name):
    """Look up the SysUser record for a login name.

    Input containing ``@`` is matched against ``SysUser.email``; anything
    else is matched against ``SysUser.username``.

    Returns:
        The first matching SysUser, or None when there is no match.
    """
    if '@' in login_name:
        return SysUser.query.filter_by(email=login_name).first()
    return SysUser.query.filter_by(username=login_name).first()


def _authenticate_with_fallback(username, password, ip_address, user_agent, known_sys_user):
    """Authenticate via the API service, falling back to LDAP.

    Every attempt, successful or not, is written to LoginLog.

    Args:
        username: Login name as submitted by the client (stripped).
        password: Password as submitted by the client.
        ip_address: Remote address of the request, for the login log.
        user_agent: User-Agent header of the request, for the login log.
        known_sys_user: Existing SysUser for this login name, or None. When
            both back ends reject the credentials, the failed attempt is
            recorded on it.

    Returns:
        Tuple ``(user_info, auth_method)`` where ``auth_method`` is ``'API'``
        or ``'LDAP'``.

    Raises:
        AuthenticationError: If both the API and LDAP authentication fail.
    """
    # 1. Try API authentication first.
    try:
        logger.info(f"嘗試 API 認證: {username}")
        user_info = APIAuthService().authenticate_user(username, password)

        # Record the successful login.
        LoginLog.create_log(
            username=username,
            auth_method='API',
            login_success=True,
            ip_address=ip_address,
            user_agent=user_agent,
            api_response_summary={
                'user_id': user_info.get('api_user_id'),
                'display_name': user_info.get('display_name'),
                'email': user_info.get('email')
            }
        )

        logger.info(f"API 認證成功: {username}")
        return user_info, 'API'

    except AuthenticationError as api_error:
        # Keep the message: the exception name is unbound after this block.
        auth_error = str(api_error)

    logger.warning(f"API 認證失敗: {username} - {auth_error}")

    # Record the failed API attempt.
    LoginLog.create_log(
        username=username,
        auth_method='API',
        login_success=False,
        error_message=auth_error,
        ip_address=ip_address,
        user_agent=user_agent
    )

    # 2. API authentication failed; try LDAP as the fallback.
    try:
        logger.info(f"API 認證失敗,嘗試 LDAP 備援認證: {username}")
        ldap_user_info = LDAPAuthService().authenticate_user(username, password)

        # Convert the LDAP result into the unified user_info format.
        user_info = {
            'username': ldap_user_info['username'],
            'email': ldap_user_info['email'],
            'display_name': ldap_user_info['display_name'],
            'department': ldap_user_info.get('department'),
            'user_principal_name': ldap_user_info.get('user_principal_name'),
            'auth_method': 'LDAP'
        }

        # Record the successful LDAP login.
        LoginLog.create_log(
            username=username,
            auth_method='LDAP',
            login_success=True,
            ip_address=ip_address,
            user_agent=user_agent
        )

        logger.info(f"LDAP 備援認證成功: {username}")
        return user_info, 'LDAP'

    except AuthenticationError as ldap_error:
        logger.error(f"LDAP 備援認證也失敗: {username} - {str(ldap_error)}")

        # Record the failed LDAP attempt.
        LoginLog.create_log(
            username=username,
            auth_method='LDAP',
            login_success=False,
            error_message=str(ldap_error),
            ip_address=ip_address,
            user_agent=user_agent
        )

        # Count the failure on the account found by the lock check. The
        # previous code only looked the account up again when the login
        # name contained '@', so username logins never counted.
        if known_sys_user:
            known_sys_user.record_login_attempt(
                success=False,
                ip_address=ip_address,
                auth_method='API'  # records the primary method attempted
            )

        # Both authentication methods failed.
        raise AuthenticationError(f"認證失敗 - API: {auth_error}, LDAP: {str(ldap_error)}")


@auth_bp.route('/login', methods=['POST'])
@rate_limit(max_requests=10, per_seconds=300)  # at most 10 attempts per 5 minutes
@validate_json(['username', 'password'])
def login():
    """User login: API authentication first, LDAP as the fallback.

    Expects a JSON body with ``username`` and ``password``.

    Returns:
        200 with access/refresh tokens and user data on success;
        400 when username or password is empty or not a string;
        401 when the account is locked or both back ends reject the login;
        500 on any other error.
    """
    from werkzeug.security import generate_password_hash
    from app import db

    username = None
    try:
        data = request.get_json()
        raw_username = data['username']
        password = data['password']

        # Reject non-string values up front instead of failing with a 500
        # on ``.strip()`` or inside the authentication back ends.
        if not isinstance(raw_username, str) or not isinstance(password, str):
            return jsonify({
                'success': False,
                'error': 'INVALID_INPUT',
                'message': '帳號和密碼不能為空'
            }), 400

        username = raw_username.strip()

        if not username or not password:
            return jsonify({
                'success': False,
                'error': 'INVALID_INPUT',
                'message': '帳號和密碼不能為空'
            }), 400

        # Request environment information.
        ip_address = request.remote_addr
        user_agent = request.headers.get('User-Agent')

        # Check whether the account is locked before authenticating.
        existing_sys_user = _find_sys_user(username)
        if existing_sys_user and existing_sys_user.is_account_locked():
            logger.warning(f"帳號被鎖定: {username}")
            raise AuthenticationError("帳號已被鎖定,請稍後再試")

        user_info, auth_method = _authenticate_with_fallback(
            username, password, ip_address, user_agent, existing_sys_user
        )

        # Authentication succeeded; process the user data.
        # 1. Create or update the SysUser record (login bookkeeping).
        sys_user = SysUser.get_or_create(
            email=user_info['email'],  # primary lookup key
            username=user_info['username'],
            display_name=user_info.get('display_name'),
            api_user_id=user_info.get('api_user_id'),
            api_access_token=user_info.get('api_access_token'),
            api_token_expires_at=user_info.get('api_expires_at'),
            auth_method=auth_method
        )

        # Store a salted hash, never the plaintext password.
        # NOTE(review): any code that compared ``password_hash`` with a
        # plaintext password must switch to
        # ``werkzeug.security.check_password_hash``; also confirm that the
        # column is wide enough for the generated hash.
        sys_user.password_hash = generate_password_hash(password)
        db.session.commit()

        # Record the successful login.
        sys_user.record_login_attempt(
            success=True,
            ip_address=ip_address,
            auth_method=auth_method
        )

        # 2. Get or create the User record (permission management).
        user = User.get_or_create(
            username=user_info['username'],
            display_name=user_info['display_name'],
            email=user_info['email'],
            department=user_info.get('department')
        )

        # Update the last-login time.
        user.update_last_login()

        # 3. Create the JWT tokens.
        access_token = create_access_token(
            identity=user.username,
            additional_claims={
                'user_id': user.id,
                'sys_user_id': sys_user.id,  # included for tracking
                'is_admin': user.is_admin,
                'display_name': user.display_name,
                'email': user.email,
                'auth_method': auth_method
            }
        )
        # Carry the tracking claims on the refresh token as well so they
        # are available when a new access token is issued from it.
        refresh_token = create_refresh_token(
            identity=user.username,
            additional_claims={
                'sys_user_id': sys_user.id,
                'auth_method': auth_method
            }
        )

        # 4. Assemble the response data.
        response_data = {
            'access_token': access_token,
            'refresh_token': refresh_token,
            'user': user.to_dict(),
            'auth_method': auth_method,
            'sys_user_info': {
                'login_count': sys_user.login_count,
                'success_count': sys_user.login_success_count,
                'last_login_at': sys_user.last_login_at.isoformat() if sys_user.last_login_at else None
            }
        }

        # Add API-specific information.
        if auth_method == 'API' and user_info.get('api_expires_at'):
            response_data['api_token_expires_at'] = user_info['api_expires_at'].isoformat()

        # Write the system log entry.
        SystemLog.info(
            'auth.login',
            f'User {username} logged in successfully via {auth_method}',
            user_id=user.id,
            extra_data={
                'auth_method': auth_method,
                'ip_address': ip_address,
                'user_agent': user_agent
            }
        )

        logger.info(f"🔑 [JWT Created] User: {username}, UserID: {user.id}, AuthMethod: {auth_method}")

        return jsonify({
            'success': True,
            'data': response_data,
            'message': f'登入成功 ({auth_method} 認證)'
        })

    except AuthenticationError as e:
        # Record the authentication failure.
        SystemLog.warning(
            'auth.login_failed',
            f'Authentication failed for user {username}: {str(e)}',
            extra_data={
                'username': username,
                'ip_address': request.remote_addr,
                'error': str(e)
            }
        )

        logger.warning(f"Authentication failed for user {username}: {str(e)}")

        return jsonify({
            'success': False,
            'error': 'INVALID_CREDENTIALS',
            'message': str(e)
        }), 401

    except Exception as e:
        logger.error(f"Login error: {str(e)}")

        # Discard any half-finished transaction so the session is usable
        # for the error log entry below.
        db.session.rollback()

        SystemLog.error(
            'auth.login_error',
            f'Login system error: {str(e)}',
            extra_data={
                'username': username,
                'error': str(e)
            }
        )

        return jsonify({
            'success': False,
            'error': 'SYSTEM_ERROR',
            'message': '系統錯誤,請稍後再試'
        }), 500
|
||||
|
||||
|
||||
@auth_bp.route('/logout', methods=['POST'])
@jwt_required()
def logout():
    """Log the current user out.

    Writes a system-log entry and two application-log lines for the JWT
    identity, then returns a success response. This function does not
    itself invalidate the token.

    Returns:
        200 with a success message, or 500 if anything raises.
    """
    try:
        current_username = get_jwt_identity()

        # Record the logout in the system log.
        SystemLog.info('auth.logout', f'User {current_username} logged out')

        logger.info(f"🚪 [JWT Logout] User: {current_username}")
        logger.info(f"User {current_username} logged out")

        success_body = {
            'success': True,
            'message': '登出成功'
        }
        return jsonify(success_body)

    except Exception as exc:
        logger.error(f"Logout error: {str(exc)}")

        failure_body = {
            'success': False,
            'error': 'SYSTEM_ERROR',
            'message': '登出時發生錯誤'
        }
        return jsonify(failure_body), 500
|
||||
|
||||
|
||||
@auth_bp.route('/me', methods=['GET'])
@jwt_required()
def get_current_user():
    """Return the current user's information as stored in the JWT.

    The data comes only from the token identity and claims; no database
    lookup is performed.

    Returns:
        200 with ``data.user``, or 500 if anything raises.
    """
    try:
        identity = get_jwt_identity()
        token_claims = get_jwt()

        # Claims copied verbatim from the token into the response.
        profile = {'username': identity}
        for claim_name in ('user_id', 'is_admin', 'display_name', 'email'):
            profile[claim_name] = token_claims.get(claim_name)

        return jsonify({
            'success': True,
            'data': {
                'user': profile
            }
        })

    except Exception as exc:
        logger.error(f"Get current user error: {str(exc)}")

        failure_body = {
            'success': False,
            'error': 'SYSTEM_ERROR',
            'message': '取得使用者資訊時發生錯誤'
        }
        return jsonify(failure_body), 500
|
||||
|
||||
|
||||
@auth_bp.route('/refresh', methods=['POST'])
@jwt_required(refresh=True)
def refresh_token():
    """Issue a new access token from a valid refresh token.

    The user is reloaded from the database so the new token reflects the
    current ``is_admin`` / ``display_name`` / ``email`` values. The
    ``sys_user_id`` and ``auth_method`` claims that ``login`` puts on access
    tokens are carried over when they can be determined, so refreshed tokens
    have the same shape as freshly issued ones.

    Returns:
        200 with the new access token and user data;
        401 when the user no longer exists;
        500 if anything raises.
    """
    try:
        username = get_jwt_identity()

        # Reload the user.
        user = User.query.filter_by(username=username).first()
        if not user:
            return jsonify({
                'success': False,
                'error': 'USER_NOT_FOUND',
                'message': '使用者不存在'
            }), 401

        claims = {
            'user_id': user.id,
            'is_admin': user.is_admin,
            'display_name': user.display_name,
            'email': user.email
        }

        # Forward tracking claims carried by the refresh token, if any.
        refresh_claims = get_jwt()
        for claim_name in ('sys_user_id', 'auth_method'):
            if refresh_claims.get(claim_name) is not None:
                claims[claim_name] = refresh_claims[claim_name]

        # Otherwise recover sys_user_id from the SysUser record, which is
        # keyed by email.
        if 'sys_user_id' not in claims:
            sys_user = SysUser.query.filter_by(email=user.email).first()
            if sys_user is not None:
                claims['sys_user_id'] = sys_user.id

        # Create the new access token.
        new_access_token = create_access_token(
            identity=user.username,
            additional_claims=claims
        )

        logger.info(f"Token refreshed for user {user.username}")

        return jsonify({
            'success': True,
            'data': {
                'access_token': new_access_token,
                'user': user.to_dict()
            },
            'message': 'Token 已刷新'
        })

    except Exception as e:
        logger.error(f"Token refresh error: {str(e)}")

        return jsonify({
            'success': False,
            'error': 'SYSTEM_ERROR',
            'message': '刷新 Token 時發生錯誤'
        }), 500
|
||||
|
||||
|
||||
@auth_bp.route('/check', methods=['GET'])
@jwt_required()
def check_auth():
    """Report the authentication status of the current request.

    The user data is taken only from the JWT identity and claims.

    Returns:
        200 with ``authenticated: True`` and ``data.user``, or 500 with
        ``authenticated: False`` if anything raises.
    """
    try:
        identity = get_jwt_identity()
        token_claims = get_jwt()

        # Claims copied verbatim from the token into the response.
        profile = {'username': identity}
        for claim_name in ('user_id', 'is_admin', 'display_name', 'email'):
            profile[claim_name] = token_claims.get(claim_name)

        return jsonify({
            'success': True,
            'authenticated': True,
            'data': {
                'user': profile
            }
        })

    except Exception as exc:
        logger.error(f"Auth check error: {str(exc)}")

        failure_body = {
            'success': False,
            'authenticated': False,
            'error': 'SYSTEM_ERROR',
            'message': '檢查認證狀態時發生錯誤'
        }
        return jsonify(failure_body), 500
|
||||
|
||||
|
||||
@auth_bp.route('/search-users', methods=['GET'])
@jwt_required()
def search_users():
    """Search users through LDAP.

    Query parameters:
        q: Search term, at least 2 characters after stripping.
        limit: Maximum number of results. Defaults to 20 and is clamped to
            the range 1..50; a non-numeric value falls back to the default.

    Returns:
        200 with ``data.users`` and ``data.count``;
        400 when the search term is too short;
        500 if anything raises.
    """
    try:
        search_term = request.args.get('q', '').strip()

        # ``type=int`` makes an unparsable value fall back to the default
        # instead of raising ValueError (which used to surface as a 500).
        requested_limit = request.args.get('limit', 20, type=int)
        limit = max(1, min(requested_limit, 50))

        if len(search_term) < 2:
            return jsonify({
                'success': False,
                'error': 'INVALID_SEARCH_TERM',
                'message': '搜尋關鍵字至少需要2個字元'
            }), 400

        ldap_service = LDAPAuthService()
        users = ldap_service.search_users(search_term, limit)

        return jsonify({
            'success': True,
            'data': {
                'users': users,
                'count': len(users)
            }
        })

    except Exception as e:
        logger.error(f"User search error: {str(e)}")

        return jsonify({
            'success': False,
            'error': 'SYSTEM_ERROR',
            'message': '搜尋使用者時發生錯誤'
        }), 500
|
||||
|
||||
|
||||
# 錯誤處理器
|
||||
@auth_bp.errorhandler(429)
def rate_limit_handler(e):
    """Return the JSON body for HTTP 429 errors raised in this blueprint."""
    too_many_requests = {
        'success': False,
        'error': 'RATE_LIMIT_EXCEEDED',
        'message': '請求過於頻繁,請稍後再試'
    }
    return jsonify(too_many_requests), 429
|
149
app/api/cache.py
Normal file
149
app/api/cache.py
Normal file
@@ -0,0 +1,149 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
OCR 快取管理路由
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-09-23
|
||||
Modified: 2024-09-23
|
||||
"""
|
||||
|
||||
from flask import Blueprint, jsonify, request
|
||||
from app.services.ocr_cache import OCRCache
|
||||
from app.utils.decorators import jwt_login_required
|
||||
from app.utils.logger import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
cache_bp = Blueprint('cache', __name__, url_prefix='/cache')
|
||||
|
||||
@cache_bp.route('/ocr/stats', methods=['GET'])
@jwt_login_required
def get_ocr_cache_stats():
    """Return the OCR cache statistics.

    Returns:
        200 with ``data.cache_stats`` from ``OCRCache.get_cache_stats()``,
        or 500 with the error text if anything raises.
    """
    try:
        cache_stats = OCRCache().get_cache_stats()
        success_body = {
            'status': 'success',
            'data': {
                'cache_stats': cache_stats,
                'message': 'OCR快取統計資訊獲取成功'
            }
        }
        return jsonify(success_body)

    except Exception as exc:
        logger.error(f"獲取OCR快取統計失敗: {str(exc)}")
        failure_body = {
            'status': 'error',
            'message': f'獲取快取統計失敗: {str(exc)}'
        }
        return jsonify(failure_body), 500
|
||||
|
||||
|
||||
@cache_bp.route('/ocr/clean', methods=['POST'])
@jwt_login_required
def clean_ocr_cache():
    """Remove expired OCR cache entries.

    Returns:
        200 with ``data.deleted_count`` as returned by
        ``OCRCache.clean_expired_cache()``, or 500 with the error text if
        anything raises.
    """
    try:
        removed = OCRCache().clean_expired_cache()
        success_body = {
            'status': 'success',
            'data': {
                'deleted_count': removed,
                'message': f'已清理 {removed} 筆過期快取記錄'
            }
        }
        return jsonify(success_body)

    except Exception as exc:
        logger.error(f"清理OCR快取失敗: {str(exc)}")
        failure_body = {
            'status': 'error',
            'message': f'清理快取失敗: {str(exc)}'
        }
        return jsonify(failure_body), 500
|
||||
|
||||
|
||||
@cache_bp.route('/ocr/clear', methods=['POST'])
@jwt_login_required
def clear_all_ocr_cache():
    """Delete every OCR cache entry (use with care).

    The request body must be a JSON object containing ``"confirm": true``.
    Only the JSON boolean ``true`` is accepted; other truthy values such as
    the string ``"false"`` or the number ``1`` are rejected.

    NOTE(review): only ``jwt_login_required`` guards this route, so any
    logged-in user appears able to wipe the cache — confirm whether it
    should be limited to administrators.

    Returns:
        200 when the cache was cleared;
        400 when the confirmation flag is missing or not ``true``;
        500 when clearing fails or anything raises.
    """
    try:
        # ``silent=True`` yields None for a missing, non-JSON or malformed
        # body instead of raising, so those requests get the 400 below
        # rather than a 500.
        payload = request.get_json(silent=True)
        confirm = payload.get('confirm', False) if isinstance(payload, dict) else False

        if confirm is not True:
            return jsonify({
                'status': 'error',
                'message': '需要確認參數 confirm: true 才能清空所有快取'
            }), 400

        ocr_cache = OCRCache()
        success = ocr_cache.clear_all_cache()

        if success:
            return jsonify({
                'status': 'success',
                'data': {
                    'message': '已清空所有OCR快取記錄'
                }
            })

        return jsonify({
            'status': 'error',
            'message': '清空快取失敗'
        }), 500

    except Exception as e:
        logger.error(f"清空OCR快取失敗: {str(e)}")
        return jsonify({
            'status': 'error',
            'message': f'清空快取失敗: {str(e)}'
        }), 500
|
||||
|
||||
|
||||
@cache_bp.route('/ocr/settings', methods=['GET', 'POST'])
@jwt_login_required
def ocr_cache_settings():
    """Read or validate the OCR cache settings.

    GET (and the HEAD method Flask adds alongside GET) returns the current
    ``cache_expire_days`` and ``cache_db_path`` of a fresh ``OCRCache``.

    POST validates ``cache_expire_days`` from the JSON body (default 30).
    The value is only validated and echoed back; nothing is persisted here.

    Returns:
        200 with the settings;
        400 when ``cache_expire_days`` is not a positive integer;
        500 if anything raises.
    """
    try:
        if request.method == 'POST':
            # ``silent=True`` avoids an exception (and a 500) for a missing
            # or non-JSON body; the default value is then used.
            payload = request.get_json(silent=True)
            data = payload if isinstance(payload, dict) else {}
            cache_expire_days = data.get('cache_expire_days', 30)

            # bool is a subclass of int, so exclude it explicitly.
            is_valid = (
                isinstance(cache_expire_days, int)
                and not isinstance(cache_expire_days, bool)
                and cache_expire_days >= 1
            )
            if not is_valid:
                return jsonify({
                    'status': 'error',
                    'message': '快取過期天數必須為正整數'
                }), 400

            # The setting could be stored in a config file or the database
            # here; for now only its validity is checked.
            return jsonify({
                'status': 'success',
                'data': {
                    'cache_expire_days': cache_expire_days,
                    'message': '快取設定更新成功(重啟應用後生效)'
                }
            })

        # Read path. Also serves HEAD, which previously matched neither
        # branch and made the view return None.
        ocr_cache = OCRCache()
        return jsonify({
            'status': 'success',
            'data': {
                'cache_expire_days': ocr_cache.cache_expire_days,
                'cache_db_path': str(ocr_cache.cache_db_path),
                'message': '快取設定獲取成功'
            }
        })

    except Exception as e:
        logger.error(f"OCR快取設定操作失敗: {str(e)}")
        return jsonify({
            'status': 'error',
            'message': f'設定操作失敗: {str(e)}'
        }), 500
|
712
app/api/files.py
Normal file
712
app/api/files.py
Normal file
@@ -0,0 +1,712 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
檔案管理 API
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
import json
|
||||
import zipfile
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from flask import Blueprint, request, jsonify, send_file, current_app, g
|
||||
from werkzeug.utils import secure_filename
|
||||
from app.utils.decorators import jwt_login_required, rate_limit
|
||||
from app.utils.validators import validate_file, validate_languages, validate_job_uuid
|
||||
from app.utils.helpers import (
|
||||
save_uploaded_file,
|
||||
create_response,
|
||||
format_file_size,
|
||||
generate_download_token
|
||||
)
|
||||
from app.utils.exceptions import ValidationError, FileProcessingError
|
||||
from app.utils.logger import get_logger
|
||||
from app.models.job import TranslationJob
|
||||
from app.models.log import SystemLog
|
||||
|
||||
files_bp = Blueprint('files', __name__, url_prefix='/files')
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def get_mime_type(filename):
    """Return the MIME type for *filename* based on its extension.

    The lower-cased extension is looked up in a fixed table of office,
    PDF, text and zip types. Unknown extensions fall back to
    ``mimetypes.guess_type`` and finally to ``application/octet-stream``.
    """
    import mimetypes
    from pathlib import Path

    known_types = {
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        '.doc': 'application/msword',
        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        '.xls': 'application/vnd.ms-excel',
        '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
        '.pdf': 'application/pdf',
        '.txt': 'text/plain',
        '.zip': 'application/zip'
    }

    suffix = Path(filename).suffix.lower()
    if suffix in known_types:
        return known_types[suffix]

    # Not in the custom table: use the system default, then a generic type.
    guessed, _encoding = mimetypes.guess_type(filename)
    if guessed:
        return guessed
    return 'application/octet-stream'
|
||||
|
||||
|
||||
@files_bp.route('/upload', methods=['POST'])
|
||||
@jwt_login_required
|
||||
@rate_limit(max_requests=20, per_seconds=3600) # 每小時最多20次上傳
|
||||
def upload_file():
|
||||
"""檔案上傳"""
|
||||
try:
|
||||
# 檢查是否有檔案
|
||||
if 'file' not in request.files:
|
||||
return jsonify(create_response(
|
||||
success=False,
|
||||
error='NO_FILE',
|
||||
message='未選擇檔案'
|
||||
)), 400
|
||||
|
||||
file_obj = request.files['file']
|
||||
|
||||
# 驗證檔案
|
||||
file_info = validate_file(file_obj)
|
||||
|
||||
# 取得翻譯設定
|
||||
source_language = request.form.get('source_language', 'auto')
|
||||
target_languages_str = request.form.get('target_languages', '[]')
|
||||
|
||||
try:
|
||||
target_languages = json.loads(target_languages_str)
|
||||
except json.JSONDecodeError:
|
||||
return jsonify(create_response(
|
||||
success=False,
|
||||
error='INVALID_TARGET_LANGUAGES',
|
||||
message='目標語言格式錯誤'
|
||||
)), 400
|
||||
|
||||
# 驗證語言設定
|
||||
lang_info = validate_languages(source_language, target_languages)
|
||||
|
||||
# 建立翻譯任務
|
||||
job = TranslationJob(
|
||||
user_id=g.current_user_id,
|
||||
original_filename=file_info['filename'],
|
||||
file_extension=file_info['file_extension'],
|
||||
file_size=file_info['file_size'],
|
||||
file_path='', # 暫時為空,稍後更新
|
||||
source_language=lang_info['source_language'],
|
||||
target_languages=lang_info['target_languages'],
|
||||
status='PENDING'
|
||||
)
|
||||
|
||||
# 先保存到資料庫以取得 job_uuid
|
||||
from app import db
|
||||
db.session.add(job)
|
||||
db.session.commit()
|
||||
|
||||
# 儲存檔案
|
||||
file_result = save_uploaded_file(file_obj, job.job_uuid)
|
||||
|
||||
if not file_result['success']:
|
||||
# 如果儲存失敗,刪除任務記錄
|
||||
db.session.delete(job)
|
||||
db.session.commit()
|
||||
|
||||
raise FileProcessingError(f"檔案儲存失敗: {file_result['error']}")
|
||||
|
||||
# 更新任務的檔案路徑
|
||||
job.file_path = file_result['file_path']
|
||||
|
||||
# 新增原始檔案記錄
|
||||
job.add_original_file(
|
||||
filename=file_result['filename'],
|
||||
file_path=file_result['file_path'],
|
||||
file_size=file_result['file_size']
|
||||
)
|
||||
|
||||
db.session.commit()
|
||||
|
||||
# 計算佇列位置
|
||||
queue_position = TranslationJob.get_queue_position(job.job_uuid)
|
||||
|
||||
# 記錄日誌
|
||||
SystemLog.info(
|
||||
'files.upload',
|
||||
f'File uploaded successfully: {file_info["filename"]}',
|
||||
user_id=g.current_user_id,
|
||||
job_id=job.id,
|
||||
extra_data={
|
||||
'filename': file_info['filename'],
|
||||
'file_size': file_info['file_size'],
|
||||
'source_language': source_language,
|
||||
'target_languages': target_languages
|
||||
}
|
||||
)
|
||||
|
||||
logger.info(f"File uploaded successfully: {job.job_uuid} - {file_info['filename']}")
|
||||
|
||||
# 觸發翻譯任務
|
||||
try:
|
||||
from app.tasks.translation import process_translation_job
|
||||
|
||||
# 嘗試使用 Celery 異步處理
|
||||
try:
|
||||
task = process_translation_job.delay(job.id)
|
||||
logger.info(f"Translation task queued with Celery: {task.id} for job {job.job_uuid}")
|
||||
except Exception as celery_error:
|
||||
logger.warning(f"Celery not available, falling back to synchronous processing: {str(celery_error)}")
|
||||
|
||||
# Celery 不可用時,使用同步處理
|
||||
try:
|
||||
from app.services.translation_service import TranslationService
|
||||
service = TranslationService()
|
||||
|
||||
# 在後台執行翻譯(同步處理)
|
||||
logger.info(f"Starting synchronous translation for job {job.job_uuid}")
|
||||
result = service.translate_document(job.job_uuid)
|
||||
logger.info(f"Synchronous translation completed for job {job.job_uuid}: {result}")
|
||||
|
||||
except Exception as sync_error:
|
||||
logger.error(f"Synchronous translation failed for job {job.job_uuid}: {str(sync_error)}")
|
||||
job.update_status('FAILED', error_message=f"翻譯處理失敗: {str(sync_error)}")
|
||||
db.session.commit()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to process translation for job {job.job_uuid}: {str(e)}")
|
||||
job.update_status('FAILED', error_message=f"任務處理失敗: {str(e)}")
|
||||
db.session.commit()
|
||||
|
||||
return jsonify(create_response(
|
||||
success=True,
|
||||
data={
|
||||
'job_uuid': job.job_uuid,
|
||||
'original_filename': job.original_filename,
|
||||
'file_size': job.file_size,
|
||||
'file_size_formatted': format_file_size(job.file_size),
|
||||
'source_language': job.source_language,
|
||||
'target_languages': job.target_languages,
|
||||
'status': job.status,
|
||||
'queue_position': queue_position,
|
||||
'created_at': job.created_at.isoformat()
|
||||
},
|
||||
message='檔案上傳成功,已加入翻譯佇列'
|
||||
))
|
||||
|
||||
except ValidationError as e:
|
||||
logger.warning(f"File upload validation error: {str(e)}")
|
||||
return jsonify(create_response(
|
||||
success=False,
|
||||
error=e.error_code,
|
||||
message=str(e)
|
||||
)), 400
|
||||
|
||||
except FileProcessingError as e:
|
||||
logger.error(f"File processing error: {str(e)}")
|
||||
return jsonify(create_response(
|
||||
success=False,
|
||||
error='FILE_PROCESSING_ERROR',
|
||||
message=str(e)
|
||||
)), 500
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"File upload error: {str(e)}")
|
||||
|
||||
SystemLog.error(
|
||||
'files.upload_error',
|
||||
f'File upload failed: {str(e)}',
|
||||
user_id=g.current_user_id,
|
||||
extra_data={'error': str(e)}
|
||||
)
|
||||
|
||||
return jsonify(create_response(
|
||||
success=False,
|
||||
error='SYSTEM_ERROR',
|
||||
message='檔案上傳失敗'
|
||||
)), 500
|
||||
|
||||
|
||||
@files_bp.route('/<job_uuid>/download/<language_code>', methods=['GET'])
@jwt_login_required
def download_file(job_uuid, language_code):
    """Send one translated file of a job as an attachment.

    The job is looked up by ``job_uuid``. The caller must be the job owner
    or an admin, and the job status must be ``COMPLETED``. The file record
    served is the first one whose type is ``translated`` and whose language
    code equals ``language_code``.

    Returns:
        The file response on success, otherwise a JSON error payload with
        400 (invalid UUID / job not completed), 403 (not owner or admin),
        404 (job, file record or file on disk missing) or 500 (any other
        exception).
    """

    def _reject(error_code, message, http_status):
        # Single place that builds the JSON error envelope for this view.
        payload = create_response(success=False, error=error_code, message=message)
        return jsonify(payload), http_status

    try:
        # Raises ValidationError when the UUID is malformed.
        validate_job_uuid(job_uuid)

        job = TranslationJob.query.filter_by(job_uuid=job_uuid).first()
        if not job:
            return _reject('JOB_NOT_FOUND', '任務不存在', 404)

        # Only the owner or an administrator may download.
        if not (job.user_id == g.current_user_id or g.is_admin):
            return _reject('PERMISSION_DENIED', '無權限存取此檔案', 403)

        if job.status != 'COMPLETED':
            return _reject('JOB_NOT_COMPLETED', '任務尚未完成', 400)

        # First translated record for the requested language, if any.
        translated_file = next(
            (
                record
                for record in job.files
                if record.file_type == 'translated'
                and record.language_code == language_code
            ),
            None,
        )
        if not translated_file:
            return _reject('FILE_NOT_FOUND', f'找不到 {language_code} 的翻譯檔案', 404)

        # The database record may outlive the file on disk.
        file_path = Path(translated_file.file_path)
        if not file_path.exists():
            logger.error(f"File not found on disk: {file_path}")
            return _reject('FILE_NOT_FOUND_ON_DISK', '檔案在伺服器上不存在', 404)

        # Audit trail for the download.
        download_details = {
            'filename': translated_file.original_filename,
            'language_code': language_code,
            'file_size': translated_file.file_size
        }
        SystemLog.info(
            'files.download',
            f'File downloaded: {translated_file.original_filename}',
            user_id=g.current_user_id,
            job_id=job.id,
            extra_data=download_details
        )
        logger.info(f"File downloaded: {job.job_uuid} - {language_code}")

        return send_file(
            str(file_path),
            as_attachment=True,
            download_name=translated_file.original_filename,
            mimetype=get_mime_type(translated_file.original_filename)
        )

    except ValidationError as e:
        return _reject(e.error_code, str(e), 400)

    except Exception as e:
        logger.error(f"File download error: {str(e)}")
        return _reject('SYSTEM_ERROR', '檔案下載失敗', 500)
|
||||
|
||||
|
||||
@files_bp.route('/<job_uuid>/download/original', methods=['GET'])
@jwt_login_required
def download_original_file(job_uuid):
    """Send the originally uploaded file of a job as an attachment.

    The caller must be the job owner or an admin. Unlike the translated
    file download, the job status is not checked here.

    Returns:
        The file response on success, otherwise a JSON error payload with
        400 (invalid UUID), 403 (not owner or admin), 404 (job, original
        file record or file on disk missing) or 500 (any other exception).
    """

    def _reject(error_code, message, http_status):
        # Single place that builds the JSON error envelope for this view.
        payload = create_response(success=False, error=error_code, message=message)
        return jsonify(payload), http_status

    try:
        # Raises ValidationError when the UUID is malformed.
        validate_job_uuid(job_uuid)

        job = TranslationJob.query.filter_by(job_uuid=job_uuid).first()
        if not job:
            return _reject('JOB_NOT_FOUND', '任務不存在', 404)

        # Only the owner or an administrator may download.
        if not (job.user_id == g.current_user_id or g.is_admin):
            return _reject('PERMISSION_DENIED', '無權限存取此檔案', 403)

        original_file = job.get_original_file()
        if not original_file:
            return _reject('ORIGINAL_FILE_NOT_FOUND', '找不到原始檔案記錄', 404)

        # The database record may outlive the file on disk.
        file_path = Path(original_file.file_path)
        if not file_path.exists():
            logger.error(f"Original file not found on disk: {file_path}")
            return _reject('FILE_NOT_FOUND_ON_DISK', '原始檔案在伺服器上不存在', 404)

        # Audit trail for the download.
        download_details = {
            'filename': original_file.original_filename,
            'file_size': original_file.file_size
        }
        SystemLog.info(
            'files.download_original',
            f'Original file downloaded: {original_file.original_filename}',
            user_id=g.current_user_id,
            job_id=job.id,
            extra_data=download_details
        )
        logger.info(f"Original file downloaded: {job.job_uuid}")

        # The attachment name comes from the job, not from the file record.
        return send_file(
            str(file_path),
            as_attachment=True,
            download_name=job.original_filename,
            mimetype=get_mime_type(job.original_filename)
        )

    except ValidationError as e:
        return _reject(e.error_code, str(e), 400)

    except Exception as e:
        logger.error(f"Original file download error: {str(e)}")
        return _reject('SYSTEM_ERROR', '原始檔案下載失敗', 500)
|
||||
|
||||
|
||||
@files_bp.route('/supported-formats', methods=['GET'])
def get_supported_formats():
    """Return the supported upload formats and the maximum upload size.

    The size limit is read from the ``MAX_CONTENT_LENGTH`` app setting and
    falls back to 26214400 bytes when the setting is absent.
    """
    try:
        # (extension, display name, description, icon) for each format.
        catalogue = (
            ('.docx', 'Word 文件 (.docx)', 'Microsoft Word 2007+ 格式', 'file-word'),
            ('.doc', 'Word 文件 (.doc)', 'Microsoft Word 97-2003 格式', 'file-word'),
            ('.pptx', 'PowerPoint 簡報 (.pptx)', 'Microsoft PowerPoint 2007+ 格式', 'file-powerpoint'),
            ('.xlsx', 'Excel 試算表 (.xlsx)', 'Microsoft Excel 2007+ 格式', 'file-excel'),
            ('.xls', 'Excel 試算表 (.xls)', 'Microsoft Excel 97-2003 格式', 'file-excel'),
            ('.pdf', 'PDF 文件 (.pdf)', 'Portable Document Format', 'file-pdf'),
        )
        formats = {
            extension: {'name': label, 'description': summary, 'icon': icon}
            for extension, label, summary, icon in catalogue
        }

        max_size = current_app.config.get('MAX_CONTENT_LENGTH', 26214400)

        body = {
            'supported_formats': formats,
            'max_file_size': max_size,
            'max_file_size_formatted': format_file_size(max_size)
        }
        return jsonify(create_response(success=True, data=body))

    except Exception as e:
        logger.error(f"Get supported formats error: {str(e)}")
        failure = create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='取得支援格式失敗'
        )
        return jsonify(failure), 500
|
||||
|
||||
|
||||
@files_bp.route('/supported-languages', methods=['GET'])
def get_supported_languages():
    """Return the supported languages reported by the helper module."""
    try:
        # The helper has the same name as this view, so it is imported
        # under an alias to keep the two apart.
        from app.utils.helpers import get_supported_languages as _load_languages

        body = {'supported_languages': _load_languages()}
        return jsonify(create_response(success=True, data=body))

    except Exception as e:
        logger.error(f"Get supported languages error: {str(e)}")
        failure = create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='取得支援語言失敗'
        )
        return jsonify(failure), 500
|
||||
|
||||
|
||||
@files_bp.route('/<job_uuid>/download/batch', methods=['GET'])
@jwt_login_required
def download_batch_files(job_uuid):
    """Download the original file and all translated files of a job as a ZIP.

    The caller must be the job owner or an admin, and the job status must be
    ``COMPLETED``. The original file (if present on disk) is stored under
    ``original/``; each translated file is stored under a folder named after
    its language code. Files missing on disk are skipped with a warning.

    The archive is built in an anonymous temporary file, which the operating
    system removes as soon as it is closed, so nothing is left behind in the
    temp directory and concurrent requests never share a path.

    Returns:
        The ZIP response on success, otherwise a JSON error payload with
        400 (invalid UUID / job not completed), 403 (not owner or admin),
        404 (job missing, no translated files, nothing to archive) or 500
        (any other exception).
    """
    try:
        # Raises ValidationError when the UUID is malformed.
        validate_job_uuid(job_uuid)

        job = TranslationJob.query.filter_by(job_uuid=job_uuid).first()

        if not job:
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_FOUND',
                message='任務不存在'
            )), 404

        # Only the owner or an administrator may download.
        if job.user_id != g.current_user_id and not g.is_admin:
            return jsonify(create_response(
                success=False,
                error='PERMISSION_DENIED',
                message='無權限存取此檔案'
            )), 403

        if job.status != 'COMPLETED':
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_COMPLETED',
                message='任務尚未完成'
            )), 400

        translated_files = job.get_translated_files()

        if not translated_files:
            return jsonify(create_response(
                success=False,
                error='NO_TRANSLATED_FILES',
                message='沒有找到翻譯檔案'
            )), 404

        # Path.stem drops only the last extension and any directory part,
        # so "report.v2.docx" keeps "report.v2" as its base name.
        base_name = Path(job.original_filename).stem
        zip_filename = f"{base_name}_translations_{job.job_uuid[:8]}.zip"

        # Anonymous temp file: deleted automatically when closed.
        archive = tempfile.TemporaryFile()
        try:
            files_added = 0

            with zipfile.ZipFile(archive, 'w', zipfile.ZIP_DEFLATED) as zip_file:
                # Original upload goes into its own folder.
                original_file = job.get_original_file()
                if original_file and Path(original_file.file_path).exists():
                    zip_file.write(
                        original_file.file_path,
                        f"original/{original_file.original_filename}"
                    )
                    files_added += 1

                # Translated files, one folder per language; a given archive
                # name is written only once.
                added_files = set()
                for tf in translated_files:
                    file_path = Path(tf.file_path)
                    if not file_path.exists():
                        logger.warning(f"Translation file not found: {tf.file_path}")
                        continue

                    archive_name = f"{tf.language_code}/{tf.original_filename}"
                    if archive_name in added_files:
                        continue

                    zip_file.write(str(file_path), archive_name)
                    added_files.add(archive_name)
                    files_added += 1

            if files_added == 0:
                archive.close()
                return jsonify(create_response(
                    success=False,
                    error='NO_FILES_TO_ZIP',
                    message='沒有可用的檔案進行壓縮'
                )), 404

            # The ZIP is fully written once the ZipFile context has exited;
            # remember its size and rewind so the response streams from the
            # beginning.
            archive_size = archive.tell()
            archive.seek(0)

            SystemLog.info(
                'files.download_batch',
                f'Batch files downloaded: {zip_filename}',
                user_id=g.current_user_id,
                job_id=job.id,
                extra_data={
                    'zip_filename': zip_filename,
                    'files_count': files_added,
                    'job_uuid': job_uuid
                }
            )

            logger.info(f"Batch files downloaded: {job.job_uuid} - {files_added} files in ZIP")

            # The response takes ownership of the file object and closes it
            # (which deletes it) once the body has been sent.
            response = send_file(
                archive,
                as_attachment=True,
                download_name=zip_filename,
                mimetype='application/zip'
            )
            # send_file cannot infer the length of a generic file object.
            response.content_length = archive_size
            return response

        except Exception:
            # Nothing will stream the archive any more: release it now.
            archive.close()
            raise

    except ValidationError as e:
        return jsonify(create_response(
            success=False,
            error=e.error_code,
            message=str(e)
        )), 400

    except Exception as e:
        logger.error(f"Batch download error: {str(e)}")

        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='批量下載失敗'
        )), 500
|
||||
|
||||
|
||||
@files_bp.route('/<job_uuid>/download/combine', methods=['GET'])
@jwt_login_required
def download_combine_file(job_uuid):
    """Send the combined (merged) output file of a job as an attachment.

    Access follows the same rule as the other download endpoints in this
    module: the caller must be the job owner or an admin, and the job status
    must be ``COMPLETED``. The combined file is the first file record whose
    name contains ``combine`` (case-insensitive) or whose type is
    ``combined``.

    Returns:
        The file response on success, otherwise a JSON error payload with
        400 (invalid UUID / job not completed), 403 (not owner or admin),
        404 (job, combined record or file on disk missing) or 500 (any
        other exception).
    """
    try:
        # Raises ValidationError when the UUID is malformed.
        validate_job_uuid(job_uuid)

        job = TranslationJob.query.filter_by(job_uuid=job_uuid).first()

        if not job:
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_FOUND',
                message='任務不存在'
            )), 404

        # Owner-or-admin check, as in the sibling download views. Filtering
        # the query by user_id would lock administrators out.
        if job.user_id != g.current_user_id and not g.is_admin:
            return jsonify(create_response(
                success=False,
                error='PERMISSION_DENIED',
                message='無權限存取此檔案'
            )), 403

        if job.status != 'COMPLETED':
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_COMPLETED',
                message='任務尚未完成'
            )), 400

        # First record that looks like the combined output.
        combine_file = next(
            (
                record
                for record in job.files
                if 'combine' in record.original_filename.lower()
                or record.file_type == 'combined'
            ),
            None,
        )

        if not combine_file:
            return jsonify(create_response(
                success=False,
                error='COMBINE_FILE_NOT_FOUND',
                message='找不到合併檔案'
            )), 404

        # The database record may outlive the file on disk.
        file_path = Path(combine_file.file_path)
        if not file_path.exists():
            return jsonify(create_response(
                success=False,
                error='FILE_NOT_FOUND',
                message='合併檔案已被刪除'
            )), 404

        logger.info(f"Combine file downloaded: {job.job_uuid} - {combine_file.original_filename}")

        return send_file(
            str(file_path),
            as_attachment=True,
            download_name=combine_file.original_filename,
            mimetype=get_mime_type(combine_file.original_filename)
        )

    except ValidationError as e:
        return jsonify(create_response(
            success=False,
            error=e.error_code,
            message=str(e)
        )), 400

    except Exception as e:
        logger.error(f"Combine file download error: {str(e)}")

        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='合併檔案下載失敗'
        )), 500
|
224
app/api/health.py
Normal file
224
app/api/health.py
Normal file
@@ -0,0 +1,224 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
系統健康檢查 API
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from flask import Blueprint, jsonify
|
||||
from app.utils.helpers import create_response
|
||||
from app.utils.logger import get_logger
|
||||
from app.models.job import TranslationJob
|
||||
from app.utils.timezone import format_taiwan_time, now_taiwan
|
||||
|
||||
health_bp = Blueprint('health', __name__, url_prefix='/health')
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@health_bp.route('', methods=['GET'])
def health_check():
    """Report the health of the services this application depends on.

    Each dependency is probed independently, so one failing probe never
    hides the result of the others:

    * database   - runs ``SELECT 1``; a failure marks the whole system unhealthy
    * redis      - ``PING``; a failure is reported but does not change the
      overall status
    * ldap       - ``test_connection()``; a ``False`` result or an exception
      marks the whole system unhealthy
    * filesystem - writes a throw-away file into ``UPLOAD_FOLDER``; a failure
      marks the whole system unhealthy
    * dify_api   - only reports whether the API key and base URL are configured

    Imports are kept inside each probe so that a missing module is reported
    as that service's error.

    Returns:
        JSON status with HTTP 200 when the overall status is ``healthy``,
        503 otherwise, or 500 if the check itself raises.
    """
    try:
        status = {
            'timestamp': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"),
            'status': 'healthy',
            'services': {}
        }

        # NOTE(review): the raw exception text of every probe is returned to
        # the caller of this route, which has no login decorator - confirm
        # that this cannot leak connection details.

        # Database probe
        try:
            from app import db
            from sqlalchemy import text
            db.session.execute(text('SELECT 1'))
            status['services']['database'] = {'status': 'healthy'}
        except Exception as e:
            status['services']['database'] = {
                'status': 'unhealthy',
                'error': str(e)
            }
            status['status'] = 'unhealthy'

        # Redis probe
        try:
            import redis
            from flask import current_app
            redis_client = redis.from_url(current_app.config['REDIS_URL'])
            redis_client.ping()
            status['services']['redis'] = {'status': 'healthy'}
        except Exception as e:
            status['services']['redis'] = {
                'status': 'unhealthy',
                'error': str(e)
            }
            # A Redis outage deliberately leaves the overall status untouched
            # (the application can run without Celery).

        # LDAP probe
        try:
            from app.utils.ldap_auth import LDAPAuthService
            ldap_service = LDAPAuthService()
            if ldap_service.test_connection():
                status['services']['ldap'] = {'status': 'healthy'}
            else:
                status['services']['ldap'] = {'status': 'unhealthy', 'error': 'Connection failed'}
                # A failed connection degrades the system exactly like an
                # exception raised by the probe.
                status['status'] = 'unhealthy'
        except Exception as e:
            status['services']['ldap'] = {
                'status': 'unhealthy',
                'error': str(e)
            }
            status['status'] = 'unhealthy'

        # Filesystem probe
        try:
            import tempfile
            from pathlib import Path
            from flask import current_app
            upload_folder = Path(current_app.config['UPLOAD_FOLDER'])

            # A uniquely named, self-deleting probe file: concurrent health
            # checks cannot remove each other's file and report a false
            # failure.
            with tempfile.NamedTemporaryFile(
                dir=str(upload_folder),
                prefix='health_check_',
                suffix='.tmp'
            ) as probe:
                probe.write(b'health_check')
                probe.flush()

            status['services']['filesystem'] = {'status': 'healthy'}
        except Exception as e:
            status['services']['filesystem'] = {
                'status': 'unhealthy',
                'error': str(e)
            }
            status['status'] = 'unhealthy'

        # Dify API: configuration check only, no connection test yet
        try:
            from flask import current_app
            if current_app.config.get('DIFY_API_KEY') and current_app.config.get('DIFY_API_BASE_URL'):
                status['services']['dify_api'] = {'status': 'not_tested'}
            else:
                status['services']['dify_api'] = {'status': 'not_configured'}
        except Exception as e:
            status['services']['dify_api'] = {
                'status': 'error',
                'error': str(e)
            }

        return jsonify(status), 200 if status['status'] == 'healthy' else 503

    except Exception as e:
        logger.error(f"Health check error: {str(e)}")
        return jsonify({
            'timestamp': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"),
            'status': 'error',
            'error': str(e)
        }), 500
|
||||
|
||||
|
||||
@health_bp.route('/metrics', methods=['GET'])
def get_metrics():
    """Return job counts per status, overall and for the last 24 hours.

    Both sections list the PENDING, PROCESSING, COMPLETED, FAILED and RETRY
    counts plus a ``total`` that sums every status returned by the query.
    """

    def _summarise(rows):
        # rows: (status, count) pairs from the GROUP BY query.
        by_status = {state: amount for state, amount in rows}
        return {
            'pending': by_status.get('PENDING', 0),
            'processing': by_status.get('PROCESSING', 0),
            'completed': by_status.get('COMPLETED', 0),
            'failed': by_status.get('FAILED', 0),
            'retry': by_status.get('RETRY', 0),
            'total': sum(by_status.values())
        }

    try:
        from app import db
        from sqlalchemy import func

        # Counts over all jobs.
        overall_rows = (
            db.session.query(TranslationJob.status, func.count(TranslationJob.id))
            .group_by(TranslationJob.status)
            .all()
        )

        metrics_data = {
            'timestamp': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"),
            'jobs': _summarise(overall_rows)
        }

        # Counts over jobs created within the last day.
        from datetime import timedelta
        cutoff = datetime.utcnow() - timedelta(days=1)

        recent_rows = (
            db.session.query(TranslationJob.status, func.count(TranslationJob.id))
            .filter(TranslationJob.created_at >= cutoff)
            .group_by(TranslationJob.status)
            .all()
        )
        metrics_data['recent_24h'] = _summarise(recent_rows)

        return jsonify(create_response(success=True, data=metrics_data))

    except Exception as e:
        logger.error(f"Get metrics error: {str(e)}")
        failure = create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='取得系統指標失敗'
        )
        return jsonify(failure), 500
|
||||
|
||||
|
||||
@health_bp.route('/version', methods=['GET'])
def get_version():
    """Return the application name and version plus the Python and Flask versions."""
    try:
        # Imported here so an import problem is reported as a JSON error.
        import sys
        import flask

        version_info = {
            'application': 'PANJIT Document Translator',
            'version': '1.0.0',
            'build_date': '2024-01-28',
            'python_version': sys.version,
            'flask_version': flask.__version__
        }

        return jsonify(create_response(success=True, data=version_info))

    except Exception as e:
        logger.error(f"Get version error: {str(e)}")
        failure = create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='取得版本資訊失敗'
        )
        return jsonify(failure), 500
|
||||
|
||||
|
||||
@health_bp.route('/ping', methods=['GET'])
def ping():
    """Liveness probe: answer ``pong`` with the current formatted timestamp."""
    stamp = format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S")
    reply = {
        'status': 'ok',
        'timestamp': stamp,
        'message': 'pong'
    }
    return jsonify(reply)
|
548
app/api/jobs.py
Normal file
548
app/api/jobs.py
Normal file
@@ -0,0 +1,548 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
翻譯任務管理 API
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
from flask import Blueprint, request, jsonify, g
|
||||
from app.utils.decorators import jwt_login_required, admin_required
|
||||
from app.utils.validators import (
|
||||
validate_job_uuid,
|
||||
validate_pagination,
|
||||
validate_date_range
|
||||
)
|
||||
from app.utils.helpers import create_response, calculate_processing_time
|
||||
from app.utils.exceptions import ValidationError
|
||||
from app.utils.logger import get_logger
|
||||
from app.models.job import TranslationJob
|
||||
from app.models.stats import APIUsageStats
|
||||
from app.models.log import SystemLog
|
||||
from sqlalchemy import and_, or_
|
||||
|
||||
jobs_bp = Blueprint('jobs', __name__, url_prefix='/jobs')
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@jobs_bp.route('', methods=['GET'])
@jwt_login_required
def get_user_jobs():
    """List the current user's jobs, newest first, with pagination.

    Query parameters:
        page:     page number (default 1)
        per_page: page size (default 20)
        status:   ``all`` (default) or one of PENDING, PROCESSING,
                  COMPLETED, FAILED, RETRY (case-insensitive); any other
                  value applies no status filter

    Soft-deleted jobs (``deleted_at`` set) are excluded.
    """
    try:
        args = request.args
        page = args.get('page', 1, type=int)
        per_page = args.get('per_page', 20, type=int)
        status = args.get('status', 'all')

        # Raises ValidationError on unacceptable paging values.
        page, per_page = validate_pagination(page, per_page)

        # Own jobs only, without soft-deleted rows.
        query = (
            TranslationJob.query
            .filter_by(user_id=g.current_user_id)
            .filter(TranslationJob.deleted_at.is_(None))
        )

        # Optional status filter; unknown values are ignored.
        wanted_status = (status or '').upper()
        if status != 'all' and wanted_status in ['PENDING', 'PROCESSING', 'COMPLETED', 'FAILED', 'RETRY']:
            query = query.filter_by(status=wanted_status)

        pagination = query.order_by(TranslationJob.created_at.desc()).paginate(
            page=page,
            per_page=per_page,
            error_out=False
        )

        jobs_data = []
        for job in pagination.items:
            entry = job.to_dict(include_files=False)

            # Processing time is only known once the job has finished.
            if job.processing_started_at and job.completed_at:
                entry['processing_time'] = calculate_processing_time(
                    job.processing_started_at, job.completed_at
                )

            # Queue position only makes sense while the job is waiting.
            if job.status == 'PENDING':
                entry['queue_position'] = TranslationJob.get_queue_position(job.job_uuid)

            jobs_data.append(entry)

        paging = {
            'page': page,
            'per_page': per_page,
            'total': pagination.total,
            'pages': pagination.pages,
            'has_prev': pagination.has_prev,
            'has_next': pagination.has_next
        }
        return jsonify(create_response(
            success=True,
            data={'jobs': jobs_data, 'pagination': paging}
        ))

    except ValidationError as e:
        rejected = create_response(success=False, error=e.error_code, message=str(e))
        return jsonify(rejected), 400

    except Exception as e:
        logger.error(f"Get user jobs error: {str(e)}")
        failure = create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='取得任務列表失敗'
        )
        return jsonify(failure), 500
|
||||
|
||||
|
||||
@jobs_bp.route('/<job_uuid>', methods=['GET'])
@jwt_login_required
def get_job_detail(job_uuid):
    """Return the full detail of one job, including its files.

    The caller must be the job owner or an admin; soft-deleted jobs are
    treated as not found. The payload is extended with the processing
    time (when processing has started), the queue position (PENDING jobs)
    and the owner's API usage between job creation and completion
    (COMPLETED jobs).
    """

    def _reject(error_code, message, http_status):
        # Single place that builds the JSON error envelope for this view.
        payload = create_response(success=False, error=error_code, message=message)
        return jsonify(payload), http_status

    try:
        # Raises ValidationError when the UUID is malformed.
        validate_job_uuid(job_uuid)

        job = (
            TranslationJob.query
            .filter_by(job_uuid=job_uuid)
            .filter(TranslationJob.deleted_at.is_(None))
            .first()
        )
        if not job:
            return _reject('JOB_NOT_FOUND', '任務不存在', 404)

        # Only the owner or an administrator may read the job.
        if not (job.user_id == g.current_user_id or g.is_admin):
            return _reject('PERMISSION_DENIED', '無權限存取此任務', 403)

        job_data = job.to_dict(include_files=True)

        # Finished jobs report start-to-completion; running jobs pass only
        # the start time.
        if job.processing_started_at:
            if job.completed_at:
                job_data['processing_time'] = calculate_processing_time(
                    job.processing_started_at, job.completed_at
                )
            else:
                job_data['processing_time'] = calculate_processing_time(
                    job.processing_started_at
                )

        if job.status == 'PENDING':
            job_data['queue_position'] = TranslationJob.get_queue_position(job.job_uuid)

        if job.status == 'COMPLETED':
            job_data['api_usage'] = APIUsageStats.get_user_statistics(
                user_id=job.user_id,
                start_date=job.created_at,
                end_date=job.completed_at
            )

        return jsonify(create_response(success=True, data={'job': job_data}))

    except ValidationError as e:
        return _reject(e.error_code, str(e), 400)

    except Exception as e:
        logger.error(f"Get job detail error: {str(e)}")
        return _reject('SYSTEM_ERROR', '取得任務詳情失敗', 500)
|
||||
|
||||
|
||||
@jobs_bp.route('/<job_uuid>/retry', methods=['POST'])
@jwt_login_required
def retry_job(job_uuid):
    """Reset a failed job to PENDING and queue it for translation again.

    The caller must be the job owner or an admin, and ``job.can_retry()``
    must allow the retry. Soft-deleted jobs are treated as not found.

    The job's previous error message is captured before the status reset so
    the audit log keeps it. The translation task is then dispatched through
    Celery, the same way the upload endpoint does it. If dispatching fails
    the error is logged and the job stays PENDING.

    Returns:
        JSON with the job UUID, status, retry count and queue position, or
        a JSON error payload with 400 (invalid UUID / cannot retry), 403
        (not owner or admin), 404 (job missing) or 500 (any other
        exception).
    """
    try:
        # Raises ValidationError when the UUID is malformed.
        validate_job_uuid(job_uuid)

        job = TranslationJob.query.filter_by(job_uuid=job_uuid).filter(TranslationJob.deleted_at.is_(None)).first()

        if not job:
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_FOUND',
                message='任務不存在'
            )), 404

        # Only the owner or an administrator may retry.
        if job.user_id != g.current_user_id and not g.is_admin:
            return jsonify(create_response(
                success=False,
                error='PERMISSION_DENIED',
                message='無權限操作此任務'
            )), 403

        if not job.can_retry():
            return jsonify(create_response(
                success=False,
                error='CANNOT_RETRY',
                message='任務無法重試(狀態不正確或重試次數已達上限)'
            )), 400

        # Read the error before the reset below can overwrite it.
        previous_error = job.error_message

        job.update_status('PENDING', error_message=None)
        job.increment_retry()

        queue_position = TranslationJob.get_queue_position(job.job_uuid)

        SystemLog.info(
            'jobs.retry',
            f'Job retry requested: {job_uuid}',
            user_id=g.current_user_id,
            job_id=job.id,
            extra_data={
                'retry_count': job.retry_count,
                'previous_error': previous_error
            }
        )

        logger.info(f"Job retry requested: {job_uuid} (retry count: {job.retry_count})")

        # Hand the job back to the worker. A dispatch failure must not turn
        # an accepted retry into an error response: the job stays PENDING.
        try:
            from app.tasks.translation import process_translation_job
            task = process_translation_job.delay(job.id)
            logger.info(f"Translation task re-queued with Celery: {task.id} for job {job.job_uuid}")
        except Exception as dispatch_error:
            logger.error(f"Failed to queue retry for job {job.job_uuid}: {str(dispatch_error)}")

        return jsonify(create_response(
            success=True,
            data={
                'job_uuid': job.job_uuid,
                'status': job.status,
                'retry_count': job.retry_count,
                'queue_position': queue_position
            },
            message='任務已重新加入佇列'
        ))

    except ValidationError as e:
        return jsonify(create_response(
            success=False,
            error=e.error_code,
            message=str(e)
        )), 400

    except Exception as e:
        logger.error(f"Job retry error: {str(e)}")

        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='重試任務失敗'
        )), 500
|
||||
|
||||
|
||||
@jobs_bp.route('/statistics', methods=['GET'])
@jwt_login_required
def get_user_statistics():
    """Return job and API usage statistics for the current user.

    Optional ``start_date`` / ``end_date`` query parameters bound the
    period. They are validated only when at least one of them is given;
    otherwise both are passed on as ``None``.
    """
    try:
        start_date = request.args.get('start_date')
        end_date = request.args.get('end_date')

        # Raises ValidationError on an unacceptable range.
        if any((start_date, end_date)):
            start_date, end_date = validate_date_range(start_date, end_date)

        # The same filter is applied to both statistics sources.
        scope = {
            'user_id': g.current_user_id,
            'start_date': start_date,
            'end_date': end_date
        }
        job_stats = TranslationJob.get_statistics(**scope)
        api_stats = APIUsageStats.get_user_statistics(**scope)

        body = {
            'job_statistics': job_stats,
            'api_statistics': api_stats
        }
        return jsonify(create_response(success=True, data=body))

    except ValidationError as e:
        rejected = create_response(success=False, error=e.error_code, message=str(e))
        return jsonify(rejected), 400

    except Exception as e:
        logger.error(f"Get user statistics error: {str(e)}")
        failure = create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='取得統計資料失敗'
        )
        return jsonify(failure), 500
|
||||
|
||||
|
||||
@jobs_bp.route('/queue/status', methods=['GET'])
def get_queue_status():
    """Return queue counters and up to five jobs currently being processed.

    This route has no login decorator.
    """
    # NOTE(review): the response includes job UUIDs and original file names
    # of every user's processing jobs - confirm this is meant to be public.
    try:
        pending_count = TranslationJob.query.filter_by(status='PENDING').count()
        processing_count = TranslationJob.query.filter_by(status='PROCESSING').count()

        # Up to five processing jobs, ordered by processing start time.
        active_jobs = (
            TranslationJob.query
            .filter_by(status='PROCESSING')
            .order_by(TranslationJob.processing_started_at)
            .limit(5)
            .all()
        )

        processing_jobs_data = [
            {
                'job_uuid': job.job_uuid,
                'original_filename': job.original_filename,
                'progress': float(job.progress) if job.progress else 0.0,
                'processing_started_at': (
                    job.processing_started_at.isoformat()
                    if job.processing_started_at else None
                ),
                'processing_time': (
                    calculate_processing_time(job.processing_started_at)
                    if job.processing_started_at else None
                )
            }
            for job in active_jobs
        ]

        counters = {
            'pending': pending_count,
            'processing': processing_count,
            'total_in_queue': pending_count + processing_count
        }
        return jsonify(create_response(
            success=True,
            data={
                'queue_status': counters,
                'processing_jobs': processing_jobs_data
            }
        ))

    except Exception as e:
        logger.error(f"Get queue status error: {str(e)}")
        failure = create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='取得佇列狀態失敗'
        )
        return jsonify(failure), 500
|
||||
|
||||
|
||||
@jobs_bp.route('/<job_uuid>/cancel', methods=['POST'])
@jwt_login_required
def cancel_job(job_uuid):
    """Cancel a job that is still PENDING or PROCESSING.

    Args:
        job_uuid: UUID of the job, taken from the URL.

    Returns:
        A JSON success response with the job's UUID and new status, or an
        error response: 400 (invalid UUID / job not cancellable), 403 (caller
        is neither the owner nor an admin), 404 (job missing or soft-deleted),
        500 (unexpected failure).
    """
    def _reject(code, text, http_status):
        # Uniform error envelope for every failure path below.
        body = create_response(success=False, error=code, message=text)
        return jsonify(body), http_status

    try:
        validate_job_uuid(job_uuid)

        # Soft-deleted rows are treated as non-existent.
        job = (
            TranslationJob.query
            .filter_by(job_uuid=job_uuid)
            .filter(TranslationJob.deleted_at.is_(None))
            .first()
        )
        if not job:
            return _reject('JOB_NOT_FOUND', '任務不存在', 404)

        # Only the owner or an administrator may cancel.
        if job.user_id != g.current_user_id and not g.is_admin:
            return _reject('PERMISSION_DENIED', '無權限操作此任務', 403)

        if job.status not in ('PENDING', 'PROCESSING'):
            return _reject('CANNOT_CANCEL', '只能取消等待中或處理中的任務', 400)

        if job.status == 'PROCESSING':
            # A running job also needs its Celery task revoked; this is best
            # effort, the cancellation proceeds even when the revoke fails.
            try:
                from app.services.celery_service import revoke_task
                revoke_task(job.job_uuid)
                logger.info(f"Celery task revoked for job: {job.job_uuid}")
            except Exception as celery_error:
                logger.warning(f"Failed to revoke Celery task for job {job.job_uuid}: {celery_error}")

        # A cancelled job is recorded as FAILED; the message keeps the status
        # the job had before cancellation, so build it before updating.
        cancel_message = f'使用者取消任務 (原狀態: {job.status})'
        job.update_status('FAILED', error_message=cancel_message)

        SystemLog.info(
            'jobs.cancel',
            f'Job cancelled by user: {job_uuid}',
            user_id=g.current_user_id,
            job_id=job.id
        )
        logger.info(f"Job cancelled by user: {job_uuid}")

        result = {'job_uuid': job.job_uuid, 'status': job.status}
        return jsonify(create_response(
            success=True,
            data=result,
            message='任務已取消'
        ))

    except ValidationError as exc:
        return _reject(exc.error_code, str(exc), 400)

    except Exception as exc:
        logger.error(f"Cancel job error: {str(exc)}")
        return _reject('SYSTEM_ERROR', '取消任務失敗', 500)
|
||||
|
||||
|
||||
@jobs_bp.route('/<job_uuid>', methods=['DELETE'])
@jwt_login_required
def delete_job(job_uuid):
    """Soft-delete a job and remove its files from disk.

    A job that is already soft-deleted is treated as not found, matching
    ``cancel_job``; previously a repeated DELETE re-ran the whole deletion
    and logged it again.

    Args:
        job_uuid: UUID of the job, taken from the URL.

    Returns:
        A JSON success response, or an error response: 400 (invalid UUID),
        403 (caller is neither the owner nor an admin), 404 (job missing or
        already deleted), 500 (unexpected failure).
    """
    import shutil
    from pathlib import Path

    try:
        validate_job_uuid(job_uuid)

        # Exclude rows that were soft-deleted earlier.
        job = TranslationJob.query.filter_by(job_uuid=job_uuid).filter(
            TranslationJob.deleted_at.is_(None)
        ).first()

        if not job:
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_FOUND',
                message='任務不存在'
            )), 404

        # Only the owner or an administrator may delete.
        if job.user_id != g.current_user_id and not g.is_admin:
            return jsonify(create_response(
                success=False,
                error='PERMISSION_DENIED',
                message='無權限操作此任務'
            )), 403

        if job.status == 'PROCESSING':
            # Best effort: try to stop the running Celery task first, but
            # continue with the deletion even when the revoke fails.
            try:
                from app.services.celery_service import revoke_task
                revoke_task(job.job_uuid)
                logger.info(f"Celery task revoked before deletion for job: {job.job_uuid}")
            except Exception as celery_error:
                logger.warning(f"Failed to revoke Celery task before deletion for job {job.job_uuid}: {celery_error}")

        # Remove the job's directory. File cleanup failures are logged and do
        # not block the database soft-delete.
        try:
            if job.file_path and Path(job.file_path).exists():
                job_dir = Path(job.file_path).parent
                # Safety check: only remove a directory named after the job
                # UUID, never an arbitrary parent directory.
                if job_dir.exists() and job_dir.name == job.job_uuid:
                    shutil.rmtree(job_dir)
                    logger.info(f"Deleted job directory: {job_dir}")
        except Exception as file_error:
            logger.warning(f"Failed to delete job files: {str(file_error)}")

        SystemLog.info(
            'jobs.delete',
            f'Job deleted by user: {job_uuid}',
            user_id=g.current_user_id,
            job_id=job.id,
            extra_data={
                'filename': job.original_filename,
                'status': job.status
            }
        )

        # Soft delete keeps the row available for reporting.
        job.soft_delete()

        logger.info(f"Job soft deleted by user: {job_uuid}")

        return jsonify(create_response(
            success=True,
            message='任務已刪除'
        ))

    except ValidationError as e:
        return jsonify(create_response(
            success=False,
            error=e.error_code,
            message=str(e)
        )), 400

    except Exception as e:
        logger.error(f"Delete job error: {str(e)}")

        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='刪除任務失敗'
        )), 500
|
331
app/api/notification.py
Normal file
331
app/api/notification.py
Normal file
@@ -0,0 +1,331 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
通知系統 API 路由
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
from flask import Blueprint, jsonify, request, g
|
||||
from app.utils.decorators import jwt_login_required
|
||||
from sqlalchemy import desc, and_, or_
|
||||
from datetime import datetime, timedelta
|
||||
from app import db
|
||||
from app.models import Notification, NotificationType, User
|
||||
from app.utils.response import create_taiwan_response
|
||||
# 移除不需要的導入
|
||||
|
||||
# 建立藍圖
|
||||
notification_bp = Blueprint('notification', __name__, url_prefix='/notifications')
|
||||
|
||||
|
||||
@notification_bp.route('', methods=['GET'])
@jwt_login_required
def get_notifications():
    """Return a page of the current user's non-expired notifications.

    Query parameters:
        page: page number (default 1).
        per_page: page size (default 20, capped at 100).
        status: ``'unread'``, ``'read'`` or anything else for all.
        type: optional notification type to filter on.

    Returns:
        A JSON response with ``notifications``, ``pagination`` and
        ``unread_count``, or a 500 response when the lookup fails.
    """
    try:
        current_user_id = g.current_user_id

        page = request.args.get('page', 1, type=int)
        per_page = min(request.args.get('per_page', 20, type=int), 100)
        status_filter = request.args.get('status', 'all')
        type_filter = request.args.get('type', None)

        # Evaluate "now" once so the listing and the unread counter apply the
        # same expiry cut-off.
        now = datetime.now()
        not_expired = or_(
            Notification.expires_at.is_(None),
            Notification.expires_at > now
        )

        query = Notification.query.filter_by(user_id=current_user_id).filter(not_expired)

        if status_filter == 'unread':
            query = query.filter_by(is_read=False)
        elif status_filter == 'read':
            query = query.filter_by(is_read=True)

        if type_filter:
            query = query.filter_by(type=type_filter)

        # Unread first, then newest first.
        query = query.order_by(Notification.is_read.asc(), desc(Notification.created_at))

        paginated = query.paginate(
            page=page, per_page=per_page, error_out=False
        )

        # The unread counter ignores the status/type filters on purpose: it
        # covers all of the user's non-expired notifications.
        unread_count = Notification.query.filter_by(
            user_id=current_user_id,
            is_read=False
        ).filter(not_expired).count()

        return jsonify(create_taiwan_response(
            success=True,
            data={
                'notifications': [n.to_dict() for n in paginated.items],
                'pagination': {
                    'total': paginated.total,
                    # With error_out=False, paginate() replaces out-of-range
                    # values instead of failing; report what was actually
                    # served rather than what was requested.
                    'page': paginated.page,
                    'per_page': paginated.per_page,
                    'pages': paginated.pages
                },
                'unread_count': unread_count
            },
            message='獲取通知列表成功'
        ))

    except Exception as e:
        return jsonify(create_taiwan_response(
            success=False,
            error=f'獲取通知失敗:{str(e)}'
        )), 500
|
||||
|
||||
|
||||
@notification_bp.route('/<notification_id>', methods=['GET'])
@jwt_login_required
def get_notification(notification_id):
    """Return one notification of the current user and mark it as read.

    Args:
        notification_id: value matched against ``notification_uuid``.

    Returns:
        A JSON response with the notification, a 404 response when the
        current user has no such notification, or a 500 response on failure.
    """
    try:
        current_user_id = g.current_user_id

        # Scoping by user_id prevents reading other users' notifications.
        notification = Notification.query.filter_by(
            notification_uuid=notification_id,
            user_id=current_user_id
        ).first()

        if not notification:
            return jsonify(create_taiwan_response(
                success=False,
                error='通知不存在'
            )), 404

        # Viewing a notification implicitly marks it as read.
        if not notification.is_read:
            notification.mark_as_read()
            db.session.commit()

        return jsonify(create_taiwan_response(
            success=True,
            data=notification.to_dict(),
            message='獲取通知成功'
        ))

    except Exception as e:
        # Discard a half-applied read flag, as the delete/clear handlers do.
        db.session.rollback()
        return jsonify(create_taiwan_response(
            success=False,
            error=f'獲取通知失敗:{str(e)}'
        )), 500
|
||||
|
||||
|
||||
@notification_bp.route('/<notification_id>/read', methods=['POST'])
@jwt_login_required
def mark_notification_read(notification_id):
    """Mark one notification of the current user as read.

    Args:
        notification_id: value matched against ``notification_uuid``.

    Returns:
        A JSON success response, a 404 response when the current user has no
        such notification, or a 500 response on failure.
    """
    try:
        current_user_id = g.current_user_id

        notification = Notification.query.filter_by(
            notification_uuid=notification_id,
            user_id=current_user_id
        ).first()

        if not notification:
            return jsonify(create_taiwan_response(
                success=False,
                error='通知不存在'
            )), 404

        notification.mark_as_read()
        db.session.commit()

        return jsonify(create_taiwan_response(
            success=True,
            message='標記已讀成功'
        ))

    except Exception as e:
        # Undo the pending change so the session is clean again, as the
        # delete/clear handlers do.
        db.session.rollback()
        return jsonify(create_taiwan_response(
            success=False,
            error=f'標記已讀失敗:{str(e)}'
        )), 500
|
||||
|
||||
|
||||
@notification_bp.route('/read-all', methods=['POST'])
@jwt_login_required
def mark_all_read():
    """Mark every unread, non-expired notification of the user as read.

    Returns:
        A JSON response whose ``marked_count`` is the number of notifications
        updated, or a 500 response on failure.
    """
    try:
        current_user_id = g.current_user_id

        unread_notifications = Notification.query.filter_by(
            user_id=current_user_id,
            is_read=False
        ).filter(or_(
            Notification.expires_at.is_(None),
            Notification.expires_at > datetime.now()
        )).all()

        for notification in unread_notifications:
            notification.mark_as_read()

        # One commit for the whole batch.
        db.session.commit()

        marked_count = len(unread_notifications)
        return jsonify(create_taiwan_response(
            success=True,
            data={'marked_count': marked_count},
            message=f'已標記 {marked_count} 個通知為已讀'
        ))

    except Exception as e:
        # Drop the partially applied batch, as the delete/clear handlers do.
        db.session.rollback()
        return jsonify(create_taiwan_response(
            success=False,
            error=f'標記全部已讀失敗:{str(e)}'
        )), 500
|
||||
|
||||
|
||||
@notification_bp.route('/<notification_id>', methods=['DELETE'])
@jwt_login_required
def delete_notification(notification_id):
    """Delete one notification that belongs to the current user.

    Args:
        notification_id: value matched against ``notification_uuid``.

    Returns:
        A JSON success response, a 404 response when the current user has no
        such notification, or a 500 response (after a rollback) on failure.
    """
    try:
        owner_id = g.current_user_id

        # The lookup is scoped to the caller, so other users' rows are never hit.
        lookup = Notification.query.filter_by(
            notification_uuid=notification_id,
            user_id=owner_id
        )
        target = lookup.first()

        if not target:
            missing = create_taiwan_response(success=False, error='通知不存在')
            return jsonify(missing), 404

        db.session.delete(target)
        db.session.commit()

        done = create_taiwan_response(success=True, message='刪除通知成功')
        return jsonify(done)

    except Exception as exc:
        db.session.rollback()
        failed = create_taiwan_response(
            success=False,
            error=f'刪除通知失敗:{str(exc)}'
        )
        return jsonify(failed), 500
|
||||
|
||||
|
||||
@notification_bp.route('/clear', methods=['POST'])
@jwt_login_required
def clear_read_notifications():
    """Delete every notification the current user has already read.

    Returns:
        A JSON response whose ``deleted_count`` is the value returned by the
        bulk delete, or a 500 response (after a rollback) on failure.
    """
    try:
        owner_id = g.current_user_id

        # Bulk delete issued straight from the query.
        read_items = Notification.query.filter_by(
            user_id=owner_id,
            is_read=True
        )
        deleted_count = read_items.delete()
        db.session.commit()

        outcome = create_taiwan_response(
            success=True,
            data={'deleted_count': deleted_count},
            message=f'已清除 {deleted_count} 個已讀通知'
        )
        return jsonify(outcome)

    except Exception as exc:
        db.session.rollback()
        failed = create_taiwan_response(
            success=False,
            error=f'清除通知失敗:{str(exc)}'
        )
        return jsonify(failed), 500
|
||||
|
||||
|
||||
@notification_bp.route('/test', methods=['POST'])
@jwt_login_required
def create_test_notification():
    """Create an INFO test notification for the current user (development aid).

    Returns:
        A JSON response with the created notification, or a 500 response on
        failure.
    """
    try:
        owner_id = g.current_user_id

        # The message embeds the creation time so each test notification is distinguishable.
        stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        created = create_notification(
            user_id=owner_id,
            title="測試通知",
            message=f"這是一個測試通知,創建於 {stamp}",
            notification_type=NotificationType.INFO
        )

        outcome = create_taiwan_response(
            success=True,
            data=created.to_dict(),
            message='測試通知已創建'
        )
        return jsonify(outcome)

    except Exception as exc:
        failed = create_taiwan_response(
            success=False,
            error=f'創建測試通知失敗:{str(exc)}'
        )
        return jsonify(failed), 500
|
||||
|
||||
|
||||
# Utility: create and persist a notification
def create_notification(user_id, title, message, notification_type=NotificationType.INFO,
                        job_uuid=None, extra_data=None):
    """Create a notification, commit it and return it.

    Args:
        user_id: ID of the user who receives the notification.
        title: notification title.
        message: notification body.
        notification_type: a ``NotificationType`` member, or the raw type
            value itself (anything without a ``.value`` attribute is stored
            as given).
        job_uuid: optional UUID of the related job; when given, the
            notification links to ``/job/<job_uuid>``.
        extra_data: optional extra payload stored with the notification.

    Returns:
        Notification: the persisted notification object.

    Raises:
        Exception: whatever the model or the session raised; the session is
            rolled back before the original exception propagates.
    """
    try:
        notification = Notification(
            user_id=user_id,
            # Accept both the enum member and its plain value.
            type=getattr(notification_type, 'value', notification_type),
            title=title,
            message=message,
            job_uuid=job_uuid,
            extra_data=extra_data,
            link=f"/job/{job_uuid}" if job_uuid else None
        )

        db.session.add(notification)
        db.session.commit()

        return notification

    except Exception:
        db.session.rollback()
        # Bare raise keeps the original traceback intact.
        raise
|
183
app/config.py
Normal file
183
app/config.py
Normal file
@@ -0,0 +1,183 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
應用程式配置模組
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
import os
|
||||
import secrets
|
||||
from pathlib import Path
|
||||
from datetime import timedelta
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# 載入環境變數
|
||||
load_dotenv()
|
||||
|
||||
class Config:
    """Base configuration shared by every environment.

    Values are read from environment variables when the class body executes;
    the Dify settings are filled in later by ``load_dify_config``.
    """

    # --- Core application settings ---
    # Without SECRET_KEY in the environment, a random key is generated each
    # time this class body runs.
    SECRET_KEY = os.getenv('SECRET_KEY') or secrets.token_hex(32)
    APP_NAME = os.getenv('APP_NAME', 'PANJIT Document Translator')

    # --- Database ---
    DATABASE_URL = os.getenv('DATABASE_URL')
    # Rewrite a bare mysql:// URL so that the PyMySQL driver is selected.
    if DATABASE_URL and DATABASE_URL.startswith("mysql://"):
        DATABASE_URL = DATABASE_URL.replace("mysql://", "mysql+pymysql://", 1)

    SQLALCHEMY_DATABASE_URI = DATABASE_URL
    SQLALCHEMY_TRACK_MODIFICATIONS = False
    SQLALCHEMY_ENGINE_OPTIONS = {
        'pool_pre_ping': True,
        'pool_recycle': 3600,
        'connect_args': {
            'charset': os.getenv('MYSQL_CHARSET', 'utf8mb4'),
            'connect_timeout': 30,
            'read_timeout': 30,
            'write_timeout': 30,
        }
    }

    # --- JWT authentication ---
    JWT_SECRET_KEY = os.getenv('JWT_SECRET_KEY') or SECRET_KEY
    JWT_ACCESS_TOKEN_EXPIRES = timedelta(hours=8)
    JWT_REFRESH_TOKEN_EXPIRES = timedelta(days=30)
    JWT_ALGORITHM = 'HS256'

    # --- Redis ---
    REDIS_URL = os.getenv('REDIS_URL', 'redis://localhost:6379/0')

    # --- Celery ---
    CELERY_BROKER_URL = os.getenv('CELERY_BROKER_URL', 'redis://localhost:6379/0')
    CELERY_RESULT_BACKEND = os.getenv('CELERY_RESULT_BACKEND', 'redis://localhost:6379/0')
    CELERY_TASK_SERIALIZER = 'json'
    CELERY_RESULT_SERIALIZER = 'json'
    CELERY_ACCEPT_CONTENT = ['json']
    CELERY_TIMEZONE = 'Asia/Taipei'
    CELERY_ENABLE_UTC = False  # set to False so Celery uses the local timezone

    # --- LDAP ---
    LDAP_SERVER = os.getenv('LDAP_SERVER')
    LDAP_PORT = int(os.getenv('LDAP_PORT', 389))
    LDAP_USE_SSL = os.getenv('LDAP_USE_SSL', 'false').lower() == 'true'
    LDAP_BIND_USER_DN = os.getenv('LDAP_BIND_USER_DN')
    LDAP_BIND_USER_PASSWORD = os.getenv('LDAP_BIND_USER_PASSWORD')
    LDAP_SEARCH_BASE = os.getenv('LDAP_SEARCH_BASE')
    LDAP_USER_LOGIN_ATTR = os.getenv('LDAP_USER_LOGIN_ATTR', 'userPrincipalName')

    # --- SMTP ---
    SMTP_SERVER = os.getenv('SMTP_SERVER')
    SMTP_PORT = int(os.getenv('SMTP_PORT', 587))
    SMTP_USE_TLS = os.getenv('SMTP_USE_TLS', 'false').lower() == 'true'
    SMTP_USE_SSL = os.getenv('SMTP_USE_SSL', 'false').lower() == 'true'
    SMTP_AUTH_REQUIRED = os.getenv('SMTP_AUTH_REQUIRED', 'false').lower() == 'true'
    SMTP_SENDER_EMAIL = os.getenv('SMTP_SENDER_EMAIL')
    SMTP_SENDER_PASSWORD = os.getenv('SMTP_SENDER_PASSWORD', '')

    # --- File uploads ---
    UPLOAD_FOLDER = Path(os.getenv('UPLOAD_FOLDER', 'uploads')).absolute()
    MAX_CONTENT_LENGTH = int(os.getenv('MAX_CONTENT_LENGTH', 25 * 1024 * 1024))  # 25MB
    ALLOWED_EXTENSIONS = {'.docx', '.doc', '.pptx', '.xlsx', '.xls', '.pdf'}
    FILE_RETENTION_DAYS = int(os.getenv('FILE_RETENTION_DAYS', 7))

    # --- Dify API (populated from api.txt by load_dify_config) ---
    DIFY_API_BASE_URL = ''
    DIFY_API_KEY = ''

    # Separate translation / OCR endpoints
    DIFY_TRANSLATION_BASE_URL = ''
    DIFY_TRANSLATION_API_KEY = ''
    DIFY_OCR_BASE_URL = ''
    DIFY_OCR_API_KEY = ''

    # --- Logging ---
    LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO')
    LOG_FILE = Path(os.getenv('LOG_FILE', 'logs/app.log')).absolute()

    # --- Administrator ---
    ADMIN_EMAIL = os.getenv('ADMIN_EMAIL', 'ymirliu@panjit.com.tw')

    @classmethod
    def load_dify_config(cls):
        """Load the Dify API settings from ``api.txt`` in the working directory.

        Each non-empty, non-comment line has the form ``<key>: <value>``.
        The legacy keys ``base_url`` / ``api`` set both the generic and the
        translation attributes. A missing file is ignored; any error while
        reading is printed and swallowed, leaving whatever was set so far.
        """
        settings_file = Path('api.txt')
        if not settings_file.exists():
            return

        # Line prefix -> class attributes that receive the value.
        prefix_targets = (
            ('translation_base_url:', ('DIFY_TRANSLATION_BASE_URL',)),
            ('translation_api:', ('DIFY_TRANSLATION_API_KEY',)),
            ('ocr_base_url:', ('DIFY_OCR_BASE_URL',)),
            ('ocr_api:', ('DIFY_OCR_API_KEY',)),
            # Legacy single-endpoint format
            ('base_url:', ('DIFY_API_BASE_URL', 'DIFY_TRANSLATION_BASE_URL')),
            ('api:', ('DIFY_API_KEY', 'DIFY_TRANSLATION_API_KEY')),
        )

        try:
            with open(settings_file, 'r', encoding='utf-8') as handle:
                for raw in handle:
                    entry = raw.strip()
                    if not entry or entry.startswith('#'):
                        continue

                    for prefix, targets in prefix_targets:
                        if entry.startswith(prefix):
                            # Split on the first colon only: URLs contain colons too.
                            value = entry.split(':', 1)[1].strip()
                            for attr in targets:
                                setattr(cls, attr, value)
                            break
        except Exception as e:
            print(f"Error loading Dify config: {e}")

    @classmethod
    def init_directories(cls):
        """Create the upload folder and the log file's directory if missing."""
        for required in (cls.UPLOAD_FOLDER, cls.LOG_FILE.parent):
            required.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
class DevelopmentConfig(Config):
    """Configuration for the development environment: debug mode on."""

    FLASK_ENV = 'development'
    DEBUG = True
|
||||
|
||||
|
||||
class ProductionConfig(Config):
    """Configuration for the production environment: debug mode off."""

    FLASK_ENV = 'production'
    DEBUG = False

    # Base engine options plus explicit connection-pool sizing.
    SQLALCHEMY_ENGINE_OPTIONS = dict(
        Config.SQLALCHEMY_ENGINE_OPTIONS,
        pool_size=10,
        max_overflow=20,
    )
|
||||
|
||||
|
||||
class TestingConfig(Config):
    """Configuration for the test environment: in-memory SQLite database."""

    TESTING = True
    WTF_CSRF_ENABLED = False
    SQLALCHEMY_DATABASE_URI = 'sqlite:///:memory:'
    # The engine options inherited from Config carry PyMySQL-specific
    # connect_args (charset, connect/read/write timeouts). SQLAlchemy passes
    # connect_args straight to the driver's connect(), and sqlite3.connect()
    # accepts none of those keywords, so they must not be inherited here.
    SQLALCHEMY_ENGINE_OPTIONS = {}
|
||||
|
||||
|
||||
# Maps an environment name to its configuration class;
# 'default' resolves to the development configuration.
config = dict(
    development=DevelopmentConfig,
    production=ProductionConfig,
    testing=TestingConfig,
    default=DevelopmentConfig,
)
|
30
app/models/__init__.py
Normal file
30
app/models/__init__.py
Normal file
@@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
資料模型模組
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
from .user import User
|
||||
from .job import TranslationJob, JobFile
|
||||
from .cache import TranslationCache
|
||||
from .stats import APIUsageStats
|
||||
from .log import SystemLog
|
||||
from .notification import Notification, NotificationType
|
||||
from .sys_user import SysUser, LoginLog
|
||||
|
||||
__all__ = [
|
||||
'User',
|
||||
'TranslationJob',
|
||||
'JobFile',
|
||||
'TranslationCache',
|
||||
'APIUsageStats',
|
||||
'SystemLog',
|
||||
'Notification',
|
||||
'NotificationType',
|
||||
'SysUser',
|
||||
'LoginLog'
|
||||
]
|
138
app/models/cache.py
Normal file
138
app/models/cache.py
Normal file
@@ -0,0 +1,138 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
翻譯快取資料模型
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
from sqlalchemy.sql import func
|
||||
from app import db
|
||||
|
||||
|
||||
class TranslationCache(db.Model):
    """Translation cache table (dt_translation_cache).

    One row per (source text hash, source language, target language); the
    combination is enforced by the ``uk_cache`` unique constraint.
    """
    __tablename__ = 'dt_translation_cache'

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    source_text_hash = db.Column(db.String(64), nullable=False, comment='來源文字hash')
    source_language = db.Column(db.String(50), nullable=False, comment='來源語言')
    target_language = db.Column(db.String(50), nullable=False, comment='目標語言')
    source_text = db.Column(db.Text, nullable=False, comment='來源文字')
    translated_text = db.Column(db.Text, nullable=False, comment='翻譯文字')
    created_at = db.Column(db.DateTime, default=func.now(), comment='建立時間')

    # Unique constraint and language-pair index
    __table_args__ = (
        db.UniqueConstraint('source_text_hash', 'source_language', 'target_language', name='uk_cache'),
        db.Index('idx_languages', 'source_language', 'target_language'),
    )

    def __repr__(self):
        # The hash may still be None on an instance that was never populated.
        return f'<TranslationCache {(self.source_text_hash or "")[:8]}...>'

    def to_dict(self):
        """Return the row as a plain dictionary (``created_at`` in ISO format)."""
        return {
            'id': self.id,
            'source_text_hash': self.source_text_hash,
            'source_language': self.source_language,
            'target_language': self.target_language,
            'source_text': self.source_text,
            'translated_text': self.translated_text,
            'created_at': self.created_at.isoformat() if self.created_at else None
        }

    @staticmethod
    def generate_hash(text):
        """Return the hex SHA-256 digest of ``text`` encoded as UTF-8."""
        return hashlib.sha256(text.encode('utf-8')).hexdigest()

    @classmethod
    def _find_entry(cls, text_hash, source_language, target_language):
        """Return the cache row for this hash and language pair, or None."""
        return cls.query.filter_by(
            source_text_hash=text_hash,
            source_language=source_language,
            target_language=target_language
        ).first()

    @classmethod
    def get_translation(cls, source_text, source_language, target_language):
        """Return the cached translation, or None when there is no entry.

        Args:
            source_text: text to look up (hashed with ``generate_hash``).
            source_language: source language of the cached entry.
            target_language: target language of the cached entry.
        """
        cache_entry = cls._find_entry(
            cls.generate_hash(source_text), source_language, target_language
        )
        return cache_entry.translated_text if cache_entry else None

    @classmethod
    def save_translation(cls, source_text, source_language, target_language, translated_text):
        """Insert or update the cached translation and commit.

        Returns:
            True once the change has been committed.

        Raises:
            Exception: whatever the commit raised (for example a violation of
                the ``uk_cache`` constraint); the session is rolled back
                first so it stays usable for the caller.
        """
        text_hash = cls.generate_hash(source_text)
        existing = cls._find_entry(text_hash, source_language, target_language)

        if existing:
            # Refresh the stored translation of an existing entry.
            existing.translated_text = translated_text
        else:
            db.session.add(cls(
                source_text_hash=text_hash,
                source_language=source_language,
                target_language=target_language,
                source_text=source_text,
                translated_text=translated_text
            ))

        try:
            db.session.commit()
        except Exception:
            # A failed commit leaves the session unusable until rolled back.
            db.session.rollback()
            raise
        return True

    @classmethod
    def get_cache_statistics(cls):
        """Return the total entry count, per-language-pair counts and the
        number of entries created during the last seven days."""
        from datetime import datetime, timedelta

        total_entries = cls.query.count()

        # Entry count per (source, target) language pair
        language_pairs = db.session.query(
            cls.source_language,
            cls.target_language,
            func.count(cls.id).label('count')
        ).group_by(cls.source_language, cls.target_language).all()

        # Entries created within the last week (new rows, not cache hits).
        # NOTE(review): created_at defaults to the database's now() while the
        # cut-off uses UTC - confirm the database clock is UTC as well.
        week_ago = datetime.utcnow() - timedelta(days=7)
        recent_entries = cls.query.filter(cls.created_at >= week_ago).count()

        return {
            'total_entries': total_entries,
            'language_pairs': [
                {
                    'source_language': pair.source_language,
                    'target_language': pair.target_language,
                    'count': pair.count
                }
                for pair in language_pairs
            ],
            'recent_entries': recent_entries
        }

    @classmethod
    def clear_old_cache(cls, days_to_keep=90):
        """Delete entries older than ``days_to_keep`` days.

        Returns:
            The number of rows deleted.

        Raises:
            Exception: whatever the delete or commit raised; the session is
                rolled back first.
        """
        from datetime import datetime, timedelta

        cutoff_date = datetime.utcnow() - timedelta(days=days_to_keep)

        try:
            deleted_count = cls.query.filter(
                cls.created_at < cutoff_date
            ).delete(synchronize_session=False)
            db.session.commit()
        except Exception:
            db.session.rollback()
            raise
        return deleted_count
|
327
app/models/job.py
Normal file
327
app/models/job.py
Normal file
@@ -0,0 +1,327 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
翻譯任務資料模型
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
import json
|
||||
import uuid
|
||||
from datetime import datetime, timedelta
|
||||
from sqlalchemy.sql import func
|
||||
from sqlalchemy import event
|
||||
from app import db
|
||||
from app.utils.timezone import format_taiwan_time
|
||||
|
||||
|
||||
class TranslationJob(db.Model):
|
||||
"""翻譯任務表 (dt_translation_jobs)"""
|
||||
__tablename__ = 'dt_translation_jobs'
|
||||
|
||||
id = db.Column(db.Integer, primary_key=True, autoincrement=True)
|
||||
job_uuid = db.Column(db.String(36), unique=True, nullable=False, index=True, comment='任務唯一識別碼')
|
||||
user_id = db.Column(db.Integer, db.ForeignKey('dt_users.id'), nullable=False, comment='使用者ID')
|
||||
original_filename = db.Column(db.String(500), nullable=False, comment='原始檔名')
|
||||
file_extension = db.Column(db.String(10), nullable=False, comment='檔案副檔名')
|
||||
file_size = db.Column(db.BigInteger, nullable=False, comment='檔案大小(bytes)')
|
||||
file_path = db.Column(db.String(1000), nullable=False, comment='檔案路徑')
|
||||
source_language = db.Column(db.String(50), default=None, comment='來源語言')
|
||||
target_languages = db.Column(db.JSON, nullable=False, comment='目標語言陣列')
|
||||
status = db.Column(
|
||||
db.Enum('PENDING', 'PROCESSING', 'COMPLETED', 'FAILED', 'RETRY', name='job_status'),
|
||||
default='PENDING',
|
||||
comment='任務狀態'
|
||||
)
|
||||
progress = db.Column(db.Numeric(5, 2), default=0.00, comment='處理進度(%)')
|
||||
retry_count = db.Column(db.Integer, default=0, comment='重試次數')
|
||||
error_message = db.Column(db.Text, comment='錯誤訊息')
|
||||
total_tokens = db.Column(db.Integer, default=0, comment='總token數')
|
||||
total_cost = db.Column(db.Numeric(10, 4), default=0.0000, comment='總成本')
|
||||
conversation_id = db.Column(db.String(100), comment='Dify對話ID,用於維持翻譯上下文')
|
||||
processing_started_at = db.Column(db.DateTime, comment='開始處理時間')
|
||||
completed_at = db.Column(db.DateTime, comment='完成時間')
|
||||
created_at = db.Column(db.DateTime, default=func.now(), comment='建立時間')
|
||||
updated_at = db.Column(
|
||||
db.DateTime,
|
||||
default=func.now(),
|
||||
onupdate=func.now(),
|
||||
comment='更新時間'
|
||||
)
|
||||
deleted_at = db.Column(db.DateTime, comment='軟刪除時間')
|
||||
|
||||
# 關聯關係
|
||||
files = db.relationship('JobFile', backref='job', lazy='dynamic', cascade='all, delete-orphan')
|
||||
api_usage_stats = db.relationship('APIUsageStats', backref='job', lazy='dynamic')
|
||||
|
||||
def __repr__(self):
|
||||
return f'<TranslationJob {self.job_uuid}>'
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
"""初始化,自動生成 UUID"""
|
||||
super().__init__(**kwargs)
|
||||
if not self.job_uuid:
|
||||
self.job_uuid = str(uuid.uuid4())
|
||||
|
||||
def to_dict(self, include_files=False):
|
||||
"""轉換為字典格式"""
|
||||
data = {
|
||||
'id': self.id,
|
||||
'job_uuid': self.job_uuid,
|
||||
'user_id': self.user_id,
|
||||
'original_filename': self.original_filename,
|
||||
'file_extension': self.file_extension,
|
||||
'file_size': self.file_size,
|
||||
'file_path': self.file_path,
|
||||
'source_language': self.source_language,
|
||||
'target_languages': self.target_languages,
|
||||
'status': self.status,
|
||||
'progress': float(self.progress) if self.progress else 0.0,
|
||||
'retry_count': self.retry_count,
|
||||
'error_message': self.error_message,
|
||||
'total_tokens': self.total_tokens,
|
||||
'total_cost': float(self.total_cost) if self.total_cost else 0.0,
|
||||
'conversation_id': self.conversation_id,
|
||||
'processing_started_at': format_taiwan_time(self.processing_started_at, "%Y-%m-%d %H:%M:%S") if self.processing_started_at else None,
|
||||
'completed_at': format_taiwan_time(self.completed_at, "%Y-%m-%d %H:%M:%S") if self.completed_at else None,
|
||||
'created_at': format_taiwan_time(self.created_at, "%Y-%m-%d %H:%M:%S") if self.created_at else None,
|
||||
'updated_at': format_taiwan_time(self.updated_at, "%Y-%m-%d %H:%M:%S") if self.updated_at else None,
|
||||
'deleted_at': format_taiwan_time(self.deleted_at, "%Y-%m-%d %H:%M:%S") if self.deleted_at else None
|
||||
}
|
||||
|
||||
if include_files:
|
||||
data['files'] = [f.to_dict() for f in self.files]
|
||||
|
||||
return data
|
||||
|
||||
def update_status(self, status, error_message=None, progress=None):
|
||||
"""更新任務狀態"""
|
||||
self.status = status
|
||||
|
||||
if error_message:
|
||||
self.error_message = error_message
|
||||
|
||||
if progress is not None:
|
||||
self.progress = progress
|
||||
|
||||
if status == 'PROCESSING' and not self.processing_started_at:
|
||||
self.processing_started_at = datetime.utcnow()
|
||||
elif status == 'COMPLETED':
|
||||
self.completed_at = datetime.utcnow()
|
||||
self.progress = 100.00
|
||||
|
||||
self.updated_at = datetime.utcnow()
|
||||
db.session.commit()
|
||||
|
||||
def add_original_file(self, filename, file_path, file_size):
    """Create, commit and return the JobFile row for the uploaded source file.

    Args:
        filename: Original file name; also used to derive the MIME type.
        file_path: Storage path; its last component becomes ``stored_filename``.
        file_size: Value stored in the row's ``file_size`` column.
    """
    from pathlib import Path

    record_fields = dict(
        job_id=self.id,
        file_type='source',
        original_filename=filename,
        stored_filename=Path(file_path).name,
        file_path=file_path,
        file_size=file_size,
        mime_type=self._get_mime_type(filename),
    )
    source_record = JobFile(**record_fields)

    db.session.add(source_record)
    db.session.commit()
    return source_record
|
||||
|
||||
def add_translated_file(self, language_code, filename, file_path, file_size):
    """Create, commit and return the JobFile row for one translated output.

    Args:
        language_code: Language code stored on the translated file row.
        filename: File name recorded as ``original_filename``; also used to
            derive the MIME type.
        file_path: Storage path; its last component becomes ``stored_filename``.
        file_size: Value stored in the row's ``file_size`` column.
    """
    from pathlib import Path

    record_fields = dict(
        job_id=self.id,
        file_type='translated',
        language_code=language_code,
        original_filename=filename,
        stored_filename=Path(file_path).name,
        file_path=file_path,
        file_size=file_size,
        mime_type=self._get_mime_type(filename),
    )
    output_record = JobFile(**record_fields)

    db.session.add(output_record)
    db.session.commit()
    return output_record
|
||||
|
||||
def _get_mime_type(self, filename):
|
||||
"""取得MIME類型"""
|
||||
import mimetypes
|
||||
from pathlib import Path
|
||||
|
||||
ext = Path(filename).suffix.lower()
|
||||
mime_map = {
|
||||
'.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
'.pdf': 'application/pdf',
|
||||
'.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||||
'.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||
'.txt': 'text/plain'
|
||||
}
|
||||
return mime_map.get(ext, mimetypes.guess_type(filename)[0] or 'application/octet-stream')
|
||||
|
||||
def get_translated_files(self):
    """Return all of this job's file rows whose ``file_type`` is 'translated'."""
    translated_query = self.files.filter_by(file_type='translated')
    return translated_query.all()
|
||||
|
||||
def get_original_file(self):
    """Return the first file row whose ``file_type`` is 'source', or ``None``."""
    source_query = self.files.filter_by(file_type='source')
    return source_query.first()
|
||||
|
||||
def can_retry(self):
    """Tell whether the job may be retried.

    A job qualifies when its status is 'FAILED' or 'RETRY' and it has been
    retried fewer than 3 times.
    """
    if self.status not in ['FAILED', 'RETRY']:
        return False
    return self.retry_count < 3
|
||||
|
||||
def increment_retry(self):
    """Add one to ``retry_count``, refresh ``updated_at`` and commit."""
    self.retry_count = self.retry_count + 1
    self.updated_at = datetime.utcnow()
    db.session.commit()
|
||||
|
||||
def soft_delete(self):
    """Soft-delete the job by stamping ``deleted_at`` and committing.

    The row itself is kept (the original comment says it is retained for
    reporting).
    """
    for column in ('deleted_at', 'updated_at'):
        # Each column gets its own fresh timestamp.
        setattr(self, column, datetime.utcnow())
    db.session.commit()
|
||||
|
||||
def restore(self):
    """Undo a soft delete: clear ``deleted_at``, refresh ``updated_at``, commit."""
    restored_at = datetime.utcnow()
    self.deleted_at = None
    self.updated_at = restored_at
    db.session.commit()
|
||||
|
||||
def is_deleted(self):
    """Return ``True`` when the job carries a soft-delete timestamp."""
    deletion_stamp = self.deleted_at
    if deletion_stamp is None:
        return False
    return True
|
||||
|
||||
@classmethod
def get_queue_position(cls, job_uuid):
    """Return the 1-based queue position of the job with ``job_uuid``.

    The position is the number of non-deleted 'PENDING' jobs created
    strictly before the job, plus one. Returns ``None`` when no non-deleted
    job has that UUID.
    """
    target = cls.query.filter_by(job_uuid=job_uuid, deleted_at=None).first()
    if not target:
        return None

    ahead_criteria = (
        cls.status == 'PENDING',
        cls.deleted_at.is_(None),
        cls.created_at < target.created_at,
    )
    jobs_ahead = cls.query.filter(*ahead_criteria).count()
    return jobs_ahead + 1
|
||||
|
||||
@classmethod
def get_pending_jobs(cls):
    """Return all non-deleted 'PENDING' jobs, oldest first."""
    pending = cls.query.filter_by(status='PENDING', deleted_at=None)
    oldest_first = pending.order_by(cls.created_at.asc())
    return oldest_first.all()
|
||||
|
||||
@classmethod
def get_processing_jobs(cls):
    """Return all non-deleted jobs whose status is 'PROCESSING'."""
    in_progress = cls.query.filter_by(status='PROCESSING', deleted_at=None)
    return in_progress.all()
|
||||
|
||||
@classmethod
def get_user_jobs(cls, user_id, status=None, limit=None, offset=None, include_deleted=False):
    """Return one user's jobs, newest first.

    Args:
        user_id: Owner of the jobs.
        status: Optional status filter (upper-cased before comparison);
            a falsy value or 'all' disables it.
        limit: Applied only when truthy.
        offset: Applied only when truthy.
        include_deleted: When false (default), soft-deleted jobs are excluded.
    """
    jobs = cls.query.filter_by(user_id=user_id)

    # Soft-deleted rows are hidden unless the caller explicitly asks for them.
    if not include_deleted:
        jobs = jobs.filter(cls.deleted_at.is_(None))

    wants_status_filter = bool(status) and status != 'all'
    if wants_status_filter:
        jobs = jobs.filter_by(status=status.upper())

    jobs = jobs.order_by(cls.created_at.desc())

    if limit:
        jobs = jobs.limit(limit)
    if offset:
        jobs = jobs.offset(offset)

    return jobs.all()
|
||||
|
||||
@classmethod
def get_statistics(cls, user_id=None, start_date=None, end_date=None, include_deleted=True):
    """Count jobs overall and per status, with optional filters.

    Args:
        user_id: Restrict to one user when truthy.
        start_date: Lower bound (inclusive) on ``created_at`` when truthy.
        end_date: Upper bound (inclusive) on ``created_at`` when truthy.
        include_deleted: Soft-deleted jobs are counted by default (the
            original comment says this keeps reports complete); pass
            ``False`` to exclude them.

    Returns:
        dict with 'total', 'completed', 'failed', 'processing', 'pending'
        and 'success_rate' (completed / total * 100, or 0 when no jobs).
    """
    scoped = cls.query

    if not include_deleted:
        scoped = scoped.filter(cls.deleted_at.is_(None))
    if user_id:
        scoped = scoped.filter_by(user_id=user_id)
    if start_date:
        scoped = scoped.filter(cls.created_at >= start_date)
    if end_date:
        scoped = scoped.filter(cls.created_at <= end_date)

    total = scoped.count()
    # One COUNT per status, in the same order as the result keys.
    per_status = {
        label: scoped.filter_by(status=label.upper()).count()
        for label in ('completed', 'failed', 'processing', 'pending')
    }

    success_rate = 0
    if total > 0:
        success_rate = per_status['completed'] / total * 100

    return {'total': total, **per_status, 'success_rate': success_rate}
|
||||
|
||||
|
||||
class JobFile(db.Model):
    """File record table (dt_job_files): one row per file attached to a job."""
    __tablename__ = 'dt_job_files'

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    job_id = db.Column(db.Integer, db.ForeignKey('dt_translation_jobs.id'), nullable=False, comment='任務ID')
    file_type = db.Column(
        db.Enum('source', 'translated', name='file_type'),
        nullable=False,
        comment='檔案類型'
    )
    language_code = db.Column(db.String(50), comment='語言代碼(翻譯檔案)')
    original_filename = db.Column(db.String(255), nullable=False, comment='原始檔名')
    stored_filename = db.Column(db.String(255), nullable=False, comment='儲存檔名')
    file_path = db.Column(db.String(500), nullable=False, comment='檔案路徑')
    file_size = db.Column(db.BigInteger, default=0, comment='檔案大小')
    mime_type = db.Column(db.String(100), comment='MIME 類型')
    created_at = db.Column(db.DateTime, default=func.now(), comment='建立時間')

    def __repr__(self):
        return f'<JobFile {self.original_filename}>'

    def to_dict(self):
        """Return the row as a plain dict; ``created_at`` is a formatted string or ``None``."""
        plain_columns = (
            'id',
            'job_id',
            'file_type',
            'language_code',
            'original_filename',
            'stored_filename',
            'file_path',
            'file_size',
            'mime_type',
        )
        payload = {column: getattr(self, column) for column in plain_columns}

        created_text = None
        if self.created_at:
            created_text = format_taiwan_time(self.created_at, "%Y-%m-%d %H:%M:%S")
        payload['created_at'] = created_text
        return payload
|
||||
|
||||
|
||||
# 事件監聽器:自動生成 UUID
|
||||
@event.listens_for(TranslationJob, 'before_insert')
def receive_before_insert(mapper, connection, target):
    """Assign a random UUID4 string to ``job_uuid`` before insert when it is unset."""
    if target.job_uuid:
        # A UUID supplied by the caller is kept as-is.
        return
    target.job_uuid = str(uuid.uuid4())
|
211
app/models/log.py
Normal file
211
app/models/log.py
Normal file
@@ -0,0 +1,211 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
系統日誌資料模型
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
import json
|
||||
from datetime import datetime, timedelta
|
||||
from sqlalchemy.sql import func
|
||||
from app import db
|
||||
|
||||
|
||||
class SystemLog(db.Model):
    """System log table (dt_system_logs).

    Each row is one log record, optionally linked to a user and/or a
    translation job through ``user_id`` / ``job_id``.
    """
    __tablename__ = 'dt_system_logs'

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    level = db.Column(
        db.Enum('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL', name='log_level'),
        nullable=False,
        comment='日誌等級'
    )
    module = db.Column(db.String(100), nullable=False, comment='模組名稱')
    user_id = db.Column(db.Integer, db.ForeignKey('dt_users.id'), comment='使用者ID')
    job_id = db.Column(db.Integer, db.ForeignKey('dt_translation_jobs.id'), comment='任務ID')
    message = db.Column(db.Text, nullable=False, comment='日誌訊息')
    extra_data = db.Column(db.JSON, comment='額外資料')
    created_at = db.Column(db.DateTime, default=func.now(), comment='建立時間')

    def __repr__(self):
        return f'<SystemLog {self.level} {self.module}>'

    def to_dict(self):
        """Return the record as a plain dict; ``created_at`` is ISO-8601 or ``None``."""
        return {
            'id': self.id,
            'level': self.level,
            'module': self.module,
            'user_id': self.user_id,
            'job_id': self.job_id,
            'message': self.message,
            'extra_data': self.extra_data,
            'created_at': self.created_at.isoformat() if self.created_at else None
        }

    @staticmethod
    def _iso_date(value):
        """Serialize the result of SQL ``DATE()`` to an ISO string.

        Depending on the database driver the value arrives either as a
        ``date`` object or already as a string, so both are accepted.
        """
        if value is None:
            return None
        return value.isoformat() if hasattr(value, 'isoformat') else str(value)

    @classmethod
    def log(cls, level, module, message, user_id=None, job_id=None, extra_data=None):
        """Insert one log record and commit it.

        Args:
            level: Log level name; upper-cased before storing.
            module: Name of the module emitting the record.
            message: Log text.
            user_id: Optional related user id.
            job_id: Optional related job id.
            extra_data: Optional JSON-serializable payload.

        Returns:
            The persisted ``SystemLog`` instance.

        Raises:
            Exception: Whatever the commit raised, re-raised after rollback.
        """
        log_entry = cls(
            level=level.upper(),
            module=module,
            message=message,
            user_id=user_id,
            job_id=job_id,
            extra_data=extra_data
        )

        db.session.add(log_entry)
        try:
            db.session.commit()
        except Exception:
            # A failed log write must not leave the shared session in a
            # broken state for whoever uses it next.
            db.session.rollback()
            raise
        return log_entry

    @classmethod
    def debug(cls, module, message, user_id=None, job_id=None, extra_data=None):
        """Record a DEBUG-level entry."""
        return cls.log('DEBUG', module, message, user_id, job_id, extra_data)

    @classmethod
    def info(cls, module, message, user_id=None, job_id=None, extra_data=None):
        """Record an INFO-level entry."""
        return cls.log('INFO', module, message, user_id, job_id, extra_data)

    @classmethod
    def warning(cls, module, message, user_id=None, job_id=None, extra_data=None):
        """Record a WARNING-level entry."""
        return cls.log('WARNING', module, message, user_id, job_id, extra_data)

    @classmethod
    def error(cls, module, message, user_id=None, job_id=None, extra_data=None):
        """Record an ERROR-level entry."""
        return cls.log('ERROR', module, message, user_id, job_id, extra_data)

    @classmethod
    def critical(cls, module, message, user_id=None, job_id=None, extra_data=None):
        """Record a CRITICAL-level entry."""
        return cls.log('CRITICAL', module, message, user_id, job_id, extra_data)

    @classmethod
    def get_logs(cls, level=None, module=None, user_id=None, start_date=None, end_date=None, limit=100, offset=0):
        """Query log records, newest first.

        Args:
            level: Exact level filter (upper-cased) when truthy.
            module: Substring filter on the module name when truthy.
            user_id: Exact user filter when truthy.
            start_date: Inclusive lower bound on ``created_at`` when truthy.
            end_date: Inclusive upper bound on ``created_at`` when truthy.
            limit: Maximum number of rows; skipped when falsy.
            offset: Rows to skip; skipped when falsy.
        """
        query = cls.query

        if level:
            query = query.filter_by(level=level.upper())

        if module:
            # NOTE(review): '%' and '_' in `module` act as LIKE wildcards here.
            query = query.filter(cls.module.like(f'%{module}%'))

        if user_id:
            query = query.filter_by(user_id=user_id)

        if start_date:
            query = query.filter(cls.created_at >= start_date)

        if end_date:
            query = query.filter(cls.created_at <= end_date)

        # Newest records first.
        query = query.order_by(cls.created_at.desc())

        if limit:
            query = query.limit(limit)
        if offset:
            query = query.offset(offset)

        return query.all()

    @classmethod
    def get_log_statistics(cls, days=7):
        """Aggregate the logs of the last ``days`` days.

        Returns:
            dict with 'level_stats' (count per level), 'module_stats'
            (the 10 modules with the most records) and 'daily_stats'
            (count per calendar date and level, ordered by date).
        """
        end_date = datetime.utcnow()
        start_date = end_date - timedelta(days=days)

        # Count per level.
        level_stats = db.session.query(
            cls.level,
            func.count(cls.id).label('count')
        ).filter(
            cls.created_at >= start_date
        ).group_by(cls.level).all()

        # Count per module, busiest ten only.
        module_stats = db.session.query(
            cls.module,
            func.count(cls.id).label('count')
        ).filter(
            cls.created_at >= start_date
        ).group_by(cls.module).order_by(
            func.count(cls.id).desc()
        ).limit(10).all()

        # Count per day and level.
        daily_stats = db.session.query(
            func.date(cls.created_at).label('date'),
            cls.level,
            func.count(cls.id).label('count')
        ).filter(
            cls.created_at >= start_date
        ).group_by(
            func.date(cls.created_at), cls.level
        ).order_by(
            func.date(cls.created_at)
        ).all()

        return {
            'level_stats': [
                {'level': stat.level, 'count': stat.count}
                for stat in level_stats
            ],
            'module_stats': [
                {'module': stat.module, 'count': stat.count}
                for stat in module_stats
            ],
            'daily_stats': [
                {
                    'date': cls._iso_date(stat.date),
                    'level': stat.level,
                    'count': stat.count
                }
                for stat in daily_stats
            ]
        }

    @classmethod
    def cleanup_old_logs(cls, days_to_keep=30):
        """Delete records older than ``days_to_keep`` days; return the deleted row count."""
        cutoff_date = datetime.utcnow() - timedelta(days=days_to_keep)

        try:
            deleted_count = cls.query.filter(
                cls.created_at < cutoff_date
            ).delete(synchronize_session=False)
            db.session.commit()
        except Exception:
            # Keep the session usable if the bulk delete or the commit fails.
            db.session.rollback()
            raise
        return deleted_count

    @classmethod
    def get_error_summary(cls, days=1):
        """Summarize ERROR/CRITICAL records from the last ``days`` days.

        At most the 50 newest records are loaded, so 'total_errors' is
        capped at 50.

        Returns:
            dict with 'total_errors', 'error_by_module' (records grouped by
            module name) and 'recent_errors' (the 10 newest records).
        """
        start_date = datetime.utcnow() - timedelta(days=days)

        error_logs = cls.query.filter(
            cls.level.in_(['ERROR', 'CRITICAL']),
            cls.created_at >= start_date
        ).order_by(cls.created_at.desc()).limit(50).all()

        # Group the serialized records by module.
        error_by_module = {}
        for log in error_logs:
            error_by_module.setdefault(log.module, []).append(log.to_dict())

        return {
            'total_errors': len(error_logs),
            'error_by_module': error_by_module,
            'recent_errors': [log.to_dict() for log in error_logs[:10]]
        }
|
98
app/models/notification.py
Normal file
98
app/models/notification.py
Normal file
@@ -0,0 +1,98 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
通知系統資料模型
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.orm import relationship
|
||||
from app import db
|
||||
import uuid
|
||||
import json
|
||||
|
||||
|
||||
class NotificationType(str, Enum):
    """Kinds of notification; every member is also a plain lower-case ``str``."""

    SUCCESS = "success"
    ERROR = "error"
    WARNING = "warning"
    INFO = "info"
    SYSTEM = "system"
|
||||
|
||||
|
||||
class Notification(db.Model):
    """Notification shown to a user (table dt_notifications)."""
    __tablename__ = 'dt_notifications'

    # Primary key plus the public identifier handed to the frontend.
    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    notification_uuid = db.Column(db.String(36), unique=True, nullable=False, index=True,
                                  default=lambda: str(uuid.uuid4()), comment='通知唯一識別碼')

    # Basic information.
    user_id = db.Column(db.Integer, db.ForeignKey('dt_users.id'), nullable=False, comment='使用者ID')
    # The column enum uses upper-case labels, so the default must be the
    # upper-case label too (NotificationType values are lower-case).
    # NOTE(review): NotificationType.SYSTEM has no matching label in this
    # enum — confirm whether 'SYSTEM' should be added via a migration.
    type = db.Column(db.Enum('INFO', 'SUCCESS', 'WARNING', 'ERROR', name='notification_type'),
                     nullable=False, default='INFO', comment='通知類型')
    title = db.Column(db.String(255), nullable=False, comment='通知標題')
    message = db.Column(db.Text, nullable=False, comment='通知內容')

    # Optional links to related objects.
    job_uuid = db.Column(db.String(36), nullable=True, comment='關聯任務UUID')
    link = db.Column(db.String(500), nullable=True, comment='相關連結')

    # Read state.
    is_read = db.Column(db.Boolean, default=False, nullable=False, comment='是否已讀')
    read_at = db.Column(db.DateTime, nullable=True, comment='閱讀時間')

    # Timestamps.
    created_at = db.Column(db.DateTime, default=func.now(), nullable=False, comment='建立時間')
    expires_at = db.Column(db.DateTime, nullable=True, comment='過期時間')

    # Free-form extra payload stored as JSON.
    extra_data = db.Column(db.JSON, nullable=True, comment='額外數據')

    # Relationship to the owning user.
    user = db.relationship("User", backref="notifications")

    def __repr__(self):
        return f"<Notification {self.notification_uuid}: {self.title}>"

    def to_dict(self):
        """Return the notification as a plain dict.

        'id' carries the UUID (not the integer primary key) and 'read'
        duplicates 'is_read'; the original comments say both exist for the
        frontend. Datetimes are ISO-8601 strings or ``None``.
        """
        return {
            'id': self.notification_uuid,
            'user_id': self.user_id,
            'type': self.type,
            'title': self.title,
            'message': self.message,
            'job_uuid': self.job_uuid,
            'link': self.link,
            'is_read': self.is_read,
            'read': self.is_read,
            'read_at': self.read_at.isoformat() if self.read_at else None,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'expires_at': self.expires_at.isoformat() if self.expires_at else None,
            'extra_data': self.extra_data
        }

    def mark_as_read(self):
        """Flag the notification as read and stamp ``read_at``.

        Does not commit; the caller is responsible for committing.
        """
        self.is_read = True
        # NOTE(review): local naive time, while other models use utcnow() —
        # confirm which clock read_at is expected to follow.
        self.read_at = datetime.now()

    @classmethod
    def create_job_notification(cls, user_id, job_uuid, title, message, notification_type=NotificationType.INFO):
        """Build (without persisting) a notification linked to a job.

        Args:
            user_id: Recipient user id.
            job_uuid: UUID of the related job; also used to build the link.
            title: Notification title.
            message: Notification body.
            notification_type: A ``NotificationType`` member or a plain
                string; stored upper-cased to match the column's enum labels.

        Returns:
            A new, unsaved ``Notification`` instance.
        """
        # Accept both enum members and plain strings.
        raw_type = getattr(notification_type, 'value', notification_type)

        return cls(
            user_id=user_id,
            job_uuid=job_uuid,
            type=str(raw_type).upper(),
            title=title,
            message=message,
            link=f"/job/{job_uuid}"  # link to the job detail page
        )
|
233
app/models/stats.py
Normal file
233
app/models/stats.py
Normal file
@@ -0,0 +1,233 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
API使用統計資料模型
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from sqlalchemy.sql import func
|
||||
from app import db
|
||||
from app.utils.timezone import format_taiwan_time
|
||||
|
||||
|
||||
class APIUsageStats(db.Model):
    """API usage statistics table (dt_api_usage_stats): one row per API call."""
    __tablename__ = 'dt_api_usage_stats'

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    user_id = db.Column(db.Integer, db.ForeignKey('dt_users.id'), nullable=False, comment='使用者ID')
    job_id = db.Column(db.Integer, db.ForeignKey('dt_translation_jobs.id'), comment='任務ID')
    api_endpoint = db.Column(db.String(200), nullable=False, comment='API端點')
    prompt_tokens = db.Column(db.Integer, default=0, comment='Prompt token數')
    completion_tokens = db.Column(db.Integer, default=0, comment='Completion token數')
    total_tokens = db.Column(db.Integer, default=0, comment='總token數')
    prompt_unit_price = db.Column(db.Numeric(10, 8), default=0.00000000, comment='單價')
    prompt_price_unit = db.Column(db.String(20), default='USD', comment='價格單位')
    cost = db.Column(db.Numeric(10, 4), default=0.0000, comment='成本')
    response_time_ms = db.Column(db.Integer, default=0, comment='回應時間(毫秒)')
    success = db.Column(db.Boolean, default=True, comment='是否成功')
    error_message = db.Column(db.Text, comment='錯誤訊息')
    created_at = db.Column(db.DateTime, default=func.now(), comment='建立時間')

    def __repr__(self):
        return f'<APIUsageStats {self.api_endpoint}>'

    def to_dict(self):
        """Return the row as a plain dict; Numeric columns become floats."""
        return {
            'id': self.id,
            'user_id': self.user_id,
            'job_id': self.job_id,
            'api_endpoint': self.api_endpoint,
            'prompt_tokens': self.prompt_tokens,
            'completion_tokens': self.completion_tokens,
            'total_tokens': self.total_tokens,
            'prompt_unit_price': float(self.prompt_unit_price) if self.prompt_unit_price else 0.0,
            'prompt_price_unit': self.prompt_price_unit,
            'cost': float(self.cost) if self.cost else 0.0,
            'response_time_ms': self.response_time_ms,
            'success': self.success,
            'error_message': self.error_message,
            'created_at': format_taiwan_time(self.created_at, "%Y-%m-%d %H:%M:%S") if self.created_at else None
        }

    @staticmethod
    def _iso_date(value):
        """Serialize the result of SQL ``DATE()`` to an ISO string.

        Depending on the database driver the value arrives either as a
        ``date`` object or already as a string, so both are accepted.
        """
        if value is None:
            return None
        return value.isoformat() if hasattr(value, 'isoformat') else str(value)

    @classmethod
    def record_api_call(cls, user_id, job_id, api_endpoint, metadata, response_time_ms, success=True, error_message=None):
        """Persist the usage figures of one API call.

        Args:
            user_id: Calling user's id.
            job_id: Related job id (may be ``None``).
            api_endpoint: Endpoint that was called.
            metadata: Response metadata; usage figures are read from its
                'usage' entry. ``None`` or a missing/null 'usage' is
                treated as "no usage reported" (failed calls often carry
                no metadata).
            response_time_ms: Response time in milliseconds.
            success: Whether the call succeeded.
            error_message: Error text for failed calls.

        Returns:
            The committed ``APIUsageStats`` row.
        """
        usage_data = (metadata or {}).get('usage') or {}

        # Null values in the payload count as zero.
        prompt_tokens = usage_data.get('prompt_tokens') or 0
        completion_tokens = usage_data.get('completion_tokens') or 0
        total_tokens = usage_data.get('total_tokens')
        if total_tokens is None:
            total_tokens = prompt_tokens + completion_tokens

        if 'total_price' in usage_data:
            # Prefer the total price reported by the API.
            cost = float(usage_data.get('total_price') or 0.0)
        else:
            # Fallback: sum of the prompt and completion prices.
            prompt_price = float(usage_data.get('prompt_price') or 0.0)
            completion_price = float(usage_data.get('completion_price') or 0.0)
            cost = prompt_price + completion_price

        # Only the prompt unit price has a column to be stored in.
        prompt_unit_price = usage_data.get('prompt_unit_price') or 0.0
        prompt_price_unit = usage_data.get('currency') or 'USD'

        stats = cls(
            user_id=user_id,
            job_id=job_id,
            api_endpoint=api_endpoint,
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=total_tokens,
            prompt_unit_price=prompt_unit_price,
            prompt_price_unit=prompt_price_unit,
            cost=cost,
            response_time_ms=response_time_ms,
            success=success,
            error_message=error_message
        )

        db.session.add(stats)
        db.session.commit()
        return stats

    @classmethod
    def get_user_statistics(cls, user_id, start_date=None, end_date=None):
        """Aggregate one user's API usage, optionally within a date range.

        Returns:
            dict with 'total_calls', 'successful_calls', 'failed_calls',
            'success_rate' (percent), 'total_tokens', 'total_cost' and
            'avg_response_time'.
        """
        query = cls.query.filter_by(user_id=user_id)

        if start_date:
            query = query.filter(cls.created_at >= start_date)
        if end_date:
            query = query.filter(cls.created_at <= end_date)

        total_calls = query.count()
        successful_calls = query.filter_by(success=True).count()
        total_tokens = query.with_entities(func.sum(cls.total_tokens)).scalar() or 0
        total_cost = query.with_entities(func.sum(cls.cost)).scalar() or 0.0
        avg_response_time = query.with_entities(func.avg(cls.response_time_ms)).scalar() or 0

        return {
            'total_calls': total_calls,
            'successful_calls': successful_calls,
            'failed_calls': total_calls - successful_calls,
            'success_rate': (successful_calls / total_calls * 100) if total_calls > 0 else 0,
            'total_tokens': total_tokens,
            'total_cost': float(total_cost),
            'avg_response_time': float(avg_response_time) if avg_response_time else 0
        }

    @classmethod
    def get_daily_statistics(cls, days=30):
        """Return per-day call, token and cost totals for the last ``days`` days.

        Rows are ordered by date ascending, matching ``get_cost_trend``.
        """
        end_date = datetime.utcnow()
        start_date = end_date - timedelta(days=days)
        day = func.date(cls.created_at)

        # NOTE(review): the aggregate FILTER clause below is not supported by
        # every backend — confirm it works on the deployed database.
        daily_stats = db.session.query(
            day.label('date'),
            func.count(cls.id).label('total_calls'),
            func.sum(cls.total_tokens).label('total_tokens'),
            func.sum(cls.cost).label('total_cost'),
            func.count().filter(cls.success == True).label('successful_calls')
        ).filter(
            cls.created_at >= start_date,
            cls.created_at <= end_date
        ).group_by(day).order_by(day).all()

        return [
            {
                'date': cls._iso_date(stat.date),
                'total_calls': stat.total_calls,
                'successful_calls': stat.successful_calls or 0,
                'failed_calls': stat.total_calls - (stat.successful_calls or 0),
                'total_tokens': stat.total_tokens or 0,
                'total_cost': float(stat.total_cost or 0)
            }
            for stat in daily_stats
        ]

    @classmethod
    def get_top_users(cls, limit=10, start_date=None, end_date=None):
        """Return the ``limit`` users with the highest total cost, costliest first."""
        query = db.session.query(
            cls.user_id,
            func.count(cls.id).label('total_calls'),
            func.sum(cls.total_tokens).label('total_tokens'),
            func.sum(cls.cost).label('total_cost')
        )

        if start_date:
            query = query.filter(cls.created_at >= start_date)
        if end_date:
            query = query.filter(cls.created_at <= end_date)

        top_users = query.group_by(cls.user_id).order_by(
            func.sum(cls.cost).desc()
        ).limit(limit).all()

        return [
            {
                'user_id': user.user_id,
                'total_calls': user.total_calls,
                'total_tokens': user.total_tokens or 0,
                'total_cost': float(user.total_cost or 0)
            }
            for user in top_users
        ]

    @classmethod
    def get_cost_trend(cls, days=30):
        """Return the summed cost per day for the last ``days`` days, ordered by date."""
        end_date = datetime.utcnow()
        start_date = end_date - timedelta(days=days)
        day = func.date(cls.created_at)

        cost_trend = db.session.query(
            day.label('date'),
            func.sum(cls.cost).label('daily_cost')
        ).filter(
            cls.created_at >= start_date,
            cls.created_at <= end_date
        ).group_by(day).order_by(day).all()

        return [
            {
                'date': cls._iso_date(trend.date),
                'cost': float(trend.daily_cost or 0)
            }
            for trend in cost_trend
        ]

    @classmethod
    def get_endpoint_statistics(cls):
        """Return call count, total cost and average response time per endpoint, busiest first."""
        endpoint_stats = db.session.query(
            cls.api_endpoint,
            func.count(cls.id).label('total_calls'),
            func.sum(cls.cost).label('total_cost'),
            func.avg(cls.response_time_ms).label('avg_response_time')
        ).group_by(cls.api_endpoint).order_by(
            func.count(cls.id).desc()
        ).all()

        return [
            {
                'endpoint': stat.api_endpoint,
                'total_calls': stat.total_calls,
                'total_cost': float(stat.total_cost or 0),
                'avg_response_time': float(stat.avg_response_time or 0)
            }
            for stat in endpoint_stats
        ]
|
297
app/models/sys_user.py
Normal file
297
app/models/sys_user.py
Normal file
@@ -0,0 +1,297 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
系統使用者模型
|
||||
專門用於記錄帳號密碼和登入相關資訊
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2025-10-01
|
||||
"""
|
||||
|
||||
import json
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional, Dict, Any
|
||||
from sqlalchemy import Column, Integer, String, Text, Boolean, DateTime, JSON, Enum as SQLEnum, BigInteger
|
||||
from werkzeug.security import generate_password_hash, check_password_hash
|
||||
from app import db
|
||||
from app.utils.logger import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class SysUser(db.Model):
|
||||
"""系統使用者模型 - 專門處理帳號密碼和登入記錄"""
|
||||
__tablename__ = 'sys_user'
|
||||
|
||||
id = Column(BigInteger, primary_key=True)
|
||||
|
||||
# 帳號資訊
|
||||
username = Column(String(255), nullable=False, unique=True, comment='登入帳號')
|
||||
password_hash = Column(String(512), comment='密碼雜湊 (如果需要本地儲存)')
|
||||
email = Column(String(255), nullable=False, unique=True, comment='電子郵件')
|
||||
display_name = Column(String(255), comment='顯示名稱')
|
||||
|
||||
# API 認證資訊
|
||||
api_user_id = Column(String(255), comment='API 回傳的使用者 ID')
|
||||
api_access_token = Column(Text, comment='API 回傳的 access_token')
|
||||
api_token_expires_at = Column(DateTime, comment='API Token 過期時間')
|
||||
|
||||
# 登入相關
|
||||
auth_method = Column(SQLEnum('API', 'LDAP', name='sys_user_auth_method'),
|
||||
default='API', comment='認證方式')
|
||||
last_login_at = Column(DateTime, comment='最後登入時間')
|
||||
last_login_ip = Column(String(45), comment='最後登入 IP')
|
||||
login_count = Column(Integer, default=0, comment='登入次數')
|
||||
login_success_count = Column(Integer, default=0, comment='成功登入次數')
|
||||
login_fail_count = Column(Integer, default=0, comment='失敗登入次數')
|
||||
|
||||
# 帳號狀態
|
||||
is_active = Column(Boolean, default=True, comment='是否啟用')
|
||||
is_locked = Column(Boolean, default=False, comment='是否鎖定')
|
||||
locked_until = Column(DateTime, comment='鎖定至何時')
|
||||
|
||||
# 審計欄位
|
||||
created_at = Column(DateTime, default=datetime.utcnow, comment='建立時間')
|
||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, comment='更新時間')
|
||||
|
||||
def __repr__(self):
|
||||
return f'<SysUser {self.username}>'
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""轉換為字典格式"""
|
||||
return {
|
||||
'id': self.id,
|
||||
'username': self.username,
|
||||
'email': self.email,
|
||||
'display_name': self.display_name,
|
||||
'api_user_id': self.api_user_id,
|
||||
'auth_method': self.auth_method,
|
||||
'last_login_at': self.last_login_at.isoformat() if self.last_login_at else None,
|
||||
'login_count': self.login_count,
|
||||
'login_success_count': self.login_success_count,
|
||||
'login_fail_count': self.login_fail_count,
|
||||
'is_active': self.is_active,
|
||||
'is_locked': self.is_locked,
|
||||
'api_token_expires_at': self.api_token_expires_at.isoformat() if self.api_token_expires_at else None,
|
||||
'created_at': self.created_at.isoformat() if self.created_at else None
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def get_or_create(cls, email: str, **kwargs) -> 'SysUser':
|
||||
"""
|
||||
取得或建立系統使用者 (方案A: 使用 email 作為主要識別鍵)
|
||||
|
||||
Args:
|
||||
email: 電子郵件 (主要識別鍵)
|
||||
**kwargs: 其他欄位
|
||||
|
||||
Returns:
|
||||
SysUser: 系統使用者實例
|
||||
"""
|
||||
try:
|
||||
# 使用 email 作為主要識別 (專門用於登入記錄)
|
||||
sys_user = cls.query.filter_by(email=email).first()
|
||||
|
||||
if sys_user:
|
||||
# 更新現有記錄
|
||||
sys_user.username = kwargs.get('username', sys_user.username) # API name (姓名+email)
|
||||
sys_user.display_name = kwargs.get('display_name', sys_user.display_name) # API name (姓名+email)
|
||||
sys_user.api_user_id = kwargs.get('api_user_id', sys_user.api_user_id) # Azure Object ID
|
||||
sys_user.api_access_token = kwargs.get('api_access_token', sys_user.api_access_token)
|
||||
sys_user.api_token_expires_at = kwargs.get('api_token_expires_at', sys_user.api_token_expires_at)
|
||||
sys_user.auth_method = kwargs.get('auth_method', sys_user.auth_method)
|
||||
sys_user.updated_at = datetime.utcnow()
|
||||
|
||||
logger.info(f"更新現有系統使用者: {email}")
|
||||
else:
|
||||
# 建立新記錄
|
||||
sys_user = cls(
|
||||
username=kwargs.get('username', ''), # API name (姓名+email 格式)
|
||||
email=email, # 純 email,主要識別鍵
|
||||
display_name=kwargs.get('display_name', ''), # API name (姓名+email 格式)
|
||||
api_user_id=kwargs.get('api_user_id'), # Azure Object ID
|
||||
api_access_token=kwargs.get('api_access_token'),
|
||||
api_token_expires_at=kwargs.get('api_token_expires_at'),
|
||||
auth_method=kwargs.get('auth_method', 'API'),
|
||||
login_count=0,
|
||||
login_success_count=0,
|
||||
login_fail_count=0
|
||||
)
|
||||
db.session.add(sys_user)
|
||||
logger.info(f"建立新系統使用者: {email}")
|
||||
|
||||
db.session.commit()
|
||||
return sys_user
|
||||
|
||||
except Exception as e:
|
||||
db.session.rollback()
|
||||
logger.error(f"取得或建立系統使用者失敗: {str(e)}")
|
||||
raise
|
||||
|
||||
@classmethod
|
||||
def get_by_email(cls, email: str) -> Optional['SysUser']:
|
||||
"""根據 email 查找系統使用者"""
|
||||
return cls.query.filter_by(email=email).first()
|
||||
|
||||
def record_login_attempt(self, success: bool, ip_address: str = None, auth_method: str = None):
|
||||
"""
|
||||
記錄登入嘗試
|
||||
|
||||
Args:
|
||||
success: 是否成功
|
||||
ip_address: IP 地址
|
||||
auth_method: 認證方式
|
||||
"""
|
||||
try:
|
||||
self.login_count = (self.login_count or 0) + 1
|
||||
|
||||
if success:
|
||||
self.login_success_count = (self.login_success_count or 0) + 1
|
||||
self.last_login_at = datetime.utcnow()
|
||||
self.last_login_ip = ip_address
|
||||
if auth_method:
|
||||
self.auth_method = auth_method
|
||||
|
||||
# 成功登入時解除鎖定
|
||||
if self.is_locked:
|
||||
self.is_locked = False
|
||||
self.locked_until = None
|
||||
|
||||
else:
|
||||
self.login_fail_count = (self.login_fail_count or 0) + 1
|
||||
|
||||
# 檢查是否需要鎖定帳號 (連續失敗5次)
|
||||
if self.login_fail_count >= 5:
|
||||
self.is_locked = True
|
||||
self.locked_until = datetime.utcnow() + timedelta(minutes=30) # 鎖定30分鐘
|
||||
|
||||
self.updated_at = datetime.utcnow()
|
||||
db.session.commit()
|
||||
|
||||
except Exception as e:
|
||||
db.session.rollback()
|
||||
logger.error(f"記錄登入嘗試失敗: {str(e)}")
|
||||
|
||||
def is_account_locked(self) -> bool:
    """Report whether the account is currently locked.

    When the lock has an expiry (``locked_until``) that is already in the
    past, the lock is cleared, the change is committed and ``False`` is
    returned. A lock without an expiry stays in force.

    Returns:
        ``True`` while the lock is active, ``False`` otherwise.
    """
    if self.is_locked:
        expiry = self.locked_until
        if not (expiry and datetime.utcnow() > expiry):
            return True

        # The lock period is over: release it and persist the change.
        self.is_locked = False
        self.locked_until = None
        db.session.commit()

    return False
|
||||
|
||||
def set_password(self, password: str):
    """Hash ``password`` and keep the hash on this instance.

    Only ``password_hash`` is assigned; nothing is committed here.

    Args:
        password: Plain-text password to hash.
    """
    hashed = generate_password_hash(password)
    self.password_hash = hashed
|
||||
|
||||
def check_password(self, password: str) -> bool:
    """Compare ``password`` with the locally stored hash.

    Args:
        password: Plain-text password to verify.

    Returns:
        ``False`` when no hash is stored, otherwise the result of
        ``check_password_hash``.
    """
    stored_hash = self.password_hash
    return check_password_hash(stored_hash, password) if stored_hash else False
|
||||
|
||||
def update_api_token(self, access_token: str, expires_at: datetime = None):
    """Store a new API access token and commit immediately.

    Args:
        access_token: Token value written to ``api_access_token``.
        expires_at: Expiry written to ``api_token_expires_at`` (may be ``None``).
    """
    self.api_access_token, self.api_token_expires_at = access_token, expires_at
    self.updated_at = datetime.utcnow()
    db.session.commit()
|
||||
|
||||
def is_api_token_valid(self) -> bool:
    """Tell whether a stored API token exists and has not yet expired.

    Returns:
        ``True`` only when both the token and its expiry are set and the
        expiry is later than the current ``datetime.utcnow()``.
    """
    expires_at = self.api_token_expires_at
    if self.api_access_token and expires_at:
        return datetime.utcnow() < expires_at
    return False
|
||||
|
||||
|
||||
class LoginLog(db.Model):
    """One row per login attempt (table ``login_logs``)."""
    __tablename__ = 'login_logs'

    id = Column(BigInteger, primary_key=True)

    # Who attempted to log in and through which authentication method
    username = Column(String(255), nullable=False, comment='登入帳號')
    auth_method = Column(
        SQLEnum('API', 'LDAP', name='login_log_auth_method'),
        nullable=False,
        comment='認證方式',
    )

    # Outcome of the attempt
    login_success = Column(Boolean, nullable=False, comment='是否成功')
    error_message = Column(Text, comment='錯誤訊息(失敗時)')

    # Client environment
    ip_address = Column(String(45), comment='IP 地址')
    user_agent = Column(Text, comment='瀏覽器資訊')

    # Optional summary of the API response, kept for debugging
    api_response_summary = Column(JSON, comment='API 回應摘要')

    # Timestamp of the attempt
    login_at = Column(DateTime, default=datetime.utcnow, comment='登入時間')

    def __repr__(self):
        return '<LoginLog {}:{}:{}>'.format(self.username, self.auth_method, self.login_success)

    @classmethod
    def create_log(cls, username: str, auth_method: str, login_success: bool,
                   error_message: str = None, ip_address: str = None,
                   user_agent: str = None, api_response_summary: Dict = None) -> 'LoginLog':
        """Insert and commit a login record.

        Args:
            username: Account name used for the attempt.
            auth_method: Authentication method.
            login_success: Whether the attempt succeeded.
            error_message: Failure description, if any.
            ip_address: Client IP address.
            user_agent: Client user-agent string.
            api_response_summary: Optional summary of the API response.

        Returns:
            The committed record, or ``None`` when saving failed (the
            session is rolled back and the error is logged).
        """
        fields = dict(
            username=username,
            auth_method=auth_method,
            login_success=login_success,
            error_message=error_message,
            ip_address=ip_address,
            user_agent=user_agent,
            api_response_summary=api_response_summary,
        )
        try:
            entry = cls(**fields)
            db.session.add(entry)
            db.session.commit()
        except Exception as exc:
            db.session.rollback()
            logger.error(f"建立登入記錄失敗: {str(exc)}")
            return None
        return entry

    @classmethod
    def get_recent_failed_attempts(cls, username: str, minutes: int = 15) -> int:
        """Count failed attempts for ``username`` within a recent time window.

        Args:
            username: Account name to filter on.
            minutes: Size of the window, counted back from ``datetime.utcnow()``.

        Returns:
            Number of matching failed-login rows.
        """
        window_start = datetime.utcnow() - timedelta(minutes=minutes)
        conditions = (
            cls.username == username,
            cls.login_success == False,  # noqa: E712 - builds a SQL expression
            cls.login_at >= window_start,
        )
        return cls.query.filter(*conditions).count()
|
124
app/models/user.py
Normal file
124
app/models/user.py
Normal file
@@ -0,0 +1,124 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
使用者資料模型
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from sqlalchemy.sql import func
|
||||
from app import db
|
||||
from app.utils.timezone import format_taiwan_time
|
||||
|
||||
|
||||
class User(db.Model):
    """User profile table (``dt_users``)."""
    __tablename__ = 'dt_users'

    # E-mail address that receives admin rights when its account is first created.
    _BOOTSTRAP_ADMIN_EMAIL = 'ymirliu@panjit.com.tw'
    # Timestamp layout used by to_dict().
    _TIME_FORMAT = "%Y-%m-%d %H:%M:%S"

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    username = db.Column(db.String(100), unique=True, nullable=False, index=True, comment='AD帳號')
    display_name = db.Column(db.String(200), nullable=False, comment='顯示名稱')
    email = db.Column(db.String(255), nullable=False, index=True, comment='電子郵件')
    department = db.Column(db.String(100), comment='部門')
    is_admin = db.Column(db.Boolean, default=False, comment='是否為管理員')
    last_login = db.Column(db.DateTime, comment='最後登入時間')
    created_at = db.Column(db.DateTime, default=func.now(), comment='建立時間')
    updated_at = db.Column(
        db.DateTime,
        default=func.now(),
        onupdate=func.now(),
        comment='更新時間'
    )

    # Relationships
    translation_jobs = db.relationship('TranslationJob', backref='user', lazy='dynamic', cascade='all, delete-orphan')
    api_usage_stats = db.relationship('APIUsageStats', backref='user', lazy='dynamic', cascade='all, delete-orphan')
    system_logs = db.relationship('SystemLog', backref='user', lazy='dynamic')

    def __repr__(self):
        return f'<User {self.username}>'

    def to_dict(self, include_stats=False):
        """Serialize the user to a plain dictionary.

        Args:
            include_stats: When true, add job counts and the total API cost.

        Returns:
            dict with the profile fields; timestamps are formatted through
            ``format_taiwan_time`` or ``None`` when unset.
        """
        def _fmt(value):
            return format_taiwan_time(value, self._TIME_FORMAT) if value else None

        data = {
            'id': self.id,
            'username': self.username,
            'display_name': self.display_name,
            'email': self.email,
            'department': self.department,
            'is_admin': self.is_admin,
            'last_login': _fmt(self.last_login),
            'created_at': _fmt(self.created_at),
            'updated_at': _fmt(self.updated_at)
        }

        if include_stats:
            data.update({
                'total_jobs': self.translation_jobs.count(),
                'completed_jobs': self.translation_jobs.filter_by(status='COMPLETED').count(),
                'failed_jobs': self.translation_jobs.filter_by(status='FAILED').count(),
                'total_cost': self.get_total_cost()
            })

        return data

    def get_total_cost(self):
        """Sum ``APIUsageStats.cost`` for this user.

        Best effort: any failure is logged and reported as ``0.0`` so that
        callers such as ``to_dict`` keep working.
        """
        try:
            # Imported lazily, as in the original code.
            from app.models.stats import APIUsageStats
            return db.session.query(
                func.sum(APIUsageStats.cost)
            ).filter(APIUsageStats.user_id == self.id).scalar() or 0.0
        except Exception:
            import logging
            logging.getLogger(__name__).warning(
                "Failed to compute total cost for user %s", self.id, exc_info=True
            )
            return 0.0

    def update_last_login(self):
        """Set ``last_login`` to the current UTC time and commit.

        Raises:
            Exception: Whatever the commit raised, after the session has
                been rolled back.
        """
        self.last_login = datetime.utcnow()
        try:
            db.session.commit()
        except Exception:
            # Leave the session usable for the caller before propagating.
            db.session.rollback()
            raise

    @classmethod
    def get_or_create(cls, username, display_name, email, department=None):
        """Fetch the user with this e-mail, or create one.

        An existing user has ``username``/``display_name`` refreshed (and
        ``department`` when one is given). A new user becomes admin only when
        the e-mail equals the bootstrap admin address (case-insensitive).

        Args:
            username: Name reported by the login API.
            display_name: Display name reported by the login API.
            email: E-mail address used as the lookup key.
            department: Optional department name.

        Returns:
            The persisted ``User``.

        Raises:
            Exception: Whatever the commit raised (for example a uniqueness
                violation on ``username``), after the session has been
                rolled back.
        """
        user = cls.query.filter_by(email=email).first()

        if user:
            user.username = username
            user.display_name = display_name
            if department:
                user.department = department
            user.updated_at = datetime.utcnow()
        else:
            user = cls(
                username=username,
                display_name=display_name,
                email=email,
                department=department,
                is_admin=(email.lower() == cls._BOOTSTRAP_ADMIN_EMAIL)
            )
            db.session.add(user)

        try:
            db.session.commit()
        except Exception:
            # A failed flush leaves the session unusable until rolled back.
            db.session.rollback()
            raise
        return user

    @classmethod
    def get_by_email(cls, email):
        """Return the first user with this e-mail, or ``None``."""
        return cls.query.filter_by(email=email).first()

    @classmethod
    def get_admin_users(cls):
        """Return all users flagged ``is_admin``."""
        return cls.query.filter_by(is_admin=True).all()

    @classmethod
    def get_active_users(cls, days=30):
        """Return users whose ``last_login`` falls within the last ``days`` days."""
        cutoff_date = datetime.utcnow() - timedelta(days=days)
        return cls.query.filter(cls.last_login >= cutoff_date).all()
|
92
app/root.py
Normal file
92
app/root.py
Normal file
@@ -0,0 +1,92 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Root routes and static file serving for SPA in production.
|
||||
|
||||
These were originally defined in the top-level app.py. Moving them into the
|
||||
package allows a clean WSGI entry (wsgi:app) without importing app.py.
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from flask import Blueprint, current_app, send_from_directory
|
||||
|
||||
root_bp = Blueprint('root', __name__)
|
||||
|
||||
def get_static_dir():
    """Return the path of the built frontend (``<project root>/frontend/dist``).

    The project root is taken to be the directory two levels above this file.

    Returns:
        str: Path to the ``frontend/dist`` directory.
    """
    package_dir = Path(__file__).parent
    return str(package_dir.parent.joinpath('frontend', 'dist'))
|
||||
|
||||
|
||||
@root_bp.route('/')
def index():
    """Serve the SPA entry page, or basic API info when it is unavailable.

    When the frontend build directory is missing, or serving ``index.html``
    raises for any reason, a small JSON description of the service is
    returned instead.
    """
    fallback_info = {
        'application': 'PANJIT Document Translator',
        'version': '1.0.0',
        'status': 'running',
        'api_base_url': '/api/v1',
        'note': 'Frontend files not found, serving API info'
    }

    try:
        static_dir = get_static_dir()
        if not Path(static_dir).exists():
            return fallback_info
        return send_from_directory(static_dir, 'index.html')
    except Exception:
        return fallback_info
|
||||
|
||||
|
||||
@root_bp.route('/<path:path>')
def serve_static(path):
    """Serve a built frontend file, with a working SPA fallback.

    The requested file is served from the frontend build directory when it
    can be. If that fails and the path looks like a client-side route (not
    under ``api`` and without a file extension in its last segment),
    ``index.html`` is served so the SPA router can handle it. Previously the
    fallback was only attempted when the build directory did not exist, so
    it could never succeed.

    Args:
        path: Request path captured by the catch-all route.

    Returns:
        The file response, or ``({'error': ..., 'path': ...}, 404)`` when
        nothing could be served.
    """
    static_dir = get_static_dir()

    candidates = [path]
    last_segment = path.rsplit('/', 1)[-1]
    is_api_path = path == 'api' or path.startswith('api/')
    # Unknown API paths and missing asset files must stay 404s.
    if not is_api_path and '.' not in last_segment:
        candidates.append('index.html')

    for candidate in candidates:
        try:
            return send_from_directory(static_dir, candidate)
        except Exception:
            # Missing file or missing build directory: try the next candidate.
            continue

    return {
        'error': 'File not found',
        'path': path
    }, 404
|
||||
|
||||
|
||||
@root_bp.route('/api')
def api_info():
    """Describe the API version, base URL and top-level endpoint groups."""
    endpoints = {
        'auth': '/api/v1/auth',
        'files': '/api/v1/files',
        'jobs': '/api/v1/jobs',
        'admin': '/api/v1/admin',
        'health': '/api/v1/health'
    }
    description = 'Available endpoints provide RESTful API for document translation'
    return {
        'api_version': 'v1',
        'base_url': '/api/v1',
        'endpoints': endpoints,
        'documentation': description
    }
|
||||
|
||||
|
||||
@root_bp.route('/api/health')
def health_check():
    """Minimal health endpoint kept for compatibility.

    Returns:
        A ``(payload, 200)`` tuple; the payload carries the current UTC
        timestamp in ISO format.
    """
    payload = {
        'status': 'healthy',
        'timestamp': datetime.utcnow().isoformat(),
        'service': 'PANJIT Document Translator API',
        'version': '1.0.0'
    }
    return payload, 200
|
19
app/services/__init__.py
Normal file
19
app/services/__init__.py
Normal file
@@ -0,0 +1,19 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
業務服務模組
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
from .dify_client import DifyClient
|
||||
from .translation_service import TranslationService
|
||||
from .notification_service import NotificationService
|
||||
|
||||
__all__ = [
|
||||
'DifyClient',
|
||||
'TranslationService',
|
||||
'NotificationService'
|
||||
]
|
137
app/services/celery_service.py
Normal file
137
app/services/celery_service.py
Normal file
@@ -0,0 +1,137 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Celery任務管理服務
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2025-09-04
|
||||
"""
|
||||
|
||||
from celery import Celery
|
||||
from app.utils.logger import get_logger
|
||||
import os
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def get_celery_app():
    """Return the Celery application to use for control commands.

    The shared instance from ``celery_app`` is preferred. When that module
    cannot be imported, a bare ``Celery`` instance is created whose broker is
    taken from ``REDIS_URL`` (default ``redis://localhost:6379/0``).
    """
    try:
        from celery_app import app as shared_app
    except ImportError:
        redis_url = os.getenv('REDIS_URL', 'redis://localhost:6379/0')
        return Celery('translation_worker', broker=redis_url)
    return shared_app
|
||||
|
||||
|
||||
def revoke_task(job_uuid):
    """Send a revoke (terminate with SIGKILL) for the job's Celery task.

    The task id is derived as ``translate_document_<job_uuid>``.

    Args:
        job_uuid (str): UUID of the translation job.

    Returns:
        bool: ``True`` when the revoke command was sent without raising,
        ``False`` when any exception occurred (it is logged).
    """
    try:
        app = get_celery_app()
        task_id = f"translate_document_{job_uuid}"
        app.control.revoke(task_id, terminate=True, signal='SIGKILL')
        logger.info(f"Successfully revoked Celery task: {task_id}")
        return True
    except Exception as exc:
        logger.error(f"Failed to revoke Celery task for job {job_uuid}: {str(exc)}")
        return False
|
||||
|
||||
|
||||
def get_active_tasks():
    """Ask the Celery workers for their currently active tasks.

    Returns:
        dict: The result of ``inspect().active()``. An empty dict is returned
        when the inspection yields nothing or raises (the error is logged).
    """
    try:
        inspector = get_celery_app().control.inspect()
        return inspector.active() or {}
    except Exception as exc:
        logger.error(f"Failed to get active tasks: {str(exc)}")
        return {}
|
||||
|
||||
|
||||
def is_task_active(job_uuid):
    """Check whether the job's Celery task is reported as active by any worker.

    Args:
        job_uuid (str): UUID of the translation job.

    Returns:
        bool: ``True`` when a task with id ``translate_document_<job_uuid>``
        appears in the active-task listing; ``False`` otherwise, including
        when the check itself fails (the error is logged).
    """
    try:
        per_worker = get_active_tasks()
        wanted_id = f"translate_document_{job_uuid}"
        return any(
            task.get('id') == wanted_id
            for worker_tasks in per_worker.values()
            for task in worker_tasks
        )
    except Exception as exc:
        logger.error(f"Failed to check if task is active for job {job_uuid}: {str(exc)}")
        return False
|
||||
|
||||
|
||||
def cleanup_stale_tasks():
    """Mark stuck translation jobs as failed.

    A job is stale when it has status ``PROCESSING`` and its
    ``processing_started_at`` is more than 30 minutes in the past. Stale jobs
    whose task id (``translate_document_<job_uuid>``) is not among the
    currently active Celery tasks are set to ``FAILED``.

    The active-task listing is fetched once for the whole run instead of once
    per job, because each listing is a separate worker inspection.

    Returns:
        int: Number of jobs marked as failed; ``0`` when anything raises
        (the error is logged).
    """
    try:
        from app.models.job import TranslationJob
        from datetime import datetime, timedelta

        stale_threshold = datetime.utcnow() - timedelta(minutes=30)
        stale_jobs = TranslationJob.query.filter(
            TranslationJob.status == 'PROCESSING',
            TranslationJob.processing_started_at < stale_threshold
        ).all()
        if not stale_jobs:
            return 0

        # Single snapshot of the ids of all tasks the workers report as active.
        active_task_ids = {
            task.get('id')
            for worker_tasks in get_active_tasks().values()
            for task in (worker_tasks or ())
        }

        cleanup_count = 0
        for job in stale_jobs:
            if f"translate_document_{job.job_uuid}" in active_task_ids:
                continue
            # Not running on any worker: flag the job as failed.
            job.update_status('FAILED', error_message='任務處理超時,已自動取消')
            cleanup_count += 1
            logger.info(f"Cleaned up stale job: {job.job_uuid}")

        return cleanup_count

    except Exception as e:
        logger.error(f"Failed to cleanup stale tasks: {str(e)}")
        return 0
|
494
app/services/dify_client.py
Normal file
494
app/services/dify_client.py
Normal file
@@ -0,0 +1,494 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Dify API 客戶端服務
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
import time
|
||||
import requests
|
||||
from typing import Dict, Any, Optional
|
||||
from flask import current_app
|
||||
from app.utils.logger import get_logger
|
||||
from app.utils.exceptions import APIError
|
||||
from app.models.stats import APIUsageStats
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class DifyClient:
    """HTTP client for the Dify translation and OCR APIs.

    Settings are read from ``current_app.config`` when the client is
    constructed.
    """

    # Language names used in the translation prompt, keyed by language code.
    _LANGUAGE_NAMES = {
        'zh-tw': 'Traditional Chinese',
        'zh-cn': 'Simplified Chinese',
        'en': 'English',
        'ja': 'Japanese',
        'ko': 'Korean',
        'vi': 'Vietnamese',
        'th': 'Thai',
        'id': 'Indonesian',
        'ms': 'Malay',
        'es': 'Spanish',
        'fr': 'French',
        'de': 'German',
        'ru': 'Russian',
        'ar': 'Arabic'
    }

    def __init__(self):
        # Translation API settings
        self.translation_base_url = current_app.config.get('DIFY_TRANSLATION_BASE_URL', '')
        self.translation_api_key = current_app.config.get('DIFY_TRANSLATION_API_KEY', '')

        # OCR API settings
        self.ocr_base_url = current_app.config.get('DIFY_OCR_BASE_URL', '')
        self.ocr_api_key = current_app.config.get('DIFY_OCR_API_KEY', '')

        self.timeout = (10, 60)  # (connect timeout, read timeout) in seconds
        self.max_retries = 3
        self.retry_delay = 1.6  # base of the exponential back-off

        if not self.translation_base_url or not self.translation_api_key:
            logger.warning("Dify Translation API configuration is incomplete")

        if not self.ocr_base_url or not self.ocr_api_key:
            logger.warning("Dify OCR API configuration is incomplete")

    def _make_request(self, method: str, endpoint: str, data: Dict[str, Any] = None,
                      user_id: int = None, job_id: int = None, files_data: Dict = None,
                      api_type: str = 'translation') -> Dict[str, Any]:
        """Send a request to Dify with retries and return the decoded JSON.

        Args:
            method: ``'GET'`` sends a GET with ``data`` as query parameters;
                any other value sends a POST.
            endpoint: Path appended to the selected base URL.
            data: Query parameters (GET), form fields (upload) or JSON body.
            user_id: When truthy, every attempt is recorded as API usage.
            job_id: Job id stored with the usage record.
            files_data: When given, the POST is sent as multipart/form-data.
            api_type: ``'ocr'`` selects the OCR settings; anything else the
                translation settings.

        Returns:
            The decoded JSON response.

        Raises:
            APIError: When the selected API is not configured or every
                attempt failed.
        """
        # Pick the settings for the requested API.
        if api_type == 'ocr':
            base_url = self.ocr_base_url
            api_key = self.ocr_api_key
            if not base_url or not api_key:
                raise APIError("Dify OCR API 未配置完整")
        else:  # translation
            base_url = self.translation_base_url
            api_key = self.translation_api_key
            if not base_url or not api_key:
                raise APIError("Dify Translation API 未配置完整")

        url = f"{base_url.rstrip('/')}/{endpoint.lstrip('/')}"

        headers = {
            'Authorization': f'Bearer {api_key}',
            'User-Agent': 'PANJIT-Document-Translator/1.0'
        }

        # Multipart uploads must let requests set the Content-Type itself.
        if not files_data:
            headers['Content-Type'] = 'application/json'

        last_exception = None

        for attempt in range(self.max_retries):
            # Timed per attempt so that earlier failures and back-off sleeps
            # do not inflate the recorded response time.
            start_time = time.time()
            try:
                if method.upper() == 'GET':
                    response = requests.get(url, headers=headers, timeout=self.timeout, params=data)
                elif files_data:
                    response = requests.post(url, headers=headers, timeout=self.timeout, files=files_data, data=data)
                else:
                    response = requests.post(url, headers=headers, timeout=self.timeout, json=data)

                response_time_ms = int((time.time() - start_time) * 1000)

                response.raise_for_status()
                result = response.json()

                if user_id:
                    self._record_api_usage(
                        user_id=user_id,
                        job_id=job_id,
                        endpoint=endpoint,
                        response_data=result,
                        response_time_ms=response_time_ms,
                        success=True
                    )

                return result

            # ValueError covers a response body that is not valid JSON.
            except (requests.exceptions.RequestException, ValueError) as e:
                last_exception = e
                response_time_ms = int((time.time() - start_time) * 1000)

                if user_id:
                    self._record_api_usage(
                        user_id=user_id,
                        job_id=job_id,
                        endpoint=endpoint,
                        response_data={},
                        response_time_ms=response_time_ms,
                        success=False,
                        error_message=str(e)
                    )

                logger.warning(f"Dify API request failed (attempt {attempt + 1}): {str(e)}")

                # No sleep after the final attempt.
                if attempt == self.max_retries - 1:
                    break

                # Exponential back-off: retry_delay ** attempt seconds.
                delay = self.retry_delay ** attempt
                time.sleep(delay)

        error_msg = f"Dify API request failed after {self.max_retries} attempts: {str(last_exception)}"
        logger.error(error_msg)
        raise APIError(error_msg)

    def _record_api_usage(self, user_id: int, job_id: Optional[int], endpoint: str,
                          response_data: Dict, response_time_ms: int, success: bool,
                          error_message: str = None):
        """Record one API call through ``APIUsageStats.record_api_call``.

        Failures while recording are logged as warnings and never raised.
        """
        try:
            # Usage information is taken from the response's 'metadata' entry.
            metadata = response_data.get('metadata', {})

            APIUsageStats.record_api_call(
                user_id=user_id,
                job_id=job_id,
                api_endpoint=endpoint,
                metadata=metadata,
                response_time_ms=response_time_ms,
                success=success,
                error_message=error_message
            )
        except Exception as e:
            logger.warning(f"Failed to record API usage: {str(e)}")

    def translate_text(self, text: str, source_language: str, target_language: str,
                       user_id: int = None, job_id: int = None, conversation_id: str = None) -> Dict[str, Any]:
        """Translate ``text`` through the Dify chat-messages endpoint.

        Args:
            text: Text to translate; must contain non-whitespace characters.
            source_language: Source language code.
            target_language: Target language code.
            user_id: Optional user id for the request and usage records.
            job_id: Optional job id for usage records.
            conversation_id: Optional Dify conversation to continue.

        Returns:
            dict with ``success``, ``translated_text``, ``source_text``,
            ``source_language``, ``target_language``, ``conversation_id``
            and ``metadata``.

        Raises:
            APIError: When the text is empty or ``None``, the request fails,
                or Dify returns an empty answer.
        """
        # ``not text`` also rejects None instead of failing on ``.strip()``.
        if not text or not text.strip():
            raise APIError("翻譯文字不能為空")

        # Unknown codes are passed through to the prompt unchanged.
        source_lang_name = self._LANGUAGE_NAMES.get(source_language, source_language)
        target_lang_name = self._LANGUAGE_NAMES.get(target_language, target_language)

        query = f"""Task: Translate ONLY into {target_lang_name} from {source_lang_name}.

Rules:
- Output translation text ONLY (no source text, no notes, no questions, no language-detection remarks).
- Preserve original line breaks.
- Do NOT wrap in quotes or code blocks.
- Maintain original formatting and structure.

{text.strip()}"""

        request_data = {
            'inputs': {},
            'response_mode': 'blocking',
            'user': f"user_{user_id}" if user_id else "doc-translator-user",
            'query': query
        }

        # Continue an existing conversation when an id is supplied.
        if conversation_id:
            request_data['conversation_id'] = conversation_id

        logger.info("[TRANSLATION] Sending translation request...")
        logger.info(f"[TRANSLATION] Request data: {request_data}")
        logger.info(f"[TRANSLATION] Text length: {len(text)} characters")

        try:
            response = self._make_request(
                method='POST',
                endpoint='/chat-messages',
                data=request_data,
                user_id=user_id,
                job_id=job_id
            )

            answer = response.get('answer')

            if not isinstance(answer, str) or not answer.strip():
                raise APIError("Dify API 返回空的翻譯結果")

            return {
                'success': True,
                'translated_text': answer,
                'source_text': text,
                'source_language': source_language,
                'target_language': target_language,
                'conversation_id': response.get('conversation_id'),
                'metadata': response.get('metadata', {})
            }

        except APIError:
            raise
        except Exception as e:
            error_msg = f"翻譯請求處理錯誤: {str(e)}"
            logger.error(error_msg)
            raise APIError(error_msg)

    def test_connection(self) -> bool:
        """Probe the translation API with a small chat-messages request.

        NOTE(review): the probe payload carries no 'query' field, unlike the
        other chat-messages calls in this class - confirm that the Dify app
        accepts it.

        Returns:
            ``True`` when a response was obtained, ``False`` on any error.
        """
        try:
            test_data = {
                'inputs': {'text': 'test'},
                'response_mode': 'blocking',
                'user': 'health_check'
            }

            response = self._make_request(
                method='POST',
                endpoint='/chat-messages',
                data=test_data
            )

            return response is not None

        except Exception as e:
            logger.error(f"Dify API connection test failed: {str(e)}")
            return False

    def get_app_info(self) -> Dict[str, Any]:
        """Fetch the ``/parameters`` endpoint of the translation app.

        Returns:
            ``{'success': True, 'app_info': ...}`` on success, otherwise
            ``{'success': False, 'error': ...}``.
        """
        try:
            response = self._make_request(
                method='GET',
                endpoint='/parameters'
            )

            return {
                'success': True,
                'app_info': response
            }

        except Exception as e:
            logger.error(f"Failed to get Dify app info: {str(e)}")
            return {
                'success': False,
                'error': str(e)
            }

    @classmethod
    def load_config_from_file(cls, file_path: str = 'api.txt'):
        """Load Dify settings from a ``key: value`` text file into the app config.

        Recognised keys: ``base_url`` / ``translation_base_url``,
        ``api`` / ``translation_api``, ``ocr_base_url`` and ``ocr_api``.
        Blank lines and lines starting with ``#`` are skipped. A missing file
        only produces a warning; any other error is logged and not raised.

        Args:
            file_path: Path of the settings file.
        """
        try:
            from pathlib import Path

            config_file = Path(file_path)

            if not config_file.exists():
                logger.warning(f"Dify config file not found: {file_path}")
                return

            with open(config_file, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    if line.startswith('#') or not line:
                        continue  # skip comments and blank lines

                    # Translation settings (old key names are still accepted)
                    if line.startswith(('base_url:', 'translation_base_url:')):
                        base_url = line.split(':', 1)[1].strip()
                        current_app.config['DIFY_TRANSLATION_BASE_URL'] = base_url
                        # Legacy config key kept in sync
                        current_app.config['DIFY_API_BASE_URL'] = base_url
                    elif line.startswith(('api:', 'translation_api:')):
                        api_key = line.split(':', 1)[1].strip()
                        current_app.config['DIFY_TRANSLATION_API_KEY'] = api_key
                        # Legacy config key kept in sync
                        current_app.config['DIFY_API_KEY'] = api_key

                    # OCR settings
                    elif line.startswith('ocr_base_url:'):
                        ocr_base_url = line.split(':', 1)[1].strip()
                        current_app.config['DIFY_OCR_BASE_URL'] = ocr_base_url
                    elif line.startswith('ocr_api:'):
                        ocr_api_key = line.split(':', 1)[1].strip()
                        current_app.config['DIFY_OCR_API_KEY'] = ocr_api_key

            logger.info("Dify API config loaded from file")

        except Exception as e:
            logger.error(f"Failed to load Dify config from file: {str(e)}")

    def upload_file(self, image_data: bytes, filename: str, user_id: int = None) -> str:
        """Upload an image to the Dify OCR API and return its file id.

        The multipart content type is guessed from ``filename`` and falls
        back to ``image/png`` when it cannot be determined.

        Args:
            image_data: Raw image bytes; must not be empty.
            filename: File name sent with the upload.
            user_id: Optional user id for the request and usage records.

        Returns:
            The ``id`` field of the upload response.

        Raises:
            APIError: When the data is empty, the request fails, or the
                response carries no file id.
        """
        if not image_data:
            raise APIError("图片数据不能为空")

        logger.info("[OCR-UPLOAD] Starting file upload to Dify OCR API")
        logger.info(f"[OCR-UPLOAD] File: {filename}, Size: {len(image_data)} bytes, User: {user_id}")

        # Local import keeps this method self-contained.
        import mimetypes
        guessed_type, _ = mimetypes.guess_type(filename or '')
        content_type = guessed_type or 'image/png'

        files_data = {
            'file': (filename, image_data, content_type)
        }

        form_data = {
            'user': f"user_{user_id}" if user_id else "doc-translator-user"
        }

        try:
            response = self._make_request(
                method='POST',
                endpoint='/files/upload',
                data=form_data,
                files_data=files_data,
                user_id=user_id,
                api_type='ocr'
            )

            logger.info(f"[OCR-UPLOAD] Raw Dify upload response: {response}")

            file_id = response.get('id')
            if not file_id:
                logger.error(f"[OCR-UPLOAD] No file ID in response: {response}")
                raise APIError("Dify 文件上传失败:未返回文件ID")

            logger.info(f"[OCR-UPLOAD] ✓ File uploaded successfully: {file_id}")

            return file_id

        except APIError:
            raise
        except Exception as e:
            error_msg = f"文件上传到Dify失败: {str(e)}"
            logger.error(f"[OCR-UPLOAD] ✗ Upload failed: {error_msg}")
            raise APIError(error_msg)

    def ocr_image_with_dify(self, image_data: bytes, filename: str = "image.png",
                            user_id: int = None, job_id: int = None) -> str:
        """Run OCR on an image through Dify and return the recognised text.

        The image is uploaded first; the returned file id is then referenced
        in a chat-messages request sent to the OCR API.

        Args:
            image_data: Raw image bytes; must not be empty.
            filename: File name used for the upload.
            user_id: Optional user id for the requests and usage records.
            job_id: Optional job id for usage records.

        Returns:
            The stripped ``answer`` text of the OCR response.

        Raises:
            APIError: When the data is empty, a request fails, or the answer
                is empty.
        """
        # Validate before the size is logged so None yields an APIError.
        if not image_data:
            raise APIError("图片数据不能为空")

        logger.info(f"[OCR-RECOGNITION] Starting OCR process for {filename}")
        logger.info(f"[OCR-RECOGNITION] Image size: {len(image_data)} bytes, User: {user_id}, Job: {job_id}")

        try:
            # Step 1: upload the image to obtain a file id.
            logger.info("[OCR-RECOGNITION] Step 1: Uploading image to Dify...")
            file_id = self.upload_file(image_data, filename, user_id)
            logger.info(f"[OCR-RECOGNITION] Step 1 ✓ File uploaded with ID: {file_id}")

            # Step 2: the user query sent along with the image.
            query = "將圖片中的文字完整的提取出來"
            logger.info("[OCR-RECOGNITION] Step 2: Preparing OCR request...")

            # Step 3: reference the uploaded image in the 'files' array.
            request_data = {
                'inputs': {},
                'response_mode': 'blocking',
                'user': f"user_{user_id}" if user_id else "doc-translator-user",
                'query': query,
                'files': [
                    {
                        'type': 'image',
                        'transfer_method': 'local_file',
                        'upload_file_id': file_id
                    }
                ]
            }

            logger.info("[OCR-RECOGNITION] Step 3: Sending OCR request to Dify...")
            logger.info(f"[OCR-RECOGNITION] Request data: {request_data}")
            logger.info(f"[OCR-RECOGNITION] Using OCR API: {self.ocr_base_url}")

            response = self._make_request(
                method='POST',
                endpoint='/chat-messages',
                data=request_data,
                user_id=user_id,
                job_id=job_id,
                api_type='ocr'
            )

            logger.info("[OCR-RECOGNITION] Step 3 ✓ Received response from Dify")
            logger.info(f"[OCR-RECOGNITION] Raw Dify OCR response: {response}")

            answer = response.get('answer', '')
            metadata = response.get('metadata', {})
            conversation_id = response.get('conversation_id', '')

            logger.info("[OCR-RECOGNITION] Response details:")
            logger.info(f"[OCR-RECOGNITION] - Answer length: {len(answer) if answer else 0} characters")
            logger.info(f"[OCR-RECOGNITION] - Conversation ID: {conversation_id}")
            logger.info(f"[OCR-RECOGNITION] - Metadata: {metadata}")

            if not isinstance(answer, str) or not answer.strip():
                logger.error("[OCR-RECOGNITION] ✗ Empty or invalid answer from Dify")
                logger.error(f"[OCR-RECOGNITION] Answer type: {type(answer)}, Content: '{answer}'")
                raise APIError("Dify OCR 返回空的识别结果")

            logger.info("[OCR-RECOGNITION] ✓ OCR completed successfully")
            logger.info(f"[OCR-RECOGNITION] Extracted {len(answer)} characters")

            return answer.strip()

        except APIError:
            raise
        except Exception as e:
            error_msg = f"Dify OCR识别失败: {str(e)}"
            logger.error(f"[OCR-RECOGNITION] ✗ OCR process failed: {error_msg}")
            logger.error(f"[OCR-RECOGNITION] Exception details: {type(e).__name__}: {str(e)}")
            raise APIError(error_msg)
|
||||
|
||||
|
||||
def init_dify_config(app):
    """Load the Dify settings for ``app`` and log which APIs are configured.

    Inside an application context the settings file is loaded through
    ``DifyClient.load_config_from_file()``; afterwards the presence of the
    translation and OCR base URL / API key pairs is reported in the log.

    Args:
        app: The Flask application whose config is populated and inspected.
    """
    with app.app_context():
        DifyClient.load_config_from_file()

        settings = app.config
        translation_base_url = settings.get('DIFY_TRANSLATION_BASE_URL')
        translation_api_key = settings.get('DIFY_TRANSLATION_API_KEY')
        ocr_base_url = settings.get('DIFY_OCR_BASE_URL')
        ocr_api_key = settings.get('DIFY_OCR_API_KEY')

        logger.info("Dify API Configuration Status:")

        if not (translation_base_url and translation_api_key):
            logger.warning("✗ Translation API configuration is incomplete")
            logger.warning(f" - Translation Base URL: {'✓' if translation_base_url else '✗'}")
            logger.warning(f" - Translation API Key: {'✓' if translation_api_key else '✗'}")
        else:
            logger.info("✓ Translation API configured successfully")

        if not (ocr_base_url and ocr_api_key):
            logger.warning("✗ OCR API configuration is incomplete (扫描PDF功能将不可用)")
            logger.warning(f" - OCR Base URL: {'✓' if ocr_base_url else '✗'}")
            logger.warning(f" - OCR API Key: {'✓' if ocr_api_key else '✗'}")
        else:
            logger.info("✓ OCR API configured successfully")
|
864
app/services/document_processor.py
Normal file
864
app/services/document_processor.py
Normal file
@@ -0,0 +1,864 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
核心文檔處理邏輯 - 移植自最佳版本
|
||||
包含完整的 DOCX 文字提取和翻譯插入功能
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-09-02
|
||||
Modified: 2024-09-02
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Tuple, Optional, Any
|
||||
from docx.text.paragraph import Paragraph
|
||||
from docx.table import Table, _Cell
|
||||
from docx.shared import Pt
|
||||
from docx.oxml import OxmlElement
|
||||
from docx.oxml.ns import qn, nsdecls
|
||||
import docx
|
||||
|
||||
from app.utils.logger import get_logger
|
||||
from app.utils.exceptions import FileProcessingError
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# ---------- Constants ----------
# Default font size (in points) passed to Pt() for inserted translation runs.
INSERT_FONT_SIZE_PT = 10
# When true, _split_sentences may use the optional blingfire / pysbd
# segmenters before falling back to its separator-based splitting.
SENTENCE_MODE = True

# ---------- Optional dependencies detection ----------
# blingfire is optional: record whether the import succeeded so callers can
# test the flag instead of importing again.
try:
    import blingfire
    _HAS_BLINGFIRE = True
except ImportError:
    _HAS_BLINGFIRE = False

# pysbd is optional as well; same pattern as above.
try:
    import pysbd
    _HAS_PYSBD = True
except ImportError:
    _HAS_PYSBD = False
|
||||
|
||||
# ---------- Helper functions ----------
|
||||
def _has_cjk(text: str) -> bool:
|
||||
"""Check if text contains CJK (Chinese/Japanese/Korean) characters."""
|
||||
for char in text:
|
||||
if '\u4e00' <= char <= '\u9fff' or \
|
||||
'\u3400' <= char <= '\u4dbf' or \
|
||||
'\u20000' <= char <= '\u2a6df' or \
|
||||
'\u3040' <= char <= '\u309f' or \
|
||||
'\u30a0' <= char <= '\u30ff' or \
|
||||
'\uac00' <= char <= '\ud7af':
|
||||
return True
|
||||
return False
|
||||
|
||||
def _normalize_text(text: str) -> str:
|
||||
"""Normalize text for comparison."""
|
||||
return re.sub(r'\s+', ' ', text.strip().lower())
|
||||
|
||||
def _append_after(p: Paragraph, text_block: str, italic: bool=True, font_size_pt: int=INSERT_FONT_SIZE_PT) -> Paragraph:
    """Create a paragraph directly after ``p`` holding ``text_block``.

    Each "\\n"-separated line becomes its own run, with a break added after
    every run except the last. A final run containing a zero-width space
    ("\\u200b") is appended as the marker other helpers look for.

    Args:
        p: Paragraph after which the new paragraph element is inserted.
        text_block: Text to write; may contain "\\n".
        italic: When truthy, every run (including the marker) is italic.
        font_size_pt: When truthy, font size in points applied to every run.

    Returns:
        The new paragraph, so further blocks can be chained after it.
    """
    def _apply_format(run):
        # Shared formatting for text runs and the marker run.
        if italic:
            run.italic = True
        if font_size_pt:
            run.font.size = Pt(font_size_pt)

    element = OxmlElement("w:p")
    p._p.addnext(element)
    inserted = Paragraph(element, p._parent)

    pieces = text_block.split("\n")
    last_pos = len(pieces) - 1
    for pos, piece in enumerate(pieces):
        run = inserted.add_run(piece)
        _apply_format(run)
        if pos != last_pos:
            run.add_break()

    _apply_format(inserted.add_run("\u200b"))
    return inserted
|
||||
|
||||
def _is_our_insert_block(p: Paragraph) -> bool:
    """Return True iff any run of the paragraph contains the zero-width marker."""
    # NOTE(review): this module defines `_is_our_insert_block` a second time
    # further down; that later definition replaces this one at import time.
    for run in p.runs:
        if "\u200b" in (run.text or ""):
            return True
    return False
|
||||
|
||||
def _find_last_inserted_after(p: Paragraph, limit: int = 8) -> Optional[Paragraph]:
    """Return the last marker paragraph in the run that directly follows ``p``.

    Scans at most ``limit`` paragraphs after ``p`` in ``p._parent.paragraphs``
    and stops at the first one that is not an inserted block.

    Returns:
        The last inserted paragraph found, or None when the parent has no
        ``paragraphs`` attribute, ``p`` is not found among them, nothing
        inserted follows ``p``, or any exception is raised while scanning.
    """
    try:
        parent = p._parent
        if not hasattr(parent, 'paragraphs'):
            return None
        siblings = list(parent.paragraphs)

        # Locate p by comparing the underlying XML elements.
        start = -1
        for position, candidate in enumerate(siblings):
            if candidate._element == p._element:
                start = position
                break
        if start == -1:
            return None

        latest = None
        stop = min(start + 1 + limit, len(siblings))
        for position in range(start + 1, stop):
            candidate = siblings[position]
            if not _is_our_insert_block(candidate):
                break  # first non-inserted paragraph ends the run
            latest = candidate
    except Exception:
        return None

    return latest
|
||||
|
||||
def _p_text_with_breaks(p: Paragraph) -> str:
    """Return the paragraph text, rendering breaks as "\\n" and tabs as "\\t".

    Walks every descendant element whose local name is ``t``, ``br`` or
    ``tab`` in document order and concatenates their textual equivalents.
    """
    literal_for = {"br": "\n", "tab": "\t"}
    chunks = []
    for node in p._element.xpath(".//*[local-name()='t' or local-name()='br' or local-name()='tab']"):
        # Drop the "{namespace}" prefix to get the local tag name.
        local = node.tag.split('}', 1)[-1]
        if local == "t":
            chunks.append(node.text or "")
        elif local in literal_for:
            chunks.append(literal_for[local])
    return "".join(chunks)
|
||||
|
||||
def _get_cell_full_text(cell) -> str:
|
||||
"""
|
||||
提取表格儲存格的完整文字內容,包含所有段落
|
||||
"""
|
||||
try:
|
||||
cell_texts = []
|
||||
for para in cell.paragraphs:
|
||||
para_text = _p_text_with_breaks(para)
|
||||
if para_text.strip():
|
||||
cell_texts.append(para_text.strip())
|
||||
|
||||
# 用換行符連接所有段落
|
||||
return '\n'.join(cell_texts)
|
||||
except Exception as e:
|
||||
logger.warning(f"提取儲存格文字失敗: {e}")
|
||||
return ""
|
||||
|
||||
def _is_our_insert_block_text(text: str) -> bool:
|
||||
"""檢查文字是否為翻譯插入區塊"""
|
||||
if not text:
|
||||
return False
|
||||
text_lower = text.lower().strip()
|
||||
return (
|
||||
text_lower.startswith('【') or
|
||||
text_lower.startswith('[翻譯') or
|
||||
'翻譯:' in text_lower or
|
||||
'translation:' in text_lower or
|
||||
text_lower.startswith('translated:') or
|
||||
"\u200b" in text
|
||||
)
|
||||
|
||||
def _is_our_insert_block(p: Paragraph) -> bool:
    """Tell whether the paragraph is one of our inserted translations.

    A paragraph counts as inserted when its extracted text (breaks included)
    contains the zero-width space marker.
    """
    return "\u200b" in _p_text_with_breaks(p)
|
||||
|
||||
def should_translate(text: str, src_lang: str) -> bool:
    """Decide whether a piece of text is worth sending for translation.

    Args:
        text: Candidate text; surrounding whitespace is ignored.
        src_lang: Source language code; 'auto' / 'auto-detect' (any case)
            enables the stricter auto-detect rule.

    Returns:
        False for empty text and for text made only of digits, whitespace
        and the characters ``. - : /``. In auto-detect mode, True only when
        the text contains CJK characters or is longer than 5 characters.
        True in every other case.
    """
    stripped = text.strip()

    # Any non-empty text qualifies - the minimum length is 1.
    if not stripped:
        return False

    # Pure numbers, dates, times and similar are left untouched.
    if re.match(r'^[\d\s\.\-\:\/]+$', stripped) is not None:
        return False

    if src_lang.lower() not in ('auto', 'auto-detect'):
        return True

    return _has_cjk(stripped) or len(stripped) > 5
|
||||
|
||||
def _split_sentences(text: str, lang: str = 'auto') -> List[str]:
|
||||
"""Split text into sentences using available libraries."""
|
||||
if not text.strip():
|
||||
return []
|
||||
|
||||
# Try blingfire first
|
||||
if _HAS_BLINGFIRE and SENTENCE_MODE:
|
||||
try:
|
||||
sentences = blingfire.text_to_sentences(text).split('\n')
|
||||
sentences = [s.strip() for s in sentences if s.strip()]
|
||||
if sentences:
|
||||
return sentences
|
||||
except Exception as e:
|
||||
logger.warning(f"Blingfire failed: {e}")
|
||||
|
||||
# Try pysbd
|
||||
if _HAS_PYSBD and SENTENCE_MODE:
|
||||
try:
|
||||
seg = pysbd.Segmenter(language="en" if lang == "auto" else lang)
|
||||
sentences = seg.segment(text)
|
||||
sentences = [s.strip() for s in sentences if s.strip()]
|
||||
if sentences:
|
||||
return sentences
|
||||
except Exception as e:
|
||||
logger.warning(f"PySBD failed: {e}")
|
||||
|
||||
# Fallback to simple splitting
|
||||
separators = ['. ', '。', '!', '?', '!', '?', '\n']
|
||||
sentences = [text]
|
||||
|
||||
for sep in separators:
|
||||
new_sentences = []
|
||||
for s in sentences:
|
||||
parts = s.split(sep)
|
||||
if len(parts) > 1:
|
||||
new_sentences.extend([p.strip() + sep.rstrip() for p in parts[:-1] if p.strip()])
|
||||
if parts[-1].strip():
|
||||
new_sentences.append(parts[-1].strip())
|
||||
else:
|
||||
new_sentences.append(s)
|
||||
sentences = new_sentences
|
||||
|
||||
return [s for s in sentences if len(s.strip()) > 3]
|
||||
|
||||
# ---------- Segment class ----------
|
||||
class Segment:
    """Represents a translatable text segment in a document.

    Attributes are plain instance fields set by the constructor:
    ``kind``, ``ref``, ``ctx`` and ``text``.
    """

    def __init__(self, kind: str, ref: Any, ctx: str, text: str):
        # Segment type; this module creates 'para', 'table_cell' and 'txbx'.
        self.kind = kind
        # Reference to the original document element the text came from.
        self.ref = ref
        # Human-readable location of the segment (e.g. "Body", "TextBox").
        self.ctx = ctx
        # Source text content.
        self.text = text

    def __repr__(self) -> str:
        """Return a short debug representation; long text is truncated."""
        preview = self.text if len(self.text) <= 30 else self.text[:30] + "..."
        return (
            f"{type(self).__name__}(kind={self.kind!r}, "
            f"ctx={self.ctx!r}, text={preview!r})"
        )
|
||||
|
||||
# ---------- TextBox helpers ----------
|
||||
def _txbx_iter_texts(doc: docx.Document):
    """
    Yield ``(txbxContent_element, joined_source_text)`` for every text box.

    - All descendant paragraph elements under each ``txbxContent`` are read.
    - Paragraphs that are blank or contain the zero-width marker (our own
      inserted translations) are skipped.
    - The remaining text is split on line breaks; every non-blank line is
      stripped and the lines are joined with "\\n".
    - Text boxes with no remaining lines are not yielded.

    Tabs are rendered as a single space here.
    """
    def _paragraph_text(p_el) -> str:
        # Text of one paragraph element: breaks -> "\n", tabs -> " ".
        parts = []
        for node in p_el.xpath(".//*[local-name()='t' or local-name()='br' or local-name()='tab']"):
            tag = node.tag.split('}', 1)[-1]
            if tag == "t":
                parts.append(node.text or "")
            elif tag == "br":
                parts.append("\n")
            else:
                parts.append(" ")
        return "".join(parts)

    for tx in doc._element.xpath(".//*[local-name()='txbxContent']"):
        kept = []
        for p_el in tx.xpath(".//*[local-name()='p']"):  # all descendant paragraphs
            text = _paragraph_text(p_el)
            if not text.strip():
                continue
            if "\u200b" in text:
                continue  # one of our inserted translations
            kept.extend(line.strip() for line in text.split("\n") if line.strip())
        if kept:
            yield tx, "\n".join(kept)
|
||||
|
||||
def _txbx_append_paragraph(tx, text_block: str, italic: bool = True, font_size_pt: int = INSERT_FONT_SIZE_PT):
    """Append one paragraph holding ``text_block`` to a text box element.

    The paragraph contains a single run: optional italic and size properties,
    one text node per "\\n"-separated line with a break between lines, and a
    trailing zero-width-space text node used as our insertion marker.

    Args:
        tx: Element the new paragraph is appended to.
        text_block: Text to write; may contain "\\n".
        italic: When truthy, the run is marked italic.
        font_size_pt: When truthy, size in points (written as half-points).
    """
    def _text_element(content):
        # <w:t xml:space="preserve"> node carrying the given content.
        node = OxmlElement("w:t")
        node.set(qn("xml:space"), "preserve")
        node.text = content
        return node

    run_props = OxmlElement("w:rPr")
    if italic:
        run_props.append(OxmlElement("w:i"))
    if font_size_pt:
        size = OxmlElement("w:sz")
        size.set(qn("w:val"), str(int(font_size_pt * 2)))
        run_props.append(size)

    run = OxmlElement("w:r")
    run.append(run_props)

    first_line, *other_lines = text_block.split("\n")
    run.append(_text_element(first_line))
    for extra in other_lines:
        run.append(OxmlElement("w:br"))
        run.append(_text_element(extra))
    run.append(_text_element("\u200b"))

    paragraph = OxmlElement("w:p")
    paragraph.append(run)
    tx.append(paragraph)
|
||||
|
||||
def _txbx_tail_equals(tx, translations: List[str]) -> bool:
|
||||
"""Check if textbox already contains the expected translations."""
|
||||
paras = tx.xpath("./*[local-name()='p']")
|
||||
if len(paras) < len(translations):
|
||||
return False
|
||||
tail = paras[-len(translations):]
|
||||
for q, expect in zip(tail, translations):
|
||||
parts = []
|
||||
for node in q.xpath(".//*[local-name()='t' or local-name()='br']"):
|
||||
tag = node.tag.split("}", 1)[-1]
|
||||
parts.append("\n" if tag == "br" else (node.text or ""))
|
||||
if _normalize_text("".join(parts).strip()) != _normalize_text(expect):
|
||||
return False
|
||||
return True
|
||||
|
||||
# ---------- Main extraction logic ----------
|
||||
def _get_paragraph_key(p: Paragraph) -> str:
    """Build the key used to detect paragraphs that were already collected.

    The key combines the hash of the paragraph XML (or of ``str(p._p)`` when
    no ``xml`` attribute exists), the text length and the first 50 characters
    of the text. If that fails, a text-only "fallback_" key is returned.
    """
    try:
        element = p._p
        if hasattr(element, 'xml'):
            serialized = element.xml
        else:
            serialized = str(element)
        body = _p_text_with_breaks(p)
        return f"{hash(serialized)}_{len(body)}_{body[:50]}"
    except Exception:
        # Simpler key based on the text alone.
        body = _p_text_with_breaks(p)
        return f"fallback_{hash(body)}_{len(body)}"
|
||||
|
||||
def _collect_docx_segments(doc: docx.Document) -> List[Segment]:
    """
    Collect every translatable segment of a DOCX document.

    Walks the document body recursively and handles paragraphs, tables,
    and SDT content controls, then adds the document's text boxes.

    Args:
        doc: Loaded document; ``doc._body`` and ``doc._element`` are read.

    Returns:
        Segments in the order they were found: 'para' segments (paragraphs,
        plus SDT placeholder / dropdown text whose ``ref`` is the SDT XML
        element), 'table_cell' segments (one per cell, ``ref`` is the cell)
        and 'txbx' segments (``ref`` is the txbxContent element).
    """
    segs: List[Segment] = []
    # Keys of paragraphs already turned into segments (see _get_paragraph_key).
    seen_par_keys = set()

    def _add_paragraph(p: Paragraph, ctx: str):
        # Add p as a 'para' segment unless it is blank, one of our inserted
        # translations, or its key was already seen.
        # NOTE(review): the key is derived from the paragraph XML and text, so
        # two distinct paragraphs with identical XML would presumably be
        # treated as duplicates - confirm this is intended.
        try:
            p_key = _get_paragraph_key(p)
            if p_key in seen_par_keys:
                return

            txt = _p_text_with_breaks(p)
            if txt.strip() and not _is_our_insert_block(p):
                segs.append(Segment("para", p, ctx, txt))
                seen_par_keys.add(p_key)
        except Exception as e:
            # Log error but continue processing
            logger.warning(f"段落處理錯誤: {e}, 跳過此段落")

    def _process_container_content(container, ctx: str):
        """
        Recursively processes content within a container (body, cell, or SDT content).
        Identifies and handles paragraphs, tables, and SDT elements.

        ``container`` only needs an ``_element`` attribute to iterate; it is
        also passed on as the parent of the Paragraph / Table wrappers.
        """
        if container._element is None:
            return

        for child_element in container._element:
            qname = child_element.tag

            if qname.endswith('}p'):  # Paragraph
                p = Paragraph(child_element, container)
                _add_paragraph(p, ctx)

            elif qname.endswith('}tbl'):  # Table
                table = Table(child_element, container)
                for r_idx, row in enumerate(table.rows, 1):
                    for c_idx, cell in enumerate(row.cells, 1):
                        cell_ctx = f"{ctx} > Tbl(r{r_idx},c{c_idx})"

                        # Extract text per cell (not paragraph by paragraph)
                        cell_text = _get_cell_full_text(cell)
                        if cell_text.strip() and not _is_our_insert_block_text(cell_text):
                            segs.append(Segment("table_cell", cell, cell_ctx, cell_text))

            elif qname.endswith('}sdt'):  # Structured Document Tag (SDT)
                sdt_ctx = f"{ctx} > SDT"

                # 1. Extract the SDT's metadata text (placeholder, dropdown items)
                ns = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}

                # Placeholder text
                placeholder_texts = []
                for t in child_element.xpath('.//w:placeholder//w:t', namespaces=ns):
                    if t.text:
                        placeholder_texts.append(t.text)
                if placeholder_texts:
                    full_placeholder = "".join(placeholder_texts).strip()
                    if full_placeholder:
                        segs.append(Segment("para", child_element, f"{sdt_ctx}-Placeholder", full_placeholder))

                # Dropdown list items (their display texts, one per line)
                list_items = []
                for item in child_element.xpath('.//w:dropDownList/w:listItem', namespaces=ns):
                    display_text = item.get(qn('w:displayText'))
                    if display_text:
                        list_items.append(display_text)
                if list_items:
                    items_as_text = "\n".join(list_items)
                    segs.append(Segment("para", child_element, f"{sdt_ctx}-Dropdown", items_as_text))

                # 2. Recurse into the SDT's actual content (sdtContent)
                sdt_content_element = child_element.find(qn('w:sdtContent'))
                if sdt_content_element is not None:
                    # Minimal stand-in exposing the two attributes that the
                    # recursion and the Paragraph wrappers rely on.
                    class SdtContentWrapper:
                        def __init__(self, element, parent):
                            self._element = element
                            self._parent = parent

                    sdt_content_wrapper = SdtContentWrapper(sdt_content_element, container)
                    _process_container_content(sdt_content_wrapper, sdt_ctx)

    # --- Main execution starts here ---

    # 1. Process the main document body
    _process_container_content(doc._body, "Body")

    # 2. Process textboxes
    for tx, s in _txbx_iter_texts(doc):
        if s.strip() and (_has_cjk(s) or should_translate(s, 'auto')):
            segs.append(Segment("txbx", tx, "TextBox", s))

    return segs
|
||||
|
||||
def _insert_docx_translations(doc: docx.Document, segs: List[Segment],
                              tmap: Dict[Tuple[str, str], str],
                              targets: List[str], log=lambda s: None) -> Tuple[int, int]:
    """
    Insert translations into DOCX document segments.

    A segment is processed only if at least one target language has an entry
    in ``tmap`` under the key ``(target_lang, seg.text)``; otherwise it is
    counted as skipped. Target languages without an entry get a visible
    "lookup failed" placeholder string instead of a translation.

    Args:
        doc: The DOCX document object
        segs: List of segments to translate
        tmap: Translation map with keys as (target_language, source_text)
        targets: List of target languages in order
        log: Logging function taking one message string

    Returns:
        Tuple of (successful_insertions, skipped_insertions)

    Key Bug Fix (kept from the original notes):
        OLD (INCORRECT): if (seg.kind, seg.text) not in tmap and (targets[0], seg.text) not in tmap
        NEW (CORRECT): has_any_translation = any((tgt, seg.text) in tmap for tgt in targets)
    """
    ok_cnt = skip_cnt = 0

    # Helper function to add a formatted run to a paragraph
    def _add_formatted_run(p: Paragraph, text: str, italic: bool, font_size_pt: int):
        lines = text.split("\n")
        for i, line in enumerate(lines):
            run = p.add_run(line)
            if italic:
                run.italic = True
            if font_size_pt:
                run.font.size = Pt(font_size_pt)
            if i < len(lines) - 1:
                run.add_break()
        # Add our zero-width space marker
        tag_run = p.add_run("\u200b")
        if italic:
            tag_run.italic = True
        if font_size_pt:
            tag_run.font.size = Pt(font_size_pt)

    for seg in segs:
        # Check if any target language has translation for this segment
        has_any_translation = any((tgt, seg.text) in tmap for tgt in targets)
        if not has_any_translation:
            log(f"[SKIP] 無翻譯結果: {seg.ctx} | {seg.text[:50]}...")
            skip_cnt += 1
            continue

        # Get translations for all targets, with fallback for missing ones
        translations = []
        for tgt in targets:
            if (tgt, seg.text) in tmap:
                translations.append(tmap[(tgt, seg.text)])
            else:
                log(f"[WARNING] 缺少 {tgt} 翻譯: {seg.text[:30]}...")
                translations.append(f"【翻譯查詢失敗|{tgt}】{seg.text[:50]}...")

        log(f"[INSERT] 準備插入 {len(translations)} 個翻譯到 {seg.ctx}: {seg.text[:30]}...")

        if seg.kind == "para":
            # Check if this is an SDT segment (ref is an XML element, not a Paragraph)
            if hasattr(seg.ref, 'tag') and seg.ref.tag.endswith('}sdt'):
                # Handle SDT segments - insert translation into sdtContent
                sdt_element = seg.ref
                ns = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
                sdt_content = sdt_element.find(qn('w:sdtContent'))

                if sdt_content is not None:
                    # Check if translations already exist
                    existing_paras = sdt_content.xpath('.//w:p', namespaces=ns)
                    existing_texts = []
                    for ep in existing_paras:
                        p_obj = Paragraph(ep, None)
                        if _is_our_insert_block(p_obj):
                            existing_texts.append(_p_text_with_breaks(p_obj))

                    # Check if all translations already exist
                    if len(existing_texts) >= len(translations):
                        if all(_normalize_text(e) == _normalize_text(t) for e, t in zip(existing_texts[:len(translations)], translations)):
                            skip_cnt += 1
                            log(f"[SKIP] SDT 已存在翻譯: {seg.text[:30]}...")
                            continue

                    # Add translations to SDT content
                    for t in translations:
                        if not any(_normalize_text(t) == _normalize_text(e) for e in existing_texts):
                            # Create new paragraph in SDT content
                            new_p_element = OxmlElement("w:p")
                            sdt_content.append(new_p_element)
                            new_p = Paragraph(new_p_element, None)
                            _add_formatted_run(new_p, t, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)

                    ok_cnt += 1
                    log(f"[SUCCESS] SDT 插入翻譯(交錯格式)")
                # SDT segments never fall through to the paragraph handling below.
                continue

            p: Paragraph = seg.ref

            # --- CONTEXT-AWARE INSERTION LOGIC (from successful version) ---
            # Check if the paragraph's parent is a table cell
            if isinstance(p._parent, _Cell):
                cell = p._parent

                try:
                    # Find the current paragraph's position in the cell
                    cell_paragraphs = list(cell.paragraphs)
                    p_index = -1
                    for idx, cell_p in enumerate(cell_paragraphs):
                        if cell_p._element == p._element:
                            p_index = idx
                            break

                    if p_index == -1:
                        log(f"[WARNING] 無法找到段落在單元格中的位置,使用原始方法")
                        # Fallback to original method: append at the end of the cell
                        for block in translations:
                            new_p = cell.add_paragraph()
                            _add_formatted_run(new_p, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
                        ok_cnt += 1
                        continue

                    # Check if translations already exist right after this paragraph
                    existing_texts = []
                    check_limit = min(p_index + 1 + len(translations), len(cell_paragraphs))
                    for idx in range(p_index + 1, check_limit):
                        if _is_our_insert_block(cell_paragraphs[idx]):
                            existing_texts.append(_p_text_with_breaks(cell_paragraphs[idx]))

                    # Check if all translations already exist in order
                    if len(existing_texts) >= len(translations):
                        if all(_normalize_text(e) == _normalize_text(t) for e, t in zip(existing_texts[:len(translations)], translations)):
                            skip_cnt += 1
                            log(f"[SKIP] 表格單元格已存在翻譯: {seg.text[:30]}...")
                            continue

                    # Determine which translations need to be added
                    to_add = []
                    for t in translations:
                        if not any(_normalize_text(t) == _normalize_text(e) for e in existing_texts):
                            to_add.append(t)

                    if not to_add:
                        skip_cnt += 1
                        log(f"[SKIP] 表格單元格所有翻譯已存在: {seg.text[:30]}...")
                        continue

                    # Insert new paragraphs right after the current paragraph
                    insert_after = p
                    for block in to_add:
                        try:
                            # Create new paragraph and insert it after the current position
                            new_p_element = OxmlElement("w:p")
                            insert_after._element.addnext(new_p_element)
                            new_p = Paragraph(new_p_element, cell)
                            _add_formatted_run(new_p, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
                            insert_after = new_p  # Update position for next insertion
                        except Exception as e:
                            log(f"[ERROR] 表格插入失敗: {e}, 嘗試fallback方法")
                            # Fallback: add at the end of cell
                            try:
                                new_p = cell.add_paragraph()
                                _add_formatted_run(new_p, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
                                log(f"[SUCCESS] Fallback插入成功")
                            except Exception as e2:
                                log(f"[FATAL] Fallback也失敗: {e2}")
                                continue
                    # Counted once per segment, regardless of per-block failures above.
                    ok_cnt += 1
                    log(f"[SUCCESS] 表格單元格插入 {len(to_add)} 個翻譯(緊接原文後)")

                except Exception as e:
                    log(f"[ERROR] 表格處理全面失敗: {e}, 跳過此段落")
                    continue

            else:
                # Normal paragraph (not in table cell) - SIMPLIFIED FOR DEBUGGING
                try:
                    # TEMPORARILY DISABLE existing translation check to force insertion
                    # NOTE(review): with no duplicate check here, processing a
                    # document that already holds translations presumably
                    # inserts them again - confirm before relying on re-runs.
                    log(f"[DEBUG] 強制插入翻譯到段落: {seg.text[:30]}...")

                    # Force all translations to be added
                    to_add = translations

                    # Use simple positioning - always insert after current paragraph
                    anchor = p

                    for block in to_add:
                        try:
                            log(f"[DEBUG] 嘗試插入: {block[:50]}...")
                            anchor = _append_after(anchor, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
                            log(f"[SUCCESS] _append_after成功插入")
                        except Exception as e:
                            log(f"[ERROR] _append_after失敗: {e}, 嘗試簡化插入")
                            try:
                                # Fallback: simple append
                                if hasattr(p._parent, 'add_paragraph'):
                                    new_p = p._parent.add_paragraph()
                                    _add_formatted_run(new_p, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
                                    log(f"[SUCCESS] Fallback段落插入成功")
                                else:
                                    log(f"[ERROR] 無法進行fallback插入")
                            except Exception as e2:
                                log(f"[FATAL] Fallback也失敗: {e2}")
                                continue

                    ok_cnt += 1
                    log(f"[SUCCESS] 段落強制插入 {len(to_add)} 個翻譯")

                except Exception as e:
                    log(f"[ERROR] 段落處理失敗: {e}, 跳過此段落")
                    continue

        elif seg.kind == "table_cell":
            # Insert translations for a whole table cell
            cell = seg.ref  # cell is a _Cell object

            # Check whether the cell already holds translations
            existing_translations = []
            cell_paragraphs = list(cell.paragraphs)

            # Walk backwards from the end of the cell, collecting the trailing
            # run of inserted paragraphs (kept in document order)
            translation_start_index = len(cell_paragraphs)
            for i in range(len(cell_paragraphs) - 1, -1, -1):
                if _is_our_insert_block(cell_paragraphs[i]):
                    existing_translations.insert(0, _p_text_with_breaks(cell_paragraphs[i]))
                    translation_start_index = i
                else:
                    break

            # Skip when every translation is already present and identical
            if len(existing_translations) >= len(translations):
                if all(_normalize_text(e) == _normalize_text(t) for e, t in zip(existing_translations[:len(translations)], translations)):
                    skip_cnt += 1
                    log(f"[SKIP] 表格儲存格已存在翻譯: {seg.text[:30]}...")
                    continue

            # Remove the old translation paragraphs (if any)
            for i in range(len(cell_paragraphs) - 1, translation_start_index - 1, -1):
                if _is_our_insert_block(cell_paragraphs[i]):
                    cell._element.remove(cell_paragraphs[i]._element)

            # Is this a simple short-text cell (source text only, no complex structure)?
            cell_content = cell.text.strip()
            is_simple_cell = len(cell_content) <= 10 and cell_content == seg.text.strip()

            if is_simple_cell:
                # For simple short text, rewrite the content in place instead of adding paragraphs
                log(f"[INFO] 簡單儲存格內容替換: '{seg.text.strip()}' -> '{translations[0] if translations else 'N/A'}'")

                # Clear the content of every paragraph
                for para in cell.paragraphs:
                    para.clear()

                # Put the source text and the translations into the first paragraph
                first_para = cell.paragraphs[0] if cell.paragraphs else cell.add_paragraph()

                # Source text
                run_orig = first_para.add_run(seg.text.strip())

                # A newline run followed by each translation
                for t in translations:
                    first_para.add_run('\n')
                    run_trans = first_para.add_run(t)
                    run_trans.italic = True
                    if INSERT_FONT_SIZE_PT:
                        run_trans.font.size = Pt(INSERT_FONT_SIZE_PT)

                # Zero-width marker
                tag_run = first_para.add_run("\u200b")
                tag_run.italic = True
                if INSERT_FONT_SIZE_PT:
                    tag_run.font.size = Pt(INSERT_FONT_SIZE_PT)
            else:
                # For complex cells, append one paragraph per translation
                for t in translations:
                    new_p = cell.add_paragraph()
                    _add_formatted_run(new_p, t, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)

            ok_cnt += 1
            log(f"[SUCCESS] 表格儲存格插入 {len(translations)} 個翻譯")

        elif seg.kind == "txbx":
            tx = seg.ref
            # Check if textbox already has our translations at the end
            if _txbx_tail_equals(tx, translations):
                skip_cnt += 1
                log(f"[SKIP] 文字框已存在翻譯: {seg.text[:30]}...")
                continue

            # Append translations to textbox
            for t in translations:
                _txbx_append_paragraph(tx, t, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)

            ok_cnt += 1
            log(f"[SUCCESS] 文字框插入 {len(translations)} 個翻譯")

    return ok_cnt, skip_cnt
|
||||
|
||||
# ---------- Main DocumentProcessor class ----------
|
||||
class DocumentProcessor:
    """Enhanced document processor with complete DOCX handling capabilities.

    Thin object-oriented wrapper around the module-level DOCX helpers:
    segment extraction, translation insertion, sentence splitting and the
    "should this be translated" check.
    """

    def __init__(self):
        self.logger = logger

    def extract_docx_segments(self, file_path: str) -> List[Segment]:
        """Extract all translatable segments from DOCX file.

        Args:
            file_path: Path of the DOCX file to open.

        Returns:
            The segments collected from the document.

        Raises:
            FileProcessingError: If opening or analysing the file fails; the
                original exception is attached as the cause.
        """
        try:
            doc = docx.Document(file_path)
            segments = _collect_docx_segments(doc)

            self.logger.info(f"Extracted {len(segments)} segments from {file_path}")
            for seg in segments[:5]:  # Log first 5 segments for debugging
                self.logger.debug(f"Segment: {seg.kind} | {seg.ctx} | {seg.text[:50]}...")

            return segments

        except Exception as e:
            self.logger.error(f"Failed to extract DOCX segments from {file_path}: {str(e)}")
            raise FileProcessingError(f"DOCX 文件分析失敗: {str(e)}") from e

    def _rematch_segments_to_document(self, doc: docx.Document, old_segments: List[Segment]) -> List[Segment]:
        """Re-match segments from old document instance to new document instance.

        Segments are matched on (kind, ctx, normalised text). When several
        segments of the current document share the same key, each one is
        handed out at most once, in document order, so repeated paragraphs
        map to their own element rather than all to the first occurrence.
        If more old segments than fresh ones share a key, the extra ones reuse
        the first fresh match. Unmatched old segments are kept as they are
        (with a warning), and on any error the old list is returned unchanged.
        """
        try:
            # Extract fresh segments from the current document instance
            fresh_segments = _collect_docx_segments(doc)

            # Index fresh segments by match key, preserving document order.
            buckets: Dict[Tuple[str, str, str], List[Segment]] = {}
            for fresh_seg in fresh_segments:
                key = (fresh_seg.kind, fresh_seg.ctx, _normalize_text(fresh_seg.text))
                buckets.setdefault(key, []).append(fresh_seg)

            # Position of the next not-yet-used fresh segment for each key.
            next_unused: Dict[Tuple[str, str, str], int] = {}
            matched_segments = []

            for old_seg in old_segments:
                key = (old_seg.kind, old_seg.ctx, _normalize_text(old_seg.text))
                candidates = buckets.get(key)

                if not candidates:
                    self.logger.warning(f"Failed to match segment: {old_seg.text[:50]}...")
                    # Still add the old segment but it might not work for insertion
                    matched_segments.append(old_seg)
                    continue

                position = next_unused.get(key, 0)
                if position < len(candidates):
                    next_unused[key] = position + 1
                else:
                    # All fresh matches are taken: reuse the first one.
                    position = 0
                matched_segments.append(candidates[position])

            self.logger.debug(f"Re-matched {len(matched_segments)} segments to current document")
            return matched_segments

        except Exception as e:
            self.logger.error(f"Failed to re-match segments: {str(e)}")
            # Return original segments as fallback
            return old_segments

    def _insert_and_save(self, file_path: str, segments: List[Segment],
                         translation_map: Dict[Tuple[str, str], str],
                         target_languages: List[str], output_path: str) -> Tuple[int, int]:
        """Open the file, re-match segments, insert translations and save.

        Shared by both public insert methods; exceptions propagate to them.
        """
        doc = docx.Document(file_path)

        # The given segments come from a different document instance, so they
        # must be re-matched against the instance that will be modified.
        matched_segments = self._rematch_segments_to_document(doc, segments)

        ok_count, skip_count = _insert_docx_translations(
            doc, matched_segments, translation_map, target_languages, self.logger.debug
        )

        doc.save(output_path)
        return ok_count, skip_count

    def insert_docx_translations(self, file_path: str, segments: List[Segment],
                                 translation_map: Dict[Tuple[str, str], str],
                                 target_languages: List[str], output_path: str) -> Tuple[int, int]:
        """Insert translations into DOCX file and save to output path.

        Returns:
            Tuple of (inserted, skipped) segment counts.

        Raises:
            FileProcessingError: If any step fails; the original exception is
                attached as the cause.
        """
        try:
            ok_count, skip_count = self._insert_and_save(
                file_path, segments, translation_map, target_languages, output_path
            )

            self.logger.info(f"Inserted {ok_count} translations, skipped {skip_count}. Saved to: {output_path}")
            return ok_count, skip_count

        except Exception as e:
            self.logger.error(f"Failed to insert DOCX translations: {str(e)}")
            raise FileProcessingError(f"DOCX 翻譯插入失敗: {str(e)}") from e

    def split_text_into_sentences(self, text: str, language: str = 'auto') -> List[str]:
        """Split text into sentences using the best available method."""
        return _split_sentences(text, language)

    def should_translate_text(self, text: str, source_language: str) -> bool:
        """Determine if text should be translated."""
        return should_translate(text, source_language)

    def insert_docx_combined_translations(self, file_path: str, segments: List[Segment],
                                          translation_map: Dict[Tuple[str, str], str],
                                          target_languages: List[str], output_path: str) -> Tuple[int, int]:
        """Insert all translations into a single DOCX file with combined multi-language output.

        Each original text is followed by its translations for every target
        language, in the order given by ``target_languages``. The insertion
        itself is the same as in ``insert_docx_translations``, which already
        supports several target languages in one document.

        Returns:
            Tuple of (inserted, skipped) segment counts.

        Raises:
            FileProcessingError: If any step fails; the original exception is
                attached as the cause.
        """
        try:
            ok_count, skip_count = self._insert_and_save(
                file_path, segments, translation_map, target_languages, output_path
            )

            self.logger.info(f"Generated combined multi-language file: {output_path}")
            self.logger.info(f"Inserted {ok_count} translations, skipped {skip_count}")
            return ok_count, skip_count

        except Exception as e:
            self.logger.error(f"Failed to create combined DOCX translations: {str(e)}")
            raise FileProcessingError(f"組合多語言 DOCX 檔案生成失敗: {str(e)}") from e
|
700
app/services/enhanced_pdf_parser.py
Normal file
700
app/services/enhanced_pdf_parser.py
Normal file
@@ -0,0 +1,700 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
增强的PDF解析器 - 支持扫描PDF的OCR处理
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-09-23
|
||||
Modified: 2024-09-23
|
||||
"""
|
||||
|
||||
import io
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
from PyPDF2 import PdfReader
|
||||
from app.utils.logger import get_logger
|
||||
from app.utils.exceptions import FileProcessingError
|
||||
from app.services.dify_client import DifyClient
|
||||
from app.services.ocr_cache import OCRCache
|
||||
from app.utils.image_preprocessor import ImagePreprocessor
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# 检查PyMuPDF依赖
|
||||
try:
|
||||
import fitz # PyMuPDF
|
||||
_HAS_PYMUPDF = True
|
||||
except ImportError:
|
||||
_HAS_PYMUPDF = False
|
||||
logger.warning("PyMuPDF not available. Scanned PDF processing will be disabled.")
|
||||
|
||||
|
||||
class EnhancedPdfParser:
|
||||
"""支持扫描PDF的增强解析器"""
|
||||
|
||||
def __init__(self, file_path: str):
    """Bind the parser to an existing PDF and create its OCR helper objects.

    Args:
        file_path: Location of the PDF to parse.

    Raises:
        FileProcessingError: if ``file_path`` does not exist.
    """
    self.file_path = Path(file_path)

    # Fail fast: reject a missing file before any helper object is built.
    if not self.file_path.exists():
        raise FileProcessingError(f"PDF文件不存在: {file_path}")

    self.dify_client = DifyClient()
    self.ocr_cache = OCRCache()
    self.image_preprocessor = ImagePreprocessor(use_opencv=True)
|
||||
|
||||
def is_scanned_pdf(self) -> bool:
    """Heuristically decide whether the PDF is a scan (no usable text layer).

    Text is extracted from at most the first three pages; fewer than 100
    non-blank characters in total is taken to mean "scanned".

    Returns:
        True when the document looks scanned, or when the analysis itself
        fails (the error is logged and the file is treated as scanned);
        False when enough text was found.
    """
    try:
        reader = PdfReader(str(self.file_path))

        # Only the first 3 pages are sampled.
        pages_to_check = min(3, len(reader.pages))

        # `or ""` keeps a page that yields no text object from raising inside
        # the concatenation, which would otherwise flip the whole document to
        # "scanned" through the except branch below.
        text_content = "".join(
            reader.pages[i].extract_text() or "" for i in range(pages_to_check)
        )

        # Very little text means the pages are most likely images.
        text_length = len(text_content.strip())
        logger.info(f"PDF text extraction found {text_length} characters in first {pages_to_check} pages")

        # Threshold: fewer than 100 characters counts as a scanned document.
        is_scanned = text_length < 100

        if is_scanned:
            logger.info("PDF detected as scanned document, will use OCR processing")
        else:
            logger.info("PDF detected as text-based document, will use direct text extraction")

        return is_scanned

    except Exception as e:
        # Deliberate best-effort: an unreadable PDF falls back to the OCR path.
        logger.warning(f"Failed to analyze PDF type: {e}, treating as scanned document")
        return True
|
||||
|
||||
def extract_text_segments(self, user_id: int = None, job_id: int = None) -> List[str]:
    """Extract text segments, choosing direct extraction or OCR automatically.

    Text-based PDFs go through ``_extract_from_text_pdf``; scanned PDFs go
    through ``_extract_from_scanned_pdf`` (which needs PyMuPDF), with
    ``user_id`` and ``job_id`` passed along.

    Raises:
        FileProcessingError: on any failure, including a scanned PDF when
            PyMuPDF is not installed.
    """
    try:
        if self.is_scanned_pdf():
            # Scanned PDF: pages are rendered to images and sent to Dify OCR.
            if _HAS_PYMUPDF:
                return self._extract_from_scanned_pdf(user_id, job_id)
            raise FileProcessingError("处理扫描PDF需要PyMuPDF库,请安装: pip install PyMuPDF")

        # Text layer present: read it directly.
        return self._extract_from_text_pdf()

    except Exception as e:
        logger.error(f"PDF文字提取失败: {str(e)}")
        raise FileProcessingError(f"PDF文件解析失败: {str(e)}")
|
||||
|
||||
def _extract_from_text_pdf(self) -> List[str]:
    """Extract text segments from a PDF that has a text layer.

    Each page's text is split into sentences, sentences of 10 characters or
    fewer are dropped, and the remainder is passed through
    ``_merge_short_segments``.

    Returns:
        The merged list of text segments.

    Raises:
        FileProcessingError: if reading or splitting fails.
    """
    try:
        reader = PdfReader(str(self.file_path))
        text_segments = []

        for page_num, page in enumerate(reader.pages, 1):
            # `or ""`: a page without a text object is skipped instead of
            # aborting the extraction of the whole document.
            page_text = page.extract_text() or ""
            if not page_text.strip():
                continue

            # Simple sentence splitting.
            sentences = self._split_text_into_sentences(page_text)

            # Discard fragments that are too short.
            valid_sentences = [s for s in sentences if len(s.strip()) > 10]
            text_segments.extend(valid_sentences)

            logger.debug(f"Page {page_num}: extracted {len(valid_sentences)} sentences")

        logger.info(f"Text PDF extraction completed: {len(text_segments)} segments")

        # Merge short paragraphs to avoid unnecessary translation calls.
        return self._merge_short_segments(text_segments)

    except Exception as e:
        logger.error(f"Text PDF extraction failed: {str(e)}")
        raise FileProcessingError(f"文字PDF提取失败: {str(e)}") from e
|
||||
|
||||
def _extract_from_scanned_pdf(self, user_id: int = None, job_id: int = None) -> List[str]:
    """Extract text segments from a scanned PDF via Dify OCR.

    Every page is rendered to an image and recognised (see
    ``_ocr_page_sentences``). A page that fails is logged and skipped so the
    remaining pages are still processed. The collected sentences are finally
    passed through ``_merge_short_segments``.

    Args:
        user_id: Forwarded to the OCR client and stored in the cache metadata.
        job_id: Forwarded to the OCR client and stored in the cache metadata.

    Returns:
        The merged list of text segments.

    Raises:
        FileProcessingError: if the document cannot be processed or no text
            was recognised on any page.
    """
    try:
        doc = fitz.open(str(self.file_path))
        try:
            text_segments = []
            total_pages = doc.page_count

            logger.info(f"Processing scanned PDF with {total_pages} pages using Dify OCR")

            for page_num in range(total_pages):
                try:
                    text_segments.extend(
                        self._ocr_page_sentences(doc, page_num, total_pages, user_id, job_id)
                    )
                except Exception as e:
                    logger.error(f"[PDF-OCR] Page {page_num + 1}: ✗ Processing failed: {str(e)}")
                    logger.error(f"[PDF-OCR] Page {page_num + 1}: Exception type: {type(e).__name__}")
                    # Keep going with the next page; one bad page must not
                    # abort the whole document.
                    continue
        finally:
            # Always release the document handle, even on unexpected errors.
            doc.close()

        logger.info(f"[PDF-OCR] OCR processing completed for all {total_pages} pages")
        logger.info(f"[PDF-OCR] Total text segments extracted: {len(text_segments)}")

        if not text_segments:
            logger.error("[PDF-OCR] ✗ No text content extracted from any page")
            raise FileProcessingError("OCR处理完成,但未提取到任何文字内容")

        logger.info("[PDF-OCR] ✓ Scanned PDF processing completed successfully")
        logger.info(f"[PDF-OCR] Final result: {len(text_segments)} text segments extracted")

        # Merge short paragraphs to avoid unnecessary translation calls.
        merged_segments = self._merge_short_segments(text_segments)
        logger.info(f"[PDF-OCR] After merging: {len(merged_segments)} segments ready for translation")
        return merged_segments

    except Exception as e:
        logger.error(f"Scanned PDF processing failed: {str(e)}")
        raise FileProcessingError(f"扫描PDF处理失败: {str(e)}") from e

def _ocr_page_sentences(self, doc, page_num: int, total_pages: int,
                        user_id: int = None, job_id: int = None) -> List[str]:
    """Render one page to PNG, OCR it (cache first) and return its usable sentences.

    Args:
        doc: The open PyMuPDF document.
        page_num: Zero-based page index.
        total_pages: Page count, used for logging and cache metadata.
        user_id: Forwarded to the OCR client and stored in the cache metadata.
        job_id: Forwarded to the OCR client and stored in the cache metadata.

    Returns:
        Sentences longer than 5 characters; an empty list when OCR returned
        only blank text.
    """
    page_label = page_num + 1
    logger.info(f"[PDF-OCR] Processing page {page_label}/{total_pages}")
    page = doc[page_num]

    # Render at 2x zoom for better OCR accuracy.
    zoom = 2.0
    mat = fitz.Matrix(zoom, zoom)
    pix = page.get_pixmap(matrix=mat, alpha=False)

    # Convert to PNG bytes and preprocess the image to improve OCR accuracy.
    img_data_raw = pix.tobytes("png")
    img_data = self.image_preprocessor.preprocess_smart(img_data_raw)
    logger.debug(f"[PDF-OCR] Page {page_label}: Image preprocessed ({len(img_data_raw)} -> {len(img_data)} bytes)")
    filename = f"page_{page_label}.png"

    logger.info(f"[PDF-OCR] Page {page_label}: Converted to image ({len(img_data)} bytes)")
    logger.debug(f"[PDF-OCR] Page {page_label}: Image zoom={zoom}, format=PNG")

    # Look in the OCR cache first.
    cache_key_info = f"{self.file_path.name}_page_{page_label}_zoom_{zoom}"
    cached_text = self.ocr_cache.get_cached_text(
        file_data=img_data,
        filename=filename,
        additional_info=cache_key_info
    )

    if cached_text:
        logger.info(f"[PDF-OCR] Page {page_label}: ✓ 使用快取的OCR結果 (節省AI流量)")
        ocr_text = cached_text
    else:
        # Cache miss: recognise the text with Dify OCR.
        logger.info(f"[PDF-OCR] Page {page_label}: Starting OCR recognition...")
        ocr_text = self.dify_client.ocr_image_with_dify(
            image_data=img_data,
            filename=filename,
            user_id=user_id,
            job_id=job_id
        )

        # Store non-blank results so the same page image is not recognised again.
        if ocr_text.strip():
            self.ocr_cache.save_cached_text(
                file_data=img_data,
                extracted_text=ocr_text,
                filename=filename,
                additional_info=cache_key_info,
                metadata={
                    'source_file': str(self.file_path),
                    'page_number': page_label,
                    'total_pages': total_pages,
                    'zoom_level': zoom,
                    'image_size_bytes': len(img_data),
                    'user_id': user_id,
                    'job_id': job_id
                }
            )
            logger.info(f"[PDF-OCR] Page {page_label}: ✓ OCR結果已保存到快取")

    logger.info(f"[PDF-OCR] Page {page_label}: OCR completed")
    logger.debug(f"[PDF-OCR] Page {page_label}: Raw OCR result length: {len(ocr_text)}")

    if not ocr_text.strip():
        logger.warning(f"[PDF-OCR] Page {page_label}: ⚠ OCR returned empty result")
        return []

    # Split the OCR output into sentences and keep the usable ones.
    logger.debug(f"[PDF-OCR] Page {page_label}: Splitting OCR text into sentences...")
    sentences = self._split_ocr_text(ocr_text)
    valid_sentences = [s for s in sentences if len(s.strip()) > 5]

    logger.info(f"[PDF-OCR] Page {page_label}: ✓ Extracted {len(valid_sentences)} valid sentences")
    logger.debug(f"[PDF-OCR] Page {page_label}: Total sentences before filter: {len(sentences)}")

    # Log the first 50 characters as a debugging aid.
    if valid_sentences:
        preview = valid_sentences[0][:50] + "..." if len(valid_sentences[0]) > 50 else valid_sentences[0]
        logger.debug(f"[PDF-OCR] Page {page_label}: First sentence preview: {preview}")

    return valid_sentences
|
||||
|
||||
def _split_text_into_sentences(self, text: str) -> List[str]:
|
||||
"""将文字分割成句子"""
|
||||
if not text.strip():
|
||||
return []
|
||||
|
||||
# 简单的分句逻辑
|
||||
sentences = []
|
||||
separators = ['. ', '。', '!', '?', '!', '?', '\n\n']
|
||||
|
||||
current_sentences = [text]
|
||||
|
||||
for sep in separators:
|
||||
new_sentences = []
|
||||
for sentence in current_sentences:
|
||||
parts = sentence.split(sep)
|
||||
if len(parts) > 1:
|
||||
# 保留分隔符
|
||||
for i, part in enumerate(parts[:-1]):
|
||||
if part.strip():
|
||||
new_sentences.append(part.strip() + sep.rstrip())
|
||||
# 最后一部分
|
||||
if parts[-1].strip():
|
||||
new_sentences.append(parts[-1].strip())
|
||||
else:
|
||||
new_sentences.append(sentence)
|
||||
current_sentences = new_sentences
|
||||
|
||||
# 过滤掉太短的句子
|
||||
valid_sentences = [s for s in current_sentences if len(s.strip()) > 3]
|
||||
return valid_sentences
|
||||
|
||||
def _split_ocr_text(self, ocr_text: str) -> List[str]:
|
||||
"""分割OCR识别的文字"""
|
||||
if not ocr_text.strip():
|
||||
return []
|
||||
|
||||
# OCR结果可能包含表格或特殊格式,需要特殊处理
|
||||
lines = ocr_text.split('\n')
|
||||
sentences = []
|
||||
|
||||
current_paragraph = []
|
||||
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
# 空行表示段落结束
|
||||
if current_paragraph:
|
||||
paragraph_text = ' '.join(current_paragraph)
|
||||
if len(paragraph_text) > 10:
|
||||
sentences.append(paragraph_text)
|
||||
current_paragraph = []
|
||||
continue
|
||||
|
||||
# 检查是否是表格行(包含|或多个制表符)
|
||||
if '|' in line or '\t' in line:
|
||||
# 表格行单独处理
|
||||
if current_paragraph:
|
||||
paragraph_text = ' '.join(current_paragraph)
|
||||
if len(paragraph_text) > 10:
|
||||
sentences.append(paragraph_text)
|
||||
current_paragraph = []
|
||||
|
||||
if len(line) > 10:
|
||||
sentences.append(line)
|
||||
else:
|
||||
# 普通文字行
|
||||
current_paragraph.append(line)
|
||||
|
||||
# 处理最后的段落
|
||||
if current_paragraph:
|
||||
paragraph_text = ' '.join(current_paragraph)
|
||||
if len(paragraph_text) > 10:
|
||||
sentences.append(paragraph_text)
|
||||
|
||||
return sentences
|
||||
|
||||
def generate_translated_document(self, translations: dict, target_language: str,
                                 output_dir: Path) -> str:
    """Write the translations for one language to a Word (.docx) file.

    The document starts with a title and an info block (source file name,
    extraction method, language, segment count). Each translated segment is
    then rendered as a table, heading, list item or paragraph according to
    ``_detect_content_type``.

    Args:
        translations: Maps a language to its list of translated segments.
        target_language: Key into ``translations``; also used in the file name.
        output_dir: Folder for the result; created if it does not exist.

    Returns:
        The path of the saved document as a string.

    Raises:
        FileProcessingError: if the document cannot be generated or saved.
    """
    try:
        from docx import Document
        from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

        # A missing key or an explicit None both mean "no segments".
        translated_texts = translations.get(target_language) or []

        # Accept str as well as Path, and make sure the folder exists so
        # that saving does not fail on a fresh output directory.
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        output_path = output_dir / f"{self.file_path.stem}_{target_language}_translated.docx"

        doc = Document()

        # Title.
        title = doc.add_heading(f"PDF翻译结果 - {target_language}", 0)
        title.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

        # Document information block.
        info_para = doc.add_paragraph()
        info_para.add_run("原始文件: ").bold = True
        info_para.add_run(self.file_path.name)
        info_para.add_run("\n处理方式: ").bold = True
        info_para.add_run("OCR识别" if self.is_scanned_pdf() else "直接文字提取")
        info_para.add_run("\n翻译语言: ").bold = True
        info_para.add_run(target_language)
        info_para.add_run("\n总段落数: ").bold = True
        info_para.add_run(str(len(translated_texts)))

        doc.add_paragraph()  # spacer

        # Render each segment with the writer matching its detected type;
        # anything unrecognised is written as a plain paragraph.
        writers = {
            'table': self._add_table_content,
            'heading': self._add_heading_content,
            'list': self._add_list_content,
        }
        for i, text in enumerate(translated_texts, 1):
            writer = writers.get(self._detect_content_type(text), self._add_paragraph_content)
            writer(doc, text, i)

        doc.save(output_path)
        logger.info(f"Generated translated PDF Word document: {output_path}")
        return str(output_path)

    except Exception as e:
        logger.error(f"Failed to generate translated Word document: {str(e)}")
        raise FileProcessingError(f"生成翻译Word文档失败: {str(e)}") from e
|
||||
|
||||
def generate_combined_translated_document(self, all_translations: dict, target_languages: list,
                                          output_dir: Path) -> str:
    """Write one Word (.docx) file that lists every language per segment.

    Each numbered paragraph holds the translations of one segment, one
    ``[language]`` entry per target language, separated by a blank line.

    Args:
        all_translations: Maps a language to its list of translated segments.
        target_languages: Languages to include, in output order.
        output_dir: Folder for the result; created if it does not exist.

    Returns:
        The path of the saved document as a string.

    Raises:
        FileProcessingError: if ``target_languages`` is empty or the document
            cannot be generated or saved.
    """
    try:
        from docx import Document
        from docx.shared import Pt
        from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

        if not target_languages:
            # Report the real problem instead of a bare "list index out of range".
            raise ValueError("target_languages is empty")

        # A missing key or an explicit None both mean "no segments".
        per_language = {
            language: all_translations.get(language) or []
            for language in target_languages
        }

        # File name of the combined document.
        languages_suffix = '_'.join(target_languages)
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        output_path = output_dir / f"{self.file_path.stem}_{languages_suffix}_combined.docx"

        doc = Document()

        # Title.
        title = doc.add_heading("PDF翻译結果 - 多語言組合文檔", 0)
        title.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

        # Document information block.
        info_para = doc.add_paragraph()
        info_para.add_run("原始文件: ").bold = True
        info_para.add_run(self.file_path.name)
        info_para.add_run("\n处理方式: ").bold = True
        info_para.add_run("OCR识别" if self.is_scanned_pdf() else "直接文字提取")
        info_para.add_run("\n翻译语言: ").bold = True
        info_para.add_run(' / '.join(target_languages))

        # Use the longest list so that no language loses trailing segments
        # when the lists differ in length.
        segment_count = max(len(texts) for texts in per_language.values())
        info_para.add_run("\n总段落数: ").bold = True
        info_para.add_run(str(segment_count))

        doc.add_paragraph()  # spacer

        for i in range(segment_count):
            content_para = doc.add_paragraph()

            # Segment number.
            num_run = content_para.add_run(f"{i+1:03d}. ")
            num_run.bold = True
            num_run.font.size = Pt(12)

            wrote_any = False
            for target_language in target_languages:
                texts = per_language[target_language]
                if i >= len(texts):
                    continue

                # Blank line between entries, but never before the first one.
                if wrote_any:
                    content_para.add_run("\n\n")

                # Language tag.
                lang_run = content_para.add_run(f"[{target_language}] ")
                lang_run.bold = True
                lang_run.font.size = Pt(11)

                # Translated text.
                trans_run = content_para.add_run(texts[i])
                trans_run.font.size = Pt(11)
                wrote_any = True

            # Spacing after each segment paragraph.
            content_para.paragraph_format.space_after = Pt(12)

        doc.save(output_path)
        logger.info(f"Generated combined translated PDF Word document: {output_path}")
        return str(output_path)

    except Exception as e:
        logger.error(f"Failed to generate combined translated Word document: {str(e)}")
        raise FileProcessingError(f"生成組合翻译Word文档失败: {str(e)}") from e
|
||||
|
||||
def _is_table_component(self, segment: str) -> bool:
|
||||
"""檢查段落是否為表格組件(表格邊界、分隔線等)"""
|
||||
segment = segment.strip()
|
||||
|
||||
# Markdown表格分隔線:如 |---|---|---| 或 |===|===|===|
|
||||
if '|' in segment and ('-' in segment or '=' in segment):
|
||||
# 移除 | 和 - = 後,如果剩餘內容很少,則判斷為表格分隔線
|
||||
clean_segment = segment.replace('|', '').replace('-', '').replace('=', '').replace(' ', '').replace(':', '')
|
||||
if len(clean_segment) <= 2: # 允許少量其他字符
|
||||
return True
|
||||
|
||||
# 純分隔線
|
||||
if segment.replace('=', '').replace('-', '').replace(' ', '') == '':
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def _is_table_row(self, segment: str) -> bool:
|
||||
"""檢查段落是否為表格行(包含實際數據的表格行)"""
|
||||
segment = segment.strip()
|
||||
|
||||
# Markdown表格行:至少包含兩個 | 符號,且有實際內容
|
||||
if segment.count('|') >= 2:
|
||||
# 移除首尾的 | 並分割為單元格
|
||||
cells = segment.strip('|').split('|')
|
||||
# 檢查是否有實際的文字內容(不只是分隔符號)
|
||||
has_content = any(
|
||||
cell.strip() and
|
||||
not cell.replace('-', '').replace('=', '').replace(' ', '').replace(':', '') == ''
|
||||
for cell in cells
|
||||
)
|
||||
if has_content:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def _merge_table_segments(self, segments: List[str], start_idx: int) -> tuple[str, int]:
|
||||
"""
|
||||
合併表格相關的段落
|
||||
|
||||
Returns:
|
||||
(merged_table_content, next_index)
|
||||
"""
|
||||
table_parts = []
|
||||
current_idx = start_idx
|
||||
|
||||
# 收集連續的表格相關段落
|
||||
while current_idx < len(segments):
|
||||
segment = segments[current_idx].strip()
|
||||
|
||||
if self._is_table_component(segment) or self._is_table_row(segment):
|
||||
table_parts.append(segment)
|
||||
current_idx += 1
|
||||
else:
|
||||
break
|
||||
|
||||
# 將表格部分合併為一個段落
|
||||
merged_table = '\n'.join(table_parts)
|
||||
return merged_table, current_idx
|
||||
|
||||
def _merge_short_segments(self, text_segments: List[str], min_length: int = 10) -> List[str]:
    """Merge runs of short segments to cut down on translation calls.

    Table structure gets special treatment: a run of table components and
    rows is collapsed into one newline-joined block via
    ``_merge_table_segments``.

    Args:
        text_segments: The original list of text segments.
        min_length: Segments shorter than this are accumulated (joined with
            a space) until a long segment, a table, or the end of the input
            flushes them as one segment.

    Returns:
        The merged segment list. Blank segments and short segments made only
        of ``*``, ``-``, ``_``, ``#`` and whitespace are dropped.
    """
    if not text_segments:
        return text_segments

    result = []
    pending = ""  # short segments accumulated so far, space-joined
    symbol_table = str.maketrans('', '', '*-_#')
    total = len(text_segments)
    idx = 0

    while idx < total:
        piece = text_segments[idx].strip()

        if not piece:
            # Blank segment: nothing to keep.
            idx += 1

        elif self._is_table_component(piece) or self._is_table_row(piece):
            # Flush accumulated short text before the table starts.
            if pending:
                result.append(pending.strip())
                logger.debug(f"Merged short segments before table: '{pending[:50]}...'")
                pending = ""

            # Collapse the whole run of table segments into one block.
            table_block, resume_at = self._merge_table_segments(text_segments, idx)
            result.append(table_block)
            logger.debug(f"Merged table content: {resume_at - idx} segments -> 1 table block")
            idx = resume_at

        elif len(piece) >= min_length:
            # Long segment: flush the accumulated short ones, then keep it as is.
            if pending:
                result.append(pending.strip())
                logger.debug(f"Merged short segments: '{pending[:50]}...' (total length: {len(pending)})")
                pending = ""

            result.append(piece)
            logger.debug(f"Added long segment: '{piece[:50]}...' (length: {len(piece)})")
            idx += 1

        elif not piece.translate(symbol_table).strip():
            # Short segment consisting only of markup symbols: discard.
            logger.debug(f"Skipping pure symbol segment: '{piece}'")
            idx += 1

        else:
            # Short segment: add it to the pending merge.
            pending = f"{pending} {piece}" if pending else piece
            logger.debug(f"Adding short segment to merge: '{piece}' (length: {len(piece)})")
            idx += 1

    # Flush whatever short text is still pending.
    if pending:
        result.append(pending.strip())
        logger.debug(f"Final merged short segments: '{pending[:50]}...' (total length: {len(pending)})")

    logger.info(f"Segment merging: {len(text_segments)} -> {len(result)} segments")
    return result
|
||||
|
||||
def _detect_content_type(self, text: str) -> str:
|
||||
"""检测内容类型"""
|
||||
text_lower = text.lower().strip()
|
||||
|
||||
# 检测表格(包含多个|或制表符)
|
||||
if ('|' in text and text.count('|') >= 2) or '\t' in text:
|
||||
return 'table'
|
||||
|
||||
# 检测标题
|
||||
if (text_lower.startswith(('第', '章', 'chapter', 'section', '#')) or
|
||||
any(keyword in text_lower for keyword in ['章', '节', '第']) and len(text) < 100):
|
||||
return 'heading'
|
||||
|
||||
# 检测列表
|
||||
if (text_lower.startswith(('•', '-', '*', '1.', '2.', '3.', '4.', '5.')) or
|
||||
any(text_lower.startswith(f"{i}.") for i in range(1, 20))):
|
||||
return 'list'
|
||||
|
||||
return 'paragraph'
|
||||
|
||||
def _add_table_content(self, doc, text: str, index: int):
    """Append a captioned table built from ``text`` to ``doc``.

    Pipe-delimited text is parsed into rows and written as a real Word
    table sized to the widest row. Text that yields no rows (tab-separated
    text, or pipe text made only of rule lines) is written verbatim in a
    monospace font, so the content is never lost.

    Args:
        doc: The python-docx document being built.
        text: The table segment.
        index: Segment number shown in the caption.
    """
    from docx.shared import Pt

    # Caption.
    title_para = doc.add_paragraph()
    title_run = title_para.add_run(f"表格 {index}: ")
    title_run.bold = True
    title_run.font.size = Pt(12)

    rows = self._parse_pipe_table_rows(text) if '|' in text else []

    if not rows:
        # Nothing parseable as a grid: keep the raw text, monospace so
        # tab-aligned columns remain readable.
        para = doc.add_paragraph()
        content_run = para.add_run(text)
        content_run.font.name = 'Courier New'
        content_run.font.size = Pt(10)
        return

    # Size the grid to the widest row so cells of ragged rows are not dropped.
    column_count = max(len(row) for row in rows)
    table = doc.add_table(rows=len(rows), cols=column_count)
    table.style = 'Table Grid'

    for table_row, row_data in zip(table.rows, rows):
        for cell, cell_data in zip(table_row.cells, row_data):
            cell.text = cell_data
            # Apply the table font size to the text just written.
            for paragraph in cell.paragraphs:
                for run in paragraph.runs:
                    run.font.size = Pt(10)

def _parse_pipe_table_rows(self, text: str) -> List[List[str]]:
    """Parse pipe-delimited ``text`` into rows of stripped cell strings.

    One optional leading and one optional trailing ``|`` are removed from
    each line, so rows written without outer pipes are accepted too. Blank
    lines are skipped, as are rule/alignment rows whose cells contain only
    ``-``, ``=``, ``:`` and spaces (for example ``|---|:--:|``).
    """
    rows = []
    for line in text.split('\n'):
        line = line.strip()
        if not line:
            continue

        # Outer pipes are optional.
        if line.startswith('|'):
            line = line[1:]
        if line.endswith('|'):
            line = line[:-1]

        cells = [cell.strip() for cell in line.split('|')]

        # Rule or alignment row: structural only, carries no data.
        if all(not cell.strip('-=: ') for cell in cells):
            continue

        rows.append(cells)
    return rows
|
||||
|
||||
def _add_heading_content(self, doc, text: str, index: int):
    """Append ``text`` to ``doc`` as a heading.

    Text shorter than 100 characters becomes a level-2 heading. Longer text
    is written as a normal paragraph in bold 14 pt instead. No segment
    number is added; ``index`` is not used.
    """
    from docx.shared import Pt

    heading_text = text.strip()

    if len(heading_text) >= 100:
        # Too long for a heading: emphasised paragraph instead.
        emphasized = doc.add_paragraph().add_run(heading_text)
        emphasized.bold = True
        emphasized.font.size = Pt(14)
        return

    doc.add_heading(heading_text, level=2)
|
||||
|
||||
def _add_list_content(self, doc, text: str, index: int):
    """Append ``text`` to ``doc`` as a list item.

    Text starting with ``1.`` to ``19.`` uses the 'List Number' style;
    everything else uses 'List Bullet'. Runs are set to 11 pt. ``index`` is
    not used.
    """
    from docx.shared import Pt

    item_text = text.strip()

    # NOTE(review): the original marker ("1.", "-", ...) is kept in the text,
    # so it will appear next to whatever marker the list style adds - confirm
    # this is intended.
    numbered_prefixes = tuple(f"{n}." for n in range(1, 20))
    style_name = 'List Number' if item_text.startswith(numbered_prefixes) else 'List Bullet'
    para = doc.add_paragraph(item_text, style=style_name)

    # Font size.
    for run in para.runs:
        run.font.size = Pt(11)
|
||||
|
||||
def _add_paragraph_content(self, doc, text: str, index: int):
    """Append ``text`` to ``doc`` as a numbered body paragraph.

    The paragraph starts with a bold 12 pt label of the form ``007. ``
    (``index`` zero-padded to three digits), followed by the text in 11 pt,
    with 6 pt of space after the paragraph.
    """
    from docx.shared import Pt

    body_para = doc.add_paragraph()

    # Segment number label.
    label = body_para.add_run(f"{index:03d}. ")
    label.font.size = Pt(12)
    label.bold = True

    # The text itself.
    body = body_para.add_run(text)
    body.font.size = Pt(11)

    # Paragraph spacing.
    body_para.paragraph_format.space_after = Pt(6)
|
647
app/services/notification_service.py
Normal file
647
app/services/notification_service.py
Normal file
@@ -0,0 +1,647 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
通知服務
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
import os
|
||||
import smtplib
|
||||
from email.mime.text import MIMEText
|
||||
from email.mime.multipart import MIMEMultipart
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional, List, Dict, Any
|
||||
from flask import current_app, url_for
|
||||
from app import db
|
||||
from app.utils.logger import get_logger
|
||||
from app.models.job import TranslationJob
|
||||
from app.models.user import User
|
||||
from app.models.notification import Notification, NotificationType
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class NotificationService:
|
||||
"""通知服務"""
|
||||
|
||||
def __init__(self):
    """Read the SMTP settings and application name from the Flask app config.

    Uses ``current_app``. Defaults: port 587, TLS/SSL/auth off, empty
    password, app name 'PANJIT Document Translator'.
    """
    cfg = current_app.config

    # Server endpoint.
    self.smtp_server = cfg.get('SMTP_SERVER')
    self.smtp_port = cfg.get('SMTP_PORT', 587)

    # Transport security and authentication switches.
    self.use_tls = cfg.get('SMTP_USE_TLS', False)
    self.use_ssl = cfg.get('SMTP_USE_SSL', False)
    self.auth_required = cfg.get('SMTP_AUTH_REQUIRED', False)

    # Sender identity.
    self.sender_email = cfg.get('SMTP_SENDER_EMAIL')
    self.sender_password = cfg.get('SMTP_SENDER_PASSWORD', '')

    self.app_name = cfg.get('APP_NAME', 'PANJIT Document Translator')
|
||||
|
||||
def _create_smtp_connection(self):
    """Open an SMTP connection according to the configured settings.

    Uses ``SMTP_SSL`` when ``use_ssl`` is set; otherwise plain ``SMTP``,
    upgraded with STARTTLS when ``use_tls`` is set. Logs in when
    authentication is required and a password is configured.

    Returns:
        The connected ``smtplib`` server object, or None if any step failed
        (the error is logged).
    """
    server = None
    try:
        if self.use_ssl:
            server = smtplib.SMTP_SSL(self.smtp_server, self.smtp_port)
        else:
            server = smtplib.SMTP(self.smtp_server, self.smtp_port)
            if self.use_tls:
                server.starttls()

        if self.auth_required and self.sender_password:
            server.login(self.sender_email, self.sender_password)

        return server
    except Exception as e:
        logger.error(f"SMTP connection failed: {str(e)}")
        # STARTTLS or login can fail after the socket is already open;
        # close it so the connection is not leaked.
        if server is not None:
            try:
                server.close()
            except OSError:
                # Best-effort cleanup: the failure was already logged above.
                pass
        return None
|
||||
|
||||
def _send_email(self, to_email: str, subject: str, html_content: str, text_content: str = None) -> bool:
    """Base method for sending e-mail - disabled (security restrictions: the internal network cannot be reached).

    Nothing is sent: the skipped notification is logged with its recipient
    and subject, and True is returned.
    """
    logger.info(f"SMTP service disabled - Email notification skipped for {to_email}: {subject}")
    return True  # Return True so that other flows are not affected
|
||||
|
||||
# 以下 SMTP 功能已註解,因應資安限制無法連接內網
|
||||
# try:
|
||||
# if not self.smtp_server or not self.sender_email:
|
||||
# logger.error("SMTP configuration incomplete")
|
||||
# return False
|
||||
#
|
||||
# # 建立郵件
|
||||
# msg = MIMEMultipart('alternative')
|
||||
# msg['From'] = f"{self.app_name} <{self.sender_email}>"
|
||||
# msg['To'] = to_email
|
||||
# msg['Subject'] = subject
|
||||
#
|
||||
# # 添加文本內容
|
||||
# if text_content:
|
||||
# text_part = MIMEText(text_content, 'plain', 'utf-8')
|
||||
# msg.attach(text_part)
|
||||
#
|
||||
# # 添加 HTML 內容
|
||||
# html_part = MIMEText(html_content, 'html', 'utf-8')
|
||||
# msg.attach(html_part)
|
||||
#
|
||||
# # 發送郵件
|
||||
# server = self._create_smtp_connection()
|
||||
# if not server:
|
||||
# return False
|
||||
#
|
||||
# server.send_message(msg)
|
||||
# server.quit()
|
||||
#
|
||||
# logger.info(f"Email sent successfully to {to_email}")
|
||||
# return True
|
||||
#
|
||||
# except Exception as e:
|
||||
# logger.error(f"Failed to send email to {to_email}: {str(e)}")
|
||||
# return False
|
||||
|
||||
def send_job_completion_notification(self, job: TranslationJob) -> bool:
    """Build and send the "translation completed" e-mail for ``job``.

    Composes an HTML body and a plain-text body and hands both to
    ``_send_email``. Values taken from the job and user are HTML-escaped
    before they are placed in the HTML body.

    Returns:
        The result of ``_send_email``; False when the job has no user or
        e-mail address, or when building the message fails (the error is
        logged).
    """
    try:
        import html

        if not job.user or not job.user.email:
            logger.warning(f"No email address for job {job.job_uuid}")
            return False

        subject = f"📄 翻譯完成通知 - {job.original_filename}"

        # Human-readable processing duration.
        processing_time = ""
        if job.processing_started_at and job.completed_at:
            duration = job.completed_at - job.processing_started_at
            total_seconds = int(duration.total_seconds())

            if total_seconds < 60:
                processing_time = f"{total_seconds}秒"
            elif total_seconds < 3600:
                minutes, seconds = divmod(total_seconds, 60)
                processing_time = f"{minutes}分{seconds}秒"
            else:
                hours = total_seconds // 3600
                minutes = (total_seconds % 3600) // 60
                processing_time = f"{hours}小時{minutes}分"

        completed_at_text = (
            job.completed_at.strftime('%Y-%m-%d %H:%M:%S') if job.completed_at else '未知'
        )
        languages_text = ', '.join(job.target_languages)

        # File name, display name and language codes come from users, so
        # escape everything interpolated into the HTML body.
        user_name_html = html.escape(str(job.user.display_name))
        file_name_html = html.escape(str(job.original_filename))
        job_uuid_html = html.escape(str(job.job_uuid))
        source_language_html = html.escape(str(job.source_language))
        languages_html = html.escape(languages_text)
        app_name_html = html.escape(str(self.app_name))

        # Download entries (simplified: placeholder text, no real URLs).
        download_links_html = '<br>'.join(
            f"• {html.escape(str(lang))}: [下載翻譯檔案]" for lang in job.target_languages
        )

        cost_html = (
            f'<p><strong>總成本:</strong> ${job.total_cost:.4f}</p>' if job.total_cost else ''
        )

        html_content = f"""
        <!DOCTYPE html>
        <html>
        <head>
        <meta charset="utf-8">
        <style>
        body {{ font-family: Arial, sans-serif; line-height: 1.6; color: #333; }}
        .container {{ max-width: 600px; margin: 0 auto; padding: 20px; }}
        .header {{ background-color: #2563eb; color: white; padding: 20px; text-align: center; border-radius: 8px 8px 0 0; }}
        .content {{ background-color: #f8fafc; padding: 30px; border: 1px solid #e5e7eb; }}
        .info-box {{ background-color: #dbeafe; border-left: 4px solid #2563eb; padding: 15px; margin: 20px 0; }}
        .footer {{ background-color: #374151; color: #d1d5db; padding: 15px; text-align: center; font-size: 12px; border-radius: 0 0 8px 8px; }}
        .success {{ color: #059669; font-weight: bold; }}
        .download-section {{ margin: 20px 0; }}
        .download-link {{ display: inline-block; background-color: #2563eb; color: white; padding: 10px 20px; text-decoration: none; border-radius: 5px; margin: 5px; }}
        </style>
        </head>
        <body>
        <div class="container">
        <div class="header">
        <h1>🎉 翻譯任務完成</h1>
        </div>

        <div class="content">
        <p>親愛的 <strong>{user_name_html}</strong>,</p>

        <p class="success">您的文件翻譯任務已成功完成!</p>

        <div class="info-box">
        <h3>📋 任務詳細資訊</h3>
        <p><strong>檔案名稱:</strong> {file_name_html}</p>
        <p><strong>任務編號:</strong> {job_uuid_html}</p>
        <p><strong>來源語言:</strong> {source_language_html}</p>
        <p><strong>目標語言:</strong> {languages_html}</p>
        <p><strong>處理時間:</strong> {processing_time}</p>
        <p><strong>完成時間:</strong> {completed_at_text}</p>
        {cost_html}
        </div>

        <div class="download-section">
        <h3>📥 下載翻譯檔案</h3>
        <p>請登入系統下載您的翻譯檔案:</p>
        <p>{download_links_html}</p>
        <p style="margin-top: 15px;">
        <strong>注意:</strong> 翻譯檔案將在系統中保留 7 天,請及時下載。
        </p>
        </div>

        <div style="margin-top: 30px; padding-top: 20px; border-top: 1px solid #e5e7eb;">
        <p>感謝您使用 {app_name_html}!</p>
        <p>如有任何問題,請聯繫系統管理員。</p>
        </div>
        </div>

        <div class="footer">
        <p>此郵件由 {app_name_html} 系統自動發送,請勿回覆。</p>
        <p>發送時間: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
        </div>
        </div>
        </body>
        </html>
        """

        # Plain-text alternative (no escaping: this part is not HTML).
        text_content = "\n".join([
            "",
            "翻譯任務完成通知",
            "",
            f"親愛的 {job.user.display_name},",
            "",
            "您的文件翻譯任務已成功完成!",
            "",
            "任務詳細資訊:",
            f"- 檔案名稱: {job.original_filename}",
            f"- 任務編號: {job.job_uuid}",
            f"- 來源語言: {job.source_language}",
            f"- 目標語言: {languages_text}",
            f"- 處理時間: {processing_time}",
            f"- 完成時間: {completed_at_text}",
            "",
            "請登入系統下載您的翻譯檔案。翻譯檔案將在系統中保留 7 天。",
            "",
            f"感謝您使用 {self.app_name}!",
            "",
            "----",
            "此郵件由系統自動發送,請勿回覆。",
            "",
        ])

        return self._send_email(job.user.email, subject, html_content, text_content)

    except Exception as e:
        logger.error(f"Failed to send completion notification for job {job.job_uuid}: {str(e)}")
        return False
|
||||
|
||||
def send_job_failure_notification(self, job: TranslationJob) -> bool:
    """Email the owner of a translation job that the job has failed.

    Builds an HTML body and a plain-text alternative describing the failed
    job (file name, job UUID, retry count, error message) and hands both to
    ``self._send_email``.

    Values that originate outside the application (uploaded file name, user
    display name, error message) are HTML-escaped before being placed in the
    HTML body, so markup characters in them cannot alter the email.

    Args:
        job: The failed translation job. ``job.user.email`` is the recipient.

    Returns:
        Whatever ``self._send_email`` returns; ``False`` when the job has no
        user or no email address, or when any exception is raised while
        building or sending the message.
    """
    # Local import keeps this method self-contained without touching the
    # module-level import block.
    from html import escape

    try:
        if not job.user or not job.user.email:
            logger.warning(f"No email address for job {job.job_uuid}")
            return False

        subject = f"⚠️ 翻譯失敗通知 - {job.original_filename}"

        error_text = job.error_message or '未知錯誤'
        # One timestamp for the whole message so the two places that show it agree.
        sent_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        # Escaped copies are used only in the HTML part; the plain-text part
        # and the subject keep the raw values.
        safe_user = escape(str(job.user.display_name))
        safe_filename = escape(str(job.original_filename))
        safe_uuid = escape(str(job.job_uuid))
        safe_error = escape(str(error_text))

        html_content = f"""
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <style>
        body {{ font-family: Arial, sans-serif; line-height: 1.6; color: #333; }}
        .container {{ max-width: 600px; margin: 0 auto; padding: 20px; }}
        .header {{ background-color: #dc2626; color: white; padding: 20px; text-align: center; border-radius: 8px 8px 0 0; }}
        .content {{ background-color: #f8fafc; padding: 30px; border: 1px solid #e5e7eb; }}
        .error-box {{ background-color: #fef2f2; border-left: 4px solid #dc2626; padding: 15px; margin: 20px 0; }}
        .footer {{ background-color: #374151; color: #d1d5db; padding: 15px; text-align: center; font-size: 12px; border-radius: 0 0 8px 8px; }}
        .error {{ color: #dc2626; font-weight: bold; }}
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>❌ 翻譯任務失敗</h1>
        </div>

        <div class="content">
            <p>親愛的 <strong>{safe_user}</strong>,</p>

            <p class="error">很抱歉,您的文件翻譯任務處理失敗。</p>

            <div class="error-box">
                <h3>📋 任務資訊</h3>
                <p><strong>檔案名稱:</strong> {safe_filename}</p>
                <p><strong>任務編號:</strong> {safe_uuid}</p>
                <p><strong>重試次數:</strong> {job.retry_count}</p>
                <p><strong>錯誤訊息:</strong> {safe_error}</p>
                <p><strong>失敗時間:</strong> {sent_at}</p>
            </div>

            <div style="margin-top: 20px;">
                <p><strong>建議處理方式:</strong></p>
                <ul>
                    <li>檢查檔案格式是否正確</li>
                    <li>確認檔案沒有損壞</li>
                    <li>稍後再次嘗試上傳</li>
                    <li>如問題持續,請聯繫系統管理員</li>
                </ul>
            </div>

            <div style="margin-top: 30px; padding-top: 20px; border-top: 1px solid #e5e7eb;">
                <p>如需協助,請聯繫系統管理員。</p>
            </div>
        </div>

        <div class="footer">
            <p>此郵件由 {self.app_name} 系統自動發送,請勿回覆。</p>
            <p>發送時間: {sent_at}</p>
        </div>
    </div>
</body>
</html>
"""

        # Plain-text alternative: no escaping needed, nothing interprets markup here.
        text_content = f"""
翻譯任務失敗通知

親愛的 {job.user.display_name},

很抱歉,您的文件翻譯任務處理失敗。

任務資訊:
- 檔案名稱: {job.original_filename}
- 任務編號: {job.job_uuid}
- 重試次數: {job.retry_count}
- 錯誤訊息: {error_text}

建議處理方式:
1. 檢查檔案格式是否正確
2. 確認檔案沒有損壞
3. 稍後再次嘗試上傳
4. 如問題持續,請聯繫系統管理員

如需協助,請聯繫系統管理員。

----
此郵件由 {self.app_name} 系統自動發送,請勿回覆。
"""

        return self._send_email(job.user.email, subject, html_content, text_content)

    except Exception as e:
        # Best-effort notification: a failure to notify must not propagate to the caller.
        logger.error(f"Failed to send failure notification for job {job.job_uuid}: {str(e)}")
        return False
|
||||
|
||||
def send_admin_notification(self, subject: str, message: str, admin_emails: List[str] = None) -> bool:
    """Send an HTML notification email to administrators.

    Args:
        subject: Notification heading; also used (with a prefix) as the
            email subject.
        message: Notification body placed inside the HTML template.
        admin_emails: Explicit recipient list. When empty or omitted, the
            addresses of ``User.get_admin_users()`` are used instead.

    Returns:
        ``True`` when at least one email was sent successfully; ``False``
        when there are no recipients, every send failed, or an exception
        was raised.
    """
    try:
        # Fall back to every admin account that has an email address.
        recipients = admin_emails or [
            admin.email for admin in User.get_admin_users() if admin.email
        ]

        if not recipients:
            logger.warning("No admin email addresses found")
            return False

        html_content = f"""
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <style>
        body {{ font-family: Arial, sans-serif; line-height: 1.6; color: #333; }}
        .container {{ max-width: 600px; margin: 0 auto; padding: 20px; }}
        .header {{ background-color: #f59e0b; color: white; padding: 20px; text-align: center; border-radius: 8px 8px 0 0; }}
        .content {{ background-color: #f8fafc; padding: 30px; border: 1px solid #e5e7eb; }}
        .footer {{ background-color: #374151; color: #d1d5db; padding: 15px; text-align: center; font-size: 12px; border-radius: 0 0 8px 8px; }}
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>🔔 系統管理通知</h1>
        </div>

        <div class="content">
            <p>系統管理員您好,</p>

            <div style="background-color: #fef3c7; border-left: 4px solid #f59e0b; padding: 15px; margin: 20px 0;">
                <h3>{subject}</h3>
                <p>{message}</p>
            </div>

            <p>發送時間: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
        </div>

        <div class="footer">
            <p>此郵件由 {self.app_name} 系統自動發送,請勿回覆。</p>
        </div>
    </div>
</body>
</html>
"""

        mail_subject = f"[管理通知] {subject}"
        # A list (not a generator) so every recipient is attempted even after a success.
        outcomes = [
            bool(self._send_email(address, mail_subject, html_content))
            for address in recipients
        ]
        return any(outcomes)

    except Exception as e:
        logger.error(f"Failed to send admin notification: {str(e)}")
        return False
|
||||
|
||||
def test_smtp_connection(self) -> bool:
    """Check whether an SMTP connection can be opened.

    Returns:
        ``True`` when ``self._create_smtp_connection()`` yields a server
        object (which is then closed with ``quit()``); ``False`` when it
        yields nothing or any exception is raised.
    """
    try:
        smtp = self._create_smtp_connection()
        if not smtp:
            return False
        # Connection only needed for the probe; release it right away.
        smtp.quit()
        return True
    except Exception as e:
        logger.error(f"SMTP connection test failed: {str(e)}")
        return False
|
||||
|
||||
# ========== 資料庫通知方法 ==========
|
||||
|
||||
def create_db_notification(
    self,
    user_id: int,
    title: str,
    message: str,
    notification_type: NotificationType = NotificationType.INFO,
    job_uuid: Optional[str] = None,
    extra_data: Optional[Dict[str, Any]] = None,
    expires_at: Optional[datetime] = None,
    link: Optional[str] = None
) -> Optional[Notification]:
    """Persist a notification row for a user.

    Args:
        user_id: ID of the user the notification belongs to.
        title: Notification title.
        message: Notification body text.
        notification_type: Notification category; its ``.value`` is stored.
        job_uuid: UUID of the related job, if any.
        extra_data: Additional data stored with the notification.
        expires_at: Expiry time, if any.
        link: Related link. When omitted and ``job_uuid`` is given, a
            ``/job/<job_uuid>`` link is generated.

    Returns:
        The committed ``Notification``, or ``None`` when saving failed (the
        session is rolled back in that case).
    """
    try:
        # Default the link to the job detail page when only a job is known.
        if job_uuid and not link:
            link = f"/job/{job_uuid}"

        fields = dict(
            user_id=user_id,
            type=notification_type.value,
            title=title,
            message=message,
            job_uuid=job_uuid,
            link=link,
            extra_data=extra_data,
            expires_at=expires_at,
        )
        record = Notification(**fields)

        session = db.session
        session.add(record)
        session.commit()

        logger.info(f"資料庫通知已創建: {record.notification_uuid} for user {user_id}")

        # WebSocket push is disabled; self._send_websocket_notification is intentionally not called.
        return record

    except Exception as e:
        db.session.rollback()
        logger.error(f"創建資料庫通知失敗: {e}")
        return None
|
||||
|
||||
def send_job_started_db_notification(self, job: TranslationJob) -> Optional[Notification]:
    """Create an INFO database notification saying a job started processing.

    Args:
        job: The translation job that has started.

    Returns:
        The notification returned by ``create_db_notification``, or ``None``
        when an exception is raised while building it.
    """
    try:
        pieces = [f'您的文件「{job.original_filename}」已開始翻譯處理。']
        if job.target_languages:
            pieces.append(f" 目標語言: {', '.join(job.target_languages)}")

        started = job.processing_started_at
        details = {
            'filename': job.original_filename,
            'target_languages': job.target_languages,
            'started_at': started.isoformat() if started else None,
        }

        return self.create_db_notification(
            user_id=job.user_id,
            title="翻譯任務開始處理",
            message=''.join(pieces),
            notification_type=NotificationType.INFO,
            job_uuid=job.job_uuid,
            extra_data=details,
        )

    except Exception as e:
        logger.error(f"發送任務開始資料庫通知失敗: {e}")
        return None
|
||||
|
||||
def send_job_completion_db_notification(self, job: TranslationJob) -> Optional[Notification]:
    """Create a SUCCESS database notification for a completed job.

    Nothing is created unless ``job.status`` is ``'COMPLETED'``.

    Args:
        job: The translation job.

    Returns:
        The notification returned by ``create_db_notification``; ``None``
        when the job is not completed or an exception is raised.
    """
    try:
        if job.status != 'COMPLETED':
            logger.warning(f"任務 {job.job_uuid} 狀態不是已完成,跳過完成通知")
            return None

        pieces = [f'您的文件「{job.original_filename}」已成功翻譯完成。']

        # Target languages, when present.
        if job.target_languages:
            pieces.append(f" 目標語言: {', '.join(job.target_languages)}")

        # Processing time: whole minutes, or whole seconds when under a minute.
        if job.processing_started_at and job.completed_at:
            elapsed = (job.completed_at - job.processing_started_at).total_seconds()
            whole_minutes = int(elapsed / 60)
            if whole_minutes > 0:
                pieces.append(f" 處理時間: {whole_minutes} 分鐘")
            else:
                pieces.append(f" 處理時間: {int(elapsed)} 秒")

        details = {
            'filename': job.original_filename,
            'target_languages': job.target_languages,
            'total_cost': float(job.total_cost) if job.total_cost else 0,
            'completed_at': job.completed_at.isoformat() if job.completed_at else None,
        }

        return self.create_db_notification(
            user_id=job.user_id,
            title="翻譯任務完成",
            message=''.join(pieces),
            notification_type=NotificationType.SUCCESS,
            job_uuid=job.job_uuid,
            extra_data=details,
        )

    except Exception as e:
        logger.error(f"發送任務完成資料庫通知失敗: {e}")
        return None
|
||||
|
||||
def send_job_completion_db_notification_direct(self, job: TranslationJob) -> Optional[Notification]:
    """Create a SUCCESS completion notification without checking ``job.status``.

    Args:
        job: The translation job.

    Returns:
        The notification returned by ``create_db_notification``, or ``None``
        when an exception is raised while building it.
    """
    try:
        pieces = [f'您的文件「{job.original_filename}」已成功翻譯完成。']

        # Target languages, when present.
        if job.target_languages:
            pieces.append(f" 目標語言: {', '.join(job.target_languages)}")

        pieces.append(" 您可以在任務列表中下載翻譯結果。")

        finished = job.completed_at
        details = {
            'filename': job.original_filename,
            'target_languages': job.target_languages,
            'total_cost': float(job.total_cost) if job.total_cost else 0,
            'completed_at': finished.isoformat() if finished else None,
        }

        return self.create_db_notification(
            user_id=job.user_id,
            title="翻譯任務完成",
            message=''.join(pieces),
            notification_type=NotificationType.SUCCESS,
            job_uuid=job.job_uuid,
            extra_data=details,
        )

    except Exception as e:
        logger.error(f"發送任務完成資料庫通知失敗: {e}")
        return None
|
||||
|
||||
def send_job_failure_db_notification(self, job: TranslationJob, error_message: str = None) -> Optional[Notification]:
    """Create an ERROR database notification for a failed job.

    Args:
        job: The failed translation job.
        error_message: Error text appended to the message and stored in the
            notification's extra data.

    Returns:
        The notification returned by ``create_db_notification``, or ``None``
        when an exception is raised while building it.
    """
    try:
        pieces = [f'您的文件「{job.original_filename}」翻譯失敗。']

        if error_message:
            pieces.append(f" 錯誤訊息: {error_message}")

        if job.retry_count > 0:
            pieces.append(f" 已重試 {job.retry_count} 次。")

        details = {
            'filename': job.original_filename,
            'error_message': error_message,
            'retry_count': job.retry_count,
            'failed_at': datetime.now().isoformat(),
        }

        return self.create_db_notification(
            user_id=job.user_id,
            title="翻譯任務失敗",
            message=''.join(pieces),
            notification_type=NotificationType.ERROR,
            job_uuid=job.job_uuid,
            extra_data=details,
        )

    except Exception as e:
        logger.error(f"發送任務失敗資料庫通知失敗: {e}")
        return None
|
||||
|
||||
def _send_websocket_notification(self, notification: Notification):
    """Placeholder for WebSocket delivery, which is disabled.

    Only writes a debug log entry; nothing is pushed.

    Args:
        notification: The notification that would have been pushed.
    """
    skipped_uuid = notification.notification_uuid
    logger.debug(f"WebSocket 推送已禁用,跳過通知: {skipped_uuid}")
|
||||
|
||||
def get_unread_count(self, user_id: int) -> int:
    """Count a user's unread notifications that have not expired.

    A notification counts when ``is_read`` is false and ``expires_at`` is
    either NULL or later than the current time.

    Args:
        user_id: ID of the user.

    Returns:
        The number of matching notifications, or ``0`` when the query fails.
    """
    try:
        unread = Notification.query.filter_by(user_id=user_id, is_read=False)
        still_valid = (
            (Notification.expires_at.is_(None)) |
            (Notification.expires_at > datetime.now())
        )
        return unread.filter(still_valid).count()
    except Exception as e:
        logger.error(f"獲取未讀通知數量失敗: {e}")
        return 0
|
282
app/services/ocr_cache.py
Normal file
282
app/services/ocr_cache.py
Normal file
@@ -0,0 +1,282 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
OCR 快取管理模組
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import sqlite3
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, Any
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)


class OCRCache:
    """SQLite-backed cache of OCR text, keyed by a SHA-256 of the file bytes.

    Each entry stores the extracted text, the file name and size, a JSON
    metadata blob, and access bookkeeping (hit count and last access time).
    Entries older than ``cache_expire_days`` are ignored on lookup and can
    be purged with :meth:`clean_expired_cache`.

    Every operation opens its own connection and closes it before returning.
    """

    def __init__(self, cache_db_path: str = "ocr_cache.db", cache_expire_days: int = 30):
        """Create the cache and make sure its table and indexes exist.

        Args:
            cache_db_path: Path of the SQLite database file.
            cache_expire_days: Age in days after which an entry is expired.

        Raises:
            Exception: Whatever :meth:`init_database` raises.
        """
        self.cache_db_path = Path(cache_db_path)
        self.cache_expire_days = cache_expire_days
        self.init_database()

    def _connect(self):
        """Return a context manager that yields a connection and closes it on exit.

        ``with sqlite3.connect(...)`` alone only commits or rolls back; it
        does not close the connection, so each call would leave a handle open.
        """
        from contextlib import closing

        return closing(sqlite3.connect(self.cache_db_path))

    def _expiry_modifier(self) -> str:
        """Return the SQLite date modifier for the expiry window, e.g. ``'-30 days'``."""
        return f"-{self.cache_expire_days} days"

    def init_database(self):
        """Create the ``ocr_cache`` table and its indexes if they are missing.

        Raises:
            Exception: Re-raised after logging when initialisation fails.
        """
        try:
            with self._connect() as conn:
                cursor = conn.cursor()
                cursor.execute('''
                    CREATE TABLE IF NOT EXISTS ocr_cache (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        file_hash TEXT UNIQUE NOT NULL,
                        filename TEXT,
                        file_size INTEGER,
                        extracted_text TEXT NOT NULL,
                        extraction_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                        access_count INTEGER DEFAULT 1,
                        last_access_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                        metadata TEXT
                    )
                ''')

                # Indexes to speed up lookups by hash and expiry scans.
                cursor.execute('''
                    CREATE INDEX IF NOT EXISTS idx_file_hash
                    ON ocr_cache(file_hash)
                ''')
                cursor.execute('''
                    CREATE INDEX IF NOT EXISTS idx_extraction_time
                    ON ocr_cache(extraction_time)
                ''')

                conn.commit()
                logger.info("OCR 快取資料庫初始化完成")

        except Exception as e:
            logger.error(f"初始化 OCR 快取資料庫失敗: {e}")
            raise

    def _calculate_file_hash(self, file_data: bytes, additional_info: str = "") -> str:
        """Return the SHA-256 hex digest of ``file_data`` + UTF-8 ``additional_info``.

        Args:
            file_data: Raw file bytes.
            additional_info: Extra discriminator (e.g. page number or
                processing parameters) mixed into the hash.

        Returns:
            The hexadecimal digest string.
        """
        hash_input = file_data + additional_info.encode('utf-8')
        return hashlib.sha256(hash_input).hexdigest()

    def get_cached_text(self, file_data: bytes, filename: str = "",
                        additional_info: str = "") -> Optional[str]:
        """Look up cached OCR text for a file.

        On a hit the entry's access count is incremented and its last access
        time is updated.

        Args:
            file_data: Raw file bytes.
            filename: File name, used only for log messages.
            additional_info: Extra discriminator mixed into the hash.

        Returns:
            The cached text, or ``None`` on a miss, an expired entry, or a
            database error.
        """
        try:
            file_hash = self._calculate_file_hash(file_data, additional_info)

            with self._connect() as conn:
                cursor = conn.cursor()

                # The expiry interval is bound as a parameter, not formatted into the SQL.
                cursor.execute('''
                    SELECT extracted_text, access_count
                    FROM ocr_cache
                    WHERE file_hash = ? AND
                          extraction_time > datetime('now', ?)
                ''', (file_hash, self._expiry_modifier()))

                result = cursor.fetchone()

                if result:
                    extracted_text, access_count = result

                    # Increment in SQL so concurrent hits cannot overwrite each other's count.
                    cursor.execute('''
                        UPDATE ocr_cache
                        SET access_count = access_count + 1,
                            last_access_time = CURRENT_TIMESTAMP
                        WHERE file_hash = ?
                    ''', (file_hash,))

                    conn.commit()

                    logger.info(f"[OCR-CACHE] 快取命中: {filename} (訪問次數: {access_count + 1})")
                    return extracted_text

                logger.debug(f"[OCR-CACHE] 快取未命中: {filename}")
                return None

        except Exception as e:
            logger.error(f"獲取 OCR 快取失敗: {e}")
            return None

    def save_cached_text(self, file_data: bytes, extracted_text: str,
                         filename: str = "", additional_info: str = "",
                         metadata: Optional[Dict[str, Any]] = None) -> bool:
        """Store OCR text for a file, replacing any entry with the same hash.

        Args:
            file_data: Raw file bytes.
            extracted_text: Text produced by OCR.
            filename: File name stored with the entry.
            additional_info: Extra discriminator mixed into the hash.
            metadata: Extra data, stored as JSON (``{}`` when omitted).

        Returns:
            ``True`` on success, ``False`` when the write failed.
        """
        try:
            file_hash = self._calculate_file_hash(file_data, additional_info)
            file_size = len(file_data)
            metadata_json = json.dumps(metadata or {}, ensure_ascii=False)

            with self._connect() as conn:
                # INSERT OR REPLACE handles an existing row with the same hash;
                # the replacement row gets fresh default timestamps and count.
                conn.execute('''
                    INSERT OR REPLACE INTO ocr_cache
                    (file_hash, filename, file_size, extracted_text, metadata)
                    VALUES (?, ?, ?, ?, ?)
                ''', (file_hash, filename, file_size, extracted_text, metadata_json))

                conn.commit()

                logger.info(f"[OCR-CACHE] 儲存快取成功: {filename} ({len(extracted_text)} 字元)")
                return True

        except Exception as e:
            logger.error(f"儲存 OCR 快取失敗: {e}")
            return False

    def get_cache_stats(self) -> Dict[str, Any]:
        """Collect summary statistics about the cache.

        Returns:
            A dict with the record count, total access count, stored text
            size (in characters and divided by 1024*1024), the number of
            records from the last 7 days, the five most-accessed files, and
            a hit-ratio string; ``{}`` when the query fails.
        """
        try:
            with self._connect() as conn:
                cursor = conn.cursor()

                # Total number of records.
                cursor.execute('SELECT COUNT(*) FROM ocr_cache')
                total_records = cursor.fetchone()[0]

                # Total access count (SUM is NULL on an empty table).
                cursor.execute('SELECT SUM(access_count) FROM ocr_cache')
                total_accesses = cursor.fetchone()[0] or 0

                # Size of the stored text, in characters.
                cursor.execute('SELECT SUM(LENGTH(extracted_text)) FROM ocr_cache')
                cache_size_chars = cursor.fetchone()[0] or 0

                # Records created in the last 7 days.
                cursor.execute('''
                    SELECT COUNT(*) FROM ocr_cache
                    WHERE extraction_time > datetime('now', '-7 days')
                ''')
                recent_records = cursor.fetchone()[0]

                # Most frequently accessed records.
                cursor.execute('''
                    SELECT filename, access_count, last_access_time
                    FROM ocr_cache
                    ORDER BY access_count DESC
                    LIMIT 5
                ''')
                top_accessed = cursor.fetchall()

                return {
                    'total_records': total_records,
                    'total_accesses': total_accesses,
                    'cache_size_chars': cache_size_chars,
                    'cache_size_mb': cache_size_chars / (1024 * 1024),
                    'recent_records_7days': recent_records,
                    'top_accessed_files': [
                        {
                            'filename': row[0],
                            'access_count': row[1],
                            'last_access': row[2]
                        }
                        for row in top_accessed
                    ],
                    'cache_hit_potential': f"{(total_accesses - total_records) / max(total_accesses, 1) * 100:.1f}%"
                }

        except Exception as e:
            logger.error(f"獲取快取統計失敗: {e}")
            return {}

    def clean_expired_cache(self) -> int:
        """Delete entries older than ``cache_expire_days``.

        Returns:
            The number of deleted rows, or ``0`` when the delete failed.
        """
        try:
            with self._connect() as conn:
                cursor = conn.cursor()

                # Expiry interval is bound as a parameter, not formatted into the SQL.
                cursor.execute('''
                    DELETE FROM ocr_cache
                    WHERE extraction_time < datetime('now', ?)
                ''', (self._expiry_modifier(),))

                deleted_count = cursor.rowcount
                conn.commit()

                logger.info(f"[OCR-CACHE] 清理過期快取: {deleted_count} 筆記錄")
                return deleted_count

        except Exception as e:
            logger.error(f"清理過期快取失敗: {e}")
            return 0

    def clear_all_cache(self) -> bool:
        """Delete every cache entry.

        Returns:
            ``True`` on success, ``False`` when the delete failed.
        """
        try:
            with self._connect() as conn:
                conn.execute('DELETE FROM ocr_cache')
                conn.commit()

                logger.info("[OCR-CACHE] 已清空所有快取")
                return True

        except Exception as e:
            logger.error(f"清空快取失敗: {e}")
            return False
|
2634
app/services/translation_service.py
Normal file
2634
app/services/translation_service.py
Normal file
File diff suppressed because it is too large
Load Diff
16
app/tasks/__init__.py
Normal file
16
app/tasks/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Celery 任務模組
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
from .translation import process_translation_job, cleanup_old_files
|
||||
|
||||
__all__ = [
|
||||
'process_translation_job',
|
||||
'cleanup_old_files'
|
||||
]
|
350
app/tasks/translation.py
Normal file
350
app/tasks/translation.py
Normal file
@@ -0,0 +1,350 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
翻譯相關 Celery 任務
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from celery import Celery, current_task
|
||||
from celery.schedules import crontab
|
||||
from app import create_app, db
|
||||
|
||||
logger = None
|
||||
|
||||
def get_celery_instance():
    """Build a Flask app via ``create_app()`` and return its ``celery`` attribute."""
    return create_app().celery
|
||||
|
||||
# 建立 Celery 實例
|
||||
celery = get_celery_instance()
|
||||
|
||||
# 初始化 logger
|
||||
from app.utils.logger import get_logger
|
||||
logger = get_logger(__name__)
|
||||
|
||||
from app.models.job import TranslationJob
|
||||
from app.models.log import SystemLog
|
||||
from app.services.translation_service import TranslationService
|
||||
from app.services.notification_service import NotificationService
|
||||
from app.utils.exceptions import TranslationError
|
||||
|
||||
|
||||
@celery.task(bind=True, max_retries=3)
def process_translation_job(self, job_id: int):
    """Run a translation job, retrying on failure and notifying the user.

    On success the user gets an email and a database notification. On
    failure the job is marked ``RETRY`` and rescheduled (30s, 60s, 120s)
    until ``max_retries`` is exhausted; then it is marked ``FAILED`` and the
    user gets one failure email and one database notification.

    Args:
        job_id: Primary key of the ``TranslationJob`` to process.

    Raises:
        celery.exceptions.Retry: When a retry has been scheduled.
        Exception: The original error, once retries are exhausted or when
            the job row cannot be found.
    """
    app = create_app()

    with app.app_context():
        # Bound before the try so the error handler can tell whether the job was loaded.
        job = None
        try:
            # Load the job.
            job = TranslationJob.query.get(job_id)
            if not job:
                raise ValueError(f"Job {job_id} not found")

            logger.info(f"Starting translation job processing: {job.job_uuid}")

            # Record the start of processing.
            SystemLog.info(
                'tasks.translation',
                f'Translation job started: {job.job_uuid}',
                user_id=job.user_id,
                job_id=job.id,
                extra_data={
                    'filename': job.original_filename,
                    'target_languages': job.target_languages,
                    'retry_count': self.request.retries
                }
            )

            # Run the translation.
            translation_service = TranslationService()
            result = translation_service.translate_document(job.job_uuid)

            if not result['success']:
                raise TranslationError(result.get('error', 'Unknown translation error'))

            logger.info(f"Translation job completed successfully: {job.job_uuid}")

            # Reload the job so the notifications see its latest state.
            db.session.refresh(job)

            # Notifications are best-effort: a failure here must not fail the job.
            try:
                notification_service = NotificationService()
                # Email notification.
                notification_service.send_job_completion_notification(job)
                # Database notification, sent without the status check.
                notification_service.send_job_completion_db_notification_direct(job)
            except Exception as e:
                logger.warning(f"Failed to send completion notification: {str(e)}")

            # Record completion.
            SystemLog.info(
                'tasks.translation',
                f'Translation job completed: {job.job_uuid}',
                user_id=job.user_id,
                job_id=job.id,
                extra_data={
                    'total_cost': result.get('total_cost', 0),
                    'total_sentences': result.get('total_sentences', 0),
                    'output_files': list(result.get('output_files', {}).keys())
                }
            )

        except Exception as exc:
            # The job may be missing (lookup failed or returned nothing);
            # fall back to the numeric id so logging cannot raise itself.
            job_label = job.job_uuid if job is not None else f"id={job_id}"
            logger.error(f"Translation job failed: {job_label}. Error: {str(exc)}")

            # NOTE(review): the original re-enters an application context here
            # before re-querying; kept as is — presumably to work with a fresh
            # context/session after the failure. Confirm before removing.
            with app.app_context():
                # Update the job status.
                job = TranslationJob.query.get(job_id)
                if job:
                    job.error_message = str(exc)
                    job.retry_count = self.request.retries + 1

                    if self.request.retries < self.max_retries:
                        # Schedule a retry.
                        job.update_status('RETRY')

                        # Back-off: 30s, 60s, 120s; clamp the index in case
                        # max_retries is ever raised above the table length.
                        delays = [30, 60, 120]
                        countdown = delays[min(self.request.retries, len(delays) - 1)]

                        SystemLog.warning(
                            'tasks.translation',
                            f'Translation job retry scheduled: {job.job_uuid} (attempt {self.request.retries + 2})',
                            user_id=job.user_id,
                            job_id=job.id,
                            extra_data={
                                'error': str(exc),
                                'retry_count': self.request.retries + 1,
                                'countdown': countdown
                            }
                        )

                        logger.info(f"Retrying translation job in {countdown}s: {job.job_uuid}")
                        raise self.retry(exc=exc, countdown=countdown)

                    else:
                        # Retries exhausted: mark the job as failed.
                        job.update_status('FAILED')

                        # Send the failure notifications once (email + database).
                        try:
                            notification_service = NotificationService()
                            notification_service.send_job_failure_notification(job)
                            notification_service.send_job_failure_db_notification(job, str(exc))
                        except Exception as e:
                            logger.warning(f"Failed to send failure notification: {str(e)}")

                        SystemLog.error(
                            'tasks.translation',
                            f'Translation job failed permanently: {job.job_uuid}',
                            user_id=job.user_id,
                            job_id=job.id,
                            extra_data={
                                'error': str(exc),
                                'total_retries': self.request.retries
                            }
                        )

                        logger.error(f"Translation job failed permanently: {job.job_uuid}")

            # Propagate the original error so Celery records the task as failed.
            raise
|
||||
|
||||
|
||||
@celery.task
def cleanup_old_files():
    """Delete upload directories that are past the retention period.

    Each sub-directory of ``UPLOAD_FOLDER`` is treated as a job directory
    named after the job UUID. A directory whose modification time is older
    than the cutoff is removed when either its job has a ``completed_at``
    older than the cutoff, or no job row exists for it (orphan).

    All comparisons use naive UTC datetimes, matching the ``utcnow()``-based
    cutoff.

    Returns:
        A summary dict (deleted directory count, freed size in MB, retention
        days, cutoff date), or ``None`` when the upload folder is missing.

    Raises:
        Exception: Re-raised after logging when the task as a whole fails.
    """
    app = create_app()

    with app.app_context():
        try:
            logger.info("Starting file cleanup task")

            upload_folder = Path(app.config.get('UPLOAD_FOLDER'))
            retention_days = app.config.get('FILE_RETENTION_DAYS', 7)
            cutoff_date = datetime.utcnow() - timedelta(days=retention_days)

            if not upload_folder.exists():
                logger.warning(f"Upload folder does not exist: {upload_folder}")
                return

            deleted_dirs = 0
            total_size_freed = 0

            # Walk every UUID directory under the upload folder.
            for item in upload_folder.iterdir():
                if not item.is_dir():
                    continue

                try:
                    # Convert the mtime to UTC so it is comparable with the
                    # UTC cutoff (fromtimestamp() alone would be local time).
                    dir_mtime = datetime.utcfromtimestamp(item.stat().st_mtime)
                    if dir_mtime >= cutoff_date:
                        continue

                    # Directory size, for reporting.
                    dir_size = sum(f.stat().st_size for f in item.rglob('*') if f.is_file())

                    # Check whether a database record still refers to this directory.
                    job_uuid = item.name
                    job = TranslationJob.query.filter_by(job_uuid=job_uuid).first()

                    if job:
                        # Only remove files of jobs completed before the cutoff.
                        if job.completed_at and job.completed_at < cutoff_date:
                            shutil.rmtree(item)
                            deleted_dirs += 1
                            total_size_freed += dir_size

                            logger.info(f"Cleaned up job directory: {job_uuid}")

                            # Record the cleanup.
                            SystemLog.info(
                                'tasks.cleanup',
                                f'Cleaned up files for completed job: {job_uuid}',
                                user_id=job.user_id,
                                job_id=job.id,
                                extra_data={
                                    'files_size_mb': dir_size / (1024 * 1024),
                                    'retention_days': retention_days
                                }
                            )
                    else:
                        # No database record: orphaned directory, remove it.
                        shutil.rmtree(item)
                        deleted_dirs += 1
                        total_size_freed += dir_size

                        logger.info(f"Cleaned up orphaned directory: {job_uuid}")

                except Exception as e:
                    # One bad directory must not stop the rest of the sweep.
                    logger.error(f"Failed to process directory {item}: {str(e)}")
                    continue

            # Record the overall result.
            cleanup_result = {
                'deleted_directories': deleted_dirs,
                'total_size_freed_mb': total_size_freed / (1024 * 1024),
                'retention_days': retention_days,
                'cutoff_date': cutoff_date.isoformat()
            }

            SystemLog.info(
                'tasks.cleanup',
                f'File cleanup completed: {deleted_dirs} directories, {total_size_freed / (1024 * 1024):.2f} MB freed',
                extra_data=cleanup_result
            )

            logger.info(f"File cleanup completed: {cleanup_result}")

            return cleanup_result

        except Exception as e:
            logger.error(f"File cleanup task failed: {str(e)}")

            SystemLog.error(
                'tasks.cleanup',
                f'File cleanup task failed: {str(e)}',
                extra_data={'error': str(e)}
            )

            raise
|
||||
|
||||
|
||||
@celery.task
def send_daily_admin_report():
    """Build the daily usage/error summary and send it to administrators.

    Uses ``APIUsageStats.get_daily_statistics(days=1)`` and
    ``SystemLog.get_error_summary(days=1)`` for the figures, then sends the
    report through ``NotificationService.send_admin_notification``.

    Returns:
        The result of ``send_admin_notification``.

    Raises:
        Exception: Re-raised after logging when the task fails.
    """
    app = create_app()

    with app.app_context():
        try:
            logger.info("Generating daily admin report")

            from app.models.stats import APIUsageStats
            from app.services.notification_service import NotificationService

            # Yesterday's statistics.
            yesterday = datetime.utcnow() - timedelta(days=1)
            daily_stats = APIUsageStats.get_daily_statistics(days=1)

            # System error summary.
            error_summary = SystemLog.get_error_summary(days=1)

            # Report content.
            if not daily_stats:
                subject = f"每日系統報告 - {yesterday.strftime('%Y-%m-%d')}"
                message = "昨日無翻譯任務記錄。"
            else:
                yesterday_data = daily_stats[0]
                subject = f"每日系統報告 - {yesterday_data['date']}"

                message = f"""
昨日系統使用狀況:
• 翻譯任務: {yesterday_data['total_calls']} 個
• 成功任務: {yesterday_data['successful_calls']} 個
• 失敗任務: {yesterday_data['failed_calls']} 個
• 總成本: ${yesterday_data['total_cost']:.4f}
• 總 Token 數: {yesterday_data['total_tokens']}

系統錯誤摘要:
• 錯誤數量: {error_summary['total_errors']}

請查看管理後台了解詳細資訊。
"""

            # Send the notification to administrators.
            result = NotificationService().send_admin_notification(subject, message)

            if not result:
                logger.warning("Failed to send daily admin report")
            else:
                logger.info("Daily admin report sent successfully")

            return result

        except Exception as e:
            logger.error(f"Daily admin report task failed: {str(e)}")
            raise e
|
||||
|
||||
|
||||
# 定期任務設定
|
||||
@celery.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
    """Register the periodic tasks once Celery has been configured.

    Args:
        sender: The Celery app that emitted ``on_after_configure``.
        **kwargs: Extra signal arguments (unused).
    """
    schedule = (
        # File cleanup every day at 02:00.
        (crontab(hour=2, minute=0), cleanup_old_files.s(), 'cleanup-old-files-daily'),
        # Admin report every day at 08:00.
        (crontab(hour=8, minute=0), send_daily_admin_report.s(), 'daily-admin-report'),
    )

    for when, signature, task_name in schedule:
        sender.add_periodic_task(when, signature, name=task_name)
|
||||
|
||||
|
34
app/utils/__init__.py
Normal file
34
app/utils/__init__.py
Normal file
@@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
工具模組
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
from .decorators import login_required, admin_required
|
||||
from .validators import validate_file, validate_languages
|
||||
from .helpers import generate_filename, format_file_size
|
||||
from .exceptions import (
|
||||
DocumentTranslatorError,
|
||||
AuthenticationError,
|
||||
ValidationError,
|
||||
TranslationError,
|
||||
FileProcessingError
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'login_required',
|
||||
'admin_required',
|
||||
'validate_file',
|
||||
'validate_languages',
|
||||
'generate_filename',
|
||||
'format_file_size',
|
||||
'DocumentTranslatorError',
|
||||
'AuthenticationError',
|
||||
'ValidationError',
|
||||
'TranslationError',
|
||||
'FileProcessingError'
|
||||
]
|
277
app/utils/api_auth.py
Normal file
277
app/utils/api_auth.py
Normal file
@@ -0,0 +1,277 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
API 認證服務
|
||||
用於與 PANJIT Auth API 整合認證
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2025-10-01
|
||||
"""
|
||||
|
||||
import requests
|
||||
import json
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional, Dict, Any, Tuple
|
||||
from flask import current_app
|
||||
from .logger import get_logger
|
||||
from .exceptions import AuthenticationError
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class APIAuthService:
    """Client for the PANJIT Auth HTTP API (login, logout, token helpers)."""

    def __init__(self):
        self.config = current_app.config
        self.api_base_url = "https://pj-auth-api.vercel.app"
        self.login_endpoint = "/api/auth/login"
        self.logout_endpoint = "/api/auth/logout"
        self.timeout = 30  # seconds, applied to the login and logout requests

    def authenticate_user(self, username: str, password: str) -> Dict[str, Any]:
        """Verify credentials against the remote API.

        Args:
            username: Account name sent to the API.
            password: Password sent to the API.

        Returns:
            The normalized user/token dictionary produced by
            ``_parse_auth_response``.

        Raises:
            AuthenticationError: On rejected credentials, unexpected HTTP
                status, transport failure, or an unparsable response.
        """
        try:
            login_url = f"{self.api_base_url}{self.login_endpoint}"

            payload = {
                "username": username,
                "password": password
            }

            headers = {
                "Content-Type": "application/json"
            }

            logger.info(f"正在透過 API 驗證使用者: {username}")

            # Send the authentication request.
            response = requests.post(
                login_url,
                json=payload,
                headers=headers,
                timeout=self.timeout
            )

            # Interpret the response.
            if response.status_code == 200:
                data = response.json()

                if data.get('success'):
                    logger.info(f"API 認證成功: {username}")
                    return self._parse_auth_response(data)

                error_msg = data.get('error', '認證失敗')
                logger.warning(f"API 認證失敗: {username} - {error_msg}")
                raise AuthenticationError(f"認證失敗: {error_msg}")

            elif response.status_code == 401:
                data = response.json()
                error_msg = data.get('error', '帳號或密碼錯誤')
                logger.warning(f"API 認證失敗 (401): {username} - {error_msg}")
                raise AuthenticationError("帳號或密碼錯誤")

            else:
                logger.error(f"API 認證請求失敗: HTTP {response.status_code}")
                raise AuthenticationError(f"認證服務錯誤 (HTTP {response.status_code})")

        except AuthenticationError:
            # Raised on purpose above (or by _parse_auth_response); re-raise
            # untouched so the generic handler below does not wrap the
            # message a second time.
            raise

        except requests.exceptions.Timeout:
            logger.error(f"API 認證請求超時: {username}")
            raise AuthenticationError("認證服務回應超時,請稍後再試")

        except requests.exceptions.ConnectionError:
            logger.error(f"API 認證連線錯誤: {username}")
            raise AuthenticationError("無法連接認證服務,請檢查網路連線")

        except json.JSONDecodeError:
            # Must precede RequestException: the decode error raised by
            # response.json() in current requests releases is also a
            # RequestException and would otherwise never reach this handler.
            logger.error(f"API 認證回應格式錯誤: {username}")
            raise AuthenticationError("認證服務回應格式錯誤")

        except requests.exceptions.RequestException as e:
            logger.error(f"API 認證請求錯誤: {username} - {str(e)}")
            raise AuthenticationError(f"認證服務錯誤: {str(e)}") from e

        except Exception as e:
            logger.error(f"API 認證未知錯誤: {username} - {str(e)}")
            raise AuthenticationError(f"認證過程發生錯誤: {str(e)}") from e

    @staticmethod
    def _parse_iso_timestamp(auth_data, key, failure_message):
        """Return ``auth_data[key]`` parsed as ISO-8601, or None.

        A trailing 'Z' is rewritten to '+00:00' before parsing. When the key
        is present but cannot be parsed, ``failure_message`` is logged as a
        warning and None is returned.
        """
        if key not in auth_data:
            return None
        try:
            return datetime.fromisoformat(auth_data[key].replace('Z', '+00:00'))
        except (ValueError, AttributeError):
            logger.warning(failure_message)
            return None

    def _parse_auth_response(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Normalize a successful login response.

        Args:
            data: Decoded JSON body of the login response.

        Returns:
            Dictionary with user fields, API-specific fields, token fields
            and the raw response.

        Raises:
            AuthenticationError: If the response cannot be processed.
        """
        try:
            auth_data = data.get('data', {})
            user_info = auth_data.get('userInfo', {})

            # Token expiry / issue time; each stays None when absent or unparsable.
            expires_at = self._parse_iso_timestamp(
                auth_data, 'expiresAt', "無法解析 API Token 過期時間"
            )
            issued_at = self._parse_iso_timestamp(
                auth_data, 'issuedAt', "無法解析 API Token 發行時間"
            )

            # Per the original author's note ("plan A"), the API's `name`
            # field is "<full name> <email>" and `email` is the bare address.
            api_name = user_info.get('name', '')
            api_email = user_info.get('email', '')

            result = {
                # Basic user information: username and display_name both use the API name.
                'username': api_name,
                'display_name': api_name,
                'email': api_email,
                'department': user_info.get('jobTitle'),  # jobTitle doubles as department
                'user_principal_name': api_email,

                # API-specific information.
                'api_user_id': user_info.get('id', ''),
                'job_title': user_info.get('jobTitle'),
                'office_location': user_info.get('officeLocation'),
                'business_phones': user_info.get('businessPhones', []),

                # Token information.
                'api_access_token': auth_data.get('access_token', ''),
                'api_id_token': auth_data.get('id_token', ''),
                'api_token_type': auth_data.get('token_type', 'Bearer'),
                'api_expires_in': auth_data.get('expires_in', 0),
                'api_issued_at': issued_at,
                'api_expires_at': expires_at,

                # Complete API response, kept for record purposes.
                'full_api_response': data,
                'api_user_info': user_info
            }

            return result

        except Exception as e:
            logger.error(f"解析 API 回應時發生錯誤: {str(e)}")
            raise AuthenticationError(f"解析認證回應時發生錯誤: {str(e)}") from e

    def logout_user(self, access_token: str) -> bool:
        """Log the user out through the API.

        Args:
            access_token: Token sent as a Bearer credential.

        Returns:
            True only when the API answers HTTP 200 with a truthy
            ``success`` field; False otherwise, including on any exception.
        """
        try:
            logout_url = f"{self.api_base_url}{self.logout_endpoint}"

            headers = {
                "Authorization": f"Bearer {access_token}",
                "Content-Type": "application/json"
            }

            response = requests.post(
                logout_url,
                headers=headers,
                timeout=self.timeout
            )

            if response.status_code == 200:
                data = response.json()
                if data.get('success'):
                    logger.info("API 登出成功")
                    return True

            logger.warning(f"API 登出失敗: HTTP {response.status_code}")
            return False

        except Exception as e:
            logger.error(f"API 登出時發生錯誤: {str(e)}")
            return False

    def validate_token(self, access_token: str) -> bool:
        """Shape-check a token.

        Only verifies that the token is non-empty and has three
        dot-separated segments; no signature or expiry check is performed.

        Args:
            access_token: Token to inspect.

        Returns:
            True if the token has the three-segment shape, else False.
        """
        try:
            # The original author notes the API offers no dedicated
            # validation endpoint, so only the shape is checked here.
            if not access_token or len(access_token.split('.')) != 3:
                return False

            # TODO: implement fuller JWT validation (e.g. decode the payload
            # and check the expiry).
            return True

        except Exception as e:
            logger.error(f"驗證 Token 時發生錯誤: {str(e)}")
            return False

    def test_connection(self) -> bool:
        """Probe the API base URL.

        Returns:
            True when a GET on the base URL answers 200 or 404 (404 still
            shows the server is reachable); False otherwise or on error.
        """
        try:
            response = requests.get(
                self.api_base_url,
                timeout=10
            )

            return response.status_code in [200, 404]

        except Exception as e:
            logger.error(f"API 連線測試失敗: {str(e)}")
            return False

    def calculate_internal_expiry(self, api_expires_at: Optional[datetime], extend_days: int = 3) -> datetime:
        """Compute the internal token expiry.

        Args:
            api_expires_at: Expiry of the API token, or None.
            extend_days: Days to add.

        Returns:
            ``api_expires_at + extend_days`` when an API expiry is given,
            otherwise the current UTC time plus ``extend_days``.
        """
        if api_expires_at:
            return api_expires_at + timedelta(days=extend_days)

        # NOTE(review): this fallback is a naive datetime, while timestamps
        # parsed from the API carry a UTC offset; confirm callers can handle both.
        return datetime.utcnow() + timedelta(days=extend_days)
|
238
app/utils/decorators.py
Normal file
238
app/utils/decorators.py
Normal file
@@ -0,0 +1,238 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
裝飾器模組
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
from functools import wraps
|
||||
from flask import session, jsonify, g, current_app
|
||||
from flask_jwt_extended import jwt_required, get_jwt_identity, get_jwt
|
||||
|
||||
|
||||
def login_required(f):
    """Session-based login check.

    Rejects the request with a 401 JSON body when the session holds no
    ``user_id`` or when that user no longer exists (the session is cleared
    in that case). On success the user record is exposed on ``g`` as
    ``current_user``, ``current_user_id`` and ``is_admin`` before the
    wrapped view runs.
    """
    @wraps(f)
    def decorated_function(*args, **kwargs):
        from app.utils.logger import get_logger
        from flask import request
        logger = get_logger(__name__)

        user_id = session.get('user_id')

        # Diagnostic trace of the session check. Logged at DEBUG, and only
        # the key names are recorded: session values may hold credentials
        # or personal data and must not end up in the log files.
        logger.debug(f"🔐 [Session Check] Endpoint: {request.endpoint}, Method: {request.method}, URL: {request.url}")
        logger.debug(f"🔐 [Session Data] UserID: {user_id}, SessionKeys: {list(session.keys())}")

        if not user_id:
            logger.warning(f"❌ [Auth Failed] No user_id in session for {request.endpoint}")
            return jsonify({
                'success': False,
                'error': 'AUTHENTICATION_REQUIRED',
                'message': '請先登入'
            }), 401

        # Load the user and publish it on the request-global object.
        from app.models import User
        user = User.query.get(user_id)
        if not user:
            # The session points at a user that no longer exists; drop it.
            session.clear()
            return jsonify({
                'success': False,
                'error': 'USER_NOT_FOUND',
                'message': '使用者不存在'
            }), 401

        g.current_user = user
        g.current_user_id = user.id
        g.is_admin = user.is_admin

        return f(*args, **kwargs)

    return decorated_function
|
||||
|
||||
|
||||
def jwt_login_required(f):
    """JWT-based login check.

    The wrapped view runs behind ``jwt_required()``. The token identity and
    its ``user_id`` / ``is_admin`` claims are copied onto ``g`` first; if
    reading them raises, a 401 JSON body is returned instead.
    """
    @wraps(f)
    @jwt_required()
    def decorated_function(*args, **kwargs):
        from app.utils.logger import get_logger

        log = get_logger(__name__)

        try:
            identity = get_jwt_identity()
            token_claims = get_jwt()

            # Publish the caller's identity for downstream code.
            g.current_user_username = identity
            g.current_user_id = token_claims.get('user_id')
            g.is_admin = token_claims.get('is_admin', False)

            log.info(f"🔑 [JWT Auth] User: {identity}, UserID: {token_claims.get('user_id')}, Admin: {token_claims.get('is_admin')}")
        except Exception as exc:
            log.error(f"❌ [JWT Auth] JWT validation failed: {str(exc)}")
            failure = {
                'success': False,
                'error': 'AUTHENTICATION_REQUIRED',
                'message': '認證失效,請重新登入'
            }
            return jsonify(failure), 401

        return f(*args, **kwargs)

    return decorated_function
|
||||
|
||||
|
||||
def admin_required(f):
    """Administrator check based on the JWT plus a database re-check.

    The wrapped view runs only when the token carries a truthy ``is_admin``
    claim AND the user row referenced by the ``user_id`` claim exists and is
    still flagged as admin. Refusals are JSON bodies with status 403
    (permission), 401 (unknown user) or 401 (any exception while checking).
    """

    def _reject(error_code, text, status):
        # Uniform JSON error body shared by every refusal path.
        return jsonify({
            'success': False,
            'error': error_code,
            'message': text
        }), status

    @wraps(f)
    @jwt_required()
    def decorated_function(*args, **kwargs):
        from app.utils.logger import get_logger

        log = get_logger(__name__)

        try:
            identity = get_jwt_identity()
            token_claims = get_jwt()

            # Publish the caller's identity for downstream code.
            g.current_user_username = identity
            g.current_user_id = token_claims.get('user_id')
            g.is_admin = token_claims.get('is_admin', False)

            log.info(f"🔑 [JWT Admin Auth] User: {identity}, UserID: {token_claims.get('user_id')}, Admin: {token_claims.get('is_admin')}")

            # First gate: the claim inside the token.
            if not token_claims.get('is_admin', False):
                log.warning(f"❌ [Admin Auth] Permission denied for user: {identity}")
                return _reject('PERMISSION_DENIED', '權限不足,需要管理員權限', 403)

            # Second gate: the user must still exist and still be an admin.
            from app.models import User
            account = User.query.get(token_claims.get('user_id'))

            if not account:
                log.error(f"❌ [Admin Auth] User not found: {token_claims.get('user_id')}")
                return _reject('USER_NOT_FOUND', '使用者不存在', 401)

            if not account.is_admin:
                log.warning(f"❌ [Admin Auth] User no longer admin: {identity}")
                return _reject('PERMISSION_DENIED', '權限不足,需要管理員權限', 403)

            # Expose the full user record.
            g.current_user = account

        except Exception as exc:
            log.error(f"❌ [Admin Auth] JWT validation failed: {str(exc)}")
            return _reject('AUTHENTICATION_REQUIRED', '認證失效,請重新登入', 401)

        return f(*args, **kwargs)

    return decorated_function
|
||||
|
||||
|
||||
def validate_json(required_fields=None):
    """Build a decorator that validates the JSON request body.

    The wrapped view is called only when the request declares a JSON
    content type, the body parses to a non-empty value, and (when
    ``required_fields`` is given) the body is a JSON object containing
    every listed key. Otherwise a 400 JSON error body is returned.

    Args:
        required_fields: Optional iterable of keys that must be present.
    """
    def decorator(f):
        @wraps(f)
        def decorated_function(*args, **kwargs):
            from flask import request

            if not request.is_json:
                return jsonify({
                    'success': False,
                    'error': 'INVALID_CONTENT_TYPE',
                    'message': '請求必須為 JSON 格式'
                }), 400

            # silent=True makes a malformed body come back as None instead of
            # raising BadRequest, so the caller receives the INVALID_JSON
            # body below rather than the framework's default error page.
            data = request.get_json(silent=True)
            if not data:
                return jsonify({
                    'success': False,
                    'error': 'INVALID_JSON',
                    'message': 'JSON 資料格式錯誤'
                }), 400

            # Check required fields.
            if required_fields:
                # Key lookup is only meaningful on a JSON object; on a list
                # or string `in` would test elements or substrings instead.
                if not isinstance(data, dict):
                    return jsonify({
                        'success': False,
                        'error': 'INVALID_JSON',
                        'message': 'JSON 資料格式錯誤'
                    }), 400

                missing_fields = [field for field in required_fields if field not in data]
                if missing_fields:
                    return jsonify({
                        'success': False,
                        'error': 'MISSING_FIELDS',
                        'message': f'缺少必要欄位: {", ".join(missing_fields)}'
                    }), 400

            return f(*args, **kwargs)

        return decorated_function
    return decorator
|
||||
|
||||
|
||||
def rate_limit(max_requests=100, per_seconds=3600):
    """Build a simple per-IP, per-view rate-limiting decorator.

    Requests are recorded in a Redis sorted set keyed by view name and
    ``request.remote_addr``, scored by their Unix timestamp. Entries older
    than ``per_seconds`` are dropped on every call; when the remaining count
    has reached ``max_requests`` a 429 JSON body is returned.

    The limiter is best-effort: if Redis cannot be used the request is let
    through and a warning is logged.

    Args:
        max_requests: Maximum number of requests inside the window.
        per_seconds: Window length in seconds.
    """
    def decorator(f):
        @wraps(f)
        def decorated_function(*args, **kwargs):
            from flask import request
            import logging
            import redis
            import time
            import uuid

            try:
                redis_client = redis.from_url(current_app.config['REDIS_URL'])

                # The client's IP address identifies the caller.
                client_id = request.remote_addr
                key = f"rate_limit:{f.__name__}:{client_id}"

                current_time = int(time.time())
                window_start = current_time - per_seconds

                # Drop records that have left the window.
                redis_client.zremrangebyscore(key, 0, window_start)

                # Count what is left inside the window.
                current_requests = redis_client.zcard(key)

                if current_requests >= max_requests:
                    return jsonify({
                        'success': False,
                        'error': 'RATE_LIMIT_EXCEEDED',
                        'message': '請求過於頻繁,請稍後再試'
                    }), 429

                # Record this request. The member must be unique per request:
                # sorted-set members are a set, so using the bare timestamp
                # would merge all requests arriving within the same second
                # into one entry and undercount them.
                member = f"{current_time}:{uuid.uuid4().hex}"
                redis_client.zadd(key, {member: current_time})
                redis_client.expire(key, per_seconds)

            except Exception as exc:
                # Redis unavailable: do not block the request, but leave a trace.
                logging.getLogger(__name__).warning("Rate limiting skipped: %s", exc)

            return f(*args, **kwargs)

        return decorated_function
    return decorator
|
52
app/utils/exceptions.py
Normal file
52
app/utils/exceptions.py
Normal file
@@ -0,0 +1,52 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
自定義例外模組
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
|
||||
class DocumentTranslatorError(Exception):
    """Base exception of the document translation system.

    The human-readable text is available as ``message`` (and through
    ``str()``); ``error_code`` carries an optional code, None by default.
    """

    def __init__(self, message, error_code=None):
        super().__init__(message)
        self.message = message
        self.error_code = error_code
|
||||
|
||||
|
||||
class AuthenticationError(DocumentTranslatorError):
    """Authentication-related error."""
|
||||
|
||||
|
||||
class ValidationError(DocumentTranslatorError):
    """Validation-related error."""
|
||||
|
||||
|
||||
class TranslationError(DocumentTranslatorError):
    """Translation-related error."""
|
||||
|
||||
|
||||
class FileProcessingError(DocumentTranslatorError):
    """File-processing-related error."""
|
||||
|
||||
|
||||
class APIError(DocumentTranslatorError):
    """API-related error."""
|
||||
|
||||
|
||||
class ConfigurationError(DocumentTranslatorError):
    """Configuration-related error."""
|
||||
|
||||
|
||||
class DatabaseError(DocumentTranslatorError):
    """Database-related error."""
|
280
app/utils/helpers.py
Normal file
280
app/utils/helpers.py
Normal file
@@ -0,0 +1,280 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
輔助工具模組
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
import os
|
||||
import uuid
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from werkzeug.utils import secure_filename
|
||||
from flask import current_app
|
||||
|
||||
|
||||
def generate_filename(original_filename, job_uuid, file_type='original', language_code=None):
    """Build a sanitized storage file name.

    The result is ``<file_type>_<stem>_<first 8 chars of job_uuid><ext>``;
    for ``file_type == 'translated'`` the language code is inserted after
    the stem. The stem is passed through ``secure_filename`` and cut to 50
    characters; the extension is lower-cased.

    Args:
        original_filename: Name of the uploaded file.
        job_uuid: Job identifier; only its first 8 characters are used.
        file_type: Prefix of the generated name.
        language_code: Language tag, used only for translated files.
    """
    source = Path(original_filename)
    extension = source.suffix.lower()
    stem = secure_filename(source.stem)[:50]  # cap the length
    short_id = job_uuid[:8]

    if file_type == 'translated':
        label = f"translated_{stem}_{language_code}"
    else:
        label = f"{file_type}_{stem}"

    return f"{label}_{short_id}{extension}"
|
||||
|
||||
|
||||
def create_job_directory(job_uuid):
    """Create (if needed) and return the directory dedicated to a job.

    The directory is ``<UPLOAD_FOLDER>/<job_uuid>``; missing parents are
    created and an already existing directory is not an error.
    """
    base_dir = Path(current_app.config.get('UPLOAD_FOLDER'))
    target = base_dir / job_uuid
    target.mkdir(parents=True, exist_ok=True)
    return target
|
||||
|
||||
|
||||
def save_uploaded_file(file_obj, job_uuid):
    """Store an uploaded file inside the job's directory.

    Args:
        file_obj: Upload object exposing ``filename`` and ``save(path)``.
        job_uuid: Job identifier.

    Returns:
        On success a dict with ``success`` True plus ``filename``,
        ``file_path`` and ``file_size``; on any exception a dict with
        ``success`` False and the exception text under ``error``.
    """
    try:
        # The directory is created before the name is derived.
        target_dir = create_job_directory(job_uuid)
        stored_name = generate_filename(file_obj.filename, job_uuid, 'original')
        destination = target_dir / stored_name

        file_obj.save(str(destination))

        return dict(
            success=True,
            filename=stored_name,
            file_path=str(destination),
            file_size=destination.stat().st_size,
        )
    except Exception as exc:
        return dict(success=False, error=str(exc))
|
||||
|
||||
|
||||
def cleanup_job_directory(job_uuid):
    """Delete the directory that belongs to a job.

    Args:
        job_uuid: Job identifier naming a sub-directory of UPLOAD_FOLDER.

    Returns:
        True when the directory existed and was removed; False when it does
        not exist, when ``job_uuid`` is empty or does not point strictly
        inside the upload folder, or when any error occurs.
    """
    try:
        # An empty identifier would make the target the upload folder itself.
        if not job_uuid:
            return False

        upload_root = Path(current_app.config.get('UPLOAD_FOLDER')).resolve()
        job_dir = (upload_root / job_uuid).resolve()

        # Refuse anything that is not a real descendant of the upload
        # folder (e.g. "..", ".", or an absolute path).
        if upload_root not in job_dir.parents:
            return False

        if job_dir.is_dir():
            shutil.rmtree(job_dir)
            return True

        return False

    except Exception:
        return False
|
||||
|
||||
|
||||
def format_file_size(size_bytes):
    """Render a byte count with one decimal and a unit from B to TB.

    Zero is rendered as ``"0 B"``. Values are divided by 1024 until they
    drop below 1024 or the largest unit (TB) is reached.
    """
    if size_bytes == 0:
        return "0 B"

    units = ("B", "KB", "MB", "GB", "TB")
    value = size_bytes
    unit = units[0]
    for unit in units:
        if value < 1024 or unit == units[-1]:
            break
        value /= 1024.0

    return f"{value:.1f} {unit}"
|
||||
|
||||
|
||||
def get_file_icon(file_extension):
    """Return the icon name for a file extension (case-insensitive).

    Unknown extensions map to ``'file'``.
    """
    extensions_by_icon = (
        ('file-word', ('.docx', '.doc')),
        ('file-powerpoint', ('.pptx', '.ppt')),
        ('file-excel', ('.xlsx', '.xls')),
        ('file-pdf', ('.pdf',)),
    )

    wanted = file_extension.lower()
    for icon, extensions in extensions_by_icon:
        if wanted in extensions:
            return icon
    return 'file'
|
||||
|
||||
|
||||
def _to_naive_utc(value):
    """Return ``value`` as a naive datetime expressed in UTC.

    ISO-8601 strings are parsed first (a trailing 'Z' is read as +00:00).
    Offset-aware values are converted to UTC and stripped of their tzinfo;
    naive values are returned unchanged and treated as already being UTC.
    """
    from datetime import timezone

    if isinstance(value, str):
        value = datetime.fromisoformat(value.replace('Z', '+00:00'))
    if value.tzinfo is not None:
        value = value.astimezone(timezone.utc).replace(tzinfo=None)
    return value


def calculate_processing_time(start_time, end_time=None):
    """Describe the elapsed time between two instants.

    Args:
        start_time: datetime or ISO-8601 string; a falsy value yields None.
        end_time: datetime or ISO-8601 string; defaults to the current UTC time.

    Returns:
        None when ``start_time`` is falsy, otherwise a string in seconds
        (under a minute), minutes+seconds (under an hour) or hours+minutes.
    """
    if not start_time:
        return None

    if not end_time:
        end_time = datetime.utcnow()

    # Bring both ends to the same (naive UTC) representation: a 'Z' string
    # parses to an aware datetime while utcnow() is naive, and subtracting
    # one from the other raises TypeError.
    start_time = _to_naive_utc(start_time)
    end_time = _to_naive_utc(end_time)

    duration = end_time - start_time

    # Whole seconds.
    total_seconds = int(duration.total_seconds())

    if total_seconds < 60:
        return f"{total_seconds}秒"
    elif total_seconds < 3600:
        minutes = total_seconds // 60
        seconds = total_seconds % 60
        return f"{minutes}分{seconds}秒"
    else:
        hours = total_seconds // 3600
        minutes = (total_seconds % 3600) // 60
        return f"{hours}小時{minutes}分"
|
||||
|
||||
|
||||
def generate_download_token(job_uuid, language_code, user_id):
    """Create a download token stamped with the current Unix second.

    The token is the one produced by
    ``generate_download_token_with_timestamp`` for ``int(time.time())``.
    """
    import time

    issued_at = int(time.time())
    return generate_download_token_with_timestamp(
        job_uuid, language_code, user_id, issued_at
    )
|
||||
|
||||
|
||||
def verify_download_token(token, job_uuid, language_code, user_id, max_age=3600):
    """Check a download token against every timestamp still within ``max_age``.

    The token does not carry its own timestamp, so the expected token is
    recomputed for each of the last ``max_age`` seconds until one matches.

    Args:
        token: Token presented by the client.
        job_uuid: Job identifier the token was issued for.
        language_code: Language the token was issued for.
        user_id: User the token was issued for.
        max_age: Validity window in seconds.

    Returns:
        True if the token matches one of the candidate timestamps; False
        otherwise, including on any error.
    """
    import hmac
    import time

    try:
        # Tokens are SHA-256 hex digests (64 characters); anything else
        # cannot match, so skip the whole search.
        if not isinstance(token, str) or len(token) != 64:
            return False

        current_time = int(time.time())

        # Try each second inside the validity window.
        for age in range(max_age):
            expected_token = generate_download_token_with_timestamp(
                job_uuid, language_code, user_id, current_time - age
            )

            # Constant-time comparison so response timing does not reveal
            # how much of a guessed token was correct.
            if hmac.compare_digest(token, expected_token):
                return True

        return False

    except Exception:
        return False
|
||||
|
||||
|
||||
def generate_download_token_with_timestamp(job_uuid, language_code, user_id, timestamp):
    """Create the download token for an explicit timestamp.

    The token is the SHA-256 hex digest of the colon-joined job id,
    language code, user id, timestamp and the application's SECRET_KEY
    (``'default_secret'`` when the key is not configured).
    """
    import hashlib

    secret = current_app.config.get('SECRET_KEY', 'default_secret')
    fields = (job_uuid, language_code, user_id, timestamp, secret)
    material = ":".join(f"{field}" for field in fields)

    return hashlib.sha256(material.encode()).hexdigest()
|
||||
|
||||
|
||||
def get_supported_languages():
    """Return a new dict mapping each supported language code to its display name."""
    catalogue = (
        ('auto', '自動偵測'),
        ('zh-CN', '簡體中文'),
        ('zh-TW', '繁體中文'),
        ('en', '英文'),
        ('ja', '日文'),
        ('ko', '韓文'),
        ('vi', '越南文'),
        ('th', '泰文'),
        ('id', '印尼文'),
        ('ms', '馬來文'),
        ('es', '西班牙文'),
        ('fr', '法文'),
        ('de', '德文'),
        ('ru', '俄文'),
    )
    return dict(catalogue)
|
||||
|
||||
|
||||
def parse_json_field(json_str):
    """Decode a JSON field without raising.

    Falsy input yields None. Strings are decoded with ``json.loads``
    (None on decode failure); any other truthy value is returned as is.
    """
    import json

    if not json_str:
        return None

    if not isinstance(json_str, str):
        return json_str

    try:
        return json.loads(json_str)
    except (json.JSONDecodeError, TypeError):
        return None
|
||||
|
||||
|
||||
def format_datetime(dt, format_type='full'):
    """Format a datetime (or ISO-8601 string) for display.

    Args:
        dt: datetime or ISO string. Falsy input yields None; a string that
            cannot be parsed is returned unchanged.
        format_type: ``'date'``, ``'time'`` or ``'short'``; any other value
            selects the full ``YYYY-MM-DD HH:MM:SS`` form.
    """
    if not dt:
        return None

    if isinstance(dt, str):
        try:
            dt = datetime.fromisoformat(dt.replace('Z', '+00:00'))
        except ValueError:
            return dt

    layouts = (
        ('date', '%Y-%m-%d'),
        ('time', '%H:%M:%S'),
        ('short', '%Y-%m-%d %H:%M'),
    )
    full_layout = '%Y-%m-%d %H:%M:%S'
    chosen = next(
        (layout for kind, layout in layouts if format_type == kind),
        full_layout,
    )
    return dt.strftime(chosen)
|
||||
|
||||
|
||||
def create_response(success=True, data=None, message=None, error=None, error_code=None):
    """Assemble the uniform API response dictionary.

    ``data`` is included when it is not None and ``message`` when truthy.
    When ``error`` is truthy, ``'error'`` is set to ``error_code`` (or
    ``'ERROR'``) and the error text is used as the message if no message
    was supplied.
    """
    body = {'success': success}

    if data is not None:
        body['data'] = data

    if message:
        body['message'] = message

    if not error:
        return body

    body['error'] = error_code or 'ERROR'
    if not message:
        body['message'] = error
    return body
|
248
app/utils/image_preprocessor.py
Normal file
248
app/utils/image_preprocessor.py
Normal file
@@ -0,0 +1,248 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
圖像預處理工具 - 用於提升 OCR 識別準確度
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2025-10-01
|
||||
Modified: 2025-10-01
|
||||
"""
|
||||
|
||||
import io
|
||||
import numpy as np
|
||||
from PIL import Image, ImageEnhance, ImageFilter
|
||||
from typing import Optional, Tuple
|
||||
from app.utils.logger import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# 檢查 OpenCV 是否可用
|
||||
try:
|
||||
import cv2
|
||||
_HAS_OPENCV = True
|
||||
logger.info("OpenCV is available for advanced image preprocessing")
|
||||
except ImportError:
|
||||
_HAS_OPENCV = False
|
||||
logger.warning("OpenCV not available, using PIL-only preprocessing")
|
||||
|
||||
|
||||
class ImagePreprocessor:
    """Image preprocessor intended to improve OCR quality of scanned documents."""

    def __init__(self, use_opencv: bool = True):
        """Initialize the preprocessor.

        Args:
            use_opencv: Use OpenCV for the advanced pipeline; only effective
                when the module-level import of cv2 succeeded.
        """
        self.use_opencv = use_opencv and _HAS_OPENCV
        logger.info(f"ImagePreprocessor initialized (OpenCV: {self.use_opencv})")

    def preprocess_for_ocr(self, image_bytes: bytes,
                           enhance_level: str = 'medium') -> bytes:
        """Run the OCR preprocessing pipeline on an encoded image.

        Args:
            image_bytes: Encoded source image.
            enhance_level: 'low', 'medium' or 'high'; any other value takes
                the same branches as 'low'.

        Returns:
            The processed image encoded as PNG. If anything fails, the
            original ``image_bytes`` are returned unchanged.
        """
        try:
            # 1. Load the image.
            image = Image.open(io.BytesIO(image_bytes))
            original_mode = image.mode
            logger.debug(f"Original image: {image.size}, mode={original_mode}")

            # 2. Convert to RGB unless it is already RGB or grayscale.
            if image.mode not in ('RGB', 'L'):
                image = image.convert('RGB')
                logger.debug("Converted to RGB mode")

            # 3. Pick the pipeline.
            if self.use_opencv:
                processed_image = self._preprocess_with_opencv(image, enhance_level)
            else:
                processed_image = self._preprocess_with_pil(image, enhance_level)

            # 4. Encode as PNG.
            output_buffer = io.BytesIO()
            processed_image.save(output_buffer, format='PNG', optimize=True)
            processed_bytes = output_buffer.getvalue()

            logger.info(f"Image preprocessed: {len(image_bytes)} -> {len(processed_bytes)} bytes (level={enhance_level})")
            return processed_bytes

        except Exception as e:
            logger.error(f"Image preprocessing failed: {e}, returning original image")
            return image_bytes  # fall back to the untouched input

    def _preprocess_with_opencv(self, image: Image.Image, level: str) -> Image.Image:
        """Advanced pipeline: grayscale, denoise, CLAHE, optional sharpen and threshold."""
        # PIL image -> NumPy array.
        img_array = np.array(image)

        # 1. Grayscale.
        if img_array.ndim == 3 and img_array.shape[2] == 3:
            img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
            gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
        else:
            # Mode 'L' input is already a single-channel array; feeding it to
            # a BGR->GRAY conversion fails, which previously made every
            # grayscale scan fall back to the unprocessed original. Let PIL
            # provide the 8-bit gray plane instead.
            gray = np.array(image.convert('L'))
        logger.debug("Applied grayscale conversion (OpenCV)")

        # 2. Denoise, strength depending on the level.
        if level == 'high':
            denoised = cv2.fastNlMeansDenoising(gray, None, h=10, templateWindowSize=7, searchWindowSize=21)
            logger.debug("Applied strong denoising (h=10)")
        elif level == 'medium':
            denoised = cv2.fastNlMeansDenoising(gray, None, h=7, templateWindowSize=7, searchWindowSize=21)
            logger.debug("Applied medium denoising (h=7)")
        else:
            denoised = cv2.bilateralFilter(gray, 5, 50, 50)
            logger.debug("Applied light denoising (bilateral)")

        # 3. Contrast enhancement with CLAHE.
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(denoised)
        logger.debug("Applied CLAHE contrast enhancement")

        # 4. Sharpen (high level only).
        if level == 'high':
            kernel = np.array([[-1, -1, -1],
                               [-1, 9, -1],
                               [-1, -1, -1]])
            sharpened = cv2.filter2D(enhanced, -1, kernel)
            logger.debug("Applied sharpening filter")
        else:
            sharpened = enhanced

        # 5. Adaptive binarization (medium and high levels).
        if level in ('medium', 'high'):
            binary = cv2.adaptiveThreshold(
                sharpened, 255,
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY,
                blockSize=11,
                C=2
            )
            logger.debug("Applied adaptive thresholding")
            final_image = binary
        else:
            final_image = sharpened

        # NumPy array -> PIL image.
        return Image.fromarray(final_image)

    def _preprocess_with_pil(self, image: Image.Image, level: str) -> Image.Image:
        """Basic pipeline used when OpenCV is not in use."""
        # 1. Grayscale.
        gray = image.convert('L')
        logger.debug("Applied grayscale conversion (PIL)")

        # 2. Contrast enhancement.
        enhancer = ImageEnhance.Contrast(gray)
        if level == 'high':
            contrast_factor = 2.0
        elif level == 'medium':
            contrast_factor = 1.5
        else:
            contrast_factor = 1.2

        enhanced = enhancer.enhance(contrast_factor)
        logger.debug(f"Applied contrast enhancement (factor={contrast_factor})")

        # 3. Sharpen (medium and high levels).
        if level in ('medium', 'high'):
            sharpness = ImageEnhance.Sharpness(enhanced)
            sharp_factor = 2.0 if level == 'high' else 1.5
            sharpened = sharpness.enhance(sharp_factor)
            logger.debug(f"Applied sharpening (factor={sharp_factor})")
        else:
            sharpened = enhanced

        # 4. Denoise with a median filter (high level only).
        if level == 'high':
            denoised = sharpened.filter(ImageFilter.MedianFilter(size=3))
            logger.debug("Applied median filter (size=3)")
        else:
            denoised = sharpened

        return denoised

    def auto_detect_enhance_level(self, image_bytes: bytes) -> str:
        """Suggest an enhancement level from simple quality metrics.

        Contrast is the standard deviation of the gray levels; with OpenCV
        the variance of the Laplacian is used as an additional sharpness
        measure.

        Args:
            image_bytes: Encoded image.

        Returns:
            'high', 'medium' or 'low'; 'medium' if detection fails.
        """
        try:
            image = Image.open(io.BytesIO(image_bytes))
            img_array = np.array(image.convert('L'))

            # Sharpness is only available with OpenCV.
            laplacian_var = None
            if self.use_opencv:
                laplacian_var = cv2.Laplacian(img_array, cv2.CV_64F).var()

            contrast = np.std(img_array)

            if laplacian_var is not None:
                logger.debug(f"Image quality metrics: laplacian_var={laplacian_var:.2f}, contrast={contrast:.2f}")

            # Blurry or low contrast -> strongest enhancement.
            if (laplacian_var is not None and laplacian_var < 50) or contrast < 40:
                return 'high'
            # Middling quality -> medium enhancement.
            if (laplacian_var is not None and laplacian_var < 100) or contrast < 60:
                return 'medium'
            # Good quality -> light enhancement.
            return 'low'

        except Exception as e:
            logger.error(f"Auto enhance level detection failed: {e}")
            return 'medium'  # default level

    def preprocess_smart(self, image_bytes: bytes) -> bytes:
        """Detect the enhancement level automatically, then preprocess.

        Args:
            image_bytes: Encoded source image.

        Returns:
            Result of ``preprocess_for_ocr`` with the detected level.
        """
        enhance_level = self.auto_detect_enhance_level(image_bytes)
        logger.info(f"Auto-detected enhancement level: {enhance_level}")
        return self.preprocess_for_ocr(image_bytes, enhance_level)
|
232
app/utils/ldap_auth.py
Normal file
232
app/utils/ldap_auth.py
Normal file
@@ -0,0 +1,232 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
LDAP 認證服務
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
import time
|
||||
from ldap3 import Server, Connection, SUBTREE, ALL_ATTRIBUTES
|
||||
from flask import current_app
|
||||
from .logger import get_logger
|
||||
from .exceptions import AuthenticationError
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class LDAPAuthService:
    """LDAP authentication service built on ldap3.

    All connection settings are read from ``current_app.config`` when the
    service is instantiated.
    """

    def __init__(self):
        """Load the LDAP settings from the Flask application config."""
        self.config = current_app.config
        self.server_url = self.config.get('LDAP_SERVER')
        self.port = self.config.get('LDAP_PORT', 389)
        self.use_ssl = self.config.get('LDAP_USE_SSL', False)
        self.bind_user_dn = self.config.get('LDAP_BIND_USER_DN')
        self.bind_password = self.config.get('LDAP_BIND_USER_PASSWORD')
        self.search_base = self.config.get('LDAP_SEARCH_BASE')
        self.login_attr = self.config.get('LDAP_USER_LOGIN_ATTR', 'userPrincipalName')

    @staticmethod
    def _escape_filter_value(value):
        """Escape *value* so it can be embedded in an LDAP search filter.

        Without escaping, characters such as ``*``, ``(``, ``)`` and ``\\`` in
        user-supplied input would change the structure of the filter.
        """
        # Local import: keeps this class self-contained without touching the
        # module-level import block.
        from ldap3.utils.conv import escape_filter_chars

        return escape_filter_chars(str(value))

    def create_connection(self, retries=3):
        """Open a connection bound with the service account, retrying on failure.

        Args:
            retries: Number of connection attempts before giving up.

        Returns:
            A bound ldap3 ``Connection``. ``None`` is returned only when
            ``retries`` is 0 and no attempt is made.

        Raises:
            AuthenticationError: When the last attempt fails.
        """
        for attempt in range(retries):
            try:
                server = Server(
                    self.server_url,
                    port=self.port,
                    use_ssl=self.use_ssl,
                    # NOTE(review): ALL_ATTRIBUTES is normally a search attribute
                    # selector; confirm it is the intended get_info value.
                    get_info=ALL_ATTRIBUTES
                )

                conn = Connection(
                    server,
                    user=self.bind_user_dn,
                    password=self.bind_password,
                    auto_bind=True,
                    raise_exceptions=True
                )

                logger.info("LDAP connection established successfully")
                return conn

            except Exception as e:
                logger.error(f"LDAP connection attempt {attempt + 1} failed: {str(e)}")
                if attempt == retries - 1:
                    raise AuthenticationError(f"LDAP connection failed: {str(e)}")
                # Short pause before the next attempt.
                time.sleep(1)

        return None

    def authenticate_user(self, username, password):
        """Verify a user's credentials and return the user's directory data.

        The user is first located with the service account, then a second
        bind is attempted with the user's DN and the supplied password.

        Args:
            username: Login value matched against ``self.login_attr``.
            password: The user's password.

        Returns:
            dict with ``username``, ``display_name``, ``email``,
            ``department`` and ``user_principal_name``.

        Raises:
            AuthenticationError: Unknown user, failed bind, or any LDAP error.
        """
        conn = None
        try:
            conn = self.create_connection()
            if not conn:
                raise AuthenticationError("Unable to connect to LDAP server")

            # Escape the login value so it cannot alter the filter structure.
            safe_username = self._escape_filter_value(username)
            search_filter = f"(&(objectClass=person)(objectCategory=person)({self.login_attr}={safe_username}))"

            conn.search(
                self.search_base,
                search_filter,
                SUBTREE,
                attributes=['displayName', 'mail', 'sAMAccountName', 'userPrincipalName', 'department']
            )

            if not conn.entries:
                logger.warning(f"User not found: {username}")
                raise AuthenticationError("帳號不存在")

            user_entry = conn.entries[0]
            user_dn = user_entry.entry_dn

            # Check the password by binding as the user.
            try:
                user_conn = Connection(
                    conn.server,
                    user=user_dn,
                    password=password,
                    auto_bind=True,
                    raise_exceptions=True
                )
                user_conn.unbind()

                # Fall back to the supplied login value for missing attributes.
                user_info = {
                    'username': str(user_entry.sAMAccountName) if user_entry.sAMAccountName else username,
                    'display_name': str(user_entry.displayName) if user_entry.displayName else username,
                    'email': str(user_entry.mail) if user_entry.mail else f"{username}@panjit.com.tw",
                    'department': str(user_entry.department) if hasattr(user_entry, 'department') and user_entry.department else None,
                    'user_principal_name': str(user_entry.userPrincipalName) if user_entry.userPrincipalName else username
                }

                logger.info(f"User authenticated successfully: {username}")
                return user_info

            except Exception as e:
                logger.warning(f"Authentication failed for user {username}: {str(e)}")
                raise AuthenticationError("密碼錯誤")

        except AuthenticationError:
            raise
        except Exception as e:
            logger.error(f"LDAP authentication error: {str(e)}")
            raise AuthenticationError(f"認證服務錯誤: {str(e)}")

        finally:
            if conn:
                conn.unbind()

    def search_users(self, search_term, limit=20):
        """Search enabled person accounts whose name, mail or login contains a term.

        Args:
            search_term: Text to look for; it is matched literally (escaped).
            limit: Maximum number of entries requested from the server.

        Returns:
            List of dicts with ``username``, ``display_name``, ``email`` and
            ``department``; an empty list on any error.
        """
        conn = None
        try:
            conn = self.create_connection()
            if not conn:
                return []

            # Escape the term; the surrounding '*' wildcards stay literal filter syntax.
            safe_term = self._escape_filter_value(search_term)
            search_filter = f"""(&
                (objectClass=person)
                (objectCategory=person)
                (!(userAccountControl:1.2.840.113556.1.4.803:=2))
                (|
                    (displayName=*{safe_term}*)
                    (mail=*{safe_term}*)
                    (sAMAccountName=*{safe_term}*)
                    (userPrincipalName=*{safe_term}*)
                )
            )"""

            # Collapse the layout whitespace of the multi-line template.
            search_filter = ' '.join(search_filter.split())

            conn.search(
                self.search_base,
                search_filter,
                SUBTREE,
                attributes=['sAMAccountName', 'displayName', 'mail', 'department'],
                size_limit=limit
            )

            results = []
            for entry in conn.entries:
                results.append({
                    'username': str(entry.sAMAccountName) if entry.sAMAccountName else '',
                    'display_name': str(entry.displayName) if entry.displayName else '',
                    'email': str(entry.mail) if entry.mail else '',
                    'department': str(entry.department) if hasattr(entry, 'department') and entry.department else ''
                })

            logger.info(f"LDAP search found {len(results)} results for term: {search_term}")
            return results

        except Exception as e:
            logger.error(f"LDAP search error: {str(e)}")
            return []
        finally:
            if conn:
                conn.unbind()

    def get_user_info(self, username):
        """Look up one user's directory data.

        A value containing ``@`` is matched against ``userPrincipalName`` or
        ``mail``; anything else is matched against ``sAMAccountName``.

        Args:
            username: Account name or e-mail style login.

        Returns:
            dict with the user's data, or ``None`` when the user is not found
            or an error occurs.
        """
        conn = None
        try:
            conn = self.create_connection()
            if not conn:
                return None

            # Escape the lookup value so it cannot alter the filter structure.
            safe_username = self._escape_filter_value(username)

            if '@' in username:
                search_filter = f"""(&
                    (objectClass=person)
                    (|
                        (userPrincipalName={safe_username})
                        (mail={safe_username})
                    )
                )"""
            else:
                search_filter = f"(&(objectClass=person)(sAMAccountName={safe_username}))"

            # Collapse the layout whitespace of the multi-line template.
            search_filter = ' '.join(search_filter.split())

            conn.search(
                self.search_base,
                search_filter,
                SUBTREE,
                attributes=['displayName', 'mail', 'sAMAccountName', 'userPrincipalName', 'department']
            )

            if not conn.entries:
                return None

            entry = conn.entries[0]
            return {
                'username': str(entry.sAMAccountName) if entry.sAMAccountName else username,
                'display_name': str(entry.displayName) if entry.displayName else username,
                'email': str(entry.mail) if entry.mail else f"{username}@panjit.com.tw",
                'department': str(entry.department) if hasattr(entry, 'department') and entry.department else None,
                'user_principal_name': str(entry.userPrincipalName) if entry.userPrincipalName else ''
            }

        except Exception as e:
            logger.error(f"Error getting user info for {username}: {str(e)}")
            return None
        finally:
            if conn:
                conn.unbind()

    def test_connection(self):
        """Health check: return True when a single connection attempt succeeds."""
        try:
            conn = self.create_connection(retries=1)
            if conn:
                conn.unbind()
                return True
            return False
        except Exception as e:
            logger.error(f"LDAP connection test failed: {str(e)}")
            return False
|
126
app/utils/logger.py
Normal file
126
app/utils/logger.py
Normal file
@@ -0,0 +1,126 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
日誌管理模組
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from logging.handlers import RotatingFileHandler
|
||||
from flask import current_app, has_request_context, request, g
|
||||
|
||||
|
||||
def get_logger(name):
    """Return the logger registered under *name*, configuring it on first use."""
    named_logger = logging.getLogger(name)
    if named_logger.handlers:
        # Already configured: attaching handlers again would duplicate output.
        return named_logger

    setup_logger(named_logger)
    return named_logger
|
||||
|
||||
|
||||
def _resolve_log_level(level, default=logging.INFO):
    """Translate a level name or number into a ``logging`` level constant."""
    if isinstance(level, int) and not isinstance(level, bool):
        return level
    resolved = getattr(logging, str(level).strip().upper(), None)
    # Unknown names (or names of non-level attributes) fall back to the default.
    return resolved if isinstance(resolved, int) else default


def setup_logger(logger):
    """Attach a rotating file handler and a console handler to *logger*.

    The level and file path come from the Flask app config when a request
    context is active, otherwise from the ``LOG_LEVEL`` / ``LOG_FILE``
    environment variables. An unrecognised level falls back to INFO instead
    of raising, so a bad setting cannot break module import.

    Args:
        logger: The ``logging.Logger`` to configure.
    """
    if has_request_context() and current_app:
        log_level = current_app.config.get('LOG_LEVEL', 'INFO')
        log_file = current_app.config.get('LOG_FILE', 'logs/app.log')
    else:
        log_level = os.environ.get('LOG_LEVEL', 'INFO')
        log_file = os.environ.get('LOG_FILE', 'logs/app.log')

    level = _resolve_log_level(log_level)

    # Make sure the log directory exists before the file handler opens the file.
    Path(log_file).parent.mkdir(parents=True, exist_ok=True)

    logger.setLevel(level)

    formatter = logging.Formatter(
        '%(asctime)s [%(levelname)s] %(name)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    # File handler with rotation: 10 MB per file, 5 backups.
    file_handler = RotatingFileHandler(
        log_file,
        maxBytes=10*1024*1024,
        backupCount=5,
        encoding='utf-8'
    )
    file_handler.setLevel(level)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Console handler is fixed at INFO regardless of the configured level.
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)
|
||||
|
||||
|
||||
class DatabaseLogHandler(logging.Handler):
    """Logging handler that stores records through ``SystemLog.log``."""

    def emit(self, record):
        """Write *record* to the database; failures are silently ignored."""
        try:
            # Imported at emit time rather than at module import.
            from app.models.log import SystemLog

            # No job id is available here, so it is always stored as None.
            job_id = None

            if has_request_context():
                # Inside a request: record who made it and how.
                user_id = g.get('current_user_id')
                extra_data = {
                    'method': request.method,
                    'endpoint': request.endpoint,
                    'url': request.url,
                    'ip_address': request.remote_addr,
                    'user_agent': request.headers.get('User-Agent')
                }
            else:
                user_id = None
                extra_data = {}

            SystemLog.log(
                level=record.levelname,
                module=record.name,
                message=record.getMessage(),
                user_id=user_id,
                job_id=job_id,
                extra_data=extra_data or None
            )

        except Exception:
            # Best effort: a failed log write must not affect the main program.
            pass
|
||||
|
||||
|
||||
def init_logging(app):
    """Initialise application logging.

    Sets the root logger to INFO, attaches one ``DatabaseLogHandler``
    (WARNING and above) when a database URI is configured, configures
    ``app.logger`` if it has no handlers yet, and quietens noisy
    third-party loggers.

    Args:
        app: The Flask application instance.
    """
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)

    if app.config.get('SQLALCHEMY_DATABASE_URI'):
        # The root logger is process-wide. Attach the database handler only
        # once, otherwise repeated calls would write each record several times.
        already_attached = any(
            isinstance(handler, DatabaseLogHandler)
            for handler in root_logger.handlers
        )
        if not already_attached:
            db_handler = DatabaseLogHandler()
            db_handler.setLevel(logging.WARNING)  # only WARNING and above go to the database
            root_logger.addHandler(db_handler)

    # Configure the Flask application logger.
    if not app.logger.handlers:
        setup_logger(app.logger)

    # Reduce verbosity of third-party libraries.
    logging.getLogger('werkzeug').setLevel(logging.WARNING)
    logging.getLogger('urllib3').setLevel(logging.WARNING)
    logging.getLogger('requests').setLevel(logging.WARNING)
|
84
app/utils/response.py
Normal file
84
app/utils/response.py
Normal file
@@ -0,0 +1,84 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
API 響應處理工具
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2025-09-02
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, List, Union
|
||||
from app.utils.timezone import to_taiwan_time, format_taiwan_time
|
||||
|
||||
|
||||
# Keys whose string values are parsed as ISO-8601 timestamps.
_TAIWAN_TIME_FIELDS = frozenset({
    'created_at', 'updated_at', 'completed_at',
    'processing_started_at', 'last_login', 'timestamp',
})


def convert_datetime_to_taiwan(data: Union[Dict, List, Any]) -> Union[Dict, List, Any]:
    """Recursively convert datetime fields in *data* to Taiwan-time ISO strings.

    Within dictionaries, ``datetime`` values are converted, and string values
    under the known time-field keys are parsed as ISO-8601 and converted.
    Strings that cannot be parsed are kept unchanged. Lists are processed
    element by element; any other value is returned as is.

    Args:
        data: The data to convert (dict, list, or anything else).

    Returns:
        A new structure with the conversions applied.
    """
    if isinstance(data, list):
        return [convert_datetime_to_taiwan(item) for item in data]

    if not isinstance(data, dict):
        return data

    result = {}
    for key, value in data.items():
        if isinstance(value, datetime):
            result[key] = to_taiwan_time(value).isoformat()
        elif key in _TAIWAN_TIME_FIELDS and isinstance(value, str):
            try:
                # fromisoformat() on older Pythons does not accept a trailing "Z".
                parsed = datetime.fromisoformat(value.replace('Z', '+00:00'))
                result[key] = to_taiwan_time(parsed).isoformat()
            except (ValueError, OverflowError):
                # Not a usable timestamp: keep the original string.
                result[key] = value
        else:
            result[key] = convert_datetime_to_taiwan(value)
    return result
|
||||
|
||||
|
||||
def create_taiwan_response(success: bool = True, data: Any = None, message: str = '',
                           error: str = '', **kwargs) -> Dict[str, Any]:
    """Build an API response dict with times expressed in Taiwan time.

    Args:
        success: Whether the operation succeeded.
        data: Response payload; included only when not ``None``.
        message: Success message; included only when non-empty.
        error: Error message; included only when non-empty.
        **kwargs: Extra top-level fields, converted like ``data``.

    Returns:
        The response dictionary, always containing ``success`` and
        ``timestamp``.
    """
    # Local import: this module only imports ``datetime`` from ``datetime``.
    from datetime import timezone

    # Pass an aware UTC "now": format_taiwan_time treats naive datetimes as UTC,
    # so naive local time would be shifted a second time on non-UTC hosts.
    response = {
        'success': success,
        'timestamp': format_taiwan_time(datetime.now(timezone.utc), "%Y-%m-%d %H:%M:%S")
    }

    if data is not None:
        response['data'] = convert_datetime_to_taiwan(data)

    if message:
        response['message'] = message

    if error:
        response['error'] = error

    # Extra fields are added last and may override the keys set above.
    for key, value in kwargs.items():
        response[key] = convert_datetime_to_taiwan(value)

    return response
|
104
app/utils/timezone.py
Normal file
104
app/utils/timezone.py
Normal file
@@ -0,0 +1,104 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
時區工具函數
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2025-09-02
|
||||
"""
|
||||
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from typing import Optional
|
||||
|
||||
# Taiwan time zone: fixed UTC+8 offset.
TAIWAN_TZ = timezone(timedelta(hours=8))


def now_taiwan() -> datetime:
    """Return the current time as an aware datetime in Taiwan time (UTC+8)."""
    return datetime.now(tz=TAIWAN_TZ)
|
||||
|
||||
|
||||
def now_utc() -> datetime:
    """Return the current time as an aware datetime in UTC."""
    return datetime.now(tz=timezone.utc)
|
||||
|
||||
|
||||
def to_taiwan_time(dt: datetime) -> datetime:
    """Convert a datetime to Taiwan time.

    Args:
        dt: A naive or aware datetime. Naive values are assumed to be UTC.

    Returns:
        The same instant as an aware datetime in ``TAIWAN_TZ``, or ``None``
        when *dt* is ``None``.
    """
    if dt is None:
        return None

    # Give naive values an explicit UTC zone before converting.
    aware = dt if dt.tzinfo is not None else dt.replace(tzinfo=timezone.utc)
    return aware.astimezone(TAIWAN_TZ)
|
||||
|
||||
|
||||
def to_utc_time(dt: datetime) -> datetime:
    """Convert a datetime to UTC.

    Args:
        dt: A naive or aware datetime. Naive values are assumed to be
            Taiwan time.

    Returns:
        The same instant as an aware UTC datetime, or ``None`` when *dt*
        is ``None``.
    """
    if dt is None:
        return None

    # Give naive values an explicit Taiwan zone before converting.
    aware = dt if dt.tzinfo is not None else dt.replace(tzinfo=TAIWAN_TZ)
    return aware.astimezone(timezone.utc)
|
||||
|
||||
|
||||
def format_taiwan_time(dt: datetime, format_str: str = "%Y-%m-%d %H:%M:%S") -> str:
    """Format a datetime as a Taiwan-time string.

    Args:
        dt: The datetime to format; converted with ``to_taiwan_time`` first.
        format_str: ``strftime`` format string.

    Returns:
        The formatted string, or ``""`` when *dt* is ``None``.
    """
    if dt is None:
        return ""
    return to_taiwan_time(dt).strftime(format_str)
|
||||
|
||||
|
||||
def parse_taiwan_time(time_str: str, format_str: str = "%Y-%m-%d %H:%M:%S") -> datetime:
    """Parse a Taiwan-time string into an aware datetime.

    Args:
        time_str: The time string to parse.
        format_str: ``strptime`` format describing *time_str*.

    Returns:
        The parsed value with ``TAIWAN_TZ`` attached (the wall-clock fields
        are kept as parsed, not shifted).
    """
    return datetime.strptime(time_str, format_str).replace(tzinfo=TAIWAN_TZ)
|
||||
|
||||
|
||||
# 為了向後兼容,提供替代 datetime.utcnow() 的函數
|
||||
def utcnow() -> datetime:
    """Return the current UTC time as a naive datetime.

    Drop-in replacement for ``datetime.utcnow()``. New code should prefer
    ``now_taiwan()`` or ``now_utc()``.
    """
    # Take an aware UTC "now", then strip the zone for compatibility.
    aware_now = datetime.now(timezone.utc)
    return aware_now.replace(tzinfo=None)
|
203
app/utils/validators.py
Normal file
203
app/utils/validators.py
Normal file
@@ -0,0 +1,203 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
驗證工具模組
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from flask import current_app
|
||||
from .exceptions import ValidationError
|
||||
|
||||
|
||||
def validate_file(file_obj):
    """Validate an uploaded file's name, extension and size.

    Args:
        file_obj: Uploaded file object exposing ``filename``, ``seek`` and
            ``tell``.

    Returns:
        dict with ``filename``, ``file_extension``, ``file_size`` and
        ``valid`` (always True).

    Raises:
        ValidationError: Missing file or name, disallowed extension,
            oversized file, or empty file.
    """
    if not file_obj:
        raise ValidationError("未選擇檔案", "NO_FILE")

    if not file_obj.filename:
        raise ValidationError("檔案名稱為空", "NO_FILENAME")

    # Check the file extension (compared in lower case).
    file_ext = Path(file_obj.filename).suffix.lower()
    allowed_extensions = current_app.config.get('ALLOWED_EXTENSIONS', {'.docx', '.doc', '.pptx', '.xlsx', '.xls', '.pdf'})

    if file_ext not in allowed_extensions:
        # Sorted so the message is stable regardless of set ordering.
        raise ValidationError(
            f"不支援的檔案類型: {file_ext},支援的格式: {', '.join(sorted(allowed_extensions))}",
            "INVALID_FILE_TYPE"
        )

    # The key may be present with value None (Flask's default), so fall back
    # explicitly instead of relying on .get()'s default. 26214400 = 25 MB.
    max_size = current_app.config.get('MAX_CONTENT_LENGTH') or 26214400

    # Measure the size by seeking to the end, then rewind for later readers.
    file_obj.seek(0, os.SEEK_END)
    file_size = file_obj.tell()
    file_obj.seek(0)

    if file_size > max_size:
        raise ValidationError(
            f"檔案大小超過限制 ({format_file_size(max_size)})",
            "FILE_TOO_LARGE"
        )

    if file_size == 0:
        raise ValidationError("檔案為空", "EMPTY_FILE")

    return {
        'filename': file_obj.filename,
        'file_extension': file_ext,
        'file_size': file_size,
        'valid': True
    }
|
||||
|
||||
|
||||
def validate_languages(source_language, target_languages):
    """Validate the source language and the list of target languages.

    Args:
        source_language: Language code, ``'auto'``, or a falsy value
            (treated as ``'auto'``).
        target_languages: Non-empty list of at most 10 language codes.

    Returns:
        dict with ``source_language``, ``target_languages``,
        ``supported_languages`` and ``valid`` (always True).

    Raises:
        ValidationError: Unsupported code, missing or too many targets, or a
            target equal to the (non-auto) source language.
    """
    # Supported language codes and their display names.
    supported_languages = {
        'auto': '自動偵測',
        'zh-CN': '簡體中文',
        'zh-TW': '繁體中文',
        'en': '英文',
        'ja': '日文',
        'ko': '韓文',
        'vi': '越南文',
        'th': '泰文',
        'id': '印尼文',
        'ms': '馬來文',
        'es': '西班牙文',
        'fr': '法文',
        'de': '德文',
        'ru': '俄文'
    }

    def _is_supported(code):
        # Non-string values (possibly unhashable) are never supported.
        return isinstance(code, str) and code in supported_languages

    # Validate the source language.
    if source_language and not _is_supported(source_language):
        raise ValidationError(
            f"不支援的來源語言: {source_language}",
            "INVALID_SOURCE_LANGUAGE"
        )

    # Validate the target languages; an empty list is rejected here too.
    if not target_languages or not isinstance(target_languages, list):
        raise ValidationError("必須指定至少一個目標語言", "NO_TARGET_LANGUAGES")

    if len(target_languages) > 10:  # at most 10 target languages
        raise ValidationError("目標語言數量過多,最多支援10個", "TOO_MANY_TARGET_LANGUAGES")

    invalid_languages = [lang for lang in target_languages if not _is_supported(lang)]
    if invalid_languages:
        raise ValidationError(
            f"不支援的目標語言: {', '.join(str(lang) for lang in invalid_languages)}",
            "INVALID_TARGET_LANGUAGE"
        )

    # The source language must not also be a target (unless auto-detected).
    if source_language and source_language != 'auto' and source_language in target_languages:
        raise ValidationError(
            "目標語言不能包含來源語言",
            "SOURCE_TARGET_OVERLAP"
        )

    return {
        'source_language': source_language or 'auto',
        'target_languages': target_languages,
        'supported_languages': supported_languages,
        'valid': True
    }
|
||||
|
||||
|
||||
def validate_job_uuid(job_uuid):
    """Validate that *job_uuid* is a well-formed UUID string.

    Args:
        job_uuid: The job identifier to check.

    Returns:
        True when the value parses as a UUID.

    Raises:
        ValidationError: When the value is empty or not a valid UUID.
    """
    import uuid

    if not job_uuid:
        raise ValidationError("任務UUID不能為空", "INVALID_UUID")

    try:
        uuid.UUID(job_uuid)
    except (ValueError, AttributeError, TypeError):
        # AttributeError/TypeError: non-string input (e.g. an int) fails
        # inside uuid.UUID before it can raise ValueError.
        raise ValidationError("任務UUID格式錯誤", "INVALID_UUID")
    return True
|
||||
|
||||
|
||||
def validate_pagination(page, per_page):
    """Validate and normalise pagination parameters.

    Args:
        page: Page number; a falsy value defaults to 1.
        per_page: Items per page; a falsy value defaults to 20.

    Returns:
        Tuple ``(page, per_page)`` as integers.

    Raises:
        ValidationError: Non-numeric input, page below 1, or per_page
            outside 1-100.
    """
    try:
        page_number = int(page) if page else 1
        page_size = int(per_page) if per_page else 20
    except (ValueError, TypeError):
        raise ValidationError("分頁參數必須為數字", "INVALID_PAGINATION")

    if page_number < 1:
        raise ValidationError("頁數必須大於0", "INVALID_PAGE")

    if not 1 <= page_size <= 100:
        raise ValidationError("每頁項目數必須在1-100之間", "INVALID_PER_PAGE")

    return page_number, page_size
|
||||
|
||||
|
||||
def format_file_size(size_bytes):
    """Format a byte count as a human-readable string with one decimal.

    Args:
        size_bytes: Size in bytes.

    Returns:
        A string such as ``"25.0 MB"``; exactly ``"0 B"`` for zero.
    """
    if size_bytes == 0:
        return "0 B"

    units = ("B", "KB", "MB", "GB", "TB")
    last_index = len(units) - 1
    value = size_bytes
    unit_index = 0
    # Step up one unit per factor of 1024, stopping at the largest unit.
    while unit_index < last_index and value >= 1024:
        value = value / 1024.0
        unit_index += 1

    return f"{value:.1f} {units[unit_index]}"
|
||||
|
||||
|
||||
def sanitize_filename(filename):
    """Clean a file name by replacing unsafe characters.

    Only the final path component is kept. In the stem, anything other than
    word characters, whitespace, dots and hyphens becomes ``_``, whitespace
    runs become ``_``, leading/trailing dots and underscores are stripped,
    and the result is capped at 100 characters. If nothing usable remains,
    the stem falls back to ``file``. Unsafe characters in the extension are
    replaced with ``_`` as well.

    Args:
        filename: The original file name.

    Returns:
        The sanitised ``<stem><extension>`` string.
    """
    import re

    # Split into stem and extension (the last path component only).
    path = Path(filename)
    name = path.stem
    ext = path.suffix

    # Replace unsafe characters in the stem.
    safe_name = re.sub(r'[^\w\s.-]', '_', name)
    safe_name = re.sub(r'\s+', '_', safe_name)  # whitespace -> underscore
    safe_name = safe_name.strip('._')  # drop leading/trailing dots and underscores

    # Limit the length.
    if len(safe_name) > 100:
        safe_name = safe_name[:100]

    # A stem made only of unsafe characters would otherwise yield just ".ext".
    if not safe_name:
        safe_name = 'file'

    # The extension comes from the same untrusted input; clean it too.
    safe_ext = re.sub(r'[^\w.]', '_', ext)

    return f"{safe_name}{safe_ext}"
|
||||
|
||||
|
||||
def validate_date_range(start_date, end_date):
    """Parse and validate an ISO-8601 date range.

    Args:
        start_date: ISO-8601 string, or a falsy value for "no start".
        end_date: ISO-8601 string, or a falsy value for "no end".

    Returns:
        Tuple ``(start_date, end_date)`` where each provided value has been
        parsed into a ``datetime``; falsy inputs are returned unchanged.

    Raises:
        ValidationError: Unparseable date, or start later than end.
    """
    from datetime import datetime, timezone

    def _parse(value, message, code):
        """Parse one ISO-8601 value or raise ValidationError(message, code)."""
        try:
            # A trailing "Z" is rewritten because fromisoformat() on older
            # Pythons does not accept it.
            return datetime.fromisoformat(value.replace('Z', '+00:00'))
        except (ValueError, AttributeError, TypeError):
            # AttributeError/TypeError cover non-string input.
            raise ValidationError(message, code)

    def _comparable(dt):
        """Return *dt* with a zone attached so mixed values can be compared."""
        # NOTE(review): naive values are treated as UTC for comparison only,
        # mirroring the convention visible elsewhere in the project — confirm.
        return dt if dt.tzinfo is not None else dt.replace(tzinfo=timezone.utc)

    if start_date:
        start_date = _parse(start_date, "開始日期格式錯誤", "INVALID_START_DATE")

    if end_date:
        end_date = _parse(end_date, "結束日期格式錯誤", "INVALID_END_DATE")

    # Comparing a naive with an aware datetime directly raises TypeError.
    if start_date and end_date and _comparable(start_date) > _comparable(end_date):
        raise ValidationError("開始日期不能晚於結束日期", "INVALID_DATE_RANGE")

    return start_date, end_date
|
233
app/websocket.py.disabled
Normal file
233
app/websocket.py.disabled
Normal file
@@ -0,0 +1,233 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
WebSocket 服務模組
|
||||
|
||||
Author: PANJIT IT Team
|
||||
Created: 2024-01-28
|
||||
Modified: 2024-01-28
|
||||
"""
|
||||
|
||||
import os
|
||||
from flask_socketio import SocketIO, emit, join_room, leave_room, disconnect
|
||||
from flask_jwt_extended import decode_token, get_jwt
|
||||
from flask import request
|
||||
from functools import wraps
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)

# Connected clients, keyed by Socket.IO session id.
connected_users = {}

# SocketIO instance; it is bound to the Flask app later via init_app().
# NOTE(review): cors_allowed_origins="*" accepts every origin — confirm intended.
socketio = SocketIO(
    # eventlet async mode, with a message queue read from REDIS_URL
    # (None when the variable is unset).
    async_mode='eventlet',
    message_queue=os.environ.get('REDIS_URL'),
    cors_allowed_origins="*",
    logger=True,
    engineio_logger=False
)
|
||||
|
||||
|
||||
def jwt_required_ws(f):
    """Decorator that requires a valid JWT in the ``token`` query parameter.

    On success the token's ``sub`` claim is stored as ``request.user_id`` and
    the wrapped handler is called. When the token is missing or any error
    occurs, the client is disconnected and ``False`` is returned.
    """
    @wraps(f)
    def decorated_function(*args, **kwargs):
        try:
            token = request.args.get('token')
            if token:
                claims = decode_token(token)
                # Make the user id available to the wrapped handler.
                request.user_id = claims.get('sub')
                return f(*args, **kwargs)

            # No token supplied.
            disconnect()
            return False

        except Exception as e:
            logger.error(f"WebSocket authentication failed: {e}")
            disconnect()
            return False

    return decorated_function
|
||||
|
||||
|
||||
@socketio.on('connect')
def handle_connect(auth):
    """Handle a client connection.

    Expects ``auth`` to contain a JWT under ``token``. On success the session
    is recorded, joined to the room ``user_<id>``, and a ``connected`` event
    is emitted. Otherwise the client is disconnected and ``False`` returned.
    """
    try:
        if not (auth and 'token' in auth):
            logger.warning("Connection attempt without authentication")
            disconnect()
            return False

        claims = decode_token(auth['token'])
        user_id = claims.get('sub')
        sid = request.sid

        # Remember the connection.
        connected_users[sid] = {'user_id': user_id, 'sid': sid}

        # Join the per-user room.
        join_room(f"user_{user_id}")

        logger.info(f"User {user_id} connected with session {sid}")

        # Tell the client the connection succeeded.
        emit('connected', {'message': '連接成功', 'user_id': user_id})
        return True

    except Exception as e:
        logger.error(f"Connection error: {e}")
        disconnect()
        return False
|
||||
|
||||
|
||||
@socketio.on('disconnect')
def handle_disconnect():
    """Handle a client disconnect: leave the user room and forget the session."""
    try:
        sid = request.sid
        if sid not in connected_users:
            return

        user_id = connected_users[sid]['user_id']

        # Leave the per-user room, then drop the connection record.
        leave_room(f"user_{user_id}")
        del connected_users[sid]

        logger.info(f"User {user_id} disconnected")

    except Exception as e:
        logger.error(f"Disconnect error: {e}")
|
||||
|
||||
|
||||
@socketio.on('ping')
def handle_ping(data=None):
    """Handle a heartbeat: reply with a ``pong`` echoing the timestamp.

    Args:
        data: Optional event payload. When it is a dict containing
            ``timestamp``, that value is echoed back; otherwise the
            ``timestamp`` query argument of the request is used.
    """
    timestamp = None
    if isinstance(data, dict):
        timestamp = data.get('timestamp')
    if timestamp is None:
        # Fallback for clients that send no payload with the event.
        timestamp = request.args.get('timestamp')
    emit('pong', {'timestamp': timestamp})
|
||||
|
||||
|
||||
@socketio.on('subscribe_job')
def handle_subscribe_job(data):
    """Subscribe the client to updates for a job.

    Joins the room ``job_<job_uuid>`` and emits ``subscribed``. Nothing
    happens when ``data`` carries no ``job_uuid``.
    """
    # NOTE(review): no ownership check is visible here — any connected client
    # can join any job room; confirm whether that is intended.
    try:
        job_uuid = data.get('job_uuid')
        if not job_uuid:
            return

        join_room(f"job_{job_uuid}")
        logger.info(f"Client {request.sid} subscribed to job {job_uuid}")
        emit('subscribed', {'job_uuid': job_uuid})
    except Exception as e:
        logger.error(f"Subscribe job error: {e}")
|
||||
|
||||
|
||||
@socketio.on('unsubscribe_job')
def handle_unsubscribe_job(data):
    """Unsubscribe the client from updates for a job.

    Leaves the room ``job_<job_uuid>`` and emits ``unsubscribed``. Nothing
    happens when ``data`` carries no ``job_uuid``.
    """
    try:
        job_uuid = data.get('job_uuid')
        if not job_uuid:
            return

        leave_room(f"job_{job_uuid}")
        logger.info(f"Client {request.sid} unsubscribed from job {job_uuid}")
        emit('unsubscribed', {'job_uuid': job_uuid})
    except Exception as e:
        logger.error(f"Unsubscribe job error: {e}")
|
||||
|
||||
|
||||
# 工具函數:發送通知
|
||||
def send_notification_to_user(user_id, notification_data):
    """
    Send a notification to one user.

    Emits ``new_notification`` to the room ``user_<user_id>``. Failures are
    logged and not raised.

    Args:
        user_id: Target user id.
        notification_data: Payload to send.
    """
    target_room = f"user_{user_id}"
    try:
        socketio.emit('new_notification', notification_data, room=target_room, namespace='/')
        logger.info(f"Notification sent to user {user_id}")
    except Exception as e:
        logger.error(f"Failed to send notification: {e}")
|
||||
|
||||
|
||||
def send_job_update(job_uuid, update_data):
    """
    Send a job update to the job's subscribers.

    Emits ``job_update`` to the room ``job_<job_uuid>``. The payload is
    ``update_data`` plus a ``job_uuid`` key (a ``job_uuid`` key inside
    ``update_data`` takes precedence). Failures are logged and not raised.

    Args:
        job_uuid: Job UUID.
        update_data: Mapping with the update fields.
    """
    try:
        # Built inside the try so a bad update_data is logged, not raised.
        payload = {'job_uuid': job_uuid, **update_data}
        socketio.emit('job_update', payload, room=f"job_{job_uuid}", namespace='/')
        logger.info(f"Job update sent for {job_uuid}")
    except Exception as e:
        logger.error(f"Failed to send job update: {e}")
|
||||
|
||||
|
||||
def broadcast_system_message(message, message_type='info'):
    """
    Broadcast a system message to all connected clients.

    Emits ``system_message`` on the default namespace without a target room.
    Failures are logged and not raised.

    Args:
        message: Message text.
        message_type: Message category (for example ``'info'``).
    """
    try:
        # A server-level emit with no room already reaches every client.
        # broadcast=True is not one of its arguments: it is forwarded to the
        # underlying server, which may reject it, and that error was swallowed.
        socketio.emit(
            'system_message',
            {
                'message': message,
                'type': message_type
            },
            namespace='/'
        )
        logger.info(f"System message broadcasted: {message}")

    except Exception as e:
        logger.error(f"Failed to broadcast system message: {e}")
|
||||
|
||||
|
||||
# 初始化函數
|
||||
def init_websocket(app):
    """
    Bind the module-level SocketIO instance to a Flask application.

    Args:
        app: Flask application instance.

    Returns:
        The module-level ``socketio`` object.
    """
    socketio.init_app(app)
    logger.info("WebSocket initialized")
    return socketio
|
Reference in New Issue
Block a user