This commit is contained in:
beabigegg
2025-10-03 08:19:40 +08:00
commit 6599716481
99 changed files with 28184 additions and 0 deletions

225
app/__init__.py Normal file
View File

@@ -0,0 +1,225 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Flask 應用程式工廠
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
import os
import redis
from flask import Flask, request, make_response
from flask_sqlalchemy import SQLAlchemy
from flask_cors import CORS
from flask_jwt_extended import JWTManager
from celery import Celery
from app.config import config
from app.utils.logger import init_logging
# 初始化擴展
db = SQLAlchemy()
cors = CORS()
jwt = JWTManager()
def make_celery(app):
    """Create a Celery application whose tasks run inside *app*'s context.

    Args:
        app: The configured Flask application.

    Returns:
        A Celery instance sharing the Flask config, with a Task base class
        that wraps every task call in ``app.app_context()``.
    """
    celery_app = Celery(
        app.import_name,
        backend=app.config['CELERY_RESULT_BACKEND'],
        broker=app.config['CELERY_BROKER_URL'],
    )
    celery_app.conf.update(app.config)

    class ContextTask(celery_app.Task):
        """Task base class that executes within the Flask app context."""

        def __call__(self, *args, **kwargs):
            with app.app_context():
                return self.run(*args, **kwargs)

    celery_app.Task = ContextTask
    return celery_app
def create_app(config_name=None):
    """Application factory.

    Builds and wires the Flask app: config loading, SQLAlchemy/JWT
    extensions, Redis client, logging, blueprints, manual CORS handling,
    database tables, the default admin user and the Celery instance.

    Args:
        config_name: Config profile name; defaults to $FLASK_ENV or 'default'.

    Returns:
        The fully initialized Flask application.
    """
    app = Flask(__name__)

    # Load configuration: Dify API config first, then the Flask config object.
    config_name = config_name or os.getenv('FLASK_ENV', 'default')
    config[config_name].load_dify_config()
    app.config.from_object(config[config_name])

    # Ensure required directories exist.
    config[config_name].init_directories()

    # Initialize extensions. Flask-CORS is intentionally NOT used (conflict
    # avoidance); CORS headers are applied manually below.
    db.init_app(app)
    jwt.init_app(app)

    # Bug fix: the previous log line sliced JWT_SECRET_KEY unconditionally,
    # so an unset key raised TypeError (None[:10]) during startup.
    secret_key = app.config.get('JWT_SECRET_KEY')
    if secret_key:
        app.logger.info(f"🔑 [JWT Config] JWT_SECRET_KEY: {secret_key[:10]}...{secret_key[-10:]}")
    else:
        app.logger.info("🔑 [JWT Config] JWT_SECRET_KEY: None")
    app.logger.info(f"🔑 [JWT Config] JWT_ACCESS_TOKEN_EXPIRES: {app.config.get('JWT_ACCESS_TOKEN_EXPIRES')}")
    app.logger.info(f"🔑 [JWT Config] JWT_REFRESH_TOKEN_EXPIRES: {app.config.get('JWT_REFRESH_TOKEN_EXPIRES')}")
    app.logger.info("🔑 [JWT] Using JWT authentication")

    # Set up Redis (used by Celery); the app still boots without it.
    try:
        app.redis_client = redis.from_url(app.config['REDIS_URL'])
    except Exception as e:
        app.logger.warning(f"Redis initialization failed: {str(e)}")
        app.redis_client = None

    # Initialize logging.
    init_logging(app)

    # Register API routes.
    from app.api import api_v1
    app.register_blueprint(api_v1)

    # Register error handlers.
    register_error_handlers(app)

    # Origins allowed to make cross-origin requests (dev frontends).
    allowed_origins = [
        'http://localhost:3000', 'http://127.0.0.1:3000',
        'http://localhost:3001', 'http://127.0.0.1:3001',
        'http://localhost:12010', 'http://127.0.0.1:12010',
    ]

    def _apply_cors_headers(response):
        """Attach CORS headers when the request Origin is whitelisted.

        Extracted helper: this header block was previously duplicated in
        both the after_request hook and the OPTIONS preflight handler.
        """
        origin = request.headers.get('Origin')
        if origin and origin in allowed_origins:
            response.headers['Access-Control-Allow-Origin'] = origin
            response.headers['Access-Control-Allow-Headers'] = 'Content-Type, Authorization, X-Requested-With'
            response.headers['Access-Control-Allow-Methods'] = 'GET, POST, PUT, DELETE, OPTIONS, PATCH'
            response.headers['Access-Control-Allow-Credentials'] = 'true'
            response.headers['Access-Control-Max-Age'] = '86400'
        return response

    # Add CORS headers to every response.
    @app.after_request
    def after_request(response):
        return _apply_cors_headers(response)

    # Short-circuit OPTIONS preflight requests before routing.
    @app.before_request
    def before_request():
        if request.method == 'OPTIONS':
            return _apply_cors_headers(make_response())

    # Create database tables.
    with app.app_context():
        # Import models so SQLAlchemy registers their tables.
        from app.models import User, TranslationJob, JobFile, TranslationCache, APIUsageStats, SystemLog, Notification
        db.create_all()

        # Create the default admin user if it does not exist yet.
        create_default_admin()

    # Create the Celery instance.
    app.celery = make_celery(app)

    # WebSocket support is fully disabled.
    app.logger.info("🔌 [WebSocket] WebSocket 服務已禁用")
    app.socketio = None

    # Register root routes (SPA + basic API info); optional.
    try:
        from app.root import root_bp
        app.register_blueprint(root_bp)
    except Exception as e:
        app.logger.warning(f"Root routes not registered: {e}")

    app.logger.info("Flask application created successfully")
    return app
def register_error_handlers(app):
    """Register JSON error handlers for the common HTTP error codes.

    Each handler returns the project's standard error envelope
    ``{'success': False, 'error': <CODE>, 'message': <text>}`` with the
    matching HTTP status.
    """
    # (status, machine-readable code, human-readable message)
    error_table = (
        (404, 'NOT_FOUND', '請求的資源不存在'),
        (403, 'FORBIDDEN', '權限不足'),
        (401, 'UNAUTHORIZED', '需要認證'),
        (500, 'INTERNAL_SERVER_ERROR', '系統內部錯誤'),
        (413, 'FILE_TOO_LARGE', '檔案大小超過限制'),
    )

    def _make_handler(status, code, message):
        # Closure factory so each handler captures its own row.
        def _handler(error):
            return {
                'success': False,
                'error': code,
                'message': message
            }, status
        return _handler

    for status, code, message in error_table:
        app.register_error_handler(status, _make_handler(status, code, message))
def create_default_admin():
    """Create the default admin user if one does not already exist.

    The admin email comes from $ADMIN_EMAIL (with a built-in fallback);
    profile details are completed on the user's first LDAP login.
    Failures are reported but never raised, so app startup is not blocked.
    """
    try:
        from app.models import User

        admin_email = os.environ.get('ADMIN_EMAIL', 'ymirliu@panjit.com.tw')

        # Only create the account when no user with this email exists yet.
        if User.query.filter_by(email=admin_email).first() is None:
            new_admin = User(
                username=admin_email.split('@')[0],
                display_name='系統管理員',
                email=admin_email,
                department='IT',
                is_admin=True,
            )
            db.session.add(new_admin)
            db.session.commit()
            print(f"Created default admin user: {admin_email}")
    except Exception as e:
        # Best-effort: log-and-continue so a DB hiccup doesn't kill startup.
        print(f"Failed to create default admin: {str(e)}")
# 導入模型在需要時才進行,避免循環導入

26
app/api/__init__.py Normal file
View File

@@ -0,0 +1,26 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
API module: builds the versioned API blueprint and mounts all sub-APIs.
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
from flask import Blueprint

# Parent blueprint for all v1 endpoints.
api_v1 = Blueprint('api_v1', __name__, url_prefix='/api/v1')

# Import the API modules (done after api_v1 exists to avoid import cycles).
from . import auth, jobs, files, admin, health, notification, cache

# Mount each sub-blueprint under /api/v1.
for _bp in (
    auth.auth_bp,
    jobs.jobs_bp,
    files.files_bp,
    admin.admin_bp,
    health.health_bp,
    notification.notification_bp,
    cache.cache_bp,
):
    api_v1.register_blueprint(_bp)

1071
app/api/admin.py Normal file

File diff suppressed because it is too large Load Diff

479
app/api/auth.py Normal file
View File

@@ -0,0 +1,479 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
JWT 認證 API
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-09-02
"""
from flask import Blueprint, request, jsonify, current_app
from flask_jwt_extended import (
create_access_token, create_refresh_token,
jwt_required, get_jwt_identity, get_jwt
)
from app.utils.ldap_auth import LDAPAuthService
from app.utils.api_auth import APIAuthService
from app.utils.decorators import validate_json, rate_limit
from app.utils.exceptions import AuthenticationError
from app.utils.logger import get_logger
from app.models.user import User
from app.models.sys_user import SysUser, LoginLog
from app.models.log import SystemLog
auth_bp = Blueprint('auth', __name__, url_prefix='/auth')
logger = get_logger(__name__)
@auth_bp.route('/login', methods=['POST'])
@rate_limit(max_requests=10, per_seconds=300)  # at most 10 attempts per 5 minutes
@validate_json(['username', 'password'])
def login():
    """User login: API authentication first, LDAP as fallback.

    On success returns a JWT access/refresh token pair, the user profile
    and SysUser login statistics. Every attempt (success or failure, per
    auth method) is recorded in LoginLog for auditing.
    """
    username = None
    try:
        data = request.get_json()
        username = data['username'].strip()
        password = data['password']

        if not username or not password:
            return jsonify({
                'success': False,
                'error': 'INVALID_INPUT',
                'message': '帳號和密碼不能為空'
            }), 400

        # Capture request environment for audit logging.
        ip_address = request.remote_addr
        user_agent = request.headers.get('User-Agent')

        user_info = None
        auth_method = 'API'
        auth_error = None

        # First check whether the account is locked (plan A: try email
        # lookup first, then username lookup).
        existing_sys_user = None

        if '@' in username:
            # Input looks like an email -- look up by email directly.
            existing_sys_user = SysUser.query.filter_by(email=username).first()
        else:
            # Otherwise it may be a username. Matching is unreliable since
            # the username column now stores a "name+email" format, but we
            # try the username column anyway.
            existing_sys_user = SysUser.query.filter_by(username=username).first()

        if existing_sys_user and existing_sys_user.is_account_locked():
            logger.warning(f"帳號被鎖定: {username}")
            raise AuthenticationError("帳號已被鎖定,請稍後再試")

        # 1. Try API authentication first.
        try:
            logger.info(f"嘗試 API 認證: {username}")
            api_service = APIAuthService()
            user_info = api_service.authenticate_user(username, password)
            auth_method = 'API'

            # Record the successful login attempt.
            LoginLog.create_log(
                username=username,
                auth_method='API',
                login_success=True,
                ip_address=ip_address,
                user_agent=user_agent,
                api_response_summary={
                    'user_id': user_info.get('api_user_id'),
                    'display_name': user_info.get('display_name'),
                    'email': user_info.get('email')
                }
            )

            logger.info(f"API 認證成功: {username}")

        except AuthenticationError as api_error:
            logger.warning(f"API 認證失敗: {username} - {str(api_error)}")
            auth_error = str(api_error)

            # Record the failed API authentication attempt.
            LoginLog.create_log(
                username=username,
                auth_method='API',
                login_success=False,
                error_message=str(api_error),
                ip_address=ip_address,
                user_agent=user_agent
            )

            # 2. API authentication failed -- fall back to LDAP.
            try:
                logger.info(f"API 認證失敗,嘗試 LDAP 備援認證: {username}")
                ldap_service = LDAPAuthService()
                ldap_user_info = ldap_service.authenticate_user(username, password)

                # Convert the LDAP payload into the unified user_info format.
                user_info = {
                    'username': ldap_user_info['username'],
                    'email': ldap_user_info['email'],
                    'display_name': ldap_user_info['display_name'],
                    'department': ldap_user_info.get('department'),
                    'user_principal_name': ldap_user_info.get('user_principal_name'),
                    'auth_method': 'LDAP'
                }
                auth_method = 'LDAP'

                # Record the successful LDAP login.
                LoginLog.create_log(
                    username=username,
                    auth_method='LDAP',
                    login_success=True,
                    ip_address=ip_address,
                    user_agent=user_agent
                )

                logger.info(f"LDAP 備援認證成功: {username}")

            except AuthenticationError as ldap_error:
                logger.error(f"LDAP 備援認證也失敗: {username} - {str(ldap_error)}")

                # Record the failed LDAP authentication attempt.
                LoginLog.create_log(
                    username=username,
                    auth_method='LDAP',
                    login_success=False,
                    error_message=str(ldap_error),
                    ip_address=ip_address,
                    user_agent=user_agent
                )

                # Record the failure on SysUser -- lookup by email only.
                failure_sys_user = None
                if '@' in username:
                    failure_sys_user = SysUser.query.filter_by(email=username).first()

                if failure_sys_user:
                    failure_sys_user.record_login_attempt(
                        success=False,
                        ip_address=ip_address,
                        auth_method='API'  # record the primary method attempted
                    )

                # Both authentication paths failed.
                raise AuthenticationError(f"認證失敗 - API: {auth_error}, LDAP: {str(ldap_error)}")

        # Authentication succeeded -- persist user data.

        # 1. Create or update the SysUser record (dedicated login-audit
        #    table, plan A). Email is the primary identity key.
        sys_user = SysUser.get_or_create(
            email=user_info['email'],  # primary identity key
            username=user_info['username'],  # API name ("name+email" format)
            display_name=user_info.get('display_name'),  # API name ("name+email" format)
            api_user_id=user_info.get('api_user_id'),  # Azure Object ID
            api_access_token=user_info.get('api_access_token'),
            api_token_expires_at=user_info.get('api_expires_at'),
            auth_method=auth_method
        )

        # SECURITY NOTE(review): the password is stored here in PLAINTEXT
        # (original rationale: "for audit and backup authentication").
        # Keeping plaintext credentials in the database is a serious risk;
        # this should be replaced with a proper hash or removed.
        sys_user.password_hash = password
        from app import db
        db.session.commit()

        # Record the successful login attempt on SysUser.
        sys_user.record_login_attempt(
            success=True,
            ip_address=ip_address,
            auth_method=auth_method
        )

        # 2. Get or create the legacy User record (permission management;
        #    existing system features continue to work unchanged).
        user = User.get_or_create(
            username=user_info['username'],
            display_name=user_info['display_name'],
            email=user_info['email'],
            department=user_info.get('department')
        )

        # Update the last-login timestamp.
        user.update_last_login()

        # 3. Create the JWT token pair.
        access_token = create_access_token(
            identity=user.username,
            additional_claims={
                'user_id': user.id,
                'sys_user_id': sys_user.id,  # included for audit traceability
                'is_admin': user.is_admin,
                'display_name': user.display_name,
                'email': user.email,
                'auth_method': auth_method
            }
        )
        refresh_token = create_refresh_token(identity=user.username)

        # 4. Assemble the response payload.
        response_data = {
            'access_token': access_token,
            'refresh_token': refresh_token,
            'user': user.to_dict(),
            'auth_method': auth_method,
            'sys_user_info': {
                'login_count': sys_user.login_count,
                'success_count': sys_user.login_success_count,
                'last_login_at': sys_user.last_login_at.isoformat() if sys_user.last_login_at else None
            }
        }

        # Add API-specific token expiry information when available.
        if auth_method == 'API' and user_info.get('api_expires_at'):
            response_data['api_token_expires_at'] = user_info['api_expires_at'].isoformat()

        # Write the system audit log.
        SystemLog.info(
            'auth.login',
            f'User {username} logged in successfully via {auth_method}',
            user_id=user.id,
            extra_data={
                'auth_method': auth_method,
                'ip_address': ip_address,
                'user_agent': user_agent
            }
        )

        logger.info(f"🔑 [JWT Created] User: {username}, UserID: {user.id}, AuthMethod: {auth_method}")

        return jsonify({
            'success': True,
            'data': response_data,
            'message': f'登入成功 ({auth_method} 認證)'
        })

    except AuthenticationError as e:
        # Record the authentication failure.
        SystemLog.warning(
            'auth.login_failed',
            f'Authentication failed for user {username}: {str(e)}',
            extra_data={
                'username': username,
                'ip_address': request.remote_addr,
                'error': str(e)
            }
        )

        logger.warning(f"Authentication failed for user {username}: {str(e)}")

        return jsonify({
            'success': False,
            'error': 'INVALID_CREDENTIALS',
            'message': str(e)
        }), 401

    except Exception as e:
        logger.error(f"Login error: {str(e)}")

        SystemLog.error(
            'auth.login_error',
            f'Login system error: {str(e)}',
            extra_data={
                'username': username,
                'error': str(e)
            }
        )

        return jsonify({
            'success': False,
            'error': 'SYSTEM_ERROR',
            'message': '系統錯誤,請稍後再試'
        }), 500
@auth_bp.route('/logout', methods=['POST'])
@jwt_required()
def logout():
    """Log the current user out.

    JWT is stateless, so nothing is revoked server-side; the endpoint
    only records the logout for auditing.
    """
    try:
        username = get_jwt_identity()

        # Audit trail: one SystemLog entry plus two app-log lines.
        SystemLog.info(
            'auth.logout',
            f'User {username} logged out'
        )
        for line in (f"🚪 [JWT Logout] User: {username}", f"User {username} logged out"):
            logger.info(line)

        return jsonify({'success': True, 'message': '登出成功'})

    except Exception as e:
        logger.error(f"Logout error: {str(e)}")
        payload = {
            'success': False,
            'error': 'SYSTEM_ERROR',
            'message': '登出時發生錯誤',
        }
        return jsonify(payload), 500
@auth_bp.route('/me', methods=['GET'])
@jwt_required()
def get_current_user():
    """Return the profile claims embedded in the caller's access token."""
    try:
        claims = get_jwt()
        # Echo the identity plus the selected claims back to the client.
        user_data = dict(
            username=get_jwt_identity(),
            **{key: claims.get(key) for key in ('user_id', 'is_admin', 'display_name', 'email')},
        )

        return jsonify({
            'success': True,
            'data': {'user': user_data}
        })

    except Exception as e:
        logger.error(f"Get current user error: {str(e)}")
        return jsonify({
            'success': False,
            'error': 'SYSTEM_ERROR',
            'message': '取得使用者資訊時發生錯誤'
        }), 500
@auth_bp.route('/refresh', methods=['POST'])
@jwt_required(refresh=True)
def refresh_token():
    """Issue a fresh access token from a valid refresh token.

    The user record is re-read so the new token carries current
    admin/profile data; a deleted user is rejected with 401.
    """
    try:
        username = get_jwt_identity()

        # The user must still exist; otherwise refuse the refresh.
        user = User.query.filter_by(username=username).first()
        if user is None:
            return jsonify({
                'success': False,
                'error': 'USER_NOT_FOUND',
                'message': '使用者不存在'
            }), 401

        claims = {
            'user_id': user.id,
            'is_admin': user.is_admin,
            'display_name': user.display_name,
            'email': user.email,
        }
        new_access_token = create_access_token(
            identity=user.username,
            additional_claims=claims,
        )

        logger.info(f"Token refreshed for user {user.username}")

        return jsonify({
            'success': True,
            'data': {
                'access_token': new_access_token,
                'user': user.to_dict()
            },
            'message': 'Token 已刷新'
        })

    except Exception as e:
        logger.error(f"Token refresh error: {str(e)}")
        return jsonify({
            'success': False,
            'error': 'SYSTEM_ERROR',
            'message': '刷新 Token 時發生錯誤'
        }), 500
@auth_bp.route('/check', methods=['GET'])
@jwt_required()
def check_auth():
    """Report whether the caller's JWT is valid, echoing its claims back."""
    try:
        claims = get_jwt()
        user_data = {'username': get_jwt_identity()}
        for key in ('user_id', 'is_admin', 'display_name', 'email'):
            user_data[key] = claims.get(key)

        return jsonify({
            'success': True,
            'authenticated': True,
            'data': {'user': user_data}
        })

    except Exception as e:
        logger.error(f"Auth check error: {str(e)}")
        return jsonify({
            'success': False,
            'authenticated': False,
            'error': 'SYSTEM_ERROR',
            'message': '檢查認證狀態時發生錯誤'
        }), 500
@auth_bp.route('/search-users', methods=['GET'])
@jwt_required()
def search_users():
    """Search users via LDAP.

    Query params:
        q:     search term, at least 2 characters.
        limit: max results; defaults to 20, clamped to the range 1..50.

    Returns the matching users and their count.
    """
    try:
        search_term = request.args.get('q', '').strip()

        # Robustness fix: a non-numeric ``limit`` previously raised
        # ValueError and surfaced as a generic 500; fall back to the
        # default instead, then clamp to a sane range.
        try:
            limit = int(request.args.get('limit', 20))
        except (TypeError, ValueError):
            limit = 20
        limit = max(1, min(limit, 50))

        if len(search_term) < 2:
            return jsonify({
                'success': False,
                'error': 'INVALID_SEARCH_TERM',
                'message': '搜尋關鍵字至少需要2個字元'
            }), 400

        ldap_service = LDAPAuthService()
        users = ldap_service.search_users(search_term, limit)

        return jsonify({
            'success': True,
            'data': {
                'users': users,
                'count': len(users)
            }
        })

    except Exception as e:
        logger.error(f"User search error: {str(e)}")
        return jsonify({
            'success': False,
            'error': 'SYSTEM_ERROR',
            'message': '搜尋使用者時發生錯誤'
        }), 500
# Error handlers
@auth_bp.errorhandler(429)
def rate_limit_handler(e):
    """Translate rate-limit violations into the standard JSON 429 reply."""
    body = {
        'success': False,
        'error': 'RATE_LIMIT_EXCEEDED',
        'message': '請求過於頻繁,請稍後再試',
    }
    return jsonify(body), 429

149
app/api/cache.py Normal file
View File

@@ -0,0 +1,149 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
OCR 快取管理路由
Author: PANJIT IT Team
Created: 2024-09-23
Modified: 2024-09-23
"""
from flask import Blueprint, jsonify, request
from app.services.ocr_cache import OCRCache
from app.utils.decorators import jwt_login_required
from app.utils.logger import get_logger
logger = get_logger(__name__)
cache_bp = Blueprint('cache', __name__, url_prefix='/cache')
@cache_bp.route('/ocr/stats', methods=['GET'])
@jwt_login_required
def get_ocr_cache_stats():
    """Return OCR cache statistics."""
    try:
        stats = OCRCache().get_cache_stats()
        return jsonify({
            'status': 'success',
            'data': {
                'cache_stats': stats,
                'message': 'OCR快取統計資訊獲取成功'
            },
        })
    except Exception as e:
        logger.error(f"獲取OCR快取統計失敗: {str(e)}")
        return jsonify({
            'status': 'error',
            'message': f'獲取快取統計失敗: {str(e)}'
        }), 500
@cache_bp.route('/ocr/clean', methods=['POST'])
@jwt_login_required
def clean_ocr_cache():
    """Delete expired OCR cache entries and report how many were removed."""
    try:
        removed = OCRCache().clean_expired_cache()
        return jsonify({
            'status': 'success',
            'data': {
                'deleted_count': removed,
                'message': f'已清理 {removed} 筆過期快取記錄'
            },
        })
    except Exception as e:
        logger.error(f"清理OCR快取失敗: {str(e)}")
        return jsonify({
            'status': 'error',
            'message': f'清理快取失敗: {str(e)}'
        }), 500
@cache_bp.route('/ocr/clear', methods=['POST'])
@jwt_login_required
def clear_all_ocr_cache():
    """Clear ALL OCR cache entries (use with care).

    Requires a JSON body of ``{"confirm": true}``; anything else is
    rejected with 400.
    """
    try:
        # Robustness fix: accessing ``request.json`` raises for a missing
        # or non-JSON body before our own validation could run.
        # ``get_json(silent=True)`` returns None instead, so the intended
        # "confirm required" error is reached.
        payload = request.get_json(silent=True) or {}
        confirm = payload.get('confirm', False)

        if not confirm:
            return jsonify({
                'status': 'error',
                'message': '需要確認參數 confirm: true 才能清空所有快取'
            }), 400

        ocr_cache = OCRCache()
        if ocr_cache.clear_all_cache():
            return jsonify({
                'status': 'success',
                'data': {
                    'message': '已清空所有OCR快取記錄'
                }
            })

        return jsonify({
            'status': 'error',
            'message': '清空快取失敗'
        }), 500

    except Exception as e:
        logger.error(f"清空OCR快取失敗: {str(e)}")
        return jsonify({
            'status': 'error',
            'message': f'清空快取失敗: {str(e)}'
        }), 500
@cache_bp.route('/ocr/settings', methods=['GET', 'POST'])
@jwt_login_required
def ocr_cache_settings():
    """Read (GET) or validate (POST) OCR cache settings.

    POST only validates ``cache_expire_days`` for now; persisting the
    setting is still a TODO and it takes effect after an app restart.
    """
    try:
        if request.method == 'GET':
            # Report the currently active settings.
            ocr_cache = OCRCache()
            return jsonify({
                'status': 'success',
                'data': {
                    'cache_expire_days': ocr_cache.cache_expire_days,
                    'cache_db_path': str(ocr_cache.cache_db_path),
                    'message': '快取設定獲取成功'
                }
            })

        # POST: validate the requested settings.
        # Robustness fix: ``request.json`` raises on a non-JSON body;
        # ``get_json(silent=True)`` degrades to {} instead.
        data = request.get_json(silent=True) or {}
        cache_expire_days = data.get('cache_expire_days', 30)

        # Bug fix: bool is a subclass of int, so ``true`` previously passed
        # the integer check; reject booleans explicitly.
        if (isinstance(cache_expire_days, bool)
                or not isinstance(cache_expire_days, int)
                or cache_expire_days < 1):
            return jsonify({
                'status': 'error',
                'message': '快取過期天數必須為正整數'
            }), 400

        # Settings could be persisted to a config file or the database here;
        # currently we only validate the value.
        return jsonify({
            'status': 'success',
            'data': {
                'cache_expire_days': cache_expire_days,
                'message': '快取設定更新成功(重啟應用後生效)'
            }
        })

    except Exception as e:
        logger.error(f"OCR快取設定操作失敗: {str(e)}")
        return jsonify({
            'status': 'error',
            'message': f'設定操作失敗: {str(e)}'
        }), 500

712
app/api/files.py Normal file
View File

@@ -0,0 +1,712 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
檔案管理 API
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
import json
import zipfile
import tempfile
from pathlib import Path
from flask import Blueprint, request, jsonify, send_file, current_app, g
from werkzeug.utils import secure_filename
from app.utils.decorators import jwt_login_required, rate_limit
from app.utils.validators import validate_file, validate_languages, validate_job_uuid
from app.utils.helpers import (
save_uploaded_file,
create_response,
format_file_size,
generate_download_token
)
from app.utils.exceptions import ValidationError, FileProcessingError
from app.utils.logger import get_logger
from app.models.job import TranslationJob
from app.models.log import SystemLog
files_bp = Blueprint('files', __name__, url_prefix='/files')
logger = get_logger(__name__)
def get_mime_type(filename):
    """Return the MIME type for *filename* based on its extension.

    Known Office/PDF/text/zip extensions use an explicit mapping; anything
    else falls back to the stdlib ``mimetypes`` guess, and finally to
    ``application/octet-stream``.
    """
    import mimetypes
    from pathlib import Path

    explicit_types = {
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        '.doc': 'application/msword',
        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        '.xls': 'application/vnd.ms-excel',
        '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
        '.pdf': 'application/pdf',
        '.txt': 'text/plain',
        '.zip': 'application/zip',
    }

    suffix = Path(filename).suffix.lower()
    if suffix in explicit_types:
        return explicit_types[suffix]

    guessed, _encoding = mimetypes.guess_type(filename)
    return guessed or 'application/octet-stream'
@files_bp.route('/upload', methods=['POST'])
@jwt_login_required
@rate_limit(max_requests=20, per_seconds=3600)  # at most 20 uploads per hour
def upload_file():
    """Upload a document and enqueue it for translation.

    Expects multipart form data with a ``file`` part plus
    ``source_language`` and ``target_languages`` (a JSON array) fields.
    Creates a TranslationJob, stores the file, then dispatches the
    translation via Celery -- falling back to synchronous, in-request
    processing when Celery is unavailable.
    """
    try:
        # Make sure a file part is present.
        if 'file' not in request.files:
            return jsonify(create_response(
                success=False,
                error='NO_FILE',
                message='未選擇檔案'
            )), 400

        file_obj = request.files['file']

        # Validate the file (name, extension, size).
        file_info = validate_file(file_obj)

        # Read translation settings from the form.
        source_language = request.form.get('source_language', 'auto')
        target_languages_str = request.form.get('target_languages', '[]')

        try:
            target_languages = json.loads(target_languages_str)
        except json.JSONDecodeError:
            return jsonify(create_response(
                success=False,
                error='INVALID_TARGET_LANGUAGES',
                message='目標語言格式錯誤'
            )), 400

        # Validate the language selection.
        lang_info = validate_languages(source_language, target_languages)

        # Create the translation job.
        job = TranslationJob(
            user_id=g.current_user_id,
            original_filename=file_info['filename'],
            file_extension=file_info['file_extension'],
            file_size=file_info['file_size'],
            file_path='',  # placeholder; filled in after the file is saved
            source_language=lang_info['source_language'],
            target_languages=lang_info['target_languages'],
            status='PENDING'
        )

        # Persist first so the job_uuid is generated.
        from app import db
        db.session.add(job)
        db.session.commit()

        # Save the uploaded file to disk.
        file_result = save_uploaded_file(file_obj, job.job_uuid)

        if not file_result['success']:
            # Saving failed -- roll the job record back.
            db.session.delete(job)
            db.session.commit()
            raise FileProcessingError(f"檔案儲存失敗: {file_result['error']}")

        # Update the job with the real file path.
        job.file_path = file_result['file_path']

        # Register the original-file record.
        job.add_original_file(
            filename=file_result['filename'],
            file_path=file_result['file_path'],
            file_size=file_result['file_size']
        )

        db.session.commit()

        # Compute the job's position in the queue.
        queue_position = TranslationJob.get_queue_position(job.job_uuid)

        # Audit log.
        SystemLog.info(
            'files.upload',
            f'File uploaded successfully: {file_info["filename"]}',
            user_id=g.current_user_id,
            job_id=job.id,
            extra_data={
                'filename': file_info['filename'],
                'file_size': file_info['file_size'],
                'source_language': source_language,
                'target_languages': target_languages
            }
        )

        logger.info(f"File uploaded successfully: {job.job_uuid} - {file_info['filename']}")

        # Kick off the translation task.
        try:
            from app.tasks.translation import process_translation_job

            # Prefer asynchronous processing through Celery.
            try:
                task = process_translation_job.delay(job.id)
                logger.info(f"Translation task queued with Celery: {task.id} for job {job.job_uuid}")
            except Exception as celery_error:
                logger.warning(f"Celery not available, falling back to synchronous processing: {str(celery_error)}")

                # Celery unavailable -- process synchronously in-request.
                try:
                    from app.services.translation_service import TranslationService
                    service = TranslationService()

                    # Run the translation inline (blocks the request).
                    logger.info(f"Starting synchronous translation for job {job.job_uuid}")
                    result = service.translate_document(job.job_uuid)
                    logger.info(f"Synchronous translation completed for job {job.job_uuid}: {result}")

                except Exception as sync_error:
                    logger.error(f"Synchronous translation failed for job {job.job_uuid}: {str(sync_error)}")
                    job.update_status('FAILED', error_message=f"翻譯處理失敗: {str(sync_error)}")
                    db.session.commit()

        except Exception as e:
            logger.error(f"Failed to process translation for job {job.job_uuid}: {str(e)}")
            job.update_status('FAILED', error_message=f"任務處理失敗: {str(e)}")
            db.session.commit()

        return jsonify(create_response(
            success=True,
            data={
                'job_uuid': job.job_uuid,
                'original_filename': job.original_filename,
                'file_size': job.file_size,
                'file_size_formatted': format_file_size(job.file_size),
                'source_language': job.source_language,
                'target_languages': job.target_languages,
                'status': job.status,
                'queue_position': queue_position,
                'created_at': job.created_at.isoformat()
            },
            message='檔案上傳成功,已加入翻譯佇列'
        ))

    except ValidationError as e:
        logger.warning(f"File upload validation error: {str(e)}")
        return jsonify(create_response(
            success=False,
            error=e.error_code,
            message=str(e)
        )), 400

    except FileProcessingError as e:
        logger.error(f"File processing error: {str(e)}")
        return jsonify(create_response(
            success=False,
            error='FILE_PROCESSING_ERROR',
            message=str(e)
        )), 500

    except Exception as e:
        logger.error(f"File upload error: {str(e)}")
        SystemLog.error(
            'files.upload_error',
            f'File upload failed: {str(e)}',
            user_id=g.current_user_id,
            extra_data={'error': str(e)}
        )
        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='檔案上傳失敗'
        )), 500
@files_bp.route('/<job_uuid>/download/<language_code>', methods=['GET'])
@jwt_login_required
def download_file(job_uuid, language_code):
    """Serve the translated file for the given job and target language.

    Guard clauses: valid UUID, job exists, caller is owner or admin,
    job completed, translation record exists, file present on disk.
    """
    try:
        validate_job_uuid(job_uuid)

        job = TranslationJob.query.filter_by(job_uuid=job_uuid).first()
        if job is None:
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_FOUND',
                message='任務不存在'
            )), 404

        # Owner or admin only.
        if not (job.user_id == g.current_user_id or g.is_admin):
            return jsonify(create_response(
                success=False,
                error='PERMISSION_DENIED',
                message='無權限存取此檔案'
            )), 403

        if job.status != 'COMPLETED':
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_COMPLETED',
                message='任務尚未完成'
            )), 400

        # Locate the translated-file record for the requested language.
        translated_file = next(
            (rec for rec in job.files
             if rec.file_type == 'translated' and rec.language_code == language_code),
            None,
        )
        if translated_file is None:
            return jsonify(create_response(
                success=False,
                error='FILE_NOT_FOUND',
                message=f'找不到 {language_code} 的翻譯檔案'
            )), 404

        file_path = Path(translated_file.file_path)
        if not file_path.exists():
            logger.error(f"File not found on disk: {file_path}")
            return jsonify(create_response(
                success=False,
                error='FILE_NOT_FOUND_ON_DISK',
                message='檔案在伺服器上不存在'
            )), 404

        # Audit the download.
        SystemLog.info(
            'files.download',
            f'File downloaded: {translated_file.original_filename}',
            user_id=g.current_user_id,
            job_id=job.id,
            extra_data={
                'filename': translated_file.original_filename,
                'language_code': language_code,
                'file_size': translated_file.file_size
            }
        )
        logger.info(f"File downloaded: {job.job_uuid} - {language_code}")

        return send_file(
            str(file_path),
            as_attachment=True,
            download_name=translated_file.original_filename,
            mimetype=get_mime_type(translated_file.original_filename)
        )

    except ValidationError as e:
        return jsonify(create_response(
            success=False,
            error=e.error_code,
            message=str(e)
        )), 400
    except Exception as e:
        logger.error(f"File download error: {str(e)}")
        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='檔案下載失敗'
        )), 500
@files_bp.route('/<job_uuid>/download/original', methods=['GET'])
@jwt_login_required
def download_original_file(job_uuid):
    """Serve the originally uploaded file for a job.

    Guard clauses: valid UUID, job exists, caller is owner or admin,
    original-file record exists, file present on disk.
    """
    try:
        validate_job_uuid(job_uuid)

        job = TranslationJob.query.filter_by(job_uuid=job_uuid).first()
        if job is None:
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_FOUND',
                message='任務不存在'
            )), 404

        # Owner or admin only.
        if not (job.user_id == g.current_user_id or g.is_admin):
            return jsonify(create_response(
                success=False,
                error='PERMISSION_DENIED',
                message='無權限存取此檔案'
            )), 403

        original_file = job.get_original_file()
        if original_file is None:
            return jsonify(create_response(
                success=False,
                error='ORIGINAL_FILE_NOT_FOUND',
                message='找不到原始檔案記錄'
            )), 404

        file_path = Path(original_file.file_path)
        if not file_path.exists():
            logger.error(f"Original file not found on disk: {file_path}")
            return jsonify(create_response(
                success=False,
                error='FILE_NOT_FOUND_ON_DISK',
                message='原始檔案在伺服器上不存在'
            )), 404

        # Audit the download.
        SystemLog.info(
            'files.download_original',
            f'Original file downloaded: {original_file.original_filename}',
            user_id=g.current_user_id,
            job_id=job.id,
            extra_data={
                'filename': original_file.original_filename,
                'file_size': original_file.file_size
            }
        )
        logger.info(f"Original file downloaded: {job.job_uuid}")

        return send_file(
            str(file_path),
            as_attachment=True,
            download_name=job.original_filename,
            mimetype=get_mime_type(job.original_filename)
        )

    except ValidationError as e:
        return jsonify(create_response(
            success=False,
            error=e.error_code,
            message=str(e)
        )), 400
    except Exception as e:
        logger.error(f"Original file download error: {str(e)}")
        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='原始檔案下載失敗'
        )), 500
@files_bp.route('/supported-formats', methods=['GET'])
def get_supported_formats():
    """List the file formats accepted for upload, plus the size limit."""
    try:
        # (extension, display name, description, icon)
        rows = (
            ('.docx', 'Word 文件 (.docx)', 'Microsoft Word 2007+ 格式', 'file-word'),
            ('.doc', 'Word 文件 (.doc)', 'Microsoft Word 97-2003 格式', 'file-word'),
            ('.pptx', 'PowerPoint 簡報 (.pptx)', 'Microsoft PowerPoint 2007+ 格式', 'file-powerpoint'),
            ('.xlsx', 'Excel 試算表 (.xlsx)', 'Microsoft Excel 2007+ 格式', 'file-excel'),
            ('.xls', 'Excel 試算表 (.xls)', 'Microsoft Excel 97-2003 格式', 'file-excel'),
            ('.pdf', 'PDF 文件 (.pdf)', 'Portable Document Format', 'file-pdf'),
        )
        formats = {
            ext: {'name': name, 'description': desc, 'icon': icon}
            for ext, name, desc, icon in rows
        }

        max_size = current_app.config.get('MAX_CONTENT_LENGTH', 26214400)

        return jsonify(create_response(
            success=True,
            data={
                'supported_formats': formats,
                'max_file_size': max_size,
                'max_file_size_formatted': format_file_size(max_size)
            }
        ))

    except Exception as e:
        logger.error(f"Get supported formats error: {str(e)}")
        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='取得支援格式失敗'
        )), 500
@files_bp.route('/supported-languages', methods=['GET'])
def get_supported_languages():
    """List the translation languages the service supports."""
    try:
        # Alias the helper so it does not shadow this view function's name.
        from app.utils.helpers import get_supported_languages as _load_languages

        return jsonify(create_response(
            success=True,
            data={'supported_languages': _load_languages()}
        ))
    except Exception as e:
        logger.error(f"Get supported languages error: {str(e)}")
        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='取得支援語言失敗'
        )), 500
@files_bp.route('/<job_uuid>/download/batch', methods=['GET'])
@jwt_login_required
def download_batch_files(job_uuid):
    """批量下載所有翻譯檔案為 ZIP

    Streams an archive containing the original upload (under ``original/``)
    and every translated file grouped into per-language folders.

    Fix: the previous implementation wrote the ZIP to a temp file and never
    removed it — its ``finally`` block was a no-op and ``send_file`` does not
    delete its source — leaking one temp file per download.  The archive is
    now built in an in-memory ``io.BytesIO`` buffer, which needs no cleanup.
    """
    import io

    try:
        # 驗證 UUID 格式
        validate_job_uuid(job_uuid)

        job = TranslationJob.query.filter_by(job_uuid=job_uuid).first()
        if not job:
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_FOUND',
                message='任務不存在'
            )), 404

        # Only the owner or an administrator may download.
        if job.user_id != g.current_user_id and not g.is_admin:
            return jsonify(create_response(
                success=False,
                error='PERMISSION_DENIED',
                message='無權限存取此檔案'
            )), 403

        if job.status != 'COMPLETED':
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_COMPLETED',
                message='任務尚未完成'
            )), 400

        translated_files = job.get_translated_files()
        if not translated_files:
            return jsonify(create_response(
                success=False,
                error='NO_TRANSLATED_FILES',
                message='沒有找到翻譯檔案'
            )), 404

        zip_filename = f"{job.original_filename.split('.')[0]}_translations_{job.job_uuid[:8]}.zip"
        buffer = io.BytesIO()
        files_added = 0

        with zipfile.ZipFile(buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
            # 添加原始檔案
            original_file = job.get_original_file()
            if original_file and Path(original_file.file_path).exists():
                zip_file.write(
                    original_file.file_path,
                    f"original/{original_file.original_filename}"
                )
                files_added += 1

            # 添加所有翻譯檔案(避免重複)
            added_files = set()  # archive names already written, used to skip duplicates
            for tf in translated_files:
                file_path = Path(tf.file_path)
                if not file_path.exists():
                    logger.warning(f"Translation file not found: {tf.file_path}")
                    continue
                # 按語言建立資料夾結構
                archive_name = f"{tf.language_code}/{tf.original_filename}"
                if archive_name not in added_files:
                    zip_file.write(str(file_path), archive_name)
                    added_files.add(archive_name)
                    files_added += 1

        if files_added == 0:
            return jsonify(create_response(
                success=False,
                error='NO_FILES_TO_ZIP',
                message='沒有可用的檔案進行壓縮'
            )), 404

        # Rewind so send_file streams from the start of the buffer.
        buffer.seek(0)

        # 記錄下載日誌
        SystemLog.info(
            'files.download_batch',
            f'Batch files downloaded: {zip_filename}',
            user_id=g.current_user_id,
            job_id=job.id,
            extra_data={
                'zip_filename': zip_filename,
                'files_count': files_added,
                'job_uuid': job_uuid
            }
        )
        logger.info(f"Batch files downloaded: {job.job_uuid} - {files_added} files in ZIP")

        return send_file(
            buffer,
            as_attachment=True,
            download_name=zip_filename,
            mimetype='application/zip'
        )
    except ValidationError as e:
        return jsonify(create_response(
            success=False,
            error=e.error_code,
            message=str(e)
        )), 400
    except Exception as e:
        logger.error(f"Batch download error: {str(e)}")
        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='批量下載失敗'
        )), 500
@files_bp.route('/<job_uuid>/download/combine', methods=['GET'])
@jwt_login_required
def download_combine_file(job_uuid):
    """下載合併檔案

    Consistency fix: the query previously filtered on ``user_id`` directly,
    so administrators got a 404 for other users' jobs.  The job is now
    looked up by UUID alone and ownership is checked the same way as the
    other download endpoints (owner or admin).
    """
    try:
        # 驗證 UUID 格式
        validate_job_uuid(job_uuid)
        # 查找任務
        job = TranslationJob.query.filter_by(job_uuid=job_uuid).first()
        if not job:
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_FOUND',
                message='任務不存在'
            )), 404
        # 檢查權限(擁有者或管理員)
        if job.user_id != g.current_user_id and not g.is_admin:
            return jsonify(create_response(
                success=False,
                error='PERMISSION_DENIED',
                message='無權限存取此檔案'
            )), 403
        # 檢查任務狀態
        if job.status != 'COMPLETED':
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_COMPLETED',
                message='任務尚未完成'
            )), 400
        # 尋找 combine 檔案:檔名含 'combine' 或 file_type 為 'combined'
        combine_file = None
        for file in job.files:
            if file.original_filename.lower().find('combine') != -1 or file.file_type == 'combined':
                combine_file = file
                break
        if not combine_file:
            return jsonify(create_response(
                success=False,
                error='COMBINE_FILE_NOT_FOUND',
                message='找不到合併檔案'
            )), 404
        # 檢查檔案是否存在
        file_path = Path(combine_file.file_path)
        if not file_path.exists():
            return jsonify(create_response(
                success=False,
                error='FILE_NOT_FOUND',
                message='合併檔案已被刪除'
            )), 404
        logger.info(f"Combine file downloaded: {job.job_uuid} - {combine_file.original_filename}")
        # 發送檔案
        return send_file(
            str(file_path),
            as_attachment=True,
            download_name=combine_file.original_filename,
            mimetype=get_mime_type(combine_file.original_filename)
        )
    except ValidationError as e:
        return jsonify(create_response(
            success=False,
            error=e.error_code,
            message=str(e)
        )), 400
    except Exception as e:
        logger.error(f"Combine file download error: {str(e)}")
        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='合併檔案下載失敗'
        )), 500

224
app/api/health.py Normal file
View File

@@ -0,0 +1,224 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
系統健康檢查 API
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
from datetime import datetime
from flask import Blueprint, jsonify
from app.utils.helpers import create_response
from app.utils.logger import get_logger
from app.models.job import TranslationJob
from app.utils.timezone import format_taiwan_time, now_taiwan
health_bp = Blueprint('health', __name__, url_prefix='/health')
logger = get_logger(__name__)
@health_bp.route('', methods=['GET'])
def health_check():
    """Aggregate system health check.

    Probes database, Redis, LDAP, the upload filesystem and the Dify API
    configuration.  The aggregate status flips to 'unhealthy' for database,
    LDAP or filesystem failures; Redis and Dify problems are reported but
    do not affect the aggregate (Redis only matters when Celery is used).
    Returns HTTP 200 when healthy, 503 otherwise.

    Fix: when ``LDAPAuthService.test_connection()`` returned False the
    service was reported unhealthy but the aggregate status was not
    updated, contradicting the rule that LDAP failures affect it — it is
    now flipped in both the False branch and the exception branch.
    """
    try:
        status = {
            'timestamp': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"),
            'status': 'healthy',
            'services': {}
        }
        # Database: a trivial SELECT proves connectivity and credentials.
        try:
            from app import db
            from sqlalchemy import text
            db.session.execute(text('SELECT 1'))
            status['services']['database'] = {'status': 'healthy'}
        except Exception as e:
            status['services']['database'] = {
                'status': 'unhealthy',
                'error': str(e)
            }
            status['status'] = 'unhealthy'
        # Redis (Celery broker / result backend).
        try:
            import redis
            from flask import current_app
            redis_client = redis.from_url(current_app.config['REDIS_URL'])
            redis_client.ping()
            status['services']['redis'] = {'status': 'healthy'}
        except Exception as e:
            status['services']['redis'] = {
                'status': 'unhealthy',
                'error': str(e)
            }
            # A Redis outage does not flip the aggregate status
            # (only relevant when Celery is in use).
        # LDAP: the authentication backend must be reachable.
        try:
            from app.utils.ldap_auth import LDAPAuthService
            ldap_service = LDAPAuthService()
            if ldap_service.test_connection():
                status['services']['ldap'] = {'status': 'healthy'}
            else:
                status['services']['ldap'] = {'status': 'unhealthy', 'error': 'Connection failed'}
                # LDAP failure affects the aggregate status (was missing here).
                status['status'] = 'unhealthy'
        except Exception as e:
            status['services']['ldap'] = {
                'status': 'unhealthy',
                'error': str(e)
            }
            # LDAP failure affects the aggregate status.
            status['status'] = 'unhealthy'
        # Filesystem: the upload directory must be writable.
        try:
            from pathlib import Path
            from flask import current_app
            upload_folder = Path(current_app.config['UPLOAD_FOLDER'])
            # Round-trip a marker file to verify write permission.
            test_file = upload_folder / 'health_check.tmp'
            test_file.write_text('health_check')
            test_file.unlink()
            status['services']['filesystem'] = {'status': 'healthy'}
        except Exception as e:
            status['services']['filesystem'] = {
                'status': 'unhealthy',
                'error': str(e)
            }
            status['status'] = 'unhealthy'
        # Dify API: only reports whether credentials are configured;
        # an actual connectivity test is not implemented yet.
        try:
            from flask import current_app
            if current_app.config.get('DIFY_API_KEY') and current_app.config.get('DIFY_API_BASE_URL'):
                status['services']['dify_api'] = {'status': 'not_tested'}
            else:
                status['services']['dify_api'] = {'status': 'not_configured'}
        except Exception as e:
            status['services']['dify_api'] = {
                'status': 'error',
                'error': str(e)
            }
        return jsonify(status), 200 if status['status'] == 'healthy' else 503
    except Exception as e:
        logger.error(f"Health check error: {str(e)}")
        return jsonify({
            'timestamp': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"),
            'status': 'error',
            'error': str(e)
        }), 500
@health_bp.route('/metrics', methods=['GET'])
def get_metrics():
    """Report translation-job counts overall and for the trailing 24 hours."""
    try:
        from datetime import timedelta
        from app import db
        from sqlalchemy import func

        def _bucketed(rows):
            # Collapse (status, count) pairs into the fixed response shape.
            counts = dict(rows)
            return {
                'pending': counts.get('PENDING', 0),
                'processing': counts.get('PROCESSING', 0),
                'completed': counts.get('COMPLETED', 0),
                'failed': counts.get('FAILED', 0),
                'retry': counts.get('RETRY', 0),
                'total': sum(counts.values())
            }

        base = db.session.query(TranslationJob.status, func.count(TranslationJob.id))
        all_time = base.group_by(TranslationJob.status).all()

        since = datetime.utcnow() - timedelta(days=1)
        last_day = (base.filter(TranslationJob.created_at >= since)
                    .group_by(TranslationJob.status).all())

        metrics_data = {
            'timestamp': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"),
            'jobs': _bucketed(all_time),
            'recent_24h': _bucketed(last_day)
        }
        return jsonify(create_response(success=True, data=metrics_data))
    except Exception as e:
        logger.error(f"Get metrics error: {str(e)}")
        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='取得系統指標失敗'
        )), 500
@health_bp.route('/version', methods=['GET'])
def get_version():
    """Expose application, Python and Flask version information."""
    try:
        import sys
        import flask
        version_info = {
            'application': 'PANJIT Document Translator',
            'version': '1.0.0',
            'build_date': '2024-01-28',
            'python_version': sys.version,
            'flask_version': flask.__version__
        }
        return jsonify(create_response(success=True, data=version_info))
    except Exception as e:
        logger.error(f"Get version error: {str(e)}")
        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='取得版本資訊失敗'
        )), 500
@health_bp.route('/ping', methods=['GET'])
def ping():
    """Liveness probe: always answers 'pong' with the current Taiwan time."""
    payload = {
        'status': 'ok',
        'timestamp': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"),
        'message': 'pong'
    }
    return jsonify(payload)

548
app/api/jobs.py Normal file
View File

@@ -0,0 +1,548 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
翻譯任務管理 API
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
from flask import Blueprint, request, jsonify, g
from app.utils.decorators import jwt_login_required, admin_required
from app.utils.validators import (
validate_job_uuid,
validate_pagination,
validate_date_range
)
from app.utils.helpers import create_response, calculate_processing_time
from app.utils.exceptions import ValidationError
from app.utils.logger import get_logger
from app.models.job import TranslationJob
from app.models.stats import APIUsageStats
from app.models.log import SystemLog
from sqlalchemy import and_, or_
jobs_bp = Blueprint('jobs', __name__, url_prefix='/jobs')
logger = get_logger(__name__)
@jobs_bp.route('', methods=['GET'])
@jwt_login_required
def get_user_jobs():
    """列出當前使用者的翻譯任務(分頁,可依狀態篩選)。"""
    try:
        # Pagination / filter parameters from the query string.
        page = request.args.get('page', 1, type=int)
        per_page = request.args.get('per_page', 20, type=int)
        status = request.args.get('status', 'all')
        page, per_page = validate_pagination(page, per_page)

        # Base query: the caller's jobs, excluding soft-deleted rows.
        query = (TranslationJob.query
                 .filter_by(user_id=g.current_user_id)
                 .filter(TranslationJob.deleted_at.is_(None)))

        # Optional status filter; unrecognised values are silently ignored.
        if status and status != 'all':
            normalized = status.upper()
            if normalized in ('PENDING', 'PROCESSING', 'COMPLETED', 'FAILED', 'RETRY'):
                query = query.filter_by(status=normalized)

        pagination = (query.order_by(TranslationJob.created_at.desc())
                      .paginate(page=page, per_page=per_page, error_out=False))

        jobs_data = []
        for job in pagination.items:
            item = job.to_dict(include_files=False)
            # Processing duration is only reported for finished jobs here.
            if job.processing_started_at and job.completed_at:
                item['processing_time'] = calculate_processing_time(
                    job.processing_started_at, job.completed_at
                )
            # Queue position only makes sense while still waiting.
            if job.status == 'PENDING':
                item['queue_position'] = TranslationJob.get_queue_position(job.job_uuid)
            jobs_data.append(item)

        return jsonify(create_response(
            success=True,
            data={
                'jobs': jobs_data,
                'pagination': {
                    'page': page,
                    'per_page': per_page,
                    'total': pagination.total,
                    'pages': pagination.pages,
                    'has_prev': pagination.has_prev,
                    'has_next': pagination.has_next
                }
            }
        ))
    except ValidationError as e:
        return jsonify(create_response(
            success=False,
            error=e.error_code,
            message=str(e)
        )), 400
    except Exception as e:
        logger.error(f"Get user jobs error: {str(e)}")
        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='取得任務列表失敗'
        )), 500
@jobs_bp.route('/<job_uuid>', methods=['GET'])
@jwt_login_required
def get_job_detail(job_uuid):
    """回傳單一任務的完整資訊(檔案、處理時間、佇列位置、API 統計)。"""
    try:
        validate_job_uuid(job_uuid)

        # Look up the job, ignoring soft-deleted rows.
        job = (TranslationJob.query
               .filter_by(job_uuid=job_uuid)
               .filter(TranslationJob.deleted_at.is_(None))
               .first())
        if job is None:
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_FOUND',
                message='任務不存在'
            )), 404

        # Owner or admin only.
        if job.user_id != g.current_user_id and not g.is_admin:
            return jsonify(create_response(
                success=False,
                error='PERMISSION_DENIED',
                message='無權限存取此任務'
            )), 403

        job_data = job.to_dict(include_files=True)

        # Elapsed processing time: closed interval when finished,
        # open-ended (until now) while still running.
        if job.processing_started_at:
            if job.completed_at:
                job_data['processing_time'] = calculate_processing_time(
                    job.processing_started_at, job.completed_at
                )
            else:
                job_data['processing_time'] = calculate_processing_time(
                    job.processing_started_at
                )

        if job.status == 'PENDING':
            job_data['queue_position'] = TranslationJob.get_queue_position(job.job_uuid)

        # Attach API usage figures once the job is done.
        if job.status == 'COMPLETED':
            job_data['api_usage'] = APIUsageStats.get_user_statistics(
                user_id=job.user_id,
                start_date=job.created_at,
                end_date=job.completed_at
            )

        return jsonify(create_response(success=True, data={'job': job_data}))
    except ValidationError as e:
        return jsonify(create_response(
            success=False,
            error=e.error_code,
            message=str(e)
        )), 400
    except Exception as e:
        logger.error(f"Get job detail error: {str(e)}")
        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='取得任務詳情失敗'
        )), 500
@jobs_bp.route('/<job_uuid>/retry', methods=['POST'])
@jwt_login_required
def retry_job(job_uuid):
    """重試失敗任務

    Resets a retryable job back to PENDING and reports its new queue
    position.

    Fix: the previous error message is now captured *before*
    ``update_status('PENDING', error_message=None)`` clears it — the old
    code read ``job.error_message`` after the reset, so the audit log's
    ``previous_error`` field was always None.
    """
    try:
        # 驗證 UUID 格式
        validate_job_uuid(job_uuid)
        # 取得任務(排除軟刪除的記錄)
        job = TranslationJob.query.filter_by(job_uuid=job_uuid).filter(TranslationJob.deleted_at.is_(None)).first()
        if not job:
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_FOUND',
                message='任務不存在'
            )), 404
        # 檢查權限
        if job.user_id != g.current_user_id and not g.is_admin:
            return jsonify(create_response(
                success=False,
                error='PERMISSION_DENIED',
                message='無權限操作此任務'
            )), 403
        # 檢查是否可以重試
        if not job.can_retry():
            return jsonify(create_response(
                success=False,
                error='CANNOT_RETRY',
                message='任務無法重試(狀態不正確或重試次數已達上限)'
            )), 400
        # Capture the failure reason before the reset below clears it.
        previous_error = job.error_message
        # 重置任務狀態
        job.update_status('PENDING', error_message=None)
        job.increment_retry()
        # 計算新的佇列位置
        queue_position = TranslationJob.get_queue_position(job.job_uuid)
        # 記錄重試日誌
        SystemLog.info(
            'jobs.retry',
            f'Job retry requested: {job_uuid}',
            user_id=g.current_user_id,
            job_id=job.id,
            extra_data={
                'retry_count': job.retry_count,
                'previous_error': previous_error
            }
        )
        logger.info(f"Job retry requested: {job_uuid} (retry count: {job.retry_count})")
        # 重新觸發翻譯任務(這裡會在實作 Celery 時加入)
        # from app.tasks.translation import process_translation_job
        # process_translation_job.delay(job.id)
        return jsonify(create_response(
            success=True,
            data={
                'job_uuid': job.job_uuid,
                'status': job.status,
                'retry_count': job.retry_count,
                'queue_position': queue_position
            },
            message='任務已重新加入佇列'
        ))
    except ValidationError as e:
        return jsonify(create_response(
            success=False,
            error=e.error_code,
            message=str(e)
        )), 400
    except Exception as e:
        logger.error(f"Job retry error: {str(e)}")
        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='重試任務失敗'
        )), 500
@jobs_bp.route('/statistics', methods=['GET'])
@jwt_login_required
def get_user_statistics():
    """回傳當前使用者的任務與 API 使用統計(可選日期範圍)。"""
    try:
        start_date = request.args.get('start_date')
        end_date = request.args.get('end_date')
        # Validate (and normalise) the window only when one bound is given.
        if start_date or end_date:
            start_date, end_date = validate_date_range(start_date, end_date)

        window = {'start_date': start_date, 'end_date': end_date}
        payload = {
            'job_statistics': TranslationJob.get_statistics(
                user_id=g.current_user_id, **window
            ),
            'api_statistics': APIUsageStats.get_user_statistics(
                user_id=g.current_user_id, **window
            )
        }
        return jsonify(create_response(success=True, data=payload))
    except ValidationError as e:
        return jsonify(create_response(
            success=False,
            error=e.error_code,
            message=str(e)
        )), 400
    except Exception as e:
        logger.error(f"Get user statistics error: {str(e)}")
        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='取得統計資料失敗'
        )), 500
@jobs_bp.route('/queue/status', methods=['GET'])
def get_queue_status():
    """公開的佇列狀態查詢(免登入)。"""
    try:
        pending_count = TranslationJob.query.filter_by(status='PENDING').count()
        processing_count = TranslationJob.query.filter_by(status='PROCESSING').count()

        # Up to five currently-running jobs, oldest start time first.
        running = (TranslationJob.query
                   .filter_by(status='PROCESSING')
                   .order_by(TranslationJob.processing_started_at)
                   .limit(5)
                   .all())
        processing_jobs_data = [
            {
                'job_uuid': job.job_uuid,
                'original_filename': job.original_filename,
                'progress': float(job.progress) if job.progress else 0.0,
                'processing_started_at': job.processing_started_at.isoformat() if job.processing_started_at else None,
                'processing_time': calculate_processing_time(job.processing_started_at) if job.processing_started_at else None
            }
            for job in running
        ]

        return jsonify(create_response(
            success=True,
            data={
                'queue_status': {
                    'pending': pending_count,
                    'processing': processing_count,
                    'total_in_queue': pending_count + processing_count
                },
                'processing_jobs': processing_jobs_data
            }
        ))
    except Exception as e:
        logger.error(f"Get queue status error: {str(e)}")
        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='取得佇列狀態失敗'
        )), 500
@jobs_bp.route('/<job_uuid>/cancel', methods=['POST'])
@jwt_login_required
def cancel_job(job_uuid):
    """取消等待中或處理中的任務。"""
    try:
        validate_job_uuid(job_uuid)

        # Fetch the job, excluding soft-deleted records.
        job = (TranslationJob.query
               .filter_by(job_uuid=job_uuid)
               .filter(TranslationJob.deleted_at.is_(None))
               .first())
        if job is None:
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_FOUND',
                message='任務不存在'
            )), 404

        if job.user_id != g.current_user_id and not g.is_admin:
            return jsonify(create_response(
                success=False,
                error='PERMISSION_DENIED',
                message='無權限操作此任務'
            )), 403

        if job.status not in ('PENDING', 'PROCESSING'):
            return jsonify(create_response(
                success=False,
                error='CANNOT_CANCEL',
                message='只能取消等待中或處理中的任務'
            )), 400

        # A running job has an active Celery worker; try to revoke it first.
        if job.status == 'PROCESSING':
            try:
                from app.services.celery_service import revoke_task
                revoke_task(job.job_uuid)
                logger.info(f"Celery task revoked for job: {job.job_uuid}")
            except Exception as celery_error:
                # The user asked to cancel, so proceed even if revocation failed.
                logger.warning(f"Failed to revoke Celery task for job {job.job_uuid}: {celery_error}")

        # Record the pre-cancel status in the failure message, then cancel.
        cancel_message = f'使用者取消任務 (原狀態: {job.status})'
        job.update_status('FAILED', error_message=cancel_message)

        SystemLog.info(
            'jobs.cancel',
            f'Job cancelled by user: {job_uuid}',
            user_id=g.current_user_id,
            job_id=job.id
        )
        logger.info(f"Job cancelled by user: {job_uuid}")

        return jsonify(create_response(
            success=True,
            data={
                'job_uuid': job.job_uuid,
                'status': job.status
            },
            message='任務已取消'
        ))
    except ValidationError as e:
        return jsonify(create_response(
            success=False,
            error=e.error_code,
            message=str(e)
        )), 400
    except Exception as e:
        logger.error(f"Cancel job error: {str(e)}")
        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='取消任務失敗'
        )), 500
@jobs_bp.route('/<job_uuid>', methods=['DELETE'])
@jwt_login_required
def delete_job(job_uuid):
    """刪除任務(軟刪除,保留數據供報表使用)

    Fixes:
    - Consistency: the lookup now excludes already soft-deleted records,
      matching every other job endpoint (previously a deleted job could be
      fetched and "deleted" again).
    - Removed an unused ``from app import db`` import.
    """
    try:
        # 驗證 UUID 格式
        validate_job_uuid(job_uuid)
        # 取得任務(排除軟刪除的記錄,與其他端點一致)
        job = TranslationJob.query.filter_by(job_uuid=job_uuid).filter(TranslationJob.deleted_at.is_(None)).first()
        if not job:
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_FOUND',
                message='任務不存在'
            )), 404
        # 檢查權限
        if job.user_id != g.current_user_id and not g.is_admin:
            return jsonify(create_response(
                success=False,
                error='PERMISSION_DENIED',
                message='無權限操作此任務'
            )), 403
        # 如果是處理中的任務,先嘗試中斷 Celery 任務
        if job.status == 'PROCESSING':
            try:
                from app.services.celery_service import revoke_task
                revoke_task(job.job_uuid)
                logger.info(f"Celery task revoked before deletion for job: {job.job_uuid}")
            except Exception as celery_error:
                # 即使撤銷失敗也繼續刪除任務,因為用戶要求刪除
                logger.warning(f"Failed to revoke Celery task before deletion for job {job.job_uuid}: {celery_error}")
        # 刪除任務相關檔案;只有當父目錄名稱等於 job_uuid 時才整個移除,避免誤刪
        import os
        import shutil
        from pathlib import Path
        try:
            if job.file_path and os.path.exists(job.file_path):
                # 取得任務目錄(通常是 uploads/job_uuid
                job_dir = Path(job.file_path).parent
                if job_dir.exists() and job_dir.name == job.job_uuid:
                    shutil.rmtree(job_dir)
                    logger.info(f"Deleted job directory: {job_dir}")
        except Exception as file_error:
            # File cleanup is best-effort; the DB record is still soft-deleted.
            logger.warning(f"Failed to delete job files: {str(file_error)}")
        # 記錄刪除日誌
        SystemLog.info(
            'jobs.delete',
            f'Job deleted by user: {job_uuid}',
            user_id=g.current_user_id,
            job_id=job.id,
            extra_data={
                'filename': job.original_filename,
                'status': job.status
            }
        )
        # 軟刪除資料庫記錄(保留數據供報表使用)
        job.soft_delete()
        logger.info(f"Job soft deleted by user: {job_uuid}")
        return jsonify(create_response(
            success=True,
            message='任務已刪除'
        ))
    except ValidationError as e:
        return jsonify(create_response(
            success=False,
            error=e.error_code,
            message=str(e)
        )), 400
    except Exception as e:
        logger.error(f"Delete job error: {str(e)}")
        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='刪除任務失敗'
        )), 500

331
app/api/notification.py Normal file
View File

@@ -0,0 +1,331 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
通知系統 API 路由
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
from flask import Blueprint, jsonify, request, g
from app.utils.decorators import jwt_login_required
from sqlalchemy import desc, and_, or_
from datetime import datetime, timedelta
from app import db
from app.models import Notification, NotificationType, User
from app.utils.response import create_taiwan_response
# 移除不需要的導入
# 建立藍圖
notification_bp = Blueprint('notification', __name__, url_prefix='/notifications')
@notification_bp.route('', methods=['GET'])
@jwt_login_required
def get_notifications():
    """列出當前用戶的通知(分頁,支援狀態/類型過濾,未讀在前)。"""
    try:
        current_user_id = g.current_user_id
        page = request.args.get('page', 1, type=int)
        per_page = min(request.args.get('per_page', 20, type=int), 100)
        status_filter = request.args.get('status', 'all')
        type_filter = request.args.get('type', None)

        # Shared predicate: the notification has not expired yet.
        not_expired = or_(
            Notification.expires_at.is_(None),
            Notification.expires_at > datetime.now()
        )

        query = Notification.query.filter_by(user_id=current_user_id).filter(not_expired)
        if status_filter == 'unread':
            query = query.filter_by(is_read=False)
        elif status_filter == 'read':
            query = query.filter_by(is_read=True)
        if type_filter:
            query = query.filter_by(type=type_filter)

        # Unread first, newest first within each group.
        query = query.order_by(Notification.is_read.asc(), desc(Notification.created_at))
        paginated = query.paginate(page=page, per_page=per_page, error_out=False)

        unread_count = (Notification.query
                        .filter_by(user_id=current_user_id, is_read=False)
                        .filter(not_expired)
                        .count())

        return jsonify(create_taiwan_response(
            success=True,
            data={
                'notifications': [n.to_dict() for n in paginated.items],
                'pagination': {
                    'total': paginated.total,
                    'page': page,
                    'per_page': per_page,
                    'pages': paginated.pages
                },
                'unread_count': unread_count
            },
            message='獲取通知列表成功'
        ))
    except Exception as e:
        return jsonify(create_taiwan_response(
            success=False,
            error=f'獲取通知失敗:{str(e)}'
        )), 500
@notification_bp.route('/<notification_id>', methods=['GET'])
@jwt_login_required
def get_notification(notification_id):
    """取得單一通知;讀取時自動標記為已讀。"""
    try:
        notification = Notification.query.filter_by(
            notification_uuid=notification_id,
            user_id=g.current_user_id
        ).first()
        if notification is None:
            return jsonify(create_taiwan_response(
                success=False,
                error='通知不存在'
            )), 404

        # Viewing a notification marks it read on first access.
        if not notification.is_read:
            notification.mark_as_read()
            db.session.commit()

        return jsonify(create_taiwan_response(
            success=True,
            data=notification.to_dict(),
            message='獲取通知成功'
        ))
    except Exception as e:
        return jsonify(create_taiwan_response(
            success=False,
            error=f'獲取通知失敗:{str(e)}'
        )), 500
@notification_bp.route('/<notification_id>/read', methods=['POST'])
@jwt_login_required
def mark_notification_read(notification_id):
    """將指定通知標記為已讀。"""
    try:
        notification = Notification.query.filter_by(
            notification_uuid=notification_id,
            user_id=g.current_user_id
        ).first()
        if notification is None:
            return jsonify(create_taiwan_response(
                success=False,
                error='通知不存在'
            )), 404

        notification.mark_as_read()
        db.session.commit()

        return jsonify(create_taiwan_response(
            success=True,
            message='標記已讀成功'
        ))
    except Exception as e:
        return jsonify(create_taiwan_response(
            success=False,
            error=f'標記已讀失敗:{str(e)}'
        )), 500
@notification_bp.route('/read-all', methods=['POST'])
@jwt_login_required
def mark_all_read():
    """將當前用戶所有未過期的未讀通知標記為已讀。"""
    try:
        # Collect the unread, unexpired notifications for this user.
        pending = Notification.query.filter_by(
            user_id=g.current_user_id,
            is_read=False
        ).filter(or_(
            Notification.expires_at.is_(None),
            Notification.expires_at > datetime.now()
        )).all()

        for item in pending:
            item.mark_as_read()
        db.session.commit()

        marked = len(pending)
        return jsonify(create_taiwan_response(
            success=True,
            data={'marked_count': marked},
            message=f'已標記 {marked} 個通知為已讀'
        ))
    except Exception as e:
        return jsonify(create_taiwan_response(
            success=False,
            error=f'標記全部已讀失敗:{str(e)}'
        )), 500
@notification_bp.route('/<notification_id>', methods=['DELETE'])
@jwt_login_required
def delete_notification(notification_id):
    """刪除指定通知。"""
    try:
        notification = Notification.query.filter_by(
            notification_uuid=notification_id,
            user_id=g.current_user_id
        ).first()
        if notification is None:
            return jsonify(create_taiwan_response(
                success=False,
                error='通知不存在'
            )), 404

        db.session.delete(notification)
        db.session.commit()

        return jsonify(create_taiwan_response(
            success=True,
            message='刪除通知成功'
        ))
    except Exception as e:
        # Roll back the half-finished delete before reporting the failure.
        db.session.rollback()
        return jsonify(create_taiwan_response(
            success=False,
            error=f'刪除通知失敗:{str(e)}'
        )), 500
@notification_bp.route('/clear', methods=['POST'])
@jwt_login_required
def clear_read_notifications():
    """刪除當前用戶的全部已讀通知。"""
    try:
        # Bulk-delete returns the number of affected rows.
        removed = Notification.query.filter_by(
            user_id=g.current_user_id,
            is_read=True
        ).delete()
        db.session.commit()

        return jsonify(create_taiwan_response(
            success=True,
            data={'deleted_count': removed},
            message=f'已清除 {removed} 個已讀通知'
        ))
    except Exception as e:
        db.session.rollback()
        return jsonify(create_taiwan_response(
            success=False,
            error=f'清除通知失敗:{str(e)}'
        )), 500
@notification_bp.route('/test', methods=['POST'])
@jwt_login_required
def create_test_notification():
    """創建測試通知(開發用)。"""
    try:
        stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        test_notification = create_notification(
            user_id=g.current_user_id,
            title="測試通知",
            message=f"這是一個測試通知,創建於 {stamp}",
            notification_type=NotificationType.INFO
        )
        return jsonify(create_taiwan_response(
            success=True,
            data=test_notification.to_dict(),
            message='測試通知已創建'
        ))
    except Exception as e:
        return jsonify(create_taiwan_response(
            success=False,
            error=f'創建測試通知失敗:{str(e)}'
        )), 500
# 工具函數:創建通知
def create_notification(user_id, title, message, notification_type=NotificationType.INFO,
                        job_uuid=None, extra_data=None):
    """
    創建通知的工具函數

    Args:
        user_id: 用戶ID
        title: 通知標題
        message: 通知內容
        notification_type: 通知類型NotificationType 列舉)
        job_uuid: 關聯的任務UUID可選
        extra_data: 額外數據(可選)

    Returns:
        Notification: 創建的通知對象

    Raises:
        Exception: re-raises whatever the insert/commit failed with,
        after rolling back the session.
    """
    try:
        notification = Notification(
            user_id=user_id,
            type=notification_type.value,
            title=title,
            message=message,
            job_uuid=job_uuid,
            extra_data=extra_data,
            # Deep-link to the related job page when one is attached.
            link=f"/job/{job_uuid}" if job_uuid else None
        )
        db.session.add(notification)
        db.session.commit()
        return notification
    except Exception:
        db.session.rollback()
        # Bare raise is the idiomatic re-raise of the active exception
        # (was `raise e`, which needlessly re-binds the exception object).
        raise

183
app/config.py Normal file
View File

@@ -0,0 +1,183 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
應用程式配置模組
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
import os
import secrets
from pathlib import Path
from datetime import timedelta
from dotenv import load_dotenv
# 載入環境變數
load_dotenv()
class Config:
"""基礎配置類別"""
# 基本應用配置
SECRET_KEY = os.environ.get('SECRET_KEY') or secrets.token_hex(32)
APP_NAME = os.environ.get('APP_NAME', 'PANJIT Document Translator')
# 資料庫配置
DATABASE_URL = os.environ.get('DATABASE_URL')
if DATABASE_URL and DATABASE_URL.startswith("mysql://"):
DATABASE_URL = DATABASE_URL.replace("mysql://", "mysql+pymysql://", 1)
SQLALCHEMY_DATABASE_URI = DATABASE_URL
SQLALCHEMY_TRACK_MODIFICATIONS = False
SQLALCHEMY_ENGINE_OPTIONS = {
'pool_pre_ping': True,
'pool_recycle': 3600,
'connect_args': {
'charset': os.environ.get('MYSQL_CHARSET', 'utf8mb4'),
'connect_timeout': 30,
'read_timeout': 30,
'write_timeout': 30,
}
}
# JWT 配置 - 改用 JWT 認證
JWT_SECRET_KEY = os.environ.get('JWT_SECRET_KEY') or SECRET_KEY
JWT_ACCESS_TOKEN_EXPIRES = timedelta(hours=8)
JWT_REFRESH_TOKEN_EXPIRES = timedelta(days=30)
JWT_ALGORITHM = 'HS256'
# Redis 配置
REDIS_URL = os.environ.get('REDIS_URL', 'redis://localhost:6379/0')
# Celery 配置
CELERY_BROKER_URL = os.environ.get('CELERY_BROKER_URL', 'redis://localhost:6379/0')
CELERY_RESULT_BACKEND = os.environ.get('CELERY_RESULT_BACKEND', 'redis://localhost:6379/0')
CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'
CELERY_ACCEPT_CONTENT = ['json']
CELERY_TIMEZONE = 'Asia/Taipei'
CELERY_ENABLE_UTC = False # 改為 False讓 Celery 使用本地時區
# LDAP 配置
LDAP_SERVER = os.environ.get('LDAP_SERVER')
LDAP_PORT = int(os.environ.get('LDAP_PORT', 389))
LDAP_USE_SSL = os.environ.get('LDAP_USE_SSL', 'false').lower() == 'true'
LDAP_BIND_USER_DN = os.environ.get('LDAP_BIND_USER_DN')
LDAP_BIND_USER_PASSWORD = os.environ.get('LDAP_BIND_USER_PASSWORD')
LDAP_SEARCH_BASE = os.environ.get('LDAP_SEARCH_BASE')
LDAP_USER_LOGIN_ATTR = os.environ.get('LDAP_USER_LOGIN_ATTR', 'userPrincipalName')
# SMTP 配置
SMTP_SERVER = os.environ.get('SMTP_SERVER')
SMTP_PORT = int(os.environ.get('SMTP_PORT', 587))
SMTP_USE_TLS = os.environ.get('SMTP_USE_TLS', 'false').lower() == 'true'
SMTP_USE_SSL = os.environ.get('SMTP_USE_SSL', 'false').lower() == 'true'
SMTP_AUTH_REQUIRED = os.environ.get('SMTP_AUTH_REQUIRED', 'false').lower() == 'true'
SMTP_SENDER_EMAIL = os.environ.get('SMTP_SENDER_EMAIL')
SMTP_SENDER_PASSWORD = os.environ.get('SMTP_SENDER_PASSWORD', '')
# 檔案上傳配置
UPLOAD_FOLDER = Path(os.environ.get('UPLOAD_FOLDER', 'uploads')).absolute()
MAX_CONTENT_LENGTH = int(os.environ.get('MAX_CONTENT_LENGTH', 26214400)) # 25MB
ALLOWED_EXTENSIONS = {'.docx', '.doc', '.pptx', '.xlsx', '.xls', '.pdf'}
FILE_RETENTION_DAYS = int(os.environ.get('FILE_RETENTION_DAYS', 7))
# Dify API 配置(從 api.txt 載入)
DIFY_API_BASE_URL = ''
DIFY_API_KEY = ''
# 分離的 Dify API 配置
DIFY_TRANSLATION_BASE_URL = ''
DIFY_TRANSLATION_API_KEY = ''
DIFY_OCR_BASE_URL = ''
DIFY_OCR_API_KEY = ''
# 日誌配置
LOG_LEVEL = os.environ.get('LOG_LEVEL', 'INFO')
LOG_FILE = Path(os.environ.get('LOG_FILE', 'logs/app.log')).absolute()
# 管理員配置
ADMIN_EMAIL = os.environ.get('ADMIN_EMAIL', 'ymirliu@panjit.com.tw')
@classmethod
def load_dify_config(cls):
"""從 api.txt 載入 Dify API 配置"""
api_file = Path('api.txt')
if api_file.exists():
try:
with open(api_file, 'r', encoding='utf-8') as f:
for line in f:
line = line.strip()
if not line or line.startswith('#'):
continue
# 翻译API配置
if line.startswith('translation_base_url:'):
cls.DIFY_TRANSLATION_BASE_URL = line.split(':', 1)[1].strip()
elif line.startswith('translation_api:'):
cls.DIFY_TRANSLATION_API_KEY = line.split(':', 1)[1].strip()
# OCR API配置
elif line.startswith('ocr_base_url:'):
cls.DIFY_OCR_BASE_URL = line.split(':', 1)[1].strip()
elif line.startswith('ocr_api:'):
cls.DIFY_OCR_API_KEY = line.split(':', 1)[1].strip()
# 兼容旧格式
elif line.startswith('base_url:'):
cls.DIFY_API_BASE_URL = line.split(':', 1)[1].strip()
cls.DIFY_TRANSLATION_BASE_URL = line.split(':', 1)[1].strip()
elif line.startswith('api:'):
cls.DIFY_API_KEY = line.split(':', 1)[1].strip()
cls.DIFY_TRANSLATION_API_KEY = line.split(':', 1)[1].strip()
except Exception as e:
print(f"Error loading Dify config: {e}")
pass
@classmethod
def init_directories(cls):
    """Create every directory the application writes to (uploads, log folder)."""
    # mkdir with parents/exist_ok is idempotent, so this is safe on every boot.
    for required_dir in (cls.UPLOAD_FOLDER, cls.LOG_FILE.parent):
        required_dir.mkdir(parents=True, exist_ok=True)
class DevelopmentConfig(Config):
    """Development environment configuration: debug mode enabled."""
    DEBUG = True
    FLASK_ENV = 'development'
class ProductionConfig(Config):
    """Production environment configuration: debug off, larger DB pool."""
    DEBUG = False
    FLASK_ENV = 'production'
    # Production-only overrides layered on top of the base engine options.
    SQLALCHEMY_ENGINE_OPTIONS = {
        **Config.SQLALCHEMY_ENGINE_OPTIONS,
        'pool_size': 10,
        'max_overflow': 20,
    }
class TestingConfig(Config):
    """Testing configuration: CSRF disabled, in-memory SQLite database."""
    TESTING = True
    WTF_CSRF_ENABLED = False
    SQLALCHEMY_DATABASE_URI = 'sqlite:///:memory:'
# Config registry: create_app() looks up the FLASK_ENV name here;
# unknown/unset environments fall back to 'default' (development).
config = {
    'development': DevelopmentConfig,
    'production': ProductionConfig,
    'testing': TestingConfig,
    'default': DevelopmentConfig
}

30
app/models/__init__.py Normal file
View File

@@ -0,0 +1,30 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
資料模型模組
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
from .user import User
from .job import TranslationJob, JobFile
from .cache import TranslationCache
from .stats import APIUsageStats
from .log import SystemLog
from .notification import Notification, NotificationType
from .sys_user import SysUser, LoginLog
# Public API of the models package (`from app.models import *` exposes these).
__all__ = [
    'User',
    'TranslationJob',
    'JobFile',
    'TranslationCache',
    'APIUsageStats',
    'SystemLog',
    'Notification',
    'NotificationType',
    'SysUser',
    'LoginLog'
]

138
app/models/cache.py Normal file
View File

@@ -0,0 +1,138 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
翻譯快取資料模型
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
import hashlib
from sqlalchemy.sql import func
from app import db
class TranslationCache(db.Model):
    """Translation cache table (dt_translation_cache).

    Caches one translated segment per (source text hash, source language,
    target language) triple so identical text is only translated once.
    """
    __tablename__ = 'dt_translation_cache'

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    source_text_hash = db.Column(db.String(64), nullable=False, comment='來源文字hash')
    source_language = db.Column(db.String(50), nullable=False, comment='來源語言')
    target_language = db.Column(db.String(50), nullable=False, comment='目標語言')
    source_text = db.Column(db.Text, nullable=False, comment='來源文字')
    translated_text = db.Column(db.Text, nullable=False, comment='翻譯文字')
    created_at = db.Column(db.DateTime, default=func.now(), comment='建立時間')

    # Unique constraint on the lookup triple plus an index for per-language stats.
    __table_args__ = (
        db.UniqueConstraint('source_text_hash', 'source_language', 'target_language', name='uk_cache'),
        db.Index('idx_languages', 'source_language', 'target_language'),
    )

    def __repr__(self):
        return f'<TranslationCache {self.source_text_hash[:8]}...>'

    def to_dict(self):
        """Serialize the cache row to JSON-friendly primitives."""
        return {
            'id': self.id,
            'source_text_hash': self.source_text_hash,
            'source_language': self.source_language,
            'target_language': self.target_language,
            'source_text': self.source_text,
            'translated_text': self.translated_text,
            'created_at': self.created_at.isoformat() if self.created_at else None
        }

    @staticmethod
    def generate_hash(text):
        """Return the SHA-256 hex digest of *text* (UTF-8 encoded)."""
        return hashlib.sha256(text.encode('utf-8')).hexdigest()

    @classmethod
    def get_translation(cls, source_text, source_language, target_language):
        """Return the cached translation for the triple, or None on a miss."""
        text_hash = cls.generate_hash(source_text)
        cache_entry = cls.query.filter_by(
            source_text_hash=text_hash,
            source_language=source_language,
            target_language=target_language
        ).first()
        return cache_entry.translated_text if cache_entry else None

    @classmethod
    def save_translation(cls, source_text, source_language, target_language, translated_text):
        """Insert or update a cache entry and commit.  Always returns True."""
        text_hash = cls.generate_hash(source_text)
        # Check whether an entry for this triple already exists.
        existing = cls.query.filter_by(
            source_text_hash=text_hash,
            source_language=source_language,
            target_language=target_language
        ).first()
        if existing:
            # Overwrite the stored translation in place.
            existing.translated_text = translated_text
        else:
            # Create a fresh cache row.
            cache_entry = cls(
                source_text_hash=text_hash,
                source_language=source_language,
                target_language=target_language,
                source_text=source_text,
                translated_text=translated_text
            )
            db.session.add(cache_entry)
        db.session.commit()
        return True

    @classmethod
    def get_cache_statistics(cls):
        """Return totals, per-language-pair counts and last-7-day entry count."""
        total_entries = cls.query.count()
        # Entry counts grouped by (source, target) language pair.
        language_pairs = db.session.query(
            cls.source_language,
            cls.target_language,
            func.count(cls.id).label('count')
        ).group_by(cls.source_language, cls.target_language).all()
        # Entries created within the last week.
        from datetime import datetime, timedelta
        week_ago = datetime.utcnow() - timedelta(days=7)
        recent_entries = cls.query.filter(cls.created_at >= week_ago).count()
        return {
            'total_entries': total_entries,
            'language_pairs': [
                {
                    'source_language': pair.source_language,
                    'target_language': pair.target_language,
                    'count': pair.count
                }
                for pair in language_pairs
            ],
            'recent_entries': recent_entries
        }

    @classmethod
    def clear_old_cache(cls, days_to_keep=90):
        """Delete cache rows older than *days_to_keep* days; return the count."""
        from datetime import datetime, timedelta
        cutoff_date = datetime.utcnow() - timedelta(days=days_to_keep)
        deleted_count = cls.query.filter(
            cls.created_at < cutoff_date
        ).delete(synchronize_session=False)
        db.session.commit()
        return deleted_count

327
app/models/job.py Normal file
View File

@@ -0,0 +1,327 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
翻譯任務資料模型
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
import json
import uuid
from datetime import datetime, timedelta
from sqlalchemy.sql import func
from sqlalchemy import event
from app import db
from app.utils.timezone import format_taiwan_time
class TranslationJob(db.Model):
    """Translation job table (dt_translation_jobs).

    Tracks one uploaded document through its lifecycle
    (PENDING -> PROCESSING -> COMPLETED / FAILED / RETRY), including
    progress, token/cost accounting and soft deletion for reporting.
    """
    __tablename__ = 'dt_translation_jobs'

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    job_uuid = db.Column(db.String(36), unique=True, nullable=False, index=True, comment='任務唯一識別碼')
    user_id = db.Column(db.Integer, db.ForeignKey('dt_users.id'), nullable=False, comment='使用者ID')
    original_filename = db.Column(db.String(500), nullable=False, comment='原始檔名')
    file_extension = db.Column(db.String(10), nullable=False, comment='檔案副檔名')
    file_size = db.Column(db.BigInteger, nullable=False, comment='檔案大小(bytes)')
    file_path = db.Column(db.String(1000), nullable=False, comment='檔案路徑')
    source_language = db.Column(db.String(50), default=None, comment='來源語言')
    target_languages = db.Column(db.JSON, nullable=False, comment='目標語言陣列')
    status = db.Column(
        db.Enum('PENDING', 'PROCESSING', 'COMPLETED', 'FAILED', 'RETRY', name='job_status'),
        default='PENDING',
        comment='任務狀態'
    )
    progress = db.Column(db.Numeric(5, 2), default=0.00, comment='處理進度(%)')
    retry_count = db.Column(db.Integer, default=0, comment='重試次數')
    error_message = db.Column(db.Text, comment='錯誤訊息')
    total_tokens = db.Column(db.Integer, default=0, comment='總token數')
    total_cost = db.Column(db.Numeric(10, 4), default=0.0000, comment='總成本')
    conversation_id = db.Column(db.String(100), comment='Dify對話ID用於維持翻譯上下文')
    processing_started_at = db.Column(db.DateTime, comment='開始處理時間')
    completed_at = db.Column(db.DateTime, comment='完成時間')
    created_at = db.Column(db.DateTime, default=func.now(), comment='建立時間')
    updated_at = db.Column(
        db.DateTime,
        default=func.now(),
        onupdate=func.now(),
        comment='更新時間'
    )
    deleted_at = db.Column(db.DateTime, comment='軟刪除時間')

    # Relationships: job files cascade-delete with the job; usage stats do not.
    files = db.relationship('JobFile', backref='job', lazy='dynamic', cascade='all, delete-orphan')
    api_usage_stats = db.relationship('APIUsageStats', backref='job', lazy='dynamic')

    def __repr__(self):
        return f'<TranslationJob {self.job_uuid}>'

    def __init__(self, **kwargs):
        """Initialize the job, generating a job_uuid when none was supplied."""
        super().__init__(**kwargs)
        if not self.job_uuid:
            self.job_uuid = str(uuid.uuid4())

    def to_dict(self, include_files=False):
        """Serialize the job; timestamps are formatted as Taiwan local time.

        Args:
            include_files: when True, also serialize related JobFile rows.
        """
        data = {
            'id': self.id,
            'job_uuid': self.job_uuid,
            'user_id': self.user_id,
            'original_filename': self.original_filename,
            'file_extension': self.file_extension,
            'file_size': self.file_size,
            'file_path': self.file_path,
            'source_language': self.source_language,
            'target_languages': self.target_languages,
            'status': self.status,
            'progress': float(self.progress) if self.progress else 0.0,
            'retry_count': self.retry_count,
            'error_message': self.error_message,
            'total_tokens': self.total_tokens,
            'total_cost': float(self.total_cost) if self.total_cost else 0.0,
            'conversation_id': self.conversation_id,
            'processing_started_at': format_taiwan_time(self.processing_started_at, "%Y-%m-%d %H:%M:%S") if self.processing_started_at else None,
            'completed_at': format_taiwan_time(self.completed_at, "%Y-%m-%d %H:%M:%S") if self.completed_at else None,
            'created_at': format_taiwan_time(self.created_at, "%Y-%m-%d %H:%M:%S") if self.created_at else None,
            'updated_at': format_taiwan_time(self.updated_at, "%Y-%m-%d %H:%M:%S") if self.updated_at else None,
            'deleted_at': format_taiwan_time(self.deleted_at, "%Y-%m-%d %H:%M:%S") if self.deleted_at else None
        }
        if include_files:
            data['files'] = [f.to_dict() for f in self.files]
        return data

    def update_status(self, status, error_message=None, progress=None):
        """Update job status (and optionally error/progress) and commit.

        Entering PROCESSING stamps processing_started_at once; entering
        COMPLETED stamps completed_at and forces progress to 100%.
        """
        self.status = status
        if error_message:
            self.error_message = error_message
        if progress is not None:
            self.progress = progress
        if status == 'PROCESSING' and not self.processing_started_at:
            self.processing_started_at = datetime.utcnow()
        elif status == 'COMPLETED':
            self.completed_at = datetime.utcnow()
            self.progress = 100.00
        self.updated_at = datetime.utcnow()
        db.session.commit()

    def add_original_file(self, filename, file_path, file_size):
        """Attach the uploaded source file record to this job and commit."""
        from pathlib import Path
        stored_name = Path(file_path).name
        original_file = JobFile(
            job_id=self.id,
            file_type='source',
            original_filename=filename,
            stored_filename=stored_name,
            file_path=file_path,
            file_size=file_size,
            mime_type=self._get_mime_type(filename)
        )
        db.session.add(original_file)
        db.session.commit()
        return original_file

    def add_translated_file(self, language_code, filename, file_path, file_size):
        """Attach a translated output file record (per target language) and commit."""
        from pathlib import Path
        stored_name = Path(file_path).name
        translated_file = JobFile(
            job_id=self.id,
            file_type='translated',
            language_code=language_code,
            original_filename=filename,
            stored_filename=stored_name,
            file_path=file_path,
            file_size=file_size,
            mime_type=self._get_mime_type(filename)
        )
        db.session.add(translated_file)
        db.session.commit()
        return translated_file

    def _get_mime_type(self, filename):
        """Return the MIME type for *filename*, preferring the explicit map.

        Falls back to mimetypes.guess_type, then to octet-stream.
        """
        import mimetypes
        from pathlib import Path
        ext = Path(filename).suffix.lower()
        mime_map = {
            '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            '.pdf': 'application/pdf',
            '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
            '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
            '.txt': 'text/plain'
        }
        return mime_map.get(ext, mimetypes.guess_type(filename)[0] or 'application/octet-stream')

    def get_translated_files(self):
        """Return all translated output file records for this job."""
        return self.files.filter_by(file_type='translated').all()

    def get_original_file(self):
        """Return the uploaded source file record, or None."""
        return self.files.filter_by(file_type='source').first()

    def can_retry(self):
        """True when the job failed (or is flagged RETRY) and has retries left (<3)."""
        return self.status in ['FAILED', 'RETRY'] and self.retry_count < 3

    def increment_retry(self):
        """Bump the retry counter and commit."""
        self.retry_count += 1
        self.updated_at = datetime.utcnow()
        db.session.commit()

    def soft_delete(self):
        """Soft-delete the job (row is kept for reporting) and commit."""
        self.deleted_at = datetime.utcnow()
        self.updated_at = datetime.utcnow()
        db.session.commit()

    def restore(self):
        """Undo a soft delete and commit."""
        self.deleted_at = None
        self.updated_at = datetime.utcnow()
        db.session.commit()

    def is_deleted(self):
        """True when the job has been soft-deleted."""
        return self.deleted_at is not None

    @classmethod
    def get_queue_position(cls, job_uuid):
        """Return the 1-based queue position of a pending job, or None if unknown."""
        job = cls.query.filter_by(job_uuid=job_uuid, deleted_at=None).first()
        if not job:
            return None
        # Count pending, non-deleted jobs submitted earlier than this one.
        position = cls.query.filter(
            cls.status == 'PENDING',
            cls.deleted_at.is_(None),
            cls.created_at < job.created_at
        ).count()
        return position + 1

    @classmethod
    def get_pending_jobs(cls):
        """Return all non-deleted PENDING jobs, oldest first."""
        return cls.query.filter_by(status='PENDING', deleted_at=None).order_by(cls.created_at.asc()).all()

    @classmethod
    def get_processing_jobs(cls):
        """Return all non-deleted jobs currently PROCESSING."""
        return cls.query.filter_by(status='PROCESSING', deleted_at=None).all()

    @classmethod
    def get_user_jobs(cls, user_id, status=None, limit=None, offset=None, include_deleted=False):
        """Return a user's jobs, newest first, with optional status filter/paging."""
        query = cls.query.filter_by(user_id=user_id)
        # Soft-deleted rows are excluded unless explicitly requested.
        if not include_deleted:
            query = query.filter(cls.deleted_at.is_(None))
        if status and status != 'all':
            query = query.filter_by(status=status.upper())
        query = query.order_by(cls.created_at.desc())
        if limit:
            query = query.limit(limit)
        if offset:
            query = query.offset(offset)
        return query.all()

    @classmethod
    def get_statistics(cls, user_id=None, start_date=None, end_date=None, include_deleted=True):
        """Return job counts by status plus success rate.

        Soft-deleted rows are included by default so report totals stay complete.
        """
        query = cls.query
        if not include_deleted:
            query = query.filter(cls.deleted_at.is_(None))
        if user_id:
            query = query.filter_by(user_id=user_id)
        if start_date:
            query = query.filter(cls.created_at >= start_date)
        if end_date:
            query = query.filter(cls.created_at <= end_date)
        total = query.count()
        completed = query.filter_by(status='COMPLETED').count()
        failed = query.filter_by(status='FAILED').count()
        processing = query.filter_by(status='PROCESSING').count()
        pending = query.filter_by(status='PENDING').count()
        return {
            'total': total,
            'completed': completed,
            'failed': failed,
            'processing': processing,
            'pending': pending,
            'success_rate': (completed / total * 100) if total > 0 else 0
        }
class JobFile(db.Model):
    """File record table (dt_job_files).

    One row per physical file tied to a translation job: either the
    uploaded source ('source') or one translated output per language
    ('translated').
    """
    __tablename__ = 'dt_job_files'

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    job_id = db.Column(db.Integer, db.ForeignKey('dt_translation_jobs.id'), nullable=False, comment='任務ID')
    file_type = db.Column(
        db.Enum('source', 'translated', name='file_type'),
        nullable=False,
        comment='檔案類型'
    )
    language_code = db.Column(db.String(50), comment='語言代碼(翻譯檔案)')
    original_filename = db.Column(db.String(255), nullable=False, comment='原始檔名')
    stored_filename = db.Column(db.String(255), nullable=False, comment='儲存檔名')
    file_path = db.Column(db.String(500), nullable=False, comment='檔案路徑')
    file_size = db.Column(db.BigInteger, default=0, comment='檔案大小')
    mime_type = db.Column(db.String(100), comment='MIME 類型')
    created_at = db.Column(db.DateTime, default=func.now(), comment='建立時間')

    def __repr__(self):
        return f'<JobFile {self.original_filename}>'

    def to_dict(self):
        """Serialize the file record; created_at is Taiwan local time."""
        return {
            'id': self.id,
            'job_id': self.job_id,
            'file_type': self.file_type,
            'language_code': self.language_code,
            'original_filename': self.original_filename,
            'stored_filename': self.stored_filename,
            'file_path': self.file_path,
            'file_size': self.file_size,
            'mime_type': self.mime_type,
            'created_at': format_taiwan_time(self.created_at, "%Y-%m-%d %H:%M:%S") if self.created_at else None
        }
# Event listener: backstop UUID generation for rows created without __init__.
@event.listens_for(TranslationJob, 'before_insert')
def receive_before_insert(mapper, connection, target):
    """Generate the job UUID just before INSERT when it is still unset."""
    if target.job_uuid:
        return
    target.job_uuid = str(uuid.uuid4())

211
app/models/log.py Normal file
View File

@@ -0,0 +1,211 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
系統日誌資料模型
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
import json
from datetime import datetime, timedelta
from sqlalchemy.sql import func
from app import db
class SystemLog(db.Model):
    """System log table (dt_system_logs).

    Database-backed application log with level/module filtering,
    aggregation helpers for dashboards, and retention cleanup.
    """
    __tablename__ = 'dt_system_logs'

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    level = db.Column(
        db.Enum('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL', name='log_level'),
        nullable=False,
        comment='日誌等級'
    )
    module = db.Column(db.String(100), nullable=False, comment='模組名稱')
    user_id = db.Column(db.Integer, db.ForeignKey('dt_users.id'), comment='使用者ID')
    job_id = db.Column(db.Integer, db.ForeignKey('dt_translation_jobs.id'), comment='任務ID')
    message = db.Column(db.Text, nullable=False, comment='日誌訊息')
    extra_data = db.Column(db.JSON, comment='額外資料')
    created_at = db.Column(db.DateTime, default=func.now(), comment='建立時間')

    def __repr__(self):
        return f'<SystemLog {self.level} {self.module}>'

    def to_dict(self):
        """Serialize the log row to JSON-friendly primitives."""
        return {
            'id': self.id,
            'level': self.level,
            'module': self.module,
            'user_id': self.user_id,
            'job_id': self.job_id,
            'message': self.message,
            'extra_data': self.extra_data,
            'created_at': self.created_at.isoformat() if self.created_at else None
        }

    @classmethod
    def log(cls, level, module, message, user_id=None, job_id=None, extra_data=None):
        """Write one log row (level is upper-cased) and commit; return the row."""
        log_entry = cls(
            level=level.upper(),
            module=module,
            message=message,
            user_id=user_id,
            job_id=job_id,
            extra_data=extra_data
        )
        db.session.add(log_entry)
        db.session.commit()
        return log_entry

    @classmethod
    def debug(cls, module, message, user_id=None, job_id=None, extra_data=None):
        """Convenience wrapper for a DEBUG log row."""
        return cls.log('DEBUG', module, message, user_id, job_id, extra_data)

    @classmethod
    def info(cls, module, message, user_id=None, job_id=None, extra_data=None):
        """Convenience wrapper for an INFO log row."""
        return cls.log('INFO', module, message, user_id, job_id, extra_data)

    @classmethod
    def warning(cls, module, message, user_id=None, job_id=None, extra_data=None):
        """Convenience wrapper for a WARNING log row."""
        return cls.log('WARNING', module, message, user_id, job_id, extra_data)

    @classmethod
    def error(cls, module, message, user_id=None, job_id=None, extra_data=None):
        """Convenience wrapper for an ERROR log row."""
        return cls.log('ERROR', module, message, user_id, job_id, extra_data)

    @classmethod
    def critical(cls, module, message, user_id=None, job_id=None, extra_data=None):
        """Convenience wrapper for a CRITICAL log row."""
        return cls.log('CRITICAL', module, message, user_id, job_id, extra_data)

    @classmethod
    def get_logs(cls, level=None, module=None, user_id=None, start_date=None, end_date=None, limit=100, offset=0):
        """Query log rows with optional filters, newest first, with paging.

        Note: *module* is a substring (LIKE) match; *level* is exact.
        """
        query = cls.query
        if level:
            query = query.filter_by(level=level.upper())
        if module:
            query = query.filter(cls.module.like(f'%{module}%'))
        if user_id:
            query = query.filter_by(user_id=user_id)
        if start_date:
            query = query.filter(cls.created_at >= start_date)
        if end_date:
            query = query.filter(cls.created_at <= end_date)
        # Newest entries first.
        query = query.order_by(cls.created_at.desc())
        if limit:
            query = query.limit(limit)
        if offset:
            query = query.offset(offset)
        return query.all()

    @classmethod
    def get_log_statistics(cls, days=7):
        """Aggregate the last *days* of logs by level, module (top 10) and day."""
        end_date = datetime.utcnow()
        start_date = end_date - timedelta(days=days)
        # Counts per log level.
        level_stats = db.session.query(
            cls.level,
            func.count(cls.id).label('count')
        ).filter(
            cls.created_at >= start_date
        ).group_by(cls.level).all()
        # Ten busiest modules.
        module_stats = db.session.query(
            cls.module,
            func.count(cls.id).label('count')
        ).filter(
            cls.created_at >= start_date
        ).group_by(cls.module).order_by(
            func.count(cls.id).desc()
        ).limit(10).all()
        # Per-day counts broken down by level.
        daily_stats = db.session.query(
            func.date(cls.created_at).label('date'),
            cls.level,
            func.count(cls.id).label('count')
        ).filter(
            cls.created_at >= start_date
        ).group_by(
            func.date(cls.created_at), cls.level
        ).order_by(
            func.date(cls.created_at)
        ).all()
        return {
            'level_stats': [
                {'level': stat.level, 'count': stat.count}
                for stat in level_stats
            ],
            'module_stats': [
                {'module': stat.module, 'count': stat.count}
                for stat in module_stats
            ],
            'daily_stats': [
                {
                    'date': stat.date.isoformat(),
                    'level': stat.level,
                    'count': stat.count
                }
                for stat in daily_stats
            ]
        }

    @classmethod
    def cleanup_old_logs(cls, days_to_keep=30):
        """Delete log rows older than *days_to_keep* days; return the count."""
        cutoff_date = datetime.utcnow() - timedelta(days=days_to_keep)
        deleted_count = cls.query.filter(
            cls.created_at < cutoff_date
        ).delete(synchronize_session=False)
        db.session.commit()
        return deleted_count

    @classmethod
    def get_error_summary(cls, days=1):
        """Summarize ERROR/CRITICAL logs from the last *days* (capped at 50 rows)."""
        start_date = datetime.utcnow() - timedelta(days=days)
        error_logs = cls.query.filter(
            cls.level.in_(['ERROR', 'CRITICAL']),
            cls.created_at >= start_date
        ).order_by(cls.created_at.desc()).limit(50).all()
        # Group the fetched errors by originating module.
        error_by_module = {}
        for log in error_logs:
            module = log.module
            if module not in error_by_module:
                error_by_module[module] = []
            error_by_module[module].append(log.to_dict())
        return {
            'total_errors': len(error_logs),
            'error_by_module': error_by_module,
            'recent_errors': [log.to_dict() for log in error_logs[:10]]
        }

View File

@@ -0,0 +1,98 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
通知系統資料模型
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
from datetime import datetime
from enum import Enum
from sqlalchemy import func
from sqlalchemy.orm import relationship
from app import db
import uuid
import json
class NotificationType(str, Enum):
    """Notification type enumeration (lowercase string values)."""
    SUCCESS = "success"   # success
    ERROR = "error"       # error
    WARNING = "warning"   # warning
    INFO = "info"         # informational
    SYSTEM = "system"     # system-generated
class Notification(db.Model):
    """User notification model (dt_notifications).

    NOTE(review): the `type` column enum declares uppercase values
    ('INFO', 'SUCCESS', ...) while the default and
    create_job_notification() store lowercase NotificationType values
    ('info', ...) — confirm against the actual DB schema.
    """
    __tablename__ = 'dt_notifications'

    # Primary key
    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    notification_uuid = db.Column(db.String(36), unique=True, nullable=False, index=True,
                                 default=lambda: str(uuid.uuid4()), comment='通知唯一識別碼')
    # Basic information
    user_id = db.Column(db.Integer, db.ForeignKey('dt_users.id'), nullable=False, comment='使用者ID')
    type = db.Column(db.Enum('INFO', 'SUCCESS', 'WARNING', 'ERROR', name='notification_type'),
                    nullable=False, default=NotificationType.INFO.value, comment='通知類型')
    title = db.Column(db.String(255), nullable=False, comment='通知標題')
    message = db.Column(db.Text, nullable=False, comment='通知內容')
    # Optional associations
    job_uuid = db.Column(db.String(36), nullable=True, comment='關聯任務UUID')
    link = db.Column(db.String(500), nullable=True, comment='相關連結')
    # Read state
    is_read = db.Column(db.Boolean, default=False, nullable=False, comment='是否已讀')
    read_at = db.Column(db.DateTime, nullable=True, comment='閱讀時間')
    # Timestamps
    created_at = db.Column(db.DateTime, default=func.now(), nullable=False, comment='建立時間')
    expires_at = db.Column(db.DateTime, nullable=True, comment='過期時間')
    # Extra payload (stored as JSON)
    extra_data = db.Column(db.JSON, nullable=True, comment='額外數據')

    # Relationship back to the owning user
    user = db.relationship("User", backref="notifications")

    def __repr__(self):
        return f"<Notification {self.notification_uuid}: {self.title}>"

    def to_dict(self):
        """Serialize for the frontend: `id` is the UUID; `read` mirrors is_read."""
        return {
            'id': self.notification_uuid,  # frontend uses the UUID as the id
            'user_id': self.user_id,
            'type': self.type,
            'title': self.title,
            'message': self.message,
            'job_uuid': self.job_uuid,
            'link': self.link,
            'is_read': self.is_read,
            'read': self.is_read,  # frontend-compat alias
            'read_at': self.read_at.isoformat() if self.read_at else None,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'expires_at': self.expires_at.isoformat() if self.expires_at else None,
            'extra_data': self.extra_data
        }

    def mark_as_read(self):
        """Mark the notification as read (caller is responsible for committing).

        NOTE(review): uses naive local datetime.now() while sibling models use
        utcnow() — confirm which timezone read_at is expected to hold.
        """
        self.is_read = True
        self.read_at = datetime.now()

    @classmethod
    def create_job_notification(cls, user_id, job_uuid, title, message, notification_type=NotificationType.INFO):
        """Build (not persist) a job-related notification linking to the job page."""
        return cls(
            user_id=user_id,
            job_uuid=job_uuid,
            type=notification_type.value,
            title=title,
            message=message,
            link=f"/job/{job_uuid}"  # deep link to the job detail page
        )

233
app/models/stats.py Normal file
View File

@@ -0,0 +1,233 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
API使用統計資料模型
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
from datetime import datetime, timedelta
from sqlalchemy.sql import func
from app import db
from app.utils.timezone import format_taiwan_time
class APIUsageStats(db.Model):
    """API usage statistics table (dt_api_usage_stats).

    Records one row per Dify API call — token usage, cost, latency and
    outcome — and provides aggregation helpers for reports and dashboards.
    """
    __tablename__ = 'dt_api_usage_stats'

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    user_id = db.Column(db.Integer, db.ForeignKey('dt_users.id'), nullable=False, comment='使用者ID')
    job_id = db.Column(db.Integer, db.ForeignKey('dt_translation_jobs.id'), comment='任務ID')
    api_endpoint = db.Column(db.String(200), nullable=False, comment='API端點')
    prompt_tokens = db.Column(db.Integer, default=0, comment='Prompt token數')
    completion_tokens = db.Column(db.Integer, default=0, comment='Completion token數')
    total_tokens = db.Column(db.Integer, default=0, comment='總token數')
    prompt_unit_price = db.Column(db.Numeric(10, 8), default=0.00000000, comment='單價')
    prompt_price_unit = db.Column(db.String(20), default='USD', comment='價格單位')
    cost = db.Column(db.Numeric(10, 4), default=0.0000, comment='成本')
    response_time_ms = db.Column(db.Integer, default=0, comment='回應時間(毫秒)')
    success = db.Column(db.Boolean, default=True, comment='是否成功')
    error_message = db.Column(db.Text, comment='錯誤訊息')
    created_at = db.Column(db.DateTime, default=func.now(), comment='建立時間')

    def __repr__(self):
        return f'<APIUsageStats {self.api_endpoint}>'

    def to_dict(self):
        """Serialize the row to JSON-friendly primitives (Decimal -> float)."""
        return {
            'id': self.id,
            'user_id': self.user_id,
            'job_id': self.job_id,
            'api_endpoint': self.api_endpoint,
            'prompt_tokens': self.prompt_tokens,
            'completion_tokens': self.completion_tokens,
            'total_tokens': self.total_tokens,
            'prompt_unit_price': float(self.prompt_unit_price) if self.prompt_unit_price else 0.0,
            'prompt_price_unit': self.prompt_price_unit,
            'cost': float(self.cost) if self.cost else 0.0,
            'response_time_ms': self.response_time_ms,
            'success': self.success,
            'error_message': self.error_message,
            'created_at': format_taiwan_time(self.created_at, "%Y-%m-%d %H:%M:%S") if self.created_at else None
        }

    @classmethod
    def record_api_call(cls, user_id, job_id, api_endpoint, metadata, response_time_ms, success=True, error_message=None):
        """Persist usage statistics for one Dify API call and commit.

        Args:
            metadata: Dify response metadata dict; usage figures are read
                from metadata['usage'] when present.
            response_time_ms: measured round-trip time in milliseconds.
            success: whether the call succeeded.
            error_message: failure detail, if any.

        Returns:
            The persisted APIUsageStats row.
        """
        usage_data = metadata.get('usage', {})
        prompt_tokens = usage_data.get('prompt_tokens', 0)
        completion_tokens = usage_data.get('completion_tokens', 0)
        total_tokens = usage_data.get('total_tokens', prompt_tokens + completion_tokens)
        # Prefer the total price reported by the API; otherwise fall back to
        # summing the per-part prompt/completion prices.
        if 'total_price' in usage_data:
            cost = float(usage_data.get('total_price', 0.0))
        else:
            prompt_price = float(usage_data.get('prompt_price', 0.0))
            completion_price = float(usage_data.get('completion_price', 0.0))
            cost = prompt_price + completion_price
        # Only the prompt unit price is persisted — the schema has no column
        # for the completion unit price, so it is intentionally not read here.
        prompt_unit_price = usage_data.get('prompt_unit_price', 0.0)
        prompt_price_unit = usage_data.get('currency', 'USD')
        stats = cls(
            user_id=user_id,
            job_id=job_id,
            api_endpoint=api_endpoint,
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=total_tokens,
            prompt_unit_price=prompt_unit_price,
            prompt_price_unit=prompt_price_unit,
            cost=cost,
            response_time_ms=response_time_ms,
            success=success,
            error_message=error_message
        )
        db.session.add(stats)
        db.session.commit()
        return stats

    @classmethod
    def get_user_statistics(cls, user_id, start_date=None, end_date=None):
        """Return call/token/cost totals and averages for one user."""
        query = cls.query.filter_by(user_id=user_id)
        if start_date:
            query = query.filter(cls.created_at >= start_date)
        if end_date:
            query = query.filter(cls.created_at <= end_date)
        # Aggregate figures over the filtered rows.
        total_calls = query.count()
        successful_calls = query.filter_by(success=True).count()
        total_tokens = query.with_entities(func.sum(cls.total_tokens)).scalar() or 0
        total_cost = query.with_entities(func.sum(cls.cost)).scalar() or 0.0
        avg_response_time = query.with_entities(func.avg(cls.response_time_ms)).scalar() or 0
        return {
            'total_calls': total_calls,
            'successful_calls': successful_calls,
            'failed_calls': total_calls - successful_calls,
            'success_rate': (successful_calls / total_calls * 100) if total_calls > 0 else 0,
            'total_tokens': total_tokens,
            'total_cost': float(total_cost),
            'avg_response_time': float(avg_response_time) if avg_response_time else 0
        }

    @classmethod
    def get_daily_statistics(cls, days=30):
        """Return per-day call/token/cost aggregates over the last *days* days."""
        end_date = datetime.utcnow()
        start_date = end_date - timedelta(days=days)
        # One row per calendar day with totals and a filtered success count.
        daily_stats = db.session.query(
            func.date(cls.created_at).label('date'),
            func.count(cls.id).label('total_calls'),
            func.sum(cls.total_tokens).label('total_tokens'),
            func.sum(cls.cost).label('total_cost'),
            func.count().filter(cls.success == True).label('successful_calls')
        ).filter(
            cls.created_at >= start_date,
            cls.created_at <= end_date
        ).group_by(func.date(cls.created_at)).all()
        return [
            {
                'date': stat.date.isoformat(),
                'total_calls': stat.total_calls,
                'successful_calls': stat.successful_calls,
                'failed_calls': stat.total_calls - stat.successful_calls,
                'total_tokens': stat.total_tokens or 0,
                'total_cost': float(stat.total_cost or 0)
            }
            for stat in daily_stats
        ]

    @classmethod
    def get_top_users(cls, limit=10, start_date=None, end_date=None):
        """Return the top *limit* users ranked by total cost."""
        query = db.session.query(
            cls.user_id,
            func.count(cls.id).label('total_calls'),
            func.sum(cls.total_tokens).label('total_tokens'),
            func.sum(cls.cost).label('total_cost')
        )
        if start_date:
            query = query.filter(cls.created_at >= start_date)
        if end_date:
            query = query.filter(cls.created_at <= end_date)
        top_users = query.group_by(cls.user_id).order_by(
            func.sum(cls.cost).desc()
        ).limit(limit).all()
        return [
            {
                'user_id': user.user_id,
                'total_calls': user.total_calls,
                'total_tokens': user.total_tokens or 0,
                'total_cost': float(user.total_cost or 0)
            }
            for user in top_users
        ]

    @classmethod
    def get_cost_trend(cls, days=30):
        """Return daily cost totals over the last *days* days, oldest first."""
        end_date = datetime.utcnow()
        start_date = end_date - timedelta(days=days)
        cost_trend = db.session.query(
            func.date(cls.created_at).label('date'),
            func.sum(cls.cost).label('daily_cost')
        ).filter(
            cls.created_at >= start_date,
            cls.created_at <= end_date
        ).group_by(func.date(cls.created_at)).order_by(
            func.date(cls.created_at)
        ).all()
        return [
            {
                'date': trend.date.isoformat(),
                'cost': float(trend.daily_cost or 0)
            }
            for trend in cost_trend
        ]

    @classmethod
    def get_endpoint_statistics(cls):
        """Return per-endpoint call counts, cost and mean latency, busiest first."""
        endpoint_stats = db.session.query(
            cls.api_endpoint,
            func.count(cls.id).label('total_calls'),
            func.sum(cls.cost).label('total_cost'),
            func.avg(cls.response_time_ms).label('avg_response_time')
        ).group_by(cls.api_endpoint).order_by(
            func.count(cls.id).desc()
        ).all()
        return [
            {
                'endpoint': stat.api_endpoint,
                'total_calls': stat.total_calls,
                'total_cost': float(stat.total_cost or 0),
                'avg_response_time': float(stat.avg_response_time or 0)
            }
            for stat in endpoint_stats
        ]

297
app/models/sys_user.py Normal file
View File

@@ -0,0 +1,297 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
系統使用者模型
專門用於記錄帳號密碼和登入相關資訊
Author: PANJIT IT Team
Created: 2025-10-01
"""
import json
from datetime import datetime, timedelta
from typing import Optional, Dict, Any
from sqlalchemy import Column, Integer, String, Text, Boolean, DateTime, JSON, Enum as SQLEnum, BigInteger
from werkzeug.security import generate_password_hash, check_password_hash
from app import db
from app.utils.logger import get_logger
logger = get_logger(__name__)
class SysUser(db.Model):
"""系統使用者模型 - 專門處理帳號密碼和登入記錄"""
__tablename__ = 'sys_user'
id = Column(BigInteger, primary_key=True)
# 帳號資訊
username = Column(String(255), nullable=False, unique=True, comment='登入帳號')
password_hash = Column(String(512), comment='密碼雜湊 (如果需要本地儲存)')
email = Column(String(255), nullable=False, unique=True, comment='電子郵件')
display_name = Column(String(255), comment='顯示名稱')
# API 認證資訊
api_user_id = Column(String(255), comment='API 回傳的使用者 ID')
api_access_token = Column(Text, comment='API 回傳的 access_token')
api_token_expires_at = Column(DateTime, comment='API Token 過期時間')
# 登入相關
auth_method = Column(SQLEnum('API', 'LDAP', name='sys_user_auth_method'),
default='API', comment='認證方式')
last_login_at = Column(DateTime, comment='最後登入時間')
last_login_ip = Column(String(45), comment='最後登入 IP')
login_count = Column(Integer, default=0, comment='登入次數')
login_success_count = Column(Integer, default=0, comment='成功登入次數')
login_fail_count = Column(Integer, default=0, comment='失敗登入次數')
# 帳號狀態
is_active = Column(Boolean, default=True, comment='是否啟用')
is_locked = Column(Boolean, default=False, comment='是否鎖定')
locked_until = Column(DateTime, comment='鎖定至何時')
# 審計欄位
created_at = Column(DateTime, default=datetime.utcnow, comment='建立時間')
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, comment='更新時間')
def __repr__(self):
return f'<SysUser {self.username}>'
def to_dict(self) -> Dict[str, Any]:
"""轉換為字典格式"""
return {
'id': self.id,
'username': self.username,
'email': self.email,
'display_name': self.display_name,
'api_user_id': self.api_user_id,
'auth_method': self.auth_method,
'last_login_at': self.last_login_at.isoformat() if self.last_login_at else None,
'login_count': self.login_count,
'login_success_count': self.login_success_count,
'login_fail_count': self.login_fail_count,
'is_active': self.is_active,
'is_locked': self.is_locked,
'api_token_expires_at': self.api_token_expires_at.isoformat() if self.api_token_expires_at else None,
'created_at': self.created_at.isoformat() if self.created_at else None
}
@classmethod
def get_or_create(cls, email: str, **kwargs) -> 'SysUser':
    """Fetch or create the system user identified by *email* (commits).

    Scheme A: email is the primary identity key; the name-derived fields
    are refreshed from the auth API payload on every call.

    Args:
        email: e-mail address (primary identity key).
        **kwargs: optional fields -- username, display_name, api_user_id,
            api_access_token, api_token_expires_at, auth_method.

    Returns:
        SysUser: the persisted instance.

    Raises:
        Exception: re-raised after rolling back when the DB operation fails.
    """
    try:
        # Email is the sole lookup key (this table records login identities).
        sys_user = cls.query.filter_by(email=email).first()
        if sys_user:
            # Refresh the existing record from the auth API payload.
            sys_user.username = kwargs.get('username', sys_user.username)  # API name (name+email format)
            sys_user.display_name = kwargs.get('display_name', sys_user.display_name)  # API name (name+email format)
            sys_user.api_user_id = kwargs.get('api_user_id', sys_user.api_user_id)  # Azure Object ID
            sys_user.api_access_token = kwargs.get('api_access_token', sys_user.api_access_token)
            sys_user.api_token_expires_at = kwargs.get('api_token_expires_at', sys_user.api_token_expires_at)
            sys_user.auth_method = kwargs.get('auth_method', sys_user.auth_method)
            sys_user.updated_at = datetime.utcnow()
            logger.info(f"更新現有系統使用者: {email}")
        else:
            # Create a fresh record with zeroed login counters.
            sys_user = cls(
                username=kwargs.get('username', ''),  # API name (name+email format)
                email=email,  # plain email -- the primary identity key
                display_name=kwargs.get('display_name', ''),  # API name (name+email format)
                api_user_id=kwargs.get('api_user_id'),  # Azure Object ID
                api_access_token=kwargs.get('api_access_token'),
                api_token_expires_at=kwargs.get('api_token_expires_at'),
                auth_method=kwargs.get('auth_method', 'API'),
                login_count=0,
                login_success_count=0,
                login_fail_count=0
            )
            db.session.add(sys_user)
            logger.info(f"建立新系統使用者: {email}")
        db.session.commit()
        return sys_user
    except Exception as e:
        db.session.rollback()
        logger.error(f"取得或建立系統使用者失敗: {str(e)}")
        raise
@classmethod
def get_by_email(cls, email: str) -> Optional['SysUser']:
    """Look up a system user by e-mail; None when no record exists."""
    lookup = cls.query.filter_by(email=email)
    return lookup.first()
def record_login_attempt(self, success: bool, ip_address: str = None, auth_method: str = None):
    """Record one login attempt, updating counters and lock state (commits).

    Args:
        success: whether the attempt succeeded.
        ip_address: client IP address (stored only on success).
        auth_method: authentication method to record (e.g. 'API', 'LDAP').
    """
    try:
        self.login_count = (self.login_count or 0) + 1
        if success:
            self.login_success_count = (self.login_success_count or 0) + 1
            self.last_login_at = datetime.utcnow()
            self.last_login_ip = ip_address
            if auth_method:
                self.auth_method = auth_method
            # A successful login clears any active lock.
            if self.is_locked:
                self.is_locked = False
                self.locked_until = None
        else:
            self.login_fail_count = (self.login_fail_count or 0) + 1
            # Lock the account after 5 failures.
            # NOTE(review): login_fail_count is a lifetime total and is never
            # reset on success, so this triggers after 5 cumulative failures,
            # not 5 consecutive ones -- confirm the intended policy.
            if self.login_fail_count >= 5:
                self.is_locked = True
                self.locked_until = datetime.utcnow() + timedelta(minutes=30)  # lock for 30 minutes
        self.updated_at = datetime.utcnow()
        db.session.commit()
    except Exception as e:
        db.session.rollback()
        logger.error(f"記錄登入嘗試失敗: {str(e)}")
def is_account_locked(self) -> bool:
    """Return True while the account lock is in effect; auto-unlock expired locks."""
    if not self.is_locked:
        return False
    expiry = self.locked_until
    if expiry and datetime.utcnow() > expiry:
        # The lock window has elapsed -- clear the flag and persist immediately.
        self.is_locked = False
        self.locked_until = None
        db.session.commit()
        return False
    return True
def set_password(self, password: str):
    """Store a salted password hash locally (only used when passwords are kept in this table)."""
    self.password_hash = generate_password_hash(password)
def check_password(self, password: str) -> bool:
    """Verify *password* against the stored local hash; False when no hash is stored."""
    return bool(self.password_hash) and check_password_hash(self.password_hash, password)
def update_api_token(self, access_token: str, expires_at: datetime = None):
    """Persist a new API access token and its expiry (commits immediately)."""
    self.api_access_token = access_token
    self.api_token_expires_at = expires_at
    self.updated_at = datetime.utcnow()
    db.session.commit()
def is_api_token_valid(self) -> bool:
    """True when a token is stored and its expiry lies in the future (UTC)."""
    token = self.api_access_token
    expires = self.api_token_expires_at
    if not token or not expires:
        return False
    return datetime.utcnow() < expires
class LoginLog(db.Model):
    """Login attempt log model (one row per attempt, success or failure)."""
    __tablename__ = 'login_logs'

    id = Column(BigInteger, primary_key=True)
    # Basic information
    username = Column(String(255), nullable=False, comment='登入帳號')
    auth_method = Column(SQLEnum('API', 'LDAP', name='login_log_auth_method'),
                         nullable=False, comment='認證方式')
    # Login result
    login_success = Column(Boolean, nullable=False, comment='是否成功')
    error_message = Column(Text, comment='錯誤訊息(失敗時)')
    # Environment information
    ip_address = Column(String(45), comment='IP 地址')
    user_agent = Column(Text, comment='瀏覽器資訊')
    # Trimmed API response (optional, for debugging)
    api_response_summary = Column(JSON, comment='API 回應摘要')
    # Timestamp
    login_at = Column(DateTime, default=datetime.utcnow, comment='登入時間')

    def __repr__(self):
        return f'<LoginLog {self.username}:{self.auth_method}:{self.login_success}>'

    @classmethod
    def create_log(cls, username: str, auth_method: str, login_success: bool,
                   error_message: str = None, ip_address: str = None,
                   user_agent: str = None, api_response_summary: Dict = None) -> 'LoginLog':
        """Create and commit one login log entry.

        Args:
            username: account name used for the attempt.
            auth_method: authentication method ('API' or 'LDAP').
            login_success: whether the attempt succeeded.
            error_message: failure reason (when unsuccessful).
            ip_address: client IP address.
            user_agent: browser/user-agent string.
            api_response_summary: trimmed API response for debugging.

        Returns:
            LoginLog: the saved record, or None when persisting failed
            (the session is rolled back and the error logged).
        """
        try:
            log = cls(
                username=username,
                auth_method=auth_method,
                login_success=login_success,
                error_message=error_message,
                ip_address=ip_address,
                user_agent=user_agent,
                api_response_summary=api_response_summary
            )
            db.session.add(log)
            db.session.commit()
            return log
        except Exception as e:
            db.session.rollback()
            logger.error(f"建立登入記錄失敗: {str(e)}")
            return None

    @classmethod
    def get_recent_failed_attempts(cls, username: str, minutes: int = 15) -> int:
        """Count failed attempts for *username* within the last *minutes*.

        Args:
            username: account name to filter on.
            minutes: look-back window in minutes.

        Returns:
            int: number of failed attempts in the window.
        """
        since = datetime.utcnow() - timedelta(minutes=minutes)
        return cls.query.filter(
            cls.username == username,
            cls.login_success == False,  # noqa: E712 -- SQLAlchemy expression, not a Python bool test
            cls.login_at >= since
        ).count()

124
app/models/user.py Normal file
View File

@@ -0,0 +1,124 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
使用者資料模型
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
from datetime import datetime, timedelta
from sqlalchemy.sql import func
from app import db
from app.utils.timezone import format_taiwan_time
class User(db.Model):
    """User table model (dt_users).

    Identity scheme A: ``email`` is the stable unique key; ``username`` and
    ``display_name`` mirror the auth API's name field (name+email format).
    """
    __tablename__ = 'dt_users'

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    username = db.Column(db.String(100), unique=True, nullable=False, index=True, comment='AD帳號')
    display_name = db.Column(db.String(200), nullable=False, comment='顯示名稱')
    email = db.Column(db.String(255), nullable=False, index=True, comment='電子郵件')
    department = db.Column(db.String(100), comment='部門')
    is_admin = db.Column(db.Boolean, default=False, comment='是否為管理員')
    last_login = db.Column(db.DateTime, comment='最後登入時間')
    created_at = db.Column(db.DateTime, default=func.now(), comment='建立時間')
    updated_at = db.Column(
        db.DateTime,
        default=func.now(),
        onupdate=func.now(),
        comment='更新時間'
    )

    # Relationships (dynamic queries; jobs/stats are removed with the user)
    translation_jobs = db.relationship('TranslationJob', backref='user', lazy='dynamic', cascade='all, delete-orphan')
    api_usage_stats = db.relationship('APIUsageStats', backref='user', lazy='dynamic', cascade='all, delete-orphan')
    system_logs = db.relationship('SystemLog', backref='user', lazy='dynamic')

    def __repr__(self):
        return f'<User {self.username}>'

    def to_dict(self, include_stats=False):
        """Serialize to a dict; timestamps rendered in Taiwan local time.

        Args:
            include_stats: also include job counts and total API cost
                (incurs extra queries per field).
        """
        data = {
            'id': self.id,
            'username': self.username,
            'display_name': self.display_name,
            'email': self.email,
            'department': self.department,
            'is_admin': self.is_admin,
            'last_login': format_taiwan_time(self.last_login, "%Y-%m-%d %H:%M:%S") if self.last_login else None,
            'created_at': format_taiwan_time(self.created_at, "%Y-%m-%d %H:%M:%S") if self.created_at else None,
            'updated_at': format_taiwan_time(self.updated_at, "%Y-%m-%d %H:%M:%S") if self.updated_at else None
        }
        if include_stats:
            data.update({
                'total_jobs': self.translation_jobs.count(),
                'completed_jobs': self.translation_jobs.filter_by(status='COMPLETED').count(),
                'failed_jobs': self.translation_jobs.filter_by(status='FAILED').count(),
                'total_cost': self.get_total_cost()
            })
        return data

    def get_total_cost(self):
        """Sum this user's API cost from APIUsageStats (0.0 on any error)."""
        try:
            from app.models.stats import APIUsageStats
            return db.session.query(
                func.sum(APIUsageStats.cost)
            ).filter(APIUsageStats.user_id == self.id).scalar() or 0.0
        except Exception:
            return 0.0

    def update_last_login(self):
        """Stamp last_login with the current UTC time and commit."""
        self.last_login = datetime.utcnow()
        db.session.commit()

    @classmethod
    def get_or_create(cls, username, display_name, email, department=None):
        """Fetch the user keyed by email, creating or refreshing it (commits)."""
        # Look up by email first -- it is the unique, stable identifier.
        user = cls.query.filter_by(email=email).first()
        if user:
            # Refresh mutable fields from the auth API (name is name+email format).
            user.username = username
            user.display_name = display_name
            if department:
                user.department = department
            user.updated_at = datetime.utcnow()
        else:
            # Create a new user record.
            user = cls(
                username=username,
                display_name=display_name,
                email=email,
                department=department,
                # NOTE(review): hard-coded administrator account -- consider
                # moving this to configuration.
                is_admin=(email.lower() == 'ymirliu@panjit.com.tw')
            )
            db.session.add(user)
        db.session.commit()
        return user

    @classmethod
    def get_by_email(cls, email):
        """Return the user with the given email, or None."""
        return cls.query.filter_by(email=email).first()

    @classmethod
    def get_admin_users(cls):
        """Return all administrator users."""
        return cls.query.filter_by(is_admin=True).all()

    @classmethod
    def get_active_users(cls, days=30):
        """Return users who logged in within the last *days* days."""
        cutoff_date = datetime.utcnow() - timedelta(days=days)
        return cls.query.filter(cls.last_login >= cutoff_date).all()

92
app/root.py Normal file
View File

@@ -0,0 +1,92 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Root routes and static file serving for SPA in production.
These were originally defined in the top-level app.py. Moving them into the
package allows a clean WSGI entry (wsgi:app) without importing app.py.
"""
import os
from pathlib import Path
from datetime import datetime
from flask import Blueprint, current_app, send_from_directory
# Blueprint hosting the SPA entry point, static assets and API discovery routes.
root_bp = Blueprint('root', __name__)
def get_static_dir():
    """Return the built frontend directory, <project root>/frontend/dist, as a string."""
    return str(Path(__file__).parent.parent / 'frontend' / 'dist')
@root_bp.route('/')
def index():
    """Serve the SPA entry point; fall back to API info when no frontend build exists.

    Returns:
        The built index.html when frontend/dist is present, otherwise a small
        JSON-serializable dict describing the API.
    """
    # Single fallback payload (previously duplicated in both branches).
    fallback = {
        'application': 'PANJIT Document Translator',
        'version': '1.0.0',
        'status': 'running',
        'api_base_url': '/api/v1',
        'note': 'Frontend files not found, serving API info'
    }
    try:
        static_dir = get_static_dir()
        if Path(static_dir).exists():
            return send_from_directory(static_dir, 'index.html')
        return fallback
    except Exception:
        # Any filesystem/serving error degrades gracefully to the API info.
        return fallback
@root_bp.route('/<path:path>')
def serve_static(path):
    """Serve built frontend assets; unknown paths fall back to the SPA index.html.

    Fix: the original served index.html only when the dist directory did NOT
    exist (a guaranteed failure) and returned a 404 JSON payload for files
    missing inside an existing dist dir, which broke SPA deep links.  Client
    routes with no file on disk must receive index.html so the frontend
    router can resolve them.
    """
    try:
        static_dir = get_static_dir()
        if Path(static_dir).exists():
            try:
                return send_from_directory(static_dir, path)
            except Exception:
                # SPA fallback: no matching file on disk -- hand the request
                # to the frontend router via index.html.
                return send_from_directory(static_dir, 'index.html')
        # No frontend build at all: nothing sensible to serve.
        return {'error': 'File not found', 'path': path}, 404
    except Exception:
        return {'error': 'File not found', 'path': path}, 404
@root_bp.route('/api')
def api_info():
    """Describe the versioned REST API surface for discovery clients."""
    endpoint_map = {name: f'/api/v1/{name}'
                    for name in ('auth', 'files', 'jobs', 'admin', 'health')}
    return {
        'api_version': 'v1',
        'base_url': '/api/v1',
        'endpoints': endpoint_map,
        'documentation': 'Available endpoints provide RESTful API for document translation'
    }
@root_bp.route('/api/health')
def health_check():
    """Lightweight liveness probe kept at /api/health for backward compatibility."""
    payload = {
        'status': 'healthy',
        'timestamp': datetime.utcnow().isoformat(),
        'service': 'PANJIT Document Translator API',
        'version': '1.0.0'
    }
    return payload, 200

19
app/services/__init__.py Normal file
View File

@@ -0,0 +1,19 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
業務服務模組
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
from .dify_client import DifyClient
from .translation_service import TranslationService
from .notification_service import NotificationService
__all__ = [
'DifyClient',
'TranslationService',
'NotificationService'
]

View File

@@ -0,0 +1,137 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Celery任務管理服務
Author: PANJIT IT Team
Created: 2025-09-04
"""
from celery import Celery
from app.utils.logger import get_logger
import os
logger = get_logger(__name__)
def get_celery_app():
    """Return the project's Celery app, or a minimal broker-only fallback.

    The fallback is created when the top-level ``celery_app`` module cannot
    be imported (e.g. tooling contexts); it connects to the same Redis
    broker and can issue control commands, but carries none of the project
    task registrations.
    """
    try:
        from celery_app import app as celery_app
        return celery_app
    except ImportError:
        # Minimal stand-in wired to the same Redis broker.
        broker_url = os.getenv('REDIS_URL', 'redis://localhost:6379/0')
        celery_app = Celery('translation_worker', broker=broker_url)
        return celery_app
def revoke_task(job_uuid):
    """Revoke (and SIGKILL-terminate) the Celery task for the given job.

    Args:
        job_uuid (str): job UUID.

    Returns:
        bool: True when the revoke command was issued, False on any error.
    """
    try:
        celery_app = get_celery_app()
        # NOTE(review): assumes tasks are enqueued with
        # task_id=f"translate_document_{job_uuid}" -- confirm against the
        # code that dispatches translation tasks.
        task_id = f"translate_document_{job_uuid}"
        # Revoke and hard-kill any worker currently executing the task.
        celery_app.control.revoke(task_id, terminate=True, signal='SIGKILL')
        logger.info(f"Successfully revoked Celery task: {task_id}")
        return True
    except Exception as e:
        logger.error(f"Failed to revoke Celery task for job {job_uuid}: {str(e)}")
        return False
def get_active_tasks():
    """Return the worker->active-task mapping reported by Celery ({} on failure or none)."""
    try:
        inspector = get_celery_app().control.inspect()
        # inspect.active() yields None when no workers reply.
        return inspector.active() or {}
    except Exception as e:
        logger.error(f"Failed to get active tasks: {str(e)}")
        return {}
def is_task_active(job_uuid):
    """Check whether the Celery task for *job_uuid* is executing on any worker.

    Args:
        job_uuid (str): job UUID.

    Returns:
        bool: True when a matching task id is found among active tasks.
    """
    try:
        expected_id = f"translate_document_{job_uuid}"
        # Scan every worker's active-task list for the expected id.
        return any(
            task.get('id') == expected_id
            for tasks in get_active_tasks().values()
            for task in tasks
        )
    except Exception as e:
        logger.error(f"Failed to check if task is active for job {job_uuid}: {str(e)}")
        return False
def cleanup_stale_tasks():
    """Mark PROCESSING jobs older than 30 minutes with no live Celery task as FAILED.

    Returns:
        int: number of jobs cleaned up (0 on error).
    """
    try:
        from app.models.job import TranslationJob
        from datetime import datetime, timedelta
        # Jobs still PROCESSING after 30 minutes are stale candidates.
        stale_threshold = datetime.utcnow() - timedelta(minutes=30)
        stale_jobs = TranslationJob.query.filter(
            TranslationJob.status == 'PROCESSING',
            TranslationJob.processing_started_at < stale_threshold
        ).all()
        cleanup_count = 0
        for job in stale_jobs:
            if not is_task_active(job.job_uuid):
                # No live Celery task backs this job -- mark it failed.
                # (update_status is presumed to persist the change -- verify.)
                job.update_status('FAILED', error_message='任務處理超時,已自動取消')
                cleanup_count += 1
                logger.info(f"Cleaned up stale job: {job.job_uuid}")
        return cleanup_count
    except Exception as e:
        logger.error(f"Failed to cleanup stale tasks: {str(e)}")
        return 0

494
app/services/dify_client.py Normal file
View File

@@ -0,0 +1,494 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Dify API 客戶端服務
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
import time
import requests
from typing import Dict, Any, Optional
from flask import current_app
from app.utils.logger import get_logger
from app.utils.exceptions import APIError
from app.models.stats import APIUsageStats
logger = get_logger(__name__)
class DifyClient:
    """Client for the Dify chat-flow APIs (separate translation and OCR apps)."""
def __init__(self):
    """Read translation/OCR endpoint settings from Flask config; warn when incomplete."""
    # Translation API configuration
    self.translation_base_url = current_app.config.get('DIFY_TRANSLATION_BASE_URL', '')
    self.translation_api_key = current_app.config.get('DIFY_TRANSLATION_API_KEY', '')
    # OCR API configuration
    self.ocr_base_url = current_app.config.get('DIFY_OCR_BASE_URL', '')
    self.ocr_api_key = current_app.config.get('DIFY_OCR_API_KEY', '')
    self.timeout = (10, 60)  # (connect timeout, read timeout) in seconds
    self.max_retries = 3
    self.retry_delay = 1.6  # exponential backoff base
    if not self.translation_base_url or not self.translation_api_key:
        logger.warning("Dify Translation API configuration is incomplete")
    if not self.ocr_base_url or not self.ocr_api_key:
        logger.warning("Dify OCR API configuration is incomplete")
def _make_request(self, method: str, endpoint: str, data: Dict[str, Any] = None,
                  user_id: int = None, job_id: int = None, files_data: Dict = None,
                  api_type: str = 'translation') -> Dict[str, Any]:
    """Send an HTTP request to a Dify API with retries and usage accounting.

    Args:
        method: HTTP method ('GET' or 'POST').
        endpoint: path appended to the selected base URL.
        data: query params (GET), JSON body (POST) or form fields (upload).
        user_id: when set, each attempt is recorded in APIUsageStats.
        job_id: optional job to associate with the usage record.
        files_data: requests-style files mapping; switches to multipart upload.
        api_type: 'translation' (default) or 'ocr' -- selects URL/key pair.

    Returns:
        Parsed JSON response body.

    Raises:
        APIError: when the API is unconfigured or all retries fail.
    """
    # Select endpoint configuration by API type.
    if api_type == 'ocr':
        base_url = self.ocr_base_url
        api_key = self.ocr_api_key
        if not base_url or not api_key:
            raise APIError("Dify OCR API 未配置完整")
    else:  # translation
        base_url = self.translation_base_url
        api_key = self.translation_api_key
        if not base_url or not api_key:
            raise APIError("Dify Translation API 未配置完整")
    url = f"{base_url.rstrip('/')}/{endpoint.lstrip('/')}"
    headers = {
        'Authorization': f'Bearer {api_key}',
        'User-Agent': 'PANJIT-Document-Translator/1.0'
    }
    # Only set a JSON Content-Type for non-upload requests; requests builds
    # the multipart boundary header itself when files are passed.
    if not files_data:
        headers['Content-Type'] = 'application/json'
    # Retry loop.
    # NOTE(review): start_time is set once before the loop, so the recorded
    # response_time_ms for attempt N includes all previous attempts and
    # backoff sleeps -- confirm whether per-attempt timing was intended.
    last_exception = None
    start_time = time.time()
    for attempt in range(self.max_retries):
        try:
            if method.upper() == 'GET':
                response = requests.get(url, headers=headers, timeout=self.timeout, params=data)
            elif files_data:
                # File upload via multipart/form-data.
                response = requests.post(url, headers=headers, timeout=self.timeout, files=files_data, data=data)
            else:
                # Plain JSON request.
                response = requests.post(url, headers=headers, timeout=self.timeout, json=data)
            # Elapsed milliseconds since the first attempt.
            response_time_ms = int((time.time() - start_time) * 1000)
            # Raise for 4xx/5xx statuses.
            response.raise_for_status()
            # Parse the JSON body.
            result = response.json()
            # Record API usage statistics for successful calls.
            if user_id:
                self._record_api_usage(
                    user_id=user_id,
                    job_id=job_id,
                    endpoint=endpoint,
                    response_data=result,
                    response_time_ms=response_time_ms,
                    success=True
                )
            return result
        except requests.exceptions.RequestException as e:
            last_exception = e
            response_time_ms = int((time.time() - start_time) * 1000)
            # Record the failed API call as well.
            if user_id:
                self._record_api_usage(
                    user_id=user_id,
                    job_id=job_id,
                    endpoint=endpoint,
                    response_data={},
                    response_time_ms=response_time_ms,
                    success=False,
                    error_message=str(e)
                )
            logger.warning(f"Dify API request failed (attempt {attempt + 1}): {str(e)}")
            # Last attempt: fall through to the final error below.
            if attempt == self.max_retries - 1:
                break
            # Exponential backoff: retry_delay ** attempt seconds.
            delay = self.retry_delay ** attempt
            time.sleep(delay)
    # All retries exhausted.
    error_msg = f"Dify API request failed after {self.max_retries} attempts: {str(last_exception)}"
    logger.error(error_msg)
    raise APIError(error_msg)
def _record_api_usage(self, user_id: int, job_id: Optional[int], endpoint: str,
                      response_data: Dict, response_time_ms: int, success: bool,
                      error_message: str = None):
    """Persist one API-call record in APIUsageStats; failures are logged only.

    Token/usage information is taken from the response's 'metadata' field.
    """
    try:
        metadata = response_data.get('metadata', {})
        # job_id is Optional: invalid ids are stored as NULL to avoid
        # foreign-key constraint errors.
        APIUsageStats.record_api_call(
            user_id=user_id,
            job_id=job_id,
            api_endpoint=endpoint,
            metadata=metadata,
            response_time_ms=response_time_ms,
            success=success,
            error_message=error_message
        )
    except Exception as e:
        # Accounting must never break the main request path.
        logger.warning(f"Failed to record API usage: {str(e)}")
def translate_text(self, text: str, source_language: str, target_language: str,
                   user_id: int = None, job_id: int = None, conversation_id: str = None) -> Dict[str, Any]:
    """Translate *text* via the Dify translation chat-flow.

    Args:
        text: source text (must be non-blank).
        source_language: language code (e.g. 'en', 'zh-tw') or free-form name.
        target_language: target language code or name.
        user_id: optional user for usage accounting and the Dify 'user' field.
        job_id: optional job for usage accounting.
        conversation_id: optional Dify conversation to continue, keeping
            terminology consistent across segments of one document.

    Returns:
        Dict with 'success', 'translated_text', source/target echoes,
        'conversation_id' and raw response 'metadata'.

    Raises:
        APIError: on blank input, empty result, or request failure.
    """
    if not text.strip():
        raise APIError("翻譯文字不能為空")
    # Build the standard translation prompt (English instruction format).
    language_names = {
        'zh-tw': 'Traditional Chinese',
        'zh-cn': 'Simplified Chinese',
        'en': 'English',
        'ja': 'Japanese',
        'ko': 'Korean',
        'vi': 'Vietnamese',
        'th': 'Thai',
        'id': 'Indonesian',
        'ms': 'Malay',
        'es': 'Spanish',
        'fr': 'French',
        'de': 'German',
        'ru': 'Russian',
        'ar': 'Arabic'
    }
    # Unknown codes are passed through verbatim.
    source_lang_name = language_names.get(source_language, source_language)
    target_lang_name = language_names.get(target_language, target_language)
    # The prompt body stays at column 0 inside the f-string so no stray
    # indentation is sent to the model.
    query = f"""Task: Translate ONLY into {target_lang_name} from {source_lang_name}.
Rules:
- Output translation text ONLY (no source text, no notes, no questions, no language-detection remarks).
- Preserve original line breaks.
- Do NOT wrap in quotes or code blocks.
- Maintain original formatting and structure.
{text.strip()}"""
    # Request payload -- format known to work with the deployed chat-flow.
    request_data = {
        'inputs': {},
        'response_mode': 'blocking',
        'user': f"user_{user_id}" if user_id else "doc-translator-user",
        'query': query
    }
    # Continue an existing conversation when provided.
    if conversation_id:
        request_data['conversation_id'] = conversation_id
    logger.info(f"[TRANSLATION] Sending translation request...")
    logger.info(f"[TRANSLATION] Request data: {request_data}")
    logger.info(f"[TRANSLATION] Text length: {len(text)} characters")
    try:
        response = self._make_request(
            method='POST',
            endpoint='/chat-messages',
            data=request_data,
            user_id=user_id,
            job_id=job_id
        )
        # The translation arrives in the chat 'answer' field.
        answer = response.get('answer')
        if not isinstance(answer, str) or not answer.strip():
            raise APIError("Dify API 返回空的翻譯結果")
        return {
            'success': True,
            'translated_text': answer,
            'source_text': text,
            'source_language': source_language,
            'target_language': target_language,
            'conversation_id': response.get('conversation_id'),
            'metadata': response.get('metadata', {})
        }
    except APIError:
        raise
    except Exception as e:
        error_msg = f"翻譯請求處理錯誤: {str(e)}"
        logger.error(error_msg)
        raise APIError(error_msg)
def test_connection(self) -> bool:
    """Smoke-test the translation API with a minimal chat message; True on success."""
    try:
        # Minimal blocking request used purely as a health check.
        test_data = {
            'inputs': {'text': 'test'},
            'response_mode': 'blocking',
            'user': 'health_check'
        }
        response = self._make_request(
            method='POST',
            endpoint='/chat-messages',
            data=test_data
        )
        return response is not None
    except Exception as e:
        logger.error(f"Dify API connection test failed: {str(e)}")
        return False
def get_app_info(self) -> Dict[str, Any]:
    """Fetch Dify app parameters; returns a {'success': bool, ...} dict instead of raising."""
    try:
        app_info = self._make_request(
            method='GET',
            endpoint='/parameters'
        )
    except Exception as e:
        logger.error(f"Failed to get Dify app info: {str(e)}")
        return {'success': False, 'error': str(e)}
    return {'success': True, 'app_info': app_info}
@classmethod
def load_config_from_file(cls, file_path: str = 'api.txt'):
    """Load Dify endpoint/key settings from a simple 'key: value' text file.

    Recognised keys: base_url / translation_base_url, api / translation_api,
    ocr_base_url, ocr_api.  Values are written into ``current_app.config``;
    the legacy DIFY_API_* keys are kept in sync for backward compatibility.
    A missing file is logged as a warning and ignored.
    """
    try:
        import os
        from pathlib import Path
        config_file = Path(file_path)
        if not config_file.exists():
            logger.warning(f"Dify config file not found: {file_path}")
            return
        with open(config_file, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if line.startswith('#') or not line:
                    continue  # skip comments and blank lines
                # Translation API config (legacy key names accepted).
                # split(':', 1) keeps the '://' of URL values intact.
                if line.startswith('base_url:') or line.startswith('translation_base_url:'):
                    base_url = line.split(':', 1)[1].strip()
                    current_app.config['DIFY_TRANSLATION_BASE_URL'] = base_url
                    # Legacy alias
                    current_app.config['DIFY_API_BASE_URL'] = base_url
                elif line.startswith('api:') or line.startswith('translation_api:'):
                    api_key = line.split(':', 1)[1].strip()
                    current_app.config['DIFY_TRANSLATION_API_KEY'] = api_key
                    # Legacy alias
                    current_app.config['DIFY_API_KEY'] = api_key
                # OCR API config
                elif line.startswith('ocr_base_url:'):
                    ocr_base_url = line.split(':', 1)[1].strip()
                    current_app.config['DIFY_OCR_BASE_URL'] = ocr_base_url
                elif line.startswith('ocr_api:'):
                    ocr_api_key = line.split(':', 1)[1].strip()
                    current_app.config['DIFY_OCR_API_KEY'] = ocr_api_key
        logger.info("Dify API config loaded from file")
    except Exception as e:
        logger.error(f"Failed to load Dify config from file: {str(e)}")
def upload_file(self, image_data: bytes, filename: str, user_id: int = None) -> str:
    """Upload an image to the Dify OCR API and return its file id.

    Args:
        image_data: raw image bytes (must be non-empty).
        filename: name reported to Dify for the upload.
        user_id: optional user for the Dify 'user' field / usage stats.

    Returns:
        str: the uploaded file's id.

    Raises:
        APIError: on empty data, missing id in the response, or request failure.
    """
    if not image_data:
        raise APIError("图片数据不能为空")
    logger.info(f"[OCR-UPLOAD] Starting file upload to Dify OCR API")
    logger.info(f"[OCR-UPLOAD] File: (unknown), Size: {len(image_data)} bytes, User: {user_id}")
    # Multipart payload.
    # NOTE(review): the MIME type is hard-coded to image/png regardless of
    # *filename* -- confirm all callers really pass PNG data.
    files_data = {
        'file': (filename, image_data, 'image/png')
    }
    form_data = {
        'user': f"user_{user_id}" if user_id else "doc-translator-user"
    }
    try:
        response = self._make_request(
            method='POST',
            endpoint='/files/upload',
            data=form_data,
            files_data=files_data,
            user_id=user_id,
            api_type='ocr'  # route to the OCR app
        )
        logger.info(f"[OCR-UPLOAD] Raw Dify upload response: {response}")
        file_id = response.get('id')
        if not file_id:
            logger.error(f"[OCR-UPLOAD] No file ID in response: {response}")
            raise APIError("Dify 文件上传失败未返回文件ID")
        logger.info(f"[OCR-UPLOAD] ✓ File uploaded successfully: {file_id}")
        return file_id
    except APIError:
        raise
    except Exception as e:
        error_msg = f"文件上传到Dify失败: {str(e)}"
        logger.error(f"[OCR-UPLOAD] ✗ Upload failed: {error_msg}")
        raise APIError(error_msg)
def ocr_image_with_dify(self, image_data: bytes, filename: str = "image.png",
                        user_id: int = None, job_id: int = None) -> str:
    """Run OCR on an image via the Dify OCR chat-flow.

    Uploads the image, then asks the chat-flow (whose system prompt is
    configured inside Dify) to extract all text from it.

    Args:
        image_data: raw image bytes.
        filename: name used for the upload.
        user_id: optional user for accounting / Dify 'user' field.
        job_id: optional job for accounting.

    Returns:
        str: extracted text, stripped of surrounding whitespace.

    Raises:
        APIError: when the upload fails or Dify returns an empty answer.
    """
    logger.info(f"[OCR-RECOGNITION] Starting OCR process for (unknown)")
    logger.info(f"[OCR-RECOGNITION] Image size: {len(image_data)} bytes, User: {user_id}, Job: {job_id}")
    try:
        # 1. Upload the file to obtain a file_id.
        logger.info(f"[OCR-RECOGNITION] Step 1: Uploading image to Dify...")
        file_id = self.upload_file(image_data, filename, user_id)
        logger.info(f"[OCR-RECOGNITION] Step 1 ✓ File uploaded with ID: {file_id}")
        # 2. Build the OCR request.  The system prompt lives in the Dify
        #    chat-flow; only a short user query is needed here.
        query = "將圖片中的文字完整的提取出來"
        logger.info(f"[OCR-RECOGNITION] Step 2: Preparing OCR request...")
        # 3. Chat-flow request: the image is referenced via the 'files' array
        #    (matching the layout observed in Dify run records).
        request_data = {
            'inputs': {},
            'response_mode': 'blocking',
            'user': f"user_{user_id}" if user_id else "doc-translator-user",
            'query': query,
            'files': [
                {
                    'type': 'image',
                    'transfer_method': 'local_file',
                    'upload_file_id': file_id
                }
            ]
        }
        logger.info(f"[OCR-RECOGNITION] Step 3: Sending OCR request to Dify...")
        logger.info(f"[OCR-RECOGNITION] Request data: {request_data}")
        logger.info(f"[OCR-RECOGNITION] Using OCR API: {self.ocr_base_url}")
        response = self._make_request(
            method='POST',
            endpoint='/chat-messages',
            data=request_data,
            user_id=user_id,
            job_id=job_id,
            api_type='ocr'  # route to the OCR app
        )
        logger.info(f"[OCR-RECOGNITION] Step 3 ✓ Received response from Dify")
        logger.info(f"[OCR-RECOGNITION] Raw Dify OCR response: {response}")
        # Extract the OCR text from the chat answer.
        answer = response.get('answer', '')
        metadata = response.get('metadata', {})
        conversation_id = response.get('conversation_id', '')
        logger.info(f"[OCR-RECOGNITION] Response details:")
        logger.info(f"[OCR-RECOGNITION] - Answer length: {len(answer) if answer else 0} characters")
        logger.info(f"[OCR-RECOGNITION] - Conversation ID: {conversation_id}")
        logger.info(f"[OCR-RECOGNITION] - Metadata: {metadata}")
        if not isinstance(answer, str) or not answer.strip():
            logger.error(f"[OCR-RECOGNITION] ✗ Empty or invalid answer from Dify")
            logger.error(f"[OCR-RECOGNITION] Answer type: {type(answer)}, Content: '{answer}'")
            raise APIError("Dify OCR 返回空的识别结果")
        # Short preview of the extracted text kept for debugging.
        preview = answer[:100] + "..." if len(answer) > 100 else answer
        logger.info(f"[OCR-RECOGNITION] ✓ OCR completed successfully")
        logger.info(f"[OCR-RECOGNITION] Extracted {len(answer)} characters")
        return answer.strip()
    except APIError:
        raise
    except Exception as e:
        error_msg = f"Dify OCR识别失败: {str(e)}"
        logger.error(f"[OCR-RECOGNITION] ✗ OCR process failed: {error_msg}")
        logger.error(f"[OCR-RECOGNITION] Exception details: {type(e).__name__}: {str(e)}")
        raise APIError(error_msg)
def init_dify_config(app):
    """Load Dify config from api.txt and log the configuration status.

    Called once at application startup with the Flask app instance.
    """
    with app.app_context():
        # Read endpoint/key pairs from api.txt into app.config.
        DifyClient.load_config_from_file()
        # Verify completeness of both API configurations.
        translation_base_url = app.config.get('DIFY_TRANSLATION_BASE_URL')
        translation_api_key = app.config.get('DIFY_TRANSLATION_API_KEY')
        ocr_base_url = app.config.get('DIFY_OCR_BASE_URL')
        ocr_api_key = app.config.get('DIFY_OCR_API_KEY')
        logger.info("Dify API Configuration Status:")
        if translation_base_url and translation_api_key:
            logger.info("✓ Translation API configured successfully")
        else:
            logger.warning("✗ Translation API configuration is incomplete")
            # NOTE(review): both branches of the conditionals below are empty
            # strings, so these lines log nothing useful -- status glyphs
            # (e.g. '✓'/'✗') were likely lost in this copy; restore them.
            logger.warning(f" - Translation Base URL: {'' if translation_base_url else ''}")
            logger.warning(f" - Translation API Key: {'' if translation_api_key else ''}")
        if ocr_base_url and ocr_api_key:
            logger.info("✓ OCR API configured successfully")
        else:
            logger.warning("✗ OCR API configuration is incomplete (扫描PDF功能将不可用)")
            logger.warning(f" - OCR Base URL: {'' if ocr_base_url else ''}")
            logger.warning(f" - OCR API Key: {'' if ocr_api_key else ''}")

View File

@@ -0,0 +1,864 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
核心文檔處理邏輯 - 移植自最佳版本
包含完整的 DOCX 文字提取和翻譯插入功能
Author: PANJIT IT Team
Created: 2024-09-02
Modified: 2024-09-02
"""
import re
import sys
import time
from pathlib import Path
from typing import List, Dict, Tuple, Optional, Any
from docx.text.paragraph import Paragraph
from docx.table import Table, _Cell
from docx.shared import Pt
from docx.oxml import OxmlElement
from docx.oxml.ns import qn, nsdecls
import docx
from app.utils.logger import get_logger
from app.utils.exceptions import FileProcessingError
logger = get_logger(__name__)
# ---------- Constants ----------
INSERT_FONT_SIZE_PT = 10
SENTENCE_MODE = True
# ---------- Optional dependencies detection ----------
try:
import blingfire
_HAS_BLINGFIRE = True
except ImportError:
_HAS_BLINGFIRE = False
try:
import pysbd
_HAS_PYSBD = True
except ImportError:
_HAS_PYSBD = False
# ---------- Helper functions ----------
def _has_cjk(text: str) -> bool:
"""Check if text contains CJK (Chinese/Japanese/Korean) characters."""
for char in text:
if '\u4e00' <= char <= '\u9fff' or \
'\u3400' <= char <= '\u4dbf' or \
'\u20000' <= char <= '\u2a6df' or \
'\u3040' <= char <= '\u309f' or \
'\u30a0' <= char <= '\u30ff' or \
'\uac00' <= char <= '\ud7af':
return True
return False
def _normalize_text(text: str) -> str:
"""Normalize text for comparison."""
return re.sub(r'\s+', ' ', text.strip().lower())
def _append_after(p: Paragraph, text_block: str, italic: bool=True, font_size_pt: int=INSERT_FONT_SIZE_PT) -> Paragraph:
    """Insert a new paragraph after p, return the new paragraph (for chain insert).

    Each line of *text_block* becomes a run separated by soft line breaks,
    and a zero-width-space run is appended as an invisible marker so later
    passes can recognize paragraphs inserted by this tool.
    """
    # Create a raw w:p element and splice it directly after p in the XML tree.
    new_p = OxmlElement("w:p")
    p._p.addnext(new_p)
    np = Paragraph(new_p, p._parent)
    lines = text_block.split("\n")
    for i, line in enumerate(lines):
        run = np.add_run(line)
        if italic:
            run.italic = True
        if font_size_pt:
            run.font.size = Pt(font_size_pt)
        if i < len(lines) - 1:
            run.add_break()  # soft break between lines, not a new paragraph
    # Invisible zero-width-space marker identifying our inserted block.
    tag = np.add_run("\u200b")
    if italic:
        tag.italic = True
    if font_size_pt:
        tag.font.size = Pt(font_size_pt)
    return np
def _is_our_insert_block(p: Paragraph) -> bool:
    """Return True iff paragraph contains our zero-width marker.

    NOTE(review): this definition is shadowed by a later re-definition of
    the same name in this module (which scans the full paragraph XML text
    instead of ``p.runs``); at import time only the later version survives.
    Remove one of the two after confirming which behavior is wanted.
    """
    return any("\u200b" in (r.text or "") for r in p.runs)
def _find_last_inserted_after(p: Paragraph, limit: int = 8) -> Optional[Paragraph]:
    """Find the last paragraph that was inserted after p (up to limit paragraphs).

    Walks forward from p through its parent's paragraph list and returns the
    last contiguous marker-tagged paragraph, or None when p is not found,
    has no inserted followers, or its parent exposes no paragraph list.
    """
    try:
        # Get all paragraphs in the parent container.
        if hasattr(p._parent, 'paragraphs'):
            all_paras = list(p._parent.paragraphs)
        else:
            # Handle cases where _parent doesn't have paragraphs (e.g., table cells).
            return None
        # Find p's index by XML element identity (Paragraph wrappers are transient objects).
        p_index = -1
        for i, para in enumerate(all_paras):
            if para._element == p._element:
                p_index = i
                break
        if p_index == -1:
            return None
        # Check paragraphs after p, stopping at the first non-inserted one.
        last_found = None
        for i in range(p_index + 1, min(p_index + 1 + limit, len(all_paras))):
            if _is_our_insert_block(all_paras[i]):
                last_found = all_paras[i]
            else:
                break  # Stop at first non-inserted paragraph
    except Exception:
        return None
    return last_found
def _p_text_with_breaks(p: Paragraph) -> str:
    """Extract text from paragraph with line breaks preserved.

    Walks the paragraph XML in document order, mapping w:t nodes to their
    text, w:br to '\\n' and w:tab to '\\t'.
    """
    parts = []
    for node in p._element.xpath(".//*[local-name()='t' or local-name()='br' or local-name()='tab']"):
        tag = node.tag.split('}', 1)[-1]  # strip the XML namespace prefix
        if tag == "t":
            parts.append(node.text or "")
        elif tag == "br":
            parts.append("\n")
        elif tag == "tab":
            parts.append("\t")
    return "".join(parts)
def _get_cell_full_text(cell) -> str:
    """Join the text of every non-empty paragraph in a table cell with newlines."""
    try:
        paragraph_texts = []
        for paragraph in cell.paragraphs:
            stripped = _p_text_with_breaks(paragraph).strip()
            if stripped:
                paragraph_texts.append(stripped)
        return '\n'.join(paragraph_texts)
    except Exception as e:
        logger.warning(f"提取儲存格文字失敗: {e}")
        return ""
def _is_our_insert_block_text(text: str) -> bool:
    """Heuristically decide whether *text* is one of our inserted translation blocks."""
    if not text:
        return False
    text_lower = text.lower().strip()
    # NOTE(review): ``startswith('')`` is always True for any string, so this
    # function currently returns True for ALL non-empty text.  The empty
    # literal looks like a marker character lost in transit (cf. the
    # '[翻譯' prefix below) -- recover the original marker and restore it.
    return (
        text_lower.startswith('') or
        text_lower.startswith('[翻譯') or
        '翻譯:' in text_lower or
        'translation:' in text_lower or
        text_lower.startswith('translated:') or
        "\u200b" in text
    )
def _is_our_insert_block(p: Paragraph) -> bool:
    """Check if paragraph is our inserted translation (contains zero-width space marker).

    NOTE(review): re-defines ``_is_our_insert_block`` declared earlier in
    this module; this version (full XML text scan via _p_text_with_breaks)
    is the one in effect at runtime.
    """
    text = _p_text_with_breaks(p)
    return "\u200b" in text
def should_translate(text: str, src_lang: str) -> bool:
    """Decide whether *text* is worth sending to the translator.

    Empty/whitespace-only text and pure number/date/separator strings are
    rejected; in auto-detect mode the text must contain CJK characters or
    be longer than five characters.
    """
    stripped = text.strip()
    # Translate anything with at least one character (minimum length is 1).
    if not stripped:
        return False
    # Pure digits, dates, times, separators carry nothing to translate.
    if re.match(r'^[\d\s\.\-\:\/]+$', stripped):
        return False
    # Auto-detect: require CJK content or a reasonably long string.
    if src_lang.lower() in ('auto', 'auto-detect'):
        return _has_cjk(stripped) or len(stripped) > 5
    return True
def _split_sentences(text: str, lang: str = 'auto') -> List[str]:
    """Split *text* into sentences using the best available backend.

    Tries blingfire, then pysbd (when installed and SENTENCE_MODE is on),
    and finally falls back to a simple separator-based splitter.

    Args:
        text: Source text; whitespace-only input yields [].
        lang: Language hint for pysbd ("auto" maps to English).

    Returns:
        Non-empty sentences; the fallback path drops fragments of <= 3 chars.

    Bug fix: the fallback separator list contained empty strings (the CJK
    punctuation characters were lost), and ``str.split('')`` raises
    ValueError.  The fullwidth sentence terminators are restored and empty
    separators are skipped defensively.
    """
    if not text.strip():
        return []
    # Try blingfire first
    if _HAS_BLINGFIRE and SENTENCE_MODE:
        try:
            sentences = blingfire.text_to_sentences(text).split('\n')
            sentences = [s.strip() for s in sentences if s.strip()]
            if sentences:
                return sentences
        except Exception as e:
            logger.warning(f"Blingfire failed: {e}")
    # Try pysbd
    if _HAS_PYSBD and SENTENCE_MODE:
        try:
            seg = pysbd.Segmenter(language="en" if lang == "auto" else lang)
            sentences = seg.segment(text)
            sentences = [s.strip() for s in sentences if s.strip()]
            if sentences:
                return sentences
        except Exception as e:
            logger.warning(f"PySBD failed: {e}")
    # Fallback: split on ASCII and fullwidth sentence terminators.
    separators = ['. ', '。', '！', '？', '!', '?', '\n']
    sentences = [text]
    for sep in separators:
        if not sep:  # defensive: str.split('') raises ValueError
            continue
        new_sentences = []
        for s in sentences:
            parts = s.split(sep)
            if len(parts) > 1:
                # Re-attach the separator (sans trailing space) to each piece.
                new_sentences.extend([p.strip() + sep.rstrip() for p in parts[:-1] if p.strip()])
                if parts[-1].strip():
                    new_sentences.append(parts[-1].strip())
            else:
                new_sentences.append(s)
        sentences = new_sentences
    return [s for s in sentences if len(s.strip()) > 3]
# ---------- Segment class ----------
class Segment:
    """A single translatable unit extracted from a DOCX document.

    Attributes:
        kind: Segment category — e.g. 'para', 'table_cell', or 'txbx'.
        ref:  Handle back to the originating document element.
        ctx:  Human-readable location context (e.g. "Body > Tbl(r1,c2)").
        text: The extracted source text.
    """

    def __init__(self, kind: str, ref: Any, ctx: str, text: str):
        self.kind = kind
        self.ref = ref
        self.ctx = ctx
        self.text = text
# ---------- TextBox helpers ----------
def _txbx_iter_texts(doc: docx.Document):
    """Yield (txbxContent_element, joined_source_text) for every textbox.

    - Deeply collects all descendant <w:p> under each <w:txbxContent>.
    - Skips paragraphs we inserted ourselves, detected via the zero-width
      space marker.  NOTE(review): the all-italic flag is computed but not
      currently used to filter, despite what an earlier docstring claimed.
    - Joins the surviving non-empty lines with '\\n'.
    """
    def _p_text_flags(p_el):
        # Returns (text, has_zero_width_marker, all_visible_runs_italic).
        parts = []
        for node in p_el.xpath(".//*[local-name()='t' or local-name()='br' or local-name()='tab']"):
            tag = node.tag.split('}', 1)[-1]
            if tag == "t":
                parts.append(node.text or "")
            elif tag == "br":
                parts.append("\n")
            else:
                parts.append(" ")
        text = "".join(parts)
        has_zero = ("\u200b" in text)
        runs = p_el.xpath(".//*[local-name()='r']")
        vis, ital = [], []
        for r in runs:
            rt = "".join([(t.text or "") for t in r.xpath(".//*[local-name()='t']")])
            if (rt or "").strip():
                vis.append(rt)
                ital.append(bool(r.xpath(".//*[local-name()='i']")))
        all_italic = (len(vis) > 0 and all(ital))
        return text, has_zero, all_italic

    for tx in doc._element.xpath(".//*[local-name()='txbxContent']"):
        kept = []
        for p in tx.xpath(".//*[local-name()='p']"):  # all descendant paragraphs
            text, has_zero, all_italic = _p_text_flags(p)
            if not (text or "").strip():
                continue
            if has_zero:
                continue  # our inserted translation — skip
            for line in text.split("\n"):
                if line.strip():
                    kept.append(line.strip())
        if kept:
            joined = "\n".join(kept)
            yield tx, joined
def _txbx_append_paragraph(tx, text_block: str, italic: bool = True, font_size_pt: int = INSERT_FONT_SIZE_PT):
    """Append one paragraph holding *text_block* to a <w:txbxContent> element.

    Line breaks inside *text_block* become <w:br> elements, and a trailing
    zero-width-space run is appended so the paragraph can be recognised
    later as one of our inserted translations.
    """
    p = OxmlElement("w:p")
    r = OxmlElement("w:r")
    rPr = OxmlElement("w:rPr")
    if italic:
        rPr.append(OxmlElement("w:i"))
    if font_size_pt:
        sz = OxmlElement("w:sz")
        # w:sz is measured in half-points, hence the *2.
        sz.set(qn("w:val"), str(int(font_size_pt * 2)))
        rPr.append(sz)
    r.append(rPr)
    lines = text_block.split("\n")
    for i, line in enumerate(lines):
        if i > 0:
            r.append(OxmlElement("w:br"))
        t = OxmlElement("w:t")
        t.set(qn("xml:space"), "preserve")
        t.text = line
        r.append(t)
    # Zero-width-space marker identifying this paragraph as ours.
    tag = OxmlElement("w:t")
    tag.set(qn("xml:space"), "preserve")
    tag.text = "\u200b"
    r.append(tag)
    p.append(r)
    tx.append(p)
def _txbx_tail_equals(tx, translations: List[str]) -> bool:
    """True when the textbox already ends with exactly these translations
    (compared after normalization, in order)."""
    paras = tx.xpath("./*[local-name()='p']")
    if len(paras) < len(translations):
        return False
    tail_paras = paras[-len(translations):]
    for para, expected in zip(tail_paras, translations):
        fragments = []
        for node in para.xpath(".//*[local-name()='t' or local-name()='br']"):
            local = node.tag.split("}", 1)[-1]
            fragments.append("\n" if local == "br" else (node.text or ""))
        actual = "".join(fragments).strip()
        if _normalize_text(actual) != _normalize_text(expected):
            return False
    return True
# ---------- Main extraction logic ----------
def _get_paragraph_key(p: Paragraph) -> str:
    """Build a deduplication key for a paragraph.

    Combines a hash of the raw XML with the text length and a text prefix.
    NOTE: str hashes are randomized per interpreter run, so keys are only
    stable within a single process — sufficient for in-run deduplication.
    """
    try:
        raw_xml = p._p.xml if hasattr(p._p, 'xml') else str(p._p)
        body = _p_text_with_breaks(p)
        return f"{hash(raw_xml)}_{len(body)}_{body[:50]}"
    except Exception:
        # Fall back to a purely text-based key.
        body = _p_text_with_breaks(p)
        return f"fallback_{hash(body)}_{len(body)}"
def _collect_docx_segments(doc: docx.Document) -> List[Segment]:
    """
    Enhanced segment collector with improved stability.
    Handles paragraphs, tables, textboxes, and SDT Content Controls.
    Returns Segment objects in document order (body first, then textboxes);
    duplicate paragraphs are suppressed via _get_paragraph_key.
    """
    segs: List[Segment] = []
    seen_par_keys = set()

    def _add_paragraph(p: Paragraph, ctx: str):
        # Record one paragraph, skipping empties, our own inserted
        # translations, and paragraphs already seen (dedup by key).
        try:
            p_key = _get_paragraph_key(p)
            if p_key in seen_par_keys:
                return
            txt = _p_text_with_breaks(p)
            if txt.strip() and not _is_our_insert_block(p):
                segs.append(Segment("para", p, ctx, txt))
                seen_par_keys.add(p_key)
        except Exception as e:
            # Log error but continue processing
            logger.warning(f"段落處理錯誤: {e}, 跳過此段落")

    def _process_container_content(container, ctx: str):
        """
        Recursively processes content within a container (body, cell, or SDT content).
        Identifies and handles paragraphs, tables, and SDT elements.
        """
        if container._element is None:
            return
        for child_element in container._element:
            qname = child_element.tag
            if qname.endswith('}p'):  # Paragraph
                p = Paragraph(child_element, container)
                _add_paragraph(p, ctx)
            elif qname.endswith('}tbl'):  # Table
                table = Table(child_element, container)
                for r_idx, row in enumerate(table.rows, 1):
                    for c_idx, cell in enumerate(row.cells, 1):
                        cell_ctx = f"{ctx} > Tbl(r{r_idx},c{c_idx})"
                        # Extract whole-cell text (rather than per paragraph).
                        cell_text = _get_cell_full_text(cell)
                        if cell_text.strip() and not _is_our_insert_block_text(cell_text):
                            segs.append(Segment("table_cell", cell, cell_ctx, cell_text))
            elif qname.endswith('}sdt'):  # Structured Document Tag (SDT)
                sdt_ctx = f"{ctx} > SDT"
                # 1. Extract SDT metadata text (placeholder, dropdown items).
                ns = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
                # Placeholder text.
                placeholder_texts = []
                for t in child_element.xpath('.//w:placeholder//w:t', namespaces=ns):
                    if t.text:
                        placeholder_texts.append(t.text)
                if placeholder_texts:
                    full_placeholder = "".join(placeholder_texts).strip()
                    if full_placeholder:
                        segs.append(Segment("para", child_element, f"{sdt_ctx}-Placeholder", full_placeholder))
                # Dropdown list items.
                list_items = []
                for item in child_element.xpath('.//w:dropDownList/w:listItem', namespaces=ns):
                    display_text = item.get(qn('w:displayText'))
                    if display_text:
                        list_items.append(display_text)
                if list_items:
                    items_as_text = "\n".join(list_items)
                    segs.append(Segment("para", child_element, f"{sdt_ctx}-Dropdown", items_as_text))
                # 2. Recurse into the SDT's actual content (w:sdtContent).
                sdt_content_element = child_element.find(qn('w:sdtContent'))
                if sdt_content_element is not None:
                    class SdtContentWrapper:
                        # Minimal adapter so sdtContent can be walked like a container.
                        def __init__(self, element, parent):
                            self._element = element
                            self._parent = parent
                    sdt_content_wrapper = SdtContentWrapper(sdt_content_element, container)
                    _process_container_content(sdt_content_wrapper, sdt_ctx)

    # --- Main execution starts here ---
    # 1. Process the main document body
    _process_container_content(doc._body, "Body")
    # 2. Process textboxes
    for tx, s in _txbx_iter_texts(doc):
        if s.strip() and (_has_cjk(s) or should_translate(s, 'auto')):
            segs.append(Segment("txbx", tx, "TextBox", s))
    return segs
def _insert_docx_translations(doc: docx.Document, segs: List[Segment],
                              tmap: Dict[Tuple[str, str], str],
                              targets: List[str], log=lambda s: None) -> Tuple[int, int]:
    """
    Insert translations into DOCX document segments.

    CRITICAL: This function contains the fix for the major translation insertion bug.
    The key fix is in the segment filtering logic - we now correctly check if any target
    language has translation available using the proper key format (target_lang, text).

    Args:
        doc: The DOCX document object
        segs: List of segments to translate
        tmap: Translation map with keys as (target_language, source_text)
        targets: List of target languages in order
        log: Logging function

    Returns:
        Tuple of (successful_insertions, skipped_insertions)

    Key Bug Fix:
        OLD (INCORRECT): if (seg.kind, seg.text) not in tmap and (targets[0], seg.text) not in tmap
        NEW (CORRECT): has_any_translation = any((tgt, seg.text) in tmap for tgt in targets)
    """
    ok_cnt = skip_cnt = 0

    # Helper function to add a formatted run to a paragraph
    def _add_formatted_run(p: Paragraph, text: str, italic: bool, font_size_pt: int):
        lines = text.split("\n")
        for i, line in enumerate(lines):
            run = p.add_run(line)
            if italic:
                run.italic = True
            if font_size_pt:
                run.font.size = Pt(font_size_pt)
            if i < len(lines) - 1:
                run.add_break()
        # Add our zero-width space marker
        tag_run = p.add_run("\u200b")
        if italic:
            tag_run.italic = True
        if font_size_pt:
            tag_run.font.size = Pt(font_size_pt)

    for seg in segs:
        # Check if any target language has translation for this segment
        has_any_translation = any((tgt, seg.text) in tmap for tgt in targets)
        if not has_any_translation:
            log(f"[SKIP] 無翻譯結果: {seg.ctx} | {seg.text[:50]}...")
            skip_cnt += 1
            continue
        # Get translations for all targets, with fallback for missing ones
        translations = []
        for tgt in targets:
            if (tgt, seg.text) in tmap:
                translations.append(tmap[(tgt, seg.text)])
            else:
                log(f"[WARNING] 缺少 {tgt} 翻譯: {seg.text[:30]}...")
                translations.append(f"【翻譯查詢失敗|{tgt}{seg.text[:50]}...")
        log(f"[INSERT] 準備插入 {len(translations)} 個翻譯到 {seg.ctx}: {seg.text[:30]}...")

        if seg.kind == "para":
            # Check if this is an SDT segment (ref is an XML element, not a Paragraph)
            if hasattr(seg.ref, 'tag') and seg.ref.tag.endswith('}sdt'):
                # Handle SDT segments - insert translation into sdtContent
                sdt_element = seg.ref
                ns = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
                sdt_content = sdt_element.find(qn('w:sdtContent'))
                if sdt_content is not None:
                    # Check if translations already exist
                    existing_paras = sdt_content.xpath('.//w:p', namespaces=ns)
                    existing_texts = []
                    for ep in existing_paras:
                        p_obj = Paragraph(ep, None)
                        if _is_our_insert_block(p_obj):
                            existing_texts.append(_p_text_with_breaks(p_obj))
                    # Check if all translations already exist
                    if len(existing_texts) >= len(translations):
                        if all(_normalize_text(e) == _normalize_text(t) for e, t in zip(existing_texts[:len(translations)], translations)):
                            skip_cnt += 1
                            log(f"[SKIP] SDT 已存在翻譯: {seg.text[:30]}...")
                            continue
                    # Add translations to SDT content
                    for t in translations:
                        if not any(_normalize_text(t) == _normalize_text(e) for e in existing_texts):
                            # Create new paragraph in SDT content
                            new_p_element = OxmlElement("w:p")
                            sdt_content.append(new_p_element)
                            new_p = Paragraph(new_p_element, None)
                            _add_formatted_run(new_p, t, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
                    ok_cnt += 1
                    log(f"[SUCCESS] SDT 插入翻譯(交錯格式)")
                continue

            p: Paragraph = seg.ref
            # --- CONTEXT-AWARE INSERTION LOGIC (from successful version) ---
            # Check if the paragraph's parent is a table cell
            if isinstance(p._parent, _Cell):
                cell = p._parent
                try:
                    # Find the current paragraph's position in the cell
                    cell_paragraphs = list(cell.paragraphs)
                    p_index = -1
                    for idx, cell_p in enumerate(cell_paragraphs):
                        if cell_p._element == p._element:
                            p_index = idx
                            break
                    if p_index == -1:
                        log(f"[WARNING] 無法找到段落在單元格中的位置,使用原始方法")
                        # Fallback to original method
                        for block in translations:
                            new_p = cell.add_paragraph()
                            _add_formatted_run(new_p, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
                        ok_cnt += 1
                        continue
                    # Check if translations already exist right after this paragraph
                    existing_texts = []
                    check_limit = min(p_index + 1 + len(translations), len(cell_paragraphs))
                    for idx in range(p_index + 1, check_limit):
                        if _is_our_insert_block(cell_paragraphs[idx]):
                            existing_texts.append(_p_text_with_breaks(cell_paragraphs[idx]))
                    # Check if all translations already exist in order
                    if len(existing_texts) >= len(translations):
                        if all(_normalize_text(e) == _normalize_text(t) for e, t in zip(existing_texts[:len(translations)], translations)):
                            skip_cnt += 1
                            log(f"[SKIP] 表格單元格已存在翻譯: {seg.text[:30]}...")
                            continue
                    # Determine which translations need to be added
                    to_add = []
                    for t in translations:
                        if not any(_normalize_text(t) == _normalize_text(e) for e in existing_texts):
                            to_add.append(t)
                    if not to_add:
                        skip_cnt += 1
                        log(f"[SKIP] 表格單元格所有翻譯已存在: {seg.text[:30]}...")
                        continue
                    # Insert new paragraphs right after the current paragraph
                    insert_after = p
                    for block in to_add:
                        try:
                            # Create new paragraph and insert it after the current position
                            new_p_element = OxmlElement("w:p")
                            insert_after._element.addnext(new_p_element)
                            new_p = Paragraph(new_p_element, cell)
                            _add_formatted_run(new_p, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
                            insert_after = new_p  # Update position for next insertion
                        except Exception as e:
                            log(f"[ERROR] 表格插入失敗: {e}, 嘗試fallback方法")
                            # Fallback: add at the end of cell
                            try:
                                new_p = cell.add_paragraph()
                                _add_formatted_run(new_p, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
                                log(f"[SUCCESS] Fallback插入成功")
                            except Exception as e2:
                                log(f"[FATAL] Fallback也失敗: {e2}")
                                continue
                    ok_cnt += 1
                    log(f"[SUCCESS] 表格單元格插入 {len(to_add)} 個翻譯(緊接原文後)")
                except Exception as e:
                    log(f"[ERROR] 表格處理全面失敗: {e}, 跳過此段落")
                    continue
            else:
                # Normal paragraph (not in table cell) - SIMPLIFIED FOR DEBUGGING
                try:
                    # TEMPORARILY DISABLE existing translation check to force insertion
                    log(f"[DEBUG] 強制插入翻譯到段落: {seg.text[:30]}...")
                    # Force all translations to be added
                    to_add = translations
                    # Use simple positioning - always insert after current paragraph
                    anchor = p
                    for block in to_add:
                        try:
                            log(f"[DEBUG] 嘗試插入: {block[:50]}...")
                            anchor = _append_after(anchor, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
                            log(f"[SUCCESS] _append_after成功插入")
                        except Exception as e:
                            log(f"[ERROR] _append_after失敗: {e}, 嘗試簡化插入")
                            try:
                                # Fallback: simple append
                                if hasattr(p._parent, 'add_paragraph'):
                                    new_p = p._parent.add_paragraph()
                                    _add_formatted_run(new_p, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
                                    log(f"[SUCCESS] Fallback段落插入成功")
                                else:
                                    log(f"[ERROR] 無法進行fallback插入")
                            except Exception as e2:
                                log(f"[FATAL] Fallback也失敗: {e2}")
                                continue
                    ok_cnt += 1
                    log(f"[SUCCESS] 段落強制插入 {len(to_add)} 個翻譯")
                except Exception as e:
                    log(f"[ERROR] 段落處理失敗: {e}, 跳過此段落")
                    continue

        elif seg.kind == "table_cell":
            # Handle translation insertion for a whole table cell
            cell = seg.ref  # cell is a _Cell object
            # Collect any of our translations already present in the cell
            existing_translations = []
            cell_paragraphs = list(cell.paragraphs)
            # Scan backwards for a trailing run of inserted translation paragraphs
            translation_start_index = len(cell_paragraphs)
            for i in range(len(cell_paragraphs) - 1, -1, -1):
                if _is_our_insert_block(cell_paragraphs[i]):
                    existing_translations.insert(0, _p_text_with_breaks(cell_paragraphs[i]))
                    translation_start_index = i
                else:
                    break
            # Skip when every expected translation already exists and matches
            if len(existing_translations) >= len(translations):
                if all(_normalize_text(e) == _normalize_text(t) for e, t in zip(existing_translations[:len(translations)], translations)):
                    skip_cnt += 1
                    log(f"[SKIP] 表格儲存格已存在翻譯: {seg.text[:30]}...")
                    continue
            # Remove stale translation paragraphs (if any)
            for i in range(len(cell_paragraphs) - 1, translation_start_index - 1, -1):
                if _is_our_insert_block(cell_paragraphs[i]):
                    cell._element.remove(cell_paragraphs[i]._element)
            # Detect a simple short-text cell (source text only, no complex structure)
            cell_content = cell.text.strip()
            is_simple_cell = len(cell_content) <= 10 and cell_content == seg.text.strip()
            if is_simple_cell:
                # For simple short text, replace the content in place instead of appending paragraphs
                log(f"[INFO] 簡單儲存格內容替換: '{seg.text.strip()}' -> '{translations[0] if translations else 'N/A'}'")
                # Clear every paragraph's content
                for para in cell.paragraphs:
                    para.clear()
                # Put the source text and all translations into the first paragraph
                first_para = cell.paragraphs[0] if cell.paragraphs else cell.add_paragraph()
                # Original text
                run_orig = first_para.add_run(seg.text.strip())
                # Newline + each translation
                for t in translations:
                    first_para.add_run('\n')
                    run_trans = first_para.add_run(t)
                    run_trans.italic = True
                    if INSERT_FONT_SIZE_PT:
                        run_trans.font.size = Pt(INSERT_FONT_SIZE_PT)
                # Zero-width marker so this cell is recognised as translated
                tag_run = first_para.add_run("\u200b")
                tag_run.italic = True
                if INSERT_FONT_SIZE_PT:
                    tag_run.font.size = Pt(INSERT_FONT_SIZE_PT)
            else:
                # Complex cells keep the original append-paragraph strategy
                for t in translations:
                    new_p = cell.add_paragraph()
                    _add_formatted_run(new_p, t, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
            ok_cnt += 1
            log(f"[SUCCESS] 表格儲存格插入 {len(translations)} 個翻譯")

        elif seg.kind == "txbx":
            tx = seg.ref
            # Check if textbox already has our translations at the end
            if _txbx_tail_equals(tx, translations):
                skip_cnt += 1
                log(f"[SKIP] 文字框已存在翻譯: {seg.text[:30]}...")
                continue
            # Append translations to textbox
            for t in translations:
                _txbx_append_paragraph(tx, t, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
            ok_cnt += 1
            log(f"[SUCCESS] 文字框插入 {len(translations)} 個翻譯")

    return ok_cnt, skip_cnt
# ---------- Main DocumentProcessor class ----------
class DocumentProcessor:
    """Enhanced document processor with complete DOCX handling capabilities."""

    def __init__(self):
        # Reuse the module-level logger for all instance logging.
        self.logger = logger

    def extract_docx_segments(self, file_path: str) -> List[Segment]:
        """Extract all translatable segments from DOCX file.

        Raises:
            FileProcessingError: when the file cannot be opened or parsed.
        """
        try:
            doc = docx.Document(file_path)
            segments = _collect_docx_segments(doc)
            self.logger.info(f"Extracted {len(segments)} segments from {file_path}")
            for seg in segments[:5]:  # Log first 5 segments for debugging
                self.logger.debug(f"Segment: {seg.kind} | {seg.ctx} | {seg.text[:50]}...")
            return segments
        except Exception as e:
            self.logger.error(f"Failed to extract DOCX segments from {file_path}: {str(e)}")
            raise FileProcessingError(f"DOCX 文件分析失敗: {str(e)}")

    def _rematch_segments_to_document(self, doc: docx.Document, old_segments: List[Segment]) -> List[Segment]:
        """Re-match segments from old document instance to new document instance.

        Matching is by (kind, ctx, normalized text).  Unmatched segments are
        kept as-is, though their element refs may not work for insertion.
        """
        try:
            # Extract fresh segments from the current document instance
            fresh_segments = _collect_docx_segments(doc)
            # Match old segments with fresh segments based on text content
            matched_segments = []
            for old_seg in old_segments:
                # Find matching segment in fresh segments
                matched = False
                for fresh_seg in fresh_segments:
                    if (old_seg.kind == fresh_seg.kind and
                            old_seg.ctx == fresh_seg.ctx and
                            _normalize_text(old_seg.text) == _normalize_text(fresh_seg.text)):
                        matched_segments.append(fresh_seg)
                        matched = True
                        break
                if not matched:
                    self.logger.warning(f"Failed to match segment: {old_seg.text[:50]}...")
                    # Still add the old segment but it might not work for insertion
                    matched_segments.append(old_seg)
            self.logger.debug(f"Re-matched {len(matched_segments)} segments to current document")
            return matched_segments
        except Exception as e:
            self.logger.error(f"Failed to re-match segments: {str(e)}")
            # Return original segments as fallback
            return old_segments

    def insert_docx_translations(self, file_path: str, segments: List[Segment],
                                 translation_map: Dict[Tuple[str, str], str],
                                 target_languages: List[str], output_path: str) -> Tuple[int, int]:
        """Insert translations into DOCX file and save to output path.

        Returns:
            (inserted_count, skipped_count)
        """
        try:
            doc = docx.Document(file_path)
            # CRITICAL FIX: Re-match segments with the current document instance
            # The original segments were extracted from a different document instance
            matched_segments = self._rematch_segments_to_document(doc, segments)

            def log_func(msg: str):
                self.logger.debug(msg)

            ok_count, skip_count = _insert_docx_translations(
                doc, matched_segments, translation_map, target_languages, log_func
            )
            # Save the modified document
            doc.save(output_path)
            self.logger.info(f"Inserted {ok_count} translations, skipped {skip_count}. Saved to: {output_path}")
            return ok_count, skip_count
        except Exception as e:
            self.logger.error(f"Failed to insert DOCX translations: {str(e)}")
            raise FileProcessingError(f"DOCX 翻譯插入失敗: {str(e)}")

    def split_text_into_sentences(self, text: str, language: str = 'auto') -> List[str]:
        """Split text into sentences using the best available method."""
        return _split_sentences(text, language)

    def should_translate_text(self, text: str, source_language: str) -> bool:
        """Determine if text should be translated."""
        return should_translate(text, source_language)

    def insert_docx_combined_translations(self, file_path: str, segments: List[Segment],
                                          translation_map: Dict[Tuple[str, str], str],
                                          target_languages: List[str], output_path: str) -> Tuple[int, int]:
        """Insert all translations into a single DOCX file with combined multi-language output.

        This creates a combined file where each original text is followed by all translations
        in the format: original\\n英文\\n越南文 etc.
        """
        try:
            doc = docx.Document(file_path)
            # Re-match segments with the current document instance
            matched_segments = self._rematch_segments_to_document(doc, segments)

            def log_func(msg: str):
                self.logger.debug(msg)

            # Use the existing _insert_docx_translations function which already supports
            # multiple target languages in a single document
            ok_count, skip_count = _insert_docx_translations(
                doc, matched_segments, translation_map, target_languages, log_func
            )
            # Save the combined document
            doc.save(output_path)
            self.logger.info(f"Generated combined multi-language file: {output_path}")
            self.logger.info(f"Inserted {ok_count} translations, skipped {skip_count}")
            return ok_count, skip_count
        except Exception as e:
            self.logger.error(f"Failed to create combined DOCX translations: {str(e)}")
            raise FileProcessingError(f"組合多語言 DOCX 檔案生成失敗: {str(e)}")

View File

@@ -0,0 +1,700 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
增强的PDF解析器 - 支持扫描PDF的OCR处理
Author: PANJIT IT Team
Created: 2024-09-23
Modified: 2024-09-23
"""
import io
from pathlib import Path
from typing import List, Optional
from PyPDF2 import PdfReader
from app.utils.logger import get_logger
from app.utils.exceptions import FileProcessingError
from app.services.dify_client import DifyClient
from app.services.ocr_cache import OCRCache
from app.utils.image_preprocessor import ImagePreprocessor
logger = get_logger(__name__)
# 检查PyMuPDF依赖
try:
import fitz # PyMuPDF
_HAS_PYMUPDF = True
except ImportError:
_HAS_PYMUPDF = False
logger.warning("PyMuPDF not available. Scanned PDF processing will be disabled.")
class EnhancedPdfParser:
"""支持扫描PDF的增强解析器"""
    def __init__(self, file_path: str):
        """Create a parser bound to *file_path*.

        Raises:
            FileProcessingError: if the path does not exist.
        """
        self.file_path = Path(file_path)
        # Dify client performs the OCR calls for scanned pages.
        self.dify_client = DifyClient()
        # Cache avoids re-running OCR on identical page images.
        self.ocr_cache = OCRCache()
        self.image_preprocessor = ImagePreprocessor(use_opencv=True)
        if not self.file_path.exists():
            raise FileProcessingError(f"PDF文件不存在: {file_path}")
def is_scanned_pdf(self) -> bool:
"""检测PDF是否为扫描件"""
try:
reader = PdfReader(str(self.file_path))
text_content = ""
# 检查前3页的文字内容
pages_to_check = min(3, len(reader.pages))
for i in range(pages_to_check):
page_text = reader.pages[i].extract_text()
text_content += page_text
# 如果文字内容很少,很可能是扫描件
text_length = len(text_content.strip())
logger.info(f"PDF text extraction found {text_length} characters in first {pages_to_check} pages")
# 阈值少于100个字符认为是扫描件
is_scanned = text_length < 100
if is_scanned:
logger.info("PDF detected as scanned document, will use OCR processing")
else:
logger.info("PDF detected as text-based document, will use direct text extraction")
return is_scanned
except Exception as e:
logger.warning(f"Failed to analyze PDF type: {e}, treating as scanned document")
return True # 默认当作扫描件处理
    def extract_text_segments(self, user_id: int = None, job_id: int = None) -> List[str]:
        """Extract text segments, choosing the strategy per PDF type.

        Text-based PDFs use direct extraction; scanned PDFs are rasterised
        and OCR'd via Dify (requires PyMuPDF).

        Raises:
            FileProcessingError: on any failure.  NOTE: a FileProcessingError
            raised inside the try (e.g. missing PyMuPDF) is re-wrapped by the
            except clause below.
        """
        try:
            # Try direct text extraction first
            if not self.is_scanned_pdf():
                return self._extract_from_text_pdf()
            # Scanned PDFs: convert pages to images and use Dify OCR
            if not _HAS_PYMUPDF:
                raise FileProcessingError("处理扫描PDF需要PyMuPDF库请安装: pip install PyMuPDF")
            return self._extract_from_scanned_pdf(user_id, job_id)
        except Exception as e:
            logger.error(f"PDF文字提取失败: {str(e)}")
            raise FileProcessingError(f"PDF文件解析失败: {str(e)}")
def _extract_from_text_pdf(self) -> List[str]:
"""从文字型PDF提取文字片段"""
try:
reader = PdfReader(str(self.file_path))
text_segments = []
for page_num, page in enumerate(reader.pages, 1):
page_text = page.extract_text()
if page_text.strip():
# 简单的句子分割
sentences = self._split_text_into_sentences(page_text)
# 过滤掉太短的片段
valid_sentences = [s for s in sentences if len(s.strip()) > 10]
text_segments.extend(valid_sentences)
logger.debug(f"Page {page_num}: extracted {len(valid_sentences)} sentences")
logger.info(f"Text PDF extraction completed: {len(text_segments)} segments")
# 合併短段落以減少不必要的翻譯調用
merged_segments = self._merge_short_segments(text_segments)
return merged_segments
except Exception as e:
logger.error(f"Text PDF extraction failed: {str(e)}")
raise FileProcessingError(f"文字PDF提取失败: {str(e)}")
    def _extract_from_scanned_pdf(self, user_id: int = None, job_id: int = None) -> List[str]:
        """Extract text segments from a scanned PDF using Dify OCR.

        Each page is rasterised at 2x zoom, preprocessed, then OCR'd (with a
        cache keyed on the page image).  Per-page failures are logged and
        skipped; raises FileProcessingError when no page yields any text.
        """
        try:
            doc = fitz.open(str(self.file_path))
            text_segments = []
            total_pages = doc.page_count
            logger.info(f"Processing scanned PDF with {total_pages} pages using Dify OCR")
            for page_num in range(total_pages):
                try:
                    logger.info(f"[PDF-OCR] Processing page {page_num + 1}/{total_pages}")
                    page = doc[page_num]
                    # Render the page as a high-resolution image;
                    # 2x zoom improves OCR accuracy.
                    zoom = 2.0
                    mat = fitz.Matrix(zoom, zoom)
                    pix = page.get_pixmap(matrix=mat, alpha=False)
                    # Convert to PNG bytes, then preprocess the image to
                    # further improve OCR accuracy.
                    img_data_raw = pix.tobytes("png")
                    img_data = self.image_preprocessor.preprocess_smart(img_data_raw)
                    logger.debug(f"[PDF-OCR] Page {page_num + 1}: Image preprocessed ({len(img_data_raw)} -> {len(img_data)} bytes)")
                    filename = f"page_{page_num + 1}.png"
                    logger.info(f"[PDF-OCR] Page {page_num + 1}: Converted to image ({len(img_data)} bytes)")
                    logger.debug(f"[PDF-OCR] Page {page_num + 1}: Image zoom={zoom}, format=PNG")
                    # Check the OCR cache before calling the AI service.
                    cache_key_info = f"{self.file_path.name}_page_{page_num + 1}_zoom_{zoom}"
                    cached_text = self.ocr_cache.get_cached_text(
                        file_data=img_data,
                        filename=filename,
                        additional_info=cache_key_info
                    )
                    if cached_text:
                        logger.info(f"[PDF-OCR] Page {page_num + 1}: ✓ 使用快取的OCR結果 (節省AI流量)")
                        ocr_text = cached_text
                    else:
                        # Run Dify OCR on the page image.
                        logger.info(f"[PDF-OCR] Page {page_num + 1}: Starting OCR recognition...")
                        ocr_text = self.dify_client.ocr_image_with_dify(
                            image_data=img_data,
                            filename=filename,
                            user_id=user_id,
                            job_id=job_id
                        )
                        # Persist the OCR result to the cache.
                        if ocr_text.strip():
                            self.ocr_cache.save_cached_text(
                                file_data=img_data,
                                extracted_text=ocr_text,
                                filename=filename,
                                additional_info=cache_key_info,
                                metadata={
                                    'source_file': str(self.file_path),
                                    'page_number': page_num + 1,
                                    'total_pages': total_pages,
                                    'zoom_level': zoom,
                                    'image_size_bytes': len(img_data),
                                    'user_id': user_id,
                                    'job_id': job_id
                                }
                            )
                            logger.info(f"[PDF-OCR] Page {page_num + 1}: ✓ OCR結果已保存到快取")
                    logger.info(f"[PDF-OCR] Page {page_num + 1}: OCR completed")
                    logger.debug(f"[PDF-OCR] Page {page_num + 1}: Raw OCR result length: {len(ocr_text)}")
                    if ocr_text.strip():
                        # Split the OCR result into sentences.
                        logger.debug(f"[PDF-OCR] Page {page_num + 1}: Splitting OCR text into sentences...")
                        sentences = self._split_ocr_text(ocr_text)
                        # Keep only reasonably long sentences.
                        valid_sentences = [s for s in sentences if len(s.strip()) > 5]
                        text_segments.extend(valid_sentences)
                        logger.info(f"[PDF-OCR] Page {page_num + 1}: ✓ Extracted {len(valid_sentences)} valid sentences")
                        logger.debug(f"[PDF-OCR] Page {page_num + 1}: Total sentences before filter: {len(sentences)}")
                        # Log the first 50 characters for debugging.
                        if valid_sentences:
                            preview = valid_sentences[0][:50] + "..." if len(valid_sentences[0]) > 50 else valid_sentences[0]
                            logger.debug(f"[PDF-OCR] Page {page_num + 1}: First sentence preview: {preview}")
                    else:
                        logger.warning(f"[PDF-OCR] Page {page_num + 1}: ⚠ OCR returned empty result")
                except Exception as e:
                    logger.error(f"[PDF-OCR] Page {page_num + 1}: ✗ Processing failed: {str(e)}")
                    logger.error(f"[PDF-OCR] Page {page_num + 1}: Exception type: {type(e).__name__}")
                    # Continue with the next page instead of aborting the whole job.
                    continue
            doc.close()
            logger.info(f"[PDF-OCR] OCR processing completed for all {total_pages} pages")
            logger.info(f"[PDF-OCR] Total text segments extracted: {len(text_segments)}")
            if not text_segments:
                logger.error(f"[PDF-OCR] ✗ No text content extracted from any page")
                raise FileProcessingError("OCR处理完成但未提取到任何文字内容")
            logger.info(f"[PDF-OCR] ✓ Scanned PDF processing completed successfully")
            logger.info(f"[PDF-OCR] Final result: {len(text_segments)} text segments extracted")
            # Merge short segments to reduce unnecessary translation calls.
            merged_segments = self._merge_short_segments(text_segments)
            logger.info(f"[PDF-OCR] After merging: {len(merged_segments)} segments ready for translation")
            return merged_segments
        except Exception as e:
            logger.error(f"Scanned PDF processing failed: {str(e)}")
            raise FileProcessingError(f"扫描PDF处理失败: {str(e)}")
def _split_text_into_sentences(self, text: str) -> List[str]:
"""将文字分割成句子"""
if not text.strip():
return []
# 简单的分句逻辑
sentences = []
separators = ['. ', '', '', '', '!', '?', '\n\n']
current_sentences = [text]
for sep in separators:
new_sentences = []
for sentence in current_sentences:
parts = sentence.split(sep)
if len(parts) > 1:
# 保留分隔符
for i, part in enumerate(parts[:-1]):
if part.strip():
new_sentences.append(part.strip() + sep.rstrip())
# 最后一部分
if parts[-1].strip():
new_sentences.append(parts[-1].strip())
else:
new_sentences.append(sentence)
current_sentences = new_sentences
# 过滤掉太短的句子
valid_sentences = [s for s in current_sentences if len(s.strip()) > 3]
return valid_sentences
def _split_ocr_text(self, ocr_text: str) -> List[str]:
"""分割OCR识别的文字"""
if not ocr_text.strip():
return []
# OCR结果可能包含表格或特殊格式需要特殊处理
lines = ocr_text.split('\n')
sentences = []
current_paragraph = []
for line in lines:
line = line.strip()
if not line:
# 空行表示段落结束
if current_paragraph:
paragraph_text = ' '.join(current_paragraph)
if len(paragraph_text) > 10:
sentences.append(paragraph_text)
current_paragraph = []
continue
# 检查是否是表格行(包含|或多个制表符)
if '|' in line or '\t' in line:
# 表格行单独处理
if current_paragraph:
paragraph_text = ' '.join(current_paragraph)
if len(paragraph_text) > 10:
sentences.append(paragraph_text)
current_paragraph = []
if len(line) > 10:
sentences.append(line)
else:
# 普通文字行
current_paragraph.append(line)
# 处理最后的段落
if current_paragraph:
paragraph_text = ' '.join(current_paragraph)
if len(paragraph_text) > 10:
sentences.append(paragraph_text)
return sentences
    def generate_translated_document(self, translations: dict, target_language: str,
                                     output_dir: Path) -> str:
        """Generate a translated Word document (same output format as DOCX jobs).

        Args:
            translations: Mapping of language -> list of translated segments.
            target_language: Which language's translations to write out.
            output_dir: Directory for the generated .docx file.

        Returns:
            Path (str) of the written document.

        Raises:
            FileProcessingError: when document generation fails.
        """
        try:
            from app.utils.helpers import generate_filename
            translated_texts = translations.get(target_language, [])
            # Emit a Word document rather than a plain-text file.
            output_filename = f"{self.file_path.stem}_{target_language}_translated.docx"
            output_path = output_dir / output_filename
            # Build the Word document.
            from docx import Document
            from docx.shared import Pt
            from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
            doc = Document()
            # Title page.
            title = doc.add_heading(f"PDF翻译结果 - {target_language}", 0)
            title.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
            # Document info block.
            info_para = doc.add_paragraph()
            info_para.add_run("原始文件: ").bold = True
            info_para.add_run(self.file_path.name)
            info_para.add_run("\n处理方式: ").bold = True
            info_para.add_run("OCR识别" if self.is_scanned_pdf() else "直接文字提取")
            info_para.add_run(f"\n翻译语言: ").bold = True
            info_para.add_run(target_language)
            info_para.add_run(f"\n总段落数: ").bold = True
            info_para.add_run(str(len(translated_texts)))
            doc.add_paragraph()  # blank spacer line
            # Translated content, rendered per detected content type.
            for i, text in enumerate(translated_texts, 1):
                content_type = self._detect_content_type(text)
                if content_type == 'table':
                    # Try to render an actual table.
                    self._add_table_content(doc, text, i)
                elif content_type == 'heading':
                    # Render as a heading.
                    self._add_heading_content(doc, text, i)
                elif content_type == 'list':
                    # Render as a list.
                    self._add_list_content(doc, text, i)
                else:
                    # Plain paragraph.
                    self._add_paragraph_content(doc, text, i)
            # Save the Word document.
            doc.save(output_path)
            logger.info(f"Generated translated PDF Word document: {output_path}")
            return str(output_path)
        except Exception as e:
            logger.error(f"Failed to generate translated Word document: {str(e)}")
            raise FileProcessingError(f"生成翻译Word文档失败: {str(e)}")
def generate_combined_translated_document(self, all_translations: dict, target_languages: list,
                                          output_dir: Path) -> str:
    """Generate one Word document containing every target language.

    Segments are rendered stacked ("translation 1 / translation 2") with a
    ``[language]`` tag in front of each translation.

    Args:
        all_translations: Mapping of language -> list of translated segments.
        target_languages: Languages to include, in display order.
        output_dir: Directory in which the .docx file is written.

    Returns:
        The path of the generated document as a string.

    Raises:
        FileProcessingError: If document generation fails.
    """
    try:
        from app.utils.helpers import generate_filename
        # Combined file name carries every language code.
        languages_suffix = '_'.join(target_languages)
        output_filename = f"{self.file_path.stem}_{languages_suffix}_combined.docx"
        output_path = output_dir / output_filename
        # python-docx is imported lazily so module import stays cheap.
        from docx import Document
        from docx.shared import Pt
        from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
        doc = Document()
        # Title page.
        title = doc.add_heading(f"PDF翻译結果 - 多語言組合文檔", 0)
        title.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        # Document info block.
        info_para = doc.add_paragraph()
        info_para.add_run("原始文件: ").bold = True
        info_para.add_run(self.file_path.name)
        info_para.add_run("\n处理方式: ").bold = True
        info_para.add_run("OCR识别" if self.is_scanned_pdf() else "直接文字提取")
        info_para.add_run(f"\n翻译语言: ").bold = True
        info_para.add_run(' / '.join(target_languages))
        # The first language's translation count is used as the reference length.
        first_language = target_languages[0]
        segment_count = len(all_translations.get(first_language, []))
        info_para.add_run(f"\n总段落数: ").bold = True
        info_para.add_run(str(segment_count))
        doc.add_paragraph()  # blank line
        # Body: one numbered paragraph per segment, all languages stacked.
        for i in range(segment_count):
            content_para = doc.add_paragraph()
            # Segment number.
            num_run = content_para.add_run(f"{i+1:03d}. ")
            num_run.bold = True
            num_run.font.size = Pt(12)
            # One translation per language.
            for j, target_language in enumerate(target_languages):
                if i < len(all_translations.get(target_language, [])):
                    translation_text = all_translations[target_language][i]
                    # Spacing between stacked translations.
                    if j > 0:
                        content_para.add_run("\n\n")
                    lang_run = content_para.add_run(f"[{target_language}] ")
                    lang_run.bold = True
                    lang_run.font.size = Pt(11)
                    # Translation body.
                    trans_run = content_para.add_run(translation_text)
                    trans_run.font.size = Pt(11)
            # Paragraph spacing.
            content_para.paragraph_format.space_after = Pt(12)
        doc.save(output_path)
        logger.info(f"Generated combined translated PDF Word document: {output_path}")
        return str(output_path)
    except Exception as e:
        logger.error(f"Failed to generate combined translated Word document: {str(e)}")
        raise FileProcessingError(f"生成組合翻译Word文档失败: {str(e)}")
def _is_table_component(self, segment: str) -> bool:
"""檢查段落是否為表格組件(表格邊界、分隔線等)"""
segment = segment.strip()
# Markdown表格分隔線如 |---|---|---| 或 |===|===|===|
if '|' in segment and ('-' in segment or '=' in segment):
# 移除 | 和 - = 後,如果剩餘內容很少,則判斷為表格分隔線
clean_segment = segment.replace('|', '').replace('-', '').replace('=', '').replace(' ', '').replace(':', '')
if len(clean_segment) <= 2: # 允許少量其他字符
return True
# 純分隔線
if segment.replace('=', '').replace('-', '').replace(' ', '') == '':
return True
return False
def _is_table_row(self, segment: str) -> bool:
"""檢查段落是否為表格行(包含實際數據的表格行)"""
segment = segment.strip()
# Markdown表格行至少包含兩個 | 符號,且有實際內容
if segment.count('|') >= 2:
# 移除首尾的 | 並分割為單元格
cells = segment.strip('|').split('|')
# 檢查是否有實際的文字內容(不只是分隔符號)
has_content = any(
cell.strip() and
not cell.replace('-', '').replace('=', '').replace(' ', '').replace(':', '') == ''
for cell in cells
)
if has_content:
return True
return False
def _merge_table_segments(self, segments: List[str], start_idx: int) -> tuple[str, int]:
    """Merge the run of consecutive table-related segments starting at *start_idx*.

    Returns:
        (merged_table_content, next_index) — the table lines joined with
        newlines, and the index of the first non-table segment.
    """
    collected = []
    idx = start_idx
    total = len(segments)
    # Keep absorbing segments while they still look table-related.
    while idx < total:
        candidate = segments[idx].strip()
        if not (self._is_table_component(candidate) or self._is_table_row(candidate)):
            break
        collected.append(candidate)
        idx += 1
    return '\n'.join(collected), idx
def _merge_short_segments(self, text_segments: List[str], min_length: int = 10) -> List[str]:
    """Merge short segments to cut down on translation calls; keeps tables intact.

    Args:
        text_segments: Original list of text segments.
        min_length: Segments shorter than this threshold get merged together.

    Returns:
        The merged segment list.
    """
    if not text_segments:
        return text_segments
    merged_segments = []
    current_merge = ""  # accumulator for consecutive short segments
    i = 0
    while i < len(text_segments):
        segment = text_segments[i].strip()
        if not segment:  # skip empty segments
            i += 1
            continue
        # Table scaffolding/rows get grouped into one block so the
        # translator sees the whole table at once.
        if self._is_table_component(segment) or self._is_table_row(segment):
            # Flush any short segments accumulated before the table.
            if current_merge:
                merged_segments.append(current_merge.strip())
                logger.debug(f"Merged short segments before table: '{current_merge[:50]}...'")
                current_merge = ""
            # Merge the consecutive table-related segments.
            table_content, next_i = self._merge_table_segments(text_segments, i)
            merged_segments.append(table_content)
            logger.debug(f"Merged table content: {next_i - i} segments -> 1 table block")
            i = next_i
            continue
        # Short-segment handling.
        if len(segment) < min_length:
            # Drop segments that are pure punctuation/markup (table symbols excluded).
            if segment.replace('*', '').replace('-', '').replace('_', '').replace('#', '').strip() == '':
                logger.debug(f"Skipping pure symbol segment: '{segment}'")
                i += 1
                continue
            # Accumulate the short segment for merging.
            if current_merge:
                current_merge += " " + segment
            else:
                current_merge = segment
            logger.debug(f"Adding short segment to merge: '{segment}' (length: {len(segment)})")
        else:
            # Long segment: flush previously accumulated short segments first.
            if current_merge:
                merged_segments.append(current_merge.strip())
                logger.debug(f"Merged short segments: '{current_merge[:50]}...' (total length: {len(current_merge)})")
                current_merge = ""
            # Then emit the long segment as-is.
            merged_segments.append(segment)
            logger.debug(f"Added long segment: '{segment[:50]}...' (length: {len(segment)})")
        i += 1
    # Flush any trailing short segments.
    if current_merge:
        merged_segments.append(current_merge.strip())
        logger.debug(f"Final merged short segments: '{current_merge[:50]}...' (total length: {len(current_merge)})")
    logger.info(f"Segment merging: {len(text_segments)} -> {len(merged_segments)} segments")
    return merged_segments
def _detect_content_type(self, text: str) -> str:
"""检测内容类型"""
text_lower = text.lower().strip()
# 检测表格(包含多个|或制表符)
if ('|' in text and text.count('|') >= 2) or '\t' in text:
return 'table'
# 检测标题
if (text_lower.startswith(('', '', 'chapter', 'section', '#')) or
any(keyword in text_lower for keyword in ['', '', '']) and len(text) < 100):
return 'heading'
# 检测列表
if (text_lower.startswith(('', '-', '*', '1.', '2.', '3.', '4.', '5.')) or
any(text_lower.startswith(f"{i}.") for i in range(1, 20))):
return 'list'
return 'paragraph'
def _add_table_content(self, doc, text: str, index: int):
    """Render *text* into the document as a table.

    Markdown-style pipe tables become a real Word table ("Table Grid");
    tab-separated text falls back to a monospace paragraph.
    """
    from docx.shared import Pt
    # Caption line ("表格 N:").
    caption = doc.add_paragraph()
    caption_run = caption.add_run(f"表格 {index}: ")
    caption_run.bold = True
    caption_run.font.size = Pt(12)
    if '|' not in text:
        # Tab-separated table: keep the raw text in a monospace font.
        fallback = doc.add_paragraph()
        fallback_run = fallback.add_run(text)
        fallback_run.font.name = 'Courier New'
        fallback_run.font.size = Pt(10)
        return
    # Parse the Markdown-style rows, dropping separator lines (|---|---|).
    data_rows = []
    for raw_line in text.split('\n'):
        line = raw_line.strip()
        if not line or not (line.startswith('|') and line.endswith('|')):
            continue
        cells = [cell.strip() for cell in line.split('|')[1:-1]]
        if not cells:
            continue
        if all(cell.replace('-', '').replace(' ', '') == '' for cell in cells):
            continue  # pure separator row
        data_rows.append(cells)
    if not data_rows:
        return
    # Build the Word table sized on the first row's column count.
    table = doc.add_table(rows=len(data_rows), cols=len(data_rows[0]))
    table.style = 'Table Grid'
    for r, row_values in enumerate(data_rows):
        row_cells = table.rows[r].cells
        for c, value in enumerate(row_values):
            if c >= len(row_cells):
                continue  # row wider than the table: drop extras
            cell = row_cells[c]
            cell.text = value
            # Shrink the cell font.
            for paragraph in cell.paragraphs:
                for run in paragraph.runs:
                    run.font.size = Pt(10)
def _add_heading_content(self, doc, text: str, index: int):
    """Add *text* as a level-2 heading (or a bold paragraph when too long)."""
    from docx.shared import Pt
    clean_text = text.strip()
    if len(clean_text) >= 100:
        # Too long for a real heading style — emulate with a bold run.
        para = doc.add_paragraph()
        run = para.add_run(clean_text)
        run.bold = True
        run.font.size = Pt(14)
    else:
        doc.add_heading(clean_text, level=2)
def _add_list_content(self, doc, text: str, index: int):
    """Add *text* as a list paragraph (numbered if it already carries "N.")."""
    from docx.shared import Pt
    item = text.strip()
    # Reuse existing numbering when the item starts with "1." .. "19.".
    already_numbered = any(item.startswith(f"{n}.") for n in range(1, 20))
    style_name = 'List Number' if already_numbered else 'List Bullet'
    para = doc.add_paragraph(item, style=style_name)
    # Normalize the font size.
    for run in para.runs:
        run.font.size = Pt(11)
def _add_paragraph_content(self, doc, text: str, index: int):
    """Add *text* as a numbered body paragraph ("NNN. text")."""
    from docx.shared import Pt
    para = doc.add_paragraph()
    # Zero-padded segment number prefix.
    prefix = para.add_run(f"{index:03d}. ")
    prefix.bold = True
    prefix.font.size = Pt(12)
    # Segment body.
    body = para.add_run(text)
    body.font.size = Pt(11)
    # Spacing after each segment.
    para.paragraph_format.space_after = Pt(6)

View File

@@ -0,0 +1,647 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
通知服務
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
import os
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from datetime import datetime, timedelta
from typing import Optional, List, Dict, Any
from flask import current_app, url_for
from app import db
from app.utils.logger import get_logger
from app.models.job import TranslationJob
from app.models.user import User
from app.models.notification import Notification, NotificationType
logger = get_logger(__name__)
class NotificationService:
    """Notification service.

    Builds e-mail notifications (actual SMTP delivery is currently disabled,
    see ``_send_email``) and creates persistent in-app database notifications
    for translation jobs.
    """

    def __init__(self):
        # SMTP transport settings, all read from Flask config.
        self.smtp_server = current_app.config.get('SMTP_SERVER')
        self.smtp_port = current_app.config.get('SMTP_PORT', 587)
        self.use_tls = current_app.config.get('SMTP_USE_TLS', False)
        self.use_ssl = current_app.config.get('SMTP_USE_SSL', False)
        self.auth_required = current_app.config.get('SMTP_AUTH_REQUIRED', False)
        self.sender_email = current_app.config.get('SMTP_SENDER_EMAIL')
        self.sender_password = current_app.config.get('SMTP_SENDER_PASSWORD', '')
        # Display name used inside the e-mail templates.
        self.app_name = current_app.config.get('APP_NAME', 'PANJIT Document Translator')
def _create_smtp_connection(self):
    """Open an SMTP connection using the configured transport settings.

    Returns the connected ``smtplib`` client, or ``None`` on any failure.
    """
    try:
        # Pick the transport: implicit TLS (SMTPS) vs plain SMTP.
        if self.use_ssl:
            connection = smtplib.SMTP_SSL(self.smtp_server, self.smtp_port)
        else:
            connection = smtplib.SMTP(self.smtp_server, self.smtp_port)
            if self.use_tls:
                connection.starttls()
        # Authenticate only when required and a password is configured.
        if self.auth_required and self.sender_password:
            connection.login(self.sender_email, self.sender_password)
        return connection
    except Exception as e:
        logger.error(f"SMTP connection failed: {str(e)}")
        return None
def _send_email(self, to_email: str, subject: str, html_content: str, text_content: str = None) -> bool:
    """Base e-mail sender — DISABLED (security policy forbids intranet SMTP).

    Always logs and returns True so callers' notification flows proceed
    unaffected.
    """
    logger.info(f"SMTP service disabled - Email notification skipped for {to_email}: {subject}")
    return True  # return True so other flows are not disturbed
    # The SMTP implementation below is kept for reference; it was disabled
    # because the security policy prevents connecting to the intranet mail
    # server.
    # try:
    #     if not self.smtp_server or not self.sender_email:
    #         logger.error("SMTP configuration incomplete")
    #         return False
    #
    #     # Build the message
    #     msg = MIMEMultipart('alternative')
    #     msg['From'] = f"{self.app_name} <{self.sender_email}>"
    #     msg['To'] = to_email
    #     msg['Subject'] = subject
    #
    #     # Attach the plain-text part
    #     if text_content:
    #         text_part = MIMEText(text_content, 'plain', 'utf-8')
    #         msg.attach(text_part)
    #
    #     # Attach the HTML part
    #     html_part = MIMEText(html_content, 'html', 'utf-8')
    #     msg.attach(html_part)
    #
    #     # Send the message
    #     server = self._create_smtp_connection()
    #     if not server:
    #         return False
    #
    #     server.send_message(msg)
    #     server.quit()
    #
    #     logger.info(f"Email sent successfully to {to_email}")
    #     return True
    #
    # except Exception as e:
    #     logger.error(f"Failed to send email to {to_email}: {str(e)}")
    #     return False
def send_job_completion_notification(self, job: TranslationJob) -> bool:
    """Send the job-completion e-mail (HTML + plain text) to the job owner.

    Returns:
        True when the mail layer accepted the message (always, while SMTP is
        disabled); False when the user has no e-mail or building fails.
    """
    try:
        if not job.user or not job.user.email:
            logger.warning(f"No email address for job {job.job_uuid}")
            return False
        # Subject line.
        subject = f"📄 翻譯完成通知 - {job.original_filename}"
        # Human-readable processing duration.
        processing_time = ""
        if job.processing_started_at and job.completed_at:
            duration = job.completed_at - job.processing_started_at
            total_seconds = int(duration.total_seconds())
            if total_seconds < 60:
                processing_time = f"{total_seconds}秒"
            elif total_seconds < 3600:
                minutes = total_seconds // 60
                seconds = total_seconds % 60
                processing_time = f"{minutes}分{seconds}秒"
            else:
                hours = total_seconds // 3600
                minutes = (total_seconds % 3600) // 60
                processing_time = f"{hours}小時{minutes}分"
        # Per-language download hints (simplified; real links live in the app).
        download_links = []
        for lang in job.target_languages:
            download_links.append(f"{lang}: [下載翻譯檔案]")
        # HTML body.
        html_content = f"""
        <!DOCTYPE html>
        <html>
        <head>
            <meta charset="utf-8">
            <style>
                body {{ font-family: Arial, sans-serif; line-height: 1.6; color: #333; }}
                .container {{ max-width: 600px; margin: 0 auto; padding: 20px; }}
                .header {{ background-color: #2563eb; color: white; padding: 20px; text-align: center; border-radius: 8px 8px 0 0; }}
                .content {{ background-color: #f8fafc; padding: 30px; border: 1px solid #e5e7eb; }}
                .info-box {{ background-color: #dbeafe; border-left: 4px solid #2563eb; padding: 15px; margin: 20px 0; }}
                .footer {{ background-color: #374151; color: #d1d5db; padding: 15px; text-align: center; font-size: 12px; border-radius: 0 0 8px 8px; }}
                .success {{ color: #059669; font-weight: bold; }}
                .download-section {{ margin: 20px 0; }}
                .download-link {{ display: inline-block; background-color: #2563eb; color: white; padding: 10px 20px; text-decoration: none; border-radius: 5px; margin: 5px; }}
            </style>
        </head>
        <body>
            <div class="container">
                <div class="header">
                    <h1>🎉 翻譯任務完成</h1>
                </div>
                <div class="content">
                    <p>親愛的 <strong>{job.user.display_name}</strong></p>
                    <p class="success">您的文件翻譯任務已成功完成!</p>
                    <div class="info-box">
                        <h3>📋 任務詳細資訊</h3>
                        <p><strong>檔案名稱:</strong> {job.original_filename}</p>
                        <p><strong>任務編號:</strong> {job.job_uuid}</p>
                        <p><strong>來源語言:</strong> {job.source_language}</p>
                        <p><strong>目標語言:</strong> {', '.join(job.target_languages)}</p>
                        <p><strong>處理時間:</strong> {processing_time}</p>
                        <p><strong>完成時間:</strong> {job.completed_at.strftime('%Y-%m-%d %H:%M:%S') if job.completed_at else '未知'}</p>
                        {f'<p><strong>總成本:</strong> ${job.total_cost:.4f}</p>' if job.total_cost else ''}
                    </div>
                    <div class="download-section">
                        <h3>📥 下載翻譯檔案</h3>
                        <p>請登入系統下載您的翻譯檔案:</p>
                        <p>{'<br>'.join(download_links)}</p>
                        <p style="margin-top: 15px;">
                            <strong>注意:</strong> 翻譯檔案將在系統中保留 7 天,請及時下載。
                        </p>
                    </div>
                    <div style="margin-top: 30px; padding-top: 20px; border-top: 1px solid #e5e7eb;">
                        <p>感謝您使用 {self.app_name}</p>
                        <p>如有任何問題,請聯繫系統管理員。</p>
                    </div>
                </div>
                <div class="footer">
                    <p>此郵件由 {self.app_name} 系統自動發送,請勿回覆。</p>
                    <p>發送時間: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
                </div>
            </div>
        </body>
        </html>
        """
        # Plain-text fallback body.
        text_content = f"""
翻譯任務完成通知

親愛的 {job.user.display_name}

您的文件翻譯任務已成功完成!

任務詳細資訊:
- 檔案名稱: {job.original_filename}
- 任務編號: {job.job_uuid}
- 來源語言: {job.source_language}
- 目標語言: {', '.join(job.target_languages)}
- 處理時間: {processing_time}
- 完成時間: {job.completed_at.strftime('%Y-%m-%d %H:%M:%S') if job.completed_at else '未知'}

請登入系統下載您的翻譯檔案。翻譯檔案將在系統中保留 7 天。

感謝您使用 {self.app_name}

----
此郵件由系統自動發送,請勿回覆。
"""
        return self._send_email(job.user.email, subject, html_content, text_content)
    except Exception as e:
        logger.error(f"Failed to send completion notification for job {job.job_uuid}: {str(e)}")
        return False
def send_job_failure_notification(self, job: TranslationJob) -> bool:
    """Send the job-failure e-mail (HTML + plain text) to the job owner.

    Returns:
        True when the mail layer accepted the message (always, while SMTP is
        disabled); False when the user has no e-mail or building fails.
    """
    try:
        if not job.user or not job.user.email:
            logger.warning(f"No email address for job {job.job_uuid}")
            return False
        subject = f"⚠️ 翻譯失敗通知 - {job.original_filename}"
        # HTML body.
        html_content = f"""
        <!DOCTYPE html>
        <html>
        <head>
            <meta charset="utf-8">
            <style>
                body {{ font-family: Arial, sans-serif; line-height: 1.6; color: #333; }}
                .container {{ max-width: 600px; margin: 0 auto; padding: 20px; }}
                .header {{ background-color: #dc2626; color: white; padding: 20px; text-align: center; border-radius: 8px 8px 0 0; }}
                .content {{ background-color: #f8fafc; padding: 30px; border: 1px solid #e5e7eb; }}
                .error-box {{ background-color: #fef2f2; border-left: 4px solid #dc2626; padding: 15px; margin: 20px 0; }}
                .footer {{ background-color: #374151; color: #d1d5db; padding: 15px; text-align: center; font-size: 12px; border-radius: 0 0 8px 8px; }}
                .error {{ color: #dc2626; font-weight: bold; }}
            </style>
        </head>
        <body>
            <div class="container">
                <div class="header">
                    <h1>❌ 翻譯任務失敗</h1>
                </div>
                <div class="content">
                    <p>親愛的 <strong>{job.user.display_name}</strong></p>
                    <p class="error">很抱歉,您的文件翻譯任務處理失敗。</p>
                    <div class="error-box">
                        <h3>📋 任務資訊</h3>
                        <p><strong>檔案名稱:</strong> {job.original_filename}</p>
                        <p><strong>任務編號:</strong> {job.job_uuid}</p>
                        <p><strong>重試次數:</strong> {job.retry_count}</p>
                        <p><strong>錯誤訊息:</strong> {job.error_message or '未知錯誤'}</p>
                        <p><strong>失敗時間:</strong> {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
                    </div>
                    <div style="margin-top: 20px;">
                        <p><strong>建議處理方式:</strong></p>
                        <ul>
                            <li>檢查檔案格式是否正確</li>
                            <li>確認檔案沒有損壞</li>
                            <li>稍後再次嘗試上傳</li>
                            <li>如問題持續,請聯繫系統管理員</li>
                        </ul>
                    </div>
                    <div style="margin-top: 30px; padding-top: 20px; border-top: 1px solid #e5e7eb;">
                        <p>如需協助,請聯繫系統管理員。</p>
                    </div>
                </div>
                <div class="footer">
                    <p>此郵件由 {self.app_name} 系統自動發送,請勿回覆。</p>
                    <p>發送時間: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
                </div>
            </div>
        </body>
        </html>
        """
        # Plain-text fallback body.
        text_content = f"""
翻譯任務失敗通知

親愛的 {job.user.display_name}

很抱歉,您的文件翻譯任務處理失敗。

任務資訊:
- 檔案名稱: {job.original_filename}
- 任務編號: {job.job_uuid}
- 重試次數: {job.retry_count}
- 錯誤訊息: {job.error_message or '未知錯誤'}

建議處理方式:
1. 檢查檔案格式是否正確
2. 確認檔案沒有損壞
3. 稍後再次嘗試上傳
4. 如問題持續,請聯繫系統管理員

如需協助,請聯繫系統管理員。

----
此郵件由 {self.app_name} 系統自動發送,請勿回覆。
"""
        return self._send_email(job.user.email, subject, html_content, text_content)
    except Exception as e:
        logger.error(f"Failed to send failure notification for job {job.job_uuid}: {str(e)}")
        return False
def send_admin_notification(self, subject: str, message: str, admin_emails: List[str] = None) -> bool:
    """Broadcast an administrative notice to admin e-mail addresses.

    Args:
        subject: Notice subject (delivered prefixed with "[管理通知]").
        message: Notice body.
        admin_emails: Explicit recipients; defaults to every admin user's
            address when omitted.

    Returns:
        True when at least one message was accepted.
    """
    try:
        if not admin_emails:
            # Collect the e-mail addresses of all admin users.
            admin_users = User.get_admin_users()
            admin_emails = [user.email for user in admin_users if user.email]
        if not admin_emails:
            logger.warning("No admin email addresses found")
            return False
        html_content = f"""
        <!DOCTYPE html>
        <html>
        <head>
            <meta charset="utf-8">
            <style>
                body {{ font-family: Arial, sans-serif; line-height: 1.6; color: #333; }}
                .container {{ max-width: 600px; margin: 0 auto; padding: 20px; }}
                .header {{ background-color: #f59e0b; color: white; padding: 20px; text-align: center; border-radius: 8px 8px 0 0; }}
                .content {{ background-color: #f8fafc; padding: 30px; border: 1px solid #e5e7eb; }}
                .footer {{ background-color: #374151; color: #d1d5db; padding: 15px; text-align: center; font-size: 12px; border-radius: 0 0 8px 8px; }}
            </style>
        </head>
        <body>
            <div class="container">
                <div class="header">
                    <h1>🔔 系統管理通知</h1>
                </div>
                <div class="content">
                    <p>系統管理員您好,</p>
                    <div style="background-color: #fef3c7; border-left: 4px solid #f59e0b; padding: 15px; margin: 20px 0;">
                        <h3>{subject}</h3>
                        <p>{message}</p>
                    </div>
                    <p>發送時間: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
                </div>
                <div class="footer">
                    <p>此郵件由 {self.app_name} 系統自動發送,請勿回覆。</p>
                </div>
            </div>
        </body>
        </html>
        """
        # Deliver to each admin; succeed if at least one send is accepted.
        success_count = 0
        for email in admin_emails:
            if self._send_email(email, f"[管理通知] {subject}", html_content):
                success_count += 1
        return success_count > 0
    except Exception as e:
        logger.error(f"Failed to send admin notification: {str(e)}")
        return False
def test_smtp_connection(self) -> bool:
    """Probe the configured SMTP server; True when a connection succeeds."""
    try:
        connection = self._create_smtp_connection()
        if connection is None:
            return False
        # Close politely after a successful handshake.
        connection.quit()
        return True
    except Exception as e:
        logger.error(f"SMTP connection test failed: {str(e)}")
        return False
# ========== Database notification methods ==========
def create_db_notification(
    self,
    user_id: int,
    title: str,
    message: str,
    notification_type: NotificationType = NotificationType.INFO,
    job_uuid: Optional[str] = None,
    extra_data: Optional[Dict[str, Any]] = None,
    expires_at: Optional[datetime] = None,
    link: Optional[str] = None
) -> Optional[Notification]:
    """Create a persistent in-app notification row.

    Args:
        user_id: Target user's id.
        title: Notification title.
        message: Notification body text.
        notification_type: Severity/category of the notification.
        job_uuid: UUID of the related translation job, if any.
        extra_data: Arbitrary JSON-serializable payload.
        expires_at: When the notification stops being shown.
        link: In-app link opened when the notification is clicked.

    Returns:
        The created Notification, or None when the insert failed.
    """
    try:
        # Default the link to the job-detail page when only a job UUID is given.
        if not link and job_uuid:
            link = f"/job/{job_uuid}"
        notification = Notification(
            user_id=user_id,
            type=notification_type.value,
            title=title,
            message=message,
            job_uuid=job_uuid,
            link=link,
            extra_data=extra_data,
            expires_at=expires_at
        )
        db.session.add(notification)
        db.session.commit()
        logger.info(f"資料庫通知已創建: {notification.notification_uuid} for user {user_id}")
        # WebSocket push disabled.
        # self._send_websocket_notification(notification)
        return notification
    except Exception as e:
        # Roll back the failed insert so the session stays usable.
        db.session.rollback()
        logger.error(f"創建資料庫通知失敗: {e}")
        return None
def send_job_started_db_notification(self, job: TranslationJob) -> Optional[Notification]:
    """Create the in-app "job started" notification for *job*.

    Returns:
        The created Notification, or None on failure.
    """
    try:
        message = f'您的文件「{job.original_filename}」已開始翻譯處理。'
        if job.target_languages:
            message += f" 目標語言: {', '.join(job.target_languages)}"
        started_at = (job.processing_started_at.isoformat()
                      if job.processing_started_at else None)
        return self.create_db_notification(
            user_id=job.user_id,
            title="翻譯任務開始處理",
            message=message,
            notification_type=NotificationType.INFO,
            job_uuid=job.job_uuid,
            extra_data={
                'filename': job.original_filename,
                'target_languages': job.target_languages,
                'started_at': started_at
            }
        )
    except Exception as e:
        logger.error(f"發送任務開始資料庫通知失敗: {e}")
        return None
def send_job_completion_db_notification(self, job: TranslationJob) -> Optional[Notification]:
    """Create the in-app "job completed" notification for a finished job.

    Skips (returns None) when the job is not in COMPLETED state.
    """
    try:
        if job.status != 'COMPLETED':
            logger.warning(f"任務 {job.job_uuid} 狀態不是已完成,跳過完成通知")
            return None
        # Assemble the message text.
        message = f'您的文件「{job.original_filename}」已成功翻譯完成。'
        if job.target_languages:
            message += f" 目標語言: {', '.join(job.target_languages)}"
        # Append a human-readable duration when both timestamps exist.
        if job.processing_started_at and job.completed_at:
            elapsed = job.completed_at - job.processing_started_at
            whole_minutes = int(elapsed.total_seconds() / 60)
            if whole_minutes > 0:
                message += f" 處理時間: {whole_minutes} 分鐘"
            else:
                message += f" 處理時間: {int(elapsed.total_seconds())}秒"
        payload = {
            'filename': job.original_filename,
            'target_languages': job.target_languages,
            'total_cost': float(job.total_cost) if job.total_cost else 0,
            'completed_at': job.completed_at.isoformat() if job.completed_at else None
        }
        return self.create_db_notification(
            user_id=job.user_id,
            title="翻譯任務完成",
            message=message,
            notification_type=NotificationType.SUCCESS,
            job_uuid=job.job_uuid,
            extra_data=payload
        )
    except Exception as e:
        logger.error(f"發送任務完成資料庫通知失敗: {e}")
        return None
def send_job_completion_db_notification_direct(self, job: TranslationJob) -> Optional[Notification]:
    """Create the "job completed" notification WITHOUT checking job status.

    Used by callers (e.g. the Celery task) that have already verified the
    job finished successfully.
    """
    try:
        # Assemble the message text.
        pieces = [f'您的文件「{job.original_filename}」已成功翻譯完成。']
        if job.target_languages:
            pieces.append(f" 目標語言: {', '.join(job.target_languages)}")
        pieces.append(" 您可以在任務列表中下載翻譯結果。")
        message = ''.join(pieces)
        # Persist the notification.
        return self.create_db_notification(
            user_id=job.user_id,
            title="翻譯任務完成",
            message=message,
            notification_type=NotificationType.SUCCESS,
            job_uuid=job.job_uuid,
            extra_data={
                'filename': job.original_filename,
                'target_languages': job.target_languages,
                'total_cost': float(job.total_cost) if job.total_cost else 0,
                'completed_at': job.completed_at.isoformat() if job.completed_at else None
            }
        )
    except Exception as e:
        logger.error(f"發送任務完成資料庫通知失敗: {e}")
        return None
def send_job_failure_db_notification(self, job: TranslationJob, error_message: str = None) -> Optional[Notification]:
    """Create the in-app "job failed" notification.

    Args:
        job: The failed translation job.
        error_message: Optional error detail appended to the message.

    Returns:
        The created Notification, or None on failure.
    """
    try:
        message = f'您的文件「{job.original_filename}」翻譯失敗。'
        if error_message:
            message += f" 錯誤訊息: {error_message}"
        if job.retry_count > 0:
            message += f" 已重試 {job.retry_count} 次。"
        return self.create_db_notification(
            user_id=job.user_id,
            title="翻譯任務失敗",
            message=message,
            notification_type=NotificationType.ERROR,
            job_uuid=job.job_uuid,
            extra_data={
                'filename': job.original_filename,
                'error_message': error_message,
                'retry_count': job.retry_count,
                'failed_at': datetime.now().isoformat()
            }
        )
    except Exception as e:
        logger.error(f"發送任務失敗資料庫通知失敗: {e}")
        return None
def _send_websocket_notification(self, notification: Notification):
    """WebSocket push hook — intentionally disabled.

    The realtime channel was removed; the hook is kept so callers need no
    changes. It only logs and returns.
    """
    logger.debug(f"WebSocket 推送已禁用,跳過通知: {notification.notification_uuid}")
def get_unread_count(self, user_id: int) -> int:
    """Count a user's unread, non-expired notifications.

    Args:
        user_id: The user's id.

    Returns:
        The number of unread notifications; 0 on any error.
    """
    try:
        unread = Notification.query.filter_by(user_id=user_id, is_read=False)
        # Keep notifications that never expire or have not expired yet.
        still_valid = (
            (Notification.expires_at.is_(None)) |
            (Notification.expires_at > datetime.now())
        )
        return unread.filter(still_valid).count()
    except Exception as e:
        logger.error(f"獲取未讀通知數量失敗: {e}")
        return 0

282
app/services/ocr_cache.py Normal file
View File

@@ -0,0 +1,282 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
OCR 快取管理模組
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
import hashlib
import json
import sqlite3
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional, Dict, Any
import logging
logger = logging.getLogger(__name__)
class OCRCache:
    """OCR result cache backed by a local SQLite database."""

    def __init__(self, cache_db_path: str = "ocr_cache.db", cache_expire_days: int = 30):
        """Initialize the OCR cache manager.

        Args:
            cache_db_path: Path of the SQLite cache database file.
            cache_expire_days: Number of days before a cached entry expires.
        """
        self.cache_db_path = Path(cache_db_path)
        self.cache_expire_days = cache_expire_days
        # Create the schema up front so later calls can assume it exists.
        self.init_database()
def init_database(self):
    """Create the cache table and its indexes if they do not yet exist.

    Raises:
        Exception: Re-raised after logging when SQLite setup fails.
    """
    try:
        with sqlite3.connect(self.cache_db_path) as conn:
            cursor = conn.cursor()
            # Main cache table, keyed by the content hash.
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS ocr_cache (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    file_hash TEXT UNIQUE NOT NULL,
                    filename TEXT,
                    file_size INTEGER,
                    extracted_text TEXT NOT NULL,
                    extraction_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    access_count INTEGER DEFAULT 1,
                    last_access_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    metadata TEXT
                )
            ''')
            # Indexes to speed up lookups by hash and expiry sweeps by time.
            for ddl in (
                'CREATE INDEX IF NOT EXISTS idx_file_hash ON ocr_cache(file_hash)',
                'CREATE INDEX IF NOT EXISTS idx_extraction_time ON ocr_cache(extraction_time)',
            ):
                cursor.execute(ddl)
            conn.commit()
        logger.info("OCR 快取資料庫初始化完成")
    except Exception as e:
        logger.error(f"初始化 OCR 快取資料庫失敗: {e}")
        raise
def _calculate_file_hash(self, file_data: bytes, additional_info: str = "") -> str:
"""
計算檔案內容的 SHA256 雜湊值
Args:
file_data: 檔案二進位資料
additional_info: 額外資訊(如頁數、處理參數等)
Returns:
檔案的 SHA256 雜湊值
"""
hash_input = file_data + additional_info.encode('utf-8')
return hashlib.sha256(hash_input).hexdigest()
def get_cached_text(self, file_data: bytes, filename: str = "",
                    additional_info: str = "") -> Optional[str]:
    """Look up previously extracted OCR text for this exact file content.

    Args:
        file_data: Raw file bytes.
        filename: Original file name (informational only).
        additional_info: Extra key material (page count, params, ...).

    Returns:
        The cached text, or None when absent, expired, or on error.
    """
    try:
        file_hash = self._calculate_file_hash(file_data, additional_info)
        with sqlite3.connect(self.cache_db_path) as conn:
            cursor = conn.cursor()
            # Fetch a non-expired entry. The expiry window is interpolated
            # with str.format because cache_expire_days is an int set in the
            # constructor, not user input.
            cursor.execute('''
                SELECT extracted_text, access_count
                FROM ocr_cache
                WHERE file_hash = ? AND
                extraction_time > datetime('now', '-{} days')
            '''.format(self.cache_expire_days), (file_hash,))
            result = cursor.fetchone()
            if result:
                extracted_text, access_count = result
                # Bump access statistics on a hit.
                cursor.execute('''
                    UPDATE ocr_cache
                    SET access_count = ?, last_access_time = CURRENT_TIMESTAMP
                    WHERE file_hash = ?
                ''', (access_count + 1, file_hash))
                conn.commit()
                # NOTE(review): "(unknown)" below looks like a lost {filename}
                # placeholder — confirm against upstream history.
                logger.info(f"[OCR-CACHE] 快取命中: (unknown) (訪問次數: {access_count + 1})")
                return extracted_text
            logger.debug(f"[OCR-CACHE] 快取未命中: (unknown)")
            return None
    except Exception as e:
        logger.error(f"獲取 OCR 快取失敗: {e}")
        return None
def save_cached_text(self, file_data: bytes, extracted_text: str,
                     filename: str = "", additional_info: str = "",
                     metadata: Dict[str, Any] = None) -> bool:
    """Persist an OCR extraction result in the cache.

    Args:
        file_data: Raw file bytes (hashed to form the cache key).
        extracted_text: The OCR-extracted text to store.
        filename: Original file name (informational only).
        additional_info: Extra key material (page count, params, ...).
        metadata: Optional JSON-serializable metadata.

    Returns:
        True on success, False on any failure.
    """
    try:
        cache_key = self._calculate_file_hash(file_data, additional_info)
        payload = json.dumps(metadata or {}, ensure_ascii=False)
        with sqlite3.connect(self.cache_db_path) as conn:
            # INSERT OR REPLACE handles re-caching the same content hash.
            conn.cursor().execute(
                '''INSERT OR REPLACE INTO ocr_cache
                   (file_hash, filename, file_size, extracted_text, metadata)
                   VALUES (?, ?, ?, ?, ?)''',
                (cache_key, filename, len(file_data), extracted_text, payload),
            )
            conn.commit()
        logger.info(f"[OCR-CACHE] 儲存快取成功: (unknown) ({len(extracted_text)} 字元)")
        return True
    except Exception as e:
        logger.error(f"儲存 OCR 快取失敗: {e}")
        return False
def get_cache_stats(self) -> Dict[str, Any]:
    """Collect usage statistics for the OCR cache.

    Returns:
        Dict with record counts, total accesses, cache size, the five most
        accessed files and an estimated hit ratio; empty dict on error.
    """
    try:
        with sqlite3.connect(self.cache_db_path) as conn:
            cursor = conn.cursor()
            # Total number of cached entries.
            cursor.execute('SELECT COUNT(*) FROM ocr_cache')
            total_records = cursor.fetchone()[0]
            # Total number of cache accesses across all entries.
            cursor.execute('SELECT SUM(access_count) FROM ocr_cache')
            total_accesses = cursor.fetchone()[0] or 0
            # Cache size measured in characters of extracted text.
            cursor.execute('SELECT SUM(LENGTH(extracted_text)) FROM ocr_cache')
            cache_size_chars = cursor.fetchone()[0] or 0
            # Entries created within the last 7 days.
            cursor.execute('''
                SELECT COUNT(*) FROM ocr_cache
                WHERE extraction_time > datetime('now', '-7 days')
            ''')
            recent_records = cursor.fetchone()[0]
            # Five most frequently accessed entries.
            cursor.execute('''
                SELECT filename, access_count, last_access_time
                FROM ocr_cache
                ORDER BY access_count DESC
                LIMIT 5
            ''')
            top_accessed = cursor.fetchall()
            return {
                'total_records': total_records,
                'total_accesses': total_accesses,
                'cache_size_chars': cache_size_chars,
                'cache_size_mb': cache_size_chars / (1024 * 1024),
                'recent_records_7days': recent_records,
                'top_accessed_files': [
                    {
                        'filename': row[0],
                        'access_count': row[1],
                        'last_access': row[2]
                    }
                    for row in top_accessed
                ],
                # Accesses beyond the first per entry would have been hits.
                'cache_hit_potential': f"{(total_accesses - total_records) / max(total_accesses, 1) * 100:.1f}%"
            }
    except Exception as e:
        logger.error(f"獲取快取統計失敗: {e}")
        return {}
def clean_expired_cache(self) -> int:
    """Delete cache rows older than the configured expiry window.

    Returns:
        The number of deleted rows; 0 on error.
    """
    try:
        with sqlite3.connect(self.cache_db_path) as conn:
            cursor = conn.cursor()
            # cache_expire_days is an int config value, safe to interpolate.
            cursor.execute(
                "DELETE FROM ocr_cache "
                "WHERE extraction_time < datetime('now', '-{} days')".format(self.cache_expire_days)
            )
            removed = cursor.rowcount
            conn.commit()
        logger.info(f"[OCR-CACHE] 清理過期快取: {removed} 筆記錄")
        return removed
    except Exception as e:
        logger.error(f"清理過期快取失敗: {e}")
        return 0
def clear_all_cache(self) -> bool:
    """Remove every cached OCR entry.

    Returns:
        True on success, False on error.
    """
    try:
        with sqlite3.connect(self.cache_db_path) as conn:
            conn.cursor().execute('DELETE FROM ocr_cache')
            conn.commit()
        logger.info("[OCR-CACHE] 已清空所有快取")
        return True
    except Exception as e:
        logger.error(f"清空快取失敗: {e}")
        return False

File diff suppressed because it is too large Load Diff

16
app/tasks/__init__.py Normal file
View File

@@ -0,0 +1,16 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Celery 任務模組
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
from .translation import process_translation_job, cleanup_old_files
__all__ = [
'process_translation_job',
'cleanup_old_files'
]

350
app/tasks/translation.py Normal file
View File

@@ -0,0 +1,350 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
翻譯相關 Celery 任務
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
import os
import shutil
from datetime import datetime, timedelta
from pathlib import Path
from celery import Celery, current_task
from celery.schedules import crontab
from app import create_app, db
logger = None
def get_celery_instance():
    """Build a Flask app and return the Celery instance attached to it."""
    flask_app = create_app()
    return flask_app.celery
# Create the Celery instance used to register the tasks below.
celery = get_celery_instance()
# Initialize the module logger (deferred until after app creation).
from app.utils.logger import get_logger
logger = get_logger(__name__)
from app.models.job import TranslationJob
from app.models.log import SystemLog
from app.services.translation_service import TranslationService
from app.services.notification_service import NotificationService
from app.utils.exceptions import TranslationError
@celery.task(bind=True, max_retries=3)
def process_translation_job(self, job_id: int):
    """Run a translation job end-to-end inside the Flask app context.

    Translates the document, sends success/failure notifications, and retries
    up to ``max_retries`` times (30s/60s/120s back-off) before marking the
    job as permanently failed.

    Args:
        job_id: Primary key of the TranslationJob row to process.

    Raises:
        The original exception after the job is marked RETRY/FAILED, so Celery
        records the failure.
    """
    app = create_app()
    with app.app_context():
        try:
            job = TranslationJob.query.get(job_id)
            if not job:
                raise ValueError(f"Job {job_id} not found")
            logger.info(f"Starting translation job processing: {job.job_uuid}")
            # Audit-log the start of the job.
            SystemLog.info(
                'tasks.translation',
                f'Translation job started: {job.job_uuid}',
                user_id=job.user_id,
                job_id=job.id,
                extra_data={
                    'filename': job.original_filename,
                    'target_languages': job.target_languages,
                    'retry_count': self.request.retries
                }
            )
            translation_service = TranslationService()
            result = translation_service.translate_document(job.job_uuid)
            if result['success']:
                logger.info(f"Translation job completed successfully: {job.job_uuid}")
                # Refresh so the notification code sees the final status.
                db.session.refresh(job)
                try:
                    notification_service = NotificationService()
                    notification_service.send_job_completion_notification(job)
                    # The DB notification intentionally bypasses the status check.
                    notification_service.send_job_completion_db_notification_direct(job)
                except Exception as e:
                    logger.warning(f"Failed to send completion notification: {str(e)}")
                SystemLog.info(
                    'tasks.translation',
                    f'Translation job completed: {job.job_uuid}',
                    user_id=job.user_id,
                    job_id=job.id,
                    extra_data={
                        'total_cost': result.get('total_cost', 0),
                        'total_sentences': result.get('total_sentences', 0),
                        'output_files': list(result.get('output_files', {}).keys())
                    }
                )
            else:
                raise TranslationError(result.get('error', 'Unknown translation error'))
        except Exception as exc:
            # Re-fetch the job row: it may not exist (the ValueError above) or
            # may be stale after the failed translation attempt.
            # BUGFIX: the original dereferenced job.job_uuid unconditionally in
            # the log line, raising AttributeError when the lookup failed and
            # masking the real error. (The nested app_context() it opened here
            # was redundant — we are already inside one.)
            job = TranslationJob.query.get(job_id)
            job_ref = job.job_uuid if job else f"id={job_id}"
            logger.error(f"Translation job failed: {job_ref}. Error: {str(exc)}")
            if job:
                job.error_message = str(exc)
                job.retry_count = self.request.retries + 1
                if self.request.retries < self.max_retries:
                    job.update_status('RETRY')
                    # Back-off schedule: 30s, 60s, 120s.
                    countdown = [30, 60, 120][self.request.retries]
                    SystemLog.warning(
                        'tasks.translation',
                        f'Translation job retry scheduled: {job.job_uuid} (attempt {self.request.retries + 2})',
                        user_id=job.user_id,
                        job_id=job.id,
                        extra_data={
                            'error': str(exc),
                            'retry_count': self.request.retries + 1,
                            'countdown': countdown
                        }
                    )
                    logger.info(f"Retrying translation job in {countdown}s: {job.job_uuid}")
                    raise self.retry(exc=exc, countdown=countdown)
                # Retries exhausted: mark the job failed and notify exactly once.
                job.update_status('FAILED')
                try:
                    notification_service = NotificationService()
                    notification_service.send_job_failure_notification(job)
                    notification_service.send_job_failure_db_notification(job, str(exc))
                except Exception as e:
                    logger.warning(f"Failed to send failure notification: {str(e)}")
                # BUGFIX: the original contained a second, duplicate failure
                # notification block that e-mailed the user twice; removed.
                SystemLog.error(
                    'tasks.translation',
                    f'Translation job failed permanently: {job.job_uuid}',
                    user_id=job.user_id,
                    job_id=job.id,
                    extra_data={
                        'error': str(exc),
                        'total_retries': self.request.retries
                    }
                )
                logger.error(f"Translation job failed permanently: {job.job_uuid}")
            raise exc
@celery.task
def cleanup_old_files():
    """Periodic task: delete per-job upload directories past the retention window.

    A directory is removed when its mtime is older than FILE_RETENTION_DAYS and
    it is either an orphan (no TranslationJob row) or belongs to a job whose
    completion predates the cutoff.

    Returns:
        A summary dict (counts, MB freed, cutoff), or None when the upload
        folder does not exist.
    """
    app = create_app()
    with app.app_context():
        try:
            logger.info("Starting file cleanup task")
            upload_folder = Path(app.config.get('UPLOAD_FOLDER'))
            retention_days = app.config.get('FILE_RETENTION_DAYS', 7)
            cutoff_date = datetime.utcnow() - timedelta(days=retention_days)
            if not upload_folder.exists():
                logger.warning(f"Upload folder does not exist: {upload_folder}")
                return
            # BUGFIX: removed the original's `deleted_files` counter, which was
            # declared but never incremented or reported.
            deleted_dirs = 0
            total_size_freed = 0
            # Each immediate subdirectory is named after a job UUID.
            for item in upload_folder.iterdir():
                if not item.is_dir():
                    continue
                try:
                    dir_mtime = datetime.fromtimestamp(item.stat().st_mtime)
                    if dir_mtime >= cutoff_date:
                        continue  # still within the retention window
                    # Measure before deleting, for the report.
                    dir_size = sum(f.stat().st_size for f in item.rglob('*') if f.is_file())
                    job_uuid = item.name
                    job = TranslationJob.query.filter_by(job_uuid=job_uuid).first()
                    if job:
                        # Only delete when the job finished before the cutoff.
                        if job.completed_at and job.completed_at < cutoff_date:
                            shutil.rmtree(item)
                            deleted_dirs += 1
                            total_size_freed += dir_size
                            logger.info(f"Cleaned up job directory: {job_uuid}")
                            SystemLog.info(
                                'tasks.cleanup',
                                f'Cleaned up files for completed job: {job_uuid}',
                                user_id=job.user_id,
                                job_id=job.id,
                                extra_data={
                                    'files_size_mb': dir_size / (1024 * 1024),
                                    'retention_days': retention_days
                                }
                            )
                    else:
                        # No database record: the directory is orphaned.
                        shutil.rmtree(item)
                        deleted_dirs += 1
                        total_size_freed += dir_size
                        logger.info(f"Cleaned up orphaned directory: {job_uuid}")
                except Exception as e:
                    logger.error(f"Failed to process directory {item}: {str(e)}")
                    continue
            cleanup_result = {
                'deleted_directories': deleted_dirs,
                'total_size_freed_mb': total_size_freed / (1024 * 1024),
                'retention_days': retention_days,
                'cutoff_date': cutoff_date.isoformat()
            }
            SystemLog.info(
                'tasks.cleanup',
                f'File cleanup completed: {deleted_dirs} directories, {total_size_freed / (1024 * 1024):.2f} MB freed',
                extra_data=cleanup_result
            )
            logger.info(f"File cleanup completed: {cleanup_result}")
            return cleanup_result
        except Exception as e:
            logger.error(f"File cleanup task failed: {str(e)}")
            SystemLog.error(
                'tasks.cleanup',
                f'File cleanup task failed: {str(e)}',
                extra_data={'error': str(e)}
            )
            raise e
@celery.task
def send_daily_admin_report():
    """Periodic task: e-mail administrators a summary of yesterday's usage.

    Combines API usage statistics and the system error summary for the last
    day, then delivers them via NotificationService.

    Returns:
        The NotificationService send result (truthy on success).
    """
    app = create_app()
    with app.app_context():
        try:
            logger.info("Generating daily admin report")
            from app.models.stats import APIUsageStats
            from app.services.notification_service import NotificationService
            # Yesterday's usage statistics (most recent single day).
            yesterday = datetime.utcnow() - timedelta(days=1)
            daily_stats = APIUsageStats.get_daily_statistics(days=1)
            # System error summary over the same period.
            error_summary = SystemLog.get_error_summary(days=1)
            # Build the report body (Chinese strings are the user-facing report).
            if daily_stats:
                yesterday_data = daily_stats[0]
                subject = f"每日系統報告 - {yesterday_data['date']}"
                message = f"""
昨日系統使用狀況:
• 翻譯任務: {yesterday_data['total_calls']} 個
• 成功任務: {yesterday_data['successful_calls']} 個
• 失敗任務: {yesterday_data['failed_calls']} 個
• 總成本: ${yesterday_data['total_cost']:.4f}
• 總 Token 數: {yesterday_data['total_tokens']}
系統錯誤摘要:
• 錯誤數量: {error_summary['total_errors']}
請查看管理後台了解詳細資訊。
"""
            else:
                subject = f"每日系統報告 - {yesterday.strftime('%Y-%m-%d')}"
                message = "昨日無翻譯任務記錄。"
            # Deliver the report to administrators.
            notification_service = NotificationService()
            result = notification_service.send_admin_notification(subject, message)
            if result:
                logger.info("Daily admin report sent successfully")
            else:
                logger.warning("Failed to send daily admin report")
            return result
        except Exception as e:
            logger.error(f"Daily admin report task failed: {str(e)}")
            raise e
# 定期任務設定
@celery.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
    """Register the recurring Celery beat schedule.

    Entries: daily file cleanup at 02:00 and the daily admin report at 08:00.
    """
    # (crontab schedule, task signature, beat entry name)
    schedule = (
        (crontab(hour=2, minute=0), cleanup_old_files.s(), 'cleanup-old-files-daily'),
        (crontab(hour=8, minute=0), send_daily_admin_report.s(), 'daily-admin-report'),
    )
    for when, signature, entry_name in schedule:
        sender.add_periodic_task(when, signature, name=entry_name)

34
app/utils/__init__.py Normal file
View File

@@ -0,0 +1,34 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
工具模組
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
from .decorators import login_required, admin_required
from .validators import validate_file, validate_languages
from .helpers import generate_filename, format_file_size
from .exceptions import (
DocumentTranslatorError,
AuthenticationError,
ValidationError,
TranslationError,
FileProcessingError
)
__all__ = [
'login_required',
'admin_required',
'validate_file',
'validate_languages',
'generate_filename',
'format_file_size',
'DocumentTranslatorError',
'AuthenticationError',
'ValidationError',
'TranslationError',
'FileProcessingError'
]

277
app/utils/api_auth.py Normal file
View File

@@ -0,0 +1,277 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
API 認證服務
用於與 PANJIT Auth API 整合認證
Author: PANJIT IT Team
Created: 2025-10-01
"""
import requests
import json
from datetime import datetime, timedelta
from typing import Optional, Dict, Any, Tuple
from flask import current_app
from .logger import get_logger
from .exceptions import AuthenticationError
logger = get_logger(__name__)
class APIAuthService:
    """Authentication service backed by the external PANJIT Auth API."""

    def __init__(self):
        # NOTE(review): the API base URL is hard-coded here rather than read
        # from config — confirm this is intentional for all environments.
        self.config = current_app.config
        self.api_base_url = "https://pj-auth-api.vercel.app"
        self.login_endpoint = "/api/auth/login"
        self.logout_endpoint = "/api/auth/logout"
        self.timeout = 30  # HTTP request timeout, seconds

    def authenticate_user(self, username: str, password: str) -> Dict[str, Any]:
        """Authenticate credentials against the remote auth API.

        Args:
            username: Account name.
            password: Password.

        Returns:
            Dict: Normalized user info plus token data (see _parse_auth_response).

        Raises:
            AuthenticationError: On authentication failure or transport errors.
        """
        try:
            login_url = f"{self.api_base_url}{self.login_endpoint}"
            payload = {
                "username": username,
                "password": password
            }
            headers = {
                "Content-Type": "application/json"
            }
            logger.info(f"正在透過 API 驗證使用者: {username}")
            # Send the authentication request.
            response = requests.post(
                login_url,
                json=payload,
                headers=headers,
                timeout=self.timeout
            )
            # Interpret the response by status code.
            # NOTE(review): AuthenticationError raised below is re-caught by the
            # final `except Exception` and re-wrapped with a generic message —
            # confirm whether that double-wrapping is intended.
            if response.status_code == 200:
                data = response.json()
                if data.get('success'):
                    logger.info(f"API 認證成功: {username}")
                    return self._parse_auth_response(data)
                else:
                    error_msg = data.get('error', '認證失敗')
                    logger.warning(f"API 認證失敗: {username} - {error_msg}")
                    raise AuthenticationError(f"認證失敗: {error_msg}")
            elif response.status_code == 401:
                data = response.json()
                error_msg = data.get('error', '帳號或密碼錯誤')
                logger.warning(f"API 認證失敗 (401): {username} - {error_msg}")
                raise AuthenticationError("帳號或密碼錯誤")
            else:
                logger.error(f"API 認證請求失敗: HTTP {response.status_code}")
                raise AuthenticationError(f"認證服務錯誤 (HTTP {response.status_code})")
        except requests.exceptions.Timeout:
            logger.error(f"API 認證請求超時: {username}")
            raise AuthenticationError("認證服務回應超時,請稍後再試")
        except requests.exceptions.ConnectionError:
            logger.error(f"API 認證連線錯誤: {username}")
            raise AuthenticationError("無法連接認證服務,請檢查網路連線")
        except requests.exceptions.RequestException as e:
            logger.error(f"API 認證請求錯誤: {username} - {str(e)}")
            raise AuthenticationError(f"認證服務錯誤: {str(e)}")
        except json.JSONDecodeError:
            logger.error(f"API 認證回應格式錯誤: {username}")
            raise AuthenticationError("認證服務回應格式錯誤")
        except Exception as e:
            logger.error(f"API 認證未知錯誤: {username} - {str(e)}")
            raise AuthenticationError(f"認證過程發生錯誤: {str(e)}")

    def _parse_auth_response(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Normalize the API's auth response into the app's user-info shape.

        Args:
            data: Raw API response body.

        Returns:
            Dict: Standardized user info, token fields, and the raw response.

        Raises:
            AuthenticationError: When the response cannot be parsed.
        """
        try:
            auth_data = data.get('data', {})
            user_info = auth_data.get('userInfo', {})
            # Parse token issue/expiry timestamps (ISO-8601, 'Z' suffix allowed).
            expires_at = None
            issued_at = None
            if 'expiresAt' in auth_data:
                try:
                    expires_at = datetime.fromisoformat(auth_data['expiresAt'].replace('Z', '+00:00'))
                except (ValueError, AttributeError):
                    logger.warning("無法解析 API Token 過期時間")
            if 'issuedAt' in auth_data:
                try:
                    issued_at = datetime.fromisoformat(auth_data['issuedAt'].replace('Z', '+00:00'))
                except (ValueError, AttributeError):
                    logger.warning("無法解析 API Token 發行時間")
            # Normalize user info. Scheme "A": the API `name` field already
            # combines display name + email (e.g. "劉怡明 ymirliu@panjit.com.tw"),
            # and is used for both username and display_name.
            api_name = user_info.get('name', '')
            api_email = user_info.get('email', '')
            result = {
                # Basic identity (username/display_name both use the API name)
                'username': api_name,
                'display_name': api_name,
                'email': api_email,
                'department': user_info.get('jobTitle'),  # jobTitle doubles as department
                'user_principal_name': api_email,
                # API-specific fields
                'api_user_id': user_info.get('id', ''),  # Azure Object ID
                'job_title': user_info.get('jobTitle'),
                'office_location': user_info.get('officeLocation'),
                'business_phones': user_info.get('businessPhones', []),
                # Token fields
                'api_access_token': auth_data.get('access_token', ''),
                'api_id_token': auth_data.get('id_token', ''),
                'api_token_type': auth_data.get('token_type', 'Bearer'),
                'api_expires_in': auth_data.get('expires_in', 0),
                'api_issued_at': issued_at,
                'api_expires_at': expires_at,
                # Raw response kept for auditing/debugging
                'full_api_response': data,
                'api_user_info': user_info
            }
            return result
        except Exception as e:
            logger.error(f"解析 API 回應時發生錯誤: {str(e)}")
            raise AuthenticationError(f"解析認證回應時發生錯誤: {str(e)}")

    def logout_user(self, access_token: str) -> bool:
        """Log the user out via the remote API.

        Args:
            access_token: The user's bearer access token.

        Returns:
            bool: True when the API confirmed the logout; False otherwise
            (errors are logged, never raised).
        """
        try:
            logout_url = f"{self.api_base_url}{self.logout_endpoint}"
            headers = {
                "Authorization": f"Bearer {access_token}",
                "Content-Type": "application/json"
            }
            response = requests.post(
                logout_url,
                headers=headers,
                timeout=self.timeout
            )
            if response.status_code == 200:
                data = response.json()
                if data.get('success'):
                    logger.info("API 登出成功")
                    return True
            logger.warning(f"API 登出失敗: HTTP {response.status_code}")
            return False
        except Exception as e:
            logger.error(f"API 登出時發生錯誤: {str(e)}")
            return False

    def validate_token(self, access_token: str) -> bool:
        """Shallow validity check for an access token.

        Only verifies the token is non-empty and has the three-part JWT shape;
        it does NOT verify signature or expiry.

        Args:
            access_token: Token to check.

        Returns:
            bool: True when the token looks like a JWT.
        """
        try:
            # The API exposes no dedicated validation endpoint; this is a
            # structural check only.
            if not access_token or len(access_token.split('.')) != 3:
                return False
            # TODO: implement full JWT validation (signature, exp claim, ...)
            return True
        except Exception as e:
            logger.error(f"驗證 Token 時發生錯誤: {str(e)}")
            return False

    def test_connection(self) -> bool:
        """Probe the API base URL.

        Returns:
            bool: True when the server is reachable (200 or 404 both count —
            404 still proves the host answered).
        """
        try:
            response = requests.get(
                self.api_base_url,
                timeout=10
            )
            return response.status_code in [200, 404]
        except Exception as e:
            logger.error(f"API 連線測試失敗: {str(e)}")
            return False

    def calculate_internal_expiry(self, api_expires_at: Optional[datetime], extend_days: int = 3) -> datetime:
        """Compute the internal session expiry from the API token expiry.

        Args:
            api_expires_at: API token expiry, or None when unknown.
            extend_days: Grace period added on top.

        Returns:
            datetime: API expiry + grace period, or now + grace period when the
            API expiry is unknown.
        """
        if api_expires_at:
            return api_expires_at + timedelta(days=extend_days)
        else:
            return datetime.utcnow() + timedelta(days=extend_days)

238
app/utils/decorators.py Normal file
View File

@@ -0,0 +1,238 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
裝飾器模組
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
from functools import wraps
from flask import session, jsonify, g, current_app
from flask_jwt_extended import jwt_required, get_jwt_identity, get_jwt
def login_required(f):
    """Session-based login guard.

    Rejects the request with 401 unless the session holds a user_id that maps
    to an existing User; on success populates ``g.current_user``,
    ``g.current_user_id`` and ``g.is_admin`` for downstream handlers.
    """
    @wraps(f)
    def decorated_function(*args, **kwargs):
        from app.utils.logger import get_logger
        from flask import request
        logger = get_logger(__name__)
        user_id = session.get('user_id')
        # Debug: trace every session check.
        # NOTE(review): the second line logs the full session contents at INFO
        # level — this may leak sensitive data into production logs; confirm.
        logger.info(f"🔐 [Session Check] Endpoint: {request.endpoint}, Method: {request.method}, URL: {request.url}")
        logger.info(f"🔐 [Session Data] UserID: {user_id}, SessionData: {dict(session)}, SessionID: {session.get('_id', 'unknown')}")
        if not user_id:
            logger.warning(f"❌ [Auth Failed] No user_id in session for {request.endpoint}")
            return jsonify({
                'success': False,
                'error': 'AUTHENTICATION_REQUIRED',
                'message': '請先登入'
            }), 401
        # Resolve the user and expose it on flask.g.
        from app.models import User
        user = User.query.get(user_id)
        if not user:
            # The session points at a deleted user — clear the stale session.
            session.clear()
            return jsonify({
                'success': False,
                'error': 'USER_NOT_FOUND',
                'message': '使用者不存在'
            }), 401
        g.current_user = user
        g.current_user_id = user.id
        g.is_admin = user.is_admin
        return f(*args, **kwargs)
    return decorated_function
def jwt_login_required(f):
    """JWT login guard.

    Validates the bearer token via flask_jwt_extended and mirrors its claims
    onto ``g`` (current_user_username, current_user_id, is_admin).
    Returns 401 when claim extraction fails.
    """
    @wraps(f)
    @jwt_required()
    def decorated_function(*args, **kwargs):
        from app.utils.logger import get_logger
        from flask import request
        logger = get_logger(__name__)
        try:
            username = get_jwt_identity()
            claims = get_jwt()
            # Expose identity/claims on flask.g for downstream code.
            g.current_user_username = username
            g.current_user_id = claims.get('user_id')
            g.is_admin = claims.get('is_admin', False)
            logger.info(f"🔑 [JWT Auth] User: {username}, UserID: {claims.get('user_id')}, Admin: {claims.get('is_admin')}")
        except Exception as e:
            logger.error(f"❌ [JWT Auth] JWT validation failed: {str(e)}")
            return jsonify({
                'success': False,
                'error': 'AUTHENTICATION_REQUIRED',
                'message': '認證失效,請重新登入'
            }), 401
        return f(*args, **kwargs)
    return decorated_function
def admin_required(f):
    """Admin guard on top of JWT authentication.

    Checks the ``is_admin`` claim AND re-verifies against the database that
    the user still exists and still has admin rights (defends against stale
    tokens). Returns 403 on insufficient rights, 401 on auth failure.
    """
    @wraps(f)
    @jwt_required()
    def decorated_function(*args, **kwargs):
        from app.utils.logger import get_logger
        from flask import request
        logger = get_logger(__name__)
        try:
            username = get_jwt_identity()
            claims = get_jwt()
            # Expose identity/claims on flask.g for downstream code.
            g.current_user_username = username
            g.current_user_id = claims.get('user_id')
            g.is_admin = claims.get('is_admin', False)
            logger.info(f"🔑 [JWT Admin Auth] User: {username}, UserID: {claims.get('user_id')}, Admin: {claims.get('is_admin')}")
            # First gate: the token must carry the admin claim.
            if not claims.get('is_admin', False):
                logger.warning(f"❌ [Admin Auth] Permission denied for user: {username}")
                return jsonify({
                    'success': False,
                    'error': 'PERMISSION_DENIED',
                    'message': '權限不足,需要管理員權限'
                }), 403
            # Second gate: the user must still exist and still be an admin.
            from app.models import User
            user = User.query.get(claims.get('user_id'))
            if not user:
                logger.error(f"❌ [Admin Auth] User not found: {claims.get('user_id')}")
                return jsonify({
                    'success': False,
                    'error': 'USER_NOT_FOUND',
                    'message': '使用者不存在'
                }), 401
            if not user.is_admin:
                logger.warning(f"❌ [Admin Auth] User no longer admin: {username}")
                return jsonify({
                    'success': False,
                    'error': 'PERMISSION_DENIED',
                    'message': '權限不足,需要管理員權限'
                }), 403
            # Expose the full User row as well.
            g.current_user = user
        except Exception as e:
            logger.error(f"❌ [Admin Auth] JWT validation failed: {str(e)}")
            return jsonify({
                'success': False,
                'error': 'AUTHENTICATION_REQUIRED',
                'message': '認證失效,請重新登入'
            }), 401
        return f(*args, **kwargs)
    return decorated_function
def validate_json(required_fields=None):
    """Decorator factory: require a JSON request body with the given fields.

    Args:
        required_fields: Iterable of keys that must be present in the body.

    Returns:
        A decorator that answers 400 with a uniform payload on any violation.
    """
    def decorator(f):
        @wraps(f)
        def decorated_function(*args, **kwargs):
            from flask import request

            def _fail(code, msg):
                # Uniform 400 payload for every validation failure.
                return jsonify({
                    'success': False,
                    'error': code,
                    'message': msg
                }), 400

            if not request.is_json:
                return _fail('INVALID_CONTENT_TYPE', '請求必須為 JSON 格式')
            data = request.get_json()
            if not data:
                return _fail('INVALID_JSON', 'JSON 資料格式錯誤')
            if required_fields:
                missing_fields = [field for field in required_fields if field not in data]
                if missing_fields:
                    return _fail('MISSING_FIELDS', f'缺少必要欄位: {", ".join(missing_fields)}')
            return f(*args, **kwargs)
        return decorated_function
    return decorator
def rate_limit(max_requests=100, per_seconds=3600):
    """Sliding-window per-IP rate limiter backed by a Redis sorted set.

    Fails open: when Redis is unavailable the request is allowed through.

    Args:
        max_requests: Maximum requests allowed per client within the window.
        per_seconds: Window length in seconds.
    """
    def decorator(f):
        @wraps(f)
        def decorated_function(*args, **kwargs):
            from flask import request
            import redis
            import time
            import uuid
            try:
                redis_client = redis.from_url(current_app.config['REDIS_URL'])
                # Key the window on client IP + endpoint function name.
                client_id = request.remote_addr
                key = f"rate_limit:{f.__name__}:{client_id}"
                current_time = int(time.time())
                window_start = current_time - per_seconds
                # Drop entries that have slid out of the window.
                redis_client.zremrangebyscore(key, 0, window_start)
                # Count requests remaining inside the window.
                current_requests = redis_client.zcard(key)
                if current_requests >= max_requests:
                    return jsonify({
                        'success': False,
                        'error': 'RATE_LIMIT_EXCEEDED',
                        'message': '請求過於頻繁,請稍後再試'
                    }), 429
                # BUGFIX: use a unique member per request. The original used
                # str(current_time) as the member, so every request within the
                # same second collapsed into a single sorted-set entry and the
                # limiter under-counted traffic.
                member = f"{current_time}:{uuid.uuid4().hex}"
                redis_client.zadd(key, {member: current_time})
                redis_client.expire(key, per_seconds)
            except Exception:
                # Redis unavailable — do not block the request.
                pass
            return f(*args, **kwargs)
        return decorated_function
    return decorator

52
app/utils/exceptions.py Normal file
View File

@@ -0,0 +1,52 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
自定義例外模組
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
class DocumentTranslatorError(Exception):
    """Base exception for the document translator system.

    Attributes:
        message: Human-readable error description.
        error_code: Optional machine-readable error identifier.
    """

    def __init__(self, message, error_code=None):
        self.message = message
        self.error_code = error_code
        super().__init__(message)
class AuthenticationError(DocumentTranslatorError):
    """Raised for authentication/authorization failures."""
    pass
class ValidationError(DocumentTranslatorError):
    """Raised when input validation fails."""
    pass
class TranslationError(DocumentTranslatorError):
    """Raised when the translation pipeline fails."""
    pass
class FileProcessingError(DocumentTranslatorError):
    """Raised when file handling (read/write/convert) fails."""
    pass
class APIError(DocumentTranslatorError):
    """Raised for external API errors."""
    pass
class ConfigurationError(DocumentTranslatorError):
    """Raised for configuration problems."""
    pass
class DatabaseError(DocumentTranslatorError):
    """Raised for database-related failures."""
    pass

280
app/utils/helpers.py Normal file
View File

@@ -0,0 +1,280 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
輔助工具模組
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
import os
import uuid
import shutil
from pathlib import Path
from datetime import datetime
from werkzeug.utils import secure_filename
from flask import current_app
def generate_filename(original_filename, job_uuid, file_type='original', language_code=None):
    """Build a sanitized storage filename for a job file.

    Pattern: ``{file_type}_{clean_stem}_{lang?}_{uuid8}{ext}`` where the stem
    is run through werkzeug's secure_filename and capped at 50 characters.
    """
    file_ext = Path(original_filename).suffix.lower()
    clean_name = secure_filename(Path(original_filename).stem)[:50]  # cap length
    uuid_prefix = job_uuid[:8]
    if file_type == 'translated':
        return f"translated_{clean_name}_{language_code}_{uuid_prefix}{file_ext}"
    if file_type == 'original':
        return f"original_{clean_name}_{uuid_prefix}{file_ext}"
    return f"{file_type}_{clean_name}_{uuid_prefix}{file_ext}"
def create_job_directory(job_uuid):
    """Create (idempotently) and return the per-job upload directory."""
    base = Path(current_app.config.get('UPLOAD_FOLDER'))
    job_dir = base / job_uuid
    job_dir.mkdir(parents=True, exist_ok=True)
    return job_dir
def save_uploaded_file(file_obj, job_uuid):
    """Persist an uploaded file into the job's directory.

    Returns:
        On success ``{'success': True, 'filename', 'file_path', 'file_size'}``;
        on failure ``{'success': False, 'error': <message>}``.
    """
    try:
        job_dir = create_job_directory(job_uuid)
        filename = generate_filename(file_obj.filename, job_uuid, 'original')
        destination = job_dir / filename
        file_obj.save(str(destination))
        return {
            'success': True,
            'filename': filename,
            'file_path': str(destination),
            'file_size': destination.stat().st_size,
        }
    except Exception as e:
        return {
            'success': False,
            'error': str(e),
        }
def cleanup_job_directory(job_uuid):
    """Delete a job's upload directory.

    Returns True only when the directory existed and was removed; False when
    it was absent or the deletion failed.
    """
    try:
        job_dir = Path(current_app.config.get('UPLOAD_FOLDER')) / job_uuid
        if not (job_dir.exists() and job_dir.is_dir()):
            return False
        shutil.rmtree(job_dir)
        return True
    except Exception:
        return False
def format_file_size(size_bytes):
    """Render a byte count as a human-readable string (e.g. ``1.5 KB``)."""
    if size_bytes == 0:
        return "0 B"
    units = ("B", "KB", "MB", "GB", "TB")
    value = size_bytes
    unit_index = 0
    while value >= 1024 and unit_index < len(units) - 1:
        value /= 1024.0
        unit_index += 1
    return f"{value:.1f} {units[unit_index]}"
def get_file_icon(file_extension):
    """Map a file extension (case-insensitive) to a UI icon name.

    Unknown extensions fall back to the generic ``file`` icon.
    """
    ext = file_extension.lower()
    for icon, extensions in (
        ('file-word', ('.docx', '.doc')),
        ('file-powerpoint', ('.pptx', '.ppt')),
        ('file-excel', ('.xlsx', '.xls')),
        ('file-pdf', ('.pdf',)),
    ):
        if ext in extensions:
            return icon
    return 'file'
def calculate_processing_time(start_time, end_time=None):
"""計算處理時間"""
if not start_time:
return None
if not end_time:
end_time = datetime.utcnow()
if isinstance(start_time, str):
start_time = datetime.fromisoformat(start_time.replace('Z', '+00:00'))
if isinstance(end_time, str):
end_time = datetime.fromisoformat(end_time.replace('Z', '+00:00'))
duration = end_time - start_time
# 轉換為秒
total_seconds = int(duration.total_seconds())
if total_seconds < 60:
return f"{total_seconds}"
elif total_seconds < 3600:
minutes = total_seconds // 60
seconds = total_seconds % 60
return f"{minutes}{seconds}"
else:
hours = total_seconds // 3600
minutes = (total_seconds % 3600) // 60
return f"{hours}小時{minutes}"
def generate_download_token(job_uuid, language_code, user_id):
    """Create a SHA-256 download token bound to job, language, user and the
    current Unix timestamp, salted with the app SECRET_KEY."""
    import hashlib
    import time
    secret_key = current_app.config.get('SECRET_KEY', 'default_secret')
    payload = f"{job_uuid}:{language_code}:{user_id}:{int(time.time())}:{secret_key}"
    return hashlib.sha256(payload.encode()).hexdigest()
def verify_download_token(token, job_uuid, language_code, user_id, max_age=3600):
    """Check a download token by brute-forcing timestamps within *max_age* seconds.

    NOTE(review): this recomputes a SHA-256 hash up to ``max_age`` times per
    verification because the token does not embed its timestamp — consider
    embedding the timestamp (or using itsdangerous) instead.
    """
    import time
    try:
        now = int(time.time())
        return any(
            token == generate_download_token_with_timestamp(
                job_uuid, language_code, user_id, now - offset
            )
            for offset in range(max_age)
        )
    except Exception:
        return False
def generate_download_token_with_timestamp(job_uuid, language_code, user_id, timestamp):
    """Deterministic variant of generate_download_token for a given timestamp."""
    import hashlib
    secret_key = current_app.config.get('SECRET_KEY', 'default_secret')
    payload = f"{job_uuid}:{language_code}:{user_id}:{timestamp}:{secret_key}"
    return hashlib.sha256(payload.encode()).hexdigest()
def get_supported_languages():
    """Return the supported language codes mapped to their display names."""
    language_pairs = (
        ('auto', '自動偵測'),
        ('zh-CN', '簡體中文'),
        ('zh-TW', '繁體中文'),
        ('en', '英文'),
        ('ja', '日文'),
        ('ko', '韓文'),
        ('vi', '越南文'),
        ('th', '泰文'),
        ('id', '印尼文'),
        ('ms', '馬來文'),
        ('es', '西班牙文'),
        ('fr', '法文'),
        ('de', '德文'),
        ('ru', '俄文'),
    )
    return dict(language_pairs)
def parse_json_field(json_str):
    """Safely decode a JSON-encoded field.

    Strings are parsed; non-string truthy values pass through unchanged;
    falsy input or a parse failure yields None.
    """
    import json
    if not json_str:
        return None
    if not isinstance(json_str, str):
        return json_str
    try:
        return json.loads(json_str)
    except (json.JSONDecodeError, TypeError):
        return None
def format_datetime(dt, format_type='full'):
    """Format a datetime (or ISO-8601 string) per *format_type*.

    Types: 'date', 'time', 'short', anything else = full timestamp.
    Returns None for falsy input; unparseable strings pass through unchanged.
    """
    if not dt:
        return None
    if isinstance(dt, str):
        try:
            dt = datetime.fromisoformat(dt.replace('Z', '+00:00'))
        except ValueError:
            return dt
    patterns = {
        'date': '%Y-%m-%d',
        'time': '%H:%M:%S',
        'short': '%Y-%m-%d %H:%M',
    }
    return dt.strftime(patterns.get(format_type, '%Y-%m-%d %H:%M:%S'))
def create_response(success=True, data=None, message=None, error=None, error_code=None):
    """Build the project's uniform API response dict.

    ``error`` sets the ``error`` key (to ``error_code`` or 'ERROR') and, when
    no explicit message was given, reuses the error text as the message.
    """
    response = {'success': success}
    if data is not None:
        response['data'] = data
    if message:
        response['message'] = message
    if error:
        response['error'] = error_code or 'ERROR'
        response.setdefault('message', error)
    return response

View File

@@ -0,0 +1,248 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
圖像預處理工具 - 用於提升 OCR 識別準確度
Author: PANJIT IT Team
Created: 2025-10-01
Modified: 2025-10-01
"""
import io
import numpy as np
from PIL import Image, ImageEnhance, ImageFilter
from typing import Optional, Tuple
from app.utils.logger import get_logger
logger = get_logger(__name__)
# 檢查 OpenCV 是否可用
try:
import cv2
_HAS_OPENCV = True
logger.info("OpenCV is available for advanced image preprocessing")
except ImportError:
_HAS_OPENCV = False
logger.warning("OpenCV not available, using PIL-only preprocessing")
class ImagePreprocessor:
    """Image preprocessor that improves OCR quality on scanned documents."""

    def __init__(self, use_opencv: bool = True):
        """
        Initialize the preprocessor.

        Args:
            use_opencv: Use OpenCV for advanced processing (only effective when
                OpenCV is actually installed; see the module-level probe).
        """
        self.use_opencv = use_opencv and _HAS_OPENCV
        logger.info(f"ImagePreprocessor initialized (OpenCV: {self.use_opencv})")

    def preprocess_for_ocr(self, image_bytes: bytes,
                           enhance_level: str = 'medium') -> bytes:
        """
        Preprocess an image before OCR.

        Args:
            image_bytes: Raw image bytes.
            enhance_level: Enhancement level ('low', 'medium', 'high').

        Returns:
            Processed image bytes as PNG; on any failure the ORIGINAL bytes
            are returned unchanged (best-effort behavior).
        """
        try:
            # 1. Load the image
            image = Image.open(io.BytesIO(image_bytes))
            original_mode = image.mode
            logger.debug(f"Original image: {image.size}, mode={original_mode}")
            # 2. Convert to RGB when the mode is neither RGB nor grayscale
            if image.mode not in ('RGB', 'L'):
                image = image.convert('RGB')
                logger.debug(f"Converted to RGB mode")
            # 3. Pick the pipeline for the requested enhancement level
            if self.use_opencv:
                processed_image = self._preprocess_with_opencv(image, enhance_level)
            else:
                processed_image = self._preprocess_with_pil(image, enhance_level)
            # 4. Serialize to PNG bytes
            output_buffer = io.BytesIO()
            processed_image.save(output_buffer, format='PNG', optimize=True)
            processed_bytes = output_buffer.getvalue()
            logger.info(f"Image preprocessed: {len(image_bytes)} -> {len(processed_bytes)} bytes (level={enhance_level})")
            return processed_bytes
        except Exception as e:
            logger.error(f"Image preprocessing failed: {e}, returning original image")
            return image_bytes  # fall back to the unmodified input

    def _preprocess_with_opencv(self, image: Image.Image, level: str) -> Image.Image:
        """Advanced pipeline using OpenCV: grayscale -> denoise -> CLAHE ->
        optional sharpen -> optional adaptive threshold."""
        # PIL Image -> NumPy array
        img_array = np.array(image)
        # Convert to BGR (OpenCV channel order) when the input is 3-channel
        if len(img_array.shape) == 3 and img_array.shape[2] == 3:
            img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
        else:
            img_bgr = img_array
        # 1. Grayscale
        gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
        logger.debug("Applied grayscale conversion (OpenCV)")
        # 2. Denoise — strength scales with the requested level
        if level == 'high':
            denoised = cv2.fastNlMeansDenoising(gray, None, h=10, templateWindowSize=7, searchWindowSize=21)
            logger.debug("Applied strong denoising (h=10)")
        elif level == 'medium':
            denoised = cv2.fastNlMeansDenoising(gray, None, h=7, templateWindowSize=7, searchWindowSize=21)
            logger.debug("Applied medium denoising (h=7)")
        else:
            # Low level: edge-preserving bilateral filter
            denoised = cv2.bilateralFilter(gray, 5, 50, 50)
            logger.debug("Applied light denoising (bilateral)")
        # 3. Contrast enhancement via CLAHE
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(denoised)
        logger.debug("Applied CLAHE contrast enhancement")
        # 4. Sharpening (high level only)
        if level == 'high':
            kernel = np.array([[-1,-1,-1],
                               [-1, 9,-1],
                               [-1,-1,-1]])
            sharpened = cv2.filter2D(enhanced, -1, kernel)
            logger.debug("Applied sharpening filter")
        else:
            sharpened = enhanced
        # 5. Adaptive binarization (medium/high only)
        if level in ('medium', 'high'):
            binary = cv2.adaptiveThreshold(
                sharpened, 255,
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY,
                blockSize=11,
                C=2
            )
            logger.debug("Applied adaptive thresholding")
            final_image = binary
        else:
            final_image = sharpened
        # NumPy array -> PIL Image
        return Image.fromarray(final_image)

    def _preprocess_with_pil(self, image: Image.Image, level: str) -> Image.Image:
        """Fallback pipeline using PIL only (when OpenCV is unavailable):
        grayscale -> contrast -> optional sharpen -> optional median filter."""
        # 1. Grayscale
        gray = image.convert('L')
        logger.debug("Applied grayscale conversion (PIL)")
        # 2. Contrast enhancement — factor scales with the requested level
        enhancer = ImageEnhance.Contrast(gray)
        if level == 'high':
            contrast_factor = 2.0
        elif level == 'medium':
            contrast_factor = 1.5
        else:
            contrast_factor = 1.2
        enhanced = enhancer.enhance(contrast_factor)
        logger.debug(f"Applied contrast enhancement (factor={contrast_factor})")
        # 3. Sharpening (medium/high only)
        if level in ('medium', 'high'):
            sharpness = ImageEnhance.Sharpness(enhanced)
            sharp_factor = 2.0 if level == 'high' else 1.5
            sharpened = sharpness.enhance(sharp_factor)
            logger.debug(f"Applied sharpening (factor={sharp_factor})")
        else:
            sharpened = enhanced
        # 4. Denoising via median filter (high level only)
        if level == 'high':
            denoised = sharpened.filter(ImageFilter.MedianFilter(size=3))
            logger.debug("Applied median filter (size=3)")
        else:
            denoised = sharpened
        return denoised

    def auto_detect_enhance_level(self, image_bytes: bytes) -> str:
        """
        Heuristically pick the enhancement level from image quality metrics.

        Args:
            image_bytes: Raw image bytes.

        Returns:
            Suggested level ('low', 'medium', 'high'); 'medium' on any error.
        """
        try:
            image = Image.open(io.BytesIO(image_bytes))
            if self.use_opencv:
                # OpenCV path: sharpness + contrast metrics
                img_array = np.array(image.convert('L'))
                # Laplacian variance — low values indicate blur
                laplacian_var = cv2.Laplacian(img_array, cv2.CV_64F).var()
                # Contrast approximated by pixel standard deviation
                contrast = np.std(img_array)
                logger.debug(f"Image quality metrics: laplacian_var={laplacian_var:.2f}, contrast={contrast:.2f}")
                # Blurrier / flatter images get stronger enhancement
                # (thresholds are heuristic)
                if laplacian_var < 50 or contrast < 40:
                    return 'high'
                elif laplacian_var < 100 or contrast < 60:
                    return 'medium'
                else:
                    return 'low'
            else:
                # PIL-only path: contrast heuristic alone
                gray = image.convert('L')
                img_array = np.array(gray)
                contrast = np.std(img_array)
                if contrast < 40:
                    return 'high'
                elif contrast < 60:
                    return 'medium'
                else:
                    return 'low'
        except Exception as e:
            logger.error(f"Auto enhance level detection failed: {e}")
            return 'medium'  # safe default

    def preprocess_smart(self, image_bytes: bytes) -> bytes:
        """
        Smart preprocessing: auto-detect and apply the best enhancement level.

        Args:
            image_bytes: Raw image bytes.

        Returns:
            Processed image bytes.
        """
        enhance_level = self.auto_detect_enhance_level(image_bytes)
        logger.info(f"Auto-detected enhancement level: {enhance_level}")
        return self.preprocess_for_ocr(image_bytes, enhance_level)

232
app/utils/ldap_auth.py Normal file
View File

@@ -0,0 +1,232 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
LDAP 認證服務
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
import time
from ldap3 import Server, Connection, SUBTREE, ALL_ATTRIBUTES
from flask import current_app
from .logger import get_logger
from .exceptions import AuthenticationError
logger = get_logger(__name__)
class LDAPAuthService:
"""LDAP 認證服務"""
    def __init__(self):
        """Read LDAP connection settings from the Flask app config."""
        self.config = current_app.config
        self.server_url = self.config.get('LDAP_SERVER')
        self.port = self.config.get('LDAP_PORT', 389)
        self.use_ssl = self.config.get('LDAP_USE_SSL', False)
        self.bind_user_dn = self.config.get('LDAP_BIND_USER_DN')
        self.bind_password = self.config.get('LDAP_BIND_USER_PASSWORD')
        self.search_base = self.config.get('LDAP_SEARCH_BASE')
        # Attribute used to match the login name (default: userPrincipalName).
        self.login_attr = self.config.get('LDAP_USER_LOGIN_ATTR', 'userPrincipalName')
def create_connection(self, retries=3):
"""建立 LDAP 連線(帶重試機制)"""
for attempt in range(retries):
try:
server = Server(
self.server_url,
port=self.port,
use_ssl=self.use_ssl,
get_info=ALL_ATTRIBUTES
)
conn = Connection(
server,
user=self.bind_user_dn,
password=self.bind_password,
auto_bind=True,
raise_exceptions=True
)
logger.info("LDAP connection established successfully")
return conn
except Exception as e:
logger.error(f"LDAP connection attempt {attempt + 1} failed: {str(e)}")
if attempt == retries - 1:
raise AuthenticationError(f"LDAP connection failed: {str(e)}")
time.sleep(1)
return None
def authenticate_user(self, username, password):
"""驗證使用者憑證"""
try:
conn = self.create_connection()
if not conn:
raise AuthenticationError("Unable to connect to LDAP server")
# 搜尋使用者
search_filter = f"(&(objectClass=person)(objectCategory=person)({self.login_attr}={username}))"
conn.search(
self.search_base,
search_filter,
SUBTREE,
attributes=['displayName', 'mail', 'sAMAccountName', 'userPrincipalName', 'department']
)
if not conn.entries:
logger.warning(f"User not found: {username}")
raise AuthenticationError("帳號不存在")
user_entry = conn.entries[0]
user_dn = user_entry.entry_dn
# 驗證使用者密碼
try:
user_conn = Connection(
conn.server,
user=user_dn,
password=password,
auto_bind=True,
raise_exceptions=True
)
user_conn.unbind()
# 返回使用者資訊
user_info = {
'username': str(user_entry.sAMAccountName) if user_entry.sAMAccountName else username,
'display_name': str(user_entry.displayName) if user_entry.displayName else username,
'email': str(user_entry.mail) if user_entry.mail else f"{username}@panjit.com.tw",
'department': str(user_entry.department) if hasattr(user_entry, 'department') and user_entry.department else None,
'user_principal_name': str(user_entry.userPrincipalName) if user_entry.userPrincipalName else username
}
logger.info(f"User authenticated successfully: {username}")
return user_info
except Exception as e:
logger.warning(f"Authentication failed for user {username}: {str(e)}")
raise AuthenticationError("密碼錯誤")
except AuthenticationError:
raise
except Exception as e:
logger.error(f"LDAP authentication error: {str(e)}")
raise AuthenticationError(f"認證服務錯誤: {str(e)}")
finally:
if 'conn' in locals() and conn:
conn.unbind()
def search_users(self, search_term, limit=20):
"""搜尋使用者"""
try:
conn = self.create_connection()
if not conn:
return []
# 建構搜尋過濾器
search_filter = f"""(&
(objectClass=person)
(objectCategory=person)
(!(userAccountControl:1.2.840.113556.1.4.803:=2))
(|
(displayName=*{search_term}*)
(mail=*{search_term}*)
(sAMAccountName=*{search_term}*)
(userPrincipalName=*{search_term}*)
)
)"""
# 移除多餘空白
search_filter = ' '.join(search_filter.split())
conn.search(
self.search_base,
search_filter,
SUBTREE,
attributes=['sAMAccountName', 'displayName', 'mail', 'department'],
size_limit=limit
)
results = []
for entry in conn.entries:
results.append({
'username': str(entry.sAMAccountName) if entry.sAMAccountName else '',
'display_name': str(entry.displayName) if entry.displayName else '',
'email': str(entry.mail) if entry.mail else '',
'department': str(entry.department) if hasattr(entry, 'department') and entry.department else ''
})
logger.info(f"LDAP search found {len(results)} results for term: {search_term}")
return results
except Exception as e:
logger.error(f"LDAP search error: {str(e)}")
return []
finally:
if 'conn' in locals() and conn:
conn.unbind()
def get_user_info(self, username):
"""取得使用者詳細資訊"""
try:
conn = self.create_connection()
if not conn:
return None
# 支援 sAMAccountName 和 userPrincipalName 格式
if '@' in username:
search_filter = f"""(&
(objectClass=person)
(|
(userPrincipalName={username})
(mail={username})
)
)"""
else:
search_filter = f"(&(objectClass=person)(sAMAccountName={username}))"
# 移除多餘空白
search_filter = ' '.join(search_filter.split())
conn.search(
self.search_base,
search_filter,
SUBTREE,
attributes=['displayName', 'mail', 'sAMAccountName', 'userPrincipalName', 'department']
)
if not conn.entries:
return None
entry = conn.entries[0]
return {
'username': str(entry.sAMAccountName) if entry.sAMAccountName else username,
'display_name': str(entry.displayName) if entry.displayName else username,
'email': str(entry.mail) if entry.mail else f"{username}@panjit.com.tw",
'department': str(entry.department) if hasattr(entry, 'department') and entry.department else None,
'user_principal_name': str(entry.userPrincipalName) if entry.userPrincipalName else ''
}
except Exception as e:
logger.error(f"Error getting user info for {username}: {str(e)}")
return None
finally:
if 'conn' in locals() and conn:
conn.unbind()
def test_connection(self):
"""測試 LDAP 連線(健康檢查用)"""
try:
conn = self.create_connection(retries=1)
if conn:
conn.unbind()
return True
return False
except Exception as e:
logger.error(f"LDAP connection test failed: {str(e)}")
return False

126
app/utils/logger.py Normal file
View File

@@ -0,0 +1,126 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
日誌管理模組
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
import logging
import os
from pathlib import Path
from logging.handlers import RotatingFileHandler
from flask import current_app, has_request_context, request, g
def get_logger(name):
    """Return the named logger, attaching handlers on first use."""
    logger = logging.getLogger(name)
    if logger.handlers:
        # Handlers already configured — avoid duplicates.
        return logger
    setup_logger(logger)
    return logger
def setup_logger(logger):
    """Attach a rotating-file handler and a console handler to *logger*."""
    # Prefer Flask config when inside a request context; fall back to env.
    if has_request_context() and current_app:
        level_name = current_app.config.get('LOG_LEVEL', 'INFO')
        log_file = current_app.config.get('LOG_FILE', 'logs/app.log')
    else:
        level_name = os.environ.get('LOG_LEVEL', 'INFO')
        log_file = os.environ.get('LOG_FILE', 'logs/app.log')

    # Make sure the log directory exists before opening the file.
    Path(log_file).parent.mkdir(parents=True, exist_ok=True)

    level = getattr(logging, level_name.upper())
    logger.setLevel(level)

    formatter = logging.Formatter(
        '%(asctime)s [%(levelname)s] %(name)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    # File handler with rotation: 10MB per file, 5 backups.
    rotating = RotatingFileHandler(
        log_file,
        maxBytes=10 * 1024 * 1024,
        backupCount=5,
        encoding='utf-8'
    )
    rotating.setLevel(level)
    rotating.setFormatter(formatter)
    logger.addHandler(rotating)

    # Console handler is pinned at INFO regardless of the file level.
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    console.setFormatter(formatter)
    logger.addHandler(console)
class DatabaseLogHandler(logging.Handler):
    """Logging handler that persists records to the SystemLog table."""

    def emit(self, record):
        """Write one log record to the database; never raises."""
        try:
            from app.models.log import SystemLog

            user_id = None
            extra = {}
            # Enrich with request metadata when handling an HTTP request.
            if has_request_context():
                user_id = g.get('current_user_id')
                extra = {
                    'method': request.method,
                    'endpoint': request.endpoint,
                    'url': request.url,
                    'ip_address': request.remote_addr,
                    'user_agent': request.headers.get('User-Agent'),
                }

            SystemLog.log(
                level=record.levelname,
                module=record.name,
                message=record.getMessage(),
                user_id=user_id,
                job_id=None,
                extra_data=extra or None,
            )
        except Exception:
            # Logging must never crash the main program.
            pass
def init_logging(app):
    """Configure application-wide logging for the Flask *app*."""
    root = logging.getLogger()
    root.setLevel(logging.INFO)

    # Persist WARNING+ records to the database when a DB is configured.
    if app.config.get('SQLALCHEMY_DATABASE_URI'):
        db_handler = DatabaseLogHandler()
        db_handler.setLevel(logging.WARNING)
        root.addHandler(db_handler)

    # Attach file/console handlers to the Flask app logger once.
    if not app.logger.handlers:
        setup_logger(app.logger)

    # Quiet noisy third-party loggers.
    for noisy in ('werkzeug', 'urllib3', 'requests'):
        logging.getLogger(noisy).setLevel(logging.WARNING)

84
app/utils/response.py Normal file
View File

@@ -0,0 +1,84 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
API 響應處理工具
Author: PANJIT IT Team
Created: 2025-09-02
"""
from datetime import datetime
from typing import Dict, Any, List, Union
from app.utils.timezone import to_taiwan_time, format_taiwan_time
def convert_datetime_to_taiwan(data: Union[Dict, List, Any]) -> Union[Dict, List, Any]:
    """遞迴轉換資料中的 datetime 欄位為台灣時間

    Recursively walks dicts/lists, converting datetime values — and ISO
    strings stored under well-known timestamp keys — into Taiwan-time ISO
    strings. Non-datetime values pass through unchanged.

    Args:
        data: 要轉換的資料(字典、列表或其他)

    Returns:
        轉換後的資料
    """
    if isinstance(data, dict):
        result = {}
        for key, value in data.items():
            if isinstance(value, datetime):
                # Datetime objects become Taiwan-time ISO strings.
                result[key] = to_taiwan_time(value).isoformat()
            elif key in ('created_at', 'updated_at', 'completed_at',
                         'processing_started_at', 'last_login', 'timestamp'):
                # Well-known timestamp keys: try to reparse string values.
                if isinstance(value, str):
                    try:
                        dt = datetime.fromisoformat(value.replace('Z', '+00:00'))
                        result[key] = to_taiwan_time(dt).isoformat()
                    except (ValueError, TypeError):
                        # Narrowed from a bare `except:` — only parse errors
                        # should fall back to the original string; anything
                        # else (e.g. programming errors) must propagate.
                        result[key] = value
                else:
                    result[key] = convert_datetime_to_taiwan(value)
            else:
                result[key] = convert_datetime_to_taiwan(value)
        return result
    if isinstance(data, list):
        return [convert_datetime_to_taiwan(item) for item in data]
    return data
def create_taiwan_response(success: bool = True, data: Any = None, message: str = '',
                           error: str = '', **kwargs) -> Dict[str, Any]:
    """創建包含台灣時區轉換的 API 響應

    Args:
        success: 是否成功
        data: 響應資料
        message: 成功訊息
        error: 錯誤訊息
        **kwargs: 其他參數

    Returns:
        包含台灣時區的響應字典
    """
    response: Dict[str, Any] = {
        'success': success,
        'timestamp': format_taiwan_time(datetime.now(), "%Y-%m-%d %H:%M:%S"),
    }

    if data is not None:
        response['data'] = convert_datetime_to_taiwan(data)
    if message:
        response['message'] = message
    if error:
        response['error'] = error

    # Extra keyword arguments also get their datetimes normalized.
    response.update(
        {key: convert_datetime_to_taiwan(value) for key, value in kwargs.items()}
    )
    return response

104
app/utils/timezone.py Normal file
View File

@@ -0,0 +1,104 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
時區工具函數
Author: PANJIT IT Team
Created: 2025-09-02
"""
from datetime import datetime, timezone, timedelta
from typing import Optional
# Taiwan timezone, fixed UTC+8 offset (no DST).
TAIWAN_TZ = timezone(timedelta(hours=8))


def now_taiwan() -> datetime:
    """取得當前台灣時間(UTC+8)"""
    return datetime.now(tz=TAIWAN_TZ)
def now_utc() -> datetime:
    """取得當前 UTC 時間"""
    return datetime.now(tz=timezone.utc)
def to_taiwan_time(dt: Optional[datetime]) -> Optional[datetime]:
    """將 datetime 轉換為台灣時間

    Args:
        dt: datetime 物件(可能是 naive 或 aware),或 None

    Returns:
        台灣時區的 datetime 物件;輸入為 None 時回傳 None
        (annotation widened to Optional — the original declared ``datetime``
        but explicitly returns None for None input)
    """
    if dt is None:
        return None
    # Naive datetimes are assumed to be UTC.
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(TAIWAN_TZ)
def to_utc_time(dt: Optional[datetime]) -> Optional[datetime]:
    """將 datetime 轉換為 UTC 時間

    Args:
        dt: datetime 物件(可能是 naive 或 aware),或 None

    Returns:
        UTC 時區的 datetime 物件;輸入為 None 時回傳 None
        (annotation widened to Optional — the original declared ``datetime``
        but explicitly returns None for None input)
    """
    if dt is None:
        return None
    # Naive datetimes are assumed to be Taiwan local time.
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=TAIWAN_TZ)
    return dt.astimezone(timezone.utc)
def format_taiwan_time(dt: datetime, format_str: str = "%Y-%m-%d %H:%M:%S") -> str:
    """格式化台灣時間為字符串

    Args:
        dt: datetime 物件
        format_str: 格式化字符串

    Returns:
        格式化後的時間字符串;輸入為 None 時回傳空字串
    """
    if dt is None:
        return ""
    return to_taiwan_time(dt).strftime(format_str)
def parse_taiwan_time(time_str: str, format_str: str = "%Y-%m-%d %H:%M:%S") -> datetime:
    """解析台灣時間字符串為 datetime

    Args:
        time_str: 時間字符串
        format_str: 解析格式

    Returns:
        台灣時區的 datetime 物件
    """
    # Parse as naive, then stamp with the Taiwan timezone.
    parsed = datetime.strptime(time_str, format_str)
    return parsed.replace(tzinfo=TAIWAN_TZ)
# Backward-compatible replacement for the deprecated datetime.utcnow().
def utcnow() -> datetime:
    """取得當前 UTC 時間(替代 datetime.utcnow())

    注意:新代碼建議使用 now_taiwan() 或 now_utc()
    """
    # Strip tzinfo to return a naive UTC datetime for compatibility.
    return datetime.now(timezone.utc).replace(tzinfo=None)

203
app/utils/validators.py Normal file
View File

@@ -0,0 +1,203 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
驗證工具模組
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
import os
from pathlib import Path
from flask import current_app
from .exceptions import ValidationError
def validate_file(file_obj):
    """驗證上傳的檔案

    Checks presence, filename, extension whitelist and size limits.

    Raises:
        ValidationError: on any failed check.
    """
    if not file_obj:
        raise ValidationError("未選擇檔案", "NO_FILE")
    if not file_obj.filename:
        raise ValidationError("檔案名稱為空", "NO_FILENAME")

    # Extension whitelist check.
    file_ext = Path(file_obj.filename).suffix.lower()
    allowed = current_app.config.get(
        'ALLOWED_EXTENSIONS', {'.docx', '.doc', '.pptx', '.xlsx', '.xls', '.pdf'}
    )
    if file_ext not in allowed:
        raise ValidationError(
            f"不支援的檔案類型: {file_ext},支援的格式: {', '.join(allowed)}",
            "INVALID_FILE_TYPE"
        )

    # Determine the size by seeking to the end, then rewind for the caller.
    max_size = current_app.config.get('MAX_CONTENT_LENGTH', 26214400)  # 25MB
    file_obj.seek(0, os.SEEK_END)
    file_size = file_obj.tell()
    file_obj.seek(0)

    if file_size > max_size:
        raise ValidationError(
            f"檔案大小超過限制 ({format_file_size(max_size)})",
            "FILE_TOO_LARGE"
        )
    if file_size == 0:
        raise ValidationError("檔案為空", "EMPTY_FILE")

    return {
        'filename': file_obj.filename,
        'file_extension': file_ext,
        'file_size': file_size,
        'valid': True,
    }
def validate_languages(source_language, target_languages):
    """驗證語言設定

    Validates the source language and the list of target languages against
    the supported set, and rejects source/target overlap.

    Raises:
        ValidationError: on any invalid combination.
    """
    supported_languages = {
        'auto': '自動偵測',
        'zh-CN': '簡體中文',
        'zh-TW': '繁體中文',
        'en': '英文',
        'ja': '日文',
        'ko': '韓文',
        'vi': '越南文',
        'th': '泰文',
        'id': '印尼文',
        'ms': '馬來文',
        'es': '西班牙文',
        'fr': '法文',
        'de': '德文',
        'ru': '俄文',
    }

    if source_language and source_language not in supported_languages:
        raise ValidationError(
            f"不支援的來源語言: {source_language}",
            "INVALID_SOURCE_LANGUAGE"
        )

    # Must be a non-empty list (the empty case is covered by falsiness).
    if not target_languages or not isinstance(target_languages, list):
        raise ValidationError("必須指定至少一個目標語言", "NO_TARGET_LANGUAGES")
    if len(target_languages) > 10:
        raise ValidationError("目標語言數量過多,最多支援10個", "TOO_MANY_TARGET_LANGUAGES")

    unknown = [lang for lang in target_languages if lang not in supported_languages]
    if unknown:
        raise ValidationError(
            f"不支援的目標語言: {', '.join(unknown)}",
            "INVALID_TARGET_LANGUAGE"
        )

    # The source language may not also appear among the targets.
    if source_language and source_language != 'auto' and source_language in target_languages:
        raise ValidationError(
            "目標語言不能包含來源語言",
            "SOURCE_TARGET_OVERLAP"
        )

    return {
        'source_language': source_language or 'auto',
        'target_languages': target_languages,
        'supported_languages': supported_languages,
        'valid': True,
    }
def validate_job_uuid(job_uuid):
    """驗證任務UUID格式

    Raises:
        ValidationError: when the UUID is empty or malformed.
    """
    import uuid

    if not job_uuid:
        raise ValidationError("任務UUID不能為空", "INVALID_UUID")
    try:
        uuid.UUID(job_uuid)
    except ValueError:
        raise ValidationError("任務UUID格式錯誤", "INVALID_UUID")
    return True
def validate_pagination(page, per_page):
    """驗證分頁參數

    Coerces both values to int (defaults: page=1, per_page=20) and range-checks.

    Raises:
        ValidationError: on non-numeric or out-of-range values.
    """
    try:
        page = int(page) if page else 1
        per_page = int(per_page) if per_page else 20
    except (ValueError, TypeError):
        raise ValidationError("分頁參數必須為數字", "INVALID_PAGINATION")

    if page < 1:
        raise ValidationError("頁數必須大於0", "INVALID_PAGE")
    if not 1 <= per_page <= 100:
        raise ValidationError("每頁項目數必須在1-100之間", "INVALID_PER_PAGE")

    return page, per_page
def format_file_size(size_bytes):
    """格式化檔案大小顯示 (e.g. 1536 -> '1.5 KB')."""
    if size_bytes == 0:
        return "0 B"
    units = ["B", "KB", "MB", "GB", "TB"]
    idx = 0
    # Divide down until the value fits the largest applicable unit.
    while size_bytes >= 1024 and idx < len(units) - 1:
        size_bytes /= 1024.0
        idx += 1
    return f"{size_bytes:.1f} {units[idx]}"
def sanitize_filename(filename):
    """清理檔案名稱,移除不安全字元

    Keeps the extension; the stem has unsafe characters replaced with '_',
    whitespace collapsed to '_', edge dots/underscores trimmed, and length
    capped at 100 characters.
    """
    import re

    stem = Path(filename).stem
    ext = Path(filename).suffix

    cleaned = re.sub(r'[^\w\s.-]', '_', stem)
    cleaned = re.sub(r'\s+', '_', cleaned)
    cleaned = cleaned.strip('._')

    # Cap the stem length to keep paths manageable.
    return f"{cleaned[:100]}{ext}"
def validate_date_range(start_date, end_date):
    """驗證日期範圍

    Parses ISO date strings (accepting a trailing 'Z') and checks ordering.
    Falsy inputs pass through unchanged.

    Raises:
        ValidationError: on unparseable dates or start > end.
    """
    from datetime import datetime

    def _parse(value, message, code):
        # Falsy values (None, '') are returned untouched.
        if not value:
            return value
        try:
            return datetime.fromisoformat(value.replace('Z', '+00:00'))
        except ValueError:
            raise ValidationError(message, code)

    start_date = _parse(start_date, "開始日期格式錯誤", "INVALID_START_DATE")
    end_date = _parse(end_date, "結束日期格式錯誤", "INVALID_END_DATE")

    if start_date and end_date and start_date > end_date:
        raise ValidationError("開始日期不能晚於結束日期", "INVALID_DATE_RANGE")

    return start_date, end_date

233
app/websocket.py.disabled Normal file
View File

@@ -0,0 +1,233 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
WebSocket 服務模組
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
import os
from flask_socketio import SocketIO, emit, join_room, leave_room, disconnect
from flask_jwt_extended import decode_token, get_jwt
from flask import request
from functools import wraps
import logging
# Module-level Socket.IO server instance; bound to the Flask app later via
# init_websocket().
socketio = SocketIO(
    cors_allowed_origins="*",  # NOTE(review): wide-open CORS — confirm intended
    # Use eventlet for production and enable Redis message queue for multi-process/replica support
    async_mode='eventlet',
    message_queue=os.getenv('REDIS_URL'),
    logger=True,
    engineio_logger=False
)
# Maps Socket.IO session id -> {'user_id': ..., 'sid': ...} for connected clients.
connected_users = {}
logger = logging.getLogger(__name__)
def jwt_required_ws(f):
    """WebSocket JWT 驗證裝飾器 — reads the token from the query string."""
    @wraps(f)
    def decorated_function(*args, **kwargs):
        try:
            token = request.args.get('token')
            if not token:
                disconnect()
                return False
            # Decode the JWT and expose the user id to the wrapped handler.
            payload = decode_token(token)
            request.user_id = payload.get('sub')
            return f(*args, **kwargs)
        except Exception as e:
            logger.error(f"WebSocket authentication failed: {e}")
            disconnect()
            return False
    return decorated_function
@socketio.on('connect')
def handle_connect(auth):
    """處理客戶端連接 — authenticate via the token in the auth payload."""
    try:
        # Guard clause: reject unauthenticated connection attempts.
        if not auth or 'token' not in auth:
            logger.warning("Connection attempt without authentication")
            disconnect()
            return False

        decoded = decode_token(auth['token'])
        user_id = decoded.get('sub')

        # Track the session and join the user's private room.
        connected_users[request.sid] = {
            'user_id': user_id,
            'sid': request.sid
        }
        join_room(f"user_{user_id}")
        logger.info(f"User {user_id} connected with session {request.sid}")

        emit('connected', {
            'message': '連接成功',
            'user_id': user_id
        })
        return True
    except Exception as e:
        logger.error(f"Connection error: {e}")
        disconnect()
        return False
@socketio.on('disconnect')
def handle_disconnect():
    """處理客戶端斷開連接 — clean up room membership and tracking."""
    try:
        # Remove the session record (no-op if we never tracked it).
        info = connected_users.pop(request.sid, None)
        if info is not None:
            user_id = info['user_id']
            leave_room(f"user_{user_id}")
            logger.info(f"User {user_id} disconnected")
    except Exception as e:
        logger.error(f"Disconnect error: {e}")
@socketio.on('ping')
def handle_ping():
    """Heartbeat handler: reply with a pong echoing the client timestamp.

    NOTE(review): the timestamp is read from the handshake query string
    (request.args), not from the ping event payload — confirm this matches
    the client implementation.
    """
    emit('pong', {'timestamp': request.args.get('timestamp')})
@socketio.on('subscribe_job')
def handle_subscribe_job(data):
    """訂閱任務更新 — join the room for a specific job."""
    try:
        job_uuid = data.get('job_uuid')
        if not job_uuid:
            # Nothing to subscribe to without a job UUID.
            return
        join_room(f"job_{job_uuid}")
        logger.info(f"Client {request.sid} subscribed to job {job_uuid}")
        emit('subscribed', {'job_uuid': job_uuid})
    except Exception as e:
        logger.error(f"Subscribe job error: {e}")
@socketio.on('unsubscribe_job')
def handle_unsubscribe_job(data):
    """取消訂閱任務更新 — leave the room for a specific job."""
    try:
        job_uuid = data.get('job_uuid')
        if not job_uuid:
            # Nothing to unsubscribe from without a job UUID.
            return
        leave_room(f"job_{job_uuid}")
        logger.info(f"Client {request.sid} unsubscribed from job {job_uuid}")
        emit('unsubscribed', {'job_uuid': job_uuid})
    except Exception as e:
        logger.error(f"Unsubscribe job error: {e}")
# 工具函數:發送通知
def send_notification_to_user(user_id, notification_data):
    """
    向特定用戶發送通知

    Args:
        user_id: 用戶ID
        notification_data: 通知數據
    """
    try:
        # Emit only into the user's private room.
        room = f"user_{user_id}"
        socketio.emit('new_notification', notification_data, room=room, namespace='/')
        logger.info(f"Notification sent to user {user_id}")
    except Exception as e:
        logger.error(f"Failed to send notification: {e}")
def send_job_update(job_uuid, update_data):
    """
    發送任務更新

    Args:
        job_uuid: 任務UUID
        update_data: 更新數據
    """
    try:
        # Merge the UUID into the payload (update_data may override it).
        payload = {'job_uuid': job_uuid}
        payload.update(update_data)
        socketio.emit('job_update', payload, room=f"job_{job_uuid}", namespace='/')
        logger.info(f"Job update sent for {job_uuid}")
    except Exception as e:
        logger.error(f"Failed to send job update: {e}")
def broadcast_system_message(message, message_type='info'):
    """
    廣播系統消息給所有連接的用戶

    Args:
        message: 消息內容
        message_type: 消息類型
    """
    try:
        # Flask-SocketIO 5.x removed the `broadcast` keyword: a server-side
        # emit without a `room`/`to` argument already reaches all connected
        # clients, and passing broadcast=True raises a TypeError.
        socketio.emit(
            'system_message',
            {
                'message': message,
                'type': message_type
            },
            namespace='/'
        )
        logger.info(f"System message broadcasted: {message}")
    except Exception as e:
        logger.error(f"Failed to broadcast system message: {e}")
# 初始化函數
def init_websocket(app):
    """
    Initialize the Socket.IO server and bind it to the Flask app.

    Args:
        app: Flask application instance.

    Returns:
        The shared module-level SocketIO instance.
    """
    socketio.init_app(app)
    logger.info("WebSocket initialized")
    return socketio