commit 659971648146b898788a886a4b255be1c6d6d26f Author: beabigegg Date: Fri Oct 3 08:19:40 2025 +0800 1panel diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..2900578 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,17 @@ +{ + "permissions": { + "allow": [ + "WebFetch(domain:1panel.cn)", + "WebFetch(domain:github.com)", + "WebFetch(domain:blog.liushen.fun)", + "Read(//d/WORK/user_scrip/TOOL/Docker_OK/**)", + "Bash(cat:*)", + "Bash(grep:*)", + "Bash(awk:*)", + "Bash(test:*)", + "Bash(python3:*)" + ], + "deny": [], + "ask": [] + } +} \ No newline at end of file diff --git a/.env b/.env new file mode 100644 index 0000000..4b6c385 --- /dev/null +++ b/.env @@ -0,0 +1,107 @@ +# ============================================================================== +# PANJIT Document Translator V2 - 1Panel 部署環境配置 +# ============================================================================== +# +# 📝 IT 人員必須修改的項目: +# 1. SECRET_KEY - 生產環境安全金鑰 +# 2. JWT_SECRET_KEY - JWT 認證金鑰 +# 3. 
PORT - Web 服務端口(可選修改,預設 12010) +# +# 其他配置項目已預設好,通常不需修改 +# ============================================================================== + +# Flask 基本配置 +FLASK_ENV=production +FLASK_DEBUG=false + +# ⚠️ 必須修改:生產環境安全金鑰(使用隨機字串) +SECRET_KEY=production-secret-key-change-this-in-deployment + +# 資料庫配置 (MySQL) +DATABASE_URL=mysql+pymysql://A060:WLeSCi0yhtc7@mysql.theaken.com:33306/db_A060 +MYSQL_HOST=mysql.theaken.com +MYSQL_PORT=33306 +MYSQL_USER=A060 +MYSQL_PASSWORD=WLeSCi0yhtc7 +MYSQL_DATABASE=db_A060 +MYSQL_CHARSET=utf8mb4 + +# Redis 配置 (Celery & Cache) +# 注意:如果修改了 REDIS_PORT,需要同步修改下方 URL 中的端口 +REDIS_HOST=localhost +REDIS_URL=redis://localhost:6379/0 +CELERY_BROKER_URL=redis://localhost:6379/0 +CELERY_RESULT_BACKEND=redis://localhost:6379/0 + +# LDAP 認證配置 +LDAP_SERVER=panjit.com.tw +LDAP_PORT=389 +LDAP_USE_SSL=false +LDAP_BIND_USER_DN=CN=LdapBind,CN=Users,DC=PANJIT,DC=COM,DC=TW +LDAP_BIND_USER_PASSWORD=panjit2481 +LDAP_SEARCH_BASE=OU=PANJIT,DC=panjit,DC=com,DC=tw +LDAP_USER_LOGIN_ATTR=userPrincipalName + +# SMTP 郵件配置 +SMTP_SERVER=mail.panjit.com.tw +SMTP_PORT=25 +SMTP_USE_TLS=false +SMTP_USE_SSL=false +SMTP_AUTH_REQUIRED=false +SMTP_SENDER_EMAIL=document-translator-system@panjit.com.tw +SMTP_SENDER_PASSWORD= + +# 檔案儲存配置 +UPLOAD_FOLDER=uploads +MAX_CONTENT_LENGTH=104857600 +FILE_RETENTION_DAYS=30 + +# 日誌配置 (生產環境) +LOG_LEVEL=INFO +LOG_FILE=logs/app.log + +# 管理員設定 +ADMIN_EMAIL=ymirliu@panjit.com.tw + +# 應用程式設定 +APP_NAME=PANJIT Document Translator + +# ⚠️ 必須修改:JWT 認證金鑰(使用隨機字串) +JWT_SECRET_KEY=production-jwt-secret-change-this-in-deployment + +# ============================================================================== +# 🔌 服務端口配置(可由 IT 人員修改) +# ============================================================================== +# Flask Web 服務端口(對外訪問端口) +# 建議範圍:12010-12019 +# 修改後需同步更新 1Panel 介面中的端口映射 +PORT=12010 +HOST=0.0.0.0 + +# Redis 本地端口(通常不需修改) +REDIS_PORT=6379 +# ============================================================================== + +# WebSocket 配置 
(生產環境關閉以節省資源) +WEBSOCKET_ENABLED=false + +# Celery 工作進程配置 +CELERY_WORKER_CONCURRENCY=4 +CELERY_WORKER_MAX_TASKS_PER_CHILD=1000 + +# ============================================================================== +# Gunicorn Web 伺服器設定(生產環境直接對外服務,無 Nginx) +# ============================================================================== +# Worker 進程數(建議:CPU 核心數 * 2 + 1) +GUNICORN_WORKERS=4 + +# Worker 類型(gthread 適合 I/O 密集型應用) +GUNICORN_WORKER_CLASS=gthread + +# 每個 Worker 的連線數 +GUNICORN_WORKER_CONNECTIONS=1000 + +# Worker 處理請求數上限(防止記憶體洩漏) +GUNICORN_MAX_REQUESTS=1000 +GUNICORN_MAX_REQUESTS_JITTER=100 +# ============================================================================== \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..52ddbc1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,150 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Python compiled files +*.pyc +*.pyo +*.pyd + +# Flask session files +*flask_session/ +flask_session/ + +# Virtual environments +venv/ +env/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +.DS_Store? 
+._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# Logs +logs/ +*.log + +# Flask +instance/ +.webassets-cache + +# Session files +flask_session/ + +# Database +*.db +*.sqlite +*.sqlite3 + +# Uploads +uploads/ +temp/ +tmp/ + +# Node.js (frontend) +node_modules/ +frontend/node_modules/ +frontend/dist/ +frontend/.nuxt/ +frontend/.output/ +frontend/.vite/ +frontend/.npm/ + +# Frontend build artifacts +frontend/build/ +frontend/out/ + +# Frontend cache +frontend/.cache/ +frontend/.parcel-cache/ + +# Frontend environment variables (keep .env in root but ignore frontend .env files) +frontend/.env +frontend/.env.local +frontend/.env.development.local +frontend/.env.test.local +frontend/.env.production.local + +# Package managers +package-lock.json +yarn.lock +pnpm-lock.yaml + +# MacOS +.DS_Store + +# Windows +Thumbs.db +ehthumbs.db +Desktop.ini + +# Backup files +*.bak +*.backup +*~ + +# Temporary files +*.tmp +*.temp + +# Configuration backups +*_old.py +*_backup.py +nul diff --git a/DEPLOYMENT_VERIFICATION.md b/DEPLOYMENT_VERIFICATION.md new file mode 100644 index 0000000..2184c5f --- /dev/null +++ b/DEPLOYMENT_VERIFICATION.md @@ -0,0 +1,238 @@ +# 🔍 1Panel 部署架構驗證報告 + +## ✅ 已修正的問題 + +### 1. **靜態文件路徑(Docker → 本地)** + +#### 修正前(Docker 容器路徑) +```python +# app.py +send_from_directory('/app/static', 'index.html') # ❌ + +# app/root.py +send_from_directory('/app/static', 'index.html') # ❌ +``` + +#### 修正後(相對路徑) +```python +# app.py +static_dir = Path(__file__).parent / 'frontend' / 'dist' # ✅ +send_from_directory(str(static_dir), 'index.html') + +# app/root.py +project_root = Path(__file__).parent.parent +static_dir = project_root / 'frontend' / 'dist' # ✅ +send_from_directory(str(static_dir), 'index.html') +``` + +**影響**:前端頁面現在可以正確載入 + +--- + +### 2. 
**Gunicorn 啟動配置(wsgi.py → app.py)** + +#### 修正前 +```python +# start.py +'wsgi:app' # ❌ wsgi.py 已刪除 +``` + +#### 修正後 +```python +# start.py +'app:app' # ✅ 直接使用 app.py +``` + +**影響**:生產環境 Gunicorn 可以正確啟動 + +--- + +## ✅ 架構驗證結果 + +### 1. **網路架構** ✓ +``` +用戶瀏覽器 (http://server-ip:12010) + ↓ +Gunicorn (4 Workers) + Flask (PORT=12010) + ↓ +Redis (localhost:6379) ← Celery Worker/Beat + ↓ +MySQL (mysql.theaken.com:33306) +``` + +- ✅ 無 Nginx(已完全移除) +- ✅ 無 Docker 容器(直接在 1Panel 環境運行) +- ✅ Gunicorn 直接對外提供服務 + +--- + +### 2. **端口配置一致性** ✓ + +| 服務 | 配置位置 | 端口 | 狀態 | +|------|---------|------|------| +| Flask Web | `.env` → `PORT` | 12010 | ✅ 一致 | +| Flask Web | `app.py` | 12010 (預設) | ✅ 一致 | +| Flask Web | `start.py` | 12010 (預設) | ✅ 一致 | +| Redis | `.env` → `REDIS_URL` | 6379 | ✅ 一致 | +| Redis | `config.py` | 6379 (預設) | ✅ 一致 | +| MySQL | `.env` → `DATABASE_URL` | 33306 | ✅ 一致 | + +--- + +### 3. **環境變數引用** ✓ + +#### 關鍵環境變數檢查 +```bash +✅ PORT=12010 # Web 服務端口 +✅ HOST=0.0.0.0 # 綁定所有網路介面 +✅ REDIS_URL=redis://localhost:6379/0 +✅ CELERY_BROKER_URL=redis://localhost:6379/0 +✅ DATABASE_URL=mysql+pymysql://A060:... +✅ SECRET_KEY=production-secret-key-change-this-in-deployment +✅ JWT_SECRET_KEY=production-jwt-secret-change-this-in-deployment +``` + +所有環境變數在 `.env`、`config.py`、`app.py`、`start.py` 中引用一致 + +--- + +### 4. **路徑配置檢查** ✓ + +#### 上傳目錄 +```python +# config.py +UPLOAD_FOLDER = Path('uploads').absolute() # ✅ 相對路徑 +``` + +#### 日誌目錄 +```python +# config.py +LOG_FILE = Path('logs/app.log').absolute() # ✅ 相對路徑 +``` + +#### API 配置檔案 +```python +# config.py +api_file = Path('api.txt') # ✅ 相對路徑 +``` + +所有路徑都使用相對路徑,無 Docker 容器絕對路徑 + +--- + +### 5. **Celery 配置** ✓ + +```python +# .env +CELERY_BROKER_URL=redis://localhost:6379/0 # ✅ +CELERY_RESULT_BACKEND=redis://localhost:6379/0 # ✅ +CELERY_WORKER_CONCURRENCY=4 # ✅ + +# config.py +CELERY_BROKER_URL = os.environ.get('CELERY_BROKER_URL', + 'redis://localhost:6379/0') # ✅ +``` + +Celery 配置正確指向本地 Redis,無容器主機名 + +--- + +### 6. 
**前端靜態文件** ✓ + +```bash +frontend/dist/ +├── index.html # ✅ 存在 +├── css/ # ✅ 存在 +├── js/ # ✅ 存在 +└── panjit-logo.png # ✅ 存在 +``` + +前端已編譯,檔案完整 + +--- + +## ✅ 啟動流程驗證 + +### start.py 邏輯 +```python +1. 檢查環境 (Python 版本、必要檔案) # ✅ +2. 讀取 PORT, HOST 環境變數 # ✅ +3. 根據 FLASK_ENV 選擇啟動方式: + - production: gunicorn app:app # ✅ + - development: python3 app.py # ✅ +4. 啟動 Celery Worker (redis://localhost) # ✅ +5. 啟動 Celery Beat # ✅ +6. 監控所有進程 # ✅ +``` + +--- + +## ⚠️ 剩餘注意事項 + +### 1. 環境準備 +```bash +# IT 人員必須執行 +sudo apt install redis-server # Redis 未預裝 +sudo systemctl start redis +redis-cli ping # 確認返回 PONG +``` + +### 2. 環境變數修改 +```bash +# 必須修改(安全性) +SECRET_KEY=<隨機生成> +JWT_SECRET_KEY=<隨機生成> + +# 可選修改(端口) +PORT=12010 # 建議範圍 12010-12019 +``` + +### 3. 端口可用性 +```bash +# 確認端口未被佔用 +netstat -tuln | grep 12010 +``` + +--- + +## ✅ 部署檢查清單 + +- [x] 刪除所有 Docker 相關檔案 +- [x] 修正靜態文件路徑(/app/static → 相對路徑) +- [x] 修正 Gunicorn 啟動配置(wsgi:app → app:app) +- [x] Redis URL 使用 localhost(非容器主機名) +- [x] 所有路徑使用相對路徑 +- [x] 環境變數配置一致 +- [x] 端口配置文檔化 +- [x] 前端靜態文件完整 +- [x] Celery 配置正確 +- [x] 啟動腳本邏輯完整 + +--- + +## 📋 最終架構確認 + +### 無 Docker 架構 +- ✅ 直接在 1Panel 虛擬環境運行 +- ✅ Gunicorn 處理 HTTP 請求(無 Nginx) +- ✅ Redis 本地運行(端口 6379) +- ✅ MySQL 外部服務(mysql.theaken.com:33306) + +### 服務通訊 +- ✅ 所有服務使用 localhost 互相通訊 +- ✅ 對外僅暴露端口 12010(可配置) +- ✅ 無容器網路,簡化架構 + +--- + +## 🎯 結論 + +**所有已知的 Docker/Nginx 遺留問題已修正** + +系統現在完全適配 1Panel 部署環境: +- ✅ 無容器依賴 +- ✅ 路徑正確 +- ✅ 配置一致 +- ✅ 架構簡化 + +**可以安全部署到 1Panel 環境!** diff --git a/INSTALL.txt b/INSTALL.txt new file mode 100644 index 0000000..f49ba1b --- /dev/null +++ b/INSTALL.txt @@ -0,0 +1,65 @@ +=============================================================================== +PANJIT Document Translator V2 - 1Panel 部署快速指南 +=============================================================================== + +架構說明: +- Web 服務:Gunicorn + Flask (直接對外,無 Nginx) +- 任務隊列:Redis + Celery Worker/Beat +- 資料庫:MySQL (外部服務) + +=============================================================================== + +📋 部署步驟: + 
+1. 執行安裝腳本 + chmod +x install.sh + ./install.sh + +2. 修改環境變數中的安全金鑰和端口 + nano .env + # 必須修改: + # SECRET_KEY=your-production-secret-key-change-this + # JWT_SECRET_KEY=your-production-jwt-secret-change-this + # 可選修改: + # PORT=12010 (建議範圍 12010-12019) + +3. 安裝 Redis + sudo apt install redis-server + sudo systemctl start redis + redis-cli ping # 確認返回 PONG + +4. 在 1Panel 介面配置 + - 命令執行: python3 start.py + - 端口映射: 12010 (或您在 .env 中設定的 PORT 值) + +5. 啟動並驗證 + curl http://localhost:12010/api/health + +=============================================================================== + +🔌 使用端口清單: +┌─────────────────┬──────────┬──────────────────────┐ +│ 服務 │ 端口 │ 可否修改 │ +├─────────────────┼──────────┼──────────────────────┤ +│ Flask Web 服務 │ 12010 │ ✅ 可修改 (12010-12019) │ +│ Redis │ 6379 │ ⚠️ 不建議修改 │ +│ MySQL (外部) │ 33306 │ ❌ 不可修改 │ +│ LDAP (外部) │ 389 │ ❌ 不可修改 │ +│ SMTP (外部) │ 25 │ ❌ 不可修改 │ +└─────────────────┴──────────┴──────────────────────┘ + +✅ 已配置項目: +- 資料庫: mysql.theaken.com:33306 (db_A060) +- LDAP: panjit.com.tw:389 +- SMTP: mail.panjit.com.tw:25 +- Dify API: 翻譯 + OCR (已配置在 api.txt) +- Web 端口: 12010 (可在 .env 中修改) + +⚠️ 注意事項: +- Redis 必須安裝並運行在端口 6379 +- 確保 .env 中的 PORT 與 1Panel 端口映射一致 +- Python 版本需 3.10+ + +📚 詳細說明請參考 README.md + +=============================================================================== diff --git a/README.md b/README.md new file mode 100644 index 0000000..29a9add --- /dev/null +++ b/README.md @@ -0,0 +1,530 @@ +# PANJIT Document Translator V2 - 部署指南 + +## 🎯 系統概述 + +PANJIT Document Translator V2 是一個企業級文檔翻譯系統,支援多種文檔格式的智能翻譯,包含 OCR 圖像識別和對話上下文連貫性功能。 + +### 核心功能 +- ✅ **多格式支援**:DOCX、DOC、PDF、PPTX、XLSX、XLS 文檔翻譯 +- ✅ **智能 OCR**:掃描 PDF 自動識別,含圖像預處理增強 +- ✅ **對話持續性**:維持翻譯上下文,確保長文檔術語一致性 +- ✅ **多語言輸出**:單語言翻譯檔 + 多語言組合檔 +- ✅ **混合認證**:API 認證為主,LDAP 備援 +- ✅ **異步處理**:Celery + Redis 批量任務隊列 +- ✅ **快取機制**:OCR 快取 + 翻譯快取,避免重複處理 + +### 支援的翻譯語言 +中文(繁體)、中文(簡體)、英文、日文、韓文、法文、德文、西班牙文、俄文、阿拉伯文、葡萄牙文、義大利文、泰文、越南文 + +--- + +## 🚀 1Panel 環境快速部署 + +### 系統需求 +- **操作系統**:Ubuntu 20.04+ (1Panel 
虛擬環境) +- **Python**:3.10 或更高版本 +- **記憶體**:≥ 4GB (推薦 8GB) +- **存儲空間**:≥ 20GB +- **Redis**:用於任務隊列(需額外安裝) +- **MySQL**:外部資料庫服務 +- **網路**:可訪問外部 Dify API + +### 🔌 使用端口清單 + +| 服務 | 預設端口 | 說明 | 可否修改 | +|------|---------|------|---------| +| **Flask Web 服務** | 12010 | 對外訪問端口,瀏覽器訪問入口 | ✅ 可修改(建議 12010-12019) | +| **Redis** | 6379 | 本地任務隊列和快取 | ⚠️ 不建議修改 | +| **MySQL** | 33306 | 外部資料庫(mysql.theaken.com) | ❌ 不可修改(外部服務) | +| **LDAP** | 389 | 認證服務(panjit.com.tw) | ❌ 不可修改(外部服務) | +| **SMTP** | 25 | 郵件服務(mail.panjit.com.tw) | ❌ 不可修改(外部服務) | + +#### 如何修改 Web 服務端口 + +編輯 `.env` 檔案: +```bash +nano .env + +# 修改以下行(例如改為 12015) +PORT=12015 +``` + +然後在 1Panel 介面中同步修改端口映射為 `12015` + +### 部署步驟 + +#### 1️⃣ 準備環境 + +在 1Panel 管理介面中: +1. 建立新的 **運行環境** +2. 選擇 **Python 3.10** +3. 記錄分配的工作目錄路徑 + +#### 2️⃣ 上傳專案檔案 + +將以下檔案上傳到 1Panel 分配的目錄: + +``` +Document_translator_1panel/ +├── app/ # 應用程式主目錄 +├── frontend/ # 前端檔案(已編譯) +├── migrations/ # 資料庫遷移腳本 +├── app.py # Flask 應用入口 +├── celery_app.py # Celery Worker 配置 +├── start.py # ⭐ 統一啟動腳本 +├── install.sh # ⭐ 一鍵安裝腳本 +├── requirements.txt # Python 依賴清單 +├── .env # ⭐ 環境變數(已配置) +├── api.txt # ⭐ Dify API 金鑰(已配置) +└── README.md # 本文檔 +``` + +#### 3️⃣ 安裝依賴套件 + +```bash +# 賦予執行權限 +chmod +x install.sh + +# 執行一鍵安裝 +./install.sh +``` + +安裝腳本會自動: +- ✅ 檢查 Python 版本 +- ✅ 升級 pip +- ✅ 安裝所有依賴套件 +- ✅ 建立必要目錄 +- ✅ 驗證安裝結果 + +#### 4️⃣ 配置環境變數 + +```bash +# 編輯環境變數(已包含完整配置) +nano .env +``` + +**必須修改的關鍵項目:** + +```bash +# 1. 安全金鑰(務必修改!) +SECRET_KEY=your-production-secret-key-change-this +JWT_SECRET_KEY=your-production-jwt-secret-change-this + +# 2. 
服務端口(可選修改,預設 12010) +PORT=12010 +HOST=0.0.0.0 + +# ======================================== +# 以下配置已預設好,通常不需修改 +# ======================================== + +# 資料庫配置(已配置) +DATABASE_URL=mysql+pymysql://A060:WLeSCi0yhtc7@mysql.theaken.com:33306/db_A060 +MYSQL_HOST=mysql.theaken.com +MYSQL_PORT=33306 + +# Redis 配置(已配置) +REDIS_HOST=localhost +REDIS_PORT=6379 +REDIS_URL=redis://localhost:6379/0 + +# LDAP 配置(已配置) +LDAP_SERVER=panjit.com.tw +LDAP_PORT=389 + +# SMTP 配置(已配置) +SMTP_SERVER=mail.panjit.com.tw +SMTP_PORT=25 +``` + +#### 5️⃣ 驗證 Dify API 配置 + +```bash +# 確認 API 金鑰已正確配置 +cat api.txt + +# 應顯示: +# translation_api: app-SmB3TwVMcp5OyQviYeAoTden +# ocr_api: app-kC5qx2wgMMkn61O77jY4YuPs +``` + +⚠️ **API 金鑰已預先配置,無需修改** + +#### 6️⃣ 安裝 Redis(重要) + +Redis 用於 Celery 任務隊列,必須安裝: + +```bash +# Ubuntu/Debian +sudo apt update +sudo apt install redis-server + +# 啟動 Redis +sudo systemctl start redis +sudo systemctl enable redis + +# 驗證 Redis +redis-cli ping +# 預期輸出:PONG +``` + +#### 7️⃣ 在 1Panel 介面配置 + +1. 進入 **運行環境** 設定 +2. 在 **命令執行** 欄位填入: + ```bash + python3 start.py + ``` +3. 設定 **端口映射**:`12010`(如已修改 .env 中的 PORT,請填入相同端口) +4. 
啟動服務 + +#### 8️⃣ 驗證部署 + +```bash +# 檢查服務健康狀態 +curl http://localhost:12010/api/health + +# 預期輸出: +{ + "status": "healthy", + "timestamp": "2025-10-03T...", + "service": "PANJIT Document Translator API", + "version": "1.0.0" +} +``` + +訪問 Web 介面: +``` +http://your-server-ip:12010 +``` + +--- + +## 📋 檔案輸出格式 + +系統會為每個翻譯任務產生以下檔案: + +### 單語言翻譯檔案 +- **DOCX/DOC** → `translated_{檔名}_{語言}_*.docx` +- **XLSX/XLS** → `translated_{檔名}_{語言}_*.xlsx` +- **PPTX** → `translated_{檔名}_{語言}_*.pptx` +- **PDF** → `translated_{檔名}_{語言}_*.docx` (輸出為 Word 格式) + +### 組合多語言檔案 (多語言時自動產生) +- **檔名格式**:`combined_{檔名}_multilang_*.{副檔名}` +- **內容結構**: + ``` + 原文段落1 + [譯文1 - 語言A] + [譯文2 - 語言B] + + 原文段落2 + [譯文1 - 語言A] + [譯文2 - 語言B] + ``` + +### 支援格式總覽 + +| 輸入格式 | 輸出格式 | OCR 支援 | 組合檔案 | +|---------|---------|---------|---------| +| `.docx` | `.docx` | - | ✅ | +| `.doc` | `.docx` | - | ✅ | +| `.xlsx` | `.xlsx` | - | ✅ | +| `.xls` | `.xlsx` | - | ✅ | +| `.pptx` | `.pptx` | - | ✅ | +| `.pdf` | `.docx` | ✅ | ✅ | + +--- + +## 🔧 服務管理 + +### 啟動服務 + +```bash +# 使用統一啟動腳本(推薦) +python3 start.py +``` + +`start.py` 會自動啟動: +1. **Gunicorn + Flask Web 服務** (端口 12010) + - 生產環境使用 Gunicorn (4 Workers) + - 開發環境使用 Flask 內建伺服器 +2. **Celery Worker** (翻譯任務處理) +3. **Celery Beat** (定時任務調度) + +### 停止服務 + +```bash +# 在運行終端按 Ctrl+C +# 或在 1Panel 介面中停止服務 +``` + +### 查看日誌 + +```bash +# Flask 應用日誌 +tail -f logs/app.log + +# Celery Worker 日誌 +tail -f logs/celery_worker.log + +# Celery Beat 日誌 +tail -f logs/celery_beat.log + +# 訪問日誌(如使用 Gunicorn) +tail -f logs/access.log +``` + +### 重啟服務 + +在 1Panel 介面中: +1. 停止運行環境 +2. 等待幾秒 +3. 重新啟動 + +--- + +## 🏗️ 系統架構 + +### 服務組成 +1. **Gunicorn + Flask**: Web 應用主服務(生產環境,端口 12010) + - 4 個 Worker 進程(可在 .env 中調整) + - 直接對外提供服務(無 Nginx 反向代理) +2. **Celery Worker**: 異步任務處理(翻譯、OCR) +3. **Celery Beat**: 定時任務調度(檔案清理等) +4. **Redis**: 訊息佇列和快取(本地端口 6379) +5. 
**MySQL**: 外部資料庫服務(mysql.theaken.com:33306) + +### 網路架構 +``` +用戶瀏覽器 + ↓ +[端口 12010] Gunicorn (4 Workers) + Flask + ↓ +Redis (localhost:6379) ← Celery Worker/Beat + ↓ +MySQL (mysql.theaken.com:33306) +``` + +**注意**:本架構不使用 Nginx,Gunicorn 直接處理 HTTP 請求 + +### 認證架構 +- **主要認證**:API 認證 (https://pj-auth-api.vercel.app/) +- **備援認證**:LDAP 認證 (panjit.com.tw) + +### 資料表結構 +系統包含以下核心資料表: +- `sys_user`: 系統使用者 (API/LDAP 混合認證) +- `login_logs`: 登入日誌 +- `dt_users`: 文檔翻譯使用者 +- `dt_translation_jobs`: 翻譯任務 +- `dt_job_files`: 任務檔案 +- `dt_translation_cache`: 翻譯快取 +- `dt_ocr_cache`: OCR 快取 +- `dt_system_logs`: 系統日誌 +- `dt_notifications`: 通知記錄 + +--- + +## 📊 監控與維護 + +### 健康檢查 + +```bash +# API 健康檢查 +curl http://localhost:12010/api/health + +# Redis 連線檢查 +redis-cli ping + +# 資料庫連線檢查 +mysql -h mysql.theaken.com -P 33306 -u A060 -pWLeSCi0yhtc7 db_A060 -e "SELECT 1" +``` + +### 日常維護 + +```bash +# 檢查磁碟空間 +df -h + +# 清理 30 天前的上傳檔案 +find ./uploads -type f -mtime +30 -delete + +# 檢查進程狀態 +ps aux | grep python3 +``` + +### 備份 + +```bash +# 備份上傳檔案 +tar -czf uploads-backup-$(date +%Y%m%d).tar.gz uploads/ + +# 備份資料庫(需 MySQL 存取權限) +mysqldump -h mysql.theaken.com -P 33306 -u A060 -pWLeSCi0yhtc7 db_A060 \ + > backup-$(date +%Y%m%d).sql +``` + +--- + +## 🛡️ 安全考量 + +### 生產環境檢查清單 + +- [ ] 修改所有預設密鑰 (SECRET_KEY, JWT_SECRET_KEY) +- [ ] 確認資料庫連接正常 +- [ ] 確認 Redis 連接正常 +- [ ] 測試 LDAP 認證功能 +- [ ] 測試檔案上傳翻譯功能 +- [ ] 確認端口僅內網可訪問(或配置防火牆) +- [ ] 設定檔案清理排程 +- [ ] 建立監控和告警機制 +- [ ] 準備備份恢復流程 +- [ ] 記錄所有設定和密碼 + +### 安全建議 + +1. **環境變數保護** + ```bash + chmod 600 .env api.txt + ``` + +2. **防火牆配置** + ```bash + # 僅允許內網訪問 + sudo ufw allow from 192.168.0.0/16 to any port 12010 + ``` + +3. **定期更新** + ```bash + pip3 install --upgrade -r requirements.txt + ``` + +--- + +## 🐛 故障排除 + +### 常見問題 + +#### 1. 服務啟動失敗 + +```bash +# 檢查 Python 版本 +python3 --version # 需要 3.10+ + +# 檢查依賴安裝 +pip3 list | grep -i flask + +# 檢查端口佔用 +netstat -tuln | grep 12010 +``` + +#### 2. 
翻譯任務無響應 + +```bash +# 檢查 Redis 連線 +redis-cli ping + +# 檢查 Celery Worker 日誌 +tail -f logs/celery_worker.log + +# 檢查任務佇列 +redis-cli llen celery +``` + +#### 3. 資料庫連接失敗 + +```bash +# 測試資料庫連線 +python3 -c " +import pymysql +try: + conn = pymysql.connect( + host='mysql.theaken.com', + port=33306, + user='A060', + password='WLeSCi0yhtc7', + database='db_A060' + ) + print('Database connected!') + conn.close() +except Exception as e: + print(f'Error: {e}') +" +``` + +#### 4. OCR 或翻譯失敗 + +```bash +# 檢查 api.txt 配置 +cat api.txt + +# 檢查 Dify API 可訪問性 +curl -I https://api.dify.ai + +# 清空快取重試 +redis-cli FLUSHALL +``` + +#### 5. 記憶體不足 + +```bash +# 檢查記憶體使用 +free -h + +# 減少 Worker 並發數(編輯 start.py) +# 將 '--concurrency=2' 改為 '--concurrency=1' +``` + +--- + +## 📞 技術支援 + +### 系統資訊 +- **系統版本**:Document Translator V2 (1Panel 部署版) +- **服務端口**:12010 +- **Python 版本**:3.10+ +- **核心框架**:Flask 3.0, Celery 5.3, Vue.js 3 + +### 核心依賴套件版本 +``` +Flask==3.0.0 +Celery==5.3.4 +Redis==5.0.1 +SQLAlchemy==2.0.23 +PyMySQL==1.1.0 +PyMuPDF>=1.23.0 +opencv-python-headless==4.8.1.78 +numpy>=1.24.0,<2.0.0 +``` + +### 聯絡方式 +- **管理員郵箱**:ymirliu@panjit.com.tw +- **技術團隊**:PANJIT IT Team + +--- + +## 📝 更新日誌 + +### v2.0 (2025-10-03) +- ✅ 支援 1Panel 環境部署 +- ✅ 新增一鍵安裝腳本 (install.sh) +- ✅ 新增統一啟動腳本 (start.py) +- ✅ 移除 Docker 依賴 +- ✅ 優化環境變數配置 +- ✅ 完善部署文檔 + +### v1.0 +- ✅ 初始版本(Docker 部署) + +--- + +## 📄 授權 + +© 2025 PANJIT Group. All rights reserved. 
+
+---
+
+**🎉 部署完成後,系統即可正式上線使用!**
+
+如有任何問題,請參考故障排除章節或聯繫技術支援團隊。
diff --git a/api.txt b/api.txt
new file mode 100644
index 0000000..2f314ab
--- /dev/null
+++ b/api.txt
@@ -0,0 +1,7 @@
+# Dify翻譯API配置
+translation_base_url:https://dify.theaken.com/v1
+translation_api:app-SmB3TwVMcp5OyQviYeAoTden
+
+# Dify OCR API配置(用於掃描PDF)
+ocr_base_url:https://dify.theaken.com/v1
+ocr_api:app-kC5qx2wgMMkn61O77jY4YuPs
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..4fdd6c1
--- /dev/null
+++ b/app.py
@@ -0,0 +1,179 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Document Translator Flask application entry point.
+
+Author: PANJIT IT Team
+Created: 2024-01-28
+Modified: 2024-01-28
+"""
+
+import os
+import sys
+from pathlib import Path
+from datetime import datetime
+
+import click
+
+# Add the project root to the Python import path
+project_root = Path(__file__).parent
+sys.path.insert(0, str(project_root))
+
+from app import create_app, db
+from app.models import User, TranslationJob, JobFile, TranslationCache, APIUsageStats, SystemLog
+
+# Create the Flask application
+app = create_app()
+
+# Expose the Celery instance so the worker can import it
+celery = app.celery
+
+# Make sure both objects are reachable at module level
+__all__ = ['app', 'celery']
+
+
+@app.shell_context_processor
+def make_shell_context():
+    """Return the names pre-loaded into `flask shell`: the db handle and the model classes."""
+    return {
+        'db': db,
+        'User': User,
+        'TranslationJob': TranslationJob,
+        'JobFile': JobFile,
+        'TranslationCache': TranslationCache,
+        'APIUsageStats': APIUsageStats,
+        'SystemLog': SystemLog
+    }
+
+
+@app.cli.command()
+def init_db():
+    """Create all database tables via db.create_all()."""
+    click.echo('Initializing database...')
+    db.create_all()
+    click.echo('Database initialized.')
+
+
+@app.cli.command()
+def test():
+    """Discover the unit tests under tests/ and run them verbosely."""
+    import unittest
+    tests = unittest.TestLoader().discover('tests')
+    unittest.TextTestRunner(verbosity=2).run(tests)
+
+
+@app.route('/')
+def index():
+    """Home route: serve frontend/dist/index.html, or an API info dict when it is unavailable."""
+    try:
+        from flask import send_from_directory
+        static_dir = Path(__file__).parent / 'frontend' / 'dist'
+        if static_dir.exists():
+            return send_from_directory(str(static_dir), 'index.html')
+        else:
+            # The built frontend is not present: return API info instead
+            return {
+                'application': 'PANJIT Document Translator',
+                'version': '1.0.0',
+                'status': 'running',
+                'api_base_url': '/api/v1',
+                'note': 'Frontend files not found, serving API info'
+            }
+    except Exception:  # e.g. dist/ exists but index.html cannot be sent
+        return {
+            'application': 'PANJIT Document Translator',
+            'version': '1.0.0',
+            'status': 'running',
+            'api_base_url': '/api/v1',
+            'note': 'Frontend files not found, serving API info'
+        }
+
+
+@app.route('/<path:path>')
+def serve_static(path):
+    """Serve a built frontend asset; fall back to index.html (SPA route), else a JSON 404."""
+    try:
+        from flask import send_from_directory
+        static_dir = Path(__file__).parent / 'frontend' / 'dist'
+        if (static_dir / path).is_file():
+            return send_from_directory(str(static_dir), path)
+        else:
+            # Not an existing file: let the SPA router handle it via index.html
+            return send_from_directory(str(static_dir), 'index.html')
+    except Exception:
+        # Nothing could be sent (e.g. dist/ or index.html missing): JSON 404
+        return {
+            'error': 'File not found',
+            'path': path
+        }, 404
+
+
+@app.route('/api')
+def api_info():
+    """Return a static description of the API version and its endpoint prefixes."""
+    return {
+        'api_version': 'v1',
+        'base_url': '/api/v1',
+        'endpoints': {
+            'auth': '/api/v1/auth',
+            'files': '/api/v1/files',
+            'jobs': '/api/v1/jobs',
+            'admin': '/api/v1/admin',
+            'health': '/api/v1/health'
+        },
+        'documentation': 'Available endpoints provide RESTful API for document translation'
+    }
+
+
+@app.route('/api/health')
+@app.route('/api/v1/health')
+def health_check():
+    """Health-check endpoint: always answers 200 with a status dict and the current UTC time."""
+    return {
+        'status': 'healthy',
+        'timestamp': datetime.utcnow().isoformat(),
+        'service': 'PANJIT Document Translator API',
+        'version': '1.0.0'
+    }, 200
+
+
+if __name__ == '__main__':
+    # Read runtime settings from environment variables
+    port = int(os.environ.get('PORT', 12010))
+    debug = os.environ.get('FLASK_DEBUG', 'false').lower() == 'true'
+    host = os.environ.get('HOST', '0.0.0.0')
+
+    # Show the startup banner only outside debug mode, or in the process that
+    # has WERKZEUG_RUN_MAIN set, so the debug reloader does not print it twice
+    if not debug or os.environ.get('WERKZEUG_RUN_MAIN'):
+        print(f"""
+    PANJIT Document Translator Starting...
+
+    Server: http://{host}:{port}
+    Debug Mode: {debug}
+    API Documentation: http://{host}:{port}/api
+    Health Check: http://{host}:{port}/api/v1/health
+
+    Upload Directory: {app.config.get('UPLOAD_FOLDER')}
+    Database: {app.config.get('SQLALCHEMY_DATABASE_URI', '').split('/')[-1]}
+    SMTP: {app.config.get('SMTP_SERVER')}
+    LDAP: {app.config.get('LDAP_SERVER')}
+
+    Press Ctrl+C to stop the server.
+    """)
+
+    # Start the application
+    try:
+        app.run(
+            host=host,
+            port=port,
+            debug=debug,
+            use_reloader=debug
+        )
+    except KeyboardInterrupt:
+        if not debug or os.environ.get('WERKZEUG_RUN_MAIN'):
+            print("\nServer stopped by user.")
+    except Exception as e:
+        if not debug or os.environ.get('WERKZEUG_RUN_MAIN'):
+            print(f"\nServer failed to start: {str(e)}")
+        sys.exit(1)
\ No newline at end of file
diff --git a/app/__init__.py b/app/__init__.py
new file mode 100644
index 0000000..db69c52
--- /dev/null
+++ b/app/__init__.py
@@ -0,0 +1,226 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Flask application factory.
+
+Author: PANJIT IT Team
+Created: 2024-01-28
+Modified: 2024-01-28
+"""
+
+import os
+import redis
+from flask import Flask, request, make_response
+from flask_sqlalchemy import SQLAlchemy
+from flask_cors import CORS
+from flask_jwt_extended import JWTManager
+from celery import Celery
+from app.config import config
+from app.utils.logger import init_logging
+
+# Initialize extensions
+db = SQLAlchemy()
+cors = CORS()
+jwt = JWTManager()
+
+
+def make_celery(app):
+    """Create a Celery instance configured from app.config whose tasks run in the app context."""
+    celery = Celery(
+        app.import_name,
+        backend=app.config['CELERY_RESULT_BACKEND'],
+        broker=app.config['CELERY_BROKER_URL']
+    )
+    celery.conf.update(app.config)
+
+    class ContextTask(celery.Task):
+        """Task base class that runs the task body inside the Flask application context."""
+        def __call__(self, *args, **kwargs):
+            with app.app_context():
+                return self.run(*args, **kwargs)
+
+    celery.Task = ContextTask
+    return celery
+
+
+def create_app(config_name=None):
+    """Application factory: build the Flask app for config_name (default: FLASK_ENV, else 'default')."""
+    app = Flask(__name__)
+
+    # Pick the configuration
+    config_name = config_name or os.getenv('FLASK_ENV', 'default')
+
+    # Load the Dify API configuration first
+    config[config_name].load_dify_config()
+
+    # Then load the configuration into the Flask app
+    app.config.from_object(config[config_name])
+
+    # Create the required directories
+    config[config_name].init_directories()
+
+    # Initialize extensions
+    db.init_app(app)
+
+    # Flask-CORS is deliberately not initialized (avoids conflicts); CORS is handled manually below
+
+    # Initialize JWT; log only whether a secret is set, never any part of the key itself
+    jwt.init_app(app)
+    app.logger.info(f"🔑 [JWT Config] JWT_SECRET_KEY configured: {bool(app.config.get('JWT_SECRET_KEY'))}")
+    app.logger.info(f"🔑 [JWT Config] JWT_ACCESS_TOKEN_EXPIRES: {app.config.get('JWT_ACCESS_TOKEN_EXPIRES')}")
+    app.logger.info(f"🔑 [JWT Config] JWT_REFRESH_TOKEN_EXPIRES: {app.config.get('JWT_REFRESH_TOKEN_EXPIRES')}")
+
+    app.logger.info("🔑 [JWT] Using JWT authentication")
+
+    # Set up the Redis client (used by Celery); the app still starts without it
+    try:
+        redis_client = redis.from_url(app.config['REDIS_URL'])
+        app.redis_client = redis_client
+    except Exception as e:
+        app.logger.warning(f"Redis initialization failed: {str(e)}")
+        app.redis_client = None
+
+    # Initialize logging
+    init_logging(app)
+
+    # Register the API routes
+    from app.api import api_v1
+    app.register_blueprint(api_v1)
+
+    # Register the error handlers
+    register_error_handlers(app)
+
+    # Add CORS response headers for allow-listed origins
+    @app.after_request
+    def after_request(response):
+        origin = request.headers.get('Origin')
+        allowed_origins = ['http://localhost:3000', 'http://127.0.0.1:3000', 'http://localhost:3001', 'http://127.0.0.1:3001', 'http://localhost:12010', 'http://127.0.0.1:12010']
+
+        if origin and origin in allowed_origins:
+            response.headers['Access-Control-Allow-Origin'] = origin
+            response.headers['Access-Control-Allow-Headers'] = 'Content-Type, Authorization, X-Requested-With'
+            response.headers['Access-Control-Allow-Methods'] = 'GET, POST, PUT, DELETE, OPTIONS, PATCH'
+            response.headers['Access-Control-Allow-Credentials'] = 'true'
+            response.headers['Access-Control-Max-Age'] = '86400'
+
+        return response
+
+    # Answer OPTIONS preflight requests directly
+    @app.before_request
+    def before_request():
+        if request.method == 'OPTIONS':
+            response = make_response()
+            origin = request.headers.get('Origin')
+            allowed_origins = ['http://localhost:3000', 'http://127.0.0.1:3000', 'http://localhost:3001', 'http://127.0.0.1:3001', 'http://localhost:12010', 'http://127.0.0.1:12010']
+
+            if origin and origin in allowed_origins:
+                response.headers['Access-Control-Allow-Origin'] = origin
+                response.headers['Access-Control-Allow-Headers'] = 'Content-Type, Authorization, X-Requested-With'
+                response.headers['Access-Control-Allow-Methods'] = 'GET, POST, PUT, DELETE, OPTIONS, PATCH'
+                response.headers['Access-Control-Allow-Credentials'] = 'true'
+                response.headers['Access-Control-Max-Age'] = '86400'
+
+            return response
+
+    # Create the database tables
+    with app.app_context():
+        # Import the models ahead of db.create_all()
+        from app.models import User, TranslationJob, JobFile, TranslationCache, APIUsageStats, SystemLog, Notification
+
+        db.create_all()
+
+        # Create the default admin user if it does not exist yet
+        create_default_admin()
+
+    # Create the Celery instance
+    app.celery = make_celery(app)
+
+    # WebSocket support is fully disabled
+    app.logger.info("🔌 [WebSocket] WebSocket 服務已禁用")
+    app.socketio = None
+
+    # Register the root routes (SPA and basic API info); failure is logged, not fatal
+    try:
+        from app.root import root_bp
+        app.register_blueprint(root_bp)
+    except Exception as e:
+        app.logger.warning(f"Root routes not registered: {e}")
+
+    app.logger.info("Flask application created successfully")
+    return app
+
+
+def register_error_handlers(app):
+    """Register JSON error handlers for HTTP 404, 403, 401, 500 and 413 on app."""
+
+    @app.errorhandler(404)
+    def not_found(error):
+        return {
+            'success': False,
+            'error': 'NOT_FOUND',
+            'message': '請求的資源不存在'
+        }, 404
+
+    @app.errorhandler(403)
+    def forbidden(error):
+        return {
+            'success': False,
+            'error': 'FORBIDDEN',
+            'message': '權限不足'
+        }, 403
+
+    @app.errorhandler(401)
+    def unauthorized(error):
+        return {
+            'success': False,
+            'error': 'UNAUTHORIZED',
+            'message': '需要認證'
+        }, 401
+
+    @app.errorhandler(500)
+    def internal_server_error(error):
+        return {
+            'success': False,
+            'error': 'INTERNAL_SERVER_ERROR',
+            'message': '系統內部錯誤'
+        }, 500
+
+    @app.errorhandler(413)
+    def request_entity_too_large(error):
+        return {
+            'success': False,
+            'error': 'FILE_TOO_LARGE',
+            'message': '檔案大小超過限制'
+        }, 413
+
+
+def create_default_admin():
+    """Create the admin user named by ADMIN_EMAIL if missing; failures are printed, not raised."""
+    try:
+        from app.models import User
+
+        admin_email = os.environ.get('ADMIN_EMAIL', 'ymirliu@panjit.com.tw')
+
+        # Check whether the admin user already exists
+        admin_user = User.query.filter_by(email=admin_email).first()
+
+        if not admin_user:
+            # Create the admin user (details are to be completed on LDAP login)
+            admin_user = User(
+                username=admin_email.split('@')[0],
+                display_name='系統管理員',
+                email=admin_email,
+                department='IT',
+                is_admin=True
+            )
+            db.session.add(admin_user)
+            db.session.commit()
+
+            print(f"Created default admin user: {admin_email}")
+
+    except Exception as e:
+        # Discard the failed transaction so the shared session stays usable
+        db.session.rollback()
+        print(f"Failed to create default admin: {str(e)}")
+
+
+# Models are imported only where needed, to avoid circular imports
diff --git a/app/api/__init__.py b/app/api/__init__.py
new file mode 100644
index 0000000..6e2334a
--- /dev/null
+++ b/app/api/__init__.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+API package.
+
+Author: PANJIT IT Team
+Created: 2024-01-28
+Modified: 2024-01-28
+"""
+
+from flask import Blueprint
+
+# Create the API blueprint
+api_v1 = Blueprint('api_v1', __name__, url_prefix='/api/v1')
+
+# Import the individual API modules
+from . 
import auth, jobs, files, admin, health, notification, cache + +# 註冊路由 +api_v1.register_blueprint(auth.auth_bp) +api_v1.register_blueprint(jobs.jobs_bp) +api_v1.register_blueprint(files.files_bp) +api_v1.register_blueprint(admin.admin_bp) +api_v1.register_blueprint(health.health_bp) +api_v1.register_blueprint(notification.notification_bp) +api_v1.register_blueprint(cache.cache_bp) \ No newline at end of file diff --git a/app/api/admin.py b/app/api/admin.py new file mode 100644 index 0000000..0d5dbdf --- /dev/null +++ b/app/api/admin.py @@ -0,0 +1,1071 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +管理員 API + +Author: PANJIT IT Team +Created: 2024-01-28 +Modified: 2024-01-28 +""" + +from datetime import datetime, timedelta +from flask import Blueprint, request, jsonify, g, send_file +from app.utils.decorators import admin_required +from app.utils.validators import validate_pagination, validate_date_range +from app.utils.helpers import create_response +from app.utils.exceptions import ValidationError +from app.utils.logger import get_logger +from app.models.user import User +from app.models.job import TranslationJob +from app.models.stats import APIUsageStats +from app.utils.timezone import format_taiwan_time +from app.models.log import SystemLog +from app.models.cache import TranslationCache +from sqlalchemy import func, desc + +admin_bp = Blueprint('admin', __name__, url_prefix='/admin') +logger = get_logger(__name__) + + +@admin_bp.route('/stats', methods=['GET']) +@admin_required +def get_system_stats(): + """取得系統統計資料(簡化版本)""" + try: + from app import db + + # 基本統計 - 計算實際的總成本和今日活躍用戶 + total_cost = db.session.query(func.sum(TranslationJob.total_cost)).scalar() or 0.0 + + # 計算今日活躍用戶 (今天有任務活動的用戶) + today = datetime.utcnow().date() + active_users_today = db.session.query(TranslationJob.user_id).filter( + func.date(TranslationJob.created_at) == today + ).distinct().count() + + overview = { + 'total_jobs': TranslationJob.query.count(), + 'completed_jobs': 
TranslationJob.query.filter_by(status='COMPLETED').count(), + 'failed_jobs': TranslationJob.query.filter_by(status='FAILED').count(), + 'pending_jobs': TranslationJob.query.filter_by(status='PENDING').count(), + 'processing_jobs': TranslationJob.query.filter_by(status='PROCESSING').count(), + 'total_users': User.query.count(), + 'active_users_today': active_users_today, + 'total_cost': float(total_cost) + } + + # 用戶排行榜 - 按任務數和成本排序 + user_rankings = db.session.query( + User.id, + User.display_name, + func.count(TranslationJob.id).label('job_count'), + func.sum(TranslationJob.total_cost).label('total_cost') + ).outerjoin(TranslationJob).group_by( + User.id, User.display_name + ).order_by( + func.count(TranslationJob.id).desc() + ).limit(10).all() + + user_rankings_data = [] + for ranking in user_rankings: + user_rankings_data.append({ + 'user_id': ranking.id, + 'display_name': ranking.display_name, + 'job_count': ranking.job_count or 0, + 'total_cost': float(ranking.total_cost or 0.0) + }) + + # 計算每日統計 + period = request.args.get('period', 'month') + days = {'week': 7, 'month': 30, 'quarter': 90}.get(period, 30) + + # 取得指定期間的每日統計 + daily_stats = [] + for i in range(days): + target_date = (datetime.utcnow() - timedelta(days=i)).date() + + # 當日任務統計 + daily_jobs = TranslationJob.query.filter( + func.date(TranslationJob.created_at) == target_date + ).count() + + daily_completed = TranslationJob.query.filter( + func.date(TranslationJob.created_at) == target_date, + TranslationJob.status == 'COMPLETED' + ).count() + + # 當日失敗任務統計 + daily_failed = TranslationJob.query.filter( + func.date(TranslationJob.created_at) == target_date, + TranslationJob.status == 'FAILED' + ).count() + + # 當日成本統計 + daily_cost = db.session.query( + func.sum(TranslationJob.total_cost) + ).filter( + func.date(TranslationJob.created_at) == target_date + ).scalar() or 0.0 + + daily_stats.append({ + 'date': target_date.strftime('%Y-%m-%d'), + 'jobs': daily_jobs, + 'completed': daily_completed, + 
'failed': daily_failed, + 'cost': float(daily_cost) + }) + + # 反轉順序,最早的日期在前 + daily_stats.reverse() + + return jsonify(create_response( + success=True, + data={ + 'overview': overview, + 'daily_stats': daily_stats, + 'user_rankings': user_rankings_data, + 'period': period, + 'start_date': format_taiwan_time(datetime.utcnow() - timedelta(days=days), "%Y-%m-%d %H:%M:%S"), + 'end_date': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S") + } + )) + + except Exception as e: + logger.error(f"Get system stats error: {str(e)}") + import traceback + logger.error(f"Traceback: {traceback.format_exc()}") + + return jsonify(create_response( + success=False, + error='SYSTEM_ERROR', + message='取得系統統計失敗' + )), 500 + + +@admin_bp.route('/jobs', methods=['GET']) +@admin_required +def get_all_jobs(): + """取得所有使用者任務""" + try: + # 取得查詢參數 + page = request.args.get('page', 1, type=int) + per_page = request.args.get('per_page', 50, type=int) + user_id = request.args.get('user_id', type=int) + status = request.args.get('status') + search = request.args.get('search', '').strip() + include_deleted = request.args.get('include_deleted', 'false').lower() == 'true' + + # 驗證分頁參數 + page, per_page = validate_pagination(page, min(per_page, 100)) + + # 建立查詢 + query = TranslationJob.query + + # 預設排除軟刪除的記錄,除非明確要求包含 + if not include_deleted: + query = query.filter(TranslationJob.deleted_at.is_(None)) + + # 使用者篩選 + if user_id: + query = query.filter_by(user_id=user_id) + + # 狀態篩選 + if status and status != 'all': + valid_statuses = ['PENDING', 'PROCESSING', 'COMPLETED', 'FAILED', 'RETRY'] + if status.upper() in valid_statuses: + query = query.filter_by(status=status.upper()) + + # 檔案名搜尋 + if search: + query = query.filter(TranslationJob.original_filename.like(f'%{search}%')) + + # 排序 + query = query.order_by(TranslationJob.created_at.desc()) + + # 分頁 + pagination = query.paginate( + page=page, + per_page=per_page, + error_out=False + ) + + jobs = pagination.items + + # 組合回應資料(包含使用者資訊) + jobs_data 
= [] + for job in jobs: + job_data = job.to_dict() + job_data['user'] = { + 'id': job.user.id, + 'username': job.user.username, + 'display_name': job.user.display_name, + 'email': job.user.email + } + jobs_data.append(job_data) + + return jsonify(create_response( + success=True, + data={ + 'jobs': jobs_data, + 'pagination': { + 'page': page, + 'per_page': per_page, + 'total': pagination.total, + 'pages': pagination.pages, + 'has_prev': pagination.has_prev, + 'has_next': pagination.has_next + } + } + )) + + except ValidationError as e: + return jsonify(create_response( + success=False, + error=e.error_code, + message=str(e) + )), 400 + + except Exception as e: + logger.error(f"Get all jobs error: {str(e)}") + + return jsonify(create_response( + success=False, + error='SYSTEM_ERROR', + message='取得任務列表失敗' + )), 500 + + +@admin_bp.route('/users', methods=['GET']) +@admin_required +def get_all_users(): + """取得所有使用者(簡化版本)""" + try: + # 簡化版本 - 不使用分頁,直接返回所有用戶 + users = User.query.order_by(User.created_at.desc()).limit(50).all() + + users_data = [] + for user in users: + # 直接構建基本用戶資料,不使用to_dict方法 + users_data.append({ + 'id': user.id, + 'username': user.username, + 'display_name': user.display_name, + 'email': user.email, + 'department': user.department or '', + 'is_admin': user.is_admin, + 'last_login': user.last_login.isoformat() if user.last_login else None, + 'created_at': user.created_at.isoformat() if user.created_at else None, + 'updated_at': user.updated_at.isoformat() if user.updated_at else None + }) + + return jsonify(create_response( + success=True, + data={ + 'users': users_data, + 'pagination': { + 'page': 1, + 'per_page': 50, + 'total': len(users_data), + 'pages': 1, + 'has_prev': False, + 'has_next': False + } + } + )) + + except Exception as e: + logger.error(f"Get all users error: {str(e)}") + import traceback + logger.error(f"Traceback: {traceback.format_exc()}") + + return jsonify(create_response( + success=False, + error='SYSTEM_ERROR', + 
message='取得使用者列表失敗' + )), 500 + + +@admin_bp.route('/logs', methods=['GET']) +@admin_required +def get_system_logs(): + """取得系統日誌""" + try: + # 取得查詢參數 + page = request.args.get('page', 1, type=int) + per_page = request.args.get('per_page', 100, type=int) + level = request.args.get('level') + module = request.args.get('module') + start_date = request.args.get('start_date') + end_date = request.args.get('end_date') + + # 驗證參數 + page, per_page = validate_pagination(page, min(per_page, 500)) + + if start_date or end_date: + start_date, end_date = validate_date_range(start_date, end_date) + + # 取得日誌 + logs = SystemLog.get_logs( + level=level, + module=module, + start_date=start_date, + end_date=end_date, + limit=per_page, + offset=(page - 1) * per_page + ) + + # 取得總數(簡化版本,不完全精確) + total = len(logs) if len(logs) < per_page else (page * per_page) + 1 + + logs_data = [log.to_dict() for log in logs] + + return jsonify(create_response( + success=True, + data={ + 'logs': logs_data, + 'pagination': { + 'page': page, + 'per_page': per_page, + 'total': total, + 'has_more': len(logs) == per_page + } + } + )) + + except ValidationError as e: + return jsonify(create_response( + success=False, + error=e.error_code, + message=str(e) + )), 400 + + except Exception as e: + logger.error(f"Get system logs error: {str(e)}") + + return jsonify(create_response( + success=False, + error='SYSTEM_ERROR', + message='取得系統日誌失敗' + )), 500 + + +@admin_bp.route('/api-usage', methods=['GET']) +@admin_required +def get_api_usage(): + """取得 API 使用統計(簡化版本)""" + try: + from app import db + + # 基本統計 + total_calls = db.session.query(APIUsageStats).count() + total_cost = db.session.query(func.sum(APIUsageStats.cost)).scalar() or 0.0 + total_tokens = db.session.query(func.sum(APIUsageStats.total_tokens)).scalar() or 0 + + # 簡化版本返回基本數據 + return jsonify(create_response( + success=True, + data={ + 'daily_stats': [], # 簡化版本 + 'top_users': [], # 簡化版本 + 'endpoint_stats': [], # 簡化版本 + 'cost_trend': [], # 簡化版本 + 
'period_days': 30, + 'summary': { + 'total_calls': total_calls, + 'total_cost': float(total_cost), + 'total_tokens': total_tokens + } + } + )) + + except Exception as e: + logger.error(f"Get API usage error: {str(e)}") + import traceback + logger.error(f"Traceback: {traceback.format_exc()}") + + return jsonify(create_response( + success=False, + error='SYSTEM_ERROR', + message='取得API使用統計失敗' + )), 500 + + +@admin_bp.route('/cache/stats', methods=['GET']) +@admin_required +def get_cache_stats(): + """取得翻譯快取統計""" + try: + cache_stats = TranslationCache.get_cache_statistics() + + return jsonify(create_response( + success=True, + data=cache_stats + )) + + except Exception as e: + logger.error(f"Get cache stats error: {str(e)}") + + return jsonify(create_response( + success=False, + error='SYSTEM_ERROR', + message='取得快取統計失敗' + )), 500 + + +@admin_bp.route('/health', methods=['GET']) +@admin_required +def get_system_health(): + """取得系統健康狀態(管理員專用)""" + try: + from datetime import datetime + status = { + 'timestamp': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"), + 'status': 'healthy', + 'services': {} + } + + # 資料庫檢查 + try: + from app import db + from sqlalchemy import text + db.session.execute(text('SELECT 1')) + status['services']['database'] = {'status': 'healthy'} + except Exception as e: + status['services']['database'] = { + 'status': 'unhealthy', + 'error': str(e) + } + status['status'] = 'unhealthy' + + # 翻譯服務統計 + try: + total_jobs = TranslationJob.query.count() + pending_jobs = TranslationJob.query.filter_by(status='PENDING').count() + processing_jobs = TranslationJob.query.filter_by(status='PROCESSING').count() + status['services']['translation_service'] = { + 'status': 'healthy', + 'total_jobs': total_jobs, + 'pending_jobs': pending_jobs, + 'processing_jobs': processing_jobs + } + except Exception as e: + status['services']['translation_service'] = { + 'status': 'unhealthy', + 'error': str(e) + } + status['status'] = 'unhealthy' + + # Celery 工作者檢查 - 
使用替代方案檢測 + try: + import redis + import os + from flask import current_app + + # 方法1: 檢查Redis中是否有Celery相關的key + redis_client = redis.from_url(current_app.config['REDIS_URL']) + + # 檢查Celery binding keys(worker存在時會有這些keys) + celery_keys = redis_client.keys('_kombu.binding.celery*') + + # 方法2: 檢查進程(Docker環境中) + worker_detected = False + worker_count = 0 + + try: + # 檢查是否有Celery相關的keys + if celery_keys: + worker_detected = True + worker_count = 1 # Docker環境中通常只有一個worker + + # 額外檢查:如果有最近的任務處理記錄,說明worker在工作 + recent_tasks = TranslationJob.query.filter( + TranslationJob.updated_at >= datetime.utcnow() - timedelta(minutes=10), + TranslationJob.status.in_(['PROCESSING', 'COMPLETED']) + ).count() + + if recent_tasks > 0: + worker_detected = True + worker_count = max(worker_count, 1) + except Exception: + pass + + if worker_detected: + status['services']['celery'] = { + 'status': 'healthy', + 'active_workers': worker_count, + 'message': 'Worker detected via Redis/Task activity' + } + else: + # Celery 工作者沒有檢測到 + status['services']['celery'] = { + 'status': 'warning', + 'message': 'No Celery worker activity detected', + 'active_workers': 0 + } + # 不設置整體系統為異常,只是警告 + + except Exception as e: + # Redis連接失敗或其他錯誤 + status['services']['celery'] = { + 'status': 'warning', + 'message': f'Cannot check Celery status: {str(e)[:100]}', + 'active_workers': 0 + } + # 不設置整體系統為異常,只是警告 + + # 檔案系統檢查 + try: + import os + from app.config import Config + + # 檢查上傳目錄 + upload_dir = getattr(Config, 'UPLOAD_FOLDER', 'uploads') + if os.path.exists(upload_dir) and os.access(upload_dir, os.W_OK): + status['services']['file_system'] = {'status': 'healthy'} + else: + status['services']['file_system'] = { + 'status': 'unhealthy', + 'error': f'Upload directory {upload_dir} not accessible' + } + status['status'] = 'unhealthy' + except Exception as e: + status['services']['file_system'] = { + 'status': 'unhealthy', + 'error': str(e) + } + + # 重新評估整體系統狀態 + unhealthy_services = [service for service, info in 
status['services'].items() + if info.get('status') == 'unhealthy'] + + if unhealthy_services: + status['status'] = 'unhealthy' + status['unhealthy_services'] = unhealthy_services + else: + warning_services = [service for service, info in status['services'].items() + if info.get('status') == 'warning'] + if warning_services: + status['status'] = 'warning' + status['warning_services'] = warning_services + else: + status['status'] = 'healthy' + + return jsonify(create_response( + success=True, + data=status + )) + + except Exception as e: + logger.error(f"Get system health error: {str(e)}") + return jsonify({ + 'timestamp': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"), + 'status': 'error', + 'error': str(e) + }), 500 + + +@admin_bp.route('/metrics', methods=['GET']) +@admin_required +def get_system_metrics(): + """取得系統指標(管理員專用)""" + try: + from datetime import datetime, timedelta + from app import db + + # 統計任務狀態(排除軟刪除的記錄,反映當前實際狀態) + job_stats = db.session.query( + TranslationJob.status, + func.count(TranslationJob.id) + ).filter( + TranslationJob.deleted_at.is_(None) + ).group_by(TranslationJob.status).all() + + job_counts = {status: count for status, count in job_stats} + + # 最近24小時的統計(排除軟刪除的記錄) + yesterday = datetime.utcnow() - timedelta(days=1) + recent_jobs = db.session.query( + TranslationJob.status, + func.count(TranslationJob.id) + ).filter( + TranslationJob.created_at >= yesterday, + TranslationJob.deleted_at.is_(None) + ).group_by(TranslationJob.status).all() + + recent_counts = {status: count for status, count in recent_jobs} + + metrics_data = { + 'timestamp': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"), + 'jobs': { + 'pending': job_counts.get('PENDING', 0), + 'processing': job_counts.get('PROCESSING', 0), + 'completed': job_counts.get('COMPLETED', 0), + 'failed': job_counts.get('FAILED', 0), + 'retry': job_counts.get('RETRY', 0), + 'total': sum(job_counts.values()) + }, + 'recent_24h': { + 'pending': 
recent_counts.get('PENDING', 0), + 'processing': recent_counts.get('PROCESSING', 0), + 'completed': recent_counts.get('COMPLETED', 0), + 'failed': recent_counts.get('FAILED', 0), + 'retry': recent_counts.get('RETRY', 0), + 'total': sum(recent_counts.values()) + } + } + + return jsonify(create_response( + success=True, + data=metrics_data + )) + + except Exception as e: + logger.error(f"Get system metrics error: {str(e)}") + return jsonify(create_response( + success=False, + error='SYSTEM_ERROR', + message='取得系統指標失敗' + )), 500 + + +@admin_bp.route('/maintenance/cleanup', methods=['POST']) +@admin_required +def cleanup_system(): + """系統清理維護""" + try: + data = request.get_json() or {} + + # 清理選項 + cleanup_logs = data.get('cleanup_logs', False) + cleanup_cache = data.get('cleanup_cache', False) + cleanup_files = data.get('cleanup_files', False) + + logs_days = data.get('logs_days', 30) + cache_days = data.get('cache_days', 90) + files_days = data.get('files_days', 7) + + cleanup_results = {} + + # 清理舊日誌 + if cleanup_logs: + deleted_logs = SystemLog.cleanup_old_logs(days_to_keep=logs_days) + cleanup_results['logs'] = { + 'deleted_count': deleted_logs, + 'days_kept': logs_days + } + + # 清理舊快取 + if cleanup_cache: + deleted_cache = TranslationCache.clear_old_cache(days_to_keep=cache_days) + cleanup_results['cache'] = { + 'deleted_count': deleted_cache, + 'days_kept': cache_days + } + + # 清理舊檔案 + if cleanup_files: + try: + from datetime import datetime, timedelta + import os + from pathlib import Path + + # 找到超過指定天數的已完成或失敗任務 + cutoff_date = datetime.utcnow() - timedelta(days=files_days) + old_jobs = TranslationJob.query.filter( + TranslationJob.created_at < cutoff_date, + TranslationJob.status.in_(['COMPLETED', 'FAILED']) + ).all() + + deleted_files_count = 0 + for job in old_jobs: + try: + # 刪除與任務相關的所有檔案 + for file_record in job.files: + file_path = Path(file_record.file_path) + if file_path.exists(): + os.remove(file_path) + deleted_files_count += 1 + + # 也刪除任務目錄 + if 
job.file_path: + job_dir = Path(job.file_path).parent + if job_dir.exists() and len(list(job_dir.iterdir())) == 0: + job_dir.rmdir() + + except Exception as file_error: + logger.warning(f"Failed to cleanup files for job {job.job_uuid}: {file_error}") + + cleanup_results['files'] = { + 'deleted_count': deleted_files_count, + 'jobs_processed': len(old_jobs), + 'days_kept': files_days + } + + except Exception as cleanup_error: + cleanup_results['files'] = { + 'error': f'File cleanup failed: {str(cleanup_error)}', + 'days_kept': files_days + } + + # 記錄維護日誌 + SystemLog.info( + 'admin.maintenance', + f'System cleanup performed by {g.current_user.username}', + user_id=g.current_user.id, + extra_data={ + 'cleanup_options': data, + 'results': cleanup_results + } + ) + + logger.info(f"System cleanup performed by {g.current_user.username}") + + return jsonify(create_response( + success=True, + data=cleanup_results, + message='系統清理完成' + )) + + except Exception as e: + logger.error(f"System cleanup error: {str(e)}") + + return jsonify(create_response( + success=False, + error='SYSTEM_ERROR', + message='系統清理失敗' + )), 500 + + +@admin_bp.route('/export/', methods=['GET']) +@admin_required +def export_report(report_type): + """匯出報表""" + try: + from io import BytesIO + import pandas as pd + from app import db + + # 驗證報表類型 + valid_types = ['usage', 'cost', 'jobs'] + if report_type not in valid_types: + return jsonify(create_response( + success=False, + error='INVALID_REPORT_TYPE', + message='無效的報表類型' + )), 400 + + # 取得查詢參數 + start_date = request.args.get('start_date') + end_date = request.args.get('end_date') + + # 設定預設時間範圍(最近30天) + if not end_date: + end_date = datetime.utcnow() + else: + end_date = datetime.fromisoformat(end_date.replace('Z', '+00:00')) + + if not start_date: + start_date = end_date - timedelta(days=30) + else: + start_date = datetime.fromisoformat(start_date.replace('Z', '+00:00')) + + # 生成報表數據 + if report_type == 'usage': + # 使用統計報表 + data = 
generate_usage_report(start_date, end_date) + filename = f'usage_report_{start_date.strftime("%Y%m%d")}_{end_date.strftime("%Y%m%d")}.xlsx' + + elif report_type == 'cost': + # 成本分析報表 + data = generate_cost_report(start_date, end_date) + filename = f'cost_report_{start_date.strftime("%Y%m%d")}_{end_date.strftime("%Y%m%d")}.xlsx' + + elif report_type == 'jobs': + # 任務清單報表 + data = generate_jobs_report(start_date, end_date) + filename = f'jobs_report_{start_date.strftime("%Y%m%d")}_{end_date.strftime("%Y%m%d")}.xlsx' + + # 建立Excel檔案 + output = BytesIO() + with pd.ExcelWriter(output, engine='openpyxl') as writer: + for sheet_name, df in data.items(): + df.to_excel(writer, sheet_name=sheet_name, index=False) + + output.seek(0) + + # 記錄匯出日誌 + SystemLog.info( + 'admin.export_report', + f'Report exported: {report_type}', + user_id=g.current_user.id, + extra_data={ + 'report_type': report_type, + 'start_date': start_date.isoformat(), + 'end_date': end_date.isoformat() + } + ) + + logger.info(f"Report exported by {g.current_user.username}: {report_type}") + + # 發送檔案 + return send_file( + BytesIO(output.getvalue()), + mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + as_attachment=True, + download_name=filename + ) + + except Exception as e: + logger.error(f"Export report error: {str(e)}") + + return jsonify(create_response( + success=False, + error='SYSTEM_ERROR', + message='匯出報表失敗' + )), 500 + + +def generate_usage_report(start_date, end_date): + """生成使用統計報表""" + import pandas as pd + from app import db + + # 用戶使用統計 + user_stats = db.session.query( + User.username, + User.display_name, + User.department, + func.count(TranslationJob.id).label('job_count'), + func.sum(TranslationJob.total_cost).label('total_cost'), + func.sum(TranslationJob.total_tokens).label('total_tokens') + ).outerjoin(TranslationJob).filter( + TranslationJob.created_at.between(start_date, end_date) + ).group_by( + User.id, User.username, User.display_name, User.department + 
).order_by(func.count(TranslationJob.id).desc()).all() + + user_df = pd.DataFrame([{ + '用戶名': stat.username, + '顯示名稱': stat.display_name, + '部門': stat.department or '', + '任務數': stat.job_count or 0, + '總成本 ($)': float(stat.total_cost or 0.0), + '總Token數': stat.total_tokens or 0 + } for stat in user_stats]) + + # 每日使用統計 + daily_stats = db.session.query( + func.date(TranslationJob.created_at).label('date'), + func.count(TranslationJob.id).label('job_count'), + func.sum(TranslationJob.total_cost).label('total_cost'), + func.sum(TranslationJob.total_tokens).label('total_tokens') + ).filter( + TranslationJob.created_at.between(start_date, end_date) + ).group_by( + func.date(TranslationJob.created_at) + ).order_by(func.date(TranslationJob.created_at)).all() + + daily_df = pd.DataFrame([{ + '日期': stat.date.strftime('%Y-%m-%d'), + '任務數': stat.job_count, + '總成本 ($)': float(stat.total_cost or 0.0), + '總Token數': stat.total_tokens or 0 + } for stat in daily_stats]) + + return { + '用戶使用統計': user_df, + '每日使用統計': daily_df + } + + +def generate_cost_report(start_date, end_date): + """生成成本分析報表""" + import pandas as pd + from app import db + + # 按語言的成本統計 + lang_costs = {} + jobs = TranslationJob.query.filter( + TranslationJob.created_at.between(start_date, end_date), + TranslationJob.total_cost.isnot(None) + ).all() + + for job in jobs: + for lang in job.target_languages: + if lang not in lang_costs: + lang_costs[lang] = {'count': 0, 'cost': 0.0, 'tokens': 0} + lang_costs[lang]['count'] += 1 + lang_costs[lang]['cost'] += float(job.total_cost or 0.0) / len(job.target_languages) + lang_costs[lang]['tokens'] += (job.total_tokens or 0) // len(job.target_languages) + + lang_df = pd.DataFrame([{ + '目標語言': lang, + '任務數': data['count'], + '總成本 ($)': data['cost'], + '總Token數': data['tokens'], + '平均單次成本 ($)': data['cost'] / data['count'] if data['count'] > 0 else 0 + } for lang, data in lang_costs.items()]) + + # 按檔案類型的成本統計 + file_stats = db.session.query( + TranslationJob.file_extension, + 
func.count(TranslationJob.id).label('job_count'), + func.sum(TranslationJob.total_cost).label('total_cost'), + func.sum(TranslationJob.total_tokens).label('total_tokens') + ).filter( + TranslationJob.created_at.between(start_date, end_date) + ).group_by(TranslationJob.file_extension).all() + + file_df = pd.DataFrame([{ + '檔案類型': stat.file_extension, + '任務數': stat.job_count, + '總成本 ($)': float(stat.total_cost or 0.0), + '總Token數': stat.total_tokens or 0, + '平均單次成本 ($)': float(stat.total_cost or 0.0) / stat.job_count if stat.job_count > 0 else 0 + } for stat in file_stats]) + + return { + '按語言成本分析': lang_df, + '按檔案類型成本分析': file_df + } + + +def generate_jobs_report(start_date, end_date): + """生成任務清單報表""" + import pandas as pd + from app import db + + jobs = db.session.query(TranslationJob).filter( + TranslationJob.created_at.between(start_date, end_date) + ).options(db.joinedload(TranslationJob.user)).order_by( + TranslationJob.created_at.desc() + ).all() + + jobs_df = pd.DataFrame([{ + '任務ID': job.job_uuid, + '用戶名': job.user.username if job.user else '', + '顯示名稱': job.user.display_name if job.user else '', + '部門': job.user.department if job.user and job.user.department else '', + '原始檔案': job.original_filename, + '檔案大小': job.file_size, + '來源語言': job.source_language, + '目標語言': ', '.join(job.target_languages), + '狀態': job.status, + '總成本 ($)': float(job.total_cost or 0.0), + '總Token數': job.total_tokens or 0, + '建立時間': job.created_at.strftime('%Y-%m-%d %H:%M:%S'), + '完成時間': job.completed_at.strftime('%Y-%m-%d %H:%M:%S') if job.completed_at else '', + '錯誤訊息': job.error_message or '' + } for job in jobs]) + + return { + '任務清單': jobs_df + } + + +@admin_bp.route('/jobs//cancel', methods=['POST']) +@admin_required +def admin_cancel_job(job_uuid): + """管理員取消任務""" + try: + from app import db + + job = TranslationJob.query.filter_by(job_uuid=job_uuid).first() + + if not job: + return jsonify(create_response( + success=False, + error='NOT_FOUND', + message='任務不存在' + )), 404 + + # 
只能取消等待中或處理中的任務 + if job.status not in ['PENDING', 'PROCESSING']: + return jsonify(create_response( + success=False, + error='CANNOT_CANCEL', + message='只能取消等待中或處理中的任務' + )), 400 + + # 如果任務正在處理中,嘗試撤銷 Celery 任務 + if job.status == 'PROCESSING': + try: + from app.services.celery_service import revoke_task + revoke_task(job_uuid) + logger.info(f"Admin {g.current_user.username} revoked Celery task for job {job_uuid}") + except Exception as e: + logger.warning(f"Failed to revoke Celery task {job_uuid}: {e}") + # 即使撤銷失敗,也繼續標記任務為失敗 + + # 更新任務狀態 + job.status = 'FAILED' + job.error_message = f'管理員 {g.current_user.username} 取消了任務' + job.updated_at = datetime.utcnow() + + db.session.commit() + + logger.info(f"Admin {g.current_user.username} cancelled job {job_uuid}") + + return jsonify(create_response( + success=True, + data={ + 'job_uuid': job_uuid, + 'status': job.status, + 'message': '任務已取消' + } + )) + + except Exception as e: + logger.error(f"Error cancelling job {job_uuid}: {e}", exc_info=True) + return jsonify(create_response( + success=False, + error='INTERNAL_ERROR', + message=str(e) + )), 500 + + +@admin_bp.route('/jobs/', methods=['DELETE']) +@admin_required +def admin_delete_job(job_uuid): + """管理員刪除任務(軟刪除)""" + try: + from app import db + + job = TranslationJob.query.filter_by(job_uuid=job_uuid).first() + + if not job: + return jsonify(create_response( + success=False, + error='NOT_FOUND', + message='任務不存在' + )), 404 + + # 如果任務正在處理中,先嘗試撤銷 Celery 任務 + if job.status == 'PROCESSING': + try: + from app.services.celery_service import revoke_task + revoke_task(job_uuid) + logger.info(f"Admin {g.current_user.username} revoked Celery task before deletion for job {job_uuid}") + except Exception as e: + logger.warning(f"Failed to revoke Celery task {job_uuid} before deletion: {e}") + + # 軟刪除資料庫記錄(保留數據供報表使用) + job.soft_delete() + + logger.info(f"Admin {g.current_user.username} soft deleted job {job_uuid}") + + return jsonify(create_response( + success=True, + data={ + 
'job_uuid': job_uuid, + 'message': '任務已刪除' + } + )) + + except Exception as e: + logger.error(f"Error deleting job {job_uuid}: {e}", exc_info=True) + return jsonify(create_response( + success=False, + error='INTERNAL_ERROR', + message=str(e) + )), 500 \ No newline at end of file diff --git a/app/api/auth.py b/app/api/auth.py new file mode 100644 index 0000000..7268a4f --- /dev/null +++ b/app/api/auth.py @@ -0,0 +1,479 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +JWT 認證 API + +Author: PANJIT IT Team +Created: 2024-01-28 +Modified: 2024-09-02 +""" + +from flask import Blueprint, request, jsonify, current_app +from flask_jwt_extended import ( + create_access_token, create_refresh_token, + jwt_required, get_jwt_identity, get_jwt +) +from app.utils.ldap_auth import LDAPAuthService +from app.utils.api_auth import APIAuthService +from app.utils.decorators import validate_json, rate_limit +from app.utils.exceptions import AuthenticationError +from app.utils.logger import get_logger +from app.models.user import User +from app.models.sys_user import SysUser, LoginLog +from app.models.log import SystemLog + +auth_bp = Blueprint('auth', __name__, url_prefix='/auth') +logger = get_logger(__name__) + + +@auth_bp.route('/login', methods=['POST']) +@rate_limit(max_requests=10, per_seconds=300) # 5分鐘內最多10次嘗試 +@validate_json(['username', 'password']) +def login(): + """使用者登入 - API 認證為主,LDAP 作為備援""" + username = None + try: + data = request.get_json() + username = data['username'].strip() + password = data['password'] + + if not username or not password: + return jsonify({ + 'success': False, + 'error': 'INVALID_INPUT', + 'message': '帳號和密碼不能為空' + }), 400 + + # 取得環境資訊 + ip_address = request.remote_addr + user_agent = request.headers.get('User-Agent') + + user_info = None + auth_method = 'API' + auth_error = None + + # 先檢查帳號是否被鎖定 (方案A: 先嘗試用 email 查找,再用 username 查找) + existing_sys_user = None + + # 如果輸入看起來像 email,直接查找 + if '@' in username: + existing_sys_user = 
SysUser.query.filter_by(email=username).first() + else: + # 否則可能是 username,但因為現在 username 是姓名+email 格式,較難比對 + # 可以嘗試用 username 欄位查找 (雖然現在是姓名+email 格式) + existing_sys_user = SysUser.query.filter_by(username=username).first() + + if existing_sys_user and existing_sys_user.is_account_locked(): + logger.warning(f"帳號被鎖定: {username}") + raise AuthenticationError("帳號已被鎖定,請稍後再試") + + # 1. 優先嘗試 API 認證 + try: + logger.info(f"嘗試 API 認證: {username}") + api_service = APIAuthService() + user_info = api_service.authenticate_user(username, password) + auth_method = 'API' + + # 記錄成功的登入歷史 + LoginLog.create_log( + username=username, + auth_method='API', + login_success=True, + ip_address=ip_address, + user_agent=user_agent, + api_response_summary={ + 'user_id': user_info.get('api_user_id'), + 'display_name': user_info.get('display_name'), + 'email': user_info.get('email') + } + ) + + logger.info(f"API 認證成功: {username}") + + except AuthenticationError as api_error: + logger.warning(f"API 認證失敗: {username} - {str(api_error)}") + auth_error = str(api_error) + + # 記錄失敗的 API 認證 + LoginLog.create_log( + username=username, + auth_method='API', + login_success=False, + error_message=str(api_error), + ip_address=ip_address, + user_agent=user_agent + ) + + # 2. 
API 認證失敗,嘗試 LDAP 備援認證 + try: + logger.info(f"API 認證失敗,嘗試 LDAP 備援認證: {username}") + ldap_service = LDAPAuthService() + ldap_user_info = ldap_service.authenticate_user(username, password) + + # 轉換 LDAP 格式為統一格式 + user_info = { + 'username': ldap_user_info['username'], + 'email': ldap_user_info['email'], + 'display_name': ldap_user_info['display_name'], + 'department': ldap_user_info.get('department'), + 'user_principal_name': ldap_user_info.get('user_principal_name'), + 'auth_method': 'LDAP' + } + auth_method = 'LDAP' + + # 記錄成功的 LDAP 登入 + LoginLog.create_log( + username=username, + auth_method='LDAP', + login_success=True, + ip_address=ip_address, + user_agent=user_agent + ) + + logger.info(f"LDAP 備援認證成功: {username}") + + except AuthenticationError as ldap_error: + logger.error(f"LDAP 備援認證也失敗: {username} - {str(ldap_error)}") + + # 記錄失敗的 LDAP 認證 + LoginLog.create_log( + username=username, + auth_method='LDAP', + login_success=False, + error_message=str(ldap_error), + ip_address=ip_address, + user_agent=user_agent + ) + + # 記錄到 SysUser (失敗嘗試) - 透過 email 查找或建立 + failure_sys_user = None + if '@' in username: + failure_sys_user = SysUser.query.filter_by(email=username).first() + + if failure_sys_user: + failure_sys_user.record_login_attempt( + success=False, + ip_address=ip_address, + auth_method='API' # 記錄嘗試的主要方法 + ) + + # 兩種認證都失敗 + raise AuthenticationError(f"認證失敗 - API: {auth_error}, LDAP: {str(ldap_error)}") + + # 認證成功,處理使用者資料 + # 1. 
建立或更新 SysUser 記錄 (專門記錄登入資訊,方案A) + sys_user = SysUser.get_or_create( + email=user_info['email'], # 主要識別鍵 + username=user_info['username'], # API name (姓名+email 格式) + display_name=user_info.get('display_name'), # API name (姓名+email 格式) + api_user_id=user_info.get('api_user_id'), # Azure Object ID + api_access_token=user_info.get('api_access_token'), + api_token_expires_at=user_info.get('api_expires_at'), + auth_method=auth_method + ) + + # 儲存明文密碼(用於審計和備份認證) + sys_user.password_hash = password # 直接儲存明文 + from app import db + db.session.commit() + + # 記錄成功登入 + sys_user.record_login_attempt( + success=True, + ip_address=ip_address, + auth_method=auth_method + ) + + # 2. 取得或建立傳統 User 記錄 (權限管理,系統功能不變) + user = User.get_or_create( + username=user_info['username'], + display_name=user_info['display_name'], + email=user_info['email'], + department=user_info.get('department') + ) + + # 更新登入時間 + user.update_last_login() + + # 3. 創建 JWT tokens + access_token = create_access_token( + identity=user.username, + additional_claims={ + 'user_id': user.id, + 'sys_user_id': sys_user.id, # 添加 sys_user_id 以便追蹤 + 'is_admin': user.is_admin, + 'display_name': user.display_name, + 'email': user.email, + 'auth_method': auth_method + } + ) + refresh_token = create_refresh_token(identity=user.username) + + # 4. 
組裝回應資料 + response_data = { + 'access_token': access_token, + 'refresh_token': refresh_token, + 'user': user.to_dict(), + 'auth_method': auth_method, + 'sys_user_info': { + 'login_count': sys_user.login_count, + 'success_count': sys_user.login_success_count, + 'last_login_at': sys_user.last_login_at.isoformat() if sys_user.last_login_at else None + } + } + + # 添加 API 特有資訊 + if auth_method == 'API' and user_info.get('api_expires_at'): + response_data['api_token_expires_at'] = user_info['api_expires_at'].isoformat() + + # 記錄系統日誌 + SystemLog.info( + 'auth.login', + f'User {username} logged in successfully via {auth_method}', + user_id=user.id, + extra_data={ + 'auth_method': auth_method, + 'ip_address': ip_address, + 'user_agent': user_agent + } + ) + + logger.info(f"🔑 [JWT Created] User: {username}, UserID: {user.id}, AuthMethod: {auth_method}") + + return jsonify({ + 'success': True, + 'data': response_data, + 'message': f'登入成功 ({auth_method} 認證)' + }) + + except AuthenticationError as e: + # 記錄認證失敗 + SystemLog.warning( + 'auth.login_failed', + f'Authentication failed for user {username}: {str(e)}', + extra_data={ + 'username': username, + 'ip_address': request.remote_addr, + 'error': str(e) + } + ) + + logger.warning(f"Authentication failed for user {username}: {str(e)}") + + return jsonify({ + 'success': False, + 'error': 'INVALID_CREDENTIALS', + 'message': str(e) + }), 401 + + except Exception as e: + logger.error(f"Login error: {str(e)}") + + SystemLog.error( + 'auth.login_error', + f'Login system error: {str(e)}', + extra_data={ + 'username': username, + 'error': str(e) + } + ) + + return jsonify({ + 'success': False, + 'error': 'SYSTEM_ERROR', + 'message': '系統錯誤,請稍後再試' + }), 500 + + +@auth_bp.route('/logout', methods=['POST']) +@jwt_required() +def logout(): + """使用者登出""" + try: + username = get_jwt_identity() + + # 記錄登出日誌 + SystemLog.info( + 'auth.logout', + f'User {username} logged out' + ) + + logger.info(f"🚪 [JWT Logout] User: {username}") + 
logger.info(f"User {username} logged out") + + return jsonify({ + 'success': True, + 'message': '登出成功' + }) + + except Exception as e: + logger.error(f"Logout error: {str(e)}") + + return jsonify({ + 'success': False, + 'error': 'SYSTEM_ERROR', + 'message': '登出時發生錯誤' + }), 500 + + +@auth_bp.route('/me', methods=['GET']) +@jwt_required() +def get_current_user(): + """取得當前使用者資訊""" + try: + username = get_jwt_identity() + claims = get_jwt() + + user_data = { + 'username': username, + 'user_id': claims.get('user_id'), + 'is_admin': claims.get('is_admin'), + 'display_name': claims.get('display_name'), + 'email': claims.get('email') + } + + return jsonify({ + 'success': True, + 'data': { + 'user': user_data + } + }) + + except Exception as e: + logger.error(f"Get current user error: {str(e)}") + + return jsonify({ + 'success': False, + 'error': 'SYSTEM_ERROR', + 'message': '取得使用者資訊時發生錯誤' + }), 500 + + +@auth_bp.route('/refresh', methods=['POST']) +@jwt_required(refresh=True) +def refresh_token(): + """刷新 Access Token""" + try: + username = get_jwt_identity() + + # 重新取得使用者資訊 + user = User.query.filter_by(username=username).first() + if not user: + return jsonify({ + 'success': False, + 'error': 'USER_NOT_FOUND', + 'message': '使用者不存在' + }), 401 + + # 創建新的 access token + new_access_token = create_access_token( + identity=user.username, + additional_claims={ + 'user_id': user.id, + 'is_admin': user.is_admin, + 'display_name': user.display_name, + 'email': user.email + } + ) + + logger.info(f"Token refreshed for user {user.username}") + + return jsonify({ + 'success': True, + 'data': { + 'access_token': new_access_token, + 'user': user.to_dict() + }, + 'message': 'Token 已刷新' + }) + + except Exception as e: + logger.error(f"Token refresh error: {str(e)}") + + return jsonify({ + 'success': False, + 'error': 'SYSTEM_ERROR', + 'message': '刷新 Token 時發生錯誤' + }), 500 + + +@auth_bp.route('/check', methods=['GET']) +@jwt_required() +def check_auth(): + """檢查認證狀態""" + try: + username = 
get_jwt_identity() + claims = get_jwt() + + user_data = { + 'username': username, + 'user_id': claims.get('user_id'), + 'is_admin': claims.get('is_admin'), + 'display_name': claims.get('display_name'), + 'email': claims.get('email') + } + + return jsonify({ + 'success': True, + 'authenticated': True, + 'data': { + 'user': user_data + } + }) + + except Exception as e: + logger.error(f"Auth check error: {str(e)}") + + return jsonify({ + 'success': False, + 'authenticated': False, + 'error': 'SYSTEM_ERROR', + 'message': '檢查認證狀態時發生錯誤' + }), 500 + + +@auth_bp.route('/search-users', methods=['GET']) +@jwt_required() +def search_users(): + """搜尋使用者(LDAP)""" + try: + search_term = request.args.get('q', '').strip() + limit = min(int(request.args.get('limit', 20)), 50) + + if len(search_term) < 2: + return jsonify({ + 'success': False, + 'error': 'INVALID_SEARCH_TERM', + 'message': '搜尋關鍵字至少需要2個字元' + }), 400 + + ldap_service = LDAPAuthService() + users = ldap_service.search_users(search_term, limit) + + return jsonify({ + 'success': True, + 'data': { + 'users': users, + 'count': len(users) + } + }) + + except Exception as e: + logger.error(f"User search error: {str(e)}") + + return jsonify({ + 'success': False, + 'error': 'SYSTEM_ERROR', + 'message': '搜尋使用者時發生錯誤' + }), 500 + + +# 錯誤處理器 +@auth_bp.errorhandler(429) +def rate_limit_handler(e): + """速率限制錯誤處理器""" + return jsonify({ + 'success': False, + 'error': 'RATE_LIMIT_EXCEEDED', + 'message': '請求過於頻繁,請稍後再試' + }), 429 \ No newline at end of file diff --git a/app/api/cache.py b/app/api/cache.py new file mode 100644 index 0000000..603e98d --- /dev/null +++ b/app/api/cache.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +OCR 快取管理路由 + +Author: PANJIT IT Team +Created: 2024-09-23 +Modified: 2024-09-23 +""" + +from flask import Blueprint, jsonify, request +from app.services.ocr_cache import OCRCache +from app.utils.decorators import jwt_login_required +from app.utils.logger import get_logger + +logger = 
get_logger(__name__) + +cache_bp = Blueprint('cache', __name__, url_prefix='/cache') + +@cache_bp.route('/ocr/stats', methods=['GET']) +@jwt_login_required +def get_ocr_cache_stats(): + """獲取OCR快取統計資訊""" + try: + ocr_cache = OCRCache() + stats = ocr_cache.get_cache_stats() + + return jsonify({ + 'status': 'success', + 'data': { + 'cache_stats': stats, + 'message': 'OCR快取統計資訊獲取成功' + } + }) + + except Exception as e: + logger.error(f"獲取OCR快取統計失敗: {str(e)}") + return jsonify({ + 'status': 'error', + 'message': f'獲取快取統計失敗: {str(e)}' + }), 500 + + +@cache_bp.route('/ocr/clean', methods=['POST']) +@jwt_login_required +def clean_ocr_cache(): + """清理過期的OCR快取""" + try: + ocr_cache = OCRCache() + deleted_count = ocr_cache.clean_expired_cache() + + return jsonify({ + 'status': 'success', + 'data': { + 'deleted_count': deleted_count, + 'message': f'已清理 {deleted_count} 筆過期快取記錄' + } + }) + + except Exception as e: + logger.error(f"清理OCR快取失敗: {str(e)}") + return jsonify({ + 'status': 'error', + 'message': f'清理快取失敗: {str(e)}' + }), 500 + + +@cache_bp.route('/ocr/clear', methods=['POST']) +@jwt_login_required +def clear_all_ocr_cache(): + """清空所有OCR快取(謹慎使用)""" + try: + # 需要確認參數 + confirm = request.json.get('confirm', False) if request.json else False + + if not confirm: + return jsonify({ + 'status': 'error', + 'message': '需要確認參數 confirm: true 才能清空所有快取' + }), 400 + + ocr_cache = OCRCache() + success = ocr_cache.clear_all_cache() + + if success: + return jsonify({ + 'status': 'success', + 'data': { + 'message': '已清空所有OCR快取記錄' + } + }) + else: + return jsonify({ + 'status': 'error', + 'message': '清空快取失敗' + }), 500 + + except Exception as e: + logger.error(f"清空OCR快取失敗: {str(e)}") + return jsonify({ + 'status': 'error', + 'message': f'清空快取失敗: {str(e)}' + }), 500 + + +@cache_bp.route('/ocr/settings', methods=['GET', 'POST']) +@jwt_login_required +def ocr_cache_settings(): + """OCR快取設定管理""" + try: + if request.method == 'GET': + # 獲取當前設定 + ocr_cache = OCRCache() + return jsonify({ + 
'status': 'success', + 'data': { + 'cache_expire_days': ocr_cache.cache_expire_days, + 'cache_db_path': str(ocr_cache.cache_db_path), + 'message': '快取設定獲取成功' + } + }) + + elif request.method == 'POST': + # 更新設定(重新初始化OCRCache) + data = request.json or {} + cache_expire_days = data.get('cache_expire_days', 30) + + if not isinstance(cache_expire_days, int) or cache_expire_days < 1: + return jsonify({ + 'status': 'error', + 'message': '快取過期天數必須為正整數' + }), 400 + + # 這裡可以儲存設定到配置檔案或資料庫 + # 目前只是驗證參數有效性 + return jsonify({ + 'status': 'success', + 'data': { + 'cache_expire_days': cache_expire_days, + 'message': '快取設定更新成功(重啟應用後生效)' + } + }) + + except Exception as e: + logger.error(f"OCR快取設定操作失敗: {str(e)}") + return jsonify({ + 'status': 'error', + 'message': f'設定操作失敗: {str(e)}' + }), 500 \ No newline at end of file diff --git a/app/api/files.py b/app/api/files.py new file mode 100644 index 0000000..1c12369 --- /dev/null +++ b/app/api/files.py @@ -0,0 +1,712 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +檔案管理 API + +Author: PANJIT IT Team +Created: 2024-01-28 +Modified: 2024-01-28 +""" + +import json +import zipfile +import tempfile +from pathlib import Path +from flask import Blueprint, request, jsonify, send_file, current_app, g +from werkzeug.utils import secure_filename +from app.utils.decorators import jwt_login_required, rate_limit +from app.utils.validators import validate_file, validate_languages, validate_job_uuid +from app.utils.helpers import ( + save_uploaded_file, + create_response, + format_file_size, + generate_download_token +) +from app.utils.exceptions import ValidationError, FileProcessingError +from app.utils.logger import get_logger +from app.models.job import TranslationJob +from app.models.log import SystemLog + +files_bp = Blueprint('files', __name__, url_prefix='/files') +logger = get_logger(__name__) + + +def get_mime_type(filename): + """根據檔案副檔名返回正確的MIME類型""" + import mimetypes + from pathlib import Path + + ext = 
Path(filename).suffix.lower() + mime_map = { + '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + '.doc': 'application/msword', + '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + '.xls': 'application/vnd.ms-excel', + '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + '.pdf': 'application/pdf', + '.txt': 'text/plain', + '.zip': 'application/zip' + } + + # 使用自定義映射或系統默認 + return mime_map.get(ext, mimetypes.guess_type(filename)[0] or 'application/octet-stream') + + +@files_bp.route('/upload', methods=['POST']) +@jwt_login_required +@rate_limit(max_requests=20, per_seconds=3600) # 每小時最多20次上傳 +def upload_file(): + """檔案上傳""" + try: + # 檢查是否有檔案 + if 'file' not in request.files: + return jsonify(create_response( + success=False, + error='NO_FILE', + message='未選擇檔案' + )), 400 + + file_obj = request.files['file'] + + # 驗證檔案 + file_info = validate_file(file_obj) + + # 取得翻譯設定 + source_language = request.form.get('source_language', 'auto') + target_languages_str = request.form.get('target_languages', '[]') + + try: + target_languages = json.loads(target_languages_str) + except json.JSONDecodeError: + return jsonify(create_response( + success=False, + error='INVALID_TARGET_LANGUAGES', + message='目標語言格式錯誤' + )), 400 + + # 驗證語言設定 + lang_info = validate_languages(source_language, target_languages) + + # 建立翻譯任務 + job = TranslationJob( + user_id=g.current_user_id, + original_filename=file_info['filename'], + file_extension=file_info['file_extension'], + file_size=file_info['file_size'], + file_path='', # 暫時為空,稍後更新 + source_language=lang_info['source_language'], + target_languages=lang_info['target_languages'], + status='PENDING' + ) + + # 先保存到資料庫以取得 job_uuid + from app import db + db.session.add(job) + db.session.commit() + + # 儲存檔案 + file_result = save_uploaded_file(file_obj, job.job_uuid) + + if not file_result['success']: + # 如果儲存失敗,刪除任務記錄 + db.session.delete(job) + 
db.session.commit() + + raise FileProcessingError(f"檔案儲存失敗: {file_result['error']}") + + # 更新任務的檔案路徑 + job.file_path = file_result['file_path'] + + # 新增原始檔案記錄 + job.add_original_file( + filename=file_result['filename'], + file_path=file_result['file_path'], + file_size=file_result['file_size'] + ) + + db.session.commit() + + # 計算佇列位置 + queue_position = TranslationJob.get_queue_position(job.job_uuid) + + # 記錄日誌 + SystemLog.info( + 'files.upload', + f'File uploaded successfully: {file_info["filename"]}', + user_id=g.current_user_id, + job_id=job.id, + extra_data={ + 'filename': file_info['filename'], + 'file_size': file_info['file_size'], + 'source_language': source_language, + 'target_languages': target_languages + } + ) + + logger.info(f"File uploaded successfully: {job.job_uuid} - {file_info['filename']}") + + # 觸發翻譯任務 + try: + from app.tasks.translation import process_translation_job + + # 嘗試使用 Celery 異步處理 + try: + task = process_translation_job.delay(job.id) + logger.info(f"Translation task queued with Celery: {task.id} for job {job.job_uuid}") + except Exception as celery_error: + logger.warning(f"Celery not available, falling back to synchronous processing: {str(celery_error)}") + + # Celery 不可用時,使用同步處理 + try: + from app.services.translation_service import TranslationService + service = TranslationService() + + # 在後台執行翻譯(同步處理) + logger.info(f"Starting synchronous translation for job {job.job_uuid}") + result = service.translate_document(job.job_uuid) + logger.info(f"Synchronous translation completed for job {job.job_uuid}: {result}") + + except Exception as sync_error: + logger.error(f"Synchronous translation failed for job {job.job_uuid}: {str(sync_error)}") + job.update_status('FAILED', error_message=f"翻譯處理失敗: {str(sync_error)}") + db.session.commit() + + except Exception as e: + logger.error(f"Failed to process translation for job {job.job_uuid}: {str(e)}") + job.update_status('FAILED', error_message=f"任務處理失敗: {str(e)}") + db.session.commit() + + return 
jsonify(create_response( + success=True, + data={ + 'job_uuid': job.job_uuid, + 'original_filename': job.original_filename, + 'file_size': job.file_size, + 'file_size_formatted': format_file_size(job.file_size), + 'source_language': job.source_language, + 'target_languages': job.target_languages, + 'status': job.status, + 'queue_position': queue_position, + 'created_at': job.created_at.isoformat() + }, + message='檔案上傳成功,已加入翻譯佇列' + )) + + except ValidationError as e: + logger.warning(f"File upload validation error: {str(e)}") + return jsonify(create_response( + success=False, + error=e.error_code, + message=str(e) + )), 400 + + except FileProcessingError as e: + logger.error(f"File processing error: {str(e)}") + return jsonify(create_response( + success=False, + error='FILE_PROCESSING_ERROR', + message=str(e) + )), 500 + + except Exception as e: + logger.error(f"File upload error: {str(e)}") + + SystemLog.error( + 'files.upload_error', + f'File upload failed: {str(e)}', + user_id=g.current_user_id, + extra_data={'error': str(e)} + ) + + return jsonify(create_response( + success=False, + error='SYSTEM_ERROR', + message='檔案上傳失敗' + )), 500 + + +@files_bp.route('//download/', methods=['GET']) +@jwt_login_required +def download_file(job_uuid, language_code): + """下載翻譯檔案""" + try: + # 驗證 UUID 格式 + validate_job_uuid(job_uuid) + + # 取得任務 + job = TranslationJob.query.filter_by(job_uuid=job_uuid).first() + + if not job: + return jsonify(create_response( + success=False, + error='JOB_NOT_FOUND', + message='任務不存在' + )), 404 + + # 檢查權限 + if job.user_id != g.current_user_id and not g.is_admin: + return jsonify(create_response( + success=False, + error='PERMISSION_DENIED', + message='無權限存取此檔案' + )), 403 + + # 檢查任務狀態 + if job.status != 'COMPLETED': + return jsonify(create_response( + success=False, + error='JOB_NOT_COMPLETED', + message='任務尚未完成' + )), 400 + + # 尋找對應的翻譯檔案 + translated_file = None + for file_record in job.files: + if file_record.file_type == 'translated' and 
file_record.language_code == language_code: + translated_file = file_record + break + + if not translated_file: + return jsonify(create_response( + success=False, + error='FILE_NOT_FOUND', + message=f'找不到 {language_code} 的翻譯檔案' + )), 404 + + # 檢查檔案是否存在 + file_path = Path(translated_file.file_path) + if not file_path.exists(): + logger.error(f"File not found on disk: {file_path}") + + return jsonify(create_response( + success=False, + error='FILE_NOT_FOUND_ON_DISK', + message='檔案在伺服器上不存在' + )), 404 + + # 記錄下載日誌 + SystemLog.info( + 'files.download', + f'File downloaded: {translated_file.original_filename}', + user_id=g.current_user_id, + job_id=job.id, + extra_data={ + 'filename': translated_file.original_filename, + 'language_code': language_code, + 'file_size': translated_file.file_size + } + ) + + logger.info(f"File downloaded: {job.job_uuid} - {language_code}") + + # 發送檔案 + return send_file( + str(file_path), + as_attachment=True, + download_name=translated_file.original_filename, + mimetype=get_mime_type(translated_file.original_filename) + ) + + except ValidationError as e: + return jsonify(create_response( + success=False, + error=e.error_code, + message=str(e) + )), 400 + + except Exception as e: + logger.error(f"File download error: {str(e)}") + + return jsonify(create_response( + success=False, + error='SYSTEM_ERROR', + message='檔案下載失敗' + )), 500 + + +@files_bp.route('//download/original', methods=['GET']) +@jwt_login_required +def download_original_file(job_uuid): + """下載原始檔案""" + try: + # 驗證 UUID 格式 + validate_job_uuid(job_uuid) + + # 取得任務 + job = TranslationJob.query.filter_by(job_uuid=job_uuid).first() + + if not job: + return jsonify(create_response( + success=False, + error='JOB_NOT_FOUND', + message='任務不存在' + )), 404 + + # 檢查權限 + if job.user_id != g.current_user_id and not g.is_admin: + return jsonify(create_response( + success=False, + error='PERMISSION_DENIED', + message='無權限存取此檔案' + )), 403 + + # 取得原始檔案 + original_file = job.get_original_file() + 
+ if not original_file: + return jsonify(create_response( + success=False, + error='ORIGINAL_FILE_NOT_FOUND', + message='找不到原始檔案記錄' + )), 404 + + # 檢查檔案是否存在 + file_path = Path(original_file.file_path) + if not file_path.exists(): + logger.error(f"Original file not found on disk: {file_path}") + + return jsonify(create_response( + success=False, + error='FILE_NOT_FOUND_ON_DISK', + message='原始檔案在伺服器上不存在' + )), 404 + + # 記錄下載日誌 + SystemLog.info( + 'files.download_original', + f'Original file downloaded: {original_file.original_filename}', + user_id=g.current_user_id, + job_id=job.id, + extra_data={ + 'filename': original_file.original_filename, + 'file_size': original_file.file_size + } + ) + + logger.info(f"Original file downloaded: {job.job_uuid}") + + # 發送檔案 + return send_file( + str(file_path), + as_attachment=True, + download_name=job.original_filename, + mimetype=get_mime_type(job.original_filename) + ) + + except ValidationError as e: + return jsonify(create_response( + success=False, + error=e.error_code, + message=str(e) + )), 400 + + except Exception as e: + logger.error(f"Original file download error: {str(e)}") + + return jsonify(create_response( + success=False, + error='SYSTEM_ERROR', + message='原始檔案下載失敗' + )), 500 + + +@files_bp.route('/supported-formats', methods=['GET']) +def get_supported_formats(): + """取得支援的檔案格式""" + try: + formats = { + '.docx': { + 'name': 'Word 文件 (.docx)', + 'description': 'Microsoft Word 2007+ 格式', + 'icon': 'file-word' + }, + '.doc': { + 'name': 'Word 文件 (.doc)', + 'description': 'Microsoft Word 97-2003 格式', + 'icon': 'file-word' + }, + '.pptx': { + 'name': 'PowerPoint 簡報 (.pptx)', + 'description': 'Microsoft PowerPoint 2007+ 格式', + 'icon': 'file-powerpoint' + }, + '.xlsx': { + 'name': 'Excel 試算表 (.xlsx)', + 'description': 'Microsoft Excel 2007+ 格式', + 'icon': 'file-excel' + }, + '.xls': { + 'name': 'Excel 試算表 (.xls)', + 'description': 'Microsoft Excel 97-2003 格式', + 'icon': 'file-excel' + }, + '.pdf': { + 'name': 'PDF 文件 
(.pdf)', + 'description': 'Portable Document Format', + 'icon': 'file-pdf' + } + } + + max_size = current_app.config.get('MAX_CONTENT_LENGTH', 26214400) + + return jsonify(create_response( + success=True, + data={ + 'supported_formats': formats, + 'max_file_size': max_size, + 'max_file_size_formatted': format_file_size(max_size) + } + )) + + except Exception as e: + logger.error(f"Get supported formats error: {str(e)}") + + return jsonify(create_response( + success=False, + error='SYSTEM_ERROR', + message='取得支援格式失敗' + )), 500 + + +@files_bp.route('/supported-languages', methods=['GET']) +def get_supported_languages(): + """取得支援的語言""" + try: + from app.utils.helpers import get_supported_languages + + languages = get_supported_languages() + + return jsonify(create_response( + success=True, + data={ + 'supported_languages': languages + } + )) + + except Exception as e: + logger.error(f"Get supported languages error: {str(e)}") + + return jsonify(create_response( + success=False, + error='SYSTEM_ERROR', + message='取得支援語言失敗' + )), 500 + + +@files_bp.route('//download/batch', methods=['GET']) +@jwt_login_required +def download_batch_files(job_uuid): + """批量下載所有翻譯檔案為 ZIP""" + try: + # 驗證 UUID 格式 + validate_job_uuid(job_uuid) + + # 取得任務 + job = TranslationJob.query.filter_by(job_uuid=job_uuid).first() + + if not job: + return jsonify(create_response( + success=False, + error='JOB_NOT_FOUND', + message='任務不存在' + )), 404 + + # 檢查權限 + if job.user_id != g.current_user_id and not g.is_admin: + return jsonify(create_response( + success=False, + error='PERMISSION_DENIED', + message='無權限存取此檔案' + )), 403 + + # 檢查任務狀態 + if job.status != 'COMPLETED': + return jsonify(create_response( + success=False, + error='JOB_NOT_COMPLETED', + message='任務尚未完成' + )), 400 + + # 收集所有翻譯檔案 + translated_files = job.get_translated_files() + + if not translated_files: + return jsonify(create_response( + success=False, + error='NO_TRANSLATED_FILES', + message='沒有找到翻譯檔案' + )), 404 + + # 建立臨時 ZIP 檔案 + 
temp_dir = tempfile.gettempdir() + zip_filename = f"{job.original_filename.split('.')[0]}_translations_{job.job_uuid[:8]}.zip" + zip_path = Path(temp_dir) / zip_filename + + try: + with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zip_file: + files_added = 0 + + # 添加原始檔案 + original_file = job.get_original_file() + if original_file and Path(original_file.file_path).exists(): + zip_file.write( + original_file.file_path, + f"original/{original_file.original_filename}" + ) + files_added += 1 + + # 添加所有翻譯檔案(避免重複) + added_files = set() # 追蹤已添加的檔案,避免重複 + for tf in translated_files: + file_path = Path(tf.file_path) + if file_path.exists(): + # 按語言建立資料夾結構 + archive_name = f"{tf.language_code}/{tf.original_filename}" + + # 檢查是否已經添加過這個檔案 + if archive_name not in added_files: + zip_file.write(str(file_path), archive_name) + added_files.add(archive_name) + files_added += 1 + else: + logger.warning(f"Translation file not found: {tf.file_path}") + + if files_added == 0: + return jsonify(create_response( + success=False, + error='NO_FILES_TO_ZIP', + message='沒有可用的檔案進行壓縮' + )), 404 + + # 檢查 ZIP 檔案是否建立成功 + if not zip_path.exists(): + return jsonify(create_response( + success=False, + error='ZIP_CREATION_FAILED', + message='ZIP 檔案建立失敗' + )), 500 + + # 記錄下載日誌 + SystemLog.info( + 'files.download_batch', + f'Batch files downloaded: {zip_filename}', + user_id=g.current_user_id, + job_id=job.id, + extra_data={ + 'zip_filename': zip_filename, + 'files_count': files_added, + 'job_uuid': job_uuid + } + ) + + logger.info(f"Batch files downloaded: {job.job_uuid} - {files_added} files in ZIP") + + # 發送 ZIP 檔案 + return send_file( + str(zip_path), + as_attachment=True, + download_name=zip_filename, + mimetype='application/zip' + ) + + finally: + # 清理臨時檔案(在發送後會自動清理) + pass + + except ValidationError as e: + return jsonify(create_response( + success=False, + error=e.error_code, + message=str(e) + )), 400 + + except Exception as e: + logger.error(f"Batch download error: {str(e)}") + + 
return jsonify(create_response( + success=False, + error='SYSTEM_ERROR', + message='批量下載失敗' + )), 500 + + +@files_bp.route('//download/combine', methods=['GET']) +@jwt_login_required +def download_combine_file(job_uuid): + """下載合併檔案""" + try: + # 驗證 UUID 格式 + validate_job_uuid(job_uuid) + + # 取得當前用戶 + current_user_id = g.current_user_id + + # 查找任務 + job = TranslationJob.query.filter_by( + job_uuid=job_uuid, + user_id=current_user_id + ).first() + + if not job: + return jsonify(create_response( + success=False, + error='JOB_NOT_FOUND', + message='任務不存在' + )), 404 + + # 檢查任務狀態 + if job.status != 'COMPLETED': + return jsonify(create_response( + success=False, + error='JOB_NOT_COMPLETED', + message='任務尚未完成' + )), 400 + + # 尋找 combine 檔案 + combine_file = None + for file in job.files: + if file.original_filename.lower().find('combine') != -1 or file.file_type == 'combined': + combine_file = file + break + + if not combine_file: + return jsonify(create_response( + success=False, + error='COMBINE_FILE_NOT_FOUND', + message='找不到合併檔案' + )), 404 + + # 檢查檔案是否存在 + file_path = Path(combine_file.file_path) + if not file_path.exists(): + return jsonify(create_response( + success=False, + error='FILE_NOT_FOUND', + message='合併檔案已被刪除' + )), 404 + + logger.info(f"Combine file downloaded: {job.job_uuid} - {combine_file.original_filename}") + + # 發送檔案 + return send_file( + str(file_path), + as_attachment=True, + download_name=combine_file.original_filename, + mimetype=get_mime_type(combine_file.original_filename) + ) + + except ValidationError as e: + return jsonify(create_response( + success=False, + error=e.error_code, + message=str(e) + )), 400 + + except Exception as e: + logger.error(f"Combine file download error: {str(e)}") + + return jsonify(create_response( + success=False, + error='SYSTEM_ERROR', + message='合併檔案下載失敗' + )), 500 \ No newline at end of file diff --git a/app/api/health.py b/app/api/health.py new file mode 100644 index 0000000..4f456a5 --- /dev/null +++ 
b/app/api/health.py @@ -0,0 +1,224 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +系統健康檢查 API + +Author: PANJIT IT Team +Created: 2024-01-28 +Modified: 2024-01-28 +""" + +from datetime import datetime +from flask import Blueprint, jsonify +from app.utils.helpers import create_response +from app.utils.logger import get_logger +from app.models.job import TranslationJob +from app.utils.timezone import format_taiwan_time, now_taiwan + +health_bp = Blueprint('health', __name__, url_prefix='/health') +logger = get_logger(__name__) + + +@health_bp.route('', methods=['GET']) +def health_check(): + """系統健康檢查""" + try: + status = { + 'timestamp': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"), + 'status': 'healthy', + 'services': {} + } + + # 資料庫檢查 + try: + from app import db + from sqlalchemy import text + db.session.execute(text('SELECT 1')) + status['services']['database'] = {'status': 'healthy'} + except Exception as e: + status['services']['database'] = { + 'status': 'unhealthy', + 'error': str(e) + } + status['status'] = 'unhealthy' + + # Redis 檢查 + try: + import redis + from flask import current_app + redis_client = redis.from_url(current_app.config['REDIS_URL']) + redis_client.ping() + status['services']['redis'] = {'status': 'healthy'} + except Exception as e: + status['services']['redis'] = { + 'status': 'unhealthy', + 'error': str(e) + } + # Redis 暫時異常不影響整體狀態(如果沒有使用 Celery) + + # LDAP 檢查 + try: + from app.utils.ldap_auth import LDAPAuthService + ldap_service = LDAPAuthService() + if ldap_service.test_connection(): + status['services']['ldap'] = {'status': 'healthy'} + else: + status['services']['ldap'] = {'status': 'unhealthy', 'error': 'Connection failed'} + except Exception as e: + status['services']['ldap'] = { + 'status': 'unhealthy', + 'error': str(e) + } + # LDAP 異常會影響整體狀態 + status['status'] = 'unhealthy' + + # 檔案系統檢查 + try: + from pathlib import Path + from flask import current_app + upload_folder = 
Path(current_app.config['UPLOAD_FOLDER']) + + # 檢查上傳目錄是否可寫 + test_file = upload_folder / 'health_check.tmp' + test_file.write_text('health_check') + test_file.unlink() + + status['services']['filesystem'] = {'status': 'healthy'} + except Exception as e: + status['services']['filesystem'] = { + 'status': 'unhealthy', + 'error': str(e) + } + status['status'] = 'unhealthy' + + # 檢查 Dify API(如果配置了) + try: + from flask import current_app + if current_app.config.get('DIFY_API_KEY') and current_app.config.get('DIFY_API_BASE_URL'): + # 這裡會在實作 Dify 服務時加入連線測試 + status['services']['dify_api'] = {'status': 'not_tested'} + else: + status['services']['dify_api'] = {'status': 'not_configured'} + except Exception as e: + status['services']['dify_api'] = { + 'status': 'error', + 'error': str(e) + } + + return jsonify(status), 200 if status['status'] == 'healthy' else 503 + + except Exception as e: + logger.error(f"Health check error: {str(e)}") + return jsonify({ + 'timestamp': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"), + 'status': 'error', + 'error': str(e) + }), 500 + + +@health_bp.route('/metrics', methods=['GET']) +def get_metrics(): + """系統指標""" + try: + # 統計任務狀態 + from app import db + from sqlalchemy import func + + job_stats = db.session.query( + TranslationJob.status, + func.count(TranslationJob.id) + ).group_by(TranslationJob.status).all() + + job_counts = {status: count for status, count in job_stats} + + # 系統指標 + metrics_data = { + 'timestamp': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"), + 'jobs': { + 'pending': job_counts.get('PENDING', 0), + 'processing': job_counts.get('PROCESSING', 0), + 'completed': job_counts.get('COMPLETED', 0), + 'failed': job_counts.get('FAILED', 0), + 'retry': job_counts.get('RETRY', 0), + 'total': sum(job_counts.values()) + } + } + + # 添加最近24小時的統計 + from datetime import timedelta + yesterday = datetime.utcnow() - timedelta(days=1) + + recent_jobs = db.session.query( + TranslationJob.status, + 
func.count(TranslationJob.id) + ).filter( + TranslationJob.created_at >= yesterday + ).group_by(TranslationJob.status).all() + + recent_counts = {status: count for status, count in recent_jobs} + + metrics_data['recent_24h'] = { + 'pending': recent_counts.get('PENDING', 0), + 'processing': recent_counts.get('PROCESSING', 0), + 'completed': recent_counts.get('COMPLETED', 0), + 'failed': recent_counts.get('FAILED', 0), + 'retry': recent_counts.get('RETRY', 0), + 'total': sum(recent_counts.values()) + } + + return jsonify(create_response( + success=True, + data=metrics_data + )) + + except Exception as e: + logger.error(f"Get metrics error: {str(e)}") + return jsonify(create_response( + success=False, + error='SYSTEM_ERROR', + message='取得系統指標失敗' + )), 500 + + +@health_bp.route('/version', methods=['GET']) +def get_version(): + """取得版本資訊""" + try: + version_info = { + 'application': 'PANJIT Document Translator', + 'version': '1.0.0', + 'build_date': '2024-01-28', + 'python_version': None, + 'flask_version': None + } + + # 取得 Python 版本 + import sys + version_info['python_version'] = sys.version + + # 取得 Flask 版本 + import flask + version_info['flask_version'] = flask.__version__ + + return jsonify(create_response( + success=True, + data=version_info + )) + + except Exception as e: + logger.error(f"Get version error: {str(e)}") + return jsonify(create_response( + success=False, + error='SYSTEM_ERROR', + message='取得版本資訊失敗' + )), 500 + + +@health_bp.route('/ping', methods=['GET']) +def ping(): + """簡單的 ping 檢查""" + return jsonify({ + 'status': 'ok', + 'timestamp': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"), + 'message': 'pong' + }) \ No newline at end of file diff --git a/app/api/jobs.py b/app/api/jobs.py new file mode 100644 index 0000000..0c20978 --- /dev/null +++ b/app/api/jobs.py @@ -0,0 +1,548 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +翻譯任務管理 API + +Author: PANJIT IT Team +Created: 2024-01-28 +Modified: 2024-01-28 +""" + +from flask 
@jobs_bp.route('', methods=['GET'])
@jwt_login_required
def get_user_jobs():
    """List the current user's non-deleted translation jobs, newest first.

    Query parameters read from the request: ``page`` (default 1),
    ``per_page`` (default 20) and ``status`` (default ``'all'``). A status
    that is not one of the five known job states is ignored rather than
    rejected.

    Returns:
        A JSON ``create_response`` envelope with ``jobs`` and ``pagination``;
        HTTP 400 when a ``ValidationError`` is raised, HTTP 500 on any other
        exception.
    """
    try:
        args = request.args
        page = args.get('page', 1, type=int)
        per_page = args.get('per_page', 20, type=int)
        status = args.get('status', 'all')

        page, per_page = validate_pagination(page, per_page)

        # Soft-deleted rows never appear in the user's list.
        jobs_query = TranslationJob.query.filter_by(
            user_id=g.current_user_id
        ).filter(TranslationJob.deleted_at.is_(None))

        if status and status != 'all':
            wanted = status.upper()
            if wanted in ('PENDING', 'PROCESSING', 'COMPLETED', 'FAILED', 'RETRY'):
                jobs_query = jobs_query.filter_by(status=wanted)

        pagination = jobs_query.order_by(
            TranslationJob.created_at.desc()
        ).paginate(page=page, per_page=per_page, error_out=False)

        def serialize(job):
            """Build one list entry, adding the derived fields when applicable."""
            entry = job.to_dict(include_files=False)

            # Processing time is only reported once both timestamps exist.
            if job.processing_started_at and job.completed_at:
                entry['processing_time'] = calculate_processing_time(
                    job.processing_started_at, job.completed_at
                )

            # Queue position is only reported for jobs still waiting.
            if job.status == 'PENDING':
                entry['queue_position'] = TranslationJob.get_queue_position(job.job_uuid)

            return entry

        jobs_data = [serialize(job) for job in pagination.items]

        payload = {
            'jobs': jobs_data,
            'pagination': {
                'page': page,
                'per_page': per_page,
                'total': pagination.total,
                'pages': pagination.pages,
                'has_prev': pagination.has_prev,
                'has_next': pagination.has_next
            }
        }
        return jsonify(create_response(success=True, data=payload))

    except ValidationError as e:
        return jsonify(create_response(
            success=False,
            error=e.error_code,
            message=str(e)
        )), 400

    except Exception as e:
        logger.error(f"Get user jobs error: {str(e)}")

        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='取得任務列表失敗'
        )), 500
@jobs_bp.route('/<job_uuid>/retry', methods=['POST'])
@jwt_login_required
def retry_job(job_uuid):
    """Put a failed job back into the PENDING queue.

    Args:
        job_uuid: Job identifier captured from the URL path.

    Returns:
        A JSON ``create_response`` envelope:
        200 with the job's uuid, status, retry count and queue position;
        400 when a ``ValidationError`` is raised or ``job.can_retry()`` is false;
        403 when the caller is neither the job owner nor an admin;
        404 when no non-deleted job has this uuid;
        500 on any other exception.
    """
    try:
        validate_job_uuid(job_uuid)

        # Soft-deleted jobs are treated as non-existent.
        job = TranslationJob.query.filter_by(job_uuid=job_uuid).filter(
            TranslationJob.deleted_at.is_(None)
        ).first()

        if not job:
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_FOUND',
                message='任務不存在'
            )), 404

        # Only the owner or an administrator may retry a job.
        if job.user_id != g.current_user_id and not g.is_admin:
            return jsonify(create_response(
                success=False,
                error='PERMISSION_DENIED',
                message='無權限操作此任務'
            )), 403

        if not job.can_retry():
            return jsonify(create_response(
                success=False,
                error='CANNOT_RETRY',
                message='任務無法重試(狀態不正確或重試次數已達上限)'
            )), 400

        # Capture the failure reason before touching the job so the audit log
        # does not depend on how update_status() treats error_message=None.
        previous_error = job.error_message

        job.update_status('PENDING', error_message=None)
        job.increment_retry()

        queue_position = TranslationJob.get_queue_position(job.job_uuid)

        SystemLog.info(
            'jobs.retry',
            f'Job retry requested: {job_uuid}',
            user_id=g.current_user_id,
            job_id=job.id,
            extra_data={
                'retry_count': job.retry_count,
                'previous_error': previous_error
            }
        )

        logger.info(f"Job retry requested: {job_uuid} (retry count: {job.retry_count})")

        # TODO: dispatch the Celery translation task here once it is wired up
        # (app.tasks.translation.process_translation_job.delay(job.id)).

        return jsonify(create_response(
            success=True,
            data={
                'job_uuid': job.job_uuid,
                'status': job.status,
                'retry_count': job.retry_count,
                'queue_position': queue_position
            },
            message='任務已重新加入佇列'
        ))

    except ValidationError as e:
        return jsonify(create_response(
            success=False,
            error=e.error_code,
            message=str(e)
        )), 400

    except Exception as e:
        logger.error(f"Job retry error: {str(e)}")

        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='重試任務失敗'
        )), 500
@jobs_bp.route('/<job_uuid>/cancel', methods=['POST'])
@jwt_login_required
def cancel_job(job_uuid):
    """Cancel a job that is still PENDING or PROCESSING.

    A cancelled job is stored with status ``FAILED`` and an error message
    recording the status it was cancelled from.

    Args:
        job_uuid: Job identifier captured from the URL path.

    Returns:
        A JSON ``create_response`` envelope:
        200 with the job's uuid and new status;
        400 when a ``ValidationError`` is raised or the job is in any other state;
        403 when the caller is neither the job owner nor an admin;
        404 when no non-deleted job has this uuid;
        500 on any other exception.
    """
    try:
        validate_job_uuid(job_uuid)

        # Soft-deleted jobs are treated as non-existent.
        job = TranslationJob.query.filter_by(job_uuid=job_uuid).filter(
            TranslationJob.deleted_at.is_(None)
        ).first()

        if not job:
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_FOUND',
                message='任務不存在'
            )), 404

        # Only the owner or an administrator may cancel a job.
        if job.user_id != g.current_user_id and not g.is_admin:
            return jsonify(create_response(
                success=False,
                error='PERMISSION_DENIED',
                message='無權限操作此任務'
            )), 403

        if job.status not in ['PENDING', 'PROCESSING']:
            return jsonify(create_response(
                success=False,
                error='CANNOT_CANCEL',
                message='只能取消等待中或處理中的任務'
            )), 400

        # A running job also needs its Celery task revoked. This is best
        # effort: the user asked to cancel, so a failed revoke is logged and
        # the job is still marked as cancelled.
        if job.status == 'PROCESSING':
            try:
                from app.services.celery_service import revoke_task
                revoke_task(job.job_uuid)
                logger.info(f"Celery task revoked for job: {job.job_uuid}")
            except Exception as celery_error:
                logger.warning(f"Failed to revoke Celery task for job {job.job_uuid}: {celery_error}")

        # Built before update_status() so it records the pre-cancel status.
        cancel_message = f'使用者取消任務 (原狀態: {job.status})'
        job.update_status('FAILED', error_message=cancel_message)

        SystemLog.info(
            'jobs.cancel',
            f'Job cancelled by user: {job_uuid}',
            user_id=g.current_user_id,
            job_id=job.id
        )

        logger.info(f"Job cancelled by user: {job_uuid}")

        return jsonify(create_response(
            success=True,
            data={
                'job_uuid': job.job_uuid,
                'status': job.status
            },
            message='任務已取消'
        ))

    except ValidationError as e:
        return jsonify(create_response(
            success=False,
            error=e.error_code,
            message=str(e)
        )), 400

    except Exception as e:
        logger.error(f"Cancel job error: {str(e)}")

        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='取消任務失敗'
        )), 500
@notification_bp.route('', methods=['GET'])
@jwt_login_required
def get_notifications():
    """Return one page of the caller's unexpired notifications plus the unread count.

    Query parameters read from the request: ``page`` (default 1),
    ``per_page`` (default 20, capped at 100), ``status`` (``'unread'`` /
    ``'read'``; anything else means no read-state filter) and ``type``.

    Returns:
        A JSON ``create_taiwan_response`` envelope with ``notifications``,
        ``pagination`` and ``unread_count``; HTTP 500 with the exception text
        on failure.
    """
    try:
        user_id = g.current_user_id

        params = request.args
        page = params.get('page', 1, type=int)
        per_page = min(params.get('per_page', 20, type=int), 100)
        status_filter = params.get('status', 'all')
        type_filter = params.get('type', None)

        def still_valid():
            """Clause matching notifications with no expiry or a future expiry."""
            return or_(
                Notification.expires_at.is_(None),
                Notification.expires_at > datetime.now()
            )

        listing = Notification.query.filter_by(user_id=user_id).filter(still_valid())

        # 'unread' / 'read' map to an is_read filter; other values add none.
        read_flag = {'unread': False, 'read': True}.get(status_filter)
        if read_flag is not None:
            listing = listing.filter_by(is_read=read_flag)

        if type_filter:
            listing = listing.filter_by(type=type_filter)

        # Unread first, then newest first.
        listing = listing.order_by(Notification.is_read.asc(), desc(Notification.created_at))

        paginated = listing.paginate(
            page=page, per_page=per_page, error_out=False
        )

        # The unread count ignores the status/type filters applied above.
        unread_count = Notification.query.filter_by(
            user_id=user_id,
            is_read=False
        ).filter(still_valid()).count()

        payload = {
            'notifications': [item.to_dict() for item in paginated.items],
            'pagination': {
                'total': paginated.total,
                'page': page,
                'per_page': per_page,
                'pages': paginated.pages
            },
            'unread_count': unread_count
        }

        return jsonify(create_taiwan_response(
            success=True,
            data=payload,
            message='獲取通知列表成功'
        ))

    except Exception as e:
        return jsonify(create_taiwan_response(
            success=False,
            error=f'獲取通知失敗:{str(e)}'
        )), 500
@notification_bp.route('/read-all', methods=['POST'])
@jwt_login_required
def mark_all_read():
    """Mark every unread, unexpired notification of the caller as read.

    Returns:
        A JSON ``create_taiwan_response`` envelope with ``marked_count`` on
        success; HTTP 500 with the exception text on failure.
    """
    try:
        current_user_id = g.current_user_id

        # Unread notifications that have no expiry or have not expired yet.
        unread_notifications = Notification.query.filter_by(
            user_id=current_user_id,
            is_read=False
        ).filter(or_(
            Notification.expires_at.is_(None),
            Notification.expires_at > datetime.now()
        )).all()

        for notification in unread_notifications:
            notification.mark_as_read()

        db.session.commit()

        marked_count = len(unread_notifications)
        return jsonify(create_taiwan_response(
            success=True,
            data={'marked_count': marked_count},
            message=f'已標記 {marked_count} 個通知為已讀'
        ))

    except Exception as e:
        # Discard the partially applied changes so the session stays usable,
        # matching the other write handlers in this module.
        db.session.rollback()
        return jsonify(create_taiwan_response(
            success=False,
            error=f'標記全部已讀失敗:{str(e)}'
        )), 500
@notification_bp.route('/clear', methods=['POST'])
@jwt_login_required
def clear_read_notifications():
    """Delete every notification of the caller that is already marked as read.

    Returns:
        A JSON ``create_taiwan_response`` envelope with ``deleted_count`` on
        success; on failure the session is rolled back and HTTP 500 is
        returned with the exception text.
    """
    try:
        owner_id = g.current_user_id

        # Bulk delete; the query's delete() return value is reported as the count.
        read_items = Notification.query.filter_by(
            user_id=owner_id,
            is_read=True
        )
        deleted_count = read_items.delete()

        db.session.commit()

        success_body = create_taiwan_response(
            success=True,
            data={'deleted_count': deleted_count},
            message=f'已清除 {deleted_count} 個已讀通知'
        )
        return jsonify(success_body)

    except Exception as e:
        db.session.rollback()
        failure_body = create_taiwan_response(
            success=False,
            error=f'清除通知失敗:{str(e)}'
        )
        return jsonify(failure_body), 500
class Config:
    """Base configuration shared by every environment.

    Values are read from environment variables when the class body is
    executed (import time). The Dify settings start empty and are filled in
    by :meth:`load_dify_config`.
    """

    # Core application settings.
    # NOTE(review): when SECRET_KEY is unset a random key is generated per
    # process, so values signed by one process will not validate in another.
    SECRET_KEY = os.environ.get('SECRET_KEY') or secrets.token_hex(32)
    APP_NAME = os.environ.get('APP_NAME', 'PANJIT Document Translator')

    # Database settings: a bare mysql:// URL is rewritten to use the PyMySQL driver.
    DATABASE_URL = os.environ.get('DATABASE_URL')
    if DATABASE_URL and DATABASE_URL.startswith("mysql://"):
        DATABASE_URL = DATABASE_URL.replace("mysql://", "mysql+pymysql://", 1)

    SQLALCHEMY_DATABASE_URI = DATABASE_URL
    SQLALCHEMY_TRACK_MODIFICATIONS = False
    SQLALCHEMY_ENGINE_OPTIONS = {
        'pool_pre_ping': True,
        'pool_recycle': 3600,
        'connect_args': {
            'charset': os.environ.get('MYSQL_CHARSET', 'utf8mb4'),
            'connect_timeout': 30,
            'read_timeout': 30,
            'write_timeout': 30,
        }
    }

    # JWT authentication settings; falls back to SECRET_KEY when no dedicated key is set.
    JWT_SECRET_KEY = os.environ.get('JWT_SECRET_KEY') or SECRET_KEY
    JWT_ACCESS_TOKEN_EXPIRES = timedelta(hours=8)
    JWT_REFRESH_TOKEN_EXPIRES = timedelta(days=30)
    JWT_ALGORITHM = 'HS256'

    # Redis settings.
    REDIS_URL = os.environ.get('REDIS_URL', 'redis://localhost:6379/0')

    # Celery settings.
    CELERY_BROKER_URL = os.environ.get('CELERY_BROKER_URL', 'redis://localhost:6379/0')
    CELERY_RESULT_BACKEND = os.environ.get('CELERY_RESULT_BACKEND', 'redis://localhost:6379/0')
    CELERY_TASK_SERIALIZER = 'json'
    CELERY_RESULT_SERIALIZER = 'json'
    CELERY_ACCEPT_CONTENT = ['json']
    CELERY_TIMEZONE = 'Asia/Taipei'
    CELERY_ENABLE_UTC = False  # False so Celery uses the local timezone above

    # LDAP settings.
    LDAP_SERVER = os.environ.get('LDAP_SERVER')
    LDAP_PORT = int(os.environ.get('LDAP_PORT', 389))
    LDAP_USE_SSL = os.environ.get('LDAP_USE_SSL', 'false').lower() == 'true'
    LDAP_BIND_USER_DN = os.environ.get('LDAP_BIND_USER_DN')
    LDAP_BIND_USER_PASSWORD = os.environ.get('LDAP_BIND_USER_PASSWORD')
    LDAP_SEARCH_BASE = os.environ.get('LDAP_SEARCH_BASE')
    LDAP_USER_LOGIN_ATTR = os.environ.get('LDAP_USER_LOGIN_ATTR', 'userPrincipalName')

    # SMTP settings.
    SMTP_SERVER = os.environ.get('SMTP_SERVER')
    SMTP_PORT = int(os.environ.get('SMTP_PORT', 587))
    SMTP_USE_TLS = os.environ.get('SMTP_USE_TLS', 'false').lower() == 'true'
    SMTP_USE_SSL = os.environ.get('SMTP_USE_SSL', 'false').lower() == 'true'
    SMTP_AUTH_REQUIRED = os.environ.get('SMTP_AUTH_REQUIRED', 'false').lower() == 'true'
    SMTP_SENDER_EMAIL = os.environ.get('SMTP_SENDER_EMAIL')
    SMTP_SENDER_PASSWORD = os.environ.get('SMTP_SENDER_PASSWORD', '')

    # File upload settings.
    UPLOAD_FOLDER = Path(os.environ.get('UPLOAD_FOLDER', 'uploads')).absolute()
    MAX_CONTENT_LENGTH = int(os.environ.get('MAX_CONTENT_LENGTH', 26214400))  # 25MB
    ALLOWED_EXTENSIONS = {'.docx', '.doc', '.pptx', '.xlsx', '.xls', '.pdf'}
    FILE_RETENTION_DAYS = int(os.environ.get('FILE_RETENTION_DAYS', 7))

    # Dify API settings (populated from api.txt by load_dify_config).
    DIFY_API_BASE_URL = ''
    DIFY_API_KEY = ''

    # Separate Dify endpoints for translation and OCR.
    DIFY_TRANSLATION_BASE_URL = ''
    DIFY_TRANSLATION_API_KEY = ''
    DIFY_OCR_BASE_URL = ''
    DIFY_OCR_API_KEY = ''

    # Logging settings.
    LOG_LEVEL = os.environ.get('LOG_LEVEL', 'INFO')
    LOG_FILE = Path(os.environ.get('LOG_FILE', 'logs/app.log')).absolute()

    # Administrator settings.
    ADMIN_EMAIL = os.environ.get('ADMIN_EMAIL', 'ymirliu@panjit.com.tw')

    @classmethod
    def load_dify_config(cls):
        """Load Dify endpoints and keys from ``api.txt`` in the working directory.

        Each non-blank, non-``#`` line is read as ``key: value``. Recognised
        keys are ``translation_base_url``, ``translation_api``,
        ``ocr_base_url``, ``ocr_api`` and the legacy ``base_url`` / ``api``
        pair, which sets both the generic and the translation attributes.
        Unknown keys and lines without a colon are ignored. Whitespace around
        the key and the value is stripped.

        A missing file is a no-op. A read or decode error is printed and
        otherwise ignored; attributes set before the error keep their values.
        """
        # Kept local (not a class attribute) so it is never picked up as a
        # configuration key by code that scans upper-case class attributes.
        targets = {
            'translation_base_url': ('DIFY_TRANSLATION_BASE_URL',),
            'translation_api': ('DIFY_TRANSLATION_API_KEY',),
            'ocr_base_url': ('DIFY_OCR_BASE_URL',),
            'ocr_api': ('DIFY_OCR_API_KEY',),
            # Legacy single-endpoint format also feeds the translation settings.
            'base_url': ('DIFY_API_BASE_URL', 'DIFY_TRANSLATION_BASE_URL'),
            'api': ('DIFY_API_KEY', 'DIFY_TRANSLATION_API_KEY'),
        }

        api_file = Path('api.txt')
        if not api_file.exists():
            return

        try:
            with open(api_file, 'r', encoding='utf-8') as f:
                for raw_line in f:
                    line = raw_line.strip()
                    if not line or line.startswith('#'):
                        continue

                    # Split on the first colon only: URL values contain colons.
                    key, sep, value = line.partition(':')
                    if not sep:
                        continue

                    for attr in targets.get(key.strip(), ()):
                        setattr(cls, attr, value.strip())
        except (OSError, UnicodeError) as e:
            print(f"Error loading Dify config: {e}")

    @classmethod
    def init_directories(cls):
        """Create the upload folder and the log file's parent directory if missing."""
        directories = [
            cls.UPLOAD_FOLDER,
            cls.LOG_FILE.parent,
        ]

        for directory in directories:
            directory.mkdir(parents=True, exist_ok=True)
class TranslationCache(db.Model):
    """Translation cache table (dt_translation_cache).

    One row per (source text hash, source language, target language); the
    ``uk_cache`` unique constraint enforces that.
    """
    __tablename__ = 'dt_translation_cache'

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    source_text_hash = db.Column(db.String(64), nullable=False, comment='來源文字hash')
    source_language = db.Column(db.String(50), nullable=False, comment='來源語言')
    target_language = db.Column(db.String(50), nullable=False, comment='目標語言')
    source_text = db.Column(db.Text, nullable=False, comment='來源文字')
    translated_text = db.Column(db.Text, nullable=False, comment='翻譯文字')
    created_at = db.Column(db.DateTime, default=func.now(), comment='建立時間')

    # Uniqueness constraint plus a lookup index on the language pair.
    __table_args__ = (
        db.UniqueConstraint('source_text_hash', 'source_language', 'target_language', name='uk_cache'),
        db.Index('idx_languages', 'source_language', 'target_language'),
    )

    def __repr__(self):
        # Identify the row by id and language pair; the text itself may be long.
        return f'<TranslationCache {self.id}: {self.source_language}->{self.target_language}>'

    def to_dict(self):
        """Return the row as a plain dict; ``created_at`` is ISO-8601 or None."""
        return {
            'id': self.id,
            'source_text_hash': self.source_text_hash,
            'source_language': self.source_language,
            'target_language': self.target_language,
            'source_text': self.source_text,
            'translated_text': self.translated_text,
            'created_at': self.created_at.isoformat() if self.created_at else None
        }

    @staticmethod
    def generate_hash(text):
        """Return the hex SHA-256 digest of *text* encoded as UTF-8."""
        return hashlib.sha256(text.encode('utf-8')).hexdigest()

    @classmethod
    def get_translation(cls, source_text, source_language, target_language):
        """Return the cached translation for this text and language pair, or None."""
        text_hash = cls.generate_hash(source_text)

        cache_entry = cls.query.filter_by(
            source_text_hash=text_hash,
            source_language=source_language,
            target_language=target_language
        ).first()

        return cache_entry.translated_text if cache_entry else None

    @classmethod
    def save_translation(cls, source_text, source_language, target_language, translated_text):
        """Insert or update the cached translation and commit.

        Returns:
            True once the commit succeeds.

        Raises:
            Whatever the commit raises; the session is rolled back first so it
            remains usable for the caller.
        """
        text_hash = cls.generate_hash(source_text)

        existing = cls.query.filter_by(
            source_text_hash=text_hash,
            source_language=source_language,
            target_language=target_language
        ).first()

        if existing:
            # Overwrite the stored translation of the existing row.
            existing.translated_text = translated_text
        else:
            cache_entry = cls(
                source_text_hash=text_hash,
                source_language=source_language,
                target_language=target_language,
                source_text=source_text,
                translated_text=translated_text
            )
            db.session.add(cache_entry)

        # NOTE(review): two concurrent saves of the same new text can both
        # reach the insert branch; the loser then fails on uk_cache here.
        try:
            db.session.commit()
        except Exception:
            db.session.rollback()
            raise
        return True

    @classmethod
    def get_cache_statistics(cls):
        """Return total row count, per-language-pair counts and rows created in the last 7 days."""
        total_entries = cls.query.count()

        # Row count per (source_language, target_language) pair.
        language_pairs = db.session.query(
            cls.source_language,
            cls.target_language,
            func.count(cls.id).label('count')
        ).group_by(cls.source_language, cls.target_language).all()

        # Rows created during the last week (new cache entries, not cache hits).
        # NOTE(review): the cutoff is UTC while created_at defaults to the
        # database's now() — confirm both use the same clock.
        from datetime import datetime, timedelta
        week_ago = datetime.utcnow() - timedelta(days=7)
        recent_entries = cls.query.filter(cls.created_at >= week_ago).count()

        return {
            'total_entries': total_entries,
            'language_pairs': [
                {
                    'source_language': pair.source_language,
                    'target_language': pair.target_language,
                    'count': pair.count
                }
                for pair in language_pairs
            ],
            'recent_entries': recent_entries
        }

    @classmethod
    def clear_old_cache(cls, days_to_keep=90):
        """Delete rows created more than *days_to_keep* days ago and commit.

        Returns:
            The value returned by the bulk ``delete()`` call (rows removed).

        Raises:
            Whatever the delete or commit raises; the session is rolled back first.
        """
        from datetime import datetime, timedelta

        cutoff_date = datetime.utcnow() - timedelta(days=days_to_keep)

        try:
            deleted_count = cls.query.filter(
                cls.created_at < cutoff_date
            ).delete(synchronize_session=False)

            db.session.commit()
        except Exception:
            db.session.rollback()
            raise
        return deleted_count
default=0.0000, comment='總成本') + conversation_id = db.Column(db.String(100), comment='Dify對話ID,用於維持翻譯上下文') + processing_started_at = db.Column(db.DateTime, comment='開始處理時間') + completed_at = db.Column(db.DateTime, comment='完成時間') + created_at = db.Column(db.DateTime, default=func.now(), comment='建立時間') + updated_at = db.Column( + db.DateTime, + default=func.now(), + onupdate=func.now(), + comment='更新時間' + ) + deleted_at = db.Column(db.DateTime, comment='軟刪除時間') + + # 關聯關係 + files = db.relationship('JobFile', backref='job', lazy='dynamic', cascade='all, delete-orphan') + api_usage_stats = db.relationship('APIUsageStats', backref='job', lazy='dynamic') + + def __repr__(self): + return f'' + + def __init__(self, **kwargs): + """初始化,自動生成 UUID""" + super().__init__(**kwargs) + if not self.job_uuid: + self.job_uuid = str(uuid.uuid4()) + + def to_dict(self, include_files=False): + """轉換為字典格式""" + data = { + 'id': self.id, + 'job_uuid': self.job_uuid, + 'user_id': self.user_id, + 'original_filename': self.original_filename, + 'file_extension': self.file_extension, + 'file_size': self.file_size, + 'file_path': self.file_path, + 'source_language': self.source_language, + 'target_languages': self.target_languages, + 'status': self.status, + 'progress': float(self.progress) if self.progress else 0.0, + 'retry_count': self.retry_count, + 'error_message': self.error_message, + 'total_tokens': self.total_tokens, + 'total_cost': float(self.total_cost) if self.total_cost else 0.0, + 'conversation_id': self.conversation_id, + 'processing_started_at': format_taiwan_time(self.processing_started_at, "%Y-%m-%d %H:%M:%S") if self.processing_started_at else None, + 'completed_at': format_taiwan_time(self.completed_at, "%Y-%m-%d %H:%M:%S") if self.completed_at else None, + 'created_at': format_taiwan_time(self.created_at, "%Y-%m-%d %H:%M:%S") if self.created_at else None, + 'updated_at': format_taiwan_time(self.updated_at, "%Y-%m-%d %H:%M:%S") if self.updated_at else None, + 'deleted_at': 
format_taiwan_time(self.deleted_at, "%Y-%m-%d %H:%M:%S") if self.deleted_at else None + } + + if include_files: + data['files'] = [f.to_dict() for f in self.files] + + return data + + def update_status(self, status, error_message=None, progress=None): + """更新任務狀態""" + self.status = status + + if error_message: + self.error_message = error_message + + if progress is not None: + self.progress = progress + + if status == 'PROCESSING' and not self.processing_started_at: + self.processing_started_at = datetime.utcnow() + elif status == 'COMPLETED': + self.completed_at = datetime.utcnow() + self.progress = 100.00 + + self.updated_at = datetime.utcnow() + db.session.commit() + + def add_original_file(self, filename, file_path, file_size): + """新增原始檔案記錄""" + from pathlib import Path + stored_name = Path(file_path).name + + original_file = JobFile( + job_id=self.id, + file_type='source', + original_filename=filename, + stored_filename=stored_name, + file_path=file_path, + file_size=file_size, + mime_type=self._get_mime_type(filename) + ) + db.session.add(original_file) + db.session.commit() + return original_file + + def add_translated_file(self, language_code, filename, file_path, file_size): + """新增翻譯檔案記錄""" + from pathlib import Path + stored_name = Path(file_path).name + + translated_file = JobFile( + job_id=self.id, + file_type='translated', + language_code=language_code, + original_filename=filename, + stored_filename=stored_name, + file_path=file_path, + file_size=file_size, + mime_type=self._get_mime_type(filename) + ) + db.session.add(translated_file) + db.session.commit() + return translated_file + + def _get_mime_type(self, filename): + """取得MIME類型""" + import mimetypes + from pathlib import Path + + ext = Path(filename).suffix.lower() + mime_map = { + '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + '.pdf': 'application/pdf', + '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + '.xlsx': 
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + '.txt': 'text/plain' + } + return mime_map.get(ext, mimetypes.guess_type(filename)[0] or 'application/octet-stream') + + def get_translated_files(self): + """取得翻譯檔案""" + return self.files.filter_by(file_type='translated').all() + + def get_original_file(self): + """取得原始檔案""" + return self.files.filter_by(file_type='source').first() + + def can_retry(self): + """是否可以重試""" + return self.status in ['FAILED', 'RETRY'] and self.retry_count < 3 + + def increment_retry(self): + """增加重試次數""" + self.retry_count += 1 + self.updated_at = datetime.utcnow() + db.session.commit() + + def soft_delete(self): + """軟刪除任務(保留資料供報表使用)""" + self.deleted_at = datetime.utcnow() + self.updated_at = datetime.utcnow() + db.session.commit() + + def restore(self): + """恢復已刪除的任務""" + self.deleted_at = None + self.updated_at = datetime.utcnow() + db.session.commit() + + def is_deleted(self): + """檢查任務是否已被軟刪除""" + return self.deleted_at is not None + + @classmethod + def get_queue_position(cls, job_uuid): + """取得任務在佇列中的位置""" + job = cls.query.filter_by(job_uuid=job_uuid, deleted_at=None).first() + if not job: + return None + + position = cls.query.filter( + cls.status == 'PENDING', + cls.deleted_at.is_(None), + cls.created_at < job.created_at + ).count() + + return position + 1 + + @classmethod + def get_pending_jobs(cls): + """取得所有等待處理的任務""" + return cls.query.filter_by(status='PENDING', deleted_at=None).order_by(cls.created_at.asc()).all() + + @classmethod + def get_processing_jobs(cls): + """取得所有處理中的任務""" + return cls.query.filter_by(status='PROCESSING', deleted_at=None).all() + + @classmethod + def get_user_jobs(cls, user_id, status=None, limit=None, offset=None, include_deleted=False): + """取得使用者的任務列表""" + query = cls.query.filter_by(user_id=user_id) + + # 預設排除軟刪除的記錄,除非明確要求包含 + if not include_deleted: + query = query.filter(cls.deleted_at.is_(None)) + + if status and status != 'all': + query = 
query.filter_by(status=status.upper()) + + query = query.order_by(cls.created_at.desc()) + + if limit: + query = query.limit(limit) + if offset: + query = query.offset(offset) + + return query.all() + + @classmethod + def get_statistics(cls, user_id=None, start_date=None, end_date=None, include_deleted=True): + """取得統計資料(預設包含所有記錄以確保報表完整性)""" + query = cls.query + + # 報表統計預設包含已刪除記錄以確保數據完整性 + if not include_deleted: + query = query.filter(cls.deleted_at.is_(None)) + + if user_id: + query = query.filter_by(user_id=user_id) + + if start_date: + query = query.filter(cls.created_at >= start_date) + + if end_date: + query = query.filter(cls.created_at <= end_date) + + total = query.count() + completed = query.filter_by(status='COMPLETED').count() + failed = query.filter_by(status='FAILED').count() + processing = query.filter_by(status='PROCESSING').count() + pending = query.filter_by(status='PENDING').count() + + return { + 'total': total, + 'completed': completed, + 'failed': failed, + 'processing': processing, + 'pending': pending, + 'success_rate': (completed / total * 100) if total > 0 else 0 + } + + +class JobFile(db.Model): + """檔案記錄表 (dt_job_files)""" + __tablename__ = 'dt_job_files' + + id = db.Column(db.Integer, primary_key=True, autoincrement=True) + job_id = db.Column(db.Integer, db.ForeignKey('dt_translation_jobs.id'), nullable=False, comment='任務ID') + file_type = db.Column( + db.Enum('source', 'translated', name='file_type'), + nullable=False, + comment='檔案類型' + ) + language_code = db.Column(db.String(50), comment='語言代碼(翻譯檔案)') + original_filename = db.Column(db.String(255), nullable=False, comment='原始檔名') + stored_filename = db.Column(db.String(255), nullable=False, comment='儲存檔名') + file_path = db.Column(db.String(500), nullable=False, comment='檔案路徑') + file_size = db.Column(db.BigInteger, default=0, comment='檔案大小') + mime_type = db.Column(db.String(100), comment='MIME 類型') + created_at = db.Column(db.DateTime, default=func.now(), comment='建立時間') + + def 
__repr__(self): + return f'' + + def to_dict(self): + """轉換為字典格式""" + return { + 'id': self.id, + 'job_id': self.job_id, + 'file_type': self.file_type, + 'language_code': self.language_code, + 'original_filename': self.original_filename, + 'stored_filename': self.stored_filename, + 'file_path': self.file_path, + 'file_size': self.file_size, + 'mime_type': self.mime_type, + 'created_at': format_taiwan_time(self.created_at, "%Y-%m-%d %H:%M:%S") if self.created_at else None + } + + +# 事件監聽器:自動生成 UUID +@event.listens_for(TranslationJob, 'before_insert') +def receive_before_insert(mapper, connection, target): + """在插入前自動生成 UUID""" + if not target.job_uuid: + target.job_uuid = str(uuid.uuid4()) \ No newline at end of file diff --git a/app/models/log.py b/app/models/log.py new file mode 100644 index 0000000..3c1afeb --- /dev/null +++ b/app/models/log.py @@ -0,0 +1,211 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +系統日誌資料模型 + +Author: PANJIT IT Team +Created: 2024-01-28 +Modified: 2024-01-28 +""" + +import json +from datetime import datetime, timedelta +from sqlalchemy.sql import func +from app import db + + +class SystemLog(db.Model): + """系統日誌表 (dt_system_logs)""" + __tablename__ = 'dt_system_logs' + + id = db.Column(db.Integer, primary_key=True, autoincrement=True) + level = db.Column( + db.Enum('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL', name='log_level'), + nullable=False, + comment='日誌等級' + ) + module = db.Column(db.String(100), nullable=False, comment='模組名稱') + user_id = db.Column(db.Integer, db.ForeignKey('dt_users.id'), comment='使用者ID') + job_id = db.Column(db.Integer, db.ForeignKey('dt_translation_jobs.id'), comment='任務ID') + message = db.Column(db.Text, nullable=False, comment='日誌訊息') + extra_data = db.Column(db.JSON, comment='額外資料') + created_at = db.Column(db.DateTime, default=func.now(), comment='建立時間') + + def __repr__(self): + return f'' + + def to_dict(self): + """轉換為字典格式""" + return { + 'id': self.id, + 'level': self.level, + 'module': 
self.module, + 'user_id': self.user_id, + 'job_id': self.job_id, + 'message': self.message, + 'extra_data': self.extra_data, + 'created_at': self.created_at.isoformat() if self.created_at else None + } + + @classmethod + def log(cls, level, module, message, user_id=None, job_id=None, extra_data=None): + """記錄日誌""" + log_entry = cls( + level=level.upper(), + module=module, + message=message, + user_id=user_id, + job_id=job_id, + extra_data=extra_data + ) + + db.session.add(log_entry) + db.session.commit() + return log_entry + + @classmethod + def debug(cls, module, message, user_id=None, job_id=None, extra_data=None): + """記錄除錯日誌""" + return cls.log('DEBUG', module, message, user_id, job_id, extra_data) + + @classmethod + def info(cls, module, message, user_id=None, job_id=None, extra_data=None): + """記錄資訊日誌""" + return cls.log('INFO', module, message, user_id, job_id, extra_data) + + @classmethod + def warning(cls, module, message, user_id=None, job_id=None, extra_data=None): + """記錄警告日誌""" + return cls.log('WARNING', module, message, user_id, job_id, extra_data) + + @classmethod + def error(cls, module, message, user_id=None, job_id=None, extra_data=None): + """記錄錯誤日誌""" + return cls.log('ERROR', module, message, user_id, job_id, extra_data) + + @classmethod + def critical(cls, module, message, user_id=None, job_id=None, extra_data=None): + """記錄嚴重錯誤日誌""" + return cls.log('CRITICAL', module, message, user_id, job_id, extra_data) + + @classmethod + def get_logs(cls, level=None, module=None, user_id=None, start_date=None, end_date=None, limit=100, offset=0): + """查詢日誌""" + query = cls.query + + if level: + query = query.filter_by(level=level.upper()) + + if module: + query = query.filter(cls.module.like(f'%{module}%')) + + if user_id: + query = query.filter_by(user_id=user_id) + + if start_date: + query = query.filter(cls.created_at >= start_date) + + if end_date: + query = query.filter(cls.created_at <= end_date) + + # 按時間倒序排列 + query = 
query.order_by(cls.created_at.desc()) + + if limit: + query = query.limit(limit) + if offset: + query = query.offset(offset) + + return query.all() + + @classmethod + def get_log_statistics(cls, days=7): + """取得日誌統計資料""" + end_date = datetime.utcnow() + start_date = end_date - timedelta(days=days) + + # 按等級統計 + level_stats = db.session.query( + cls.level, + func.count(cls.id).label('count') + ).filter( + cls.created_at >= start_date + ).group_by(cls.level).all() + + # 按模組統計 + module_stats = db.session.query( + cls.module, + func.count(cls.id).label('count') + ).filter( + cls.created_at >= start_date + ).group_by(cls.module).order_by( + func.count(cls.id).desc() + ).limit(10).all() + + # 每日統計 + daily_stats = db.session.query( + func.date(cls.created_at).label('date'), + cls.level, + func.count(cls.id).label('count') + ).filter( + cls.created_at >= start_date + ).group_by( + func.date(cls.created_at), cls.level + ).order_by( + func.date(cls.created_at) + ).all() + + return { + 'level_stats': [ + {'level': stat.level, 'count': stat.count} + for stat in level_stats + ], + 'module_stats': [ + {'module': stat.module, 'count': stat.count} + for stat in module_stats + ], + 'daily_stats': [ + { + 'date': stat.date.isoformat(), + 'level': stat.level, + 'count': stat.count + } + for stat in daily_stats + ] + } + + @classmethod + def cleanup_old_logs(cls, days_to_keep=30): + """清理舊日誌""" + cutoff_date = datetime.utcnow() - timedelta(days=days_to_keep) + + deleted_count = cls.query.filter( + cls.created_at < cutoff_date + ).delete(synchronize_session=False) + + db.session.commit() + return deleted_count + + @classmethod + def get_error_summary(cls, days=1): + """取得錯誤摘要""" + start_date = datetime.utcnow() - timedelta(days=days) + + error_logs = cls.query.filter( + cls.level.in_(['ERROR', 'CRITICAL']), + cls.created_at >= start_date + ).order_by(cls.created_at.desc()).limit(50).all() + + # 按模組分組錯誤 + error_by_module = {} + for log in error_logs: + module = log.module + if module 
not in error_by_module: + error_by_module[module] = [] + error_by_module[module].append(log.to_dict()) + + return { + 'total_errors': len(error_logs), + 'error_by_module': error_by_module, + 'recent_errors': [log.to_dict() for log in error_logs[:10]] + } \ No newline at end of file diff --git a/app/models/notification.py b/app/models/notification.py new file mode 100644 index 0000000..295388c --- /dev/null +++ b/app/models/notification.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +通知系統資料模型 + +Author: PANJIT IT Team +Created: 2024-01-28 +Modified: 2024-01-28 +""" +from datetime import datetime +from enum import Enum +from sqlalchemy import func +from sqlalchemy.orm import relationship +from app import db +import uuid +import json + + +class NotificationType(str, Enum): + """通知類型枚舉""" + SUCCESS = "success" # 成功 + ERROR = "error" # 錯誤 + WARNING = "warning" # 警告 + INFO = "info" # 資訊 + SYSTEM = "system" # 系統 + + +class Notification(db.Model): + """通知模型""" + __tablename__ = 'dt_notifications' + + # 主鍵 + id = db.Column(db.Integer, primary_key=True, autoincrement=True) + notification_uuid = db.Column(db.String(36), unique=True, nullable=False, index=True, + default=lambda: str(uuid.uuid4()), comment='通知唯一識別碼') + + # 基本資訊 + user_id = db.Column(db.Integer, db.ForeignKey('dt_users.id'), nullable=False, comment='使用者ID') + type = db.Column(db.Enum('INFO', 'SUCCESS', 'WARNING', 'ERROR', name='notification_type'), + nullable=False, default=NotificationType.INFO.value, comment='通知類型') + title = db.Column(db.String(255), nullable=False, comment='通知標題') + message = db.Column(db.Text, nullable=False, comment='通知內容') + + # 關聯資訊(可選) + job_uuid = db.Column(db.String(36), nullable=True, comment='關聯任務UUID') + link = db.Column(db.String(500), nullable=True, comment='相關連結') + + # 狀態 + is_read = db.Column(db.Boolean, default=False, nullable=False, comment='是否已讀') + read_at = db.Column(db.DateTime, nullable=True, comment='閱讀時間') + + # 時間戳記 + created_at = 
db.Column(db.DateTime, default=func.now(), nullable=False, comment='建立時間') + expires_at = db.Column(db.DateTime, nullable=True, comment='過期時間') + + # 額外數據(JSON 格式儲存) + extra_data = db.Column(db.JSON, nullable=True, comment='額外數據') + + # 關聯 + user = db.relationship("User", backref="notifications") + + def __repr__(self): + return f"" + + def to_dict(self): + """轉換為字典格式""" + return { + 'id': self.notification_uuid, # 前端使用 UUID + 'user_id': self.user_id, + 'type': self.type, + 'title': self.title, + 'message': self.message, + 'job_uuid': self.job_uuid, + 'link': self.link, + 'is_read': self.is_read, + 'read': self.is_read, # 為了前端相容 + 'read_at': self.read_at.isoformat() if self.read_at else None, + 'created_at': self.created_at.isoformat() if self.created_at else None, + 'expires_at': self.expires_at.isoformat() if self.expires_at else None, + 'extra_data': self.extra_data + } + + def mark_as_read(self): + """標記為已讀""" + self.is_read = True + self.read_at = datetime.now() + + @classmethod + def create_job_notification(cls, user_id, job_uuid, title, message, notification_type=NotificationType.INFO): + """創建任務相關通知""" + return cls( + user_id=user_id, + job_uuid=job_uuid, + type=notification_type.value, + title=title, + message=message, + link=f"/job/{job_uuid}" # 連結到任務詳情頁 + ) \ No newline at end of file diff --git a/app/models/stats.py b/app/models/stats.py new file mode 100644 index 0000000..ad8cd43 --- /dev/null +++ b/app/models/stats.py @@ -0,0 +1,233 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +API使用統計資料模型 + +Author: PANJIT IT Team +Created: 2024-01-28 +Modified: 2024-01-28 +""" + +from datetime import datetime, timedelta +from sqlalchemy.sql import func +from app import db +from app.utils.timezone import format_taiwan_time + + +class APIUsageStats(db.Model): + """API使用統計表 (dt_api_usage_stats)""" + __tablename__ = 'dt_api_usage_stats' + + id = db.Column(db.Integer, primary_key=True, autoincrement=True) + user_id = db.Column(db.Integer, 
db.ForeignKey('dt_users.id'), nullable=False, comment='使用者ID') + job_id = db.Column(db.Integer, db.ForeignKey('dt_translation_jobs.id'), comment='任務ID') + api_endpoint = db.Column(db.String(200), nullable=False, comment='API端點') + prompt_tokens = db.Column(db.Integer, default=0, comment='Prompt token數') + completion_tokens = db.Column(db.Integer, default=0, comment='Completion token數') + total_tokens = db.Column(db.Integer, default=0, comment='總token數') + prompt_unit_price = db.Column(db.Numeric(10, 8), default=0.00000000, comment='單價') + prompt_price_unit = db.Column(db.String(20), default='USD', comment='價格單位') + cost = db.Column(db.Numeric(10, 4), default=0.0000, comment='成本') + response_time_ms = db.Column(db.Integer, default=0, comment='回應時間(毫秒)') + success = db.Column(db.Boolean, default=True, comment='是否成功') + error_message = db.Column(db.Text, comment='錯誤訊息') + created_at = db.Column(db.DateTime, default=func.now(), comment='建立時間') + + def __repr__(self): + return f'' + + def to_dict(self): + """轉換為字典格式""" + return { + 'id': self.id, + 'user_id': self.user_id, + 'job_id': self.job_id, + 'api_endpoint': self.api_endpoint, + 'prompt_tokens': self.prompt_tokens, + 'completion_tokens': self.completion_tokens, + 'total_tokens': self.total_tokens, + 'prompt_unit_price': float(self.prompt_unit_price) if self.prompt_unit_price else 0.0, + 'prompt_price_unit': self.prompt_price_unit, + 'cost': float(self.cost) if self.cost else 0.0, + 'response_time_ms': self.response_time_ms, + 'success': self.success, + 'error_message': self.error_message, + 'created_at': format_taiwan_time(self.created_at, "%Y-%m-%d %H:%M:%S") if self.created_at else None + } + + @classmethod + def record_api_call(cls, user_id, job_id, api_endpoint, metadata, response_time_ms, success=True, error_message=None): + """記錄 API 呼叫統計""" + # 從 Dify API metadata 解析使用量資訊 + usage_data = metadata.get('usage', {}) + + prompt_tokens = usage_data.get('prompt_tokens', 0) + completion_tokens = 
usage_data.get('completion_tokens', 0) + total_tokens = usage_data.get('total_tokens', prompt_tokens + completion_tokens) + + # 計算成本 - 使用 Dify API 提供的總成本 + if 'total_price' in usage_data: + # 直接使用 API 提供的總價格 + cost = float(usage_data.get('total_price', 0.0)) + else: + # 備用計算方式 + prompt_price = float(usage_data.get('prompt_price', 0.0)) + completion_price = float(usage_data.get('completion_price', 0.0)) + cost = prompt_price + completion_price + + # 單價資訊 + prompt_unit_price = usage_data.get('prompt_unit_price', 0.0) + completion_unit_price = usage_data.get('completion_unit_price', 0.0) + prompt_price_unit = usage_data.get('currency', 'USD') + + stats = cls( + user_id=user_id, + job_id=job_id, + api_endpoint=api_endpoint, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=total_tokens, + prompt_unit_price=prompt_unit_price, + prompt_price_unit=prompt_price_unit, + cost=cost, + response_time_ms=response_time_ms, + success=success, + error_message=error_message + ) + + db.session.add(stats) + db.session.commit() + return stats + + @classmethod + def get_user_statistics(cls, user_id, start_date=None, end_date=None): + """取得使用者統計資料""" + query = cls.query.filter_by(user_id=user_id) + + if start_date: + query = query.filter(cls.created_at >= start_date) + if end_date: + query = query.filter(cls.created_at <= end_date) + + # 統計資料 + total_calls = query.count() + successful_calls = query.filter_by(success=True).count() + total_tokens = query.with_entities(func.sum(cls.total_tokens)).scalar() or 0 + total_cost = query.with_entities(func.sum(cls.cost)).scalar() or 0.0 + avg_response_time = query.with_entities(func.avg(cls.response_time_ms)).scalar() or 0 + + return { + 'total_calls': total_calls, + 'successful_calls': successful_calls, + 'failed_calls': total_calls - successful_calls, + 'success_rate': (successful_calls / total_calls * 100) if total_calls > 0 else 0, + 'total_tokens': total_tokens, + 'total_cost': float(total_cost), + 
'avg_response_time': float(avg_response_time) if avg_response_time else 0 + } + + @classmethod + def get_daily_statistics(cls, days=30): + """取得每日統計資料""" + end_date = datetime.utcnow() + start_date = end_date - timedelta(days=days) + + # 按日期分組統計 + daily_stats = db.session.query( + func.date(cls.created_at).label('date'), + func.count(cls.id).label('total_calls'), + func.sum(cls.total_tokens).label('total_tokens'), + func.sum(cls.cost).label('total_cost'), + func.count().filter(cls.success == True).label('successful_calls') + ).filter( + cls.created_at >= start_date, + cls.created_at <= end_date + ).group_by(func.date(cls.created_at)).all() + + return [ + { + 'date': stat.date.isoformat(), + 'total_calls': stat.total_calls, + 'successful_calls': stat.successful_calls, + 'failed_calls': stat.total_calls - stat.successful_calls, + 'total_tokens': stat.total_tokens or 0, + 'total_cost': float(stat.total_cost or 0) + } + for stat in daily_stats + ] + + @classmethod + def get_top_users(cls, limit=10, start_date=None, end_date=None): + """取得使用量排行榜""" + query = db.session.query( + cls.user_id, + func.count(cls.id).label('total_calls'), + func.sum(cls.total_tokens).label('total_tokens'), + func.sum(cls.cost).label('total_cost') + ) + + if start_date: + query = query.filter(cls.created_at >= start_date) + if end_date: + query = query.filter(cls.created_at <= end_date) + + top_users = query.group_by(cls.user_id).order_by( + func.sum(cls.cost).desc() + ).limit(limit).all() + + return [ + { + 'user_id': user.user_id, + 'total_calls': user.total_calls, + 'total_tokens': user.total_tokens or 0, + 'total_cost': float(user.total_cost or 0) + } + for user in top_users + ] + + @classmethod + def get_cost_trend(cls, days=30): + """取得成本趨勢""" + end_date = datetime.utcnow() + start_date = end_date - timedelta(days=days) + + # 按日期統計成本 + cost_trend = db.session.query( + func.date(cls.created_at).label('date'), + func.sum(cls.cost).label('daily_cost') + ).filter( + cls.created_at >= 
start_date, + cls.created_at <= end_date + ).group_by(func.date(cls.created_at)).order_by( + func.date(cls.created_at) + ).all() + + return [ + { + 'date': trend.date.isoformat(), + 'cost': float(trend.daily_cost or 0) + } + for trend in cost_trend + ] + + @classmethod + def get_endpoint_statistics(cls): + """取得 API 端點統計""" + endpoint_stats = db.session.query( + cls.api_endpoint, + func.count(cls.id).label('total_calls'), + func.sum(cls.cost).label('total_cost'), + func.avg(cls.response_time_ms).label('avg_response_time') + ).group_by(cls.api_endpoint).order_by( + func.count(cls.id).desc() + ).all() + + return [ + { + 'endpoint': stat.api_endpoint, + 'total_calls': stat.total_calls, + 'total_cost': float(stat.total_cost or 0), + 'avg_response_time': float(stat.avg_response_time or 0) + } + for stat in endpoint_stats + ] \ No newline at end of file diff --git a/app/models/sys_user.py b/app/models/sys_user.py new file mode 100644 index 0000000..7d62533 --- /dev/null +++ b/app/models/sys_user.py @@ -0,0 +1,297 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +系統使用者模型 +專門用於記錄帳號密碼和登入相關資訊 + +Author: PANJIT IT Team +Created: 2025-10-01 +""" + +import json +from datetime import datetime, timedelta +from typing import Optional, Dict, Any +from sqlalchemy import Column, Integer, String, Text, Boolean, DateTime, JSON, Enum as SQLEnum, BigInteger +from werkzeug.security import generate_password_hash, check_password_hash +from app import db +from app.utils.logger import get_logger + +logger = get_logger(__name__) + + +class SysUser(db.Model): + """系統使用者模型 - 專門處理帳號密碼和登入記錄""" + __tablename__ = 'sys_user' + + id = Column(BigInteger, primary_key=True) + + # 帳號資訊 + username = Column(String(255), nullable=False, unique=True, comment='登入帳號') + password_hash = Column(String(512), comment='密碼雜湊 (如果需要本地儲存)') + email = Column(String(255), nullable=False, unique=True, comment='電子郵件') + display_name = Column(String(255), comment='顯示名稱') + + # API 認證資訊 + api_user_id = 
Column(String(255), comment='API 回傳的使用者 ID') + api_access_token = Column(Text, comment='API 回傳的 access_token') + api_token_expires_at = Column(DateTime, comment='API Token 過期時間') + + # 登入相關 + auth_method = Column(SQLEnum('API', 'LDAP', name='sys_user_auth_method'), + default='API', comment='認證方式') + last_login_at = Column(DateTime, comment='最後登入時間') + last_login_ip = Column(String(45), comment='最後登入 IP') + login_count = Column(Integer, default=0, comment='登入次數') + login_success_count = Column(Integer, default=0, comment='成功登入次數') + login_fail_count = Column(Integer, default=0, comment='失敗登入次數') + + # 帳號狀態 + is_active = Column(Boolean, default=True, comment='是否啟用') + is_locked = Column(Boolean, default=False, comment='是否鎖定') + locked_until = Column(DateTime, comment='鎖定至何時') + + # 審計欄位 + created_at = Column(DateTime, default=datetime.utcnow, comment='建立時間') + updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, comment='更新時間') + + def __repr__(self): + return f'' + + def to_dict(self) -> Dict[str, Any]: + """轉換為字典格式""" + return { + 'id': self.id, + 'username': self.username, + 'email': self.email, + 'display_name': self.display_name, + 'api_user_id': self.api_user_id, + 'auth_method': self.auth_method, + 'last_login_at': self.last_login_at.isoformat() if self.last_login_at else None, + 'login_count': self.login_count, + 'login_success_count': self.login_success_count, + 'login_fail_count': self.login_fail_count, + 'is_active': self.is_active, + 'is_locked': self.is_locked, + 'api_token_expires_at': self.api_token_expires_at.isoformat() if self.api_token_expires_at else None, + 'created_at': self.created_at.isoformat() if self.created_at else None + } + + @classmethod + def get_or_create(cls, email: str, **kwargs) -> 'SysUser': + """ + 取得或建立系統使用者 (方案A: 使用 email 作為主要識別鍵) + + Args: + email: 電子郵件 (主要識別鍵) + **kwargs: 其他欄位 + + Returns: + SysUser: 系統使用者實例 + """ + try: + # 使用 email 作為主要識別 (專門用於登入記錄) + sys_user = 
cls.query.filter_by(email=email).first() + + if sys_user: + # 更新現有記錄 + sys_user.username = kwargs.get('username', sys_user.username) # API name (姓名+email) + sys_user.display_name = kwargs.get('display_name', sys_user.display_name) # API name (姓名+email) + sys_user.api_user_id = kwargs.get('api_user_id', sys_user.api_user_id) # Azure Object ID + sys_user.api_access_token = kwargs.get('api_access_token', sys_user.api_access_token) + sys_user.api_token_expires_at = kwargs.get('api_token_expires_at', sys_user.api_token_expires_at) + sys_user.auth_method = kwargs.get('auth_method', sys_user.auth_method) + sys_user.updated_at = datetime.utcnow() + + logger.info(f"更新現有系統使用者: {email}") + else: + # 建立新記錄 + sys_user = cls( + username=kwargs.get('username', ''), # API name (姓名+email 格式) + email=email, # 純 email,主要識別鍵 + display_name=kwargs.get('display_name', ''), # API name (姓名+email 格式) + api_user_id=kwargs.get('api_user_id'), # Azure Object ID + api_access_token=kwargs.get('api_access_token'), + api_token_expires_at=kwargs.get('api_token_expires_at'), + auth_method=kwargs.get('auth_method', 'API'), + login_count=0, + login_success_count=0, + login_fail_count=0 + ) + db.session.add(sys_user) + logger.info(f"建立新系統使用者: {email}") + + db.session.commit() + return sys_user + + except Exception as e: + db.session.rollback() + logger.error(f"取得或建立系統使用者失敗: {str(e)}") + raise + + @classmethod + def get_by_email(cls, email: str) -> Optional['SysUser']: + """根據 email 查找系統使用者""" + return cls.query.filter_by(email=email).first() + + def record_login_attempt(self, success: bool, ip_address: str = None, auth_method: str = None): + """ + 記錄登入嘗試 + + Args: + success: 是否成功 + ip_address: IP 地址 + auth_method: 認證方式 + """ + try: + self.login_count = (self.login_count or 0) + 1 + + if success: + self.login_success_count = (self.login_success_count or 0) + 1 + self.last_login_at = datetime.utcnow() + self.last_login_ip = ip_address + if auth_method: + self.auth_method = auth_method + + # 成功登入時解除鎖定 + 
if self.is_locked: + self.is_locked = False + self.locked_until = None + + else: + self.login_fail_count = (self.login_fail_count or 0) + 1 + + # 檢查是否需要鎖定帳號 (連續失敗5次) + if self.login_fail_count >= 5: + self.is_locked = True + self.locked_until = datetime.utcnow() + timedelta(minutes=30) # 鎖定30分鐘 + + self.updated_at = datetime.utcnow() + db.session.commit() + + except Exception as e: + db.session.rollback() + logger.error(f"記錄登入嘗試失敗: {str(e)}") + + def is_account_locked(self) -> bool: + """檢查帳號是否被鎖定""" + if not self.is_locked: + return False + + # 檢查鎖定時間是否已過 + if self.locked_until and datetime.utcnow() > self.locked_until: + self.is_locked = False + self.locked_until = None + db.session.commit() + return False + + return True + + def set_password(self, password: str): + """設置密碼雜湊 (如果需要本地儲存密碼)""" + self.password_hash = generate_password_hash(password) + + def check_password(self, password: str) -> bool: + """檢查密碼 (如果有本地儲存密碼)""" + if not self.password_hash: + return False + return check_password_hash(self.password_hash, password) + + def update_api_token(self, access_token: str, expires_at: datetime = None): + """更新 API Token""" + self.api_access_token = access_token + self.api_token_expires_at = expires_at + self.updated_at = datetime.utcnow() + db.session.commit() + + def is_api_token_valid(self) -> bool: + """檢查 API Token 是否有效""" + if not self.api_access_token or not self.api_token_expires_at: + return False + return datetime.utcnow() < self.api_token_expires_at + + +class LoginLog(db.Model): + """登入記錄模型""" + __tablename__ = 'login_logs' + + id = Column(BigInteger, primary_key=True) + + # 基本資訊 + username = Column(String(255), nullable=False, comment='登入帳號') + auth_method = Column(SQLEnum('API', 'LDAP', name='login_log_auth_method'), + nullable=False, comment='認證方式') + + # 登入結果 + login_success = Column(Boolean, nullable=False, comment='是否成功') + error_message = Column(Text, comment='錯誤訊息(失敗時)') + + # 環境資訊 + ip_address = Column(String(45), comment='IP 地址') + user_agent 
= Column(Text, comment='瀏覽器資訊') + + # API 回應 (可選,用於除錯) + api_response_summary = Column(JSON, comment='API 回應摘要') + + # 時間 + login_at = Column(DateTime, default=datetime.utcnow, comment='登入時間') + + def __repr__(self): + return f'' + + @classmethod + def create_log(cls, username: str, auth_method: str, login_success: bool, + error_message: str = None, ip_address: str = None, + user_agent: str = None, api_response_summary: Dict = None) -> 'LoginLog': + """ + 建立登入記錄 + + Args: + username: 使用者帳號 + auth_method: 認證方式 + login_success: 是否成功 + error_message: 錯誤訊息 + ip_address: IP 地址 + user_agent: 瀏覽器資訊 + api_response_summary: API 回應摘要 + + Returns: + LoginLog: 登入記錄 + """ + try: + log = cls( + username=username, + auth_method=auth_method, + login_success=login_success, + error_message=error_message, + ip_address=ip_address, + user_agent=user_agent, + api_response_summary=api_response_summary + ) + + db.session.add(log) + db.session.commit() + return log + + except Exception as e: + db.session.rollback() + logger.error(f"建立登入記錄失敗: {str(e)}") + return None + + @classmethod + def get_recent_failed_attempts(cls, username: str, minutes: int = 15) -> int: + """ + 取得最近失敗的登入嘗試次數 + + Args: + username: 使用者帳號 + minutes: 時間範圍(分鐘) + + Returns: + int: 失敗次數 + """ + since = datetime.utcnow() - timedelta(minutes=minutes) + return cls.query.filter( + cls.username == username, + cls.login_success == False, + cls.login_at >= since + ).count() \ No newline at end of file diff --git a/app/models/user.py b/app/models/user.py new file mode 100644 index 0000000..49563d1 --- /dev/null +++ b/app/models/user.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +使用者資料模型 + +Author: PANJIT IT Team +Created: 2024-01-28 +Modified: 2024-01-28 +""" + +from datetime import datetime, timedelta +from sqlalchemy.sql import func +from app import db +from app.utils.timezone import format_taiwan_time + + +class User(db.Model): + """使用者資訊表 (dt_users)""" + __tablename__ = 'dt_users' + + id = 
db.Column(db.Integer, primary_key=True, autoincrement=True) + username = db.Column(db.String(100), unique=True, nullable=False, index=True, comment='AD帳號') + display_name = db.Column(db.String(200), nullable=False, comment='顯示名稱') + email = db.Column(db.String(255), nullable=False, index=True, comment='電子郵件') + department = db.Column(db.String(100), comment='部門') + is_admin = db.Column(db.Boolean, default=False, comment='是否為管理員') + last_login = db.Column(db.DateTime, comment='最後登入時間') + created_at = db.Column(db.DateTime, default=func.now(), comment='建立時間') + updated_at = db.Column( + db.DateTime, + default=func.now(), + onupdate=func.now(), + comment='更新時間' + ) + + # 關聯關係 + translation_jobs = db.relationship('TranslationJob', backref='user', lazy='dynamic', cascade='all, delete-orphan') + api_usage_stats = db.relationship('APIUsageStats', backref='user', lazy='dynamic', cascade='all, delete-orphan') + system_logs = db.relationship('SystemLog', backref='user', lazy='dynamic') + + def __repr__(self): + return f'' + + def to_dict(self, include_stats=False): + """轉換為字典格式""" + data = { + 'id': self.id, + 'username': self.username, + 'display_name': self.display_name, + 'email': self.email, + 'department': self.department, + 'is_admin': self.is_admin, + 'last_login': format_taiwan_time(self.last_login, "%Y-%m-%d %H:%M:%S") if self.last_login else None, + 'created_at': format_taiwan_time(self.created_at, "%Y-%m-%d %H:%M:%S") if self.created_at else None, + 'updated_at': format_taiwan_time(self.updated_at, "%Y-%m-%d %H:%M:%S") if self.updated_at else None + } + + if include_stats: + data.update({ + 'total_jobs': self.translation_jobs.count(), + 'completed_jobs': self.translation_jobs.filter_by(status='COMPLETED').count(), + 'failed_jobs': self.translation_jobs.filter_by(status='FAILED').count(), + 'total_cost': self.get_total_cost() + }) + + return data + + def get_total_cost(self): + """計算使用者總成本""" + try: + from app.models.stats import APIUsageStats + return 
db.session.query( + func.sum(APIUsageStats.cost) + ).filter(APIUsageStats.user_id == self.id).scalar() or 0.0 + except Exception: + return 0.0 + + def update_last_login(self): + """更新最後登入時間""" + self.last_login = datetime.utcnow() + db.session.commit() + + @classmethod + def get_or_create(cls, username, display_name, email, department=None): + """取得或建立使用者 (方案A: 使用 email 作為主要識別鍵)""" + # 先嘗試用 email 查找 (因為 email 是唯一且穩定的識別碼) + user = cls.query.filter_by(email=email).first() + + if user: + # 更新使用者資訊 (API name 格式: 姓名+email) + user.username = username # API 的 name (姓名+email 格式) + user.display_name = display_name # API 的 name (姓名+email 格式) + if department: + user.department = department + user.updated_at = datetime.utcnow() + else: + # 建立新使用者 + user = cls( + username=username, # API 的 name (姓名+email 格式) + display_name=display_name, # API 的 name (姓名+email 格式) + email=email, # 純 email,唯一識別鍵 + department=department, + is_admin=(email.lower() == 'ymirliu@panjit.com.tw') # 硬編碼管理員 + ) + db.session.add(user) + + db.session.commit() + return user + + @classmethod + def get_by_email(cls, email): + """根據 email 查找使用者""" + return cls.query.filter_by(email=email).first() + + @classmethod + def get_admin_users(cls): + """取得所有管理員使用者""" + return cls.query.filter_by(is_admin=True).all() + + @classmethod + def get_active_users(cls, days=30): + """取得活躍使用者(指定天數內有登入)""" + cutoff_date = datetime.utcnow() - timedelta(days=days) + return cls.query.filter(cls.last_login >= cutoff_date).all() \ No newline at end of file diff --git a/app/root.py b/app/root.py new file mode 100644 index 0000000..b787ae0 --- /dev/null +++ b/app/root.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Root routes and static file serving for SPA in production. + +These were originally defined in the top-level app.py. Moving them into the +package allows a clean WSGI entry (wsgi:app) without importing app.py. 
+""" + +import os +from pathlib import Path +from datetime import datetime +from flask import Blueprint, current_app, send_from_directory + +root_bp = Blueprint('root', __name__) + +def get_static_dir(): + """取得靜態文件目錄(相對路徑)""" + # 取得專案根目錄 + project_root = Path(__file__).parent.parent + static_dir = project_root / 'frontend' / 'dist' + return str(static_dir) + + +@root_bp.route('/') +def index(): + try: + static_dir = get_static_dir() + if Path(static_dir).exists(): + return send_from_directory(static_dir, 'index.html') + else: + # Fallback API info when frontend is not present + return { + 'application': 'PANJIT Document Translator', + 'version': '1.0.0', + 'status': 'running', + 'api_base_url': '/api/v1', + 'note': 'Frontend files not found, serving API info' + } + except Exception: + # Fallback API info when frontend is not present + return { + 'application': 'PANJIT Document Translator', + 'version': '1.0.0', + 'status': 'running', + 'api_base_url': '/api/v1', + 'note': 'Frontend files not found, serving API info' + } + + +@root_bp.route('/') +def serve_static(path): + try: + static_dir = get_static_dir() + if Path(static_dir).exists(): + return send_from_directory(static_dir, path) + else: + # SPA fallback + return send_from_directory(static_dir, 'index.html') + except Exception: + # SPA fallback + return { + 'error': 'File not found', + 'path': path + }, 404 + + +@root_bp.route('/api') +def api_info(): + return { + 'api_version': 'v1', + 'base_url': '/api/v1', + 'endpoints': { + 'auth': '/api/v1/auth', + 'files': '/api/v1/files', + 'jobs': '/api/v1/jobs', + 'admin': '/api/v1/admin', + 'health': '/api/v1/health' + }, + 'documentation': 'Available endpoints provide RESTful API for document translation' + } + + +@root_bp.route('/api/health') +def health_check(): + # Keep a simple health endpoint here for compatibility + return { + 'status': 'healthy', + 'timestamp': datetime.utcnow().isoformat(), + 'service': 'PANJIT Document Translator API', + 'version': '1.0.0' 
+ }, 200 diff --git a/app/services/__init__.py b/app/services/__init__.py new file mode 100644 index 0000000..2dce3b8 --- /dev/null +++ b/app/services/__init__.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +業務服務模組 + +Author: PANJIT IT Team +Created: 2024-01-28 +Modified: 2024-01-28 +""" + +from .dify_client import DifyClient +from .translation_service import TranslationService +from .notification_service import NotificationService + +__all__ = [ + 'DifyClient', + 'TranslationService', + 'NotificationService' +] \ No newline at end of file diff --git a/app/services/celery_service.py b/app/services/celery_service.py new file mode 100644 index 0000000..79ebab9 --- /dev/null +++ b/app/services/celery_service.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Celery任務管理服務 + +Author: PANJIT IT Team +Created: 2025-09-04 +""" + +from celery import Celery +from app.utils.logger import get_logger +import os + +logger = get_logger(__name__) + + +def get_celery_app(): + """取得Celery應用實例""" + try: + from celery_app import app as celery_app + return celery_app + except ImportError: + # 如果無法導入,創建一個簡單的Celery實例 + broker_url = os.getenv('REDIS_URL', 'redis://localhost:6379/0') + celery_app = Celery('translation_worker', broker=broker_url) + return celery_app + + +def revoke_task(job_uuid): + """ + 撤銷指定任務的Celery任務 + + Args: + job_uuid (str): 任務UUID + + Returns: + bool: 撤銷是否成功 + """ + try: + celery_app = get_celery_app() + + # Celery任務ID通常與job_uuid相同或相關 + task_id = f"translate_document_{job_uuid}" + + # 嘗試撤銷任務 + celery_app.control.revoke(task_id, terminate=True, signal='SIGKILL') + + logger.info(f"Successfully revoked Celery task: {task_id}") + return True + + except Exception as e: + logger.error(f"Failed to revoke Celery task for job {job_uuid}: {str(e)}") + return False + + +def get_active_tasks(): + """ + 取得當前活躍的Celery任務 + + Returns: + list: 活躍任務列表 + """ + try: + celery_app = get_celery_app() + + # 取得活躍任務 + inspect = 
celery_app.control.inspect() + active_tasks = inspect.active() + + if active_tasks: + return active_tasks + else: + return {} + + except Exception as e: + logger.error(f"Failed to get active tasks: {str(e)}") + return {} + + +def is_task_active(job_uuid): + """ + 檢查指定任務是否在Celery中活躍 + + Args: + job_uuid (str): 任務UUID + + Returns: + bool: 任務是否活躍 + """ + try: + active_tasks = get_active_tasks() + task_id = f"translate_document_{job_uuid}" + + # 檢查所有worker的活躍任務 + for worker, tasks in active_tasks.items(): + for task in tasks: + if task.get('id') == task_id: + return True + + return False + + except Exception as e: + logger.error(f"Failed to check if task is active for job {job_uuid}: {str(e)}") + return False + + +def cleanup_stale_tasks(): + """ + 清理卡住的Celery任務 + + Returns: + int: 清理的任務數量 + """ + try: + from app.models.job import TranslationJob + from datetime import datetime, timedelta + + # 找出超過30分鐘仍在處理中的任務 + stale_threshold = datetime.utcnow() - timedelta(minutes=30) + stale_jobs = TranslationJob.query.filter( + TranslationJob.status == 'PROCESSING', + TranslationJob.processing_started_at < stale_threshold + ).all() + + cleanup_count = 0 + for job in stale_jobs: + if not is_task_active(job.job_uuid): + # 任務不在Celery中活躍,標記為失敗 + job.update_status('FAILED', error_message='任務處理超時,已自動取消') + cleanup_count += 1 + logger.info(f"Cleaned up stale job: {job.job_uuid}") + + return cleanup_count + + except Exception as e: + logger.error(f"Failed to cleanup stale tasks: {str(e)}") + return 0 \ No newline at end of file diff --git a/app/services/dify_client.py b/app/services/dify_client.py new file mode 100644 index 0000000..bb64be6 --- /dev/null +++ b/app/services/dify_client.py @@ -0,0 +1,494 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Dify API 客戶端服務 + +Author: PANJIT IT Team +Created: 2024-01-28 +Modified: 2024-01-28 +""" + +import time +import requests +from typing import Dict, Any, Optional +from flask import current_app +from app.utils.logger import get_logger 
+from app.utils.exceptions import APIError +from app.models.stats import APIUsageStats + +logger = get_logger(__name__) + + +class DifyClient: + """Dify API 客戶端""" + + def __init__(self): + # 翻译API配置 + self.translation_base_url = current_app.config.get('DIFY_TRANSLATION_BASE_URL', '') + self.translation_api_key = current_app.config.get('DIFY_TRANSLATION_API_KEY', '') + + # OCR API配置 + self.ocr_base_url = current_app.config.get('DIFY_OCR_BASE_URL', '') + self.ocr_api_key = current_app.config.get('DIFY_OCR_API_KEY', '') + + self.timeout = (10, 60) # (連接超時, 讀取超時) + self.max_retries = 3 + self.retry_delay = 1.6 # 指數退避基數 + + if not self.translation_base_url or not self.translation_api_key: + logger.warning("Dify Translation API configuration is incomplete") + + if not self.ocr_base_url or not self.ocr_api_key: + logger.warning("Dify OCR API configuration is incomplete") + + def _make_request(self, method: str, endpoint: str, data: Dict[str, Any] = None, + user_id: int = None, job_id: int = None, files_data: Dict = None, + api_type: str = 'translation') -> Dict[str, Any]: + """發送 HTTP 請求到 Dify API""" + + # 根据API类型选择配置 + if api_type == 'ocr': + base_url = self.ocr_base_url + api_key = self.ocr_api_key + if not base_url or not api_key: + raise APIError("Dify OCR API 未配置完整") + else: # translation + base_url = self.translation_base_url + api_key = self.translation_api_key + if not base_url or not api_key: + raise APIError("Dify Translation API 未配置完整") + + url = f"{base_url.rstrip('/')}/{endpoint.lstrip('/')}" + + headers = { + 'Authorization': f'Bearer {api_key}', + 'User-Agent': 'PANJIT-Document-Translator/1.0' + } + + # 只有在非文件上传时才设置JSON Content-Type + if not files_data: + headers['Content-Type'] = 'application/json' + + # 重試邏輯 + last_exception = None + start_time = time.time() + + for attempt in range(self.max_retries): + try: + # logger.debug(f"Making Dify API request: {method} {url} (attempt {attempt + 1})") + + if method.upper() == 'GET': + response = requests.get(url, 
headers=headers, timeout=self.timeout, params=data) + elif files_data: + # 文件上传请求,使用multipart/form-data + response = requests.post(url, headers=headers, timeout=self.timeout, files=files_data, data=data) + else: + # 普通JSON请求 + response = requests.post(url, headers=headers, timeout=self.timeout, json=data) + + # 計算響應時間 + response_time_ms = int((time.time() - start_time) * 1000) + + # 檢查響應狀態 + response.raise_for_status() + + # 解析響應 + result = response.json() + + # 記錄 API 使用統計 + if user_id: + self._record_api_usage( + user_id=user_id, + job_id=job_id, + endpoint=endpoint, + response_data=result, + response_time_ms=response_time_ms, + success=True + ) + + # logger.debug(f"Dify API request successful: {response_time_ms}ms") + return result + + except requests.exceptions.RequestException as e: + last_exception = e + response_time_ms = int((time.time() - start_time) * 1000) + + # 記錄失敗的 API 調用 + if user_id: + self._record_api_usage( + user_id=user_id, + job_id=job_id, + endpoint=endpoint, + response_data={}, + response_time_ms=response_time_ms, + success=False, + error_message=str(e) + ) + + logger.warning(f"Dify API request failed (attempt {attempt + 1}): {str(e)}") + + # 如果是最後一次嘗試,拋出異常 + if attempt == self.max_retries - 1: + break + + # 指數退避 + delay = self.retry_delay ** attempt + # logger.debug(f"Retrying in {delay} seconds...") + time.sleep(delay) + + # 所有重試都失敗了 + error_msg = f"Dify API request failed after {self.max_retries} attempts: {str(last_exception)}" + logger.error(error_msg) + raise APIError(error_msg) + + def _record_api_usage(self, user_id: int, job_id: Optional[int], endpoint: str, + response_data: Dict, response_time_ms: int, success: bool, + error_message: str = None): + """記錄 API 使用統計""" + try: + # 從響應中提取使用量資訊 + metadata = response_data.get('metadata', {}) + + # 如果 job_id 無效,則設為 None 以避免外鍵約束錯誤 + APIUsageStats.record_api_call( + user_id=user_id, + job_id=job_id, # 已經是 Optional,如果無效會被設為 NULL + api_endpoint=endpoint, + metadata=metadata, + 
response_time_ms=response_time_ms, + success=success, + error_message=error_message + ) + except Exception as e: + logger.warning(f"Failed to record API usage: {str(e)}") + + def translate_text(self, text: str, source_language: str, target_language: str, + user_id: int = None, job_id: int = None, conversation_id: str = None) -> Dict[str, Any]: + """翻譯文字""" + + if not text.strip(): + raise APIError("翻譯文字不能為空") + + # 構建標準翻譯 prompt(英文指令格式) + language_names = { + 'zh-tw': 'Traditional Chinese', + 'zh-cn': 'Simplified Chinese', + 'en': 'English', + 'ja': 'Japanese', + 'ko': 'Korean', + 'vi': 'Vietnamese', + 'th': 'Thai', + 'id': 'Indonesian', + 'ms': 'Malay', + 'es': 'Spanish', + 'fr': 'French', + 'de': 'German', + 'ru': 'Russian', + 'ar': 'Arabic' + } + + source_lang_name = language_names.get(source_language, source_language) + target_lang_name = language_names.get(target_language, target_language) + + query = f"""Task: Translate ONLY into {target_lang_name} from {source_lang_name}. + +Rules: +- Output translation text ONLY (no source text, no notes, no questions, no language-detection remarks). +- Preserve original line breaks. +- Do NOT wrap in quotes or code blocks. +- Maintain original formatting and structure. 
+ +{text.strip()}""" + + # 構建請求資料 - 使用成功版本的格式 + request_data = { + 'inputs': {}, + 'response_mode': 'blocking', + 'user': f"user_{user_id}" if user_id else "doc-translator-user", + 'query': query + } + + # 如果有 conversation_id,加入請求中以維持對話連續性 + if conversation_id: + request_data['conversation_id'] = conversation_id + + logger.info(f"[TRANSLATION] Sending translation request...") + logger.info(f"[TRANSLATION] Request data: {request_data}") + logger.info(f"[TRANSLATION] Text length: {len(text)} characters") + + try: + response = self._make_request( + method='POST', + endpoint='/chat-messages', + data=request_data, + user_id=user_id, + job_id=job_id + ) + + # 從響應中提取翻譯結果 - 使用成功版本的方式 + answer = response.get('answer') + + if not isinstance(answer, str) or not answer.strip(): + raise APIError("Dify API 返回空的翻譯結果") + + return { + 'success': True, + 'translated_text': answer, + 'source_text': text, + 'source_language': source_language, + 'target_language': target_language, + 'conversation_id': response.get('conversation_id'), + 'metadata': response.get('metadata', {}) + } + + except APIError: + raise + except Exception as e: + error_msg = f"翻譯請求處理錯誤: {str(e)}" + logger.error(error_msg) + raise APIError(error_msg) + + def test_connection(self) -> bool: + """測試 Dify API 連接""" + try: + # 發送簡單的測試請求 + test_data = { + 'inputs': {'text': 'test'}, + 'response_mode': 'blocking', + 'user': 'health_check' + } + + response = self._make_request( + method='POST', + endpoint='/chat-messages', + data=test_data + ) + + return response is not None + + except Exception as e: + logger.error(f"Dify API connection test failed: {str(e)}") + return False + + def get_app_info(self) -> Dict[str, Any]: + """取得 Dify 應用資訊""" + try: + response = self._make_request( + method='GET', + endpoint='/parameters' + ) + + return { + 'success': True, + 'app_info': response + } + + except Exception as e: + logger.error(f"Failed to get Dify app info: {str(e)}") + return { + 'success': False, + 'error': str(e) + } + + 
@classmethod + def load_config_from_file(cls, file_path: str = 'api.txt'): + """從檔案載入 Dify API 配置""" + try: + import os + from pathlib import Path + + config_file = Path(file_path) + + if not config_file.exists(): + logger.warning(f"Dify config file not found: {file_path}") + return + + with open(config_file, 'r', encoding='utf-8') as f: + for line in f: + line = line.strip() + if line.startswith('#') or not line: + continue # 跳过注释和空行 + + # 翻译API配置(兼容旧格式) + if line.startswith('base_url:') or line.startswith('translation_base_url:'): + base_url = line.split(':', 1)[1].strip() + current_app.config['DIFY_TRANSLATION_BASE_URL'] = base_url + # 兼容旧配置 + current_app.config['DIFY_API_BASE_URL'] = base_url + elif line.startswith('api:') or line.startswith('translation_api:'): + api_key = line.split(':', 1)[1].strip() + current_app.config['DIFY_TRANSLATION_API_KEY'] = api_key + # 兼容旧配置 + current_app.config['DIFY_API_KEY'] = api_key + + # OCR API配置 + elif line.startswith('ocr_base_url:'): + ocr_base_url = line.split(':', 1)[1].strip() + current_app.config['DIFY_OCR_BASE_URL'] = ocr_base_url + elif line.startswith('ocr_api:'): + ocr_api_key = line.split(':', 1)[1].strip() + current_app.config['DIFY_OCR_API_KEY'] = ocr_api_key + + logger.info("Dify API config loaded from file") + + except Exception as e: + logger.error(f"Failed to load Dify config from file: {str(e)}") + + def upload_file(self, image_data: bytes, filename: str, user_id: int = None) -> str: + """上传图片文件到Dify OCR API并返回file_id""" + + if not image_data: + raise APIError("图片数据不能为空") + + logger.info(f"[OCR-UPLOAD] Starting file upload to Dify OCR API") + logger.info(f"[OCR-UPLOAD] File: {filename}, Size: {len(image_data)} bytes, User: {user_id}") + + # 构建文件上传数据 + files_data = { + 'file': (filename, image_data, 'image/png') # 假设为PNG格式 + } + + form_data = { + 'user': f"user_{user_id}" if user_id else "doc-translator-user" + } + + # logger.debug(f"[OCR-UPLOAD] Upload form_data: {form_data}") + # 
logger.debug(f"[OCR-UPLOAD] Using OCR API: {self.ocr_base_url}") + + try: + response = self._make_request( + method='POST', + endpoint='/files/upload', + data=form_data, + files_data=files_data, + user_id=user_id, + api_type='ocr' # 使用OCR API + ) + + logger.info(f"[OCR-UPLOAD] Raw Dify upload response: {response}") + + file_id = response.get('id') + if not file_id: + logger.error(f"[OCR-UPLOAD] No file ID in response: {response}") + raise APIError("Dify 文件上传失败:未返回文件ID") + + logger.info(f"[OCR-UPLOAD] ✓ File uploaded successfully: {file_id}") + # logger.debug(f"[OCR-UPLOAD] File details: name={response.get('name')}, size={response.get('size')}, type={response.get('mime_type')}") + + return file_id + + except APIError: + raise + except Exception as e: + error_msg = f"文件上传到Dify失败: {str(e)}" + logger.error(f"[OCR-UPLOAD] ✗ Upload failed: {error_msg}") + raise APIError(error_msg) + + def ocr_image_with_dify(self, image_data: bytes, filename: str = "image.png", + user_id: int = None, job_id: int = None) -> str: + """使用Dify进行图像OCR识别""" + + logger.info(f"[OCR-RECOGNITION] Starting OCR process for {filename}") + logger.info(f"[OCR-RECOGNITION] Image size: {len(image_data)} bytes, User: {user_id}, Job: {job_id}") + + try: + # 1. 先上传文件获取file_id + logger.info(f"[OCR-RECOGNITION] Step 1: Uploading image to Dify...") + file_id = self.upload_file(image_data, filename, user_id) + logger.info(f"[OCR-RECOGNITION] Step 1 ✓ File uploaded with ID: {file_id}") + + # 2. 构建OCR请求 + # 系统提示词已在Dify Chat Flow中配置,这里只需要发送简单的用户query + query = "將圖片中的文字完整的提取出來" + logger.info(f"[OCR-RECOGNITION] Step 2: Preparing OCR request...") + # logger.debug(f"[OCR-RECOGNITION] Query: {query}") + + # 3. 
构建Chat Flow请求,根据最新Dify运行记录,图片应该放在files数组中 + request_data = { + 'inputs': {}, + 'response_mode': 'blocking', + 'user': f"user_{user_id}" if user_id else "doc-translator-user", + 'query': query, + 'files': [ + { + 'type': 'image', + 'transfer_method': 'local_file', + 'upload_file_id': file_id + } + ] + } + + logger.info(f"[OCR-RECOGNITION] Step 3: Sending OCR request to Dify...") + logger.info(f"[OCR-RECOGNITION] Request data: {request_data}") + logger.info(f"[OCR-RECOGNITION] Using OCR API: {self.ocr_base_url}") + + response = self._make_request( + method='POST', + endpoint='/chat-messages', + data=request_data, + user_id=user_id, + job_id=job_id, + api_type='ocr' # 使用OCR API + ) + + logger.info(f"[OCR-RECOGNITION] Step 3 ✓ Received response from Dify") + logger.info(f"[OCR-RECOGNITION] Raw Dify OCR response: {response}") + + # 从响应中提取OCR结果 + answer = response.get('answer', '') + metadata = response.get('metadata', {}) + conversation_id = response.get('conversation_id', '') + + logger.info(f"[OCR-RECOGNITION] Response details:") + logger.info(f"[OCR-RECOGNITION] - Answer length: {len(answer) if answer else 0} characters") + logger.info(f"[OCR-RECOGNITION] - Conversation ID: {conversation_id}") + logger.info(f"[OCR-RECOGNITION] - Metadata: {metadata}") + + if not isinstance(answer, str) or not answer.strip(): + logger.error(f"[OCR-RECOGNITION] ✗ Empty or invalid answer from Dify") + logger.error(f"[OCR-RECOGNITION] Answer type: {type(answer)}, Content: '{answer}'") + raise APIError("Dify OCR 返回空的识别结果") + + # 记录OCR识别的前100个字符用于调试 + preview = answer[:100] + "..." 
if len(answer) > 100 else answer + logger.info(f"[OCR-RECOGNITION] ✓ OCR completed successfully") + logger.info(f"[OCR-RECOGNITION] Extracted {len(answer)} characters") + # logger.debug(f"[OCR-RECOGNITION] Text preview: {preview}") + + return answer.strip() + + except APIError: + raise + except Exception as e: + error_msg = f"Dify OCR识别失败: {str(e)}" + logger.error(f"[OCR-RECOGNITION] ✗ OCR process failed: {error_msg}") + logger.error(f"[OCR-RECOGNITION] Exception details: {type(e).__name__}: {str(e)}") + raise APIError(error_msg) + + +def init_dify_config(app): + """初始化 Dify 配置""" + with app.app_context(): + # 從 api.txt 載入配置 + DifyClient.load_config_from_file() + + # 檢查配置完整性 + translation_base_url = app.config.get('DIFY_TRANSLATION_BASE_URL') + translation_api_key = app.config.get('DIFY_TRANSLATION_API_KEY') + ocr_base_url = app.config.get('DIFY_OCR_BASE_URL') + ocr_api_key = app.config.get('DIFY_OCR_API_KEY') + + logger.info("Dify API Configuration Status:") + if translation_base_url and translation_api_key: + logger.info("✓ Translation API configured successfully") + else: + logger.warning("✗ Translation API configuration is incomplete") + logger.warning(f" - Translation Base URL: {'✓' if translation_base_url else '✗'}") + logger.warning(f" - Translation API Key: {'✓' if translation_api_key else '✗'}") + + if ocr_base_url and ocr_api_key: + logger.info("✓ OCR API configured successfully") + else: + logger.warning("✗ OCR API configuration is incomplete (扫描PDF功能将不可用)") + logger.warning(f" - OCR Base URL: {'✓' if ocr_base_url else '✗'}") + logger.warning(f" - OCR API Key: {'✓' if ocr_api_key else '✗'}") \ No newline at end of file diff --git a/app/services/document_processor.py b/app/services/document_processor.py new file mode 100644 index 0000000..8bf6ebf --- /dev/null +++ b/app/services/document_processor.py @@ -0,0 +1,864 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +核心文檔處理邏輯 - 移植自最佳版本 +包含完整的 DOCX 文字提取和翻譯插入功能 + +Author: PANJIT IT Team +Created: 
2024-09-02 +Modified: 2024-09-02 +""" + +import re +import sys +import time +from pathlib import Path +from typing import List, Dict, Tuple, Optional, Any +from docx.text.paragraph import Paragraph +from docx.table import Table, _Cell +from docx.shared import Pt +from docx.oxml import OxmlElement +from docx.oxml.ns import qn, nsdecls +import docx + +from app.utils.logger import get_logger +from app.utils.exceptions import FileProcessingError + +logger = get_logger(__name__) + +# ---------- Constants ---------- +INSERT_FONT_SIZE_PT = 10 +SENTENCE_MODE = True + +# ---------- Optional dependencies detection ---------- +try: + import blingfire + _HAS_BLINGFIRE = True +except ImportError: + _HAS_BLINGFIRE = False + +try: + import pysbd + _HAS_PYSBD = True +except ImportError: + _HAS_PYSBD = False + +# ---------- Helper functions ---------- +def _has_cjk(text: str) -> bool: + """Check if text contains CJK (Chinese/Japanese/Korean) characters.""" + for char in text: + if '\u4e00' <= char <= '\u9fff' or \ + '\u3400' <= char <= '\u4dbf' or \ + '\u20000' <= char <= '\u2a6df' or \ + '\u3040' <= char <= '\u309f' or \ + '\u30a0' <= char <= '\u30ff' or \ + '\uac00' <= char <= '\ud7af': + return True + return False + +def _normalize_text(text: str) -> str: + """Normalize text for comparison.""" + return re.sub(r'\s+', ' ', text.strip().lower()) + +def _append_after(p: Paragraph, text_block: str, italic: bool=True, font_size_pt: int=INSERT_FONT_SIZE_PT) -> Paragraph: + """Insert a new paragraph after p, return the new paragraph (for chain insert).""" + new_p = OxmlElement("w:p") + p._p.addnext(new_p) + np = Paragraph(new_p, p._parent) + lines = text_block.split("\n") + for i, line in enumerate(lines): + run = np.add_run(line) + if italic: + run.italic = True + if font_size_pt: + run.font.size = Pt(font_size_pt) + if i < len(lines) - 1: + run.add_break() + tag = np.add_run("\u200b") + if italic: + tag.italic = True + if font_size_pt: + tag.font.size = Pt(font_size_pt) + return np + 
+def _is_our_insert_block(p: Paragraph) -> bool: + """Return True iff paragraph contains our zero-width marker.""" + return any("\u200b" in (r.text or "") for r in p.runs) + +def _find_last_inserted_after(p: Paragraph, limit: int = 8) -> Optional[Paragraph]: + """Find the last paragraph that was inserted after p (up to limit paragraphs).""" + try: + # Get all paragraphs in the parent container + if hasattr(p._parent, 'paragraphs'): + all_paras = list(p._parent.paragraphs) + else: + # Handle cases where _parent doesn't have paragraphs (e.g., table cells) + return None + + # Find p's index + p_index = -1 + for i, para in enumerate(all_paras): + if para._element == p._element: + p_index = i + break + + if p_index == -1: + return None + + # Check paragraphs after p + last_found = None + for i in range(p_index + 1, min(p_index + 1 + limit, len(all_paras))): + if _is_our_insert_block(all_paras[i]): + last_found = all_paras[i] + else: + break # Stop at first non-inserted paragraph + except Exception: + return None + + return last_found + +def _p_text_with_breaks(p: Paragraph) -> str: + """Extract text from paragraph with line breaks preserved.""" + parts = [] + for node in p._element.xpath(".//*[local-name()='t' or local-name()='br' or local-name()='tab']"): + tag = node.tag.split('}', 1)[-1] + if tag == "t": + parts.append(node.text or "") + elif tag == "br": + parts.append("\n") + elif tag == "tab": + parts.append("\t") + return "".join(parts) + +def _get_cell_full_text(cell) -> str: + """ + 提取表格儲存格的完整文字內容,包含所有段落 + """ + try: + cell_texts = [] + for para in cell.paragraphs: + para_text = _p_text_with_breaks(para) + if para_text.strip(): + cell_texts.append(para_text.strip()) + + # 用換行符連接所有段落 + return '\n'.join(cell_texts) + except Exception as e: + logger.warning(f"提取儲存格文字失敗: {e}") + return "" + +def _is_our_insert_block_text(text: str) -> bool: + """檢查文字是否為翻譯插入區塊""" + if not text: + return False + text_lower = text.lower().strip() + return ( + 
text_lower.startswith('【') or + text_lower.startswith('[翻譯') or + '翻譯:' in text_lower or + 'translation:' in text_lower or + text_lower.startswith('translated:') or + "\u200b" in text + ) + +def _is_our_insert_block(p: Paragraph) -> bool: + """Check if paragraph is our inserted translation (contains zero-width space marker).""" + text = _p_text_with_breaks(p) + return "\u200b" in text + +def should_translate(text: str, src_lang: str) -> bool: + """Determine if text should be translated based on content and source language.""" + text = text.strip() + + # 只要有字就翻譯 - 最小長度設為1 + if len(text) < 1: + return False + + # Skip pure numbers, dates, etc. + if re.match(r'^[\d\s\.\-\:\/]+$', text): + return False + + # For auto-detect, translate if has CJK or meaningful text + if src_lang.lower() in ('auto', 'auto-detect'): + return _has_cjk(text) or len(text) > 5 + + return True + +def _split_sentences(text: str, lang: str = 'auto') -> List[str]: + """Split text into sentences using available libraries.""" + if not text.strip(): + return [] + + # Try blingfire first + if _HAS_BLINGFIRE and SENTENCE_MODE: + try: + sentences = blingfire.text_to_sentences(text).split('\n') + sentences = [s.strip() for s in sentences if s.strip()] + if sentences: + return sentences + except Exception as e: + logger.warning(f"Blingfire failed: {e}") + + # Try pysbd + if _HAS_PYSBD and SENTENCE_MODE: + try: + seg = pysbd.Segmenter(language="en" if lang == "auto" else lang) + sentences = seg.segment(text) + sentences = [s.strip() for s in sentences if s.strip()] + if sentences: + return sentences + except Exception as e: + logger.warning(f"PySBD failed: {e}") + + # Fallback to simple splitting + separators = ['. 
', '。', '!', '?', '!', '?', '\n'] + sentences = [text] + + for sep in separators: + new_sentences = [] + for s in sentences: + parts = s.split(sep) + if len(parts) > 1: + new_sentences.extend([p.strip() + sep.rstrip() for p in parts[:-1] if p.strip()]) + if parts[-1].strip(): + new_sentences.append(parts[-1].strip()) + else: + new_sentences.append(s) + sentences = new_sentences + + return [s for s in sentences if len(s.strip()) > 3] + +# ---------- Segment class ---------- +class Segment: + """Represents a translatable text segment in a document.""" + + def __init__(self, kind: str, ref: Any, ctx: str, text: str): + self.kind = kind # 'para' | 'txbx' + self.ref = ref # Reference to original document element + self.ctx = ctx # Context information + self.text = text # Text content + +# ---------- TextBox helpers ---------- +def _txbx_iter_texts(doc: docx.Document): + """ + Yield (txbxContent_element, joined_source_text) + - Deeply collect all descendant under txbxContent + - Skip our inserted translations: contains zero-width or (all italic and no CJK) + - Keep only lines that still have CJK + """ + def _p_text_flags(p_el): + parts = [] + for node in p_el.xpath(".//*[local-name()='t' or local-name()='br' or local-name()='tab']"): + tag = node.tag.split('}', 1)[-1] + if tag == "t": + parts.append(node.text or "") + elif tag == "br": + parts.append("\n") + else: + parts.append(" ") + text = "".join(parts) + has_zero = ("\u200b" in text) + runs = p_el.xpath(".//*[local-name()='r']") + vis, ital = [], [] + for r in runs: + rt = "".join([(t.text or "") for t in r.xpath(".//*[local-name()='t']")]) + if (rt or "").strip(): + vis.append(rt) + ital.append(bool(r.xpath(".//*[local-name()='i']"))) + all_italic = (len(vis) > 0 and all(ital)) + return text, has_zero, all_italic + + for tx in doc._element.xpath(".//*[local-name()='txbxContent']"): + kept = [] + for p in tx.xpath(".//*[local-name()='p']"): # all descendant paragraphs + text, has_zero, all_italic = _p_text_flags(p) 
+ if not (text or "").strip(): + continue + if has_zero: + continue # our inserted + for line in text.split("\n"): + if line.strip(): + kept.append(line.strip()) + if kept: + joined = "\n".join(kept) + yield tx, joined + +def _txbx_append_paragraph(tx, text_block: str, italic: bool = True, font_size_pt: int = INSERT_FONT_SIZE_PT): + """Append a paragraph to textbox content.""" + p = OxmlElement("w:p") + r = OxmlElement("w:r") + rPr = OxmlElement("w:rPr") + if italic: + rPr.append(OxmlElement("w:i")) + if font_size_pt: + sz = OxmlElement("w:sz") + sz.set(qn("w:val"), str(int(font_size_pt * 2))) + rPr.append(sz) + r.append(rPr) + lines = text_block.split("\n") + for i, line in enumerate(lines): + if i > 0: + r.append(OxmlElement("w:br")) + t = OxmlElement("w:t") + t.set(qn("xml:space"), "preserve") + t.text = line + r.append(t) + tag = OxmlElement("w:t") + tag.set(qn("xml:space"), "preserve") + tag.text = "\u200b" + r.append(tag) + p.append(r) + tx.append(p) + +def _txbx_tail_equals(tx, translations: List[str]) -> bool: + """Check if textbox already contains the expected translations.""" + paras = tx.xpath("./*[local-name()='p']") + if len(paras) < len(translations): + return False + tail = paras[-len(translations):] + for q, expect in zip(tail, translations): + parts = [] + for node in q.xpath(".//*[local-name()='t' or local-name()='br']"): + tag = node.tag.split("}", 1)[-1] + parts.append("\n" if tag == "br" else (node.text or "")) + if _normalize_text("".join(parts).strip()) != _normalize_text(expect): + return False + return True + +# ---------- Main extraction logic ---------- +def _get_paragraph_key(p: Paragraph) -> str: + """Generate a stable unique key for paragraph deduplication.""" + try: + # Use XML content hash + text content for stable deduplication + xml_content = p._p.xml if hasattr(p._p, 'xml') else str(p._p) + text_content = _p_text_with_breaks(p) + combined = f"{hash(xml_content)}_{len(text_content)}_{text_content[:50]}" + return combined + except 
def _collect_docx_segments(doc: docx.Document) -> List[Segment]:
    """
    Collect the translatable segments of a DOCX document.

    The body is walked recursively in document order and produces:
    - ``para`` segments for paragraphs (deduplicated via ``_get_paragraph_key``),
    - ``table_cell`` segments, one per distinct table cell,
    - ``para`` segments for SDT placeholder text and dropdown items (their
      ``ref`` is the ``w:sdt`` element), followed by the SDT's own content,
    - ``txbx`` segments for text boxes, appended after the body content.

    Args:
        doc: The python-docx document to scan.

    Returns:
        The list of collected segments.
    """
    segs: List[Segment] = []
    seen_par_keys = set()
    # A merged cell is reported once per grid position by ``row.cells``;
    # remember the underlying elements so each cell is collected only once.
    seen_cell_elements = set()
    ns = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}

    class _SdtContentWrapper:
        """Minimal container exposing ``_element``/``_parent`` for recursion."""

        def __init__(self, element, parent):
            self._element = element
            self._parent = parent

    def _add_paragraph(p: Paragraph, ctx: str):
        try:
            p_key = _get_paragraph_key(p)
            if p_key in seen_par_keys:
                return

            txt = _p_text_with_breaks(p)
            if txt.strip() and not _is_our_insert_block(p):
                segs.append(Segment("para", p, ctx, txt))
                seen_par_keys.add(p_key)
        except Exception as e:
            # Best effort: one malformed paragraph must not abort extraction.
            logger.warning(f"段落處理錯誤: {e}, 跳過此段落")

    def _process_container_content(container, ctx: str):
        """
        Recursively process a container (body, cell, or SDT content),
        handling paragraph, table and SDT children.
        """
        if container._element is None:
            return

        for child_element in container._element:
            qname = child_element.tag

            if qname.endswith('}p'):  # Paragraph
                _add_paragraph(Paragraph(child_element, container), ctx)

            elif qname.endswith('}tbl'):  # Table
                table = Table(child_element, container)
                for r_idx, row in enumerate(table.rows, 1):
                    for c_idx, cell in enumerate(row.cells, 1):
                        cell_element = cell._element
                        if cell_element in seen_cell_elements:
                            continue  # another grid position of a merged cell
                        seen_cell_elements.add(cell_element)

                        cell_ctx = f"{ctx} > Tbl(r{r_idx},c{c_idx})"

                        # Extract per cell rather than per paragraph.
                        cell_text = _get_cell_full_text(cell)
                        if cell_text.strip() and not _is_our_insert_block_text(cell_text):
                            segs.append(Segment("table_cell", cell, cell_ctx, cell_text))

            elif qname.endswith('}sdt'):  # Structured Document Tag (SDT)
                sdt_ctx = f"{ctx} > SDT"

                # 1. SDT metadata text: placeholder text
                placeholder_texts = [
                    t.text
                    for t in child_element.xpath('.//w:placeholder//w:t', namespaces=ns)
                    if t.text
                ]
                if placeholder_texts:
                    full_placeholder = "".join(placeholder_texts).strip()
                    if full_placeholder:
                        segs.append(Segment("para", child_element, f"{sdt_ctx}-Placeholder", full_placeholder))

                # 1b. SDT metadata text: dropdown list items
                list_items = []
                for item in child_element.xpath('.//w:dropDownList/w:listItem', namespaces=ns):
                    display_text = item.get(qn('w:displayText'))
                    if display_text:
                        list_items.append(display_text)
                if list_items:
                    items_as_text = "\n".join(list_items)
                    segs.append(Segment("para", child_element, f"{sdt_ctx}-Dropdown", items_as_text))

                # 2. Recurse into the SDT's actual content (sdtContent)
                sdt_content_element = child_element.find(qn('w:sdtContent'))
                if sdt_content_element is not None:
                    sdt_content_wrapper = _SdtContentWrapper(sdt_content_element, container)
                    _process_container_content(sdt_content_wrapper, sdt_ctx)

    # --- Main execution starts here ---

    # 1. Process the main document body
    _process_container_content(doc._body, "Body")

    # 2. Process textboxes
    for tx, s in _txbx_iter_texts(doc):
        if s.strip() and (_has_cjk(s) or should_translate(s, 'auto')):
            segs.append(Segment("txbx", tx, "TextBox", s))

    return segs
+ The key fix is in the segment filtering logic - we now correctly check if any target + language has translation available using the proper key format (target_lang, text). + + Args: + doc: The DOCX document object + segs: List of segments to translate + tmap: Translation map with keys as (target_language, source_text) + targets: List of target languages in order + log: Logging function + + Returns: + Tuple of (successful_insertions, skipped_insertions) + + Key Bug Fix: + OLD (INCORRECT): if (seg.kind, seg.text) not in tmap and (targets[0], seg.text) not in tmap + NEW (CORRECT): has_any_translation = any((tgt, seg.text) in tmap for tgt in targets) + """ + ok_cnt = skip_cnt = 0 + + # Helper function to add a formatted run to a paragraph + def _add_formatted_run(p: Paragraph, text: str, italic: bool, font_size_pt: int): + lines = text.split("\n") + for i, line in enumerate(lines): + run = p.add_run(line) + if italic: + run.italic = True + if font_size_pt: + run.font.size = Pt(font_size_pt) + if i < len(lines) - 1: + run.add_break() + # Add our zero-width space marker + tag_run = p.add_run("\u200b") + if italic: + tag_run.italic = True + if font_size_pt: + tag_run.font.size = Pt(font_size_pt) + + for seg in segs: + # Check if any target language has translation for this segment + has_any_translation = any((tgt, seg.text) in tmap for tgt in targets) + if not has_any_translation: + log(f"[SKIP] 無翻譯結果: {seg.ctx} | {seg.text[:50]}...") + skip_cnt += 1 + continue + + # Get translations for all targets, with fallback for missing ones + translations = [] + for tgt in targets: + if (tgt, seg.text) in tmap: + translations.append(tmap[(tgt, seg.text)]) + else: + log(f"[WARNING] 缺少 {tgt} 翻譯: {seg.text[:30]}...") + translations.append(f"【翻譯查詢失敗|{tgt}】{seg.text[:50]}...") + + log(f"[INSERT] 準備插入 {len(translations)} 個翻譯到 {seg.ctx}: {seg.text[:30]}...") + + if seg.kind == "para": + # Check if this is an SDT segment (ref is an XML element, not a Paragraph) + if hasattr(seg.ref, 
'tag') and seg.ref.tag.endswith('}sdt'): + # Handle SDT segments - insert translation into sdtContent + sdt_element = seg.ref + ns = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'} + sdt_content = sdt_element.find(qn('w:sdtContent')) + + if sdt_content is not None: + # Check if translations already exist + existing_paras = sdt_content.xpath('.//w:p', namespaces=ns) + existing_texts = [] + for ep in existing_paras: + p_obj = Paragraph(ep, None) + if _is_our_insert_block(p_obj): + existing_texts.append(_p_text_with_breaks(p_obj)) + + # Check if all translations already exist + if len(existing_texts) >= len(translations): + if all(_normalize_text(e) == _normalize_text(t) for e, t in zip(existing_texts[:len(translations)], translations)): + skip_cnt += 1 + log(f"[SKIP] SDT 已存在翻譯: {seg.text[:30]}...") + continue + + # Add translations to SDT content + for t in translations: + if not any(_normalize_text(t) == _normalize_text(e) for e in existing_texts): + # Create new paragraph in SDT content + new_p_element = OxmlElement("w:p") + sdt_content.append(new_p_element) + new_p = Paragraph(new_p_element, None) + _add_formatted_run(new_p, t, italic=True, font_size_pt=INSERT_FONT_SIZE_PT) + + ok_cnt += 1 + log(f"[SUCCESS] SDT 插入翻譯(交錯格式)") + continue + + p: Paragraph = seg.ref + + # --- CONTEXT-AWARE INSERTION LOGIC (from successful version) --- + # Check if the paragraph's parent is a table cell + if isinstance(p._parent, _Cell): + cell = p._parent + + try: + # Find the current paragraph's position in the cell + cell_paragraphs = list(cell.paragraphs) + p_index = -1 + for idx, cell_p in enumerate(cell_paragraphs): + if cell_p._element == p._element: + p_index = idx + break + + if p_index == -1: + log(f"[WARNING] 無法找到段落在單元格中的位置,使用原始方法") + # Fallback to original method + for block in translations: + new_p = cell.add_paragraph() + _add_formatted_run(new_p, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT) + ok_cnt += 1 + continue + + # Check if 
translations already exist right after this paragraph + existing_texts = [] + check_limit = min(p_index + 1 + len(translations), len(cell_paragraphs)) + for idx in range(p_index + 1, check_limit): + if _is_our_insert_block(cell_paragraphs[idx]): + existing_texts.append(_p_text_with_breaks(cell_paragraphs[idx])) + + # Check if all translations already exist in order + if len(existing_texts) >= len(translations): + if all(_normalize_text(e) == _normalize_text(t) for e, t in zip(existing_texts[:len(translations)], translations)): + skip_cnt += 1 + log(f"[SKIP] 表格單元格已存在翻譯: {seg.text[:30]}...") + continue + + # Determine which translations need to be added + to_add = [] + for t in translations: + if not any(_normalize_text(t) == _normalize_text(e) for e in existing_texts): + to_add.append(t) + + if not to_add: + skip_cnt += 1 + log(f"[SKIP] 表格單元格所有翻譯已存在: {seg.text[:30]}...") + continue + + # Insert new paragraphs right after the current paragraph + insert_after = p + for block in to_add: + try: + # Create new paragraph and insert it after the current position + new_p_element = OxmlElement("w:p") + insert_after._element.addnext(new_p_element) + new_p = Paragraph(new_p_element, cell) + _add_formatted_run(new_p, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT) + insert_after = new_p # Update position for next insertion + except Exception as e: + log(f"[ERROR] 表格插入失敗: {e}, 嘗試fallback方法") + # Fallback: add at the end of cell + try: + new_p = cell.add_paragraph() + _add_formatted_run(new_p, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT) + log(f"[SUCCESS] Fallback插入成功") + except Exception as e2: + log(f"[FATAL] Fallback也失敗: {e2}") + continue + ok_cnt += 1 + log(f"[SUCCESS] 表格單元格插入 {len(to_add)} 個翻譯(緊接原文後)") + + except Exception as e: + log(f"[ERROR] 表格處理全面失敗: {e}, 跳過此段落") + continue + + else: + # Normal paragraph (not in table cell) - SIMPLIFIED FOR DEBUGGING + try: + # TEMPORARILY DISABLE existing translation check to force insertion + log(f"[DEBUG] 強制插入翻譯到段落: 
{seg.text[:30]}...") + + # Force all translations to be added + to_add = translations + + # Use simple positioning - always insert after current paragraph + anchor = p + + for block in to_add: + try: + log(f"[DEBUG] 嘗試插入: {block[:50]}...") + anchor = _append_after(anchor, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT) + log(f"[SUCCESS] _append_after成功插入") + except Exception as e: + log(f"[ERROR] _append_after失敗: {e}, 嘗試簡化插入") + try: + # Fallback: simple append + if hasattr(p._parent, 'add_paragraph'): + new_p = p._parent.add_paragraph() + _add_formatted_run(new_p, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT) + log(f"[SUCCESS] Fallback段落插入成功") + else: + log(f"[ERROR] 無法進行fallback插入") + except Exception as e2: + log(f"[FATAL] Fallback也失敗: {e2}") + continue + + ok_cnt += 1 + log(f"[SUCCESS] 段落強制插入 {len(to_add)} 個翻譯") + + except Exception as e: + log(f"[ERROR] 段落處理失敗: {e}, 跳過此段落") + continue + + elif seg.kind == "table_cell": + # 處理表格儲存格翻譯插入 + cell = seg.ref # cell 是 _Cell 對象 + + # 檢查儲存格是否已有翻譯 + existing_translations = [] + cell_paragraphs = list(cell.paragraphs) + + # 檢查儲存格末尾是否已有翻譯 + translation_start_index = len(cell_paragraphs) + for i in range(len(cell_paragraphs) - 1, -1, -1): + if _is_our_insert_block(cell_paragraphs[i]): + existing_translations.insert(0, _p_text_with_breaks(cell_paragraphs[i])) + translation_start_index = i + else: + break + + # 檢查是否所有翻譯都已存在且相同 + if len(existing_translations) >= len(translations): + if all(_normalize_text(e) == _normalize_text(t) for e, t in zip(existing_translations[:len(translations)], translations)): + skip_cnt += 1 + log(f"[SKIP] 表格儲存格已存在翻譯: {seg.text[:30]}...") + continue + + # 移除舊的翻譯段落(如果有的話) + for i in range(len(cell_paragraphs) - 1, translation_start_index - 1, -1): + if _is_our_insert_block(cell_paragraphs[i]): + cell._element.remove(cell_paragraphs[i]._element) + + # 檢查是否為簡單的短文本儲存格(只有原文,沒有複雜結構) + cell_content = cell.text.strip() + is_simple_cell = len(cell_content) <= 10 and cell_content == 
seg.text.strip() + + if is_simple_cell: + # 對於簡單短文本,直接替換內容而不是添加段落 + log(f"[INFO] 簡單儲存格內容替換: '{seg.text.strip()}' -> '{translations[0] if translations else 'N/A'}'") + + # 清空所有段落內容 + for para in cell.paragraphs: + para.clear() + + # 在第一個段落中添加原文和翻譯 + first_para = cell.paragraphs[0] if cell.paragraphs else cell.add_paragraph() + + # 添加原文 + run_orig = first_para.add_run(seg.text.strip()) + + # 添加換行和翻譯 + for t in translations: + first_para.add_run('\n') + run_trans = first_para.add_run(t) + run_trans.italic = True + if INSERT_FONT_SIZE_PT: + run_trans.font.size = Pt(INSERT_FONT_SIZE_PT) + + # 添加標記 + tag_run = first_para.add_run("\u200b") + tag_run.italic = True + if INSERT_FONT_SIZE_PT: + tag_run.font.size = Pt(INSERT_FONT_SIZE_PT) + else: + # 對於複雜儲存格,使用原有的添加段落方式 + for t in translations: + new_p = cell.add_paragraph() + _add_formatted_run(new_p, t, italic=True, font_size_pt=INSERT_FONT_SIZE_PT) + + ok_cnt += 1 + log(f"[SUCCESS] 表格儲存格插入 {len(translations)} 個翻譯") + + elif seg.kind == "txbx": + tx = seg.ref + # Check if textbox already has our translations at the end + if _txbx_tail_equals(tx, translations): + skip_cnt += 1 + log(f"[SKIP] 文字框已存在翻譯: {seg.text[:30]}...") + continue + + # Append translations to textbox + for t in translations: + _txbx_append_paragraph(tx, t, italic=True, font_size_pt=INSERT_FONT_SIZE_PT) + + ok_cnt += 1 + log(f"[SUCCESS] 文字框插入 {len(translations)} 個翻譯") + + return ok_cnt, skip_cnt + +# ---------- Main DocumentProcessor class ---------- +class DocumentProcessor: + """Enhanced document processor with complete DOCX handling capabilities.""" + + def __init__(self): + self.logger = logger + + def extract_docx_segments(self, file_path: str) -> List[Segment]: + """Extract all translatable segments from DOCX file.""" + try: + doc = docx.Document(file_path) + segments = _collect_docx_segments(doc) + + self.logger.info(f"Extracted {len(segments)} segments from {file_path}") + for seg in segments[:5]: # Log first 5 segments for debugging + 
def _rematch_segments_to_document(self, doc: docx.Document, old_segments: List[Segment]) -> List[Segment]:
    """Map segments taken from another Document instance onto ``doc``.

    Segments are re-collected from ``doc`` and paired with ``old_segments``
    by ``(kind, ctx, normalised text)``. Each fresh segment is handed out at
    most once, in document order, so several old segments with the same key
    (for example repeated body paragraphs) are matched to distinct elements
    rather than all to the first one.

    Args:
        doc: The document instance that will receive the translations.
        old_segments: Segments extracted earlier from a different instance.

    Returns:
        A list aligned with ``old_segments``. Unmatched entries keep the old
        segment (a warning is logged). If re-collection fails entirely,
        ``old_segments`` is returned unchanged.
    """
    try:
        # Extract fresh segments from the current document instance
        fresh_segments = _collect_docx_segments(doc)

        # Bucket fresh segments by match key. Buckets are filled in reverse so
        # that ``pop()`` returns candidates in document order in O(1).
        unused_by_key = {}
        for fresh_seg in reversed(fresh_segments):
            key = (fresh_seg.kind, fresh_seg.ctx, _normalize_text(fresh_seg.text))
            unused_by_key.setdefault(key, []).append(fresh_seg)

        matched_segments = []
        for old_seg in old_segments:
            key = (old_seg.kind, old_seg.ctx, _normalize_text(old_seg.text))
            candidates = unused_by_key.get(key)
            if candidates:
                matched_segments.append(candidates.pop())
            else:
                self.logger.warning(f"Failed to match segment: {old_seg.text[:50]}...")
                # Still add the old segment but it might not work for insertion
                matched_segments.append(old_seg)

        self.logger.debug(f"Re-matched {len(matched_segments)} segments to current document")
        return matched_segments

    except Exception as e:
        self.logger.error(f"Failed to re-match segments: {str(e)}")
        # Return original segments as fallback
        return old_segments
log_func(msg: str): + self.logger.debug(msg) + + ok_count, skip_count = _insert_docx_translations( + doc, matched_segments, translation_map, target_languages, log_func + ) + + # Save the modified document + doc.save(output_path) + + self.logger.info(f"Inserted {ok_count} translations, skipped {skip_count}. Saved to: {output_path}") + return ok_count, skip_count + + except Exception as e: + self.logger.error(f"Failed to insert DOCX translations: {str(e)}") + raise FileProcessingError(f"DOCX 翻譯插入失敗: {str(e)}") + + def split_text_into_sentences(self, text: str, language: str = 'auto') -> List[str]: + """Split text into sentences using the best available method.""" + return _split_sentences(text, language) + + def should_translate_text(self, text: str, source_language: str) -> bool: + """Determine if text should be translated.""" + return should_translate(text, source_language) + + def insert_docx_combined_translations(self, file_path: str, segments: List[Segment], + translation_map: Dict[Tuple[str, str], str], + target_languages: List[str], output_path: str) -> Tuple[int, int]: + """Insert all translations into a single DOCX file with combined multi-language output. + + This creates a combined file where each original text is followed by all translations + in the format: original\n英文\n越南文 etc. 
+ """ + try: + doc = docx.Document(file_path) + + # Re-match segments with the current document instance + matched_segments = self._rematch_segments_to_document(doc, segments) + + def log_func(msg: str): + self.logger.debug(msg) + + # Use the existing _insert_docx_translations function which already supports + # multiple target languages in a single document + ok_count, skip_count = _insert_docx_translations( + doc, matched_segments, translation_map, target_languages, log_func + ) + + # Save the combined document + doc.save(output_path) + + self.logger.info(f"Generated combined multi-language file: {output_path}") + self.logger.info(f"Inserted {ok_count} translations, skipped {skip_count}") + return ok_count, skip_count + + except Exception as e: + self.logger.error(f"Failed to create combined DOCX translations: {str(e)}") + raise FileProcessingError(f"組合多語言 DOCX 檔案生成失敗: {str(e)}") \ No newline at end of file diff --git a/app/services/enhanced_pdf_parser.py b/app/services/enhanced_pdf_parser.py new file mode 100644 index 0000000..21c9167 --- /dev/null +++ b/app/services/enhanced_pdf_parser.py @@ -0,0 +1,700 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +增强的PDF解析器 - 支持扫描PDF的OCR处理 + +Author: PANJIT IT Team +Created: 2024-09-23 +Modified: 2024-09-23 +""" + +import io +from pathlib import Path +from typing import List, Optional +from PyPDF2 import PdfReader +from app.utils.logger import get_logger +from app.utils.exceptions import FileProcessingError +from app.services.dify_client import DifyClient +from app.services.ocr_cache import OCRCache +from app.utils.image_preprocessor import ImagePreprocessor + +logger = get_logger(__name__) + +# 检查PyMuPDF依赖 +try: + import fitz # PyMuPDF + _HAS_PYMUPDF = True +except ImportError: + _HAS_PYMUPDF = False + logger.warning("PyMuPDF not available. 
def is_scanned_pdf(self, max_pages: int = 3, min_text_length: int = 100) -> bool:
    """Guess whether the PDF is a scan by how much text its first pages hold.

    Args:
        max_pages: Number of leading pages to sample.
        min_text_length: If the sampled pages contain fewer characters than
            this (after trimming the ends), the PDF is classified as scanned.

    Returns:
        True when the document looks scanned. Also True when the analysis
        itself fails, so the caller falls back to OCR.
    """
    try:
        reader = PdfReader(str(self.file_path))

        # Sample only the leading pages.
        pages_to_check = min(max_pages, len(reader.pages))
        page_texts = [
            reader.pages[i].extract_text() or ""  # guard against a None result
            for i in range(pages_to_check)
        ]

        # Very little extractable text most likely means a scan.
        text_length = len("".join(page_texts).strip())
        logger.info(f"PDF text extraction found {text_length} characters in first {pages_to_check} pages")

        is_scanned = text_length < min_text_length

        if is_scanned:
            logger.info("PDF detected as scanned document, will use OCR processing")
        else:
            logger.info("PDF detected as text-based document, will use direct text extraction")

        return is_scanned

    except Exception as e:
        logger.warning(f"Failed to analyze PDF type: {e}, treating as scanned document")
        return True  # default to the OCR path
def _extract_from_scanned_pdf(self, user_id: Optional[int] = None, job_id: Optional[int] = None) -> List[str]:
    """Extract text segments from a scanned PDF by OCR-ing each rendered page.

    Every page is rendered to PNG, preprocessed, and looked up in the OCR
    cache. On a cache miss the image is sent to Dify OCR and a non-empty
    result is stored in the cache. A page that fails is logged and skipped.

    Args:
        user_id: Forwarded to the OCR call and stored in the cache metadata.
        job_id: Forwarded to the OCR call and stored in the cache metadata.

    Returns:
        The extracted segments after ``_merge_short_segments``.

    Raises:
        FileProcessingError: If the document cannot be processed or no text
            is extracted from any page.
    """
    try:
        doc = fitz.open(str(self.file_path))
        text_segments = []

        # Render at 2x zoom for better OCR accuracy (same for every page).
        zoom = 2.0
        mat = fitz.Matrix(zoom, zoom)

        try:
            total_pages = doc.page_count
            logger.info(f"Processing scanned PDF with {total_pages} pages using Dify OCR")

            for page_num in range(total_pages):
                try:
                    logger.info(f"[PDF-OCR] Processing page {page_num + 1}/{total_pages}")
                    page = doc[page_num]
                    pix = page.get_pixmap(matrix=mat, alpha=False)

                    # Convert to PNG and preprocess the image for OCR.
                    img_data_raw = pix.tobytes("png")
                    img_data = self.image_preprocessor.preprocess_smart(img_data_raw)
                    logger.debug(f"[PDF-OCR] Page {page_num + 1}: Image preprocessed ({len(img_data_raw)} -> {len(img_data)} bytes)")
                    filename = f"page_{page_num + 1}.png"

                    logger.info(f"[PDF-OCR] Page {page_num + 1}: Converted to image ({len(img_data)} bytes)")
                    logger.debug(f"[PDF-OCR] Page {page_num + 1}: Image zoom={zoom}, format=PNG")

                    # Check the OCR cache first.
                    cache_key_info = f"{self.file_path.name}_page_{page_num + 1}_zoom_{zoom}"
                    cached_text = self.ocr_cache.get_cached_text(
                        file_data=img_data,
                        filename=filename,
                        additional_info=cache_key_info
                    )

                    if cached_text:
                        logger.info(f"[PDF-OCR] Page {page_num + 1}: ✓ 使用快取的OCR結果 (節省AI流量)")
                        ocr_text = cached_text
                    else:
                        # Cache miss: recognise the text with Dify OCR.
                        logger.info(f"[PDF-OCR] Page {page_num + 1}: Starting OCR recognition...")
                        ocr_text = self.dify_client.ocr_image_with_dify(
                            image_data=img_data,
                            filename=filename,
                            user_id=user_id,
                            job_id=job_id
                        )

                        # Store only non-empty OCR results in the cache.
                        if ocr_text.strip():
                            self.ocr_cache.save_cached_text(
                                file_data=img_data,
                                extracted_text=ocr_text,
                                filename=filename,
                                additional_info=cache_key_info,
                                metadata={
                                    'source_file': str(self.file_path),
                                    'page_number': page_num + 1,
                                    'total_pages': total_pages,
                                    'zoom_level': zoom,
                                    'image_size_bytes': len(img_data),
                                    'user_id': user_id,
                                    'job_id': job_id
                                }
                            )
                            logger.info(f"[PDF-OCR] Page {page_num + 1}: ✓ OCR結果已保存到快取")

                    logger.info(f"[PDF-OCR] Page {page_num + 1}: OCR completed")
                    logger.debug(f"[PDF-OCR] Page {page_num + 1}: Raw OCR result length: {len(ocr_text)}")

                    if ocr_text.strip():
                        # Split the OCR output into sentences.
                        logger.debug(f"[PDF-OCR] Page {page_num + 1}: Splitting OCR text into sentences...")
                        sentences = self._split_ocr_text(ocr_text)

                        # Keep only sentences longer than 5 characters.
                        valid_sentences = [s for s in sentences if len(s.strip()) > 5]
                        text_segments.extend(valid_sentences)

                        logger.info(f"[PDF-OCR] Page {page_num + 1}: ✓ Extracted {len(valid_sentences)} valid sentences")
                        logger.debug(f"[PDF-OCR] Page {page_num + 1}: Total sentences before filter: {len(sentences)}")

                        # Log a short preview for debugging.
                        if valid_sentences:
                            preview = valid_sentences[0][:50] + "..." if len(valid_sentences[0]) > 50 else valid_sentences[0]
                            logger.debug(f"[PDF-OCR] Page {page_num + 1}: First sentence preview: {preview}")
                    else:
                        logger.warning(f"[PDF-OCR] Page {page_num + 1}: ⚠ OCR returned empty result")

                except Exception as e:
                    logger.error(f"[PDF-OCR] Page {page_num + 1}: ✗ Processing failed: {str(e)}")
                    logger.error(f"[PDF-OCR] Page {page_num + 1}: Exception type: {type(e).__name__}")
                    # Keep going: one bad page must not abort the whole job.
                    continue
        finally:
            # Always release the PyMuPDF document, even on unexpected errors.
            doc.close()

        logger.info(f"[PDF-OCR] OCR processing completed for all {total_pages} pages")
        logger.info(f"[PDF-OCR] Total text segments extracted: {len(text_segments)}")

        if not text_segments:
            logger.error(f"[PDF-OCR] ✗ No text content extracted from any page")
            raise FileProcessingError("OCR处理完成,但未提取到任何文字内容")

        logger.info(f"[PDF-OCR] ✓ Scanned PDF processing completed successfully")
        logger.info(f"[PDF-OCR] Final result: {len(text_segments)} text segments extracted")

        # Merge short segments to cut down on translation calls.
        merged_segments = self._merge_short_segments(text_segments)
        logger.info(f"[PDF-OCR] After merging: {len(merged_segments)} segments ready for translation")
        return merged_segments

    except Exception as e:
        logger.error(f"Scanned PDF processing failed: {str(e)}")
        raise FileProcessingError(f"扫描PDF处理失败: {str(e)}") from e
', '。', '!', '?', '!', '?', '\n\n'] + + current_sentences = [text] + + for sep in separators: + new_sentences = [] + for sentence in current_sentences: + parts = sentence.split(sep) + if len(parts) > 1: + # 保留分隔符 + for i, part in enumerate(parts[:-1]): + if part.strip(): + new_sentences.append(part.strip() + sep.rstrip()) + # 最后一部分 + if parts[-1].strip(): + new_sentences.append(parts[-1].strip()) + else: + new_sentences.append(sentence) + current_sentences = new_sentences + + # 过滤掉太短的句子 + valid_sentences = [s for s in current_sentences if len(s.strip()) > 3] + return valid_sentences + + def _split_ocr_text(self, ocr_text: str) -> List[str]: + """分割OCR识别的文字""" + if not ocr_text.strip(): + return [] + + # OCR结果可能包含表格或特殊格式,需要特殊处理 + lines = ocr_text.split('\n') + sentences = [] + + current_paragraph = [] + + for line in lines: + line = line.strip() + if not line: + # 空行表示段落结束 + if current_paragraph: + paragraph_text = ' '.join(current_paragraph) + if len(paragraph_text) > 10: + sentences.append(paragraph_text) + current_paragraph = [] + continue + + # 检查是否是表格行(包含|或多个制表符) + if '|' in line or '\t' in line: + # 表格行单独处理 + if current_paragraph: + paragraph_text = ' '.join(current_paragraph) + if len(paragraph_text) > 10: + sentences.append(paragraph_text) + current_paragraph = [] + + if len(line) > 10: + sentences.append(line) + else: + # 普通文字行 + current_paragraph.append(line) + + # 处理最后的段落 + if current_paragraph: + paragraph_text = ' '.join(current_paragraph) + if len(paragraph_text) > 10: + sentences.append(paragraph_text) + + return sentences + + def generate_translated_document(self, translations: dict, target_language: str, + output_dir: Path) -> str: + """生成翻译的Word文档(保持与DOCX相同的格式)""" + try: + from app.utils.helpers import generate_filename + + translated_texts = translations.get(target_language, []) + + # 生成Word文档而非文字文件 + output_filename = f"{self.file_path.stem}_{target_language}_translated.docx" + output_path = output_dir / output_filename + + # 创建Word文档 + from docx 
import Document + from docx.shared import Pt + from docx.enum.text import WD_PARAGRAPH_ALIGNMENT + + doc = Document() + + # 添加标题页 + title = doc.add_heading(f"PDF翻译结果 - {target_language}", 0) + title.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER + + # 添加文档信息 + info_para = doc.add_paragraph() + info_para.add_run("原始文件: ").bold = True + info_para.add_run(self.file_path.name) + info_para.add_run("\n处理方式: ").bold = True + info_para.add_run("OCR识别" if self.is_scanned_pdf() else "直接文字提取") + info_para.add_run(f"\n翻译语言: ").bold = True + info_para.add_run(target_language) + info_para.add_run(f"\n总段落数: ").bold = True + info_para.add_run(str(len(translated_texts))) + + doc.add_paragraph() # 空行 + + # 添加翻译内容 + for i, text in enumerate(translated_texts, 1): + content_type = self._detect_content_type(text) + + if content_type == 'table': + # 尝试创建实际的表格 + self._add_table_content(doc, text, i) + elif content_type == 'heading': + # 添加标题 + self._add_heading_content(doc, text, i) + elif content_type == 'list': + # 添加列表 + self._add_list_content(doc, text, i) + else: + # 普通段落 + self._add_paragraph_content(doc, text, i) + + # 保存Word文档 + doc.save(output_path) + logger.info(f"Generated translated PDF Word document: {output_path}") + return str(output_path) + + except Exception as e: + logger.error(f"Failed to generate translated Word document: {str(e)}") + raise FileProcessingError(f"生成翻译Word文档失败: {str(e)}") + + def generate_combined_translated_document(self, all_translations: dict, target_languages: list, + output_dir: Path) -> str: + """生成包含所有翻譯語言的組合Word文檔(譯文1/譯文2格式)""" + try: + from app.utils.helpers import generate_filename + + # 生成組合文檔檔名 + languages_suffix = '_'.join(target_languages) + output_filename = f"{self.file_path.stem}_{languages_suffix}_combined.docx" + output_path = output_dir / output_filename + + # 创建Word文档 + from docx import Document + from docx.shared import Pt + from docx.enum.text import WD_PARAGRAPH_ALIGNMENT + + doc = Document() + + # 添加标题页 + title = 
doc.add_heading(f"PDF翻译結果 - 多語言組合文檔", 0) + title.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER + + # 添加文档信息 + info_para = doc.add_paragraph() + info_para.add_run("原始文件: ").bold = True + info_para.add_run(self.file_path.name) + info_para.add_run("\n处理方式: ").bold = True + info_para.add_run("OCR识别" if self.is_scanned_pdf() else "直接文字提取") + info_para.add_run(f"\n翻译语言: ").bold = True + info_para.add_run(' / '.join(target_languages)) + + # 获取第一个語言的翻譯作為基準長度 + first_language = target_languages[0] + segment_count = len(all_translations.get(first_language, [])) + info_para.add_run(f"\n总段落数: ").bold = True + info_para.add_run(str(segment_count)) + + doc.add_paragraph() # 空行 + + # 添加翻译内容 - 譯文1/譯文2格式 + for i in range(segment_count): + content_para = doc.add_paragraph() + + # 添加段落编号 + num_run = content_para.add_run(f"{i+1:03d}. ") + num_run.bold = True + num_run.font.size = Pt(12) + + # 为每种语言添加翻譯 + for j, target_language in enumerate(target_languages): + if i < len(all_translations.get(target_language, [])): + translation_text = all_translations[target_language][i] + + # 添加語言標識 + if j > 0: + content_para.add_run("\n\n") # 翻譯之間的間距 + + lang_run = content_para.add_run(f"[{target_language}] ") + lang_run.bold = True + lang_run.font.size = Pt(11) + + # 添加翻譯内容 + trans_run = content_para.add_run(translation_text) + trans_run.font.size = Pt(11) + + # 段落間距 + content_para.paragraph_format.space_after = Pt(12) + + # 保存Word文档 + doc.save(output_path) + logger.info(f"Generated combined translated PDF Word document: {output_path}") + return str(output_path) + + except Exception as e: + logger.error(f"Failed to generate combined translated Word document: {str(e)}") + raise FileProcessingError(f"生成組合翻译Word文档失败: {str(e)}") + + def _is_table_component(self, segment: str) -> bool: + """檢查段落是否為表格組件(表格邊界、分隔線等)""" + segment = segment.strip() + + # Markdown表格分隔線:如 |---|---|---| 或 |===|===|===| + if '|' in segment and ('-' in segment or '=' in segment): + # 移除 | 和 - = 後,如果剩餘內容很少,則判斷為表格分隔線 + clean_segment = 
segment.replace('|', '').replace('-', '').replace('=', '').replace(' ', '').replace(':', '') + if len(clean_segment) <= 2: # 允許少量其他字符 + return True + + # 純分隔線 + if segment.replace('=', '').replace('-', '').replace(' ', '') == '': + return True + + return False + + def _is_table_row(self, segment: str) -> bool: + """檢查段落是否為表格行(包含實際數據的表格行)""" + segment = segment.strip() + + # Markdown表格行:至少包含兩個 | 符號,且有實際內容 + if segment.count('|') >= 2: + # 移除首尾的 | 並分割為單元格 + cells = segment.strip('|').split('|') + # 檢查是否有實際的文字內容(不只是分隔符號) + has_content = any( + cell.strip() and + not cell.replace('-', '').replace('=', '').replace(' ', '').replace(':', '') == '' + for cell in cells + ) + if has_content: + return True + + return False + + def _merge_table_segments(self, segments: List[str], start_idx: int) -> tuple[str, int]: + """ + 合併表格相關的段落 + + Returns: + (merged_table_content, next_index) + """ + table_parts = [] + current_idx = start_idx + + # 收集連續的表格相關段落 + while current_idx < len(segments): + segment = segments[current_idx].strip() + + if self._is_table_component(segment) or self._is_table_row(segment): + table_parts.append(segment) + current_idx += 1 + else: + break + + # 將表格部分合併為一個段落 + merged_table = '\n'.join(table_parts) + return merged_table, current_idx + + def _merge_short_segments(self, text_segments: List[str], min_length: int = 10) -> List[str]: + """ + 合併短段落以減少不必要的翻譯調用,特別處理表格結構 + + Args: + text_segments: 原始文字段落列表 + min_length: 最小段落長度閾值,短於此長度的段落將被合併 + + Returns: + 合併後的段落列表 + """ + if not text_segments: + return text_segments + + merged_segments = [] + current_merge = "" + i = 0 + + while i < len(text_segments): + segment = text_segments[i].strip() + if not segment: # 跳過空段落 + i += 1 + continue + + # 檢查是否為表格組件 + if self._is_table_component(segment) or self._is_table_row(segment): + # 先處理之前積累的短段落 + if current_merge: + merged_segments.append(current_merge.strip()) + logger.debug(f"Merged short segments before table: '{current_merge[:50]}...'") + current_merge = "" + + # 
合併表格相關段落 + table_content, next_i = self._merge_table_segments(text_segments, i) + merged_segments.append(table_content) + logger.debug(f"Merged table content: {next_i - i} segments -> 1 table block") + i = next_i + continue + + # 檢查是否為短段落 + if len(segment) < min_length: + # 檢查是否為純標點符號或數字(排除表格符號) + if segment.replace('*', '').replace('-', '').replace('_', '').replace('#', '').strip() == '': + logger.debug(f"Skipping pure symbol segment: '{segment}'") + i += 1 + continue + + # 短段落需要合併 + if current_merge: + current_merge += " " + segment + else: + current_merge = segment + + logger.debug(f"Adding short segment to merge: '{segment}' (length: {len(segment)})") + + else: + # 長段落,先處理之前積累的短段落 + if current_merge: + merged_segments.append(current_merge.strip()) + logger.debug(f"Merged short segments: '{current_merge[:50]}...' (total length: {len(current_merge)})") + current_merge = "" + + # 添加當前長段落 + merged_segments.append(segment) + logger.debug(f"Added long segment: '{segment[:50]}...' (length: {len(segment)})") + + i += 1 + + # 處理最後剩餘的短段落 + if current_merge: + merged_segments.append(current_merge.strip()) + logger.debug(f"Final merged short segments: '{current_merge[:50]}...' 
(total length: {len(current_merge)})") + + logger.info(f"Segment merging: {len(text_segments)} -> {len(merged_segments)} segments") + return merged_segments + + def _detect_content_type(self, text: str) -> str: + """检测内容类型""" + text_lower = text.lower().strip() + + # 检测表格(包含多个|或制表符) + if ('|' in text and text.count('|') >= 2) or '\t' in text: + return 'table' + + # 检测标题 + if (text_lower.startswith(('第', '章', 'chapter', 'section', '#')) or + any(keyword in text_lower for keyword in ['章', '节', '第']) and len(text) < 100): + return 'heading' + + # 检测列表 + if (text_lower.startswith(('•', '-', '*', '1.', '2.', '3.', '4.', '5.')) or + any(text_lower.startswith(f"{i}.") for i in range(1, 20))): + return 'list' + + return 'paragraph' + + def _add_table_content(self, doc, text: str, index: int): + """添加表格内容""" + from docx.shared import Pt + + # 添加表格标题 + title_para = doc.add_paragraph() + title_run = title_para.add_run(f"表格 {index}: ") + title_run.bold = True + title_run.font.size = Pt(12) + + # 解析表格 + if '|' in text: + # Markdown风格表格 + lines = [line.strip() for line in text.split('\n') if line.strip()] + rows = [] + for line in lines: + if line.startswith('|') and line.endswith('|'): + cells = [cell.strip() for cell in line.split('|')[1:-1]] + if cells: # 过滤掉分隔行(如|---|---|) + if not all(cell.replace('-', '').replace(' ', '') == '' for cell in cells): + rows.append(cells) + + if rows: + # 创建表格 + table = doc.add_table(rows=len(rows), cols=len(rows[0])) + table.style = 'Table Grid' + + for i, row_data in enumerate(rows): + for j, cell_data in enumerate(row_data): + if j < len(table.rows[i].cells): + cell = table.rows[i].cells[j] + cell.text = cell_data + # 设置字体 + for paragraph in cell.paragraphs: + for run in paragraph.runs: + run.font.size = Pt(10) + else: + # 制表符分隔的表格 + para = doc.add_paragraph() + content_run = para.add_run(text) + content_run.font.name = 'Courier New' + content_run.font.size = Pt(10) + + def _add_heading_content(self, doc, text: str, index: int): + 
"""添加标题内容""" + from docx.shared import Pt + + # 移除段落编号,直接作为标题 + clean_text = text.strip() + if len(clean_text) < 100: + heading = doc.add_heading(clean_text, level=2) + else: + # 长文本作为普通段落但使用标题样式 + para = doc.add_paragraph() + run = para.add_run(clean_text) + run.bold = True + run.font.size = Pt(14) + + def _add_list_content(self, doc, text: str, index: int): + """添加列表内容""" + from docx.shared import Pt + + # 检查是否已经有编号 + if any(text.strip().startswith(f"{i}.") for i in range(1, 20)): + # 已编号列表 + para = doc.add_paragraph(text.strip(), style='List Number') + else: + # 项目符号列表 + para = doc.add_paragraph(text.strip(), style='List Bullet') + + # 设置字体大小 + for run in para.runs: + run.font.size = Pt(11) + + def _add_paragraph_content(self, doc, text: str, index: int): + """添加普通段落内容""" + from docx.shared import Pt + + para = doc.add_paragraph() + + # 添加段落编号(可选) + num_run = para.add_run(f"{index:03d}. ") + num_run.bold = True + num_run.font.size = Pt(12) + + # 添加内容 + content_run = para.add_run(text) + content_run.font.size = Pt(11) + + # 设置段落间距 + para.paragraph_format.space_after = Pt(6) \ No newline at end of file diff --git a/app/services/notification_service.py b/app/services/notification_service.py new file mode 100644 index 0000000..218d1b4 --- /dev/null +++ b/app/services/notification_service.py @@ -0,0 +1,647 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +通知服務 + +Author: PANJIT IT Team +Created: 2024-01-28 +Modified: 2024-01-28 +""" + +import os +import smtplib +from email.mime.text import MIMEText +from email.mime.multipart import MIMEMultipart +from datetime import datetime, timedelta +from typing import Optional, List, Dict, Any +from flask import current_app, url_for +from app import db +from app.utils.logger import get_logger +from app.models.job import TranslationJob +from app.models.user import User +from app.models.notification import Notification, NotificationType + +logger = get_logger(__name__) + + +class NotificationService: + """通知服務""" + + def 
__init__(self): + self.smtp_server = current_app.config.get('SMTP_SERVER') + self.smtp_port = current_app.config.get('SMTP_PORT', 587) + self.use_tls = current_app.config.get('SMTP_USE_TLS', False) + self.use_ssl = current_app.config.get('SMTP_USE_SSL', False) + self.auth_required = current_app.config.get('SMTP_AUTH_REQUIRED', False) + self.sender_email = current_app.config.get('SMTP_SENDER_EMAIL') + self.sender_password = current_app.config.get('SMTP_SENDER_PASSWORD', '') + self.app_name = current_app.config.get('APP_NAME', 'PANJIT Document Translator') + + def _create_smtp_connection(self): + """建立 SMTP 連線""" + try: + if self.use_ssl: + server = smtplib.SMTP_SSL(self.smtp_server, self.smtp_port) + else: + server = smtplib.SMTP(self.smtp_server, self.smtp_port) + if self.use_tls: + server.starttls() + + if self.auth_required and self.sender_password: + server.login(self.sender_email, self.sender_password) + + return server + except Exception as e: + logger.error(f"SMTP connection failed: {str(e)}") + return None + + def _send_email(self, to_email: str, subject: str, html_content: str, text_content: str = None) -> bool: + """發送郵件的基礎方法 - 已停用 (資安限制,無法連接內網)""" + logger.info(f"SMTP service disabled - Email notification skipped for {to_email}: {subject}") + return True # 回傳 True 避免影響其他流程 + + # 以下 SMTP 功能已註解,因應資安限制無法連接內網 + # try: + # if not self.smtp_server or not self.sender_email: + # logger.error("SMTP configuration incomplete") + # return False + # + # # 建立郵件 + # msg = MIMEMultipart('alternative') + # msg['From'] = f"{self.app_name} <{self.sender_email}>" + # msg['To'] = to_email + # msg['Subject'] = subject + # + # # 添加文本內容 + # if text_content: + # text_part = MIMEText(text_content, 'plain', 'utf-8') + # msg.attach(text_part) + # + # # 添加 HTML 內容 + # html_part = MIMEText(html_content, 'html', 'utf-8') + # msg.attach(html_part) + # + # # 發送郵件 + # server = self._create_smtp_connection() + # if not server: + # return False + # + # server.send_message(msg) + # 
server.quit() + # + # logger.info(f"Email sent successfully to {to_email}") + # return True + # + # except Exception as e: + # logger.error(f"Failed to send email to {to_email}: {str(e)}") + # return False + + def send_job_completion_notification(self, job: TranslationJob) -> bool: + """發送任務完成通知""" + try: + if not job.user or not job.user.email: + logger.warning(f"No email address for job {job.job_uuid}") + return False + + # 準備郵件內容 + subject = f"📄 翻譯完成通知 - {job.original_filename}" + + # 計算處理時間 + processing_time = "" + if job.processing_started_at and job.completed_at: + duration = job.completed_at - job.processing_started_at + total_seconds = int(duration.total_seconds()) + + if total_seconds < 60: + processing_time = f"{total_seconds}秒" + elif total_seconds < 3600: + minutes = total_seconds // 60 + seconds = total_seconds % 60 + processing_time = f"{minutes}分{seconds}秒" + else: + hours = total_seconds // 3600 + minutes = (total_seconds % 3600) // 60 + processing_time = f"{hours}小時{minutes}分" + + # 生成下載連結(簡化版本) + download_links = [] + for lang in job.target_languages: + download_links.append(f"• {lang}: [下載翻譯檔案]") + + html_content = f""" + + + + + + + +
+
+

🎉 翻譯任務完成

+
+ +
+

親愛的 {job.user.display_name}

+ +

您的文件翻譯任務已成功完成!

+ +
+

📋 任務詳細資訊

+

檔案名稱: {job.original_filename}

+

任務編號: {job.job_uuid}

+

來源語言: {job.source_language}

+

目標語言: {', '.join(job.target_languages)}

+

處理時間: {processing_time}

+

完成時間: {job.completed_at.strftime('%Y-%m-%d %H:%M:%S') if job.completed_at else '未知'}

+ {f'

總成本: ${job.total_cost:.4f}

' if job.total_cost else ''} +
+ +
+

📥 下載翻譯檔案

+

請登入系統下載您的翻譯檔案:

+

{'
'.join(download_links)}

+

+ 注意: 翻譯檔案將在系統中保留 7 天,請及時下載。 +

+
+ +
+

感謝您使用 {self.app_name}!

+

如有任何問題,請聯繫系統管理員。

+
+
+ + +
+ + + """ + + # 純文字版本 + text_content = f""" + 翻譯任務完成通知 + + 親愛的 {job.user.display_name}, + + 您的文件翻譯任務已成功完成! + + 任務詳細資訊: + - 檔案名稱: {job.original_filename} + - 任務編號: {job.job_uuid} + - 來源語言: {job.source_language} + - 目標語言: {', '.join(job.target_languages)} + - 處理時間: {processing_time} + - 完成時間: {job.completed_at.strftime('%Y-%m-%d %H:%M:%S') if job.completed_at else '未知'} + + 請登入系統下載您的翻譯檔案。翻譯檔案將在系統中保留 7 天。 + + 感謝您使用 {self.app_name}! + + ---- + 此郵件由系統自動發送,請勿回覆。 + """ + + return self._send_email(job.user.email, subject, html_content, text_content) + + except Exception as e: + logger.error(f"Failed to send completion notification for job {job.job_uuid}: {str(e)}") + return False + + def send_job_failure_notification(self, job: TranslationJob) -> bool: + """發送任務失敗通知""" + try: + if not job.user or not job.user.email: + logger.warning(f"No email address for job {job.job_uuid}") + return False + + subject = f"⚠️ 翻譯失敗通知 - {job.original_filename}" + + html_content = f""" + + + + + + + +
+
+

❌ 翻譯任務失敗

+
+ +
+

親愛的 {job.user.display_name}

+ +

很抱歉,您的文件翻譯任務處理失敗。

+ +
+

📋 任務資訊

+

檔案名稱: {job.original_filename}

+

任務編號: {job.job_uuid}

+

重試次數: {job.retry_count}

+

錯誤訊息: {job.error_message or '未知錯誤'}

+

失敗時間: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

+
+ +
+

建議處理方式:

+
    +
  • 檢查檔案格式是否正確
  • +
  • 確認檔案沒有損壞
  • +
  • 稍後再次嘗試上傳
  • +
  • 如問題持續,請聯繫系統管理員
  • +
+
+ +
+

如需協助,請聯繫系統管理員。

+
+
+ + +
+ + + """ + + text_content = f""" + 翻譯任務失敗通知 + + 親愛的 {job.user.display_name}, + + 很抱歉,您的文件翻譯任務處理失敗。 + + 任務資訊: + - 檔案名稱: {job.original_filename} + - 任務編號: {job.job_uuid} + - 重試次數: {job.retry_count} + - 錯誤訊息: {job.error_message or '未知錯誤'} + + 建議處理方式: + 1. 檢查檔案格式是否正確 + 2. 確認檔案沒有損壞 + 3. 稍後再次嘗試上傳 + 4. 如問題持續,請聯繫系統管理員 + + 如需協助,請聯繫系統管理員。 + + ---- + 此郵件由 {self.app_name} 系統自動發送,請勿回覆。 + """ + + return self._send_email(job.user.email, subject, html_content, text_content) + + except Exception as e: + logger.error(f"Failed to send failure notification for job {job.job_uuid}: {str(e)}") + return False + + def send_admin_notification(self, subject: str, message: str, admin_emails: List[str] = None) -> bool: + """發送管理員通知""" + try: + if not admin_emails: + # 取得所有管理員郵件地址 + admin_users = User.get_admin_users() + admin_emails = [user.email for user in admin_users if user.email] + + if not admin_emails: + logger.warning("No admin email addresses found") + return False + + html_content = f""" + + + + + + + +
+
+

🔔 系統管理通知

+
+ +
+

系統管理員您好,

+ +
+

{subject}

+

{message}

+
+ +

發送時間: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

+
+ + +
+ + + """ + + success_count = 0 + for email in admin_emails: + if self._send_email(email, f"[管理通知] {subject}", html_content): + success_count += 1 + + return success_count > 0 + + except Exception as e: + logger.error(f"Failed to send admin notification: {str(e)}") + return False + + def test_smtp_connection(self) -> bool: + """測試 SMTP 連線""" + try: + server = self._create_smtp_connection() + if server: + server.quit() + return True + return False + except Exception as e: + logger.error(f"SMTP connection test failed: {str(e)}") + return False + + # ========== 資料庫通知方法 ========== + + def create_db_notification( + self, + user_id: int, + title: str, + message: str, + notification_type: NotificationType = NotificationType.INFO, + job_uuid: Optional[str] = None, + extra_data: Optional[Dict[str, Any]] = None, + expires_at: Optional[datetime] = None, + link: Optional[str] = None + ) -> Optional[Notification]: + """ + 創建資料庫通知 + + Args: + user_id: 用戶ID + title: 通知標題 + message: 通知內容 + notification_type: 通知類型 + job_uuid: 關聯任務UUID + extra_data: 額外數據 + expires_at: 過期時間 + link: 相關連結 + + Returns: + Notification: 創建的通知對象 + """ + try: + # 如果沒有指定連結但有任務UUID,自動生成任務詳情連結 + if not link and job_uuid: + link = f"/job/{job_uuid}" + + notification = Notification( + user_id=user_id, + type=notification_type.value, + title=title, + message=message, + job_uuid=job_uuid, + link=link, + extra_data=extra_data, + expires_at=expires_at + ) + + db.session.add(notification) + db.session.commit() + + logger.info(f"資料庫通知已創建: {notification.notification_uuid} for user {user_id}") + + # WebSocket 推送已禁用 + # self._send_websocket_notification(notification) + + return notification + + except Exception as e: + db.session.rollback() + logger.error(f"創建資料庫通知失敗: {e}") + return None + + def send_job_started_db_notification(self, job: TranslationJob) -> Optional[Notification]: + """ + 發送任務開始處理的資料庫通知 + + Args: + job: 翻譯任務對象 + + Returns: + Notification: 創建的通知對象 + """ + try: + title = "翻譯任務開始處理" + message = 
f'您的文件「{job.original_filename}」已開始翻譯處理。' + + if job.target_languages: + languages = ', '.join(job.target_languages) + message += f" 目標語言: {languages}" + + return self.create_db_notification( + user_id=job.user_id, + title=title, + message=message, + notification_type=NotificationType.INFO, + job_uuid=job.job_uuid, + extra_data={ + 'filename': job.original_filename, + 'target_languages': job.target_languages, + 'started_at': job.processing_started_at.isoformat() if job.processing_started_at else None + } + ) + + except Exception as e: + logger.error(f"發送任務開始資料庫通知失敗: {e}") + return None + + def send_job_completion_db_notification(self, job: TranslationJob) -> Optional[Notification]: + """ + 發送任務完成的資料庫通知 + + Args: + job: 翻譯任務對象 + + Returns: + Notification: 創建的通知對象 + """ + try: + if job.status != 'COMPLETED': + logger.warning(f"任務 {job.job_uuid} 狀態不是已完成,跳過完成通知") + return None + + # 構建通知內容 + title = "翻譯任務完成" + message = f'您的文件「{job.original_filename}」已成功翻譯完成。' + + # 添加目標語言信息 + if job.target_languages: + languages = ', '.join(job.target_languages) + message += f" 目標語言: {languages}" + + # 添加處理時間信息 + if job.processing_started_at and job.completed_at: + duration = job.completed_at - job.processing_started_at + minutes = int(duration.total_seconds() / 60) + if minutes > 0: + message += f" 處理時間: {minutes} 分鐘" + else: + message += f" 處理時間: {int(duration.total_seconds())} 秒" + + return self.create_db_notification( + user_id=job.user_id, + title=title, + message=message, + notification_type=NotificationType.SUCCESS, + job_uuid=job.job_uuid, + extra_data={ + 'filename': job.original_filename, + 'target_languages': job.target_languages, + 'total_cost': float(job.total_cost) if job.total_cost else 0, + 'completed_at': job.completed_at.isoformat() if job.completed_at else None + } + ) + + except Exception as e: + logger.error(f"發送任務完成資料庫通知失敗: {e}") + return None + + def send_job_completion_db_notification_direct(self, job: TranslationJob) -> Optional[Notification]: + """ + 
直接發送任務完成的資料庫通知(不檢查狀態) + """ + try: + # 構建通知內容 + title = "翻譯任務完成" + message = f'您的文件「{job.original_filename}」已成功翻譯完成。' + + # 添加目標語言信息 + if job.target_languages: + languages = ', '.join(job.target_languages) + message += f" 目標語言: {languages}" + + message += " 您可以在任務列表中下載翻譯結果。" + + # 創建資料庫通知 + return self.create_db_notification( + user_id=job.user_id, + title=title, + message=message, + notification_type=NotificationType.SUCCESS, + job_uuid=job.job_uuid, + extra_data={ + 'filename': job.original_filename, + 'target_languages': job.target_languages, + 'total_cost': float(job.total_cost) if job.total_cost else 0, + 'completed_at': job.completed_at.isoformat() if job.completed_at else None + } + ) + + except Exception as e: + logger.error(f"發送任務完成資料庫通知失敗: {e}") + return None + + def send_job_failure_db_notification(self, job: TranslationJob, error_message: str = None) -> Optional[Notification]: + """ + 發送任務失敗的資料庫通知 + + Args: + job: 翻譯任務對象 + error_message: 錯誤訊息 + + Returns: + Notification: 創建的通知對象 + """ + try: + title = "翻譯任務失敗" + message = f'您的文件「{job.original_filename}」翻譯失敗。' + + if error_message: + message += f" 錯誤訊息: {error_message}" + + if job.retry_count > 0: + message += f" 已重試 {job.retry_count} 次。" + + return self.create_db_notification( + user_id=job.user_id, + title=title, + message=message, + notification_type=NotificationType.ERROR, + job_uuid=job.job_uuid, + extra_data={ + 'filename': job.original_filename, + 'error_message': error_message, + 'retry_count': job.retry_count, + 'failed_at': datetime.now().isoformat() + } + ) + + except Exception as e: + logger.error(f"發送任務失敗資料庫通知失敗: {e}") + return None + + def _send_websocket_notification(self, notification: Notification): + """ + 通過 WebSocket 發送通知 - 已禁用 + + Args: + notification: 通知對象 + """ + # WebSocket 功能已完全禁用 + logger.debug(f"WebSocket 推送已禁用,跳過通知: {notification.notification_uuid}") + pass + + def get_unread_count(self, user_id: int) -> int: + """ + 獲取用戶未讀通知數量 + + Args: + user_id: 用戶ID + + Returns: + int: 
未讀通知數量 + """ + try: + return Notification.query.filter_by( + user_id=user_id, + is_read=False + ).filter( + (Notification.expires_at.is_(None)) | + (Notification.expires_at > datetime.now()) + ).count() + except Exception as e: + logger.error(f"獲取未讀通知數量失敗: {e}") + return 0 \ No newline at end of file diff --git a/app/services/ocr_cache.py b/app/services/ocr_cache.py new file mode 100644 index 0000000..7c63201 --- /dev/null +++ b/app/services/ocr_cache.py @@ -0,0 +1,282 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +OCR 快取管理模組 + +Author: PANJIT IT Team +Created: 2024-01-28 +Modified: 2024-01-28 +""" + +import hashlib +import json +import sqlite3 +from datetime import datetime, timedelta +from pathlib import Path +from typing import Optional, Dict, Any +import logging + +logger = logging.getLogger(__name__) + +class OCRCache: + """OCR 結果快取管理器""" + + def __init__(self, cache_db_path: str = "ocr_cache.db", cache_expire_days: int = 30): + """ + 初始化 OCR 快取管理器 + + Args: + cache_db_path: 快取資料庫路徑 + cache_expire_days: 快取過期天數 + """ + self.cache_db_path = Path(cache_db_path) + self.cache_expire_days = cache_expire_days + self.init_database() + + def init_database(self): + """初始化快取資料庫""" + try: + with sqlite3.connect(self.cache_db_path) as conn: + cursor = conn.cursor() + cursor.execute(''' + CREATE TABLE IF NOT EXISTS ocr_cache ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + file_hash TEXT UNIQUE NOT NULL, + filename TEXT, + file_size INTEGER, + extracted_text TEXT NOT NULL, + extraction_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + access_count INTEGER DEFAULT 1, + last_access_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + metadata TEXT + ) + ''') + + # 創建索引以提高查詢效能 + cursor.execute(''' + CREATE INDEX IF NOT EXISTS idx_file_hash + ON ocr_cache(file_hash) + ''') + cursor.execute(''' + CREATE INDEX IF NOT EXISTS idx_extraction_time + ON ocr_cache(extraction_time) + ''') + + conn.commit() + logger.info("OCR 快取資料庫初始化完成") + + except Exception as e: + logger.error(f"初始化 OCR 
快取資料庫失敗: {e}") + raise + + def _calculate_file_hash(self, file_data: bytes, additional_info: str = "") -> str: + """ + 計算檔案內容的 SHA256 雜湊值 + + Args: + file_data: 檔案二進位資料 + additional_info: 額外資訊(如頁數、處理參數等) + + Returns: + 檔案的 SHA256 雜湊值 + """ + hash_input = file_data + additional_info.encode('utf-8') + return hashlib.sha256(hash_input).hexdigest() + + def get_cached_text(self, file_data: bytes, filename: str = "", + additional_info: str = "") -> Optional[str]: + """ + 獲取快取的 OCR 文字 + + Args: + file_data: 檔案二進位資料 + filename: 檔案名稱 + additional_info: 額外資訊 + + Returns: + 快取的文字內容,如果不存在則返回 None + """ + try: + file_hash = self._calculate_file_hash(file_data, additional_info) + + with sqlite3.connect(self.cache_db_path) as conn: + cursor = conn.cursor() + + # 查詢快取 + cursor.execute(''' + SELECT extracted_text, access_count + FROM ocr_cache + WHERE file_hash = ? AND + extraction_time > datetime('now', '-{} days') + '''.format(self.cache_expire_days), (file_hash,)) + + result = cursor.fetchone() + + if result: + extracted_text, access_count = result + + # 更新訪問計數和時間 + cursor.execute(''' + UPDATE ocr_cache + SET access_count = ?, last_access_time = CURRENT_TIMESTAMP + WHERE file_hash = ? 
+ ''', (access_count + 1, file_hash)) + + conn.commit() + + logger.info(f"[OCR-CACHE] 快取命中: {filename} (訪問次數: {access_count + 1})") + return extracted_text + + logger.debug(f"[OCR-CACHE] 快取未命中: {filename}") + return None + + except Exception as e: + logger.error(f"獲取 OCR 快取失敗: {e}") + return None + + def save_cached_text(self, file_data: bytes, extracted_text: str, + filename: str = "", additional_info: str = "", + metadata: Dict[str, Any] = None) -> bool: + """ + 儲存 OCR 文字到快取 + + Args: + file_data: 檔案二進位資料 + extracted_text: 提取的文字 + filename: 檔案名稱 + additional_info: 額外資訊 + metadata: 中繼資料 + + Returns: + 是否儲存成功 + """ + try: + file_hash = self._calculate_file_hash(file_data, additional_info) + file_size = len(file_data) + metadata_json = json.dumps(metadata or {}, ensure_ascii=False) + + with sqlite3.connect(self.cache_db_path) as conn: + cursor = conn.cursor() + + # 使用 INSERT OR REPLACE 來處理重複的雜湊值 + cursor.execute(''' + INSERT OR REPLACE INTO ocr_cache + (file_hash, filename, file_size, extracted_text, metadata) + VALUES (?, ?, ?, ?, ?) 
+ ''', (file_hash, filename, file_size, extracted_text, metadata_json)) + + conn.commit() + + logger.info(f"[OCR-CACHE] 儲存快取成功: {filename} ({len(extracted_text)} 字元)") + return True + + except Exception as e: + logger.error(f"儲存 OCR 快取失敗: {e}") + return False + + def get_cache_stats(self) -> Dict[str, Any]: + """ + 獲取快取統計資訊 + + Returns: + 快取統計資料 + """ + try: + with sqlite3.connect(self.cache_db_path) as conn: + cursor = conn.cursor() + + # 總記錄數 + cursor.execute('SELECT COUNT(*) FROM ocr_cache') + total_records = cursor.fetchone()[0] + + # 總訪問次數 + cursor.execute('SELECT SUM(access_count) FROM ocr_cache') + total_accesses = cursor.fetchone()[0] or 0 + + # 快取大小 + cursor.execute('SELECT SUM(LENGTH(extracted_text)) FROM ocr_cache') + cache_size_chars = cursor.fetchone()[0] or 0 + + # 最近 7 天的記錄數 + cursor.execute(''' + SELECT COUNT(*) FROM ocr_cache + WHERE extraction_time > datetime('now', '-7 days') + ''') + recent_records = cursor.fetchone()[0] + + # 最常訪問的記錄 + cursor.execute(''' + SELECT filename, access_count, last_access_time + FROM ocr_cache + ORDER BY access_count DESC + LIMIT 5 + ''') + top_accessed = cursor.fetchall() + + return { + 'total_records': total_records, + 'total_accesses': total_accesses, + 'cache_size_chars': cache_size_chars, + 'cache_size_mb': cache_size_chars / (1024 * 1024), + 'recent_records_7days': recent_records, + 'top_accessed_files': [ + { + 'filename': row[0], + 'access_count': row[1], + 'last_access': row[2] + } + for row in top_accessed + ], + 'cache_hit_potential': f"{(total_accesses - total_records) / max(total_accesses, 1) * 100:.1f}%" + } + + except Exception as e: + logger.error(f"獲取快取統計失敗: {e}") + return {} + + def clean_expired_cache(self) -> int: + """ + 清理過期的快取記錄 + + Returns: + 清理的記錄數量 + """ + try: + with sqlite3.connect(self.cache_db_path) as conn: + cursor = conn.cursor() + + # 刪除過期記錄 + cursor.execute(''' + DELETE FROM ocr_cache + WHERE extraction_time < datetime('now', '-{} days') + '''.format(self.cache_expire_days)) + + 
deleted_count = cursor.rowcount + conn.commit() + + logger.info(f"[OCR-CACHE] 清理過期快取: {deleted_count} 筆記錄") + return deleted_count + + except Exception as e: + logger.error(f"清理過期快取失敗: {e}") + return 0 + + def clear_all_cache(self) -> bool: + """ + 清空所有快取 + + Returns: + 是否成功 + """ + try: + with sqlite3.connect(self.cache_db_path) as conn: + cursor = conn.cursor() + cursor.execute('DELETE FROM ocr_cache') + conn.commit() + + logger.info("[OCR-CACHE] 已清空所有快取") + return True + + except Exception as e: + logger.error(f"清空快取失敗: {e}") + return False \ No newline at end of file diff --git a/app/services/translation_service.py b/app/services/translation_service.py new file mode 100644 index 0000000..baeb751 --- /dev/null +++ b/app/services/translation_service.py @@ -0,0 +1,2634 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +翻譯服務 + +Author: PANJIT IT Team +Created: 2024-01-28 +Modified: 2024-01-28 +""" + +import hashlib +import time +from pathlib import Path +from typing import List, Dict, Any, Optional, Tuple +from app.utils.logger import get_logger +from app.utils.exceptions import TranslationError, FileProcessingError +from app.services.dify_client import DifyClient +from app.services.document_processor import DocumentProcessor, Segment +from app.models.cache import TranslationCache +from app.models.job import TranslationJob +from app.utils.helpers import generate_filename, create_job_directory +from app import db + +logger = get_logger(__name__) + + +class DocumentParser: + """文件解析器基類""" + + def __init__(self, file_path: str): + self.file_path = Path(file_path) + + if not self.file_path.exists(): + raise FileProcessingError(f"檔案不存在: {file_path}") + + def extract_text_segments(self) -> List[str]: + """提取文字片段""" + raise NotImplementedError + + def generate_translated_document(self, translations: Dict[str, List[str]], + target_language: str, output_dir: Path) -> str: + """生成翻譯後的文件""" + raise NotImplementedError + + +class DocxParser(DocumentParser): + """DOCX 
文件解析器 - 使用增強的 DocumentProcessor""" + + def __init__(self, file_path: str): + super().__init__(file_path) + self.processor = DocumentProcessor() + + def extract_text_segments(self) -> List[str]: + """提取 DOCX 文件的文字片段 - 使用增強邏輯""" + try: + # 使用新的文檔處理器提取段落 + segments = self.processor.extract_docx_segments(str(self.file_path)) + + # 轉換為文字列表 + text_segments = [] + for seg in segments: + if seg.text.strip() and len(seg.text.strip()) > 3: + text_segments.append(seg.text) + + logger.info(f"Enhanced extraction: {len(text_segments)} text segments from DOCX") + return text_segments + + except Exception as e: + logger.error(f"Failed to extract text from DOCX: {str(e)}") + raise FileProcessingError(f"DOCX 文件解析失敗: {str(e)}") + + def extract_segments_with_context(self) -> List[Segment]: + """提取帶上下文的段落資訊""" + return self.processor.extract_docx_segments(str(self.file_path)) + + def generate_translated_document(self, translations: Dict[str, List[str]], + target_language: str, output_dir: Path) -> str: + """生成翻譯後的 DOCX 文件 - 使用增強的翻譯插入邏輯(從快取讀取)""" + try: + from sqlalchemy import text as sql_text + from app import db + + # 生成輸出檔名 + output_filename = generate_filename( + self.file_path.name, + 'translated', + 'translated', + target_language + ) + output_path = output_dir / output_filename + + # 提取段落資訊 + segments = self.extract_segments_with_context() + + # 建立翻譯映射 - 從快取讀取而非使用傳入的translations參數 + translation_map = {} + + logger.info(f"Building translation map for {len(segments)} segments in language {target_language}") + + for seg in segments: + # 從翻譯快取中查詢每個段落的翻譯 + result = db.session.execute(sql_text(""" + SELECT translated_text + FROM dt_translation_cache + WHERE source_text = :text AND target_language = :lang + ORDER BY created_at DESC + LIMIT 1 + """), {'text': seg.text, 'lang': target_language}) + + row = result.fetchone() + if row and row[0]: + translation_map[(target_language, seg.text)] = row[0] + logger.debug(f"Found translation for: {seg.text[:50]}...") + else: + logger.warning(f"No 
class DocParser(DocumentParser):
    """Parser for legacy Word ``.doc`` files.

    The binary format is never read directly. The document is first converted
    to a temporary DOCX through Word COM automation and then delegated to
    ``DocxParser``; translated output is therefore always a DOCX file.
    """

    # File-format code handed to Word's SaveAs2 for the DOC -> DOCX conversion.
    _DOCX_FILE_FORMAT = 16

    @staticmethod
    def _discard_temp_file(path) -> None:
        """Best-effort removal of a temporary file; a failed delete is ignored."""
        import os

        if path and os.path.exists(path):
            try:
                os.remove(path)
            except OSError:
                # Cleanup must never hide the result (or error) of the real work.
                pass

    def extract_text_segments(self) -> List[str]:
        """Convert the DOC file to DOCX and return its translatable text segments.

        Returns:
            The segments produced by ``DocxParser.extract_text_segments`` for
            the converted document.

        Raises:
            FileProcessingError: if Word COM support is unavailable, the
                conversion fails, or the converted document cannot be parsed.
        """
        try:
            import tempfile

            # Word COM (pywin32) is required for the conversion step.
            try:
                import win32com.client  # noqa: F401
                import pythoncom  # noqa: F401
            except ImportError:
                com_available = False
            else:
                com_available = True

            if not com_available:
                raise FileProcessingError("DOC 格式需要 Word COM 支援,請先手動轉換為 DOCX 格式或安裝 Microsoft Office")

            temp_docx = None
            try:
                # Reserve a path only; Word writes the converted file itself.
                with tempfile.NamedTemporaryFile(suffix='.docx', delete=False) as tmp:
                    temp_docx = tmp.name

                self._word_convert(str(self.file_path), temp_docx, self._DOCX_FILE_FORMAT)

                segments = DocxParser(temp_docx).extract_text_segments()

                logger.info(f"Converted DOC to DOCX and extracted {len(segments)} segments")
                return segments

            finally:
                self._discard_temp_file(temp_docx)

        except Exception as e:
            logger.error(f"Failed to extract text from DOC file: {str(e)}")
            raise FileProcessingError(f"DOC 文件解析失敗: {str(e)}") from e

    def _word_convert(self, input_path: str, output_path: str, target_format: int):
        """Convert a document to another format through Word COM automation.

        Args:
            input_path: Path of the document to open.
            output_path: Path the converted document is saved to.
            target_format: Word file-format code passed to ``SaveAs2``.

        Raises:
            FileProcessingError: if pywin32 is missing or any COM call fails.
        """
        try:
            # `os` is imported here because this method cannot rely on the
            # function-local imports made by the other methods of the class.
            import os
            import win32com.client as win32
            import pythoncom

            pythoncom.CoInitialize()
            try:
                word = None
                doc = None
                try:
                    word = win32.Dispatch("Word.Application")
                    word.Visible = False
                    doc = word.Documents.Open(os.path.abspath(input_path))
                    doc.SaveAs2(os.path.abspath(output_path), FileFormat=target_format)
                finally:
                    # Release only what was actually acquired, so a failed
                    # Dispatch/Open reports its own error instead of a NameError.
                    try:
                        if doc is not None:
                            doc.Close(False)
                    finally:
                        if word is not None:
                            word.Quit()
            finally:
                # Always balance CoInitialize, even when Quit itself fails.
                pythoncom.CoUninitialize()
        except Exception as e:
            raise FileProcessingError(f"Word COM 轉換失敗: {str(e)}") from e

    def generate_translated_document(self, translations: Dict[str, List[str]],
                                     target_language: str, output_dir: Path) -> str:
        """Generate the translated document for a DOC file, written as DOCX.

        Args:
            translations: Passed through unchanged to ``DocxParser``.
            target_language: Language code of the translation to insert.
            output_dir: Directory the translated file is written to.

        Returns:
            The path returned by ``DocxParser.generate_translated_document``.

        Raises:
            FileProcessingError: if conversion or generation fails.
        """
        try:
            import tempfile

            temp_docx = None
            try:
                with tempfile.NamedTemporaryFile(suffix='.docx', delete=False) as tmp:
                    temp_docx = tmp.name

                self._word_convert(str(self.file_path), temp_docx, self._DOCX_FILE_FORMAT)

                # NOTE(review): DocxParser builds the output filename from the path
                # it was constructed with, which here is the temporary file, so the
                # result is presumably named after the temp file rather than the
                # source .doc - confirm against generate_filename.
                result_path = DocxParser(temp_docx).generate_translated_document(
                    translations, target_language, output_dir
                )

                logger.info(f"Generated translated DOC file (as DOCX): {result_path}")
                return result_path

            finally:
                self._discard_temp_file(temp_docx)

        except Exception as e:
            logger.error(f"Failed to generate translated DOC file: {str(e)}")
            raise FileProcessingError(f"DOC 翻譯檔生成失敗: {str(e)}") from e
'.xls': + raise FileProcessingError("XLS 格式需要先轉換為 XLSX 格式") + raise + except Exception: + wb_vals = None + + # 提取文字段落(完全按照參考檔案的邏輯) + segs = [] + for ws in wb.worksheets: + ws_vals = wb_vals[ws.title] if wb_vals and ws.title in wb_vals.sheetnames else None + max_row, max_col = ws.max_row, ws.max_column + + for r in range(1, max_row + 1): + for c in range(1, max_col + 1): + src_text = self._get_display_text_for_translation(ws, ws_vals, r, c) + if not src_text: + continue + if not self._should_translate(src_text, 'auto'): + continue + segs.append(src_text) + + # 去重保持順序 + unique_segments = [] + seen = set() + for seg in segs: + if seg not in seen: + unique_segments.append(seg) + seen.add(seg) + + logger.info(f"Extracted {len(unique_segments)} unique text segments from Excel file") + return unique_segments + + except Exception as e: + logger.error(f"Failed to extract text from Excel file: {str(e)}") + raise FileProcessingError(f"Excel 文件解析失敗: {str(e)}") + + def _get_display_text_for_translation(self, ws, ws_vals, r: int, c: int) -> Optional[str]: + """取得儲存格用於翻譯的顯示文字(完全移植自參考檔案)""" + val = ws.cell(row=r, column=c).value + if isinstance(val, str) and val.startswith("="): + if ws_vals is not None: + shown = ws_vals.cell(row=r, column=c).value + return shown if isinstance(shown, str) and shown.strip() else None + return None + if isinstance(val, str) and val.strip(): + return val + if ws_vals is not None: + shown = ws_vals.cell(row=r, column=c).value + if isinstance(shown, str) and shown.strip(): + return shown + return None + + def _should_translate(self, text: str, src_lang: str) -> bool: + """判斷文字是否需要翻譯(只要有字就翻譯)""" + text = text.strip() + + # 只要有字就翻譯 - 最小長度設為1 + if len(text) < 1: + return False + + # Skip pure numbers, dates, etc. 
+ import re + if re.match(r'^[\d\s\.\-\:\/]+$', text): + return False + + # For auto-detect, translate if has CJK or meaningful text + if src_lang.lower() in ('auto', 'auto-detect'): + return self._has_cjk(text) or len(text) > 5 + + return True + + def _has_cjk(self, text: str) -> bool: + """檢查是否包含中日韓文字(移植自參考檔案)""" + for char in text: + if '\u4e00' <= char <= '\u9fff' or \ + '\u3400' <= char <= '\u4dbf' or \ + '\u20000' <= char <= '\u2a6df' or \ + '\u3040' <= char <= '\u309f' or \ + '\u30a0' <= char <= '\u30ff' or \ + '\uac00' <= char <= '\ud7af': + return True + return False + + def generate_translated_document(self, translations: Dict[str, List[str]], + target_language: str, output_dir: Path) -> str: + """生成翻譯後的 Excel 文件(使用翻譯快取確保正確映射)""" + try: + import openpyxl + from openpyxl.styles import Alignment + from openpyxl.comments import Comment + from sqlalchemy import text as sql_text + from app import db + + # 載入原始工作簿 + wb = openpyxl.load_workbook(str(self.file_path), data_only=False) + try: + wb_vals = openpyxl.load_workbook(str(self.file_path), data_only=True) + except Exception: + wb_vals = None + + # 建立翻譯映射 - 改用翻譯快取查詢,確保正確對應 + original_segments = self.extract_text_segments() + tmap = {} + + logger.info(f"Building translation map for {len(original_segments)} segments in language {target_language}") + + for original_text in original_segments: + # 從翻譯快取中查詢每個原文的翻譯 + # 使用聯合查詢,優先使用最早的翻譯記錄(原始DIFY翻譯) + normalized_text = original_text.replace('\n', ' ').replace('\r', ' ').strip() + result = db.session.execute(sql_text(""" + SELECT translated_text, created_at, 'exact' as match_type + FROM dt_translation_cache + WHERE source_text = :exact_text AND target_language = :lang + + UNION ALL + + SELECT translated_text, created_at, 'normalized' as match_type + FROM dt_translation_cache + WHERE REPLACE(REPLACE(TRIM(source_text), '\n', ' '), '\r', ' ') = :norm_text + AND target_language = :lang + AND source_text != :exact_text + + ORDER BY created_at ASC + LIMIT 1 + """), 
{'exact_text': original_text, 'norm_text': normalized_text, 'lang': target_language}) + + row = result.fetchone() + if row and row[0]: + tmap[original_text] = row[0] + logger.debug(f"Cache hit for Excel: {original_text[:30]}... -> {row[0][:30]}...") + else: + logger.warning(f"No translation found in cache for: {original_text[:50]}...") + + logger.info(f"Translation map built with {len(tmap)} mappings from cache") + + # 處理每個工作表(加入詳細調試日誌) + translation_count = 0 + skip_count = 0 + + for ws in wb.worksheets: + logger.info(f"Processing worksheet: {ws.title}") + ws_vals = wb_vals[ws.title] if wb_vals and ws.title in wb_vals.sheetnames else None + max_row, max_col = ws.max_row, ws.max_column + + for r in range(1, max_row + 1): + for c in range(1, max_col + 1): + cell_name = f"{openpyxl.utils.get_column_letter(c)}{r}" + src_text = self._get_display_text_for_translation(ws, ws_vals, r, c) + + if not src_text: + continue + + # 檢查是否需要翻譯 + should_translate = self._should_translate(src_text, 'auto') + if not should_translate: + logger.debug(f"Skip {cell_name}: '{src_text[:30]}...' 
(should not translate)") + skip_count += 1 + continue + + # 檢查翻譯映射 + if src_text not in tmap: + logger.warning(f"No translation mapping for {cell_name}: '{src_text[:30]}...'") + skip_count += 1 + continue + + val = ws.cell(row=r, column=c).value + is_formula = isinstance(val, str) and val.startswith("=") + translated_text = tmap[src_text] + + cell = ws.cell(row=r, column=c) + + if is_formula: + # 公式儲存格:添加註解 + txt_comment = f"翻譯: {translated_text}" + exist = cell.comment + if not exist or exist.text.strip() != txt_comment: + cell.comment = Comment(txt_comment, "translator") + logger.debug(f"Added comment to {cell_name}: {translated_text[:30]}...") + translation_count += 1 + else: + # 一般儲存格:單語言檔案只保留翻譯文,不包含原文 + # 檢查是否已經是預期的格式 + current_text = str(cell.value) if cell.value else "" + if current_text.strip() == translated_text.strip(): + logger.debug(f"Skip {cell_name}: already translated") + continue + + cell.value = translated_text # 只保留翻譯文 + logger.info(f"Translated {cell_name}: '{src_text[:20]}...' 
class PdfParser(DocumentParser):
    """PDF parser that delegates to ``EnhancedPdfParser`` and falls back to basics.

    The original author describes the enhanced parser as handling scanned PDFs
    through OCR; this class only relies on it exposing the same two methods.
    """

    def extract_text_segments(self, user_id: int = None, job_id: int = None) -> List[str]:
        """Extract text segments, preferring the enhanced parser.

        Args:
            user_id: Forwarded to the enhanced parser.
            job_id: Forwarded to the enhanced parser.

        Returns:
            The enhanced parser's segments or, if it fails, the sentences of
            each page split on ``.`` that are longer than ten characters.

        Raises:
            FileProcessingError: if the basic PyPDF2 fallback fails as well.
        """
        try:
            from app.services.enhanced_pdf_parser import EnhancedPdfParser

            delegate = EnhancedPdfParser(str(self.file_path))
            segments = delegate.extract_text_segments(user_id, job_id)

            logger.info(f"Enhanced PDF extraction: {len(segments)} text segments")
            return segments

        except Exception as primary_error:
            logger.error(f"Enhanced PDF extraction failed, falling back to basic extraction: {str(primary_error)}")

            # Fallback: plain text extraction with naive sentence splitting.
            try:
                from PyPDF2 import PdfReader

                segments = []
                for page in PdfReader(str(self.file_path)).pages:
                    candidates = (piece.strip() for piece in page.extract_text().split('.'))
                    segments.extend(piece for piece in candidates if len(piece) > 10)

                logger.info(f"Basic PDF extraction: {len(segments)} text segments")
                return segments

            except Exception as fallback_error:
                logger.error(f"Basic PDF extraction also failed: {str(fallback_error)}")
                raise FileProcessingError(f"PDF 文件解析失敗: {str(fallback_error)}")

    def generate_translated_document(self, translations: Dict[str, List[str]],
                                     target_language: str, output_dir: Path) -> str:
        """Produce the translated output for a PDF.

        The enhanced parser is tried first. If it fails, the translations for
        *target_language* are written as a numbered list to a UTF-8 text file
        named ``<stem>_<target_language>_translated.txt`` in *output_dir*.

        Returns:
            Whatever the enhanced parser returns, or the text file's path.
        """
        try:
            from app.services.enhanced_pdf_parser import EnhancedPdfParser

            delegate = EnhancedPdfParser(str(self.file_path))
            return delegate.generate_translated_document(translations, target_language, output_dir)

        except Exception as primary_error:
            logger.warning(f"Enhanced PDF generation failed, using basic method: {str(primary_error)}")

            entries = translations.get(target_language, [])
            output_path = output_dir / f"{self.file_path.stem}_{target_language}_translated.txt"

            with open(output_path, 'w', encoding='utf-8') as handle:
                handle.write(f"翻譯結果 - {target_language}\n")
                handle.write("=" * 50 + "\n\n")
                for number, entry in enumerate(entries, start=1):
                    handle.write(f"{number}. {entry}\n\n")

            logger.info(f"Generated translated text file: {output_path}")
            return str(output_path)
True + + # 如果以上都沒有處理到,檢查是否有其他可能的文字內容 + if not shape_processed: + # 嘗試更深層的文字提取 + fallback_texts = self._extract_fallback_text(shape, slide_idx, shape_idx) + text_segments.extend(fallback_texts) + + logger.info(f"PowerPoint extraction: {len(text_segments)} text segments from PPTX (including tables)") + + # 診斷特定關鍵字 - 增強版 + target_keywords = [ + "檢驗盤剔線作業時缺少線塌防護設計", + "治工具未標準化管理", + "彈匣裝載料片間距不足", + "彈匣未評估防震防傾倒風險", + "搬運台車選用錯誤" + ] + + logger.info("=== 關鍵字診斷開始 ===") + for keyword in target_keywords: + # 完全匹配 + exact_matches = [seg for seg in text_segments if keyword == seg.strip()] + # 包含匹配 + contains_matches = [seg for seg in text_segments if keyword in seg] + # 模糊匹配(去掉空白和換行符) + normalized_keyword = keyword.replace(' ', '').replace('\n', '').replace('\r', '') + fuzzy_matches = [seg for seg in text_segments + if normalized_keyword in seg.replace(' ', '').replace('\n', '').replace('\r', '')] + + if exact_matches: + logger.info(f"✅ 完全匹配關鍵字: '{keyword}' 在 {len(exact_matches)} 個文字片段中") + for i, seg in enumerate(exact_matches): + logger.info(f" 完全匹配{i+1}: '{seg}'") + elif contains_matches: + logger.info(f"🔍 包含關鍵字: '{keyword}' 在 {len(contains_matches)} 個文字片段中") + for i, seg in enumerate(contains_matches): + logger.info(f" 包含匹配{i+1}: '{seg}'") + elif fuzzy_matches: + logger.info(f"🎯 模糊匹配關鍵字: '{keyword}' 在 {len(fuzzy_matches)} 個文字片段中") + for i, seg in enumerate(fuzzy_matches): + logger.info(f" 模糊匹配{i+1}: '{seg}'") + # 顯示標準化後的比較 + normalized_seg = seg.replace(' ', '').replace('\n', '').replace('\r', '') + logger.info(f" 標準化後: 關鍵字='{normalized_keyword}' vs 片段='{normalized_seg}'") + else: + logger.warning(f"❌ 未找到關鍵字: '{keyword}'") + # 檢查是否有類似的文字 + similar_segments = [] + for seg in text_segments: + # 計算相似度(簡單的關鍵詞匹配) + keyword_chars = set(keyword) + seg_chars = set(seg) + intersection = keyword_chars.intersection(seg_chars) + if len(intersection) >= min(5, len(keyword_chars) * 0.5): + similar_segments.append(seg) + + if similar_segments: + logger.info(f"💡 可能相似的片段 
({len(similar_segments)} 個):") + for i, seg in enumerate(similar_segments[:3]): # 只顯示前3個 + logger.info(f" 相似{i+1}: '{seg}'") + + logger.info("=== 關鍵字診斷結束 ===") + + return text_segments + + except Exception as e: + logger.error(f"Failed to extract text from PPTX: {str(e)}") + raise FileProcessingError(f"PPTX 文件解析失敗: {str(e)}") + + def _extract_text_from_frame(self, text_frame) -> str: + """從文字框中提取文字內容,包含標準化處理""" + if not text_frame or not hasattr(text_frame, 'paragraphs'): + return "" + + # 收集所有段落文字 + paragraphs = [] + for para in text_frame.paragraphs: + para_text = para.text + if para_text and para_text.strip(): + paragraphs.append(para_text.strip()) + + if not paragraphs: + return "" + + # 合併段落 + text = "\n".join(paragraphs) + + # 標準化文字處理 + import re + # 1. 標準化換行符 + text = text.replace('\r\n', '\n').replace('\r', '\n') + # 2. 移除末尾的換行符(但保留中間的) + text = text.rstrip('\n') + # 3. 標準化多重空白(但保留單個換行符) + text = re.sub(r'[ \t]+', ' ', text) + # 4. 移除段落間多餘空行 + text = re.sub(r'\n\s*\n', '\n', text) + + return text + + def _extract_text_from_table(self, table, slide_idx: int, shape_idx: int) -> List[str]: + """從表格中提取文字內容""" + table_texts = [] + + try: + for row_idx, row in enumerate(table.rows): + for col_idx, cell in enumerate(row.cells): + cell_text = cell.text_frame.text.strip() + + if cell_text: + table_texts.append(cell_text) + logger.debug(f"Extracted table cell text from slide {slide_idx}, shape {shape_idx}, " + f"row {row_idx+1}, col {col_idx+1}: {cell_text[:50]}...") + + logger.info(f"Extracted {len(table_texts)} cells from table on slide {slide_idx}") + + except Exception as e: + logger.error(f"Failed to extract text from table on slide {slide_idx}: {str(e)}") + + return table_texts + + def _extract_text_from_chart(self, chart, slide_idx: int, shape_idx: int) -> List[str]: + """從圖表中提取文字內容""" + chart_texts = [] + + try: + # 嘗試提取圖表標題 + if hasattr(chart, 'chart_title') and chart.chart_title.has_text_frame: + title_text = chart.chart_title.text_frame.text.strip() + if 
def _extract_text_from_group(self, shapes, slide_idx: int, shape_idx: int, depth: int = 0) -> List[str]:
    """Collect text from every member of a group shape, descending into nested groups.

    Each member is offered, in order, to the nested-group, text-frame, table
    and chart handlers. Its plain ``text`` is used only when none of those
    produced anything, and the generic fallback extractor only when that
    fails too.

    Args:
        shapes: Iterable of the group's member shapes.
        slide_idx: Slide number, used for logging.
        shape_idx: Identifier of the enclosing shape; nested members are
            labelled ``"<shape_idx>.<member index>"``.
        depth: Current nesting depth; recursion stops beyond ten levels.

    Returns:
        The collected texts. Errors are logged, and whatever was gathered
        before the failure is still returned.
    """
    collected = []
    depth_limit = 10  # guards against runaway recursion

    if depth > depth_limit:
        logger.warning(f"Group nesting depth exceeded {depth_limit} on slide {slide_idx}, skipping deeper levels")
        return collected

    try:
        for member_idx, member in enumerate(shapes):
            member_ref = f"{shape_idx}.{member_idx}"
            handled = False

            # 1. Nested group: recurse one level deeper.
            if hasattr(member, 'shapes') and hasattr(member, 'shape_type'):
                try:
                    inner = self._extract_text_from_group(member.shapes, slide_idx,
                                                          member_ref, depth + 1)
                    collected.extend(inner)
                    if inner:
                        handled = True
                        logger.debug(f"Extracted {len(inner)} texts from nested group "
                                     f"at slide {slide_idx}, depth {depth + 1}")
                except Exception as nested_error:
                    logger.debug(f"Failed to process nested group at slide {slide_idx}, "
                                 f"depth {depth + 1}: {str(nested_error)}")

            # 2. Text frame.
            if getattr(member, "has_text_frame", False):
                frame_text = self._extract_text_from_frame(member.text_frame)
                if frame_text.strip():
                    collected.append(frame_text)
                    logger.debug(f"Extracted group text from slide {slide_idx}, group {shape_idx}, "
                                 f"sub-shape {member_idx} (depth {depth}): {frame_text[:50]}...")
                    handled = True

            # 3. Table inside the group.
            if getattr(member, "has_table", False):
                from_table = self._extract_text_from_table(member.table, slide_idx, member_ref)
                collected.extend(from_table)
                if from_table:
                    handled = True

            # 4. Chart inside the group.
            if getattr(member, "has_chart", False):
                from_chart = self._extract_text_from_chart(member.chart, slide_idx, member_ref)
                collected.extend(from_chart)
                if from_chart:
                    handled = True

            # 5. Plain shape text, only when nothing above produced output.
            if not handled and hasattr(member, 'text') and member.text.strip():
                collected.append(member.text)
                logger.debug(f"Extracted group shape text from slide {slide_idx} "
                             f"(depth {depth}): {member.text[:50]}...")
                handled = True

            # 6. Last resort: generic fallback extraction.
            if not handled:
                collected.extend(self._extract_fallback_text(member, slide_idx, member_ref))

        logger.info(f"Extracted {len(collected)} text elements from grouped shapes "
                    f"on slide {slide_idx} (depth {depth})")

    except Exception as group_error:
        logger.error(f"Failed to extract text from grouped shapes on slide {slide_idx} "
                     f"(depth {depth}): {str(group_error)}")

    return collected
shape_idx: int) -> List[str]: + """備用文字提取方法,處理可能遺漏的文字內容,包括深層嵌套結構""" + fallback_texts = [] + + try: + # 檢查形狀類型和屬性 + shape_type = getattr(shape, 'shape_type', None) + logger.debug(f"Fallback extraction for slide {slide_idx}, shape {shape_idx}, type: {shape_type}") + + # 嘗試透過不同的方式取得文字 + + # 方法 1: 直接檢查 text 屬性(即使之前沒處理到) + if hasattr(shape, 'text'): + text = getattr(shape, 'text', '') + if text and text.strip(): + fallback_texts.append(text) + logger.debug(f"Fallback: Found direct text - {text[:50]}...") + + # 方法 2: 檢查是否有 text_frame 但之前沒有正確處理 + try: + if hasattr(shape, 'text_frame'): + text_frame = shape.text_frame + if text_frame and hasattr(text_frame, 'text'): + text = text_frame.text + if text and text.strip(): + fallback_texts.append(text) + logger.debug(f"Fallback: Found text_frame text - {text[:50]}...") + except: + pass + + # 方法 2.5: 深度檢查 text_frame 內的段落結構 + try: + if hasattr(shape, 'text_frame') and shape.text_frame: + text_frame = shape.text_frame + if hasattr(text_frame, 'paragraphs'): + for para_idx, paragraph in enumerate(text_frame.paragraphs): + if hasattr(paragraph, 'runs'): + for run_idx, run in enumerate(paragraph.runs): + if hasattr(run, 'text') and run.text.strip(): + fallback_texts.append(run.text) + logger.debug(f"Fallback: Found run text {para_idx}.{run_idx} - {run.text[:30]}...") + except Exception as e: + logger.debug(f"Failed to extract paragraph runs: {str(e)}") + + # 方法 2.6: 如果形狀有嵌套的 shapes,遞歸處理 + if hasattr(shape, 'shapes') and shape.shapes: + try: + nested_texts = self._extract_text_from_group(shape.shapes, slide_idx, + f"fallback_{shape_idx}", depth=0) + fallback_texts.extend(nested_texts) + if nested_texts: + logger.debug(f"Fallback: Found {len(nested_texts)} texts from nested shapes") + except Exception as e: + logger.debug(f"Failed to extract from nested shapes: {str(e)}") + + # 方法 3: 檢查特殊屬性 + special_attrs = ['textFrame', 'text_frame', '_element'] + for attr in special_attrs: + try: + if hasattr(shape, attr): + obj = getattr(shape, 
attr) + if hasattr(obj, 'text') and obj.text and obj.text.strip(): + fallback_texts.append(obj.text) + logger.debug(f"Fallback: Found {attr} text - {obj.text[:30]}...") + except: + continue + + # 方法 3: 如果是 GraphicFrame,嘗試更深入的提取 + if hasattr(shape, 'element'): + try: + # 透過 XML 元素搜尋文字節點 + element = shape.element + + # 搜尋 XML 中的文字內容 + text_elements = [] + + # 搜尋 標籤(文字內容) + for t_elem in element.iter(): + if t_elem.tag.endswith('}t'): # 匹配 a:t 標籤 + if t_elem.text and t_elem.text.strip(): + text_elements.append(t_elem.text.strip()) + + # 去重並添加 + for text in set(text_elements): + if text not in [existing_text for existing_text in fallback_texts]: + fallback_texts.append(text) + logger.debug(f"Fallback: Found XML text - {text[:50]}...") + + except Exception as xml_e: + logger.debug(f"XML extraction failed for shape {shape_idx}: {str(xml_e)}") + + if fallback_texts: + logger.info(f"Fallback extraction found {len(fallback_texts)} additional text elements on slide {slide_idx}, shape {shape_idx}") + else: + logger.debug(f"No additional text found in fallback for slide {slide_idx}, shape {shape_idx}") + + except Exception as e: + logger.error(f"Fallback text extraction failed for slide {slide_idx}, shape {shape_idx}: {str(e)}") + + return fallback_texts + + def _normalize_text(self, text: str) -> str: + """標準化文字用於比較""" + import re + return re.sub(r"\s+", " ", (text or "").strip()).lower() + + def _check_existing_translations(self, text_frame, translations: List[str]) -> bool: + """檢查翻譯是否已經存在於文字框末尾""" + if len(text_frame.paragraphs) < len(translations): + return False + + # 檢查末尾的段落是否與翻譯匹配 + tail_paragraphs = text_frame.paragraphs[-len(translations):] + for para, expected in zip(tail_paragraphs, translations): + if self._normalize_text(para.text) != self._normalize_text(expected): + return False + # 檢查是否為斜體格式(我們添加的翻譯標記) + if any((r.font.italic is not True) and (r.text or "").strip() for r in para.runs): + return False + return True + + def _append_translation(self, text_frame, 
text_block: str): + """在文字框末尾添加翻譯文字""" + try: + from pptx.util import Pt as PPTPt + + para = text_frame.add_paragraph() + para.text = text_block + + # 設定格式:斜體、字體大小 + for run in para.runs: + run.font.italic = True + run.font.size = PPTPt(12) + + except Exception as e: + logger.error(f"Failed to append translation to text frame: {str(e)}") + raise + + def generate_translated_document(self, translations: Dict[str, List[str]], + target_language: str, output_dir: Path) -> str: + """生成翻譯後的 PPTX 文件""" + try: + import pptx + from sqlalchemy import text as sql_text + from app import db + + # 載入 PowerPoint 文件 + prs = pptx.Presentation(str(self.file_path)) + + # 生成輸出檔名 + output_filename = generate_filename( + self.file_path.name, + 'translated', + 'translated', + target_language + ) + output_path = output_dir / output_filename + + # 收集所有文字框 + text_frames = [] + for slide in prs.slides: + for shape in slide.shapes: + if getattr(shape, "has_text_frame", False): + text = self._extract_text_from_frame(shape.text_frame) + if text.strip(): + text_frames.append((shape.text_frame, text)) + + # 建立翻譯映射 - 從快取讀取 + translation_map = {} + logger.info(f"Building translation map for {len(text_frames)} text frames in language {target_language}") + + for text_frame, text in text_frames: + # 從翻譯快取中查詢翻譯 + result = db.session.execute(sql_text(""" + SELECT translated_text + FROM dt_translation_cache + WHERE source_text = :text AND target_language = :lang + ORDER BY created_at DESC + LIMIT 1 + """), {'text': text, 'lang': target_language}) + + row = result.fetchone() + if row and row[0]: + translation_map[text] = row[0] + logger.debug(f"Found translation for PowerPoint text: {text[:50]}...") + else: + logger.warning(f"No translation found for PowerPoint text: {text[:50]}...") + + logger.info(f"Translation map built with {len(translation_map)} mappings") + + # 插入翻譯 + ok_count = skip_count = 0 + + for text_frame, original_text in text_frames: + if original_text not in translation_map: + skip_count += 
def insert_pptx_translations(self, translation_map: Dict[Tuple[str, str], str],
                             target_languages: List[str], output_path: str) -> Tuple[int, int]:
    """Write a single-language copy of the presentation with translated text.

    The source file is copied to *output_path*, and every slide shape in the
    copy is offered to the text-frame, table, chart and group handlers. A
    shape's plain ``text`` is consulted only when none of those handlers
    dealt with it, mirroring the ``shape_processed`` guard used during
    extraction. Without that guard a shape already handled through its text
    frame was visited a second time and counted twice.

    Args:
        translation_map: Translations keyed by ``(language, source_text)``.
        target_languages: Requested languages; only the first one is used.
        output_path: Destination path of the translated copy.

    Returns:
        ``(ok_count, skip_count)``: texts replaced and texts left untouched.

    Raises:
        FileProcessingError: if *target_languages* is empty or copying,
            loading, translating or saving fails.
    """
    try:
        import pptx
        from shutil import copyfile

        # Single-language mode: resolve the language once, before any output
        # file is created.
        target_language = target_languages[0]

        copyfile(str(self.file_path), output_path)

        prs = pptx.Presentation(output_path)
        ok_count = skip_count = 0

        for slide_idx, slide in enumerate(prs.slides, 1):
            for shape_idx, shape in enumerate(slide.shapes, 1):
                handled = False

                # Text frame
                if getattr(shape, "has_text_frame", False):
                    text = self._extract_text_from_frame(shape.text_frame)
                    if text.strip():
                        ok, skip = self._insert_single_language_translation(
                            shape.text_frame, text, translation_map, target_language
                        )
                        ok_count += ok
                        skip_count += skip
                        handled = True

                # Table
                if getattr(shape, "has_table", False):
                    table_ok, table_skip = self._insert_table_translations(
                        shape.table, translation_map, target_language
                    )
                    ok_count += table_ok
                    skip_count += table_skip
                    handled = handled or bool(table_ok or table_skip)

                # Chart
                if getattr(shape, "has_chart", False):
                    chart_ok, chart_skip = self._insert_chart_translations(
                        shape.chart, translation_map, target_language
                    )
                    ok_count += chart_ok
                    skip_count += chart_skip
                    handled = handled or bool(chart_ok or chart_skip)

                # Group shape (the handler deals with nesting)
                if hasattr(shape, 'shapes'):
                    group_ok, group_skip = self._insert_group_translations(
                        shape.shapes, translation_map, target_language, slide_idx, shape_idx
                    )
                    ok_count += group_ok
                    skip_count += group_skip
                    handled = handled or bool(group_ok or group_skip)

                # Plain shape text: fallback only, so a shape is never counted twice.
                if not handled and hasattr(shape, 'text') and shape.text.strip():
                    key = (target_language, shape.text)
                    if key in translation_map:
                        shape.text = translation_map[key]
                        ok_count += 1
                        logger.debug(f"Inserted basic shape translation on slide {slide_idx}: {shape.text[:30]}...")
                    else:
                        skip_count += 1

        prs.save(output_path)
        logger.info(f"Saved PowerPoint file with {ok_count} translations, {skip_count} skips")
        return ok_count, skip_count

    except Exception as e:
        logger.error(f"Failed to insert PowerPoint translations: {str(e)}")
        raise FileProcessingError(f"PowerPoint 翻譯插入失敗: {str(e)}") from e
def _insert_single_language_translation(self, text_frame, original_text: str,
                                        translation_map: Dict[Tuple[str, str], str],
                                        target_language: str) -> Tuple[int, int]:
    """Replace the content of ``text_frame`` with the translation of ``original_text``.

    Args:
        text_frame: python-pptx text frame to rewrite.
        original_text: Source text used as the lookup key.
        translation_map: ``(target_language, source_text) -> translation``.
        target_language: Language whose translation is inserted.

    Returns:
        ``(1, 0)`` when the translation was written; ``(0, 1)`` when no
        translation exists or ``_check_existing_translations`` reports that it
        is already present.
    """
    key = (target_language, original_text)
    if key not in translation_map:
        return 0, 1

    translated_text = translation_map[key]

    # Do not insert the same translation twice.
    if self._check_existing_translations(text_frame, [translated_text]):
        return 0, 1

    # python-pptx documents that clear() removes all paragraphs except one
    # empty one. Reuse that paragraph: calling add_paragraph() here would
    # leave a blank first line above the translation.
    text_frame.clear()
    remaining = text_frame.paragraphs
    para = remaining[0] if remaining else text_frame.add_paragraph()
    para.text = translated_text

    # Resolve the font size once; it is cosmetic, so a missing pptx.util
    # only disables the size change.
    try:
        from pptx.util import Pt
        font_size = Pt(12)
    except ImportError:
        font_size = None

    for run in para.runs:
        run.font.italic = True
        if font_size is not None:
            run.font.size = font_size

    return 1, 0
def _insert_combined_language_translation(self, text_frame, original_text: str,
                                          translation_map: Dict[Tuple[str, str], str],
                                          target_languages: List[str]) -> Tuple[int, int]:
    """Append the translations for every target language to ``text_frame``.

    A language without a translation contributes a ``【翻譯缺失|lang】`` marker.

    Returns:
        ``(1, 0)`` when the translations were appended; ``(0, 1)`` when no
        language has a real (non-empty, non-marker) translation or
        ``_check_existing_translations`` reports they are already present.
    """
    translations = [
        translation_map[(lang, original_text)]
        if (lang, original_text) in translation_map
        else f"【翻譯缺失|{lang}】"
        for lang in target_languages
    ]

    # Nothing to add when every entry is empty or a "missing" marker.
    if not any(t and not t.startswith("【翻譯缺失") for t in translations):
        return 0, 1

    # The existing-content check also receives the source text.
    if self._check_existing_translations(text_frame, [original_text] + translations):
        return 0, 1

    for translation in translations:
        self._append_translation(text_frame, translation)

    return 1, 0


def _insert_table_translations(self, table, translation_map: Dict[Tuple[str, str], str],
                               target_language: str) -> Tuple[int, int]:
    """Replace each table cell with its translation (single-language mode).

    The stripped cell text is the lookup key; empty cells are ignored.

    Returns:
        ``(ok_count, skip_count)``: cells replaced and cells with no translation.
    """
    ok_count = skip_count = 0

    # Cosmetic font size, resolved once for the whole table.
    try:
        from pptx.util import Pt
        font_size = Pt(10)
    except ImportError:
        font_size = None

    for row in table.rows:
        for cell in row.cells:
            cell_text = cell.text_frame.text.strip()
            if not cell_text:
                continue

            key = (target_language, cell_text)
            if key not in translation_map:
                skip_count += 1
                continue

            # python-pptx documents that clear() keeps one empty paragraph;
            # reuse it so the cell does not start with a blank line.
            cell.text_frame.clear()
            remaining = cell.text_frame.paragraphs
            para = remaining[0] if remaining else cell.text_frame.add_paragraph()
            para.text = translation_map[key]

            for run in para.runs:
                run.font.italic = True
                if font_size is not None:
                    run.font.size = font_size

            ok_count += 1

    return ok_count, skip_count


def _insert_combined_table_translations(self, table, translation_map: Dict[Tuple[str, str], str],
                                        target_languages: List[str]) -> Tuple[int, int]:
    """Rewrite each table cell as source text plus all translations (combined mode).

    A cell is rewritten only when at least one target language has a real
    translation; languages without one get a ``【翻譯缺失|lang】`` marker line.
    Cells with no translation at all are left untouched and counted as skipped.

    Returns:
        ``(ok_count, skip_count)``.
    """
    ok_count = skip_count = 0

    # Cosmetic font size, resolved once for the whole table.
    try:
        from pptx.util import Pt
        font_size = Pt(9)
    except ImportError:
        font_size = None

    for row in table.rows:
        for cell in row.cells:
            cell_text = cell.text_frame.text.strip()
            if not cell_text:
                continue

            translations = [
                translation_map[(lang, cell_text)]
                if (lang, cell_text) in translation_map
                else f"【翻譯缺失|{lang}】"
                for lang in target_languages
            ]

            if not any(t and not t.startswith("【翻譯缺失") for t in translations):
                skip_count += 1
                continue

            combined_text = cell_text + '\n' + '\n'.join(translations)

            # Reuse the paragraph clear() leaves behind (no leading blank line).
            cell.text_frame.clear()
            remaining = cell.text_frame.paragraphs
            para = remaining[0] if remaining else cell.text_frame.add_paragraph()
            para.text = combined_text

            if font_size is not None:
                for run in para.runs:
                    run.font.size = font_size

            ok_count += 1

    return ok_count, skip_count
def _insert_chart_translations(self, chart, translation_map: Dict[Tuple[str, str], str],
                               target_language: str) -> Tuple[int, int]:
    """Translate the chart title (single-language mode, limited support).

    Only the title is handled; axis labels and other chart text are not.
    Errors are logged and counted as one skip instead of being raised.

    Returns:
        ``(ok_count, skip_count)``.
    """
    ok_count = skip_count = 0

    try:
        # python-pptx documents that reading ``chart_title`` adds a title
        # element when the chart has none; ``has_title`` is the
        # non-destructive test, so untitled charts are left unmodified.
        if getattr(chart, 'has_title', False) and chart.chart_title.has_text_frame:
            title_text = chart.chart_title.text_frame.text.strip()
            if title_text and (target_language, title_text) in translation_map:
                translated_title = translation_map[(target_language, title_text)]
                chart.chart_title.text_frame.text = translated_title
                ok_count += 1
                logger.debug(f"Translated chart title: {title_text[:30]} -> {translated_title[:30]}")
            else:
                skip_count += 1

        logger.info(f"Chart translation: {ok_count} successful, {skip_count} skipped (limited support)")

    except Exception as e:
        logger.error(f"Failed to insert chart translations: {str(e)}")
        skip_count += 1

    return ok_count, skip_count


def _insert_group_translations(self, shapes, translation_map: Dict[Tuple[str, str], str],
                               target_language: str, slide_idx: int = 0, shape_idx: int = 0, depth: int = 0) -> Tuple[int, int]:
    """Translate every shape inside a group, recursing into nested groups.

    Args:
        shapes: Child shapes of the group.
        translation_map: ``(target_language, source_text) -> translation``.
        target_language: Language whose translations are inserted.
        slide_idx: Slide number, used for log messages only.
        shape_idx: Position label of the group; nested calls pass a dotted
            string such as ``"3.1"``.
        depth: Current nesting level; recursion stops beyond 10 levels.

    Returns:
        ``(ok_count, skip_count)``. Errors are logged, never raised.
    """
    ok_count = skip_count = 0
    max_depth = 10  # guard against runaway recursion

    if depth > max_depth:
        logger.warning(f"Group nesting depth exceeded {max_depth} on slide {slide_idx}, skipping deeper levels")
        return ok_count, skip_count

    try:
        for sub_shape_idx, sub_shape in enumerate(shapes):
            shape_processed = False

            # 1. Nested groups first (recursive).
            if hasattr(sub_shape, 'shapes') and hasattr(sub_shape, 'shape_type'):
                try:
                    nested_ok, nested_skip = self._insert_group_translations(
                        sub_shape.shapes, translation_map, target_language,
                        slide_idx, f"{shape_idx}.{sub_shape_idx}", depth + 1
                    )
                    ok_count += nested_ok
                    skip_count += nested_skip
                    if nested_ok > 0:
                        shape_processed = True
                        logger.debug(f"Inserted {nested_ok} nested group translations at depth {depth + 1}")
                except Exception as e:
                    logger.debug(f"Failed to process nested group at depth {depth + 1}: {str(e)}")

            # 2. Text frames.
            if getattr(sub_shape, "has_text_frame", False):
                text = self._extract_text_from_frame(sub_shape.text_frame)
                if text.strip():
                    if (target_language, text) in translation_map:
                        translated_text = translation_map[(target_language, text)]
                        try:
                            # python-pptx documents that clear() keeps one empty
                            # paragraph; reuse it rather than appending a second
                            # one, which would leave a blank first line.
                            frame = sub_shape.text_frame
                            frame.clear()
                            remaining = frame.paragraphs
                            para = remaining[0] if remaining else frame.add_paragraph()
                            para.text = translated_text
                            ok_count += 1
                            shape_processed = True
                            logger.debug(f"Inserted group text frame translation: {text[:30]}... -> {translated_text[:30]}...")
                        except Exception as e:
                            logger.warning(f"Failed to replace text frame content: {str(e)}")
                            skip_count += 1
                    else:
                        skip_count += 1

            # 3. Tables.
            if getattr(sub_shape, "has_table", False):
                table_ok, table_skip = self._insert_table_translations(
                    sub_shape.table, translation_map, target_language
                )
                ok_count += table_ok
                skip_count += table_skip
                if table_ok > 0:
                    shape_processed = True

            # 4. Charts.
            if getattr(sub_shape, "has_chart", False):
                chart_ok, chart_skip = self._insert_chart_translations(
                    sub_shape.chart, translation_map, target_language
                )
                ok_count += chart_ok
                skip_count += chart_skip
                if chart_ok > 0:
                    shape_processed = True

            # 5. Fallback: raw shape text, only when nothing above succeeded.
            if not shape_processed and hasattr(sub_shape, 'text') and sub_shape.text.strip():
                if (target_language, sub_shape.text) in translation_map:
                    translated_text = translation_map[(target_language, sub_shape.text)]
                    sub_shape.text = translated_text
                    ok_count += 1
                    logger.debug(f"Inserted basic group shape translation: {sub_shape.text[:30]}...")
                    shape_processed = True
                else:
                    skip_count += 1

        logger.debug(f"Group translation at depth {depth}: {ok_count} successful, {skip_count} skipped")

    except Exception as e:
        logger.error(f"Failed to insert group translations at depth {depth}: {str(e)}")

    return ok_count, skip_count
def _insert_combined_chart_translations(self, chart, translation_map: Dict[Tuple[str, str], str],
                                        target_languages: List[str]) -> Tuple[int, int]:
    """Append all translations to the chart title (combined mode, limited support).

    Only the title is handled. The title becomes the source text followed by
    one line per target language (``【翻譯缺失|lang】`` where none exists), and
    is rewritten only when at least one real translation is available.
    Errors are logged and counted as one skip instead of being raised.

    Returns:
        ``(ok_count, skip_count)``.
    """
    ok_count = skip_count = 0

    try:
        # python-pptx documents that reading ``chart_title`` adds a title
        # element when the chart has none; ``has_title`` is the
        # non-destructive test, so untitled charts are left unmodified.
        if getattr(chart, 'has_title', False) and chart.chart_title.has_text_frame:
            title_text = chart.chart_title.text_frame.text.strip()
            if title_text:
                translations = [
                    translation_map[(lang, title_text)]
                    if (lang, title_text) in translation_map
                    else f"【翻譯缺失|{lang}】"
                    for lang in target_languages
                ]

                if any(t and not t.startswith("【翻譯缺失") for t in translations):
                    combined_text = title_text + '\n' + '\n'.join(translations)
                    chart.chart_title.text_frame.text = combined_text
                    ok_count += 1
                else:
                    skip_count += 1
            else:
                skip_count += 1

        logger.info(f"Combined chart translation: {ok_count} successful, {skip_count} skipped (limited support)")

    except Exception as e:
        logger.error(f"Failed to insert combined chart translations: {str(e)}")
        skip_count += 1

    return ok_count, skip_count


def _insert_combined_group_translations(self, shapes, translation_map: Dict[Tuple[str, str], str],
                                        target_languages: List[str], depth: int = 0) -> Tuple[int, int]:
    """Append all translations to the shapes of a group (combined mode).

    Each child shape is dispatched to exactly one handler, in this order:
    text frame, table, chart, nested group, plain text. Nested groups are
    followed recursively up to 10 levels, matching the single-language
    group handler.

    Args:
        shapes: Child shapes of the group.
        translation_map: ``(target_language, source_text) -> translation``.
        target_languages: Languages to append, in output order.
        depth: Current nesting level (callers normally omit it).

    Returns:
        ``(ok_count, skip_count)``. Errors are logged, never raised.
    """
    ok_count = skip_count = 0
    max_depth = 10  # guard against runaway recursion

    if depth > max_depth:
        logger.warning(f"Combined group nesting depth exceeded {max_depth}, skipping deeper levels")
        return ok_count, skip_count

    try:
        for sub_shape in shapes:
            # Text frames: delegate to the shared helper so group content gets
            # the same "already translated" check as top-level text frames.
            if getattr(sub_shape, "has_text_frame", False):
                text = self._extract_text_from_frame(sub_shape.text_frame)
                if text.strip():
                    ok, skip = self._insert_combined_language_translation(
                        sub_shape.text_frame, text, translation_map, target_languages
                    )
                    ok_count += ok
                    skip_count += skip
                else:
                    skip_count += 1

            # Tables
            elif getattr(sub_shape, "has_table", False):
                table_ok, table_skip = self._insert_combined_table_translations(
                    sub_shape.table, translation_map, target_languages
                )
                ok_count += table_ok
                skip_count += table_skip

            # Charts
            elif getattr(sub_shape, "has_chart", False):
                chart_ok, chart_skip = self._insert_combined_chart_translations(
                    sub_shape.chart, translation_map, target_languages
                )
                ok_count += chart_ok
                skip_count += chart_skip

            # Nested groups
            elif hasattr(sub_shape, 'shapes'):
                nested_ok, nested_skip = self._insert_combined_group_translations(
                    sub_shape.shapes, translation_map, target_languages, depth + 1
                )
                ok_count += nested_ok
                skip_count += nested_skip

            # Plain shape text
            elif hasattr(sub_shape, 'text') and sub_shape.text.strip():
                translations = [
                    translation_map[(lang, sub_shape.text)]
                    if (lang, sub_shape.text) in translation_map
                    else f"【翻譯缺失|{lang}】"
                    for lang in target_languages
                ]

                # Leave the shape alone when no language has a translation.
                if any(t and not t.startswith("【翻譯缺失") for t in translations):
                    sub_shape.text = sub_shape.text + '\n' + '\n'.join(translations)
                    ok_count += 1
                else:
                    skip_count += 1

    except Exception as e:
        logger.error(f"Failed to insert combined group translations: {str(e)}")

    return ok_count, skip_count
class TranslationService:
    """Translation service: resolves document parsers and drives translation."""

    def __init__(self):
        """Create the Dify client, the document processor and the parser registry."""
        self.dify_client = DifyClient()
        self.document_processor = DocumentProcessor()

        # Lower-case file extension -> parser class.
        registry = {}
        registry['.docx'] = DocxParser
        registry['.doc'] = DocParser      # must be converted to DOCX first
        registry['.pptx'] = PptxParser    # PowerPoint presentations
        registry['.xlsx'] = ExcelParser
        registry['.xls'] = ExcelParser    # the Excel parser handles XLS conversion itself
        registry['.pdf'] = PdfParser
        # further formats can be registered later
        self.parsers = registry

    def get_document_parser(self, file_path: str) -> DocumentParser:
        """Return a parser instance for ``file_path``, chosen by file extension.

        The extension is matched case-insensitively.

        Raises:
            FileProcessingError: If no parser is registered for the extension.
        """
        extension = Path(file_path).suffix.lower()
        parser_cls = self.parsers.get(extension)

        if parser_cls:
            return parser_cls(file_path)

        raise FileProcessingError(f"不支援的檔案格式: {extension}")
def split_text_into_sentences(self, text: str, language: str = 'auto') -> List[str]:
    """Split ``text`` into sentences by delegating to the document processor."""
    return self.document_processor.split_text_into_sentences(text, language)


def translate_excel_cell(self, text: str, source_language: str,
                         target_language: str, user_id: int = None,
                         job_id: int = None, conversation_id: str = None) -> Dict[str, Any]:
    """Translate one Excel cell as a single unit (no sentence splitting).

    Args:
        text: Full cell content.
        source_language: Language of ``text``.
        target_language: Language to translate into.
        user_id: Forwarded to the Dify client.
        job_id: Forwarded to the Dify client.
        conversation_id: Dify conversation to continue, if any.

    Returns:
        Always a dict with at least ``'translated_text'`` and
        ``'conversation_id'``:

        * blank input -> empty ``translated_text``;
        * cache hit -> cached text with the incoming ``conversation_id``;
        * API success -> the Dify result dict unchanged;
        * API failure -> ``【翻譯失敗|lang】`` followed by the source text,
          with the incoming ``conversation_id``. Failures are logged,
          never raised.
    """
    if not text or not text.strip():
        return {"translated_text": "", "conversation_id": conversation_id}

    # Cache lookup on the whole cell content.
    cached_translation = TranslationCache.get_translation(text, source_language, target_language)
    if cached_translation:
        logger.debug(f"Excel cell cache hit: {text[:30]}...")
        return {"translated_text": cached_translation, "conversation_id": conversation_id}

    try:
        result = self.dify_client.translate_text(
            text=text,
            source_language=source_language,
            target_language=target_language,
            user_id=user_id,
            job_id=job_id,
            conversation_id=conversation_id
        )

        # Cache the translation of the whole cell.
        TranslationCache.save_translation(
            text, source_language, target_language, result['translated_text']
        )

        return result

    except Exception as e:
        logger.error(f"Failed to translate Excel cell: {text[:30]}... Error: {str(e)}")
        # Keep the declared dict shape on failure too, so callers can read
        # 'translated_text' on every path.
        return {
            "translated_text": f"【翻譯失敗|{target_language}】{text}",
            "conversation_id": conversation_id,
        }
def translate_word_table_cell(self, text: str, source_language: str,
                              target_language: str, user_id: int = None,
                              job_id: int = None) -> str:
    """Translate one Word table cell as a single unit (no paragraph splitting).

    Returns:
        ``""`` for blank input; the cached translation on a cache hit; the
        fresh translation (which is also cached) on success; or
        ``【翻譯失敗|lang】`` followed by the source text when the API call
        fails. Failures are logged, never raised.
    """
    if not (text and text.strip()):
        return ""

    # Whole-cell cache lookup.
    cache_hit = TranslationCache.get_translation(text, source_language, target_language)
    if cache_hit:
        logger.debug(f"Word table cell cache hit: {text[:30]}...")
        return cache_hit

    request = dict(
        text=text,
        source_language=source_language,
        target_language=target_language,
        user_id=user_id,
        job_id=job_id,
    )
    try:
        response = self.dify_client.translate_text(**request)
        cell_translation = response['translated_text']

        # Remember the translation of the whole cell.
        TranslationCache.save_translation(
            text, source_language, target_language, cell_translation
        )
    except Exception as e:
        logger.error(f"Failed to translate Word table cell: {text[:30]}... Error: {str(e)}")
        return f"【翻譯失敗|{target_language}】{text}"

    return cell_translation
def translate_segment_with_sentences(self, text: str, source_language: str,
                                     target_language: str, user_id: int = None,
                                     job_id: int = None, conversation_id: str = None) -> Dict[str, Any]:
    """Translate a paragraph line by line and sentence by sentence.

    Each non-blank line is split into sentences; every sentence is served
    from the cache or translated through the Dify client, and the parts are
    joined with spaces (sentences) and newlines (lines). The whole-paragraph
    result is cached only when every sentence succeeded. Intended for
    paragraph text; Excel cells should use ``translate_excel_cell``.

    Args:
        text: Paragraph text, possibly spanning several lines.
        source_language: Language of ``text``.
        target_language: Language to translate into.
        user_id: Forwarded to the Dify client.
        job_id: Forwarded to the Dify client.
        conversation_id: Dify conversation to continue, if any.

    Returns:
        Always a dict ``{'translated_text': str, 'conversation_id': ...}``,
        including for blank input and whole-paragraph cache hits, so callers
        can index the result on every path. A sentence that fails to
        translate appears as ``【翻譯失敗|lang】`` followed by the sentence.

    NOTE(review): a caller that stores the return value directly as text
    must read ``['translated_text']`` from it.
    """
    if not text or not text.strip():
        return {'translated_text': "", 'conversation_id': conversation_id}

    # Whole-paragraph cache first.
    cached_whole = TranslationCache.get_translation(text, source_language, target_language)
    if cached_whole:
        logger.debug(f"Whole paragraph cache hit: {text[:30]}...")
        return {'translated_text': cached_whole, 'conversation_id': conversation_id}

    out_lines = []
    all_successful = True
    current_conversation_id = conversation_id

    for raw_line in text.split('\n'):
        # Preserve blank lines so the paragraph layout survives.
        if not raw_line.strip():
            out_lines.append("")
            continue

        sentences = self.document_processor.split_text_into_sentences(raw_line, source_language)
        if not sentences:
            sentences = [raw_line]

        translated_parts = []
        for sentence in sentences:
            sentence = sentence.strip()
            if not sentence:
                continue

            # Sentence-level cache.
            cached_sentence = TranslationCache.get_translation(sentence, source_language, target_language)
            if cached_sentence:
                translated_parts.append(cached_sentence)
                continue

            try:
                result = self.dify_client.translate_text(
                    text=sentence,
                    source_language=source_language,
                    target_language=target_language,
                    user_id=user_id,
                    job_id=job_id,
                    conversation_id=current_conversation_id
                )

                translated_sentence = result['translated_text']

                # Carry the conversation forward to keep context between sentences.
                if result.get('conversation_id'):
                    current_conversation_id = result['conversation_id']

                TranslationCache.save_translation(
                    sentence, source_language, target_language, translated_sentence
                )

                translated_parts.append(translated_sentence)

            except Exception as e:
                logger.error(f"Failed to translate sentence: {sentence[:30]}... Error: {str(e)}")
                translated_parts.append(f"【翻譯失敗|{target_language}】{sentence}")
                all_successful = False

        out_lines.append(" ".join(translated_parts))

    final_result = "\n".join(out_lines)

    # Never cache a paragraph that contains failure markers.
    if all_successful:
        TranslationCache.save_translation(text, source_language, target_language, final_result)

    return {
        'translated_text': final_result,
        'conversation_id': current_conversation_id
    }
def translate_text_with_cache(self, text: str, source_language: str,
                              target_language: str, user_id: int = None,
                              job_id: int = None, conversation_id: str = None) -> Dict[str, Any]:
    """Translate ``text``, consulting the translation cache before the API.

    Returns:
        A dict with ``'translated_text'``, ``'conversation_id'`` and
        ``'from_cache'``. On a cache hit the incoming ``conversation_id`` is
        passed through; otherwise it is whatever the Dify result reports
        (possibly ``None``).

    Raises:
        TranslationError: If the API call or the cache write fails.
    """
    cached = TranslationCache.get_translation(
        text, source_language, target_language
    )
    if cached:
        logger.debug(f"Cache hit for translation: {text[:50]}...")
        return dict(
            translated_text=cached,
            conversation_id=conversation_id,  # keep the caller's conversation
            from_cache=True,
        )

    # Cache miss: ask Dify and remember the answer.
    request = dict(
        text=text,
        source_language=source_language,
        target_language=target_language,
        user_id=user_id,
        job_id=job_id,
        conversation_id=conversation_id,
    )
    try:
        api_result = self.dify_client.translate_text(**request)

        fresh_text = api_result['translated_text']
        reported_conversation = api_result.get('conversation_id')

        TranslationCache.save_translation(
            text, source_language, target_language, fresh_text
        )

        return dict(
            translated_text=fresh_text,
            conversation_id=reported_conversation,
            from_cache=False,
        )
    except Exception as e:
        logger.error(f"Translation failed for text: {text[:50]}... Error: {str(e)}")
        raise TranslationError(f"翻譯失敗: {str(e)}")
Error: {str(e)}") + raise TranslationError(f"翻譯失敗: {str(e)}") + + def translate_document(self, job_uuid: str) -> Dict[str, Any]: + """翻譯文件(主要入口點)- 使用增強的文檔處理邏輯""" + try: + # 取得任務資訊 + job = TranslationJob.query.filter_by(job_uuid=job_uuid).first() + if not job: + raise TranslationError(f"找不到任務: {job_uuid}") + + logger.info(f"Starting enhanced document translation: {job_uuid}") + + # 更新任務狀態 + job.update_status('PROCESSING', progress=0) + + # 使用增強的文檔處理器直接提取段落 + file_ext = Path(job.file_path).suffix.lower() + + if file_ext in ['.docx', '.doc']: + # 使用增強的 DOCX 處理邏輯 + segments = self.document_processor.extract_docx_segments(job.file_path) + logger.info(f"Enhanced extraction: Found {len(segments)} segments to translate") + + if not segments: + raise TranslationError("文件中未找到可翻譯的文字段落") + + # 使用成功版本的翻譯邏輯 - 直接按段落翻譯,不做複雜分割 + translatable_segments = [] + for seg in segments: + if self.document_processor.should_translate_text(seg.text, job.source_language): + translatable_segments.append(seg) + + logger.info(f"Found {len(translatable_segments)} segments to translate") + + # 批次翻譯 - 直接按原始段落翻譯 + translation_map = {} # 格式: (target_language, source_text) -> translated_text + total_segments = len(translatable_segments) + + for target_language in job.target_languages: + logger.info(f"Translating to {target_language}") + + # 每個目標語言使用獨立的對話ID以保持該語言的翻譯一致性 + current_conversation_id = None + + for i, seg in enumerate(translatable_segments): + try: + # 根據段落類型選擇適當的翻譯方法 + if seg.kind == "table_cell": + # 表格儲存格使用整個儲存格為單位的翻譯方法 + translated = self.translate_word_table_cell( + text=seg.text, + source_language=job.source_language, + target_language=target_language, + user_id=job.user_id, + job_id=job.id + ) + else: + # 一般段落使用原有的句子切片方法 + translation_result = self.translate_segment_with_sentences( + text=seg.text, + source_language=job.source_language, + target_language=target_language, + user_id=job.user_id, + job_id=job.id, + conversation_id=current_conversation_id + ) + + translated = 
translation_result['translated_text'] + # 更新當前對話ID以保持上下文連續性 + if translation_result.get('conversation_id'): + current_conversation_id = translation_result['conversation_id'] + + # 直接以原始段落文字為鍵儲存翻譯結果 + translation_map[(target_language, seg.text)] = translated + + # 更新進度 + progress = (i + 1) / total_segments * 100 / len(job.target_languages) + current_lang_index = job.target_languages.index(target_language) + total_progress = (current_lang_index * 100 + progress) / len(job.target_languages) + job.update_status('PROCESSING', progress=total_progress) + + # 短暫延遲避免過快請求 + time.sleep(0.1) + + except Exception as e: + logger.error(f"Failed to translate segment: {seg.text[:50]}... Error: {str(e)}") + # 翻譯失敗時保留原文 + translation_map[(target_language, seg.text)] = f"[翻譯失敗] {seg.text}" + + # 保存該語言的對話ID到任務記錄中(用於後續重試等場景) + if current_conversation_id and not job.conversation_id: + job.conversation_id = current_conversation_id + db.session.commit() + logger.info(f"Saved conversation_id {current_conversation_id} for job {job.job_uuid}") + + # 生成翻譯文件 + logger.info("Generating translated documents with enhanced insertion") + output_dir = Path(job.file_path).parent + output_files = {} + + for target_language in job.target_languages: + try: + # 生成輸出檔名 + output_filename = generate_filename( + Path(job.file_path).name, + 'translated', + 'translated', + target_language + ) + output_path = output_dir / output_filename + + # 使用增強的翻譯插入邏輯 + ok_count, skip_count = self.document_processor.insert_docx_translations( + job.file_path, + segments, + translation_map, + [target_language], + str(output_path) + ) + + output_files[target_language] = str(output_path) + + # 記錄翻譯檔案到資料庫 + file_size = Path(output_path).stat().st_size + job.add_translated_file( + language_code=target_language, + filename=Path(output_path).name, + file_path=str(output_path), + file_size=file_size + ) + + logger.info(f"Generated {target_language}: {ok_count} insertions, {skip_count} skips") + + except Exception as e: + 
logger.error(f"Failed to generate translated document for {target_language}: {str(e)}") + raise TranslationError(f"生成 {target_language} 翻譯文件失敗: {str(e)}") + + # 生成組合多語言檔案 - 包含所有翻譯在一個文件中 + if len(job.target_languages) > 1: + try: + # 生成組合檔案的檔名 + combined_filename = generate_filename( + Path(job.file_path).name, + 'translated', + 'combined', + 'multilang' + ) + combined_output_path = output_dir / combined_filename + + # 使用新的組合翻譯插入方法 + combined_ok_count, combined_skip_count = self.document_processor.insert_docx_combined_translations( + job.file_path, + segments, + translation_map, + job.target_languages, + str(combined_output_path) + ) + + output_files['combined'] = str(combined_output_path) + + # 記錄組合翻譯檔案到資料庫 + file_size = Path(combined_output_path).stat().st_size + job.add_translated_file( + language_code='combined', + filename=Path(combined_output_path).name, + file_path=str(combined_output_path), + file_size=file_size + ) + + logger.info(f"Generated combined multi-language file: {combined_ok_count} insertions, {combined_skip_count} skips") + + except Exception as e: + logger.error(f"Failed to generate combined multi-language document: {str(e)}") + # 不要因為組合檔案失敗而讓整個任務失敗,只記錄警告 + logger.warning("Combined multi-language file generation failed, but individual files were successful") + + elif file_ext in ['.xlsx', '.xls']: + # Excel 文件使用儲存格為單位的翻譯邏輯 + logger.info(f"Using cell-based processing for Excel files") + parser = self.get_document_parser(job.file_path) + + # 提取儲存格文字內容(不進行句子切片) + cell_segments = parser.extract_text_segments() + + if not cell_segments: + raise TranslationError("Excel 文件中未找到可翻譯的文字") + + logger.info(f"Found {len(cell_segments)} cell segments to translate") + + # 批次翻譯 - 使用儲存格為單位的翻譯方法 + translation_results = {} + total_segments = len(cell_segments) + + for target_language in job.target_languages: + logger.info(f"Translating Excel cells to {target_language}") + translated_cells = [] + current_conversation_id = job.conversation_id # 維持上下文連貫性 + + for i, 
cell_text in enumerate(cell_segments): + try: + # 使用新的儲存格翻譯方法(整個儲存格作為單位) + translated = self.translate_excel_cell( + text=cell_text, + source_language=job.source_language, + target_language=target_language, + user_id=job.user_id, + job_id=job.id, + conversation_id=current_conversation_id # 傳遞 conversation_id + ) + # 提取翻譯文字(translate_excel_cell 現在返回 dict) + translated_text = translated["translated_text"] if isinstance(translated, dict) else translated + translated_cells.append(translated_text) + + # 更新 conversation_id 以維持連續對話上下文 + if isinstance(translated, dict) and translated.get("conversation_id"): + current_conversation_id = translated["conversation_id"] + + # 更新進度 + progress = (i + 1) / total_segments * 100 / len(job.target_languages) + current_lang_index = job.target_languages.index(target_language) + total_progress = (current_lang_index * 100 + progress) / len(job.target_languages) + job.update_status('PROCESSING', progress=total_progress) + + time.sleep(0.1) + + except Exception as e: + logger.error(f"Failed to translate Excel cell: {cell_text[:50]}... 
Error: {str(e)}") + translated_cells.append(f"[翻譯失敗] {cell_text}") + + translation_results[target_language] = translated_cells + + # 生成翻譯文件 + output_dir = Path(job.file_path).parent + output_files = {} + + for target_language, translations in translation_results.items(): + translation_mapping = {target_language: translations} + + output_file = parser.generate_translated_document( + translations=translation_mapping, + target_language=target_language, + output_dir=output_dir + ) + + output_files[target_language] = output_file + + file_size = Path(output_file).stat().st_size + job.add_translated_file( + language_code=target_language, + filename=Path(output_file).name, + file_path=output_file, + file_size=file_size + ) + + # 生成組合多語言Excel檔案 + if len(job.target_languages) > 1: + try: + # 生成組合檔案的檔名 + combined_filename = generate_filename( + Path(job.file_path).name, + 'translated', + 'combined', + 'multilang' + ) + combined_output_path = output_dir / combined_filename + + # 為Excel組合檔案建立翻譯映射 + combined_translation_mapping = {} + for lang in job.target_languages: + combined_translation_mapping[lang] = translation_results[lang] + + # 使用修改過的generate_combined_excel_document方法 + combined_output_file = self._generate_combined_excel_document( + parser, + combined_translation_mapping, + job.target_languages, + combined_output_path + ) + + output_files['combined'] = combined_output_file + + # 記錄組合翻譯檔案到資料庫 + file_size = Path(combined_output_file).stat().st_size + job.add_translated_file( + language_code='combined', + filename=Path(combined_output_file).name, + file_path=combined_output_file, + file_size=file_size + ) + + logger.info(f"Generated combined multi-language Excel file") + + except Exception as e: + logger.error(f"Failed to generate combined multi-language Excel document: {str(e)}") + logger.warning("Combined multi-language Excel file generation failed, but individual files were successful") + + elif file_ext == '.pptx': + # PowerPoint 文件使用增強的處理邏輯,仿照 DOCX 處理方式 + 
logger.info(f"Using enhanced PowerPoint processing for {job_uuid}") + parser = self.get_document_parser(job.file_path) + + # 提取文字段落和表格內容 + text_segments = parser.extract_text_segments() + + if not text_segments: + raise TranslationError("PowerPoint 文件中未找到可翻譯的文字") + + logger.info(f"Found {len(text_segments)} PowerPoint text segments to translate") + + # 批次翻譯 - 建立翻譯映射 + translation_map = {} # 格式: (target_language, source_text) -> translated_text + total_segments = len(text_segments) + + for target_language in job.target_languages: + logger.info(f"Translating PowerPoint segments to {target_language}") + translated_segments = [] + current_conversation_id = job.conversation_id # 維持上下文連貫性 + + for i, segment_text in enumerate(text_segments): + try: + # 對於 PowerPoint 文字框和表格,使用段落級別的翻譯 + translated = self.translate_segment_with_sentences( + text=segment_text, + source_language=job.source_language, + target_language=target_language, + user_id=job.user_id, + job_id=job.id, + conversation_id=current_conversation_id # 傳遞 conversation_id + ) + + # 使用與 DOCX 相同的格式儲存翻譯結果 + translation_map[(target_language, segment_text)] = translated + + # 更新 conversation_id 以維持連續對話上下文 + if isinstance(translated, dict) and translated.get("conversation_id"): + current_conversation_id = translated["conversation_id"] + + # 更新進度 + progress = (i + 1) / total_segments * 100 / len(job.target_languages) + current_lang_index = job.target_languages.index(target_language) + total_progress = (current_lang_index * 100 + progress) / len(job.target_languages) + job.update_status('PROCESSING', progress=total_progress) + + time.sleep(0.1) + + except Exception as e: + logger.error(f"Failed to translate PowerPoint segment: {segment_text[:50]}... 
Error: {str(e)}") + # 翻譯失敗時保留原文 + translation_map[(target_language, segment_text)] = f"[翻譯失敗] {segment_text}" + + # 生成翻譯文件 - 仿照 DOCX 的方式 + logger.info("Generating translated PowerPoint documents with enhanced insertion") + output_dir = Path(job.file_path).parent + output_files = {} + + # 生成單語言文件 + for target_language in job.target_languages: + try: + # 生成輸出檔名 + output_filename = generate_filename( + Path(job.file_path).name, + 'translated', + 'translated', + target_language + ) + output_path = output_dir / output_filename + + # 使用增強的翻譯插入邏輯 + ok_count, skip_count = parser.insert_pptx_translations( + translation_map, + [target_language], + str(output_path) + ) + + output_files[target_language] = str(output_path) + + # 記錄翻譯檔案到資料庫 + file_size = Path(output_path).stat().st_size + job.add_translated_file( + language_code=target_language, + filename=Path(output_path).name, + file_path=str(output_path), + file_size=file_size + ) + + logger.info(f"Generated {target_language}: {ok_count} insertions, {skip_count} skips") + + except Exception as e: + logger.error(f"Failed to generate translated PowerPoint document for {target_language}: {str(e)}") + raise TranslationError(f"生成 {target_language} PowerPoint 翻譯文件失敗: {str(e)}") + + # 生成組合多語言檔案 - 包含所有翻譯在一個文件中 + if len(job.target_languages) > 1: + try: + # 生成組合檔案的檔名 + combined_filename = generate_filename( + Path(job.file_path).name, + 'translated', + 'combined', + 'multilang' + ) + combined_output_path = output_dir / combined_filename + + # 使用組合翻譯插入方法 + combined_ok_count, combined_skip_count = parser.insert_pptx_combined_translations( + translation_map, + job.target_languages, + str(combined_output_path) + ) + + output_files['combined'] = str(combined_output_path) + + # 記錄組合翻譯檔案到資料庫 + file_size = Path(combined_output_path).stat().st_size + job.add_translated_file( + language_code='combined', + filename=Path(combined_output_path).name, + file_path=str(combined_output_path), + file_size=file_size + ) + + logger.info(f"Generated 
combined multi-language PowerPoint file: {combined_ok_count} insertions, {combined_skip_count} skips") + + except Exception as e: + logger.error(f"Failed to generate combined multi-language PowerPoint document: {str(e)}") + # 不要因為組合檔案失敗而讓整個任務失敗,只記錄警告 + logger.warning("Combined multi-language PowerPoint file generation failed, but individual files were successful") + + elif file_ext == '.pdf': + # PDF 文件使用增強的OCR處理邏輯(避免重複OCR) + logger.info(f"Using enhanced PDF processing for {job_uuid}") + + from app.services.enhanced_pdf_parser import EnhancedPdfParser + enhanced_parser = EnhancedPdfParser(job.file_path) + + # 提取文字片段(會使用OCR快取避免重複處理) + text_segments = enhanced_parser.extract_text_segments(user_id=job.user_id, job_id=job.id) + + if not text_segments: + raise TranslationError("PDF文件中未找到可翻譯的文字") + + logger.info(f"Found {len(text_segments)} PDF text segments to translate") + + # 批次翻譯PDF文字段落 + translation_results = {} + total_segments = len(text_segments) + + for target_language in job.target_languages: + logger.info(f"Translating PDF segments to {target_language}") + translated_segments = [] + current_conversation_id = job.conversation_id # 維持上下文連貫性 + + for i, segment_text in enumerate(text_segments): + try: + # 對於PDF段落,使用段落級別的翻譯(保留段落結構) + translated = self.translate_segment_with_sentences( + text=segment_text, + source_language=job.source_language, + target_language=target_language, + user_id=job.user_id, + job_id=job.id, + conversation_id=current_conversation_id # 傳遞 conversation_id + ) + # 提取翻譯文字(translate_segment_with_sentences 返回 dict) + translated_text = translated['translated_text'] if isinstance(translated, dict) else translated + translated_segments.append(translated_text) + + # 更新 conversation_id 以維持連續對話上下文 + if isinstance(translated, dict) and translated.get('conversation_id'): + current_conversation_id = translated['conversation_id'] + + # 更新進度 + progress = (i + 1) / total_segments * 100 / len(job.target_languages) + current_lang_index = 
job.target_languages.index(target_language) + total_progress = (current_lang_index * 100 + progress) / len(job.target_languages) + job.update_status('PROCESSING', progress=total_progress) + + time.sleep(0.1) + + except Exception as e: + logger.error(f"Failed to translate PDF segment: {segment_text[:50]}... Error: {str(e)}") + translated_segments.append(f"[翻譯失敗] {segment_text}") + + translation_results[target_language] = translated_segments + + # 生成翻譯Word文件 + logger.info("Generating translated Word documents from PDF") + output_dir = Path(job.file_path).parent + output_files = {} + + for target_language, translations in translation_results.items(): + try: + # 使用增強PDF解析器生成Word文檔 + output_file = enhanced_parser.generate_translated_document( + translations={target_language: translations}, + target_language=target_language, + output_dir=output_dir + ) + + output_files[target_language] = output_file + + # 記錄翻譯檔案到資料庫 + file_size = Path(output_file).stat().st_size + job.add_translated_file( + language_code=target_language, + filename=Path(output_file).name, + file_path=output_file, + file_size=file_size + ) + + logger.info(f"Generated PDF translation for {target_language}: {output_file}") + + except Exception as e: + logger.error(f"Failed to generate PDF translated document for {target_language}: {str(e)}") + raise TranslationError(f"生成PDF {target_language} 翻譯文件失敗: {str(e)}") + + # 生成組合多語言文檔 - 譯文1/譯文2格式(當有多個目標語言時) + if len(job.target_languages) > 1: + try: + logger.info("Generating combined multi-language PDF document") + combined_output_file = enhanced_parser.generate_combined_translated_document( + all_translations=translation_results, + target_languages=job.target_languages, + output_dir=output_dir + ) + + output_files['combined'] = combined_output_file + + # 記錄組合翻譯檔案到資料庫 + file_size = Path(combined_output_file).stat().st_size + job.add_translated_file( + language_code='combined', + filename=Path(combined_output_file).name, + file_path=combined_output_file, + 
file_size=file_size + ) + + logger.info(f"Generated combined multi-language PDF file: {combined_output_file}") + + except Exception as e: + logger.error(f"Failed to generate combined multi-language PDF document: {str(e)}") + # 不要因為組合檔案失敗而讓整個任務失敗,只記錄警告 + logger.warning("Combined multi-language PDF file generation failed, but individual files were successful") + + else: + # 對於其他文件格式,使用原有邏輯 + logger.info(f"Using legacy sentence-based processing for {file_ext} files") + parser = self.get_document_parser(job.file_path) + + # 提取文字片段 - 对PDF传递user_id和job_id以支持OCR + if file_ext == '.pdf': + text_segments = parser.extract_text_segments(user_id=job.user_id, job_id=job.id) + else: + text_segments = parser.extract_text_segments() + + if not text_segments: + raise TranslationError("文件中未找到可翻譯的文字") + + # 分割成句子 + all_sentences = [] + for segment in text_segments: + sentences = self.split_text_into_sentences(segment, job.source_language) + all_sentences.extend(sentences) + + # 去重複 + unique_sentences = list(dict.fromkeys(all_sentences)) + logger.info(f"Found {len(unique_sentences)} unique sentences to translate") + + # 批次翻譯 + translation_results = {} + total_sentences = len(unique_sentences) + + for target_language in job.target_languages: + logger.info(f"Translating to {target_language}") + translated_sentences = [] + current_conversation_id = job.conversation_id # 維持上下文連貫性 + + for i, sentence in enumerate(unique_sentences): + try: + translation_result = self.translate_text_with_cache( + text=sentence, + source_language=job.source_language, + target_language=target_language, + user_id=job.user_id, + job_id=job.id, + conversation_id=current_conversation_id # 傳遞 conversation_id + ) + translated_sentences.append(translation_result['translated_text']) + + # 更新 conversation_id 以維持連續對話上下文 + if translation_result.get("conversation_id"): + current_conversation_id = translation_result["conversation_id"] + + # 更新進度 + progress = (i + 1) / total_sentences * 100 / len(job.target_languages) + 
current_lang_index = job.target_languages.index(target_language) + total_progress = (current_lang_index * 100 + progress) / len(job.target_languages) + job.update_status('PROCESSING', progress=total_progress) + + time.sleep(0.1) + + except Exception as e: + logger.error(f"Failed to translate sentence: {sentence[:50]}... Error: {str(e)}") + translated_sentences.append(f"[翻譯失敗] {sentence}") + + translation_results[target_language] = translated_sentences + + # 生成翻譯文件 + output_dir = Path(job.file_path).parent + output_files = {} + + for target_language, translations in translation_results.items(): + translation_mapping = {target_language: translations} + + output_file = parser.generate_translated_document( + translations=translation_mapping, + target_language=target_language, + output_dir=output_dir + ) + + output_files[target_language] = output_file + + file_size = Path(output_file).stat().st_size + job.add_translated_file( + language_code=target_language, + filename=Path(output_file).name, + file_path=output_file, + file_size=file_size + ) + + # 計算總成本 + total_cost = self._calculate_job_cost(job.id) + + # 更新任務狀態為完成 + job.update_status('COMPLETED', progress=100) + job.total_cost = total_cost + # 計算實際使用的 token 數(從 API 使用統計中獲取) + from sqlalchemy import func + from app.models.stats import APIUsageStats + from app import db + + actual_tokens = db.session.query( + func.sum(APIUsageStats.total_tokens) + ).filter_by(job_id=job.id).scalar() + + job.total_tokens = int(actual_tokens) if actual_tokens else 0 + + db.session.commit() + + logger.info(f"Enhanced document translation completed: {job_uuid}") + + return { + 'success': True, + 'job_uuid': job_uuid, + 'output_files': output_files, + 'total_sentences': len(texts_to_translate) if 'texts_to_translate' in locals() else len(unique_sentences) if 'unique_sentences' in locals() else 0, + 'total_cost': float(total_cost), + 'target_languages': job.target_languages + } + + except TranslationError: + raise + except Exception as e: + 
logger.error(f"Enhanced document translation failed: {job_uuid}. Error: {str(e)}") + raise TranslationError(f"文件翻譯失敗: {str(e)}") + + def _calculate_job_cost(self, job_id: int) -> float: + """計算任務總成本""" + from app import db + from sqlalchemy import func + from app.models.stats import APIUsageStats + + total_cost = db.session.query( + func.sum(APIUsageStats.cost) + ).filter_by(job_id=job_id).scalar() + + return float(total_cost) if total_cost else 0.0 + + def _generate_combined_excel_document(self, parser, translation_mapping: Dict[str, List[str]], + target_languages: List[str], output_path: Path) -> str: + """生成包含所有翻譯語言的組合Excel檔案""" + try: + import openpyxl + from openpyxl.styles import Alignment, Font + from sqlalchemy import text as sql_text + from app import db + + # 載入原始工作簿 + wb = openpyxl.load_workbook(str(parser.file_path), data_only=False) + try: + wb_vals = openpyxl.load_workbook(str(parser.file_path), data_only=True) + except Exception: + wb_vals = None + + # 取得原始文字段落以建立翻譯映射 + original_segments = parser.extract_text_segments() + combined_tmap = {} + + logger.info(f"Building combined translation map for {len(original_segments)} segments") + + for original_text in original_segments: + # 從翻譯快取中查詢所有語言的翻譯 + for target_lang in target_languages: + result = db.session.execute(sql_text(""" + SELECT translated_text + FROM dt_translation_cache + WHERE source_text = :text AND target_language = :lang + ORDER BY created_at ASC + LIMIT 1 + """), {'text': original_text, 'lang': target_lang}) + + row = result.fetchone() + if row and row[0]: + combined_tmap[(target_lang, original_text)] = row[0] + + logger.info(f"Built combined translation map with {len(combined_tmap)} mappings") + + # 處理每個工作表,插入組合翻譯 + for ws in wb.worksheets: + logger.info(f"Processing combined worksheet: {ws.title}") + ws_vals = wb_vals[ws.title] if wb_vals and ws.title in wb_vals.sheetnames else None + max_row, max_col = ws.max_row, ws.max_column + + for r in range(1, max_row + 1): + for c in range(1, 
max_col + 1): + cell = ws.cell(row=r, column=c) + src_text = parser._get_display_text_for_translation(ws, ws_vals, r, c) + + if not src_text or not parser._should_translate(src_text, 'auto'): + continue + + # 收集所有語言的翻譯 + translations = [] + for target_lang in target_languages: + if (target_lang, src_text) in combined_tmap: + translations.append(combined_tmap[(target_lang, src_text)]) + else: + translations.append(f"【翻譯缺失|{target_lang}】") + + # 組合翻譯文字:原文\n英文\n越南文 + if translations: + combined_text = src_text + '\n' + '\n'.join(translations) + + # 設置儲存格值 + cell.value = combined_text + cell.alignment = Alignment(wrap_text=True, vertical='top') + cell.font = Font(size=10) + + # 儲存組合檔案 + wb.save(str(output_path)) + + logger.info(f"Generated combined Excel file: {output_path}") + return str(output_path) + + except Exception as e: + logger.error(f"Failed to generate combined Excel document: {str(e)}") + raise FileProcessingError(f"組合 Excel 檔案生成失敗: {str(e)}") + + def _generate_combined_pptx_document(self, parser, translation_results: Dict[str, List[str]], + target_languages: List[str], output_path: Path) -> str: + """生成包含所有翻譯語言的組合PowerPoint檔案""" + try: + import pptx + from sqlalchemy import text as sql_text + from app import db + + # 載入原始 PowerPoint 文件 + prs = pptx.Presentation(str(parser.file_path)) + + # 收集所有文字框和原始文字 + text_frames_data = [] + for slide in prs.slides: + for shape in slide.shapes: + if getattr(shape, "has_text_frame", False): + text = parser._extract_text_from_frame(shape.text_frame) + if text.strip(): + text_frames_data.append((shape.text_frame, text)) + + # 建立組合翻譯映射 - 從快取讀取所有語言的翻譯 + combined_translation_map = {} + logger.info(f"Building combined PowerPoint translation map for {len(text_frames_data)} text frames") + + for text_frame, original_text in text_frames_data: + # 從翻譯快取中查詢所有語言的翻譯 + for target_lang in target_languages: + result = db.session.execute(sql_text(""" + SELECT translated_text + FROM dt_translation_cache + WHERE source_text = :text AND 
target_language = :lang + ORDER BY created_at ASC + LIMIT 1 + """), {'text': original_text, 'lang': target_lang}) + + row = result.fetchone() + if row and row[0]: + combined_translation_map[(target_lang, original_text)] = row[0] + + logger.info(f"Built combined PowerPoint translation map with {len(combined_translation_map)} mappings") + + # 處理每個文字框,插入組合翻譯 + ok_count = skip_count = 0 + + for text_frame, original_text in text_frames_data: + # 收集所有語言的翻譯 + translations = [] + for target_lang in target_languages: + if (target_lang, original_text) in combined_translation_map: + translations.append(combined_translation_map[(target_lang, original_text)]) + else: + translations.append(f"【翻譯缺失|{target_lang}】") + + # 檢查是否已存在翻譯 + if parser._check_existing_translations(text_frame, translations): + skip_count += 1 + continue + + # 添加所有語言的翻譯 + for translation in translations: + parser._append_translation(text_frame, translation) + + ok_count += 1 + + # 儲存組合檔案 + prs.save(str(output_path)) + + logger.info(f"Generated combined PowerPoint file: {output_path} with {ok_count} frames, {skip_count} skips") + return str(output_path) + + except Exception as e: + logger.error(f"Failed to generate combined PowerPoint document: {str(e)}") + raise FileProcessingError(f"組合 PowerPoint 檔案生成失敗: {str(e)}") \ No newline at end of file diff --git a/app/tasks/__init__.py b/app/tasks/__init__.py new file mode 100644 index 0000000..94f238c --- /dev/null +++ b/app/tasks/__init__.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Celery 任務模組 + +Author: PANJIT IT Team +Created: 2024-01-28 +Modified: 2024-01-28 +""" + +from .translation import process_translation_job, cleanup_old_files + +__all__ = [ + 'process_translation_job', + 'cleanup_old_files' +] \ No newline at end of file diff --git a/app/tasks/translation.py b/app/tasks/translation.py new file mode 100644 index 0000000..5feb47b --- /dev/null +++ b/app/tasks/translation.py @@ -0,0 +1,350 @@ +#!/usr/bin/env python3 +# -*- 
@celery.task(bind=True, max_retries=3)
def process_translation_job(self, job_id: int):
    """Process one translation job as a Celery task.

    Loads the job, runs ``TranslationService.translate_document`` on it and
    sends completion notifications. On failure the job is marked ``RETRY`` and
    re-queued through ``self.retry`` with a 30s / 60s / 120s delay; once
    ``max_retries`` is used up the job is marked ``FAILED`` and the failure
    notifications are sent exactly once.

    Args:
        job_id: Primary key of the ``TranslationJob`` row to process.

    Raises:
        Exception: The original failure is re-raised after bookkeeping, unless
            ``self.retry`` raises first to schedule another attempt.
    """
    app = create_app()
    retry_delays = (30, 60, 120)

    with app.app_context():
        # Bound before the try so the error handler can always refer to it,
        # even when the failure is the job lookup itself.
        job = None
        try:
            job = TranslationJob.query.get(job_id)
            if not job:
                raise ValueError(f"Job {job_id} not found")

            logger.info(f"Starting translation job processing: {job.job_uuid}")

            SystemLog.info(
                'tasks.translation',
                f'Translation job started: {job.job_uuid}',
                user_id=job.user_id,
                job_id=job.id,
                extra_data={
                    'filename': job.original_filename,
                    'target_languages': job.target_languages,
                    'retry_count': self.request.retries
                }
            )

            translation_service = TranslationService()
            result = translation_service.translate_document(job.job_uuid)

            if result['success']:
                logger.info(f"Translation job completed successfully: {job.job_uuid}")

                # Reload the row so the notifications see the latest status.
                db.session.refresh(job)

                # Notification problems must not turn a finished job into a failure.
                try:
                    notification_service = NotificationService()
                    notification_service.send_job_completion_notification(job)
                    # Sent directly, skipping the status check.
                    notification_service.send_job_completion_db_notification_direct(job)
                except Exception as e:
                    logger.warning(f"Failed to send completion notification: {str(e)}")

                SystemLog.info(
                    'tasks.translation',
                    f'Translation job completed: {job.job_uuid}',
                    user_id=job.user_id,
                    job_id=job.id,
                    extra_data={
                        'total_cost': result.get('total_cost', 0),
                        'total_sentences': result.get('total_sentences', 0),
                        'output_files': list(result.get('output_files', {}).keys())
                    }
                )
            else:
                raise TranslationError(result.get('error', 'Unknown translation error'))

        except Exception as exc:
            # `job` is None when the lookup failed or found nothing; dereferencing
            # it here would replace the real error with an AttributeError.
            job_label = job.job_uuid if job else f"job_id={job_id}"
            logger.error(f"Translation job failed: {job_label}. Error: {str(exc)}")

            with app.app_context():
                job = TranslationJob.query.get(job_id)
                if job:
                    job.error_message = str(exc)
                    job.retry_count = self.request.retries + 1

                    if self.request.retries < self.max_retries:
                        job.update_status('RETRY')

                        # Back-off: 30s, 60s, 120s; clamp so a larger max_retries
                        # reuses the last delay instead of raising IndexError.
                        countdown = retry_delays[min(self.request.retries, len(retry_delays) - 1)]

                        SystemLog.warning(
                            'tasks.translation',
                            f'Translation job retry scheduled: {job.job_uuid} (attempt {self.request.retries + 2})',
                            user_id=job.user_id,
                            job_id=job.id,
                            extra_data={
                                'error': str(exc),
                                'retry_count': self.request.retries + 1,
                                'countdown': countdown
                            }
                        )

                        logger.info(f"Retrying translation job in {countdown}s: {job.job_uuid}")
                        raise self.retry(exc=exc, countdown=countdown)

                    else:
                        # Retries exhausted: mark the job as failed for good.
                        job.update_status('FAILED')

                        # E-mail and in-app failure notifications, sent once.
                        try:
                            notification_service = NotificationService()
                            notification_service.send_job_failure_notification(job)
                            notification_service.send_job_failure_db_notification(job, str(exc))
                        except Exception as e:
                            logger.warning(f"Failed to send failure notification: {str(e)}")

                        SystemLog.error(
                            'tasks.translation',
                            f'Translation job failed permanently: {job.job_uuid}',
                            user_id=job.user_id,
                            job_id=job.id,
                            extra_data={
                                'error': str(exc),
                                'total_retries': self.request.retries
                            }
                        )

                        logger.error(f"Translation job failed permanently: {job.job_uuid}")

            raise exc
'total_size_freed_mb': total_size_freed / (1024 * 1024), + 'retention_days': retention_days, + 'cutoff_date': cutoff_date.isoformat() + } + + SystemLog.info( + 'tasks.cleanup', + f'File cleanup completed: {deleted_dirs} directories, {total_size_freed / (1024 * 1024):.2f} MB freed', + extra_data=cleanup_result + ) + + logger.info(f"File cleanup completed: {cleanup_result}") + + return cleanup_result + + except Exception as e: + logger.error(f"File cleanup task failed: {str(e)}") + + SystemLog.error( + 'tasks.cleanup', + f'File cleanup task failed: {str(e)}', + extra_data={'error': str(e)} + ) + + raise e + + +@celery.task +def send_daily_admin_report(): + """發送每日管理員報告""" + app = create_app() + + with app.app_context(): + try: + logger.info("Generating daily admin report") + + from app.models.stats import APIUsageStats + from app.services.notification_service import NotificationService + + # 取得昨日統計 + yesterday = datetime.utcnow() - timedelta(days=1) + daily_stats = APIUsageStats.get_daily_statistics(days=1) + + # 取得系統錯誤摘要 + error_summary = SystemLog.get_error_summary(days=1) + + # 準備報告內容 + if daily_stats: + yesterday_data = daily_stats[0] + subject = f"每日系統報告 - {yesterday_data['date']}" + + message = f""" + 昨日系統使用狀況: + • 翻譯任務: {yesterday_data['total_calls']} 個 + • 成功任務: {yesterday_data['successful_calls']} 個 + • 失敗任務: {yesterday_data['failed_calls']} 個 + • 總成本: ${yesterday_data['total_cost']:.4f} + • 總 Token 數: {yesterday_data['total_tokens']} + + 系統錯誤摘要: + • 錯誤數量: {error_summary['total_errors']} + + 請查看管理後台了解詳細資訊。 + """ + else: + subject = f"每日系統報告 - {yesterday.strftime('%Y-%m-%d')}" + message = "昨日無翻譯任務記錄。" + + # 發送管理員通知 + notification_service = NotificationService() + result = notification_service.send_admin_notification(subject, message) + + if result: + logger.info("Daily admin report sent successfully") + else: + logger.warning("Failed to send daily admin report") + + return result + + except Exception as e: + logger.error(f"Daily admin report task failed: 
@celery.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
    """Register the recurring tasks once Celery has been configured.

    Two daily entries are added to ``sender``: file cleanup at 02:00 and the
    admin report at 08:00.
    """
    daily_schedule = (
        # (hour of day, task, schedule entry name)
        (2, cleanup_old_files, 'cleanup-old-files-daily'),
        (8, send_daily_admin_report, 'daily-admin-report'),
    )

    for hour, task, entry_name in daily_schedule:
        sender.add_periodic_task(
            crontab(hour=hour, minute=0),
            task.s(),
            name=entry_name
        )
self.api_base_url = "https://pj-auth-api.vercel.app" + self.login_endpoint = "/api/auth/login" + self.logout_endpoint = "/api/auth/logout" + self.timeout = 30 # 30 秒超時 + + def authenticate_user(self, username: str, password: str) -> Dict[str, Any]: + """ + 透過 API 驗證使用者憑證 + + Args: + username: 使用者帳號 + password: 密碼 + + Returns: + Dict: 包含使用者資訊和 Token 的字典 + + Raises: + AuthenticationError: 認證失敗時拋出 + """ + try: + login_url = f"{self.api_base_url}{self.login_endpoint}" + + payload = { + "username": username, + "password": password + } + + headers = { + "Content-Type": "application/json" + } + + logger.info(f"正在透過 API 驗證使用者: {username}") + + # 發送認證請求 + response = requests.post( + login_url, + json=payload, + headers=headers, + timeout=self.timeout + ) + + # 解析回應 + if response.status_code == 200: + data = response.json() + + if data.get('success'): + logger.info(f"API 認證成功: {username}") + return self._parse_auth_response(data) + else: + error_msg = data.get('error', '認證失敗') + logger.warning(f"API 認證失敗: {username} - {error_msg}") + raise AuthenticationError(f"認證失敗: {error_msg}") + + elif response.status_code == 401: + data = response.json() + error_msg = data.get('error', '帳號或密碼錯誤') + logger.warning(f"API 認證失敗 (401): {username} - {error_msg}") + raise AuthenticationError("帳號或密碼錯誤") + + else: + logger.error(f"API 認證請求失敗: HTTP {response.status_code}") + raise AuthenticationError(f"認證服務錯誤 (HTTP {response.status_code})") + + except requests.exceptions.Timeout: + logger.error(f"API 認證請求超時: {username}") + raise AuthenticationError("認證服務回應超時,請稍後再試") + + except requests.exceptions.ConnectionError: + logger.error(f"API 認證連線錯誤: {username}") + raise AuthenticationError("無法連接認證服務,請檢查網路連線") + + except requests.exceptions.RequestException as e: + logger.error(f"API 認證請求錯誤: {username} - {str(e)}") + raise AuthenticationError(f"認證服務錯誤: {str(e)}") + + except json.JSONDecodeError: + logger.error(f"API 認證回應格式錯誤: {username}") + raise AuthenticationError("認證服務回應格式錯誤") + + except Exception as 
e: + logger.error(f"API 認證未知錯誤: {username} - {str(e)}") + raise AuthenticationError(f"認證過程發生錯誤: {str(e)}") + + def _parse_auth_response(self, data: Dict[str, Any]) -> Dict[str, Any]: + """ + 解析 API 認證回應 + + Args: + data: API 回應資料 + + Returns: + Dict: 標準化的使用者資訊 + """ + try: + auth_data = data.get('data', {}) + user_info = auth_data.get('userInfo', {}) + + # 解析 Token 過期時間 + expires_at = None + issued_at = None + + if 'expiresAt' in auth_data: + try: + expires_at = datetime.fromisoformat(auth_data['expiresAt'].replace('Z', '+00:00')) + except (ValueError, AttributeError): + logger.warning("無法解析 API Token 過期時間") + + if 'issuedAt' in auth_data: + try: + issued_at = datetime.fromisoformat(auth_data['issuedAt'].replace('Z', '+00:00')) + except (ValueError, AttributeError): + logger.warning("無法解析 API Token 發行時間") + + # 標準化使用者資訊 (方案 A: API name 是姓名+email 格式) + api_name = user_info.get('name', '') # 例: "劉怡明 ymirliu@panjit.com.tw" + api_email = user_info.get('email', '') # 例: "ymirliu@panjit.com.tw" + + result = { + # 基本使用者資訊 (方案 A: username 和 display_name 都用 API name) + 'username': api_name, # 姓名+email 格式 + 'display_name': api_name, # 姓名+email 格式 + 'email': api_email, # 純 email + 'department': user_info.get('jobTitle'), # 使用 jobTitle 作為部門 + 'user_principal_name': api_email, + + # API 特有資訊 + 'api_user_id': user_info.get('id', ''), # Azure Object ID + 'job_title': user_info.get('jobTitle'), + 'office_location': user_info.get('officeLocation'), + 'business_phones': user_info.get('businessPhones', []), + + # Token 資訊 + 'api_access_token': auth_data.get('access_token', ''), + 'api_id_token': auth_data.get('id_token', ''), + 'api_token_type': auth_data.get('token_type', 'Bearer'), + 'api_expires_in': auth_data.get('expires_in', 0), + 'api_issued_at': issued_at, + 'api_expires_at': expires_at, + + # 完整的 API 回應 (用於記錄) + 'full_api_response': data, + 'api_user_info': user_info + } + + return result + + except Exception as e: + logger.error(f"解析 API 回應時發生錯誤: {str(e)}") + raise 
AuthenticationError(f"解析認證回應時發生錯誤: {str(e)}") + + def logout_user(self, access_token: str) -> bool: + """ + 透過 API 登出使用者 + + Args: + access_token: 使用者的 access token + + Returns: + bool: 登出是否成功 + """ + try: + logout_url = f"{self.api_base_url}{self.logout_endpoint}" + + headers = { + "Authorization": f"Bearer {access_token}", + "Content-Type": "application/json" + } + + response = requests.post( + logout_url, + headers=headers, + timeout=self.timeout + ) + + if response.status_code == 200: + data = response.json() + if data.get('success'): + logger.info("API 登出成功") + return True + + logger.warning(f"API 登出失敗: HTTP {response.status_code}") + return False + + except Exception as e: + logger.error(f"API 登出時發生錯誤: {str(e)}") + return False + + def validate_token(self, access_token: str) -> bool: + """ + 驗證 Token 是否有效 + + Args: + access_token: 要驗證的 token + + Returns: + bool: Token 是否有效 + """ + try: + # 這裡可以實作 Token 驗證邏輯 + # 目前 API 沒有提供專門的驗證端點,可以考慮解析 JWT 或調用其他端點 + + # 簡單的檢查:Token 不能為空且格式看起來像 JWT + if not access_token or len(access_token.split('.')) != 3: + return False + + # TODO: 實作更完整的 JWT 驗證邏輯 + # 可以解析 JWT payload 檢查過期時間等 + + return True + + except Exception as e: + logger.error(f"驗證 Token 時發生錯誤: {str(e)}") + return False + + def test_connection(self) -> bool: + """ + 測試 API 連線 + + Returns: + bool: 連線是否正常 + """ + try: + # 嘗試連接 API 基礎端點 + response = requests.get( + self.api_base_url, + timeout=10 + ) + + return response.status_code in [200, 404] # 404 也算正常,表示能連接到伺服器 + + except Exception as e: + logger.error(f"API 連線測試失敗: {str(e)}") + return False + + def calculate_internal_expiry(self, api_expires_at: Optional[datetime], extend_days: int = 3) -> datetime: + """ + 計算內部 Token 過期時間 + + Args: + api_expires_at: API Token 過期時間 + extend_days: 延長天數 + + Returns: + datetime: 內部 Token 過期時間 + """ + if api_expires_at: + # 基於 API Token 過期時間延長 + return api_expires_at + timedelta(days=extend_days) + else: + # 如果沒有 API 過期時間,從現在開始計算 + return datetime.utcnow() + timedelta(days=extend_days) 
def login_required(f):
    """Require a logged-in session before running the wrapped view.

    Reads ``user_id`` from the Flask session and loads the matching ``User``.
    On success the user is published on ``g`` (``current_user``,
    ``current_user_id``, ``is_admin``) and the view runs. Otherwise a JSON
    error body is returned with HTTP 401; a session pointing at a user that no
    longer exists is cleared first.
    """
    @wraps(f)
    def decorated_function(*args, **kwargs):
        from app.utils.logger import get_logger
        from flask import request
        logger = get_logger(__name__)

        user_id = session.get('user_id')

        # Debug trail for session checks. Only the key names of the session are
        # logged: dumping the session values would copy whatever is stored in it
        # into the log files on every request.
        logger.info(f"🔐 [Session Check] Endpoint: {request.endpoint}, Method: {request.method}, URL: {request.url}")
        logger.info(f"🔐 [Session Data] UserID: {user_id}, SessionKeys: {list(session.keys())}")

        if not user_id:
            logger.warning(f"❌ [Auth Failed] No user_id in session for {request.endpoint}")
            return jsonify({
                'success': False,
                'error': 'AUTHENTICATION_REQUIRED',
                'message': '請先登入'
            }), 401

        # Load the user and expose it on `g` for the rest of the request.
        from app.models import User
        user = User.query.get(user_id)
        if not user:
            # The session refers to a user that does not exist: drop it.
            session.clear()
            return jsonify({
                'success': False,
                'error': 'USER_NOT_FOUND',
                'message': '使用者不存在'
            }), 401

        g.current_user = user
        g.current_user_id = user.id
        g.is_admin = user.is_admin

        return f(*args, **kwargs)

    return decorated_function
claims.get('is_admin', False) + + logger.info(f"🔑 [JWT Auth] User: {username}, UserID: {claims.get('user_id')}, Admin: {claims.get('is_admin')}") + + except Exception as e: + logger.error(f"❌ [JWT Auth] JWT validation failed: {str(e)}") + return jsonify({ + 'success': False, + 'error': 'AUTHENTICATION_REQUIRED', + 'message': '認證失效,請重新登入' + }), 401 + + return f(*args, **kwargs) + + return decorated_function + + +def admin_required(f): + """管理員權限裝飾器(使用JWT認證)""" + @wraps(f) + @jwt_required() + def decorated_function(*args, **kwargs): + from app.utils.logger import get_logger + from flask import request + logger = get_logger(__name__) + + try: + username = get_jwt_identity() + claims = get_jwt() + + # 設定到 g 物件供其他地方使用 + g.current_user_username = username + g.current_user_id = claims.get('user_id') + g.is_admin = claims.get('is_admin', False) + + logger.info(f"🔑 [JWT Admin Auth] User: {username}, UserID: {claims.get('user_id')}, Admin: {claims.get('is_admin')}") + + # 檢查管理員權限 + if not claims.get('is_admin', False): + logger.warning(f"❌ [Admin Auth] Permission denied for user: {username}") + return jsonify({ + 'success': False, + 'error': 'PERMISSION_DENIED', + 'message': '權限不足,需要管理員權限' + }), 403 + + # 驗證用戶是否存在且仍為管理員 + from app.models import User + user = User.query.get(claims.get('user_id')) + if not user: + logger.error(f"❌ [Admin Auth] User not found: {claims.get('user_id')}") + return jsonify({ + 'success': False, + 'error': 'USER_NOT_FOUND', + 'message': '使用者不存在' + }), 401 + + if not user.is_admin: + logger.warning(f"❌ [Admin Auth] User no longer admin: {username}") + return jsonify({ + 'success': False, + 'error': 'PERMISSION_DENIED', + 'message': '權限不足,需要管理員權限' + }), 403 + + # 設定完整用戶資訊 + g.current_user = user + + except Exception as e: + logger.error(f"❌ [Admin Auth] JWT validation failed: {str(e)}") + return jsonify({ + 'success': False, + 'error': 'AUTHENTICATION_REQUIRED', + 'message': '認證失效,請重新登入' + }), 401 + + return f(*args, **kwargs) + + return 
def rate_limit(max_requests=100, per_seconds=3600):
    """Limit how often one client address may call the wrapped view.

    Requests are tracked per (view name, ``request.remote_addr``) in a Redis
    sorted set scored by timestamp. Entries older than the window are dropped
    on each call; when the remaining count has reached ``max_requests`` a JSON
    error is returned with HTTP 429 instead of running the view.

    The limiter fails open: if Redis cannot be used the request is let through
    and the problem is logged.

    Args:
        max_requests: Requests allowed inside one window.
        per_seconds: Window length in seconds.
    """
    def decorator(f):
        @wraps(f)
        def decorated_function(*args, **kwargs):
            from flask import request
            import logging
            import redis
            import time
            import uuid

            try:
                redis_client = redis.from_url(current_app.config['REDIS_URL'])

                # One counter per view and client IP address.
                client_id = request.remote_addr
                key = f"rate_limit:{f.__name__}:{client_id}"

                current_time = int(time.time())
                window_start = current_time - per_seconds

                # Drop entries that have fallen out of the window.
                redis_client.zremrangebyscore(key, 0, window_start)

                current_requests = redis_client.zcard(key)

                if current_requests >= max_requests:
                    return jsonify({
                        'success': False,
                        'error': 'RATE_LIMIT_EXCEEDED',
                        'message': '請求過於頻繁,請稍後再試'
                    }), 429

                # Sorted-set members are unique, so the member must differ per
                # request: using the bare second-resolution timestamp made all
                # requests within the same second count as a single one.
                member = f"{current_time}:{uuid.uuid4().hex}"
                redis_client.zadd(key, {member: current_time})
                redis_client.expire(key, per_seconds)

            except Exception as exc:
                # Deliberately fail open when Redis is unavailable, but leave a
                # trace so a disabled limiter does not go unnoticed.
                logging.getLogger(__name__).warning(
                    "Rate limiting skipped for %s: %s", f.__name__, exc
                )

            return f(*args, **kwargs)

        return decorated_function
    return decorator
b/app/utils/exceptions.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +自定義例外模組 + +Author: PANJIT IT Team +Created: 2024-01-28 +Modified: 2024-01-28 +""" + + +class DocumentTranslatorError(Exception): + """文件翻譯系統基礎例外""" + def __init__(self, message, error_code=None): + self.message = message + self.error_code = error_code + super().__init__(self.message) + + +class AuthenticationError(DocumentTranslatorError): + """認證相關例外""" + pass + + +class ValidationError(DocumentTranslatorError): + """驗證相關例外""" + pass + + +class TranslationError(DocumentTranslatorError): + """翻譯相關例外""" + pass + + +class FileProcessingError(DocumentTranslatorError): + """檔案處理相關例外""" + pass + + +class APIError(DocumentTranslatorError): + """API 相關例外""" + pass + + +class ConfigurationError(DocumentTranslatorError): + """配置相關例外""" + pass + + +class DatabaseError(DocumentTranslatorError): + """資料庫相關例外""" + pass \ No newline at end of file diff --git a/app/utils/helpers.py b/app/utils/helpers.py new file mode 100644 index 0000000..ba0816f --- /dev/null +++ b/app/utils/helpers.py @@ -0,0 +1,280 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +輔助工具模組 + +Author: PANJIT IT Team +Created: 2024-01-28 +Modified: 2024-01-28 +""" + +import os +import uuid +import shutil +from pathlib import Path +from datetime import datetime +from werkzeug.utils import secure_filename +from flask import current_app + + +def generate_filename(original_filename, job_uuid, file_type='original', language_code=None): + """生成安全的檔案名稱""" + # 取得檔案副檔名 + file_ext = Path(original_filename).suffix.lower() + + # 清理原始檔名 + clean_name = Path(original_filename).stem + clean_name = secure_filename(clean_name)[:50] # 限制長度 + + if file_type == 'original': + return f"original_{clean_name}_{job_uuid[:8]}{file_ext}" + elif file_type == 'translated': + return f"translated_{clean_name}_{language_code}_{job_uuid[:8]}{file_ext}" + else: + return f"{file_type}_{clean_name}_{job_uuid[:8]}{file_ext}" + + +def 
create_job_directory(job_uuid): + """建立任務專用目錄""" + upload_folder = current_app.config.get('UPLOAD_FOLDER') + job_dir = Path(upload_folder) / job_uuid + + # 建立目錄 + job_dir.mkdir(parents=True, exist_ok=True) + + return job_dir + + +def save_uploaded_file(file_obj, job_uuid): + """儲存上傳的檔案""" + try: + # 建立任務目錄 + job_dir = create_job_directory(job_uuid) + + # 生成檔案名稱 + filename = generate_filename(file_obj.filename, job_uuid, 'original') + file_path = job_dir / filename + + # 儲存檔案 + file_obj.save(str(file_path)) + + # 取得檔案大小 + file_size = file_path.stat().st_size + + return { + 'success': True, + 'filename': filename, + 'file_path': str(file_path), + 'file_size': file_size + } + + except Exception as e: + return { + 'success': False, + 'error': str(e) + } + + +def cleanup_job_directory(job_uuid): + """清理任務目錄""" + try: + upload_folder = current_app.config.get('UPLOAD_FOLDER') + job_dir = Path(upload_folder) / job_uuid + + if job_dir.exists() and job_dir.is_dir(): + shutil.rmtree(job_dir) + return True + + return False + + except Exception: + return False + + +def format_file_size(size_bytes): + """格式化檔案大小""" + if size_bytes == 0: + return "0 B" + + size_names = ["B", "KB", "MB", "GB", "TB"] + i = 0 + while size_bytes >= 1024 and i < len(size_names) - 1: + size_bytes /= 1024.0 + i += 1 + + return f"{size_bytes:.1f} {size_names[i]}" + + +def get_file_icon(file_extension): + """根據副檔名取得檔案圖示""" + icon_map = { + '.docx': 'file-word', + '.doc': 'file-word', + '.pptx': 'file-powerpoint', + '.ppt': 'file-powerpoint', + '.xlsx': 'file-excel', + '.xls': 'file-excel', + '.pdf': 'file-pdf' + } + + return icon_map.get(file_extension.lower(), 'file') + + +def calculate_processing_time(start_time, end_time=None): + """計算處理時間""" + if not start_time: + return None + + if not end_time: + end_time = datetime.utcnow() + + if isinstance(start_time, str): + start_time = datetime.fromisoformat(start_time.replace('Z', '+00:00')) + + if isinstance(end_time, str): + end_time = 
datetime.fromisoformat(end_time.replace('Z', '+00:00')) + + duration = end_time - start_time + + # 轉換為秒 + total_seconds = int(duration.total_seconds()) + + if total_seconds < 60: + return f"{total_seconds}秒" + elif total_seconds < 3600: + minutes = total_seconds // 60 + seconds = total_seconds % 60 + return f"{minutes}分{seconds}秒" + else: + hours = total_seconds // 3600 + minutes = (total_seconds % 3600) // 60 + return f"{hours}小時{minutes}分" + + +def generate_download_token(job_uuid, language_code, user_id): + """生成下載令牌""" + import hashlib + import time + + # 組合資料 + data = f"{job_uuid}:{language_code}:{user_id}:{int(time.time())}" + + # 加上應用程式密鑰 + secret_key = current_app.config.get('SECRET_KEY', 'default_secret') + data_with_secret = f"{data}:{secret_key}" + + # 生成 hash + token = hashlib.sha256(data_with_secret.encode()).hexdigest() + + return token + + +def verify_download_token(token, job_uuid, language_code, user_id, max_age=3600): + """驗證下載令牌""" + import time + + try: + # 取得當前時間戳 + current_time = int(time.time()) + + # 在有效時間範圍內嘗試匹配令牌 + for i in range(max_age): + timestamp = current_time - i + expected_token = generate_download_token_with_timestamp( + job_uuid, language_code, user_id, timestamp + ) + + if token == expected_token: + return True + + return False + + except Exception: + return False + + +def generate_download_token_with_timestamp(job_uuid, language_code, user_id, timestamp): + """使用指定時間戳生成下載令牌""" + import hashlib + + data = f"{job_uuid}:{language_code}:{user_id}:{timestamp}" + secret_key = current_app.config.get('SECRET_KEY', 'default_secret') + data_with_secret = f"{data}:{secret_key}" + + return hashlib.sha256(data_with_secret.encode()).hexdigest() + + +def get_supported_languages(): + """取得支援的語言列表""" + return { + 'auto': '自動偵測', + 'zh-CN': '簡體中文', + 'zh-TW': '繁體中文', + 'en': '英文', + 'ja': '日文', + 'ko': '韓文', + 'vi': '越南文', + 'th': '泰文', + 'id': '印尼文', + 'ms': '馬來文', + 'es': '西班牙文', + 'fr': '法文', + 'de': '德文', + 'ru': '俄文' + } + + +def 
parse_json_field(json_str): + """安全解析JSON欄位""" + import json + + if not json_str: + return None + + try: + if isinstance(json_str, str): + return json.loads(json_str) + return json_str + except (json.JSONDecodeError, TypeError): + return None + + +def format_datetime(dt, format_type='full'): + """格式化日期時間""" + if not dt: + return None + + if isinstance(dt, str): + try: + dt = datetime.fromisoformat(dt.replace('Z', '+00:00')) + except ValueError: + return dt + + if format_type == 'date': + return dt.strftime('%Y-%m-%d') + elif format_type == 'time': + return dt.strftime('%H:%M:%S') + elif format_type == 'short': + return dt.strftime('%Y-%m-%d %H:%M') + else: # full + return dt.strftime('%Y-%m-%d %H:%M:%S') + + +def create_response(success=True, data=None, message=None, error=None, error_code=None): + """建立統一的API回應格式""" + response = { + 'success': success + } + + if data is not None: + response['data'] = data + + if message: + response['message'] = message + + if error: + response['error'] = error_code or 'ERROR' + if not message: + response['message'] = error + + return response \ No newline at end of file diff --git a/app/utils/image_preprocessor.py b/app/utils/image_preprocessor.py new file mode 100644 index 0000000..a535df3 --- /dev/null +++ b/app/utils/image_preprocessor.py @@ -0,0 +1,248 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +圖像預處理工具 - 用於提升 OCR 識別準確度 + +Author: PANJIT IT Team +Created: 2025-10-01 +Modified: 2025-10-01 +""" + +import io +import numpy as np +from PIL import Image, ImageEnhance, ImageFilter +from typing import Optional, Tuple +from app.utils.logger import get_logger + +logger = get_logger(__name__) + +# 檢查 OpenCV 是否可用 +try: + import cv2 + _HAS_OPENCV = True + logger.info("OpenCV is available for advanced image preprocessing") +except ImportError: + _HAS_OPENCV = False + logger.warning("OpenCV not available, using PIL-only preprocessing") + + +class ImagePreprocessor: + """圖像預處理器 - 提升掃描文件 OCR 品質""" + + def __init__(self, 
use_opencv: bool = True): + """ + 初始化圖像預處理器 + + Args: + use_opencv: 是否使用 OpenCV 進行進階處理(若可用) + """ + self.use_opencv = use_opencv and _HAS_OPENCV + logger.info(f"ImagePreprocessor initialized (OpenCV: {self.use_opencv})") + + def preprocess_for_ocr(self, image_bytes: bytes, + enhance_level: str = 'medium') -> bytes: + """ + 對圖像進行 OCR 前處理 + + Args: + image_bytes: 原始圖像字節數據 + enhance_level: 增強級別 ('low', 'medium', 'high') + + Returns: + 處理後的圖像字節數據 (PNG格式) + """ + try: + # 1. 載入圖像 + image = Image.open(io.BytesIO(image_bytes)) + original_mode = image.mode + logger.debug(f"Original image: {image.size}, mode={original_mode}") + + # 2. 轉換為 RGB (如果需要) + if image.mode not in ('RGB', 'L'): + image = image.convert('RGB') + logger.debug(f"Converted to RGB mode") + + # 3. 根據增強級別選擇處理流程 + if self.use_opencv: + processed_image = self._preprocess_with_opencv(image, enhance_level) + else: + processed_image = self._preprocess_with_pil(image, enhance_level) + + # 4. 轉換為 PNG 字節 + output_buffer = io.BytesIO() + processed_image.save(output_buffer, format='PNG', optimize=True) + processed_bytes = output_buffer.getvalue() + + logger.info(f"Image preprocessed: {len(image_bytes)} -> {len(processed_bytes)} bytes (level={enhance_level})") + return processed_bytes + + except Exception as e: + logger.error(f"Image preprocessing failed: {e}, returning original image") + return image_bytes # 失敗時返回原圖 + + def _preprocess_with_opencv(self, image: Image.Image, level: str) -> Image.Image: + """使用 OpenCV 進行進階圖像處理""" + # PIL Image -> NumPy array + img_array = np.array(image) + + # 轉換為 BGR (OpenCV 格式) + if len(img_array.shape) == 3 and img_array.shape[2] == 3: + img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR) + else: + img_bgr = img_array + + # 1. 灰階化 + gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY) + logger.debug("Applied grayscale conversion (OpenCV)") + + # 2. 
去噪 - 根據級別調整 + if level == 'high': + # 高級別:較強去噪 + denoised = cv2.fastNlMeansDenoising(gray, None, h=10, templateWindowSize=7, searchWindowSize=21) + logger.debug("Applied strong denoising (h=10)") + elif level == 'medium': + # 中級別:中等去噪 + denoised = cv2.fastNlMeansDenoising(gray, None, h=7, templateWindowSize=7, searchWindowSize=21) + logger.debug("Applied medium denoising (h=7)") + else: + # 低級別:輕度去噪 + denoised = cv2.bilateralFilter(gray, 5, 50, 50) + logger.debug("Applied light denoising (bilateral)") + + # 3. 對比度增強 - CLAHE + clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) + enhanced = clahe.apply(denoised) + logger.debug("Applied CLAHE contrast enhancement") + + # 4. 銳化 (高級別才使用) + if level == 'high': + kernel = np.array([[-1,-1,-1], + [-1, 9,-1], + [-1,-1,-1]]) + sharpened = cv2.filter2D(enhanced, -1, kernel) + logger.debug("Applied sharpening filter") + else: + sharpened = enhanced + + # 5. 自適應二值化 (根據級別決定是否使用) + if level in ('medium', 'high'): + # 使用自適應閾值 + binary = cv2.adaptiveThreshold( + sharpened, 255, + cv2.ADAPTIVE_THRESH_GAUSSIAN_C, + cv2.THRESH_BINARY, + blockSize=11, + C=2 + ) + logger.debug("Applied adaptive thresholding") + final_image = binary + else: + final_image = sharpened + + # NumPy array -> PIL Image + return Image.fromarray(final_image) + + def _preprocess_with_pil(self, image: Image.Image, level: str) -> Image.Image: + """使用 PIL 進行基礎圖像處理(當 OpenCV 不可用時)""" + + # 1. 灰階化 + gray = image.convert('L') + logger.debug("Applied grayscale conversion (PIL)") + + # 2. 對比度增強 + enhancer = ImageEnhance.Contrast(gray) + if level == 'high': + contrast_factor = 2.0 + elif level == 'medium': + contrast_factor = 1.5 + else: + contrast_factor = 1.2 + + enhanced = enhancer.enhance(contrast_factor) + logger.debug(f"Applied contrast enhancement (factor={contrast_factor})") + + # 3. 
銳化 + if level in ('medium', 'high'): + sharpness = ImageEnhance.Sharpness(enhanced) + sharp_factor = 2.0 if level == 'high' else 1.5 + sharpened = sharpness.enhance(sharp_factor) + logger.debug(f"Applied sharpening (factor={sharp_factor})") + else: + sharpened = enhanced + + # 4. 去噪 (使用中值濾波) + if level == 'high': + denoised = sharpened.filter(ImageFilter.MedianFilter(size=3)) + logger.debug("Applied median filter (size=3)") + else: + denoised = sharpened + + return denoised + + def auto_detect_enhance_level(self, image_bytes: bytes) -> str: + """ + 自動偵測最佳增強級別 + + Args: + image_bytes: 圖像字節數據 + + Returns: + 建議的增強級別 ('low', 'medium', 'high') + """ + try: + image = Image.open(io.BytesIO(image_bytes)) + + if self.use_opencv: + # 使用 OpenCV 計算圖像品質指標 + img_array = np.array(image.convert('L')) + + # 計算拉普拉斯方差 (評估清晰度) + laplacian_var = cv2.Laplacian(img_array, cv2.CV_64F).var() + + # 計算對比度 (標準差) + contrast = np.std(img_array) + + logger.debug(f"Image quality metrics: laplacian_var={laplacian_var:.2f}, contrast={contrast:.2f}") + + # 根據指標決定增強級別 + if laplacian_var < 50 or contrast < 40: + # 模糊或低對比度 -> 高級別增強 + return 'high' + elif laplacian_var < 100 or contrast < 60: + # 中等品質 -> 中級別增強 + return 'medium' + else: + # 高品質 -> 低級別增強 + return 'low' + else: + # PIL 簡易判斷 + gray = image.convert('L') + img_array = np.array(gray) + + # 簡單對比度評估 + contrast = np.std(img_array) + + if contrast < 40: + return 'high' + elif contrast < 60: + return 'medium' + else: + return 'low' + + except Exception as e: + logger.error(f"Auto enhance level detection failed: {e}") + return 'medium' # 預設使用中級別 + + def preprocess_smart(self, image_bytes: bytes) -> bytes: + """ + 智能預處理 - 自動偵測並應用最佳處理級別 + + Args: + image_bytes: 原始圖像字節數據 + + Returns: + 處理後的圖像字節數據 + """ + enhance_level = self.auto_detect_enhance_level(image_bytes) + logger.info(f"Auto-detected enhancement level: {enhance_level}") + return self.preprocess_for_ocr(image_bytes, enhance_level) diff --git a/app/utils/ldap_auth.py b/app/utils/ldap_auth.py new 
file mode 100644 index 0000000..ad1735e --- /dev/null +++ b/app/utils/ldap_auth.py @@ -0,0 +1,232 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +LDAP 認證服務 + +Author: PANJIT IT Team +Created: 2024-01-28 +Modified: 2024-01-28 +""" + +import time +from ldap3 import Server, Connection, SUBTREE, ALL_ATTRIBUTES +from flask import current_app +from .logger import get_logger +from .exceptions import AuthenticationError + +logger = get_logger(__name__) + + +class LDAPAuthService: + """LDAP 認證服務""" + + def __init__(self): + self.config = current_app.config + self.server_url = self.config.get('LDAP_SERVER') + self.port = self.config.get('LDAP_PORT', 389) + self.use_ssl = self.config.get('LDAP_USE_SSL', False) + self.bind_user_dn = self.config.get('LDAP_BIND_USER_DN') + self.bind_password = self.config.get('LDAP_BIND_USER_PASSWORD') + self.search_base = self.config.get('LDAP_SEARCH_BASE') + self.login_attr = self.config.get('LDAP_USER_LOGIN_ATTR', 'userPrincipalName') + + def create_connection(self, retries=3): + """建立 LDAP 連線(帶重試機制)""" + for attempt in range(retries): + try: + server = Server( + self.server_url, + port=self.port, + use_ssl=self.use_ssl, + get_info=ALL_ATTRIBUTES + ) + + conn = Connection( + server, + user=self.bind_user_dn, + password=self.bind_password, + auto_bind=True, + raise_exceptions=True + ) + + logger.info("LDAP connection established successfully") + return conn + + except Exception as e: + logger.error(f"LDAP connection attempt {attempt + 1} failed: {str(e)}") + if attempt == retries - 1: + raise AuthenticationError(f"LDAP connection failed: {str(e)}") + time.sleep(1) + + return None + + def authenticate_user(self, username, password): + """驗證使用者憑證""" + try: + conn = self.create_connection() + if not conn: + raise AuthenticationError("Unable to connect to LDAP server") + + # 搜尋使用者 + search_filter = f"(&(objectClass=person)(objectCategory=person)({self.login_attr}={username}))" + + conn.search( + self.search_base, + search_filter, + 
SUBTREE, + attributes=['displayName', 'mail', 'sAMAccountName', 'userPrincipalName', 'department'] + ) + + if not conn.entries: + logger.warning(f"User not found: {username}") + raise AuthenticationError("帳號不存在") + + user_entry = conn.entries[0] + user_dn = user_entry.entry_dn + + # 驗證使用者密碼 + try: + user_conn = Connection( + conn.server, + user=user_dn, + password=password, + auto_bind=True, + raise_exceptions=True + ) + user_conn.unbind() + + # 返回使用者資訊 + user_info = { + 'username': str(user_entry.sAMAccountName) if user_entry.sAMAccountName else username, + 'display_name': str(user_entry.displayName) if user_entry.displayName else username, + 'email': str(user_entry.mail) if user_entry.mail else f"{username}@panjit.com.tw", + 'department': str(user_entry.department) if hasattr(user_entry, 'department') and user_entry.department else None, + 'user_principal_name': str(user_entry.userPrincipalName) if user_entry.userPrincipalName else username + } + + logger.info(f"User authenticated successfully: {username}") + return user_info + + except Exception as e: + logger.warning(f"Authentication failed for user {username}: {str(e)}") + raise AuthenticationError("密碼錯誤") + + except AuthenticationError: + raise + except Exception as e: + logger.error(f"LDAP authentication error: {str(e)}") + raise AuthenticationError(f"認證服務錯誤: {str(e)}") + + finally: + if 'conn' in locals() and conn: + conn.unbind() + + def search_users(self, search_term, limit=20): + """搜尋使用者""" + try: + conn = self.create_connection() + if not conn: + return [] + + # 建構搜尋過濾器 + search_filter = f"""(& + (objectClass=person) + (objectCategory=person) + (!(userAccountControl:1.2.840.113556.1.4.803:=2)) + (| + (displayName=*{search_term}*) + (mail=*{search_term}*) + (sAMAccountName=*{search_term}*) + (userPrincipalName=*{search_term}*) + ) + )""" + + # 移除多餘空白 + search_filter = ' '.join(search_filter.split()) + + conn.search( + self.search_base, + search_filter, + SUBTREE, + attributes=['sAMAccountName', 
'displayName', 'mail', 'department'], + size_limit=limit + ) + + results = [] + for entry in conn.entries: + results.append({ + 'username': str(entry.sAMAccountName) if entry.sAMAccountName else '', + 'display_name': str(entry.displayName) if entry.displayName else '', + 'email': str(entry.mail) if entry.mail else '', + 'department': str(entry.department) if hasattr(entry, 'department') and entry.department else '' + }) + + logger.info(f"LDAP search found {len(results)} results for term: {search_term}") + return results + + except Exception as e: + logger.error(f"LDAP search error: {str(e)}") + return [] + finally: + if 'conn' in locals() and conn: + conn.unbind() + + def get_user_info(self, username): + """取得使用者詳細資訊""" + try: + conn = self.create_connection() + if not conn: + return None + + # 支援 sAMAccountName 和 userPrincipalName 格式 + if '@' in username: + search_filter = f"""(& + (objectClass=person) + (| + (userPrincipalName={username}) + (mail={username}) + ) + )""" + else: + search_filter = f"(&(objectClass=person)(sAMAccountName={username}))" + + # 移除多餘空白 + search_filter = ' '.join(search_filter.split()) + + conn.search( + self.search_base, + search_filter, + SUBTREE, + attributes=['displayName', 'mail', 'sAMAccountName', 'userPrincipalName', 'department'] + ) + + if not conn.entries: + return None + + entry = conn.entries[0] + return { + 'username': str(entry.sAMAccountName) if entry.sAMAccountName else username, + 'display_name': str(entry.displayName) if entry.displayName else username, + 'email': str(entry.mail) if entry.mail else f"{username}@panjit.com.tw", + 'department': str(entry.department) if hasattr(entry, 'department') and entry.department else None, + 'user_principal_name': str(entry.userPrincipalName) if entry.userPrincipalName else '' + } + + except Exception as e: + logger.error(f"Error getting user info for {username}: {str(e)}") + return None + finally: + if 'conn' in locals() and conn: + conn.unbind() + + def test_connection(self): + 
"""測試 LDAP 連線(健康檢查用)""" + try: + conn = self.create_connection(retries=1) + if conn: + conn.unbind() + return True + return False + except Exception as e: + logger.error(f"LDAP connection test failed: {str(e)}") + return False \ No newline at end of file diff --git a/app/utils/logger.py b/app/utils/logger.py new file mode 100644 index 0000000..97132c8 --- /dev/null +++ b/app/utils/logger.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +日誌管理模組 + +Author: PANJIT IT Team +Created: 2024-01-28 +Modified: 2024-01-28 +""" + +import logging +import os +from pathlib import Path +from logging.handlers import RotatingFileHandler +from flask import current_app, has_request_context, request, g + + +def get_logger(name): + """取得指定名稱的日誌器""" + logger = logging.getLogger(name) + + # 避免重複設定 handler + if not logger.handlers: + setup_logger(logger) + + return logger + + +def setup_logger(logger): + """設定日誌器""" + if has_request_context() and current_app: + log_level = current_app.config.get('LOG_LEVEL', 'INFO') + log_file = current_app.config.get('LOG_FILE', 'logs/app.log') + else: + log_level = os.environ.get('LOG_LEVEL', 'INFO') + log_file = os.environ.get('LOG_FILE', 'logs/app.log') + + # 確保日誌目錄存在 + log_path = Path(log_file) + log_path.parent.mkdir(parents=True, exist_ok=True) + + # 設定日誌等級 + logger.setLevel(getattr(logging, log_level.upper())) + + # 建立格式化器 + formatter = logging.Formatter( + '%(asctime)s [%(levelname)s] %(name)s: %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' + ) + + # 檔案處理器(使用輪轉) + file_handler = RotatingFileHandler( + log_file, + maxBytes=10*1024*1024, # 10MB + backupCount=5, + encoding='utf-8' + ) + file_handler.setLevel(getattr(logging, log_level.upper())) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + + # 控制台處理器 + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.INFO) + console_handler.setFormatter(formatter) + logger.addHandler(console_handler) + + +class 
DatabaseLogHandler(logging.Handler): + """資料庫日誌處理器""" + + def emit(self, record): + """發送日誌記錄到資料庫""" + try: + from app.models.log import SystemLog + + # 取得使用者和任務資訊(如果有的話) + user_id = None + job_id = None + extra_data = {} + + if has_request_context(): + user_id = g.get('current_user_id') + extra_data.update({ + 'method': request.method, + 'endpoint': request.endpoint, + 'url': request.url, + 'ip_address': request.remote_addr, + 'user_agent': request.headers.get('User-Agent') + }) + + # 儲存到資料庫 + SystemLog.log( + level=record.levelname, + module=record.name, + message=record.getMessage(), + user_id=user_id, + job_id=job_id, + extra_data=extra_data if extra_data else None + ) + + except Exception: + # 避免日誌記錄失敗影響主程序 + pass + + +def init_logging(app): + """初始化應用程式日誌""" + # 設定根日誌器 + root_logger = logging.getLogger() + root_logger.setLevel(logging.INFO) + + # 添加資料庫日誌處理器(僅對重要日誌) + if app.config.get('SQLALCHEMY_DATABASE_URI'): + db_handler = DatabaseLogHandler() + db_handler.setLevel(logging.WARNING) # 只記錄警告以上等級到資料庫 + root_logger.addHandler(db_handler) + + # 設定 Flask 應用日誌 + if not app.logger.handlers: + setup_logger(app.logger) + + # 設定第三方庫日誌等級 + logging.getLogger('werkzeug').setLevel(logging.WARNING) + logging.getLogger('urllib3').setLevel(logging.WARNING) + logging.getLogger('requests').setLevel(logging.WARNING) \ No newline at end of file diff --git a/app/utils/response.py b/app/utils/response.py new file mode 100644 index 0000000..7dc46d0 --- /dev/null +++ b/app/utils/response.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +API 響應處理工具 + +Author: PANJIT IT Team +Created: 2025-09-02 +""" + +from datetime import datetime +from typing import Dict, Any, List, Union +from app.utils.timezone import to_taiwan_time, format_taiwan_time + + +def convert_datetime_to_taiwan(data: Union[Dict, List, Any]) -> Union[Dict, List, Any]: + """遞迴轉換資料中的 datetime 欄位為台灣時間 + + Args: + data: 要轉換的資料(字典、列表或其他) + + Returns: + 轉換後的資料 + """ + if isinstance(data, dict): + 
result = {} + for key, value in data.items(): + if isinstance(value, datetime): + # 將 datetime 轉換為台灣時間的 ISO 字符串 + taiwan_dt = to_taiwan_time(value) + result[key] = taiwan_dt.isoformat() + elif key in ['created_at', 'updated_at', 'completed_at', 'processing_started_at', 'last_login', 'timestamp']: + # 特定的時間欄位 + if isinstance(value, str): + try: + # 嘗試解析 ISO 格式的時間字符串 + dt = datetime.fromisoformat(value.replace('Z', '+00:00')) + taiwan_dt = to_taiwan_time(dt) + result[key] = taiwan_dt.isoformat() + except: + result[key] = value + else: + result[key] = convert_datetime_to_taiwan(value) + else: + result[key] = convert_datetime_to_taiwan(value) + return result + elif isinstance(data, list): + return [convert_datetime_to_taiwan(item) for item in data] + else: + return data + + +def create_taiwan_response(success: bool = True, data: Any = None, message: str = '', + error: str = '', **kwargs) -> Dict[str, Any]: + """創建包含台灣時區轉換的 API 響應 + + Args: + success: 是否成功 + data: 響應資料 + message: 成功訊息 + error: 錯誤訊息 + **kwargs: 其他參數 + + Returns: + 包含台灣時區的響應字典 + """ + response = { + 'success': success, + 'timestamp': format_taiwan_time(datetime.now(), "%Y-%m-%d %H:%M:%S") + } + + if data is not None: + response['data'] = convert_datetime_to_taiwan(data) + + if message: + response['message'] = message + + if error: + response['error'] = error + + # 加入其他參數 + for key, value in kwargs.items(): + response[key] = convert_datetime_to_taiwan(value) + + return response \ No newline at end of file diff --git a/app/utils/timezone.py b/app/utils/timezone.py new file mode 100644 index 0000000..c000ae6 --- /dev/null +++ b/app/utils/timezone.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +時區工具函數 + +Author: PANJIT IT Team +Created: 2025-09-02 +""" + +from datetime import datetime, timezone, timedelta +from typing import Optional + +# 台灣時區 UTC+8 +TAIWAN_TZ = timezone(timedelta(hours=8)) + + +def now_taiwan() -> datetime: + """取得當前台灣時間(UTC+8)""" + return datetime.now(TAIWAN_TZ) 
+ + +def now_utc() -> datetime: + """取得當前 UTC 時間""" + return datetime.now(timezone.utc) + + +def to_taiwan_time(dt: datetime) -> datetime: + """將 datetime 轉換為台灣時間 + + Args: + dt: datetime 物件(可能是 naive 或 aware) + + Returns: + 台灣時區的 datetime 物件 + """ + if dt is None: + return None + + # 如果是 naive datetime,假設為 UTC + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + + # 轉換為台灣時區 + return dt.astimezone(TAIWAN_TZ) + + +def to_utc_time(dt: datetime) -> datetime: + """將 datetime 轉換為 UTC 時間 + + Args: + dt: datetime 物件(可能是 naive 或 aware) + + Returns: + UTC 時區的 datetime 物件 + """ + if dt is None: + return None + + # 如果是 naive datetime,假設為台灣時間 + if dt.tzinfo is None: + dt = dt.replace(tzinfo=TAIWAN_TZ) + + # 轉換為 UTC + return dt.astimezone(timezone.utc) + + +def format_taiwan_time(dt: datetime, format_str: str = "%Y-%m-%d %H:%M:%S") -> str: + """格式化台灣時間為字符串 + + Args: + dt: datetime 物件 + format_str: 格式化字符串 + + Returns: + 格式化後的時間字符串 + """ + if dt is None: + return "" + + taiwan_dt = to_taiwan_time(dt) + return taiwan_dt.strftime(format_str) + + +def parse_taiwan_time(time_str: str, format_str: str = "%Y-%m-%d %H:%M:%S") -> datetime: + """解析台灣時間字符串為 datetime + + Args: + time_str: 時間字符串 + format_str: 解析格式 + + Returns: + 台灣時區的 datetime 物件 + """ + naive_dt = datetime.strptime(time_str, format_str) + return naive_dt.replace(tzinfo=TAIWAN_TZ) + + +# 為了向後兼容,提供替代 datetime.utcnow() 的函數 +def utcnow() -> datetime: + """取得當前 UTC 時間(替代 datetime.utcnow()) + + 注意:新代碼建議使用 now_taiwan() 或 now_utc() + """ + return now_utc().replace(tzinfo=None) # 返回 naive UTC datetime 以保持兼容性 \ No newline at end of file diff --git a/app/utils/validators.py b/app/utils/validators.py new file mode 100644 index 0000000..d57dea1 --- /dev/null +++ b/app/utils/validators.py @@ -0,0 +1,203 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +驗證工具模組 + +Author: PANJIT IT Team +Created: 2024-01-28 +Modified: 2024-01-28 +""" + +import os +from pathlib import Path +from flask import current_app +from .exceptions 
import ValidationError + + +def validate_file(file_obj): + """驗證上傳的檔案""" + if not file_obj: + raise ValidationError("未選擇檔案", "NO_FILE") + + if not file_obj.filename: + raise ValidationError("檔案名稱為空", "NO_FILENAME") + + # 檢查檔案副檔名 + file_ext = Path(file_obj.filename).suffix.lower() + allowed_extensions = current_app.config.get('ALLOWED_EXTENSIONS', {'.docx', '.doc', '.pptx', '.xlsx', '.xls', '.pdf'}) + + if file_ext not in allowed_extensions: + raise ValidationError( + f"不支援的檔案類型: {file_ext},支援的格式: {', '.join(allowed_extensions)}", + "INVALID_FILE_TYPE" + ) + + # 檢查檔案大小 + max_size = current_app.config.get('MAX_CONTENT_LENGTH', 26214400) # 25MB + + # 取得檔案大小 + file_obj.seek(0, os.SEEK_END) + file_size = file_obj.tell() + file_obj.seek(0) + + if file_size > max_size: + raise ValidationError( + f"檔案大小超過限制 ({format_file_size(max_size)})", + "FILE_TOO_LARGE" + ) + + if file_size == 0: + raise ValidationError("檔案為空", "EMPTY_FILE") + + return { + 'filename': file_obj.filename, + 'file_extension': file_ext, + 'file_size': file_size, + 'valid': True + } + + +def validate_languages(source_language, target_languages): + """驗證語言設定""" + # 支援的語言列表 + supported_languages = { + 'auto': '自動偵測', + 'zh-CN': '簡體中文', + 'zh-TW': '繁體中文', + 'en': '英文', + 'ja': '日文', + 'ko': '韓文', + 'vi': '越南文', + 'th': '泰文', + 'id': '印尼文', + 'ms': '馬來文', + 'es': '西班牙文', + 'fr': '法文', + 'de': '德文', + 'ru': '俄文' + } + + # 驗證來源語言 + if source_language and source_language not in supported_languages: + raise ValidationError( + f"不支援的來源語言: {source_language}", + "INVALID_SOURCE_LANGUAGE" + ) + + # 驗證目標語言 + if not target_languages or not isinstance(target_languages, list): + raise ValidationError("必須指定至少一個目標語言", "NO_TARGET_LANGUAGES") + + if len(target_languages) == 0: + raise ValidationError("必須指定至少一個目標語言", "NO_TARGET_LANGUAGES") + + if len(target_languages) > 10: # 限制最多10個目標語言 + raise ValidationError("目標語言數量過多,最多支援10個", "TOO_MANY_TARGET_LANGUAGES") + + invalid_languages = [lang for lang in target_languages if lang 
not in supported_languages] + if invalid_languages: + raise ValidationError( + f"不支援的目標語言: {', '.join(invalid_languages)}", + "INVALID_TARGET_LANGUAGE" + ) + + # 檢查來源語言和目標語言是否有重疊 + if source_language and source_language != 'auto' and source_language in target_languages: + raise ValidationError( + "目標語言不能包含來源語言", + "SOURCE_TARGET_OVERLAP" + ) + + return { + 'source_language': source_language or 'auto', + 'target_languages': target_languages, + 'supported_languages': supported_languages, + 'valid': True + } + + +def validate_job_uuid(job_uuid): + """驗證任務UUID格式""" + import uuid + + if not job_uuid: + raise ValidationError("任務UUID不能為空", "INVALID_UUID") + + try: + uuid.UUID(job_uuid) + return True + except ValueError: + raise ValidationError("任務UUID格式錯誤", "INVALID_UUID") + + +def validate_pagination(page, per_page): + """驗證分頁參數""" + try: + page = int(page) if page else 1 + per_page = int(per_page) if per_page else 20 + except (ValueError, TypeError): + raise ValidationError("分頁參數必須為數字", "INVALID_PAGINATION") + + if page < 1: + raise ValidationError("頁數必須大於0", "INVALID_PAGE") + + if per_page < 1 or per_page > 100: + raise ValidationError("每頁項目數必須在1-100之間", "INVALID_PER_PAGE") + + return page, per_page + + +def format_file_size(size_bytes): + """格式化檔案大小顯示""" + if size_bytes == 0: + return "0 B" + + size_names = ["B", "KB", "MB", "GB", "TB"] + i = 0 + while size_bytes >= 1024 and i < len(size_names) - 1: + size_bytes /= 1024.0 + i += 1 + + return f"{size_bytes:.1f} {size_names[i]}" + + +def sanitize_filename(filename): + """清理檔案名稱,移除不安全字元""" + import re + + # 保留檔案名稱和副檔名 + name = Path(filename).stem + ext = Path(filename).suffix + + # 移除或替換不安全字元 + safe_name = re.sub(r'[^\w\s.-]', '_', name) + safe_name = re.sub(r'\s+', '_', safe_name) # 空白替換為底線 + safe_name = safe_name.strip('._') # 移除開頭結尾的點和底線 + + # 限制長度 + if len(safe_name) > 100: + safe_name = safe_name[:100] + + return f"{safe_name}{ext}" + + +def validate_date_range(start_date, end_date): + """驗證日期範圍""" + from datetime 
def jwt_required_ws(f):
    """Decorator that requires a JWT for a WebSocket event handler.

    The token is read from the ``token`` query parameter of the current
    request and decoded with ``decode_token``. On success the token's
    ``sub`` claim is stored on ``request.user_id`` and the wrapped handler
    is called. If the token is missing, or anything raises, the client is
    disconnected and ``False`` is returned.

    Args:
        f: The event handler to protect.

    Returns:
        The wrapped handler.
    """
    @wraps(f)
    def wrapper(*args, **kwargs):
        try:
            raw_token = request.args.get('token')
            if raw_token:
                claims = decode_token(raw_token)
                # Expose the caller's identity to the handler.
                request.user_id = claims.get('sub')
                # NOTE(review): the handler runs inside this try block, so
                # its own exceptions are also logged as authentication
                # failures and disconnect the client -- confirm intended.
                return f(*args, **kwargs)

            # No token supplied: drop the connection.
            disconnect()
        except Exception as e:
            logger.error(f"WebSocket authentication failed: {e}")
            disconnect()

        return False

    return wrapper
@socketio.on('ping')
def handle_ping(data=None):
    """Answer a client heartbeat with a ``pong`` event.

    Args:
        data: Optional event payload. When it is a dict, its ``timestamp``
            value is echoed back. The parameter is optional so clients
            that emit ``ping`` without a payload keep working; previously
            a client that did send a payload made the handler fail because
            it accepted no arguments.

    Emits:
        ``pong`` with ``{'timestamp': <value or None>}``.
    """
    timestamp = data.get('timestamp') if isinstance(data, dict) else None

    if timestamp is None:
        # Backward-compatible fallback: the original implementation read
        # the value from the request's query string.
        timestamp = request.args.get('timestamp')

    emit('pong', {'timestamp': timestamp})
def broadcast_system_message(message, message_type='info'):
    """Broadcast a system message to every connected client.

    Args:
        message: Message text.
        message_type: Message category sent as ``type`` (default
            ``'info'``).

    Emits:
        ``system_message`` with ``{'message': ..., 'type': ...}`` on the
        default namespace. Failures are logged, not raised (best effort).
    """
    try:
        # No room/recipient is given, so the server-level emit addresses
        # all clients in the namespace. The former ``broadcast=True``
        # keyword is not a documented parameter of ``SocketIO.emit``;
        # depending on the installed version it is ignored or rejected,
        # and a rejection would be swallowed by the except below, silently
        # dropping every system message.
        socketio.emit(
            'system_message',
            {
                'message': message,
                'type': message_type
            },
            namespace='/'
        )
        logger.info(f"System message broadcasted: {message}")

    except Exception as e:
        logger.error(f"Failed to broadcast system message: {e}")
app: {flask_app}") + print(f"Celery instance: {celery}") + print(f"Available tasks: {list(celery.tasks.keys())}") \ No newline at end of file diff --git a/frontend/.env.example b/frontend/.env.example new file mode 100644 index 0000000..35a6ada --- /dev/null +++ b/frontend/.env.example @@ -0,0 +1,18 @@ +# PANJIT Document Translator Frontend - Environment Template +# Copy this file to .env and modify the values as needed + +# Application Settings +VITE_APP_TITLE=PANJIT Document Translator +VITE_APP_VERSION=1.0.0 + +# API Configuration (Update these for production) +VITE_API_BASE_URL=http://127.0.0.1:5000/api/v1 +VITE_WS_BASE_URL=ws://127.0.0.1:5000 + +# File Upload Settings +VITE_MAX_FILE_SIZE=26214400 +VITE_ALLOWED_FILE_TYPES=.doc,.docx,.ppt,.pptx,.xls,.xlsx,.pdf + +# Development Settings +VITE_DEV_MODE=true +VITE_MOCK_API=false \ No newline at end of file diff --git a/frontend/.eslintrc.cjs b/frontend/.eslintrc.cjs new file mode 100644 index 0000000..a1169fb --- /dev/null +++ b/frontend/.eslintrc.cjs @@ -0,0 +1,58 @@ +module.exports = { + root: true, + env: { + node: true, + browser: true, + es2022: true + }, + extends: [ + 'plugin:vue/vue3-essential', + 'eslint:recommended', + '@vue/eslint-config-prettier' + ], + parserOptions: { + ecmaVersion: 'latest', + sourceType: 'module' + }, + plugins: [ + 'vue' + ], + rules: { + // Vue 相關規則 + 'vue/multi-word-component-names': 'off', + 'vue/no-unused-vars': 'error', + 'vue/component-name-in-template-casing': ['error', 'PascalCase', { + 'registeredComponentsOnly': false + }], + 'vue/component-definition-name-casing': ['error', 'PascalCase'], + 'vue/attribute-hyphenation': ['error', 'always'], + 'vue/v-on-event-hyphenation': ['error', 'always'], + + // JavaScript 規則 + 'no-console': process.env.NODE_ENV === 'production' ? 'warn' : 'off', + 'no-debugger': process.env.NODE_ENV === 'production' ? 
'error' : 'off', + 'no-unused-vars': ['error', { + 'vars': 'all', + 'args': 'after-used', + 'ignoreRestSiblings': false + }], + 'prefer-const': 'error', + 'no-var': 'error', + 'object-shorthand': 'error', + 'prefer-template': 'error', + + // 程式碼品質 + 'eqeqeq': ['error', 'always'], + 'curly': ['error', 'all'], + 'brace-style': ['error', '1tbs'], + 'comma-dangle': ['error', 'never'], + 'quotes': ['error', 'single', { 'avoidEscape': true }], + 'semi': ['error', 'never'] + }, + globals: { + defineProps: 'readonly', + defineEmits: 'readonly', + defineExpose: 'readonly', + withDefaults: 'readonly' + } +} \ No newline at end of file diff --git a/frontend/.prettierrc b/frontend/.prettierrc new file mode 100644 index 0000000..1f20091 --- /dev/null +++ b/frontend/.prettierrc @@ -0,0 +1,14 @@ +{ + "semi": false, + "singleQuote": true, + "quoteProps": "as-needed", + "trailingComma": "none", + "bracketSpacing": true, + "bracketSameLine": false, + "arrowParens": "avoid", + "printWidth": 100, + "tabWidth": 2, + "useTabs": false, + "endOfLine": "lf", + "vueIndentScriptAndStyle": false +} \ No newline at end of file diff --git a/frontend/auto-imports.d.ts b/frontend/auto-imports.d.ts new file mode 100644 index 0000000..4ab159f --- /dev/null +++ b/frontend/auto-imports.d.ts @@ -0,0 +1,89 @@ +/* eslint-disable */ +/* prettier-ignore */ +// @ts-nocheck +// noinspection JSUnusedGlobalSymbols +// Generated by unplugin-auto-import +export {} +declare global { + const EffectScope: typeof import('vue')['EffectScope'] + const acceptHMRUpdate: typeof import('pinia')['acceptHMRUpdate'] + const axios: typeof import('axios')['default'] + const computed: typeof import('vue')['computed'] + const createApp: typeof import('vue')['createApp'] + const createPinia: typeof import('pinia')['createPinia'] + const customRef: typeof import('vue')['customRef'] + const default: typeof import('axios')['default'] + const defineAsyncComponent: typeof import('vue')['defineAsyncComponent'] + const defineComponent: 
typeof import('vue')['defineComponent'] + const defineStore: typeof import('pinia')['defineStore'] + const effectScope: typeof import('vue')['effectScope'] + const getActivePinia: typeof import('pinia')['getActivePinia'] + const getCurrentInstance: typeof import('vue')['getCurrentInstance'] + const getCurrentScope: typeof import('vue')['getCurrentScope'] + const h: typeof import('vue')['h'] + const inject: typeof import('vue')['inject'] + const isProxy: typeof import('vue')['isProxy'] + const isReactive: typeof import('vue')['isReactive'] + const isReadonly: typeof import('vue')['isReadonly'] + const isRef: typeof import('vue')['isRef'] + const mapActions: typeof import('pinia')['mapActions'] + const mapGetters: typeof import('pinia')['mapGetters'] + const mapState: typeof import('pinia')['mapState'] + const mapStores: typeof import('pinia')['mapStores'] + const mapWritableState: typeof import('pinia')['mapWritableState'] + const markRaw: typeof import('vue')['markRaw'] + const nextTick: typeof import('vue')['nextTick'] + const onActivated: typeof import('vue')['onActivated'] + const onBeforeMount: typeof import('vue')['onBeforeMount'] + const onBeforeRouteLeave: typeof import('vue-router')['onBeforeRouteLeave'] + const onBeforeRouteUpdate: typeof import('vue-router')['onBeforeRouteUpdate'] + const onBeforeUnmount: typeof import('vue')['onBeforeUnmount'] + const onBeforeUpdate: typeof import('vue')['onBeforeUpdate'] + const onDeactivated: typeof import('vue')['onDeactivated'] + const onErrorCaptured: typeof import('vue')['onErrorCaptured'] + const onMounted: typeof import('vue')['onMounted'] + const onRenderTracked: typeof import('vue')['onRenderTracked'] + const onRenderTriggered: typeof import('vue')['onRenderTriggered'] + const onScopeDispose: typeof import('vue')['onScopeDispose'] + const onServerPrefetch: typeof import('vue')['onServerPrefetch'] + const onUnmounted: typeof import('vue')['onUnmounted'] + const onUpdated: typeof import('vue')['onUpdated'] + 
const onWatcherCleanup: typeof import('vue')['onWatcherCleanup'] + const provide: typeof import('vue')['provide'] + const reactive: typeof import('vue')['reactive'] + const readonly: typeof import('vue')['readonly'] + const ref: typeof import('vue')['ref'] + const resolveComponent: typeof import('vue')['resolveComponent'] + const setActivePinia: typeof import('pinia')['setActivePinia'] + const setMapStoreSuffix: typeof import('pinia')['setMapStoreSuffix'] + const shallowReactive: typeof import('vue')['shallowReactive'] + const shallowReadonly: typeof import('vue')['shallowReadonly'] + const shallowRef: typeof import('vue')['shallowRef'] + const storeToRefs: typeof import('pinia')['storeToRefs'] + const toRaw: typeof import('vue')['toRaw'] + const toRef: typeof import('vue')['toRef'] + const toRefs: typeof import('vue')['toRefs'] + const toValue: typeof import('vue')['toValue'] + const triggerRef: typeof import('vue')['triggerRef'] + const unref: typeof import('vue')['unref'] + const useAttrs: typeof import('vue')['useAttrs'] + const useCssModule: typeof import('vue')['useCssModule'] + const useCssVars: typeof import('vue')['useCssVars'] + const useId: typeof import('vue')['useId'] + const useLink: typeof import('vue-router')['useLink'] + const useModel: typeof import('vue')['useModel'] + const useRoute: typeof import('vue-router')['useRoute'] + const useRouter: typeof import('vue-router')['useRouter'] + const useSlots: typeof import('vue')['useSlots'] + const useTemplateRef: typeof import('vue')['useTemplateRef'] + const watch: typeof import('vue')['watch'] + const watchEffect: typeof import('vue')['watchEffect'] + const watchPostEffect: typeof import('vue')['watchPostEffect'] + const watchSyncEffect: typeof import('vue')['watchSyncEffect'] +} +// for type re-export +declare global { + // @ts-ignore + export type { Component, ComponentPublicInstance, ComputedRef, DirectiveBinding, ExtractDefaultPropTypes, ExtractPropTypes, ExtractPublicPropTypes, InjectionKey, 
PropType, Ref, MaybeRef, MaybeRefOrGetter, VNode, WritableComputedRef } from 'vue' + import('vue') +} diff --git a/frontend/index.html b/frontend/index.html new file mode 100644 index 0000000..51f4e6c --- /dev/null +++ b/frontend/index.html @@ -0,0 +1,46 @@ + + + + + + + PANJIT Document Translator + + + + + +
+
+
+
+
+ + + \ No newline at end of file diff --git a/frontend/package.json b/frontend/package.json new file mode 100644 index 0000000..ddc0c3e --- /dev/null +++ b/frontend/package.json @@ -0,0 +1,44 @@ +{ + "name": "panjit-document-translator-frontend", + "private": true, + "version": "1.0.0", + "description": "PANJIT Document Translator Web System Frontend", + "type": "module", + "scripts": { + "dev": "vite", + "build": "vite build", + "preview": "vite preview", + "lint": "eslint . --ext vue,js,jsx,cjs,mjs,ts,tsx,cts,mts --fix", + "format": "prettier --write src/", + "serve": "vite preview" + }, + "dependencies": { + "vue": "^3.3.4", + "vue-router": "^4.2.4", + "pinia": "^2.1.6", + "element-plus": "^2.3.8", + "@element-plus/icons-vue": "^2.1.0", + "axios": "^1.4.0", + "socket.io-client": "^4.7.2", + "echarts": "^5.4.3", + "vue-echarts": "^6.6.0", + "dayjs": "^1.11.9", + "file-saver": "^2.0.5", + "nprogress": "^0.2.0", + "js-cookie": "^3.0.5" + }, + "devDependencies": { + "@vitejs/plugin-vue": "^4.2.3", + "vite": "^4.4.0", + "sass": "^1.64.1", + "unplugin-auto-import": "^0.16.6", + "unplugin-vue-components": "^0.25.1", + "unplugin-element-plus": "^0.7.1", + "eslint": "^8.45.0", + "eslint-plugin-vue": "^9.15.1", + "eslint-config-prettier": "^8.8.0", + "@vue/eslint-config-prettier": "^8.0.0", + "prettier": "^3.0.0", + "vite-plugin-eslint": "^1.8.1" + } +} \ No newline at end of file diff --git a/frontend/public/panjit-logo.png b/frontend/public/panjit-logo.png new file mode 100644 index 0000000..b12aa06 Binary files /dev/null and b/frontend/public/panjit-logo.png differ diff --git a/frontend/src/App.vue b/frontend/src/App.vue new file mode 100644 index 0000000..496625d --- /dev/null +++ b/frontend/src/App.vue @@ -0,0 +1,95 @@ + + + + + \ No newline at end of file diff --git a/frontend/src/layouts/MainLayout.vue b/frontend/src/layouts/MainLayout.vue new file mode 100644 index 0000000..02aed18 --- /dev/null +++ b/frontend/src/layouts/MainLayout.vue @@ -0,0 +1,407 @@ + + + + 
import { createApp, nextTick } from 'vue'
import { createPinia } from 'pinia'
import ElementPlus from 'element-plus'
import 'element-plus/dist/index.css'
import 'element-plus/theme-chalk/dark/css-vars.css'
import * as ElementPlusIconsVue from '@element-plus/icons-vue'
import zhCn from 'element-plus/dist/locale/zh-cn.mjs'

import App from './App.vue'
import router from './router'
import './style/main.scss'

/**
 * Application bootstrap: create the Vue app, register every Element Plus
 * icon as a global component, install the plugins, mount the app and then
 * hide the static loading placeholder.
 */
const app = createApp(App)

// Register each Element Plus icon under its export name.
Object.entries(ElementPlusIconsVue).forEach(([iconName, iconComponent]) => {
  app.component(iconName, iconComponent)
})

// Install plugins: state store, router, UI library (zh-CN locale).
app
  .use(createPinia())
  .use(router)
  .use(ElementPlus, { locale: zhCn })

// Global error handler: log uncaught component errors.
app.config.errorHandler = (err, vm, info) => {
  console.error('全局錯誤處理:', err, info)
}

/** Hide the element with id "loading", if it exists. */
function hideLoading() {
  const loadingEl = document.getElementById('loading')
  if (!loadingEl) {
    return
  }
  loadingEl.style.display = 'none'
}

app.mount('#app')

// Hide the placeholder once the first render has been flushed.
nextTick(() => hideLoading())

export default app
component: () => import('@/layouts/MainLayout.vue'), + redirect: '/home', + meta: { + requiresAuth: true + }, + children: [ + { + path: '/home', + name: 'Home', + component: () => import('@/views/HomeView.vue'), + meta: { + title: '首頁', + icon: 'House', + showInMenu: true + } + }, + { + path: '/upload', + name: 'Upload', + component: () => import('@/views/UploadView.vue'), + meta: { + title: '檔案上傳', + icon: 'Upload', + showInMenu: true + } + }, + { + path: '/jobs', + name: 'Jobs', + component: () => import('@/views/JobListView.vue'), + meta: { + title: '任務列表', + icon: 'List', + showInMenu: true + } + }, + { + path: '/history', + name: 'History', + component: () => import('@/views/HistoryView.vue'), + meta: { + title: '歷史記錄', + icon: 'Clock', + showInMenu: true + } + }, + { + path: '/profile', + name: 'Profile', + component: () => import('@/views/ProfileView.vue'), + meta: { + title: '個人設定', + icon: 'User' + } + }, + { + path: '/admin', + name: 'Admin', + component: () => import('@/views/AdminView.vue'), + meta: { + title: '管理後台', + icon: 'Setting', + requiresAdmin: true, + showInMenu: true + } + }, + { + path: '/admin/jobs', + name: 'AdminJobs', + component: () => import('@/views/AdminJobsView.vue'), + meta: { + title: '全部任務', + requiresAdmin: true + } + } + ] + }, + { + path: '/job/:uuid', + name: 'JobDetail', + component: () => import('@/views/JobDetailView.vue'), + meta: { + title: '任務詳情', + requiresAuth: true, + hideLayout: false + } + }, + { + path: '/:pathMatch(.*)*', + name: 'NotFound', + component: () => import('@/views/NotFoundView.vue'), + meta: { + title: '頁面不存在', + hideLayout: true + } + } +] + +const router = createRouter({ + history: createWebHistory(), + routes, + scrollBehavior(to, from, savedPosition) { + if (savedPosition) { + return savedPosition + } else { + return { top: 0 } + } + } +}) + +// 路由守衛 +router.beforeEach(async (to, from, next) => { + NProgress.start() + + const authStore = useAuthStore() + + // 設置頁面標題 + document.title = 
import { request } from '@/utils/request'

// Default query parameters; caller-supplied values override these.
const ALL_JOBS_DEFAULTS = { page: 1, per_page: 50, user_id: 'all', status: 'all' }
const SYSTEM_LOGS_DEFAULTS = { page: 1, per_page: 100, level: 'all' }
const CLEANUP_DEFAULTS = {
  cleanup_files: true,
  cleanup_logs: false,
  cleanup_cache: false,
  files_days: 7,
  logs_days: 30,
  cache_days: 90
}

/**
 * Administrator API client. Every method returns whatever the shared
 * `request` helper returns for the call.
 */
export const adminAPI = {
  /**
   * Fetch system statistics.
   * @param {string} period - Statistics period (day/week/month/year)
   */
  getStats: (period = 'month') => request.get('/admin/stats', { params: { period } }),

  /**
   * Fetch jobs of all users.
   * @param {Object} params - Query parameters (merged over the defaults)
   */
  getAllJobs: (params = {}) =>
    request.get('/admin/jobs', { params: { ...ALL_JOBS_DEFAULTS, ...params } }),

  /** Fetch the user list. */
  getUsers: () => request.get('/admin/users'),

  /**
   * Fetch one user's details.
   * @param {number} userId - User ID
   */
  getUserDetail: (userId) => request.get(`/admin/users/${userId}`),

  /**
   * Update a user.
   * @param {number} userId - User ID
   * @param {Object} data - Fields to update
   */
  updateUser: (userId, data) => request.put(`/admin/users/${userId}`, data),

  /**
   * Fetch API usage statistics.
   * @param {Object} params - Query parameters
   */
  getApiUsageStats: (params = {}) => request.get('/admin/api-usage', { params }),

  /**
   * Fetch system logs.
   * @param {Object} params - Query parameters (merged over the defaults)
   */
  getSystemLogs: (params = {}) =>
    request.get('/admin/logs', { params: { ...SYSTEM_LOGS_DEFAULTS, ...params } }),

  /**
   * Fetch the cost report.
   * @param {Object} params - Query parameters
   */
  getCostReport: (params = {}) => request.get('/admin/cost-report', { params }),

  /**
   * Export a report as a binary blob.
   * @param {string} type - Report type
   * @param {Object} params - Query parameters
   */
  exportReport: (type, params = {}) =>
    request.get(`/admin/export/${type}`, {
      params,
      responseType: 'blob'
    }),

  /** Run the system health check. */
  getSystemHealth: () => request.get('/admin/health'),

  /** Fetch system metrics. */
  getSystemMetrics: () => request.get('/admin/metrics'),

  /**
   * Clean up old files.
   * @param {Object} options - Cleanup options (merged over the defaults)
   */
  cleanupOldFiles: (options = {}) =>
    request.post('/admin/maintenance/cleanup', { ...CLEANUP_DEFAULTS, ...options }),

  /**
   * Cancel a job as administrator.
   * @param {string} jobUuid - Job UUID
   */
  adminCancelJob: (jobUuid) => request.post(`/admin/jobs/${jobUuid}/cancel`),

  /**
   * Delete a job as administrator.
   * @param {string} jobUuid - Job UUID
   */
  adminDeleteJob: (jobUuid) => request.delete(`/admin/jobs/${jobUuid}`)
}
import { request, uploadRequest } from '@/utils/request'

/**
 * Job API client. Every method returns whatever the shared request helper
 * returns for the call.
 */
export const jobsAPI = {
  /**
   * Upload a file.
   * @param {FormData} formData - Form data holding the file and its settings
   * @param {Function} [onUploadProgress] - Optional progress callback,
   *   called with the upload progress event. When omitted, a function
   *   attached as `formData.onUploadProgress` is used instead (the
   *   original, still supported, way of passing the callback).
   */
  uploadFile(formData, onUploadProgress) {
    return uploadRequest.post('/files/upload', formData, {
      onUploadProgress: (progressEvent) => {
        if (typeof onUploadProgress === 'function') {
          onUploadProgress(progressEvent)
          return
        }
        // Legacy path: callback attached to the FormData instance. The
        // type check avoids a TypeError when the property is set to
        // something that is not callable.
        if (formData && typeof formData.onUploadProgress === 'function') {
          formData.onUploadProgress(progressEvent)
        }
      }
    })
  },

  /**
   * Fetch the current user's job list.
   * @param {Object} params - Query parameters (merged over the defaults)
   * @param {number} params.page - Page number
   * @param {number} params.per_page - Items per page
   * @param {string} params.status - Job status filter
   */
  getJobs(params = {}) {
    const defaultParams = {
      page: 1,
      per_page: 20,
      status: 'all'
    }
    return request.get('/jobs', { params: { ...defaultParams, ...params } })
  },

  /**
   * Fetch one job's details.
   * @param {string} jobUuid - Job UUID
   */
  getJobDetail(jobUuid) {
    return request.get(`/jobs/${jobUuid}`)
  },

  /**
   * Retry a failed job.
   * @param {string} jobUuid - Job UUID
   */
  retryJob(jobUuid) {
    return request.post(`/jobs/${jobUuid}/retry`)
  },

  /**
   * Cancel a job.
   * @param {string} jobUuid - Job UUID
   */
  cancelJob(jobUuid) {
    return request.post(`/jobs/${jobUuid}/cancel`)
  },

  /**
   * Delete a job.
   * @param {string} jobUuid - Job UUID
   */
  deleteJob(jobUuid) {
    return request.delete(`/jobs/${jobUuid}`)
  }
}
responseType: 'blob' + }) + }, + + /** + * 批量下載檔案 + * @param {string} jobUuid - 任務 UUID + */ + downloadAllFiles(jobUuid) { + return request.get(`/files/${jobUuid}/download/batch`, { + responseType: 'blob' + }) + }, + + /** + * 下載合併檔案 + * @param {string} jobUuid - 任務 UUID + */ + downloadCombineFile(jobUuid) { + return request.get(`/files/${jobUuid}/download/combine`, { + responseType: 'blob' + }) + }, + + /** + * 取得檔案資訊 + * @param {string} jobUuid - 任務 UUID + */ + getFileInfo(jobUuid) { + return request.get(`/files/${jobUuid}/info`) + } +} \ No newline at end of file diff --git a/frontend/src/services/notification.js b/frontend/src/services/notification.js new file mode 100644 index 0000000..9d94bb0 --- /dev/null +++ b/frontend/src/services/notification.js @@ -0,0 +1,63 @@ +import { request } from '@/utils/request' + +/** + * 通知相關 API 服務 + */ +export const notificationAPI = { + /** + * 獲取通知列表 + * @param {Object} params - 查詢參數 + * @param {number} params.page - 頁碼 + * @param {number} params.per_page - 每頁數量 + * @param {string} params.status - 狀態過濾 ('all', 'unread', 'read') + * @param {string} params.type - 類型過濾 + */ + getNotifications(params = {}) { + return request.get('/notifications', { params }) + }, + + /** + * 獲取單個通知詳情 + * @param {string} notificationId - 通知ID + */ + getNotification(notificationId) { + return request.get(`/notifications/${notificationId}`) + }, + + /** + * 標記通知為已讀 + * @param {string} notificationId - 通知ID + */ + markAsRead(notificationId) { + return request.post(`/notifications/${notificationId}/read`) + }, + + /** + * 標記所有通知為已讀 + */ + markAllAsRead() { + return request.post('/notifications/read-all') + }, + + /** + * 刪除通知 + * @param {string} notificationId - 通知ID + */ + deleteNotification(notificationId) { + return request.delete(`/notifications/${notificationId}`) + }, + + /** + * 清空所有已讀通知 + */ + clearNotifications() { + return request.delete('/notifications/clear') + }, + + /** + * 創建測試通知(開發用) + */ + createTestNotification() { + return 
import { defineStore } from 'pinia'
import { adminAPI } from '@/services/admin'
import { ElMessage } from 'element-plus'

/** Build a fresh pagination object (initial state and reset). */
const createInitialPagination = () => ({
  page: 1,
  per_page: 50,
  total: 0,
  pages: 0
})

/**
 * Pinia store for the admin console: statistics, users, jobs, logs,
 * API usage, cost report, and system health/metrics.
 *
 * Actions catch their own errors, log them, and (except for the metrics
 * call) show an Element Plus message or record a status; on failure they
 * resolve to `undefined`.
 */
export const useAdminStore = defineStore('admin', {
  state: () => ({
    stats: null,
    users: [],
    allJobs: [],
    systemLogs: [],
    apiUsageStats: [],
    costReport: null,
    systemHealth: null,
    systemMetrics: null,
    loading: false,
    pagination: createInitialPagination()
  }),

  getters: {
    // Overview statistics
    overviewStats: (state) => state.stats?.overview || {},

    // Per-day statistics
    dailyStats: (state) => state.stats?.daily_stats || [],

    // User rankings
    userRankings: (state) => state.stats?.user_rankings || [],

    // Number of users active today
    activeUsersCount: (state) => state.stats?.overview?.active_users_today || 0,

    // Total cost
    totalCost: (state) => state.stats?.overview?.total_cost || 0,

    // The system counts as healthy for both 'healthy' and 'warning'
    isSystemHealthy: (state) => {
      const status = state.systemHealth?.status
      return status === 'healthy' || status === 'warning'
    }
  },

  actions: {
    /**
     * Fetch system statistics.
     * @param {string} period - Statistics period
     */
    async fetchStats(period = 'month') {
      try {
        this.loading = true

        const response = await adminAPI.getStats(period)

        if (response.success) {
          this.stats = response.data
          return response.data
        }
      } catch (error) {
        console.error('取得統計資訊失敗:', error)
        ElMessage.error('載入統計資訊失敗')
      } finally {
        this.loading = false
      }
    },

    /**
     * Fetch jobs of all users.
     * @param {Object} params - Query parameters
     */
    async fetchAllJobs(params = {}) {
      try {
        this.loading = true

        const response = await adminAPI.getAllJobs(params)

        if (response.success) {
          this.allJobs = response.data.jobs
          // Keep the previous pagination if the response carries none, so
          // consumers never read properties of undefined.
          this.pagination = response.data.pagination || this.pagination
          return response.data
        }
      } catch (error) {
        console.error('取得所有任務失敗:', error)
        ElMessage.error('載入任務資料失敗')
      } finally {
        this.loading = false
      }
    },

    /**
     * Fetch the user list.
     */
    async fetchUsers() {
      try {
        const response = await adminAPI.getUsers()

        if (response.success) {
          this.users = response.data.users
          return response.data
        }
      } catch (error) {
        console.error('取得使用者列表失敗:', error)
        ElMessage.error('載入使用者資料失敗')
      }
    },

    /**
     * Update a user and merge the result into the local list.
     * @param {number} userId - User ID
     * @param {Object} data - Fields to update
     */
    async updateUser(userId, data) {
      try {
        const response = await adminAPI.updateUser(userId, data)

        if (response.success) {
          // Merge the returned fields into the cached user, if present.
          const userIndex = this.users.findIndex(user => user.id === userId)
          if (userIndex !== -1) {
            this.users[userIndex] = { ...this.users[userIndex], ...response.data }
          }

          ElMessage.success('使用者資料更新成功')
          return response.data
        }
      } catch (error) {
        console.error('更新使用者失敗:', error)
        ElMessage.error('更新使用者失敗')
      }
    },

    /**
     * Fetch API usage statistics.
     * @param {Object} params - Query parameters
     */
    async fetchApiUsageStats(params = {}) {
      try {
        const response = await adminAPI.getApiUsageStats(params)

        if (response.success) {
          this.apiUsageStats = response.data.stats
          return response.data
        }
      } catch (error) {
        console.error('取得 API 使用統計失敗:', error)
        ElMessage.error('載入 API 統計失敗')
      }
    },

    /**
     * Fetch system logs.
     * @param {Object} params - Query parameters
     */
    async fetchSystemLogs(params = {}) {
      try {
        this.loading = true

        const response = await adminAPI.getSystemLogs(params)

        if (response.success) {
          this.systemLogs = response.data.logs
          return response.data
        }
      } catch (error) {
        console.error('取得系統日誌失敗:', error)
        ElMessage.error('載入系統日誌失敗')
      } finally {
        this.loading = false
      }
    },

    /**
     * Fetch the cost report.
     * @param {Object} params - Query parameters
     */
    async fetchCostReport(params = {}) {
      try {
        const response = await adminAPI.getCostReport(params)

        if (response.success) {
          this.costReport = response.data
          return response.data
        }
      } catch (error) {
        console.error('取得成本報表失敗:', error)
        ElMessage.error('載入成本報表失敗')
      }
    },

    /**
     * Export a report and trigger a browser download of the .xlsx file.
     * @param {string} type - Report type
     * @param {Object} params - Query parameters
     */
    async exportReport(type, params = {}) {
      try {
        const response = await adminAPI.exportReport(type, params)

        // Wrap the payload so the download gets the spreadsheet MIME type.
        const blob = new Blob([response], {
          type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        })

        const url = window.URL.createObjectURL(blob)
        const link = document.createElement('a')
        link.href = url
        link.download = `${type}_report_${new Date().toISOString().slice(0, 10)}.xlsx`
        link.click()

        window.URL.revokeObjectURL(url)
        ElMessage.success('報表匯出成功')
      } catch (error) {
        console.error('匯出報表失敗:', error)
        ElMessage.error('匯出報表失敗')
      }
    },

    /**
     * Fetch the system health status.
     *
     * Accepts both `{ success, data }` responses and responses that carry
     * `status` at the top level. Any other shape, and any request error,
     * is recorded as an 'unhealthy' status with an error description.
     */
    async fetchSystemHealth() {
      try {
        const response = await adminAPI.getSystemHealth()

        if (response.success && response.data) {
          this.systemHealth = response.data
        } else if (response.status) {
          // The endpoint returned the status object directly.
          this.systemHealth = response
        } else {
          this.systemHealth = { status: 'unhealthy', error: 'Invalid response format' }
        }

        return response
      } catch (error) {
        console.error('取得系統健康狀態失敗:', error)
        console.error('錯誤詳情:', error.response?.data || error.message)

        // Map the HTTP status to a user-facing reason.
        if (error.response?.status === 403) {
          this.systemHealth = { status: 'unhealthy', error: '權限不足' }
        } else if (error.response?.status === 401) {
          this.systemHealth = { status: 'unhealthy', error: '需要登入' }
        } else {
          this.systemHealth = { status: 'unhealthy', error: '連接失敗' }
        }
      }
    },

    /**
     * Fetch system metrics.
     */
    async fetchSystemMetrics() {
      try {
        const response = await adminAPI.getSystemMetrics()

        if (response.success && response.data) {
          this.systemMetrics = response.data
          return response.data
        } else if (response.jobs) {
          // Legacy response format: metrics at the top level.
          this.systemMetrics = response
          return response
        }
      } catch (error) {
        console.error('取得系統指標失敗:', error)
      }
    },

    /**
     * Clean up old files.
     * @param {Object} [options] - Cleanup options forwarded to the API,
     *   which merges them over its defaults. Omit to use the defaults.
     */
    async cleanupOldFiles(options = {}) {
      try {
        const response = await adminAPI.cleanupOldFiles(options)

        if (response.success) {
          ElMessage.success('檔案清理完成')
          return response.data
        }
      } catch (error) {
        console.error('清理檔案失敗:', error)
        ElMessage.error('清理檔案失敗')
      }
    },

    /**
     * Reset all admin data to its initial state, including pagination and
     * the loading flag, so no stale values survive.
     */
    resetAdminData() {
      this.stats = null
      this.users = []
      this.allJobs = []
      this.systemLogs = []
      this.apiUsageStats = []
      this.costReport = null
      this.systemHealth = null
      this.systemMetrics = null
      this.loading = false
      this.pagination = createInitialPagination()
    }
  }
})
+ this.isAuthenticated = true + + console.log('🔑 [Auth] 設定認證狀態', { + user: this.user, + token: this.token ? `${this.token.substring(0, 20)}...` : null, + isAuthenticated: this.isAuthenticated + }) + + // 儲存認證資訊到 localStorage + localStorage.setItem('auth_user', JSON.stringify(response.data.user)) + localStorage.setItem('auth_token', this.token) + localStorage.setItem('auth_refresh_token', this.refreshToken) + localStorage.setItem('auth_authenticated', 'true') + + // JWT 不需要 cookie,移除 cookie 設定 + + console.log('🔑 [Auth] 登入成功,JWT tokens 已儲存') + ElMessage.success(response.message || '登入成功') + return response.data + } else { + throw new Error(response.message || '登入失敗') + } + } catch (error) { + console.error('❌ [Auth] 登入錯誤:', error) + this.clearAuth() + throw error + } finally { + this.loading = false + } + }, + + /** + * 使用者登出 + * @param {boolean} showMessage - 是否顯示登出訊息(預設為 true) + * @param {boolean} isAutoLogout - 是否為自動登出(預設為 false) + */ + async logout(showMessage = true, isAutoLogout = false) { + try { + console.log('🚪 [Auth] 開始登出流程', { showMessage, isAutoLogout }) + + // 只有手動登出時才呼叫 API + if (!isAutoLogout) { + await authAPI.logout() + console.log('🚪 [Auth] 登出 API 完成') + } + } catch (error) { + // 登出 API 失敗不影響本地清除動作,且不顯示錯誤 + console.error('❌ [Auth] 登出 API 錯誤(已忽略):', error) + } finally { + console.log('🚪 [Auth] 清除認證資料') + this.clearAuth() + + // 只在需要時顯示訊息 + if (showMessage && !isAutoLogout) { + ElMessage.success('已安全登出') + } + } + }, + + /** + * 檢查認證狀態 + */ + async checkAuth() { + try { + // 先檢查 localStorage 中的認證資訊 + const authUser = localStorage.getItem('auth_user') + const authToken = localStorage.getItem('auth_token') + const authRefreshToken = localStorage.getItem('auth_refresh_token') + const authAuthenticated = localStorage.getItem('auth_authenticated') + + if (!authUser || !authToken || authAuthenticated !== 'true') { + return false + } + + // 恢復認證狀態 + this.user = JSON.parse(authUser) + this.token = authToken + this.refreshToken = authRefreshToken + 
this.isAuthenticated = true + + console.log('🔑 [Auth] 從 localStorage 恢復認證狀態', { + user: this.user, + hasToken: !!this.token, + hasRefreshToken: !!this.refreshToken + }) + + return true + + } catch (error) { + console.error('❌ [Auth] 認證檢查失敗:', error) + this.clearAuth() + return false + } + }, + + /** + * 刷新用戶資訊 + */ + async refreshUser() { + try { + const response = await authAPI.getCurrentUser() + + if (response.success && response.data.user) { + this.user = response.data.user + } + } catch (error) { + console.error('刷新用戶資訊失敗:', error) + this.clearAuth() + } + }, + + /** + * 清除認證資訊 + */ + clearAuth() { + console.log('🧡 [Auth] 清除認證資料前', { + user: this.user, + token: this.token, + refreshToken: this.refreshToken, + isAuthenticated: this.isAuthenticated + }) + + this.user = null + this.token = null + this.refreshToken = null + this.isAuthenticated = false + this.loading = false + + // 清除所有認證相關的存儲 + localStorage.removeItem('auth_user') + localStorage.removeItem('auth_token') + localStorage.removeItem('auth_refresh_token') + localStorage.removeItem('auth_authenticated') + + console.log('🧡 [Auth] JWT 認證資料已清除') + }, + + /** + * 更新用戶資訊 + * @param {Object} userData - 用戶資料 + */ + updateUser(userData) { + if (this.user) { + this.user = { ...this.user, ...userData } + } + } + }, + + // 持久化設定(可選) + persist: { + key: 'auth_store', + storage: localStorage, + paths: ['user', 'isAuthenticated'] // 只持久化這些欄位 + } +}) \ No newline at end of file diff --git a/frontend/src/stores/jobs.js b/frontend/src/stores/jobs.js new file mode 100644 index 0000000..fda6a82 --- /dev/null +++ b/frontend/src/stores/jobs.js @@ -0,0 +1,427 @@ +import { defineStore } from 'pinia' +import { jobsAPI, filesAPI } from '@/services/jobs' +import { ElMessage, ElNotification } from 'element-plus' +import { saveAs } from 'file-saver' + +export const useJobsStore = defineStore('jobs', { + state: () => ({ + jobs: [], + currentJob: null, + pagination: { + page: 1, + per_page: 20, + total: 0, + pages: 0 + }, + loading: 
false, + uploadProgress: 0, + filters: { + status: 'all', + search: '' + }, + // 輪詢管理 + pollingIntervals: new Map() // 存儲每個任務的輪詢間隔 ID + }), + + getters: { + // 按狀態分組的任務 + pendingJobs: (state) => state.jobs.filter(job => job.status === 'PENDING'), + processingJobs: (state) => state.jobs.filter(job => job.status === 'PROCESSING'), + completedJobs: (state) => state.jobs.filter(job => job.status === 'COMPLETED'), + failedJobs: (state) => state.jobs.filter(job => job.status === 'FAILED'), + retryJobs: (state) => state.jobs.filter(job => job.status === 'RETRY'), + + // 根據 UUID 查找任務 + getJobByUuid: (state) => (uuid) => { + return state.jobs.find(job => job.job_uuid === uuid) + }, + + // 統計資訊 + jobStats: (state) => ({ + total: state.jobs.length, + pending: state.jobs.filter(job => job.status === 'PENDING').length, + processing: state.jobs.filter(job => job.status === 'PROCESSING').length, + completed: state.jobs.filter(job => job.status === 'COMPLETED').length, + failed: state.jobs.filter(job => job.status === 'FAILED').length + }) + }, + + actions: { + /** + * 取得任務列表 + * @param {Object} options - 查詢選項 + */ + async fetchJobs(options = {}) { + try { + this.loading = true + + const params = { + page: options.page || this.pagination.page, + per_page: options.per_page || this.pagination.per_page, + status: options.status || this.filters.status + } + + const response = await jobsAPI.getJobs(params) + + if (response.success) { + this.jobs = response.data.jobs + this.pagination = response.data.pagination + return response.data + } + } catch (error) { + console.error('取得任務列表失敗:', error) + ElMessage.error('載入任務列表失敗') + } finally { + this.loading = false + } + }, + + /** + * 上傳檔案 + * @param {FormData} formData - 表單資料 + * @param {Function} onProgress - 進度回調 + */ + async uploadFile(formData, onProgress) { + try { + this.uploadProgress = 0 + + // 設定進度回調 + if (onProgress) { + formData.onUploadProgress = (progressEvent) => { + const progress = Math.round((progressEvent.loaded * 100) / 
progressEvent.total) + this.uploadProgress = progress + onProgress(progress) + } + } + + const response = await jobsAPI.uploadFile(formData) + + if (response.success) { + // 將新任務添加到列表頂部 + const newJob = response.data + this.jobs.unshift(newJob) + + ElMessage.success('檔案上傳成功,已加入翻譯佇列') + return newJob + } + } catch (error) { + console.error('檔案上傳失敗:', error) + throw error + } finally { + this.uploadProgress = 0 + } + }, + + /** + * 取得任務詳情 + * @param {string} jobUuid - 任務 UUID + */ + async fetchJobDetail(jobUuid) { + try { + const response = await jobsAPI.getJobDetail(jobUuid) + + if (response && response.success) { + this.currentJob = response.data.job + return response.data + } else { + console.error('API 響應格式錯誤:', response) + throw new Error('API 響應格式錯誤') + } + } catch (error) { + console.error('取得任務詳情失敗:', error) + ElMessage.error('載入任務詳情失敗') + throw error + } + }, + + /** + * 重試失敗任務 + * @param {string} jobUuid - 任務 UUID + */ + async retryJob(jobUuid) { + try { + const response = await jobsAPI.retryJob(jobUuid) + + if (response.success) { + // 更新本地任務狀態 + const jobIndex = this.jobs.findIndex(job => job.job_uuid === jobUuid) + if (jobIndex !== -1) { + this.jobs[jobIndex] = { ...this.jobs[jobIndex], ...response.data } + } + + ElMessage.success('任務已重新加入佇列') + return response.data + } + } catch (error) { + console.error('重試任務失敗:', error) + ElMessage.error('重試任務失敗') + } + }, + + /** + * 取消任務 + * @param {string} jobUuid - 任務 UUID + */ + async cancelJob(jobUuid) { + try { + const response = await jobsAPI.cancelJob(jobUuid) + + if (response.success) { + const jobIndex = this.jobs.findIndex(job => job.job_uuid === jobUuid) + if (jobIndex !== -1) { + this.jobs[jobIndex] = { + ...this.jobs[jobIndex], + status: 'FAILED', + error_message: '使用者取消任務' + } + } + + ElMessage.success('任務已取消') + } + } catch (error) { + console.error('取消任務失敗:', error) + ElMessage.error('取消任務失敗') + } + }, + + /** + * 刪除任務 + * @param {string} jobUuid - 任務 UUID + */ + async deleteJob(jobUuid) { + try { + 
const response = await jobsAPI.deleteJob(jobUuid) + + if (response.success) { + // 先停止輪詢 + this.unsubscribeFromJobUpdates(jobUuid) + + // 從列表中移除任務 + const jobIndex = this.jobs.findIndex(job => job.job_uuid === jobUuid) + if (jobIndex !== -1) { + this.jobs.splice(jobIndex, 1) + } + + ElMessage.success('任務已刪除') + } + } catch (error) { + console.error('刪除任務失敗:', error) + ElMessage.error('刪除任務失敗') + } + }, + + /** + * 下載檔案 + * @param {string} jobUuid - 任務 UUID + * @param {string} languageCode - 語言代碼 + * @param {string} filename - 檔案名稱 + */ + async downloadFile(jobUuid, languageCode, filename) { + try { + const response = await filesAPI.downloadFile(jobUuid, languageCode) + + // 根據檔案副檔名設定正確的MIME類型 + const getFileType = (filename) => { + const ext = filename.toLowerCase().split('.').pop() + const mimeTypes = { + 'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'doc': 'application/msword', + 'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + 'xls': 'application/vnd.ms-excel', + 'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + 'pdf': 'application/pdf', + 'txt': 'text/plain', + 'zip': 'application/zip' + } + return mimeTypes[ext] || 'application/octet-stream' + } + + // 使用 FileSaver.js 下載檔案,使用正確的MIME類型 + const blob = new Blob([response], { type: getFileType(filename) }) + saveAs(blob, filename) + + ElMessage.success('檔案下載完成') + } catch (error) { + console.error('下載檔案失敗:', error) + ElMessage.error('檔案下載失敗') + } + }, + + /** + * 批量下載檔案 + * @param {string} jobUuid - 任務 UUID + * @param {string} filename - 壓縮檔名稱 + */ + async downloadAllFiles(jobUuid, filename) { + try { + const response = await filesAPI.downloadAllFiles(jobUuid) + + const blob = new Blob([response], { type: 'application/zip' }) + saveAs(blob, filename || `${jobUuid}.zip`) + + ElMessage.success('檔案打包下載完成') + } catch (error) { + console.error('批量下載失敗:', error) + ElMessage.error('批量下載失敗') + } + }, + + /** + * 
更新任務狀態(用於 WebSocket 即時更新) + * @param {string} jobUuid - 任務 UUID + * @param {Object} statusUpdate - 狀態更新資料 + */ + updateJobStatus(jobUuid, statusUpdate) { + const jobIndex = this.jobs.findIndex(job => job.job_uuid === jobUuid) + + if (jobIndex !== -1) { + this.jobs[jobIndex] = { ...this.jobs[jobIndex], ...statusUpdate } + + // 如果是當前查看的任務詳情,也要更新 + if (this.currentJob && this.currentJob.job_uuid === jobUuid) { + this.currentJob = { ...this.currentJob, ...statusUpdate } + } + + // 任務完成時顯示通知 + if (statusUpdate.status === 'COMPLETED') { + ElNotification({ + title: '翻譯完成', + message: `檔案「${this.jobs[jobIndex].original_filename}」翻譯完成`, + type: 'success', + duration: 5000 + }) + } else if (statusUpdate.status === 'FAILED') { + ElNotification({ + title: '翻譯失敗', + message: `檔案「${this.jobs[jobIndex].original_filename}」翻譯失敗`, + type: 'error', + duration: 5000 + }) + } + } + }, + + /** + * 設定篩選條件 + * @param {Object} filters - 篩選條件 + */ + setFilters(filters) { + this.filters = { ...this.filters, ...filters } + }, + + /** + * 訂閱任務更新 (輪詢機制) + * @param {string} jobUuid - 任務 UUID + */ + subscribeToJobUpdates(jobUuid) { + // 如果已經在輪詢這個任務,先停止舊的輪詢 + if (this.pollingIntervals.has(jobUuid)) { + this.unsubscribeFromJobUpdates(jobUuid) + } + + console.log(`[DEBUG] 開始訂閱任務更新: ${jobUuid}`) + + const pollInterval = setInterval(async () => { + try { + const job = await this.fetchJobDetail(jobUuid) + + if (job) { + // 任務存在,更新本地狀態 + const existingJobIndex = this.jobs.findIndex(j => j.job_uuid === jobUuid) + if (existingJobIndex !== -1) { + // 更新現有任務 + this.jobs[existingJobIndex] = { ...this.jobs[existingJobIndex], ...job } + } + + // 檢查任務是否已完成 + if (['COMPLETED', 'FAILED'].includes(job.status)) { + console.log(`[DEBUG] 任務 ${jobUuid} 已完成 (${job.status}),停止輪詢`) + this.unsubscribeFromJobUpdates(jobUuid) + + // 顯示完成通知 + if (job.status === 'COMPLETED') { + ElNotification({ + title: '翻譯完成', + message: `檔案 "${job.original_filename}" 翻譯完成`, + type: 'success', + duration: 5000 + }) + } + } + } else { + // 
任務不存在(可能被刪除),停止輪詢 + console.log(`[DEBUG] 任務 ${jobUuid} 不存在,停止輪詢`) + this.unsubscribeFromJobUpdates(jobUuid) + + // 從本地列表中移除任務 + const existingJobIndex = this.jobs.findIndex(j => j.job_uuid === jobUuid) + if (existingJobIndex !== -1) { + this.jobs.splice(existingJobIndex, 1) + } + } + } catch (error) { + console.error(`輪詢任務 ${jobUuid} 狀態失敗:`, error) + + // 檢查是否是 404 錯誤(任務不存在) + if (error.response?.status === 404) { + console.log(`[DEBUG] 任務 ${jobUuid} 已被刪除,停止輪詢`) + this.unsubscribeFromJobUpdates(jobUuid) + + // 從本地列表中移除任務 + const existingJobIndex = this.jobs.findIndex(j => j.job_uuid === jobUuid) + if (existingJobIndex !== -1) { + this.jobs.splice(existingJobIndex, 1) + } + } else { + // 其他錯誤,繼續輪詢但記錄錯誤 + console.warn(`輪詢任務 ${jobUuid} 時發生錯誤,將繼續重試:`, error.message) + } + } + }, 3000) // 每 3 秒檢查一次 + + // 儲存輪詢間隔 ID + this.pollingIntervals.set(jobUuid, pollInterval) + }, + + /** + * 取消訂閱任務更新 + * @param {string} jobUuid - 任務 UUID + */ + unsubscribeFromJobUpdates(jobUuid) { + const intervalId = this.pollingIntervals.get(jobUuid) + if (intervalId) { + clearInterval(intervalId) + this.pollingIntervals.delete(jobUuid) + console.log(`[DEBUG] 已取消任務 ${jobUuid} 的輪詢訂閱`) + } + }, + + /** + * 停止所有輪詢 + */ + stopAllPolling() { + for (const [jobUuid, intervalId] of this.pollingIntervals) { + clearInterval(intervalId) + console.log(`[DEBUG] 已停止任務 ${jobUuid} 的輪詢`) + } + this.pollingIntervals.clear() + }, + + /** + * 重置任務列表 + */ + resetJobs() { + // 先停止所有輪詢 + this.stopAllPolling() + + this.jobs = [] + this.currentJob = null + this.pagination = { + page: 1, + per_page: 20, + total: 0, + pages: 0 + } + } + } +}) \ No newline at end of file diff --git a/frontend/src/stores/notification.js b/frontend/src/stores/notification.js new file mode 100644 index 0000000..2144dec --- /dev/null +++ b/frontend/src/stores/notification.js @@ -0,0 +1,310 @@ +import { defineStore } from 'pinia' +import { notificationAPI } from '@/services/notification' +import { ElMessage } from 'element-plus' + +export 
const useNotificationStore = defineStore('notification', { + state: () => ({ + notifications: [], + unreadCount: 0, + loading: false, + pagination: { + total: 0, + page: 1, + per_page: 20, + pages: 0 + } + }), + + getters: { + unreadNotifications: (state) => { + return state.notifications.filter(notification => !notification.is_read) + }, + + readNotifications: (state) => { + return state.notifications.filter(notification => notification.is_read) + }, + + hasUnreadNotifications: (state) => { + return state.unreadCount > 0 + } + }, + + actions: { + /** + * 獲取通知列表 + * @param {Object} params - 查詢參數 + */ + async fetchNotifications(params = {}) { + try { + this.loading = true + + const response = await notificationAPI.getNotifications({ + page: this.pagination.page, + per_page: this.pagination.per_page, + ...params + }) + + if (response.success) { + this.notifications = response.data.notifications + this.unreadCount = response.data.unread_count + this.pagination = { + ...this.pagination, + ...response.data.pagination + } + + console.log('📮 [Notification] 通知列表已更新', { + total: this.pagination.total, + unread: this.unreadCount + }) + } + + return response + + } catch (error) { + console.error('❌ [Notification] 獲取通知列表失敗:', error) + ElMessage.error('獲取通知失敗') + throw error + } finally { + this.loading = false + } + }, + + /** + * 標記通知為已讀 + * @param {string} notificationId - 通知ID + */ + async markAsRead(notificationId) { + try { + const response = await notificationAPI.markAsRead(notificationId) + + if (response.success) { + // 更新本地狀態 + const notification = this.notifications.find(n => n.id === notificationId) + if (notification && !notification.is_read) { + notification.is_read = true + notification.read = true + notification.read_at = new Date().toISOString() + this.unreadCount = Math.max(0, this.unreadCount - 1) + } + + console.log('✅ [Notification] 通知已標記為已讀:', notificationId) + } + + return response + + } catch (error) { + console.error('❌ [Notification] 標記已讀失敗:', error) + 
ElMessage.error('標記已讀失敗') + throw error + } + }, + + /** + * 標記所有通知為已讀 + */ + async markAllAsRead() { + try { + const response = await notificationAPI.markAllAsRead() + + if (response.success) { + // 更新本地狀態 + this.notifications.forEach(notification => { + if (!notification.is_read) { + notification.is_read = true + notification.read = true + notification.read_at = new Date().toISOString() + } + }) + this.unreadCount = 0 + + console.log('✅ [Notification] 所有通知已標記為已讀') + ElMessage.success(response.message || '所有通知已標記為已讀') + } + + return response + + } catch (error) { + console.error('❌ [Notification] 標記全部已讀失敗:', error) + ElMessage.error('標記全部已讀失敗') + throw error + } + }, + + /** + * 刪除通知 + * @param {string} notificationId - 通知ID + */ + async deleteNotification(notificationId) { + try { + const response = await notificationAPI.deleteNotification(notificationId) + + if (response.success) { + // 從本地狀態移除 + const index = this.notifications.findIndex(n => n.id === notificationId) + if (index !== -1) { + const notification = this.notifications[index] + if (!notification.is_read) { + this.unreadCount = Math.max(0, this.unreadCount - 1) + } + this.notifications.splice(index, 1) + this.pagination.total = Math.max(0, this.pagination.total - 1) + } + + console.log('🗑️ [Notification] 通知已刪除:', notificationId) + } + + return response + + } catch (error) { + console.error('❌ [Notification] 刪除通知失敗:', error) + ElMessage.error('刪除通知失敗') + throw error + } + }, + + /** + * 清空所有已讀通知 + */ + async clearNotifications() { + try { + const response = await notificationAPI.clearNotifications() + + if (response.success) { + // 從本地狀態移除已讀通知 + this.notifications = this.notifications.filter(n => !n.is_read) + this.pagination.total = this.notifications.length + + console.log('🧹 [Notification] 已讀通知已清除') + ElMessage.success(response.message || '已讀通知已清除') + } + + return response + + } catch (error) { + console.error('❌ [Notification] 清除通知失敗:', error) + ElMessage.error('清除通知失敗') + throw error + } + }, + + 
/** + * 添加新通知(用於 WebSocket 推送) + * @param {Object} notification - 通知數據 + */ + addNotification(notification) { + // 檢查是否已存在 + const exists = this.notifications.find(n => n.id === notification.id) + if (!exists) { + // 添加到列表開頭 + this.notifications.unshift(notification) + + // 更新未讀數量 + if (!notification.is_read) { + this.unreadCount += 1 + } + + // 更新總數 + this.pagination.total += 1 + + console.log('📩 [Notification] 新通知已添加:', notification.title) + + // 顯示通知 + ElMessage({ + type: this.getMessageType(notification.type), + title: notification.title, + message: notification.message, + duration: 5000 + }) + } + }, + + /** + * 更新通知 + * @param {Object} notification - 通知數據 + */ + updateNotification(notification) { + const index = this.notifications.findIndex(n => n.id === notification.id) + if (index !== -1) { + const oldNotification = this.notifications[index] + + // 更新未讀數量 + if (oldNotification.is_read !== notification.is_read) { + if (notification.is_read) { + this.unreadCount = Math.max(0, this.unreadCount - 1) + } else { + this.unreadCount += 1 + } + } + + // 更新通知 + this.notifications[index] = { ...oldNotification, ...notification } + + console.log('📝 [Notification] 通知已更新:', notification.id) + } + }, + + /** + * 創建測試通知(開發用) + */ + async createTestNotification() { + try { + const response = await notificationAPI.createTestNotification() + + if (response.success) { + // 重新獲取通知列表 + await this.fetchNotifications() + ElMessage.success('測試通知已創建') + } + + return response + + } catch (error) { + console.error('❌ [Notification] 創建測試通知失敗:', error) + ElMessage.error('創建測試通知失敗') + throw error + } + }, + + /** + * 設置分頁 + * @param {number} page - 頁碼 + * @param {number} per_page - 每頁數量 + */ + setPagination(page, per_page) { + this.pagination.page = page + if (per_page) { + this.pagination.per_page = per_page + } + }, + + /** + * 重置狀態 + */ + reset() { + this.notifications = [] + this.unreadCount = 0 + this.loading = false + this.pagination = { + total: 0, + page: 1, + per_page: 20, + 
pages: 0 + } + }, + + /** + * 獲取 ElMessage 類型 + * @param {string} type - 通知類型 + */ + getMessageType(type) { + const typeMap = { + 'success': 'success', + 'error': 'error', + 'warning': 'warning', + 'info': 'info', + 'system': 'info' + } + return typeMap[type] || 'info' + } + } +}) \ No newline at end of file diff --git a/frontend/src/style/components.scss b/frontend/src/style/components.scss new file mode 100644 index 0000000..21def24 --- /dev/null +++ b/frontend/src/style/components.scss @@ -0,0 +1,325 @@ +// 組件樣式 + +// 狀態標籤樣式 +.status-badge { + display: inline-flex; + align-items: center; + padding: 2px 8px; + border-radius: $border-radius-base; + font-size: $font-size-small; + font-weight: 500; + text-transform: uppercase; + letter-spacing: 0.5px; + + &.pending { + background-color: map-get($status-colors, 'PENDING'); + color: white; + } + + &.processing { + background-color: map-get($status-colors, 'PROCESSING'); + color: white; + } + + &.completed { + background-color: map-get($status-colors, 'COMPLETED'); + color: white; + } + + &.failed { + background-color: map-get($status-colors, 'FAILED'); + color: white; + } + + &.retry { + background-color: map-get($status-colors, 'RETRY'); + color: white; + } +} + +// 檔案圖示樣式 +.file-icon { + display: inline-flex; + align-items: center; + justify-content: center; + width: 32px; + height: 32px; + border-radius: $border-radius-base; + color: white; + font-size: $font-size-small; + font-weight: bold; + + &.docx, &.doc { + background-color: map-get($file-type-colors, 'docx'); + } + + &.pptx, &.ppt { + background-color: map-get($file-type-colors, 'pptx'); + } + + &.xlsx, &.xls { + background-color: map-get($file-type-colors, 'xlsx'); + } + + &.pdf { + background-color: map-get($file-type-colors, 'pdf'); + } +} + +// 進度條樣式 +.progress-bar { + width: 100%; + height: 6px; + background-color: $border-color-lighter; + border-radius: 3px; + overflow: hidden; + + .progress-fill { + height: 100%; + background: linear-gradient(90deg, 
$primary-color, lighten($primary-color, 10%)); + border-radius: 3px; + transition: width 0.3s ease; + position: relative; + + &::after { + content: ''; + position: absolute; + top: 0; + left: 0; + bottom: 0; + right: 0; + background: linear-gradient( + -45deg, + rgba(255, 255, 255, 0.2) 25%, + transparent 25%, + transparent 50%, + rgba(255, 255, 255, 0.2) 50%, + rgba(255, 255, 255, 0.2) 75%, + transparent 75%, + transparent + ); + background-size: 20px 20px; + animation: progress-stripes 1s linear infinite; + } + } +} + +@keyframes progress-stripes { + 0% { background-position: 0 0; } + 100% { background-position: 20px 0; } +} + +// 上傳區域樣式 +.upload-area { + border: 2px dashed $border-color; + border-radius: $border-radius-base; + background-color: $bg-color-light; + transition: all $transition-duration-base; + + &:hover, &.dragover { + border-color: $primary-color; + background-color: rgba($primary-color, 0.05); + } + + &.disabled { + border-color: $border-color-lighter; + background-color: $border-color-extra-light; + cursor: not-allowed; + + * { + pointer-events: none; + } + } +} + +// 任務卡片樣式 +.job-card { + @include card-style; + margin-bottom: $spacing-md; + cursor: pointer; + position: relative; + + &:hover { + border-color: $primary-color; + transform: translateY(-1px); + } + + .job-header { + @include flex-between; + margin-bottom: $spacing-sm; + + .job-title { + font-weight: 600; + color: $text-color-primary; + @include text-ellipsis; + max-width: 60%; + } + + .job-actions { + display: flex; + gap: $spacing-xs; + } + } + + .job-info { + display: grid; + grid-template-columns: 1fr 1fr; + gap: $spacing-sm; + font-size: $font-size-small; + color: $text-color-secondary; + + @include respond-to(sm) { + grid-template-columns: 1fr; + } + } + + .job-progress { + margin-top: $spacing-sm; + + .progress-text { + @include flex-between; + font-size: $font-size-small; + color: $text-color-secondary; + margin-bottom: $spacing-xs; + } + } + + .job-footer { + @include 
flex-between; + margin-top: $spacing-sm; + padding-top: $spacing-sm; + border-top: 1px solid $border-color-lighter; + + .job-time { + font-size: $font-size-small; + color: $text-color-secondary; + } + } +} + +// 統計卡片樣式 +.stat-card { + @include card-style($spacing-lg); + text-align: center; + + .stat-icon { + width: 48px; + height: 48px; + margin: 0 auto $spacing-sm; + border-radius: 50%; + @include flex-center; + + &.primary { background-color: rgba($primary-color, 0.1); color: $primary-color; } + &.success { background-color: rgba($success-color, 0.1); color: $success-color; } + &.warning { background-color: rgba($warning-color, 0.1); color: $warning-color; } + &.danger { background-color: rgba($danger-color, 0.1); color: $danger-color; } + &.info { background-color: rgba($info-color, 0.1); color: $info-color; } + } + + .stat-value { + font-size: $font-size-extra-large; + font-weight: bold; + color: $text-color-primary; + margin-bottom: $spacing-xs; + } + + .stat-label { + font-size: $font-size-small; + color: $text-color-secondary; + margin-bottom: $spacing-sm; + } + + .stat-change { + font-size: $font-size-small; + + &.positive { color: $success-color; } + &.negative { color: $danger-color; } + } +} + +// 空狀態樣式 +.empty-state { + text-align: center; + padding: $spacing-xxl * 2; + color: $text-color-secondary; + + .empty-icon { + font-size: 64px; + color: $border-color; + margin-bottom: $spacing-lg; + } + + .empty-title { + font-size: $font-size-large; + color: $text-color-primary; + margin-bottom: $spacing-sm; + } + + .empty-description { + font-size: $font-size-base; + line-height: 1.6; + margin-bottom: $spacing-lg; + } +} + +// 語言標籤樣式 +.language-tag { + display: inline-block; + padding: 2px 6px; + margin: 2px; + background-color: $primary-color; + color: white; + border-radius: $border-radius-small; + font-size: $font-size-small; + + &:last-child { + margin-right: 0; + } +} + +// 載入覆蓋層 +.loading-overlay { + position: absolute; + top: 0; + left: 0; + right: 0; + 
bottom: 0; + background-color: rgba(255, 255, 255, 0.8); + backdrop-filter: blur(2px); + @include flex-center; + z-index: $z-index-modal; + + .loading-content { + text-align: center; + + .loading-spinner { + @include loading-spinner(32px); + margin: 0 auto $spacing-md; + } + + .loading-text { + color: $text-color-secondary; + font-size: $font-size-base; + } + } +} + +// 工具提示樣式覆蓋 +.custom-tooltip { + &.el-popper { + max-width: 300px; + + .el-popper__arrow::before { + border-color: rgba(0, 0, 0, 0.8); + } + } + + .el-tooltip__content { + background-color: rgba(0, 0, 0, 0.8); + color: white; + border-radius: $border-radius-base; + padding: $spacing-sm $spacing-md; + font-size: $font-size-small; + line-height: 1.4; + } +} \ No newline at end of file diff --git a/frontend/src/style/layouts.scss b/frontend/src/style/layouts.scss new file mode 100644 index 0000000..f0b63f0 --- /dev/null +++ b/frontend/src/style/layouts.scss @@ -0,0 +1,461 @@ +// 布局樣式 + +// 主要布局容器 +.app-layout { + display: flex; + height: 100vh; + overflow: hidden; + + // 側邊欄 + .layout-sidebar { + width: 240px; + background-color: $sidebar-bg; + color: $sidebar-text-color; + display: flex; + flex-direction: column; + transition: width $transition-duration-base; + z-index: $z-index-top; + + &.collapsed { + width: 64px; + } + + @include respond-to(md) { + position: fixed; + top: 0; + left: 0; + bottom: 0; + transform: translateX(-100%); + + &.mobile-show { + transform: translateX(0); + } + } + + .sidebar-header { + padding: $spacing-lg; + border-bottom: 1px solid rgba(255, 255, 255, 0.1); + @include flex-center; + + .logo { + display: flex; + align-items: center; + color: white; + font-size: $font-size-large; + font-weight: bold; + text-decoration: none; + + .logo-icon { + width: 32px; + height: 32px; + margin-right: $spacing-sm; + background: linear-gradient(45deg, $primary-color, lighten($primary-color, 10%)); + border-radius: $border-radius-base; + @include flex-center; + color: white; + } + + .logo-text 
{ + transition: opacity $transition-duration-base; + + .collapsed & { + opacity: 0; + width: 0; + overflow: hidden; + } + } + } + } + + .sidebar-menu { + flex: 1; + padding: $spacing-lg 0; + overflow-y: auto; + @include custom-scrollbar(rgba(255, 255, 255, 0.3), transparent, 4px); + + .menu-item { + display: block; + padding: $spacing-md $spacing-lg; + color: $sidebar-text-color; + text-decoration: none; + transition: all $transition-duration-fast; + position: relative; + + &:hover { + background-color: rgba(255, 255, 255, 0.1); + color: white; + } + + &.active { + background-color: rgba($primary-color, 0.2); + color: $primary-color; + + &::before { + content: ''; + position: absolute; + left: 0; + top: 0; + bottom: 0; + width: 3px; + background-color: $primary-color; + } + } + + .menu-icon { + width: 20px; + margin-right: $spacing-sm; + text-align: center; + transition: margin-right $transition-duration-base; + + .collapsed & { + margin-right: 0; + } + } + + .menu-text { + transition: opacity $transition-duration-base; + + .collapsed & { + opacity: 0; + width: 0; + overflow: hidden; + } + } + } + } + + .sidebar-footer { + padding: $spacing-lg; + border-top: 1px solid rgba(255, 255, 255, 0.1); + + .collapse-toggle { + width: 100%; + padding: $spacing-sm; + background: transparent; + border: 1px solid rgba(255, 255, 255, 0.2); + border-radius: $border-radius-base; + color: $sidebar-text-color; + cursor: pointer; + transition: all $transition-duration-fast; + + &:hover { + background-color: rgba(255, 255, 255, 0.1); + border-color: rgba(255, 255, 255, 0.3); + } + } + } + } + + // 主要內容區 + .layout-main { + flex: 1; + display: flex; + flex-direction: column; + overflow: hidden; + background-color: $bg-color-page; + + // 頂部導航欄 + .layout-header { + height: 60px; + background-color: $header-bg; + border-bottom: 1px solid $border-color-lighter; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05); + @include flex-between; + padding: 0 $spacing-lg; + z-index: $z-index-normal; + + 
@include respond-to(md) { + padding: 0 $spacing-md; + } + + .header-left { + display: flex; + align-items: center; + + .menu-toggle { + display: none; + padding: $spacing-sm; + background: transparent; + border: none; + cursor: pointer; + margin-right: $spacing-md; + + @include respond-to(md) { + display: block; + } + } + + .breadcrumb { + display: flex; + align-items: center; + font-size: $font-size-base; + color: $text-color-secondary; + + .breadcrumb-item { + &:not(:last-child)::after { + content: '/'; + margin: 0 $spacing-sm; + color: $text-color-placeholder; + } + + &:last-child { + color: $text-color-primary; + font-weight: 500; + } + } + } + } + + .header-right { + display: flex; + align-items: center; + gap: $spacing-md; + + .notification-bell { + position: relative; + cursor: pointer; + padding: $spacing-sm; + border-radius: $border-radius-base; + transition: background-color $transition-duration-fast; + + &:hover { + background-color: $bg-color-light; + } + + .badge { + position: absolute; + top: 2px; + right: 2px; + width: 8px; + height: 8px; + background-color: $danger-color; + border-radius: 50%; + } + } + + .user-avatar { + cursor: pointer; + + .avatar-button { + display: flex; + align-items: center; + padding: $spacing-sm; + border-radius: $border-radius-base; + transition: background-color $transition-duration-fast; + + &:hover { + background-color: $bg-color-light; + } + + .avatar { + width: 32px; + height: 32px; + border-radius: 50%; + background: linear-gradient(45deg, $primary-color, lighten($primary-color, 10%)); + @include flex-center; + color: white; + font-weight: bold; + margin-right: $spacing-sm; + + @include respond-to(sm) { + margin-right: 0; + } + } + + .user-info { + @include respond-to(sm) { + display: none; + } + + .user-name { + font-size: $font-size-base; + font-weight: 500; + color: $text-color-primary; + line-height: 1.2; + } + + .user-role { + font-size: $font-size-small; + color: $text-color-secondary; + line-height: 1.2; + } + 
} + } + } + } + } + + // 內容區域 + .layout-content { + flex: 1; + overflow: hidden; + position: relative; + + .content-wrapper { + height: 100%; + overflow: auto; + padding: $spacing-lg; + + @include respond-to(md) { + padding: $spacing-md; + } + + @include respond-to(sm) { + padding: $spacing-sm; + } + } + } + } +} + +// 移動設備遮罩 +.mobile-mask { + display: none; + + @include respond-to(md) { + display: block; + position: fixed; + top: 0; + left: 0; + right: 0; + bottom: 0; + background-color: rgba(0, 0, 0, 0.5); + z-index: $z-index-top - 1; + opacity: 0; + visibility: hidden; + transition: all $transition-duration-base; + + &.show { + opacity: 1; + visibility: visible; + } + } +} + +// 登入頁面布局 +.login-layout { + min-height: 100vh; + background: linear-gradient(135deg, #2c3e50 0%, #34495e 100%); + @include flex-center; + padding: $spacing-lg; + + .login-container { + width: 100%; + max-width: 400px; + background: white; + border-radius: $border-radius-base * 2; + box-shadow: 0 20px 40px rgba(0, 0, 0, 0.1); + overflow: hidden; + + .login-header { + background: linear-gradient(45deg, #1a1a2e, #16213e); + padding: $spacing-xxl; + text-align: center; + color: white; + + .login-logo { + width: 200px; + height: 80px; + margin: 0 auto $spacing-lg; + @include flex-center; + + img { + width: 100%; + height: 100%; + object-fit: contain; + // 移除濾鏡,讓白色 LOGO 在深色背景上自然顯示 + } + } + + .login-title { + font-size: $font-size-extra-large; + font-weight: bold; + margin-bottom: $spacing-sm; + } + + .login-subtitle { + font-size: $font-size-base; + opacity: 0.9; + } + } + + .login-body { + padding: $spacing-xxl; + } + + .login-footer { + padding: $spacing-lg $spacing-xxl; + background-color: $bg-color-light; + text-align: center; + color: $text-color-secondary; + font-size: $font-size-small; + } + } +} + +// 頁面標題區域 +.page-header { + display: flex; + align-items: center; + justify-content: space-between; + margin-bottom: $spacing-lg; + padding-bottom: $spacing-md; + border-bottom: 1px solid 
$border-color-lighter; + + @include respond-to(sm) { + flex-direction: column; + align-items: flex-start; + gap: $spacing-md; + } + + .page-title { + font-size: $font-size-extra-large; + font-weight: bold; + color: $text-color-primary; + margin: 0; + } + + .page-actions { + display: flex; + gap: $spacing-sm; + } +} + +// 內容卡片 +.content-card { + @include card-style; + + &:not(:last-child) { + margin-bottom: $spacing-lg; + } + + .card-header { + @include flex-between; + margin-bottom: $spacing-lg; + padding-bottom: $spacing-md; + border-bottom: 1px solid $border-color-lighter; + + .card-title { + font-size: $font-size-large; + font-weight: 600; + color: $text-color-primary; + margin: 0; + } + + .card-actions { + display: flex; + gap: $spacing-sm; + } + } + + .card-body { + // 內容樣式由具體組件定義 + } + + .card-footer { + margin-top: $spacing-lg; + padding-top: $spacing-md; + border-top: 1px solid $border-color-lighter; + @include flex-between; + } +} \ No newline at end of file diff --git a/frontend/src/style/main.scss b/frontend/src/style/main.scss new file mode 100644 index 0000000..68a1332 --- /dev/null +++ b/frontend/src/style/main.scss @@ -0,0 +1,187 @@ +// 主要樣式文件 +@import './variables.scss'; +@import './mixins.scss'; +@import './components.scss'; +@import './layouts.scss'; + +// 全局重置樣式 +*, *::before, *::after { + margin: 0; + padding: 0; + box-sizing: border-box; +} + +html { + height: 100%; + font-size: 14px; +} + +body { + height: 100%; + font-family: $font-family; + background-color: var(--el-bg-color-page); + color: var(--el-text-color-primary); + line-height: 1.6; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; +} + +#app { + height: 100%; +} + +// 滾動條樣式 +::-webkit-scrollbar { + width: 8px; + height: 8px; +} + +::-webkit-scrollbar-track { + background: var(--el-fill-color-lighter); + border-radius: 4px; +} + +::-webkit-scrollbar-thumb { + background: var(--el-border-color); + border-radius: 4px; + + &:hover { + background: 
var(--el-border-color-darker); + } +} + +// Firefox 滾動條 +* { + scrollbar-width: thin; + scrollbar-color: var(--el-border-color) var(--el-fill-color-lighter); +} + +// 文字選擇顏色 +::selection { + background: var(--el-color-primary-light-8); + color: var(--el-color-primary); +} + +::-moz-selection { + background: var(--el-color-primary-light-8); + color: var(--el-color-primary); +} + +// 通用輔助類別 +.text-center { text-align: center; } +.text-left { text-align: left; } +.text-right { text-align: right; } + +.flex { display: flex; } +.flex-center { + display: flex; + align-items: center; + justify-content: center; +} +.flex-between { + display: flex; + align-items: center; + justify-content: space-between; +} +.flex-column { + display: flex; + flex-direction: column; +} + +// 間距輔助類別 +@for $i from 1 through 10 { + .m-#{$i} { margin: #{$i * 4}px; } + .mt-#{$i} { margin-top: #{$i * 4}px; } + .mr-#{$i} { margin-right: #{$i * 4}px; } + .mb-#{$i} { margin-bottom: #{$i * 4}px; } + .ml-#{$i} { margin-left: #{$i * 4}px; } + .mx-#{$i} { + margin-left: #{$i * 4}px; + margin-right: #{$i * 4}px; + } + .my-#{$i} { + margin-top: #{$i * 4}px; + margin-bottom: #{$i * 4}px; + } + + .p-#{$i} { padding: #{$i * 4}px; } + .pt-#{$i} { padding-top: #{$i * 4}px; } + .pr-#{$i} { padding-right: #{$i * 4}px; } + .pb-#{$i} { padding-bottom: #{$i * 4}px; } + .pl-#{$i} { padding-left: #{$i * 4}px; } + .px-#{$i} { + padding-left: #{$i * 4}px; + padding-right: #{$i * 4}px; + } + .py-#{$i} { + padding-top: #{$i * 4}px; + padding-bottom: #{$i * 4}px; + } +} + +// 響應式斷點 +.hidden-xs { + @include respond-to(xs) { display: none !important; } +} +.hidden-sm { + @include respond-to(sm) { display: none !important; } +} +.hidden-md { + @include respond-to(md) { display: none !important; } +} +.hidden-lg { + @include respond-to(lg) { display: none !important; } +} + +// 動畫類別 +.fade-enter-active, +.fade-leave-active { + transition: opacity 0.3s ease; +} + +.fade-enter-from, +.fade-leave-to { + opacity: 0; +} + 
+.slide-enter-active, +.slide-leave-active { + transition: all 0.3s cubic-bezier(0.25, 0.8, 0.5, 1); +} + +.slide-enter-from { + transform: translateX(-20px); + opacity: 0; +} + +.slide-leave-to { + transform: translateX(20px); + opacity: 0; +} + +// 卡片陰影 +.card-shadow { + box-shadow: 0 2px 12px 0 rgba(0, 0, 0, 0.1); +} + +.card-hover-shadow { + transition: box-shadow 0.3s ease; + + &:hover { + box-shadow: 0 4px 20px 0 rgba(0, 0, 0, 0.15); + } +} + +// 載入狀態 +.loading-overlay { + position: absolute; + top: 0; + left: 0; + right: 0; + bottom: 0; + background: rgba(255, 255, 255, 0.8); + display: flex; + align-items: center; + justify-content: center; + z-index: 999; +} \ No newline at end of file diff --git a/frontend/src/style/mixins.scss b/frontend/src/style/mixins.scss new file mode 100644 index 0000000..7f70e34 --- /dev/null +++ b/frontend/src/style/mixins.scss @@ -0,0 +1,272 @@ +// SCSS Mixins 混合器 + +// 響應式斷點混合器 +@mixin respond-to($breakpoint) { + @if $breakpoint == xs { + @media (max-width: #{$breakpoint-xs - 1px}) { @content; } + } + @if $breakpoint == sm { + @media (max-width: #{$breakpoint-sm - 1px}) { @content; } + } + @if $breakpoint == md { + @media (max-width: #{$breakpoint-md - 1px}) { @content; } + } + @if $breakpoint == lg { + @media (max-width: #{$breakpoint-lg - 1px}) { @content; } + } + @if $breakpoint == xl { + @media (min-width: $breakpoint-xl) { @content; } + } +} + +// 最小寬度斷點 +@mixin respond-above($breakpoint) { + @if $breakpoint == xs { + @media (min-width: $breakpoint-xs) { @content; } + } + @if $breakpoint == sm { + @media (min-width: $breakpoint-sm) { @content; } + } + @if $breakpoint == md { + @media (min-width: $breakpoint-md) { @content; } + } + @if $breakpoint == lg { + @media (min-width: $breakpoint-lg) { @content; } + } +} + +// Flexbox 輔助混合器 +@mixin flex-center { + display: flex; + align-items: center; + justify-content: center; +} + +@mixin flex-between { + display: flex; + align-items: center; + justify-content: space-between; +} + 
+@mixin flex-start { + display: flex; + align-items: center; + justify-content: flex-start; +} + +@mixin flex-end { + display: flex; + align-items: center; + justify-content: flex-end; +} + +@mixin flex-column { + display: flex; + flex-direction: column; +} + +@mixin flex-column-center { + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; +} + +// 文字省略號 +@mixin text-ellipsis { + overflow: hidden; + white-space: nowrap; + text-overflow: ellipsis; +} + +@mixin multi-line-ellipsis($lines: 2) { + overflow: hidden; + display: -webkit-box; + -webkit-line-clamp: $lines; + -webkit-box-orient: vertical; + text-overflow: ellipsis; +} + +// 清除浮動 +@mixin clearfix { + &::after { + content: ''; + display: table; + clear: both; + } +} + +// 隱藏滾動條 +@mixin hide-scrollbar { + scrollbar-width: none; /* Firefox */ + -ms-overflow-style: none; /* IE 10+ */ + + &::-webkit-scrollbar { + display: none; /* Chrome Safari */ + } +} + +// 自定義滾動條 +@mixin custom-scrollbar($thumb-color: $border-color, $track-color: transparent, $size: 6px) { + &::-webkit-scrollbar { + width: $size; + height: $size; + } + + &::-webkit-scrollbar-track { + background: $track-color; + border-radius: $size / 2; + } + + &::-webkit-scrollbar-thumb { + background: $thumb-color; + border-radius: $size / 2; + + &:hover { + background: darken($thumb-color, 10%); + } + } +} + +// 絕對定位置中 +@mixin absolute-center { + position: absolute; + top: 50%; + left: 50%; + transform: translate(-50%, -50%); +} + +@mixin absolute-center-x { + position: absolute; + left: 50%; + transform: translateX(-50%); +} + +@mixin absolute-center-y { + position: absolute; + top: 50%; + transform: translateY(-50%); +} + +// 固定比例容器 +@mixin aspect-ratio($width: 16, $height: 9) { + position: relative; + overflow: hidden; + + &::before { + content: ''; + display: block; + width: 100%; + padding-top: ($height / $width) * 100%; + } + + > * { + position: absolute; + top: 0; + left: 0; + right: 0; + bottom: 0; + } +} + 
+// 過渡動畫 +@mixin transition($property: all, $duration: $transition-duration-base, $timing-function: ease-in-out) { + transition: $property $duration $timing-function; +} + +@mixin hover-lift { + transition: transform $transition-duration-fast ease-out, box-shadow $transition-duration-fast ease-out; + + &:hover { + transform: translateY(-2px); + box-shadow: $box-shadow-dark; + } +} + +// 按鈕樣式混合器 +@mixin button-variant($color, $background, $border: $background) { + color: $color; + background-color: $background; + border-color: $border; + + &:hover, + &:focus { + color: $color; + background-color: lighten($background, 5%); + border-color: lighten($border, 5%); + } + + &:active { + color: $color; + background-color: darken($background, 5%); + border-color: darken($border, 5%); + } +} + +// 狀態標籤樣式 +@mixin status-badge($color) { + display: inline-block; + padding: 2px 8px; + font-size: $font-size-small; + font-weight: 500; + color: white; + background-color: $color; + border-radius: $border-radius-base; + text-transform: uppercase; + letter-spacing: 0.5px; +} + +// 卡片樣式 +@mixin card-style($padding: $spacing-lg, $border-radius: $border-radius-base) { + background: $bg-color; + border: 1px solid $border-color-lighter; + border-radius: $border-radius; + box-shadow: $box-shadow-light; + padding: $padding; + transition: box-shadow $transition-duration-base; + + &:hover { + box-shadow: $box-shadow-dark; + } +} + +// 表單輸入樣式 +@mixin form-input { + display: block; + width: 100%; + padding: 8px 12px; + font-size: $font-size-base; + line-height: $line-height-base; + color: $text-color-primary; + background-color: $bg-color; + border: 1px solid $border-color; + border-radius: $border-radius-base; + transition: border-color $transition-duration-fast, box-shadow $transition-duration-fast; + + &:focus { + outline: none; + border-color: $primary-color; + box-shadow: 0 0 0 2px rgba($primary-color, 0.2); + } + + &:disabled { + background-color: $bg-color-light; + color: 
$text-color-placeholder; + cursor: not-allowed; + } +} + +// Loading 動畫 +@mixin loading-spinner($size: 20px, $color: $primary-color) { + width: $size; + height: $size; + border: 2px solid transparent; + border-top-color: $color; + border-radius: 50%; + animation: spin 1s linear infinite; +} + +@keyframes spin { + 0% { transform: rotate(0deg); } + 100% { transform: rotate(360deg); } +} \ No newline at end of file diff --git a/frontend/src/style/variables.scss b/frontend/src/style/variables.scss new file mode 100644 index 0000000..90c47bc --- /dev/null +++ b/frontend/src/style/variables.scss @@ -0,0 +1,106 @@ +// SCSS 變數定義 + +// 顏色系統 - 調整為深色系 +$primary-color: #2c3e50; +$success-color: #27ae60; +$warning-color: #f39c12; +$danger-color: #e74c3c; +$info-color: #34495e; + +// 文字顏色 +$text-color-primary: #303133; +$text-color-regular: #606266; +$text-color-secondary: #909399; +$text-color-placeholder: #c0c4cc; + +// 背景顏色 +$bg-color-page: #f2f3f5; +$bg-color: #ffffff; +$bg-color-light: #fafafa; + +// 邊框顏色 +$border-color: #dcdfe6; +$border-color-light: #e4e7ed; +$border-color-lighter: #ebeef5; +$border-color-extra-light: #f2f6fc; + +// 字體 +$font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', sans-serif; +$font-size-extra-large: 20px; +$font-size-large: 18px; +$font-size-medium: 16px; +$font-size-base: 14px; +$font-size-small: 13px; +$font-size-extra-small: 12px; + +// 行高 +$line-height-base: 1.5; + +// 間距 +$spacing-base: 4px; +$spacing-xs: 4px; +$spacing-sm: 8px; +$spacing-md: 12px; +$spacing-lg: 16px; +$spacing-xl: 20px; +$spacing-xxl: 24px; + +// 邊框半徑 +$border-radius-base: 4px; +$border-radius-small: 2px; +$border-radius-round: 20px; +$border-radius-circle: 50%; + +// 陰影 +$box-shadow-base: 0 2px 4px rgba(0, 0, 0, .12), 0 0 6px rgba(0, 0, 0, .04); +$box-shadow-light: 0 2px 12px 0 rgba(0, 0, 0, .1); +$box-shadow-dark: 0 4px 20px 0 rgba(0, 0, 0, .15); + +// z-index 層級 
+$z-index-normal: 1; +$z-index-top: 1000; +$z-index-popper: 2000; +$z-index-modal: 3000; + +// 斷點 +$breakpoint-xs: 480px; +$breakpoint-sm: 768px; +$breakpoint-md: 992px; +$breakpoint-lg: 1200px; +$breakpoint-xl: 1920px; + +// 動畫持續時間 +$transition-duration-fast: 0.2s; +$transition-duration-base: 0.3s; +$transition-duration-slow: 0.5s; + +// 動畫緩動函數 +$ease-in-out-circ: cubic-bezier(0.78, 0.14, 0.15, 0.86); +$ease-out-back: cubic-bezier(0.12, 0.4, 0.29, 1.46); +$ease-in-out-back: cubic-bezier(0.71, -0.46, 0.29, 1.46); + +// 組件特定顏色 +$header-bg: #fff; +$sidebar-bg: #1a1a2e; +$sidebar-text-color: #bfcbd9; +$sidebar-active-color: #3498db; + +// 狀態顏色映射 +$status-colors: ( + 'PENDING': #7f8c8d, + 'PROCESSING': #3498db, + 'COMPLETED': #27ae60, + 'FAILED': #e74c3c, + 'RETRY': #f39c12 +); + +// 檔案類型圖示顏色 +$file-type-colors: ( + 'docx': #2b579a, + 'doc': #2b579a, + 'pptx': #d24726, + 'ppt': #d24726, + 'xlsx': #207245, + 'xls': #207245, + 'pdf': #ff0000 +); \ No newline at end of file diff --git a/frontend/src/utils/request.js b/frontend/src/utils/request.js new file mode 100644 index 0000000..8d13ca0 --- /dev/null +++ b/frontend/src/utils/request.js @@ -0,0 +1,203 @@ +import axios from 'axios' +import { ElMessage, ElMessageBox } from 'element-plus' +import { useAuthStore } from '@/stores/auth' +import router from '@/router' +import NProgress from 'nprogress' + +// 創建 axios 實例 +const service = axios.create({ + baseURL: import.meta.env.VITE_API_BASE_URL || 'http://127.0.0.1:12010/api/v1', + timeout: 30000, // 30秒超時 + headers: { + 'Content-Type': 'application/json' + } +}) + +// 請求攔截器 +service.interceptors.request.use( + config => { + NProgress.start() + + + // JWT 認證:添加 Authorization header + const authStore = useAuthStore() + if (authStore.token) { + config.headers.Authorization = `Bearer ${authStore.token}` + } + + return config + }, + error => { + NProgress.done() + console.error('❌ [Request Error]:', error) + return Promise.reject(error) + } +) + +// 用於防止重複處理 401 錯誤 +let 
isHandling401 = false + +// 回應攔截器 +service.interceptors.response.use( + response => { + NProgress.done() + + console.log('✅ [API Response]', { + status: response.status, + statusText: response.statusText, + url: response.config.url, + method: response.config.method.toUpperCase(), + data: response.data, + headers: response.headers, + timestamp: new Date().toISOString() + }) + + const { data } = response + + // 後端統一回應格式處理 + if (data && typeof data === 'object') { + if (data.success === false) { + // 業務錯誤處理 + const message = data.message || '操作失敗' + console.warn('⚠️ [Business Error]:', message) + ElMessage.error(message) + return Promise.reject(new Error(message)) + } + + return data + } + + return response + }, + error => { + NProgress.done() + + const { response } = error + const authStore = useAuthStore() + + if (response) { + const { status, data } = response + + switch (status) { + case 401: + // 避免在登入頁面或登入過程中觸發自動登出 + const requestUrl = error.config?.url || '' + const currentPath = router.currentRoute.value.path + + console.error('🔐 [401 Unauthorized]', { + requestUrl, + currentPath, + isLoginPage: currentPath === '/login', + isLoginRequest: requestUrl.includes('/auth/login'), + isHandling401, + willTriggerLogout: currentPath !== '/login' && !requestUrl.includes('/auth/login') && !isHandling401, + timestamp: new Date().toISOString() + }) + + // 防止重複處理 + if (!isHandling401 && currentPath !== '/login' && !requestUrl.includes('/auth/login')) { + isHandling401 = true + console.error('🚪 [Auto Logout] 認證失效,觸發自動登出') + + // 只顯示一次訊息 + ElMessage.error('認證已過期,請重新登入') + + // 使用自動登出模式,不顯示額外訊息 + authStore.logout(false, true).finally(() => { + router.push('/login').then(() => { + // 導航完成後重置標記 + setTimeout(() => { + isHandling401 = false + }, 1000) + }) + }) + } else if (isHandling401) { + console.log('🔐 [401 Skipped] 已在處理其他 401 錯誤') + } else { + console.log('🔐 [401 Ignored] 在登入頁面或登入請求') + } + break + + case 403: + ElMessage.error('無權限存取此資源') + break + + case 404: + 
ElMessage.error('請求的資源不存在') + break + + case 422: + // 表單驗證錯誤 + const message = data.message || '輸入資料格式錯誤' + ElMessage.error(message) + break + + case 429: + ElMessage.error('請求過於頻繁,請稍後再試') + break + + case 500: + ElMessage.error('伺服器內部錯誤') + break + + case 502: + case 503: + case 504: + ElMessage.error('伺服器暫時無法存取,請稍後再試') + break + + default: + const errorMessage = data?.message || error.message || '網路錯誤' + ElMessage.error(errorMessage) + } + } else if (error.code === 'ECONNABORTED') { + ElMessage.error('請求超時,請檢查網路連線') + } else { + ElMessage.error('網路連線失敗,請檢查網路設定') + } + + return Promise.reject(error) + } +) + +// 檔案上傳專用請求實例 +export const uploadRequest = axios.create({ + baseURL: import.meta.env.VITE_API_BASE_URL || 'http://127.0.0.1:12010/api/v1', + timeout: 120000, // 2分鐘超時 + headers: { + 'Content-Type': 'multipart/form-data' + } +}) + +// 為上傳請求添加攔截器 +uploadRequest.interceptors.request.use( + config => { + // JWT 認證:添加 Authorization header + const authStore = useAuthStore() + if (authStore.token) { + config.headers.Authorization = `Bearer ${authStore.token}` + } + return config + }, + error => Promise.reject(error) +) + +uploadRequest.interceptors.response.use( + response => response.data, + error => { + const message = error.response?.data?.message || '檔案上傳失敗' + ElMessage.error(message) + return Promise.reject(error) + } +) + +// 常用請求方法封裝 +export const request = { + get: (url, config = {}) => service.get(url, config), + post: (url, data = {}, config = {}) => service.post(url, data, config), + put: (url, data = {}, config = {}) => service.put(url, data, config), + delete: (url, config = {}) => service.delete(url, config), + patch: (url, data = {}, config = {}) => service.patch(url, data, config) +} + +export default service \ No newline at end of file diff --git a/frontend/src/utils/websocket.js b/frontend/src/utils/websocket.js new file mode 100644 index 0000000..7bfa66e --- /dev/null +++ b/frontend/src/utils/websocket.js @@ -0,0 +1,460 @@ +import { io } from 
'socket.io-client' +import { useJobsStore } from '@/stores/jobs' +import { useNotificationStore } from '@/stores/notification' +import { ElMessage, ElNotification } from 'element-plus' + +/** + * WebSocket 服務類 + */ +class WebSocketService { + constructor() { + this.socket = null + this.isConnected = false + this.reconnectAttempts = 0 + this.maxReconnectAttempts = 5 + this.reconnectInterval = 5000 + this.jobSubscriptions = new Set() + } + + /** + * 初始化並連接 WebSocket + */ + connect() { + // 檢查 WebSocket 是否被禁用 + const devMode = import.meta.env.VITE_DEV_MODE === 'true' + const isProd = import.meta.env.PROD + const wsDisabled = import.meta.env.VITE_DISABLE_WEBSOCKET === 'true' + + if (!devMode || isProd || wsDisabled) { + console.log('🔌 [WebSocket] WebSocket 連接已禁用,跳過連接') + return + } + + if (this.socket) { + return + } + + try { + // 建立 Socket.IO 連接 + const wsUrl = import.meta.env.VITE_WS_BASE_URL || 'http://127.0.0.1:12010' + console.log('🔌 [WebSocket] 嘗試連接到:', wsUrl) + + this.socket = io(wsUrl, { + path: '/socket.io/', + transports: ['polling'], + upgrade: false, + rememberUpgrade: false, + autoConnect: true, + forceNew: false, + reconnection: true, + reconnectionDelay: this.reconnectInterval, + reconnectionAttempts: this.maxReconnectAttempts + }) + + this.setupEventHandlers() + } catch (error) { + console.error('WebSocket 連接失敗:', error) + } + } + + /** + * 設定事件處理器 + */ + setupEventHandlers() { + if (!this.socket) return + + // 連接成功 + this.socket.on('connect', () => { + console.log('WebSocket 已連接') + this.isConnected = true + this.reconnectAttempts = 0 + + // 重新訂閱所有任務 + this.resubscribeJobs() + }) + + // 連接失敗 + this.socket.on('connect_error', (error) => { + console.error('WebSocket 連接錯誤:', error) + this.isConnected = false + }) + + // 斷線 + this.socket.on('disconnect', (reason) => { + console.log('WebSocket 已斷線:', reason) + this.isConnected = false + + if (reason === 'io server disconnect') { + // 服務器主動斷線,需要重新連接 + this.socket.connect() + } + }) + + // 任務狀態更新 + 
this.socket.on('job_status', (data) => { + this.handleJobStatusUpdate(data) + }) + + // 系統通知 + this.socket.on('system_notification', (data) => { + this.handleSystemNotification(data) + }) + + // 新通知推送 + this.socket.on('new_notification', (data) => { + this.handleNewNotification(data) + }) + + // 系統消息 + this.socket.on('system_message', (data) => { + this.handleSystemMessage(data) + }) + + // 連接狀態回應 + this.socket.on('connected', (data) => { + console.log('WebSocket 連接確認:', data) + }) + + // 訂閱成功回應 + this.socket.on('subscribed', (data) => { + console.log('任務訂閱成功:', data.job_uuid) + }) + + // 取消訂閱成功回應 + this.socket.on('unsubscribed', (data) => { + console.log('任務取消訂閱成功:', data.job_uuid) + }) + + // 錯誤處理 + this.socket.on('error', (error) => { + console.error('WebSocket 錯誤:', error) + ElMessage.error(error.message || 'WebSocket 連接錯誤') + }) + } + + /** + * 處理任務狀態更新 + * @param {Object} data - 狀態更新資料 + */ + handleJobStatusUpdate(data) { + try { + if (data.type === 'job_status' && data.data) { + const jobsStore = useJobsStore() + const { job_uuid, ...statusUpdate } = data.data + + // 更新任務狀態 + jobsStore.updateJobStatus(job_uuid, statusUpdate) + + console.log('任務狀態已更新:', job_uuid, statusUpdate) + } + } catch (error) { + console.error('處理任務狀態更新失敗:', error) + } + } + + /** + * 處理系統通知 + * @param {Object} data - 通知資料 + */ + handleSystemNotification(data) { + const { type, message, title, level } = data + + switch (level) { + case 'success': + ElNotification.success({ + title: title || '系統通知', + message: message, + duration: 5000 + }) + break + + case 'warning': + ElNotification.warning({ + title: title || '系統警告', + message: message, + duration: 8000 + }) + break + + case 'error': + ElNotification.error({ + title: title || '系統錯誤', + message: message, + duration: 10000 + }) + break + + default: + ElNotification({ + title: title || '系統消息', + message: message, + duration: 5000 + }) + } + } + + /** + * 處理新通知推送 + * @param {Object} data - 通知資料 + */ + handleNewNotification(data) { + try { 
+ console.log('📩 [WebSocket] 收到新通知:', data) + + const notificationStore = useNotificationStore() + + // 添加通知到 store + notificationStore.addNotification(data) + + // 顯示桌面通知 + this.showDesktopNotification(data) + + } catch (error) { + console.error('處理新通知失敗:', error) + } + } + + /** + * 處理系統消息 + * @param {Object} data - 系統消息資料 + */ + handleSystemMessage(data) { + try { + console.log('📢 [WebSocket] 收到系統消息:', data) + + const { message, type } = data + + // 顯示系統消息 + const messageType = type || 'info' + ElMessage({ + type: messageType === 'system' ? 'info' : messageType, + message: message, + duration: 5000, + showClose: true + }) + + } catch (error) { + console.error('處理系統消息失敗:', error) + } + } + + /** + * 顯示桌面通知 + * @param {Object} notification - 通知資料 + */ + showDesktopNotification(notification) { + try { + // 檢查瀏覽器是否支援通知 + if (!('Notification' in window)) { + return + } + + // 檢查通知權限 + if (Notification.permission === 'granted') { + new Notification(notification.title, { + body: notification.message, + icon: '/panjit-logo.png', + tag: notification.id, + requireInteraction: false + }) + } else if (Notification.permission !== 'denied') { + // 請求通知權限 + Notification.requestPermission().then(permission => { + if (permission === 'granted') { + new Notification(notification.title, { + body: notification.message, + icon: '/panjit-logo.png', + tag: notification.id, + requireInteraction: false + }) + } + }) + } + + } catch (error) { + console.error('顯示桌面通知失敗:', error) + } + } + + /** + * 訂閱任務狀態更新 + * @param {string} jobUuid - 任務 UUID + */ + subscribeToJob(jobUuid) { + // 檢查 WebSocket 是否被禁用 + const devMode = import.meta.env.VITE_DEV_MODE === 'true' + const isProd = import.meta.env.PROD + const wsDisabled = import.meta.env.VITE_DISABLE_WEBSOCKET === 'true' + + if (!devMode || isProd || wsDisabled) { + return // WebSocket 被禁用,靜默返回 + } + + if (!this.socket || !this.isConnected) { + // 靜默處理,避免控制台警告 + return + } + + if (this.jobSubscriptions.has(jobUuid)) { + return // 已經訂閱過 + } + + 
this.socket.emit('subscribe_job', { job_uuid: jobUuid }) + this.jobSubscriptions.add(jobUuid) + } + + /** + * 取消訂閱任務狀態更新 + * @param {string} jobUuid - 任務 UUID + */ + unsubscribeFromJob(jobUuid) { + if (!this.socket || !this.isConnected) { + return + } + + this.socket.emit('unsubscribe_job', { job_uuid: jobUuid }) + this.jobSubscriptions.delete(jobUuid) + } + + /** + * 重新訂閱所有任務 + */ + resubscribeJobs() { + if (!this.isConnected) return + + this.jobSubscriptions.forEach(jobUuid => { + this.socket.emit('subscribe_job', { job_uuid: jobUuid }) + }) + } + + /** + * 批量訂閱任務 + * @param {string[]} jobUuids - 任務 UUID 陣列 + */ + subscribeToJobs(jobUuids) { + jobUuids.forEach(jobUuid => { + this.subscribeToJob(jobUuid) + }) + } + + /** + * 批量取消訂閱任務 + * @param {string[]} jobUuids - 任務 UUID 陣列 + */ + unsubscribeFromJobs(jobUuids) { + jobUuids.forEach(jobUuid => { + this.unsubscribeFromJob(jobUuid) + }) + } + + /** + * 發送自定義事件 + * @param {string} event - 事件名稱 + * @param {Object} data - 事件資料 + */ + emit(event, data) { + // 檢查 WebSocket 是否被禁用 + const devMode = import.meta.env.VITE_DEV_MODE === 'true' + const isProd = import.meta.env.PROD + const wsDisabled = import.meta.env.VITE_DISABLE_WEBSOCKET === 'true' + + if (!devMode || isProd || wsDisabled) { + return // WebSocket 被禁用,靜默返回 + } + + if (this.socket && this.isConnected) { + this.socket.emit(event, data) + } + } + + /** + * 監聽自定義事件 + * @param {string} event - 事件名稱 + * @param {Function} callback - 回調函數 + */ + on(event, callback) { + // 檢查 WebSocket 是否被禁用 + const devMode = import.meta.env.VITE_DEV_MODE === 'true' + const isProd = import.meta.env.PROD + const wsDisabled = import.meta.env.VITE_DISABLE_WEBSOCKET === 'true' + + if (!devMode || isProd || wsDisabled) { + return // WebSocket 被禁用,靜默返回 + } + + if (this.socket) { + this.socket.on(event, callback) + } + } + + /** + * 取消監聽事件 + * @param {string} event - 事件名稱 + * @param {Function} callback - 回調函數 + */ + off(event, callback) { + if (this.socket) { + this.socket.off(event, 
callback) + } + } + + /** + * 斷開連接 + */ + disconnect() { + if (this.socket) { + this.jobSubscriptions.clear() + this.socket.disconnect() + this.socket = null + this.isConnected = false + console.log('WebSocket 已主動斷開') + } + } + + /** + * 重新連接 + */ + reconnect() { + this.disconnect() + setTimeout(() => { + this.connect() + }, 1000) + } + + /** + * 取得連接狀態 + */ + getConnectionStatus() { + return { + isConnected: this.isConnected, + socket: this.socket, + subscriptions: Array.from(this.jobSubscriptions) + } + } +} + +// 創建全局實例 +export const websocketService = new WebSocketService() + +// 自動連接(在需要時) +export const initWebSocket = () => { + // 檢查是否禁用 WebSocket (多種方式) + const devMode = import.meta.env.VITE_DEV_MODE === 'true' + const isProd = import.meta.env.PROD + const wsDisabled = import.meta.env.VITE_DISABLE_WEBSOCKET === 'true' + + if (!devMode || isProd || wsDisabled) { + console.log('🔌 [WebSocket] WebSocket 連接已禁用', { devMode, isProd, wsDisabled }) + return + } + + console.log('🔌 [WebSocket] 嘗試初始化 WebSocket 連接') + websocketService.connect() +} + +// 清理連接(在登出時) +export const cleanupWebSocket = () => { + websocketService.disconnect() +} + +export default websocketService \ No newline at end of file diff --git a/frontend/src/views/AdminJobsView.vue b/frontend/src/views/AdminJobsView.vue new file mode 100644 index 0000000..03a0a96 --- /dev/null +++ b/frontend/src/views/AdminJobsView.vue @@ -0,0 +1,538 @@ + + + + + \ No newline at end of file diff --git a/frontend/src/views/AdminView.vue b/frontend/src/views/AdminView.vue new file mode 100644 index 0000000..5d7d3ad --- /dev/null +++ b/frontend/src/views/AdminView.vue @@ -0,0 +1,1035 @@ + + + + + \ No newline at end of file diff --git a/frontend/src/views/HistoryView.vue b/frontend/src/views/HistoryView.vue new file mode 100644 index 0000000..95468dc --- /dev/null +++ b/frontend/src/views/HistoryView.vue @@ -0,0 +1,850 @@ + + + + + \ No newline at end of file diff --git a/frontend/src/views/HomeView.vue 
b/frontend/src/views/HomeView.vue new file mode 100644 index 0000000..28bd1eb --- /dev/null +++ b/frontend/src/views/HomeView.vue @@ -0,0 +1,572 @@ + + + + + \ No newline at end of file diff --git a/frontend/src/views/JobDetailView.vue b/frontend/src/views/JobDetailView.vue new file mode 100644 index 0000000..19a3649 --- /dev/null +++ b/frontend/src/views/JobDetailView.vue @@ -0,0 +1,930 @@ + + + + + \ No newline at end of file diff --git a/frontend/src/views/JobListView.vue b/frontend/src/views/JobListView.vue new file mode 100644 index 0000000..d98cc0c --- /dev/null +++ b/frontend/src/views/JobListView.vue @@ -0,0 +1,904 @@ + + + + + \ No newline at end of file diff --git a/frontend/src/views/LoginView.vue b/frontend/src/views/LoginView.vue new file mode 100644 index 0000000..1e0ec7a --- /dev/null +++ b/frontend/src/views/LoginView.vue @@ -0,0 +1,350 @@ + + + + + \ No newline at end of file diff --git a/frontend/src/views/NotFoundView.vue b/frontend/src/views/NotFoundView.vue new file mode 100644 index 0000000..0392329 --- /dev/null +++ b/frontend/src/views/NotFoundView.vue @@ -0,0 +1,278 @@ + + + + + \ No newline at end of file diff --git a/frontend/src/views/ProfileView.vue b/frontend/src/views/ProfileView.vue new file mode 100644 index 0000000..103d6f8 --- /dev/null +++ b/frontend/src/views/ProfileView.vue @@ -0,0 +1,562 @@ + + + + + \ No newline at end of file diff --git a/frontend/src/views/UploadView.vue b/frontend/src/views/UploadView.vue new file mode 100644 index 0000000..acc1e05 --- /dev/null +++ b/frontend/src/views/UploadView.vue @@ -0,0 +1,865 @@ + + + + + \ No newline at end of file diff --git a/frontend/vite.config.js b/frontend/vite.config.js new file mode 100644 index 0000000..f169dc6 --- /dev/null +++ b/frontend/vite.config.js @@ -0,0 +1,72 @@ +import { defineConfig } from 'vite' +import vue from '@vitejs/plugin-vue' +import { resolve } from 'path' +import AutoImport from 'unplugin-auto-import/vite' +import Components from 
'unplugin-vue-components/vite' +import { ElementPlusResolver } from 'unplugin-vue-components/resolvers' +import ElementPlus from 'unplugin-element-plus/vite' + +export default defineConfig({ + plugins: [ + vue(), + // Element Plus 自動導入 + AutoImport({ + resolvers: [ElementPlusResolver()], + imports: [ + 'vue', + 'vue-router', + 'pinia', + { + axios: [ + 'default', + ['default', 'axios'] + ] + } + ], + dts: true + }), + Components({ + resolvers: [ElementPlusResolver()] + }), + ElementPlus({ + useSource: true + }) + ], + resolve: { + alias: { + '@': resolve(__dirname, 'src') + } + }, + server: { + port: 3000, + host: '0.0.0.0', + proxy: { + '/api': { + target: 'http://127.0.0.1:5000', + changeOrigin: true, + secure: false + }, + '/socket.io': { + target: 'http://127.0.0.1:5000', + changeOrigin: true, + ws: true + } + } + }, + build: { + outDir: 'dist', + assetsDir: 'assets', + sourcemap: false, + rollupOptions: { + output: { + chunkFileNames: 'js/[name]-[hash].js', + entryFileNames: 'js/[name]-[hash].js', + assetFileNames: '[ext]/[name]-[hash].[ext]' + } + } + }, + define: { + __VUE_OPTIONS_API__: true, + __VUE_PROD_DEVTOOLS__: false + } +}) \ No newline at end of file diff --git a/install.sh b/install.sh new file mode 100644 index 0000000..0f8dd8b --- /dev/null +++ b/install.sh @@ -0,0 +1,281 @@ +#!/bin/bash +############################################################################### +# PANJIT Document Translator - 一鍵安裝腳本 +# 用於 1Panel 環境部署 +# +# 此腳本會: +# 1. 檢查 Python 版本 +# 2. 安裝所有 Python 依賴套件 +# 3. 建立必要目錄 +# 4. 
驗證關鍵配置 +# +# Author: PANJIT IT Team +# Created: 2025-10-03 +############################################################################### + +set -e # 發生錯誤時立即退出 + +# 顏色定義 +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# 日誌函數 +log_info() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +log_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +log_warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# 顯示橫幅 +show_banner() { + echo "" + echo "╔═══════════════════════════════════════════════════════════╗" + echo "║ PANJIT Document Translator - 環境安裝腳本 ║" + echo "║ 適用於 1Panel 環境部署 ║" + echo "╚═══════════════════════════════════════════════════════════╝" + echo "" +} + +# 檢查 Python 版本 +check_python() { + log_info "檢查 Python 版本..." + + if ! command -v python3 &> /dev/null; then + log_error "找不到 python3,請先安裝 Python 3.10+" + exit 1 + fi + + PYTHON_VERSION=$(python3 --version 2>&1 | awk '{print $2}') + PYTHON_MAJOR=$(echo $PYTHON_VERSION | cut -d. -f1) + PYTHON_MINOR=$(echo $PYTHON_VERSION | cut -d. -f2) + + if [ "$PYTHON_MAJOR" -lt 3 ] || ([ "$PYTHON_MAJOR" -eq 3 ] && [ "$PYTHON_MINOR" -lt 10 ]); then + log_error "Python 版本過低: $PYTHON_VERSION (需要 3.10+)" + exit 1 + fi + + log_success "Python 版本: $PYTHON_VERSION ✓" +} + +# 檢查 pip +check_pip() { + log_info "檢查 pip..." + + if ! command -v pip3 &> /dev/null; then + log_error "找不到 pip3,請先安裝 pip" + exit 1 + fi + + PIP_VERSION=$(pip3 --version | awk '{print $2}') + log_success "pip 版本: $PIP_VERSION ✓" +} + +# 升級 pip +upgrade_pip() { + log_info "升級 pip 到最新版本..." + pip3 install --upgrade pip --quiet + log_success "pip 升級完成 ✓" +} + +# 安裝 Python 依賴 +install_dependencies() { + log_info "安裝 Python 依賴套件..." + echo "" + + if [ ! 
-f "requirements.txt" ]; then + log_error "找不到 requirements.txt 檔案" + exit 1 + fi + + # 顯示將要安裝的套件數量 + PACKAGE_COUNT=$(grep -c "^[^#]" requirements.txt || true) + log_info "將安裝 $PACKAGE_COUNT 個套件(這可能需要幾分鐘)..." + echo "" + + # 安裝依賴(使用清華鏡像加速) + if pip3 install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple 2>&1 | tee /tmp/pip_install.log; then + log_success "所有依賴套件安裝完成 ✓" + else + log_warning "使用清華鏡像安裝失敗,嘗試使用官方源..." + if pip3 install -r requirements.txt 2>&1 | tee /tmp/pip_install.log; then + log_success "所有依賴套件安裝完成 ✓" + else + log_error "依賴套件安裝失敗,請檢查 /tmp/pip_install.log" + exit 1 + fi + fi + echo "" +} + +# 建立必要目錄 +create_directories() { + log_info "建立必要目錄..." + + DIRECTORIES=( + "uploads" + "logs" + "static" + ) + + for dir in "${DIRECTORIES[@]}"; do + if [ ! -d "$dir" ]; then + mkdir -p "$dir" + log_success "建立目錄: $dir ✓" + else + log_info "目錄已存在: $dir" + fi + done + + # 設定目錄權限 + chmod 755 uploads logs static + log_success "目錄權限設定完成 ✓" + echo "" +} + +# 檢查環境變數檔案 +check_env_file() { + log_info "檢查環境變數配置..." + + if [ -f ".env" ]; then + log_success "找到 .env 檔案 ✓" + + # 檢查關鍵配置項 + REQUIRED_VARS=( + "DATABASE_URL" + "REDIS_URL" + "LDAP_SERVER" + "SMTP_SERVER" + ) + + MISSING_VARS=() + for var in "${REQUIRED_VARS[@]}"; do + if ! grep -q "^${var}=" .env; then + MISSING_VARS+=("$var") + fi + done + + if [ ${#MISSING_VARS[@]} -gt 0 ]; then + log_warning "以下環境變數未設定:" + for var in "${MISSING_VARS[@]}"; do + echo " - $var" + done + else + log_success "關鍵環境變數已配置 ✓" + fi + else + log_warning "找不到 .env 檔案" + log_info "請參考 .env.example 建立 .env 檔案" + fi + echo "" +} + +# 檢查 api.txt +check_api_file() { + log_info "檢查 Dify API 配置..." 
+ + if [ -f "api.txt" ]; then + log_success "找到 api.txt 檔案 ✓" + + # 檢查必要配置 + if grep -q "translation_api:" api.txt && grep -q "ocr_api:" api.txt; then + log_success "Dify API 配置完整 ✓" + else + log_warning "api.txt 配置可能不完整,請檢查" + fi + else + log_warning "找不到 api.txt 檔案(Dify API 配置)" + log_info "如需翻譯功能,請建立 api.txt 並配置 API 金鑰" + fi + echo "" +} + +# 驗證安裝 +verify_installation() { + log_info "驗證安裝..." + + # 檢查關鍵套件 + CRITICAL_PACKAGES=( + "flask" + "celery" + "redis" + "pymysql" + "ldap3" + ) + + for package in "${CRITICAL_PACKAGES[@]}"; do + if python3 -c "import ${package}" 2>/dev/null; then + log_success "套件 ${package} 安裝成功 ✓" + else + log_error "套件 ${package} 安裝失敗" + exit 1 + fi + done + echo "" +} + +# 顯示後續步驟 +show_next_steps() { + echo "" + echo "╔═══════════════════════════════════════════════════════════╗" + echo "║ 安裝完成!後續步驟: ║" + echo "╚═══════════════════════════════════════════════════════════╝" + echo "" + echo "1. 📝 修改環境變數中的安全金鑰:" + echo " nano .env" + echo " # 修改 SECRET_KEY 和 JWT_SECRET_KEY" + echo "" + echo "2. ✅ 確認 API 配置(已預設好):" + echo " cat api.txt" + echo "" + echo "3. 🚀 在 1Panel 介面設定:" + echo " 命令執行: python3 start.py" + echo " 端口映射: 12010" + echo "" + echo "4. ✅ 驗證服務:" + echo " curl http://localhost:12010/api/health" + echo "" + echo "📚 詳細說明請參考 README.md" + echo "" +} + +# 主函數 +main() { + show_banner + + log_info "開始安裝環境..." + echo "" + + # 執行檢查和安裝 + check_python + check_pip + upgrade_pip + echo "" + + install_dependencies + create_directories + check_env_file + check_api_file + verify_installation + + log_success "=========================================" + log_success " 所有安裝步驟完成!" 
+ log_success "=========================================" + + show_next_steps +} + +# 執行主函數 +main diff --git a/migrations/add_conversation_id.sql b/migrations/add_conversation_id.sql new file mode 100644 index 0000000..e1c3684 --- /dev/null +++ b/migrations/add_conversation_id.sql @@ -0,0 +1,8 @@ +-- 添加 conversation_id 字段以支持對話持續性 +-- 這個字段用於在同一個翻譯任務中保持 Dify API 對話的連續性 + +ALTER TABLE dt_translation_jobs +ADD COLUMN conversation_id VARCHAR(100) COMMENT 'Dify對話ID,用於維持翻譯上下文'; + +-- 為現有的 conversation_id 字段創建索引,以提高查詢效率 +CREATE INDEX idx_conversation_id ON dt_translation_jobs(conversation_id); \ No newline at end of file diff --git a/migrations/add_sys_user.sql b/migrations/add_sys_user.sql new file mode 100644 index 0000000..e17642e --- /dev/null +++ b/migrations/add_sys_user.sql @@ -0,0 +1,83 @@ +-- 建立系統使用者表 (sys_user) +-- 專門用於記錄帳號密碼和登入相關資訊 +-- 不影響現有 users 表的權限管理功能 +-- Created: 2025-10-01 + +CREATE TABLE IF NOT EXISTS sys_user ( + id BIGINT AUTO_INCREMENT PRIMARY KEY, + + -- 帳號資訊 + username VARCHAR(255) NOT NULL COMMENT '登入帳號', + password_hash VARCHAR(512) COMMENT '密碼雜湊 (如果需要本地儲存)', + email VARCHAR(255) NOT NULL COMMENT '電子郵件', + display_name VARCHAR(255) COMMENT '顯示名稱', + + -- API 認證資訊 + api_user_id VARCHAR(255) COMMENT 'API 回傳的使用者 ID', + api_access_token TEXT COMMENT 'API 回傳的 access_token', + api_token_expires_at TIMESTAMP NULL COMMENT 'API Token 過期時間', + + -- 登入相關 + auth_method ENUM('API', 'LDAP') DEFAULT 'API' COMMENT '認證方式', + last_login_at TIMESTAMP NULL COMMENT '最後登入時間', + last_login_ip VARCHAR(45) COMMENT '最後登入 IP', + login_count INT DEFAULT 0 COMMENT '登入次數', + login_success_count INT DEFAULT 0 COMMENT '成功登入次數', + login_fail_count INT DEFAULT 0 COMMENT '失敗登入次數', + + -- 帳號狀態 + is_active BOOLEAN DEFAULT TRUE COMMENT '是否啟用', + is_locked BOOLEAN DEFAULT FALSE COMMENT '是否鎖定', + locked_until TIMESTAMP NULL COMMENT '鎖定至何時', + + -- 審計欄位 + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '建立時間', + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE 
CURRENT_TIMESTAMP COMMENT '更新時間', + + -- 索引 + INDEX idx_username (username), + INDEX idx_email (email), + INDEX idx_api_user_id (api_user_id), + INDEX idx_auth_method (auth_method), + INDEX idx_last_login (last_login_at), + INDEX idx_active_users (is_active, is_locked), + + -- 約束 + UNIQUE KEY uk_username (username), + UNIQUE KEY uk_email (email) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='系統使用者表 - 帳號密碼登入記錄'; + +-- 建立登入記錄表 (簡化版) +CREATE TABLE IF NOT EXISTS login_logs ( + id BIGINT AUTO_INCREMENT PRIMARY KEY, + + -- 基本資訊 + username VARCHAR(255) NOT NULL COMMENT '登入帳號', + auth_method ENUM('API', 'LDAP') NOT NULL COMMENT '認證方式', + + -- 登入結果 + login_success BOOLEAN NOT NULL COMMENT '是否成功', + error_message TEXT COMMENT '錯誤訊息(失敗時)', + + -- 環境資訊 + ip_address VARCHAR(45) COMMENT 'IP 地址', + user_agent TEXT COMMENT '瀏覽器資訊', + + -- API 回應 (可選,用於除錯) + api_response_summary JSON COMMENT 'API 回應摘要', + + -- 時間 + login_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '登入時間', + + -- 索引 + INDEX idx_username (username), + INDEX idx_auth_method (auth_method), + INDEX idx_login_success (login_success), + INDEX idx_login_at (login_at), + INDEX idx_username_time (username, login_at) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='登入記錄表'; + +-- 清理舊的認證相關表(如果存在) +DROP TABLE IF EXISTS auth_records; +DROP TABLE IF EXISTS token_refresh_logs; +DROP TABLE IF EXISTS login_history; \ No newline at end of file diff --git a/migrations/clean_dt_tables.sql b/migrations/clean_dt_tables.sql new file mode 100644 index 0000000..7a1e633 --- /dev/null +++ b/migrations/clean_dt_tables.sql @@ -0,0 +1,23 @@ +-- 清理所有 dt_ 前綴的資料表 +-- 重新開始,建立乾淨的架構 +-- Created: 2025-10-01 + +-- 關閉外鍵約束檢查 (避免刪除順序問題) +SET FOREIGN_KEY_CHECKS = 0; + +-- 刪除所有 dt_ 前綴的資料表 (按照依賴關係順序) +-- 先刪除有外鍵依賴的子表,再刪除父表 +DROP TABLE IF EXISTS dt_job_files; +DROP TABLE IF EXISTS dt_translation_cache; +DROP TABLE IF EXISTS dt_api_usage_stats; +DROP TABLE IF EXISTS dt_system_logs; +DROP TABLE IF EXISTS 
dt_notifications; +DROP TABLE IF EXISTS dt_login_logs; +DROP TABLE IF EXISTS dt_translation_jobs; +DROP TABLE IF EXISTS dt_users; + +-- 重新啟用外鍵約束檢查 +SET FOREIGN_KEY_CHECKS = 1; + +-- 驗證清理結果 +SHOW TABLES LIKE 'dt_%'; \ No newline at end of file diff --git a/migrations/create_fresh_schema.sql b/migrations/create_fresh_schema.sql new file mode 100644 index 0000000..8d52e27 --- /dev/null +++ b/migrations/create_fresh_schema.sql @@ -0,0 +1,160 @@ +-- 全新的文件翻譯系統資料庫架構 +-- 方案 A: dt_users 用於業務功能,sys_user 用於登入記錄 +-- API name 格式: 姓名+email,email 作為主要識別鍵 +-- Created: 2025-10-01 + +-- 1. 建立 dt_users 表 (業務功能使用) +CREATE TABLE dt_users ( + id INT AUTO_INCREMENT PRIMARY KEY, + username VARCHAR(255) NOT NULL COMMENT 'API name (姓名+email格式)', + display_name VARCHAR(255) NOT NULL COMMENT 'API name (姓名+email格式)', + email VARCHAR(255) NOT NULL UNIQUE COMMENT '電子郵件 (主要識別鍵)', + department VARCHAR(100) COMMENT '部門/職位', + is_admin BOOLEAN DEFAULT FALSE COMMENT '是否為管理員', + last_login DATETIME COMMENT '最後登入時間', + created_at DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT '建立時間', + updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新時間', + + INDEX idx_email (email), + INDEX idx_username_email (username, email) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='使用者資訊表'; + +-- 2. 
建立 dt_translation_jobs 表 (翻譯工作) +CREATE TABLE dt_translation_jobs ( + id BIGINT AUTO_INCREMENT PRIMARY KEY, + user_id INT NOT NULL COMMENT '關聯到 dt_users.id', + job_name VARCHAR(255) NOT NULL COMMENT '工作名稱', + source_lang VARCHAR(10) NOT NULL COMMENT '來源語言', + target_lang VARCHAR(10) NOT NULL COMMENT '目標語言', + file_type ENUM('DOCX', 'PPTX', 'PDF', 'TXT') NOT NULL COMMENT '檔案類型', + status ENUM('PENDING', 'PROCESSING', 'COMPLETED', 'FAILED') DEFAULT 'PENDING' COMMENT '工作狀態', + progress INT DEFAULT 0 COMMENT '進度百分比', + total_pages INT DEFAULT 0 COMMENT '總頁數', + processed_pages INT DEFAULT 0 COMMENT '已處理頁數', + cost DECIMAL(10,4) DEFAULT 0 COMMENT '翻譯成本', + error_message TEXT COMMENT '錯誤訊息', + conversation_id VARCHAR(255) COMMENT 'Dify 對話 ID', + created_at DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT '建立時間', + updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新時間', + completed_at DATETIME COMMENT '完成時間', + + FOREIGN KEY (user_id) REFERENCES dt_users(id) ON DELETE CASCADE, + INDEX idx_user_id (user_id), + INDEX idx_status (status), + INDEX idx_created_at (created_at) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='翻譯工作表'; + +-- 3. 建立 dt_job_files 表 (工作檔案) +CREATE TABLE dt_job_files ( + id BIGINT AUTO_INCREMENT PRIMARY KEY, + job_id BIGINT NOT NULL COMMENT '關聯到 dt_translation_jobs.id', + file_type ENUM('source', 'translated') NOT NULL COMMENT '檔案類型', + original_filename VARCHAR(255) NOT NULL COMMENT '原始檔名', + stored_filename VARCHAR(255) NOT NULL COMMENT '儲存檔名', + file_path VARCHAR(500) NOT NULL COMMENT '檔案路徑', + file_size BIGINT DEFAULT 0 COMMENT '檔案大小', + mime_type VARCHAR(100) COMMENT 'MIME 類型', + created_at DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT '建立時間', + + FOREIGN KEY (job_id) REFERENCES dt_translation_jobs(id) ON DELETE CASCADE, + INDEX idx_job_id (job_id), + INDEX idx_file_type (file_type) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='工作檔案表'; + +-- 4. 
建立 dt_translation_cache 表 (翻譯快取) +CREATE TABLE dt_translation_cache ( + id BIGINT AUTO_INCREMENT PRIMARY KEY, + content_hash VARCHAR(64) NOT NULL COMMENT '內容雜湊', + source_lang VARCHAR(10) NOT NULL COMMENT '來源語言', + target_lang VARCHAR(10) NOT NULL COMMENT '目標語言', + source_text TEXT NOT NULL COMMENT '來源文字', + translated_text TEXT NOT NULL COMMENT '翻譯文字', + quality_score DECIMAL(3,2) DEFAULT 0.00 COMMENT '品質分數', + hit_count INT DEFAULT 0 COMMENT '命中次數', + created_at DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT '建立時間', + last_used_at DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最後使用時間', + + UNIQUE KEY uk_content_lang (content_hash, source_lang, target_lang), + INDEX idx_last_used (last_used_at), + INDEX idx_hit_count (hit_count) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='翻譯快取表'; + +-- 5. 建立 dt_api_usage_stats 表 (API 使用統計) +CREATE TABLE dt_api_usage_stats ( + id BIGINT AUTO_INCREMENT PRIMARY KEY, + user_id INT NOT NULL COMMENT '關聯到 dt_users.id', + job_id BIGINT COMMENT '關聯到 dt_translation_jobs.id', + api_name VARCHAR(50) NOT NULL COMMENT 'API 名稱', + request_count INT DEFAULT 1 COMMENT '請求次數', + token_used INT DEFAULT 0 COMMENT '使用的 token 數', + cost DECIMAL(10,4) DEFAULT 0 COMMENT '成本', + response_time_ms INT DEFAULT 0 COMMENT '回應時間(毫秒)', + status ENUM('SUCCESS', 'FAILED', 'TIMEOUT') DEFAULT 'SUCCESS' COMMENT '狀態', + error_message TEXT COMMENT '錯誤訊息', + created_at DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT '建立時間', + date_key DATE GENERATED ALWAYS AS (DATE(created_at)) STORED COMMENT '日期鍵', + + FOREIGN KEY (user_id) REFERENCES dt_users(id) ON DELETE CASCADE, + FOREIGN KEY (job_id) REFERENCES dt_translation_jobs(id) ON DELETE SET NULL, + INDEX idx_user_date (user_id, date_key), + INDEX idx_api_name (api_name), + INDEX idx_created_at (created_at) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='API 使用統計表'; + +-- 6. 
建立 dt_system_logs 表 (系統日誌) +CREATE TABLE dt_system_logs ( + id BIGINT AUTO_INCREMENT PRIMARY KEY, + level ENUM('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL') NOT NULL COMMENT '日誌級別', + category VARCHAR(50) NOT NULL COMMENT '日誌分類', + message TEXT NOT NULL COMMENT '日誌訊息', + user_id INT COMMENT '關聯到 dt_users.id', + job_id BIGINT COMMENT '關聯到 dt_translation_jobs.id', + extra_data JSON COMMENT '額外資料', + ip_address VARCHAR(45) COMMENT 'IP 地址', + user_agent TEXT COMMENT '用戶代理', + created_at DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT '建立時間', + date_key DATE GENERATED ALWAYS AS (DATE(created_at)) STORED COMMENT '日期鍵', + + FOREIGN KEY (user_id) REFERENCES dt_users(id) ON DELETE SET NULL, + FOREIGN KEY (job_id) REFERENCES dt_translation_jobs(id) ON DELETE SET NULL, + INDEX idx_level_category (level, category), + INDEX idx_user_date (user_id, date_key), + INDEX idx_created_at (created_at) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='系統日誌表'; + +-- 7. 建立 dt_notifications 表 (通知) +CREATE TABLE dt_notifications ( + id BIGINT AUTO_INCREMENT PRIMARY KEY, + user_id INT NOT NULL COMMENT '關聯到 dt_users.id', + type ENUM('INFO', 'SUCCESS', 'WARNING', 'ERROR') NOT NULL COMMENT '通知類型', + title VARCHAR(255) NOT NULL COMMENT '通知標題', + message TEXT NOT NULL COMMENT '通知內容', + is_read BOOLEAN DEFAULT FALSE COMMENT '是否已讀', + data JSON COMMENT '額外資料', + created_at DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT '建立時間', + read_at DATETIME COMMENT '已讀時間', + + FOREIGN KEY (user_id) REFERENCES dt_users(id) ON DELETE CASCADE, + INDEX idx_user_unread (user_id, is_read), + INDEX idx_created_at (created_at) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='通知表'; + +-- 8. 保持現有的 sys_user 表 (專門用於登入記錄) +-- sys_user 表已存在,透過 email 與 dt_users 關聯 + +-- 9. 重新命名 login_logs 為 dt_login_logs +RENAME TABLE login_logs TO dt_login_logs; + +-- 10. 
為 dt_login_logs 添加與 dt_users 的關聯 +ALTER TABLE dt_login_logs +ADD COLUMN user_id INT COMMENT '關聯到 dt_users.id', +ADD INDEX idx_user_id (user_id), +ADD FOREIGN KEY fk_dt_login_logs_user_id (user_id) REFERENCES dt_users(id) ON DELETE SET NULL; + +-- 11. 插入預設管理員使用者 +INSERT INTO dt_users (username, display_name, email, department, is_admin) +VALUES ('ymirliu ymirliu@panjit.com.tw', 'ymirliu ymirliu@panjit.com.tw', 'ymirliu@panjit.com.tw', 'IT', TRUE); + +-- 12. 驗證架構建立 +SELECT 'Tables created:' as status; +SHOW TABLES LIKE 'dt_%'; \ No newline at end of file diff --git a/migrations/fix_api_usage_stats.py b/migrations/fix_api_usage_stats.py new file mode 100644 index 0000000..73c6c7d --- /dev/null +++ b/migrations/fix_api_usage_stats.py @@ -0,0 +1,203 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +執行 API Usage Stats 資料表修復 Migration + +Usage: + python migrations/fix_api_usage_stats.py +""" + +import sys +from pathlib import Path + +# 添加專案根目錄到 Python 路徑 +project_root = Path(__file__).parent.parent +sys.path.insert(0, str(project_root)) + +from app import create_app, db +from sqlalchemy import text + + +def run_migration(): + """執行資料表結構修復""" + app = create_app() + + with app.app_context(): + print("=" * 60) + print("API Usage Stats 資料表結構修復") + print("=" * 60) + + try: + # 1. 檢查當前結構 + print("\n[1/8] 檢查當前資料表結構...") + result = db.session.execute(text('SHOW COLUMNS FROM dt_api_usage_stats')) + current_columns = {row[0]: row for row in result} + print(f" ✓ 當前欄位數量: {len(current_columns)}") + + # 2. 備份現有資料 + print("\n[2/8] 建立資料備份...") + db.session.execute(text(''' + CREATE TABLE IF NOT EXISTS dt_api_usage_stats_backup_20251001 + AS SELECT * FROM dt_api_usage_stats + ''')) + db.session.commit() + + backup_count = db.session.execute( + text('SELECT COUNT(*) FROM dt_api_usage_stats_backup_20251001') + ).scalar() + print(f" ✓ 已備份 {backup_count} 筆記錄") + + # 3. 
修改欄位名稱:api_name → api_endpoint + if 'api_name' in current_columns: + print("\n[3/8] 重新命名 api_name → api_endpoint...") + db.session.execute(text(''' + ALTER TABLE dt_api_usage_stats + CHANGE COLUMN api_name api_endpoint VARCHAR(200) NOT NULL COMMENT 'API端點' + ''')) + db.session.commit() + print(" ✓ 已重新命名 api_name → api_endpoint") + else: + print("\n[3/8] 跳過(api_name 已不存在或已是 api_endpoint)") + + # 4. 新增 prompt_tokens 和 completion_tokens + print("\n[4/8] 新增 prompt_tokens 和 completion_tokens...") + if 'prompt_tokens' not in current_columns: + db.session.execute(text(''' + ALTER TABLE dt_api_usage_stats + ADD COLUMN prompt_tokens INT DEFAULT 0 COMMENT 'Prompt token數' AFTER api_endpoint + ''')) + if 'completion_tokens' not in current_columns: + db.session.execute(text(''' + ALTER TABLE dt_api_usage_stats + ADD COLUMN completion_tokens INT DEFAULT 0 COMMENT 'Completion token數' AFTER prompt_tokens + ''')) + db.session.commit() + print(" ✓ 已新增 token 細分欄位") + + # 5. 重新命名 token_used → total_tokens + if 'token_used' in current_columns: + print("\n[5/8] 重新命名 token_used → total_tokens...") + db.session.execute(text(''' + ALTER TABLE dt_api_usage_stats + CHANGE COLUMN token_used total_tokens INT DEFAULT 0 COMMENT '總token數' + ''')) + db.session.commit() + print(" ✓ 已重新命名 token_used → total_tokens") + else: + print("\n[5/8] 跳過(token_used 已不存在或已是 total_tokens)") + + # 6. 新增計費相關欄位 + print("\n[6/8] 新增計費相關欄位...") + if 'prompt_unit_price' not in current_columns: + db.session.execute(text(''' + ALTER TABLE dt_api_usage_stats + ADD COLUMN prompt_unit_price DECIMAL(10, 8) DEFAULT 0.00000000 COMMENT '單價' AFTER total_tokens + ''')) + if 'prompt_price_unit' not in current_columns: + db.session.execute(text(''' + ALTER TABLE dt_api_usage_stats + ADD COLUMN prompt_price_unit VARCHAR(20) DEFAULT 'USD' COMMENT '價格單位' AFTER prompt_unit_price + ''')) + db.session.commit() + print(" ✓ 已新增計費欄位") + + # 7. 
替換 status 欄位為 success (BOOLEAN) + print("\n[7/8] 更新 status 欄位...") + if 'status' in current_columns and 'success' not in current_columns: + # 先新增 success 欄位 + db.session.execute(text(''' + ALTER TABLE dt_api_usage_stats + ADD COLUMN success BOOLEAN DEFAULT TRUE COMMENT '是否成功' AFTER response_time_ms + ''')) + + # 將 status 資料轉換到 success + db.session.execute(text(''' + UPDATE dt_api_usage_stats + SET success = (status = 'SUCCESS') + ''')) + + # 刪除舊的 status 欄位 + db.session.execute(text(''' + ALTER TABLE dt_api_usage_stats + DROP COLUMN status + ''')) + db.session.commit() + print(" ✓ 已將 status 轉換為 success (BOOLEAN)") + else: + print(" ℹ 跳過(已完成或不需要轉換)") + + # 8. 更新索引 + print("\n[8/8] 建立索引...") + try: + db.session.execute(text(''' + ALTER TABLE dt_api_usage_stats + ADD INDEX IF NOT EXISTS idx_api_endpoint (api_endpoint) + ''')) + except Exception as e: + if 'Duplicate' not in str(e): + raise + + try: + db.session.execute(text(''' + ALTER TABLE dt_api_usage_stats + ADD INDEX IF NOT EXISTS idx_success (success) + ''')) + except Exception as e: + if 'Duplicate' not in str(e): + raise + + db.session.commit() + print(" ✓ 已建立索引") + + # 9. 驗證最終結構 + print("\n" + "=" * 60) + print("驗證最終資料表結構") + print("=" * 60) + + result = db.session.execute(text('SHOW COLUMNS FROM dt_api_usage_stats')) + final_columns = list(result) + + print(f"\n最終欄位列表 (共 {len(final_columns)} 個):") + for col in final_columns: + print(f" - {col[0]:25} {col[1]:20} NULL={col[2]} Default={col[4]}") + + # 10. 
統計資料 + print("\n" + "=" * 60) + print("資料統計") + print("=" * 60) + + total_records = db.session.execute( + text('SELECT COUNT(*) FROM dt_api_usage_stats') + ).scalar() + print(f"總記錄數: {total_records}") + + if total_records > 0: + stats = db.session.execute(text(''' + SELECT + api_endpoint, + COUNT(*) as count, + SUM(total_tokens) as total_tokens, + SUM(cost) as total_cost + FROM dt_api_usage_stats + GROUP BY api_endpoint + ''')).fetchall() + + print("\nAPI 使用統計:") + for stat in stats: + print(f" {stat[0]:40} | {stat[1]:5} 次 | {stat[2]:10} tokens | ${stat[3]:.4f}") + + print("\n" + "=" * 60) + print("✅ Migration 執行完成!") + print("=" * 60) + + except Exception as e: + db.session.rollback() + print(f"\n❌ Migration 失敗: {str(e)}") + print("\n可以使用備份表還原資料:") + print(" DROP TABLE dt_api_usage_stats;") + print(" CREATE TABLE dt_api_usage_stats AS SELECT * FROM dt_api_usage_stats_backup_20251001;") + raise + + +if __name__ == '__main__': + run_migration() diff --git a/migrations/fix_auth_architecture.sql b/migrations/fix_auth_architecture.sql new file mode 100644 index 0000000..016475e --- /dev/null +++ b/migrations/fix_auth_architecture.sql @@ -0,0 +1,36 @@ +-- 修正認證系統架構 +-- 方案 A: 保留 dt_users 的 username 和 display_name,都使用 API 回傳的 name (姓名+email) +-- 使用 email 作為主要唯一識別碼,sys_user 表專門記錄登入資訊 +-- Created: 2025-10-01 + +-- 1. 確保 dt_users 表的 email 唯一約束 +-- 先檢查是否有重複的 email,如果有則需要手動處理 +-- 因為有外鍵約束,不能直接刪除 +-- 先顯示重複的 email 記錄讓管理員確認 +-- SELECT email, COUNT(*) as count FROM dt_users GROUP BY email HAVING COUNT(*) > 1; + +-- 添加 email 唯一約束 +ALTER TABLE dt_users +ADD CONSTRAINT uk_dt_users_email UNIQUE (email); + +-- 2. 調整現有欄位註解,說明新的使用方式 +ALTER TABLE dt_users +MODIFY COLUMN username VARCHAR(255) NOT NULL COMMENT 'API name (姓名+email格式)', +MODIFY COLUMN email VARCHAR(255) NOT NULL COMMENT '電子郵件 (主要識別鍵)'; + +-- 3. 保持 sys_user 表結構,但調整為專門記錄登入資訊 +-- sys_user 表通過 email 與 dt_users 關聯 +-- (保留現有的 sys_user 表,因為它是專門用於登入記錄) + +-- 4. 
重新命名 login_logs 為 dt_login_logs (配合專案命名規則) +RENAME TABLE login_logs TO dt_login_logs; + +-- 5. 更新 dt_login_logs 表結構 (配合 dt_users 的主鍵) +ALTER TABLE dt_login_logs +ADD COLUMN user_id INT COMMENT '關聯到 dt_users.id', +ADD INDEX idx_user_id (user_id), +ADD FOREIGN KEY fk_dt_login_logs_user_id (user_id) REFERENCES dt_users(id) ON DELETE SET NULL; + +-- 6. 建立使用者識別索引 (支援 email 和 username 快速查詢) +ALTER TABLE dt_users +ADD INDEX idx_username_email (username, email); \ No newline at end of file diff --git a/migrations/fix_translation_cache.py b/migrations/fix_translation_cache.py new file mode 100644 index 0000000..dba5f2a --- /dev/null +++ b/migrations/fix_translation_cache.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +修復 dt_translation_cache 資料表結構 + +問題:資料表欄位名稱與模型定義不一致 +- content_hash → source_text_hash +- source_lang → source_language +- target_lang → target_language +""" + +import sys +from pathlib import Path + +project_root = Path(__file__).parent.parent +sys.path.insert(0, str(project_root)) + +from app import create_app, db +from sqlalchemy import text + + +def run_migration(): + app = create_app() + + with app.app_context(): + print("=" * 60) + print("修復 dt_translation_cache 資料表結構") + print("=" * 60) + + try: + # 1. 檢查當前結構 + print("\n[1/6] 檢查當前資料表結構...") + result = db.session.execute(text('SHOW COLUMNS FROM dt_translation_cache')) + current_columns = {row[0]: row for row in result} + print(f" ✓ 當前欄位: {', '.join(current_columns.keys())}") + + # 2. 備份資料 + print("\n[2/6] 備份現有資料...") + db.session.execute(text(''' + CREATE TABLE IF NOT EXISTS dt_translation_cache_backup_20251001 + AS SELECT * FROM dt_translation_cache + ''')) + db.session.commit() + + backup_count = db.session.execute( + text('SELECT COUNT(*) FROM dt_translation_cache_backup_20251001') + ).scalar() + print(f" ✓ 已備份 {backup_count} 筆記錄") + + # 3. 
重新命名欄位:content_hash → source_text_hash + if 'content_hash' in current_columns and 'source_text_hash' not in current_columns: + print("\n[3/6] 重新命名 content_hash → source_text_hash...") + db.session.execute(text(''' + ALTER TABLE dt_translation_cache + CHANGE COLUMN content_hash source_text_hash VARCHAR(64) NOT NULL COMMENT '來源文字hash' + ''')) + db.session.commit() + print(" ✓ 已重新命名") + else: + print("\n[3/6] 跳過(已經是 source_text_hash)") + + # 4. 重新命名欄位:source_lang → source_language + if 'source_lang' in current_columns and 'source_language' not in current_columns: + print("\n[4/6] 重新命名 source_lang → source_language...") + db.session.execute(text(''' + ALTER TABLE dt_translation_cache + CHANGE COLUMN source_lang source_language VARCHAR(50) NOT NULL COMMENT '來源語言' + ''')) + db.session.commit() + print(" ✓ 已重新命名") + else: + print("\n[4/6] 跳過(已經是 source_language)") + + # 5. 重新命名欄位:target_lang → target_language + if 'target_lang' in current_columns and 'target_language' not in current_columns: + print("\n[5/6] 重新命名 target_lang → target_language...") + db.session.execute(text(''' + ALTER TABLE dt_translation_cache + CHANGE COLUMN target_lang target_language VARCHAR(50) NOT NULL COMMENT '目標語言' + ''')) + db.session.commit() + print(" ✓ 已重新命名") + else: + print("\n[5/6] 跳過(已經是 target_language)") + + # 6. 刪除不需要的欄位 + print("\n[6/6] 清理多餘欄位...") + + # 檢查並刪除 quality_score + if 'quality_score' in current_columns: + db.session.execute(text(''' + ALTER TABLE dt_translation_cache + DROP COLUMN quality_score + ''')) + print(" ✓ 已刪除 quality_score") + + # 檢查並刪除 hit_count + if 'hit_count' in current_columns: + db.session.execute(text(''' + ALTER TABLE dt_translation_cache + DROP COLUMN hit_count + ''')) + print(" ✓ 已刪除 hit_count") + + # 檢查並刪除 last_used_at + if 'last_used_at' in current_columns: + db.session.execute(text(''' + ALTER TABLE dt_translation_cache + DROP COLUMN last_used_at + ''')) + print(" ✓ 已刪除 last_used_at") + + db.session.commit() + + # 7. 
重建索引和約束 + print("\n[7/7] 重建索引和約束...") + + # 先刪除舊的唯一約束(如果存在) + try: + db.session.execute(text(''' + ALTER TABLE dt_translation_cache + DROP INDEX idx_content_hash + ''')) + print(" ✓ 已刪除舊索引 idx_content_hash") + except: + pass + + try: + db.session.execute(text(''' + ALTER TABLE dt_translation_cache + DROP INDEX idx_source_lang_target_lang + ''')) + print(" ✓ 已刪除舊索引 idx_source_lang_target_lang") + except: + pass + + # 建立新的唯一約束 + try: + db.session.execute(text(''' + ALTER TABLE dt_translation_cache + ADD UNIQUE KEY uk_cache (source_text_hash, source_language, target_language) + ''')) + print(" ✓ 已建立唯一約束 uk_cache") + except Exception as e: + if 'Duplicate' not in str(e): + print(f" ⚠ 約束建立警告: {str(e)}") + + # 建立語言索引 + try: + db.session.execute(text(''' + ALTER TABLE dt_translation_cache + ADD INDEX idx_languages (source_language, target_language) + ''')) + print(" ✓ 已建立索引 idx_languages") + except Exception as e: + if 'Duplicate' not in str(e): + print(f" ⚠ 索引建立警告: {str(e)}") + + db.session.commit() + + # 驗證最終結構 + print("\n" + "=" * 60) + print("驗證最終資料表結構") + print("=" * 60) + + result = db.session.execute(text('SHOW COLUMNS FROM dt_translation_cache')) + final_columns = list(result) + + print(f"\n最終欄位列表 (共 {len(final_columns)} 個):") + for col in final_columns: + print(f" - {col[0]:30} {col[1]:30} NULL={col[2]}") + + # 顯示索引 + print("\n索引:") + result = db.session.execute(text('SHOW INDEX FROM dt_translation_cache')) + for idx in result: + print(f" - {idx[2]:30} -> {idx[4]}") + + print("\n" + "=" * 60) + print("✅ Migration 執行完成!") + print("=" * 60) + + except Exception as e: + db.session.rollback() + print(f"\n❌ Migration 失敗: {str(e)}") + print("\n可以使用備份表還原資料:") + print(" DROP TABLE dt_translation_cache;") + print(" CREATE TABLE dt_translation_cache AS SELECT * FROM dt_translation_cache_backup_20251001;") + raise + + +if __name__ == '__main__': + run_migration() diff --git a/migrations/merge_duplicate_users.sql b/migrations/merge_duplicate_users.sql new file mode 100644 
index 0000000..dbf7293 --- /dev/null +++ b/migrations/merge_duplicate_users.sql @@ -0,0 +1,19 @@ +-- 合併重複的使用者記錄 +-- 保留 ID=3 的記錄 (較新且有較多關聯資料) +-- 將 ID=1 的關聯資料轉移到 ID=3,然後刪除 ID=1 + +-- 1. 將 ID=1 的 system_logs 轉移到 ID=3 +UPDATE dt_system_logs SET user_id = 3 WHERE user_id = 1; + +-- 2. 確認沒有其他關聯資料需要轉移 +-- (dt_translation_jobs, dt_api_usage_stats 都已經在 ID=3) + +-- 3. 刪除重複的記錄 ID=1 +DELETE FROM dt_users WHERE id = 1; + +-- 4. 驗證結果 +SELECT 'After merge:' as status; +SELECT id, username, display_name, email FROM dt_users WHERE email = 'ymirliu@panjit.com.tw'; +SELECT 'Jobs:', COUNT(*) FROM dt_translation_jobs WHERE user_id = 3; +SELECT 'Logs:', COUNT(*) FROM dt_system_logs WHERE user_id = 3; +SELECT 'Stats:', COUNT(*) FROM dt_api_usage_stats WHERE user_id = 3; \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..837917d --- /dev/null +++ b/requirements.txt @@ -0,0 +1,56 @@ +# Flask Framework +Flask==3.0.0 +Flask-SQLAlchemy==3.1.1 +Flask-Session==0.5.0 +Flask-Cors==4.0.0 +# Flask-SocketIO==5.3.6 # Temporarily disabled +Flask-JWT-Extended==4.6.0 + +# Database +PyMySQL==1.1.0 +SQLAlchemy==2.0.23 +Alembic==1.12.1 +cryptography>=41.0.0 + +# Task Queue +Celery==5.3.4 +redis==5.0.1 + +# Authentication +ldap3==2.9.1 + +# File Processing +python-docx==1.1.0 +python-pptx==0.6.23 +openpyxl==3.1.2 +PyPDF2==3.0.1 +PyMuPDF>=1.23.0 + +# Image Processing (for OCR enhancement) +Pillow>=10.0.0 +opencv-python-headless==4.8.1.78 +numpy>=1.24.0,<2.0.0 + +# Translation & Language Processing +requests==2.31.0 +blingfire==0.1.8 +pysbd==0.3.4 + +# Utilities +python-dotenv==1.0.0 +Werkzeug==3.0.1 +gunicorn==21.2.0 +gevent>=23.9.0 + +# Email +Jinja2==3.1.2 + +# Testing +pytest==7.4.3 +pytest-flask==1.3.0 +pytest-mock==3.12.0 +coverage==7.3.2 + +# Development +black==23.11.0 +flake8==6.1.0 diff --git a/schema_generated.sql b/schema_generated.sql new file mode 100644 index 0000000..50ca9a6 --- /dev/null +++ b/schema_generated.sql @@ -0,0 +1,184 
@@ +-- ============================================================================ +-- 自動生成的資料表 Schema +-- 生成時間: 2025-10-01 14:49:58 +-- 警告: 此檔案由 generate_schema_from_models.py 自動生成 +-- 請勿手動編輯! +-- ============================================================================ + +USE db_A060; + +-- User +DROP TABLE IF EXISTS `dt_users`; +CREATE TABLE IF NOT EXISTS `dt_users` ( + `id` INTEGER NOT NULL AUTO_INCREMENT, + `username` VARCHAR(100) NOT NULL COMMENT 'AD帳號', + `display_name` VARCHAR(200) NOT NULL COMMENT '顯示名稱', + `email` VARCHAR(255) NOT NULL COMMENT '電子郵件', + `department` VARCHAR(100) COMMENT '部門', + `is_admin` BOOL DEFAULT 0 COMMENT '是否為管理員', + `last_login` DATETIME COMMENT '最後登入時間', + `created_at` DATETIME COMMENT '建立時間', + `updated_at` DATETIME ON UPDATE CURRENT_TIMESTAMP COMMENT '更新時間', + PRIMARY KEY (`id`), + INDEX `ix_dt_users_email` (`email`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- SysUser +DROP TABLE IF EXISTS `sys_user`; +CREATE TABLE IF NOT EXISTS `sys_user` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `username` VARCHAR(255) NOT NULL COMMENT '登入帳號', + `password_hash` VARCHAR(512) COMMENT '密碼雜湊 (如果需要本地儲存)', + `email` VARCHAR(255) NOT NULL COMMENT '電子郵件', + `display_name` VARCHAR(255) COMMENT '顯示名稱', + `api_user_id` VARCHAR(255) COMMENT 'API 回傳的使用者 ID', + `api_access_token` TEXT COMMENT 'API 回傳的 access_token', + `api_token_expires_at` DATETIME COMMENT 'API Token 過期時間', + `auth_method` ENUM('API','LDAP') DEFAULT 'API' COMMENT '認證方式', + `last_login_at` DATETIME COMMENT '最後登入時間', + `last_login_ip` VARCHAR(45) COMMENT '最後登入 IP', + `login_count` INTEGER DEFAULT 0 COMMENT '登入次數', + `login_success_count` INTEGER DEFAULT 0 COMMENT '成功登入次數', + `login_fail_count` INTEGER DEFAULT 0 COMMENT '失敗登入次數', + `is_active` BOOL DEFAULT 1 COMMENT '是否啟用', + `is_locked` BOOL DEFAULT 0 COMMENT '是否鎖定', + `locked_until` DATETIME COMMENT '鎖定至何時', + `created_at` DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT '建立時間', + `updated_at` DATETIME DEFAULT 
CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新時間', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_username` (`username`), + UNIQUE KEY `uk_email` (`email`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- LoginLog +DROP TABLE IF EXISTS `login_logs`; +CREATE TABLE IF NOT EXISTS `login_logs` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `username` VARCHAR(255) NOT NULL COMMENT '登入帳號', + `auth_method` ENUM('API','LDAP') NOT NULL COMMENT '認證方式', + `login_success` BOOL NOT NULL COMMENT '是否成功', + `error_message` TEXT COMMENT '錯誤訊息(失敗時)', + `ip_address` VARCHAR(45) COMMENT 'IP 地址', + `user_agent` TEXT COMMENT '瀏覽器資訊', + `api_response_summary` JSON COMMENT 'API 回應摘要', + `login_at` DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT '登入時間', + PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- TranslationJob +DROP TABLE IF EXISTS `dt_translation_jobs`; +CREATE TABLE IF NOT EXISTS `dt_translation_jobs` ( + `id` INTEGER NOT NULL AUTO_INCREMENT, + `job_uuid` VARCHAR(36) NOT NULL COMMENT '任務唯一識別碼', + `user_id` INTEGER NOT NULL COMMENT '使用者ID', + `original_filename` VARCHAR(500) NOT NULL COMMENT '原始檔名', + `file_extension` VARCHAR(10) NOT NULL COMMENT '檔案副檔名', + `file_size` BIGINT NOT NULL COMMENT '檔案大小(bytes)', + `file_path` VARCHAR(1000) NOT NULL COMMENT '檔案路徑', + `source_language` VARCHAR(50) COMMENT '來源語言', + `target_languages` JSON NOT NULL COMMENT '目標語言陣列', + `status` ENUM('PENDING','PROCESSING','COMPLETED','FAILED','RETRY') DEFAULT 'PENDING' COMMENT '任務狀態', + `progress` NUMERIC(5, 2) DEFAULT 0.0 COMMENT '處理進度(%)', + `retry_count` INTEGER DEFAULT 0 COMMENT '重試次數', + `error_message` TEXT COMMENT '錯誤訊息', + `total_tokens` INTEGER DEFAULT 0 COMMENT '總token數', + `total_cost` NUMERIC(10, 4) DEFAULT 0.0 COMMENT '總成本', + `conversation_id` VARCHAR(100) COMMENT 'Dify對話ID,用於維持翻譯上下文', + `processing_started_at` DATETIME COMMENT '開始處理時間', + `completed_at` DATETIME COMMENT '完成時間', + `created_at` DATETIME COMMENT '建立時間', + 
`updated_at` DATETIME ON UPDATE CURRENT_TIMESTAMP COMMENT '更新時間', + `deleted_at` DATETIME COMMENT '軟刪除時間', + PRIMARY KEY (`id`), + CONSTRAINT `fk_dt_translation_jobs_user_id` FOREIGN KEY (`user_id`) REFERENCES `dt_users` (`id`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- JobFile +DROP TABLE IF EXISTS `dt_job_files`; +CREATE TABLE IF NOT EXISTS `dt_job_files` ( + `id` INTEGER NOT NULL AUTO_INCREMENT, + `job_id` INTEGER NOT NULL COMMENT '任務ID', + `file_type` ENUM('source','translated') NOT NULL COMMENT '檔案類型', + `language_code` VARCHAR(50) COMMENT '語言代碼(翻譯檔案)', + `original_filename` VARCHAR(255) NOT NULL COMMENT '原始檔名', + `stored_filename` VARCHAR(255) NOT NULL COMMENT '儲存檔名', + `file_path` VARCHAR(500) NOT NULL COMMENT '檔案路徑', + `file_size` BIGINT DEFAULT 0 COMMENT '檔案大小', + `mime_type` VARCHAR(100) COMMENT 'MIME 類型', + `created_at` DATETIME COMMENT '建立時間', + PRIMARY KEY (`id`), + CONSTRAINT `fk_dt_job_files_job_id` FOREIGN KEY (`job_id`) REFERENCES `dt_translation_jobs` (`id`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- SystemLog +DROP TABLE IF EXISTS `dt_system_logs`; +CREATE TABLE IF NOT EXISTS `dt_system_logs` ( + `id` INTEGER NOT NULL AUTO_INCREMENT, + `level` ENUM('DEBUG','INFO','WARNING','ERROR','CRITICAL') NOT NULL COMMENT '日誌等級', + `module` VARCHAR(100) NOT NULL COMMENT '模組名稱', + `user_id` INTEGER COMMENT '使用者ID', + `job_id` INTEGER COMMENT '任務ID', + `message` TEXT NOT NULL COMMENT '日誌訊息', + `extra_data` JSON COMMENT '額外資料', + `created_at` DATETIME COMMENT '建立時間', + PRIMARY KEY (`id`), + CONSTRAINT `fk_dt_system_logs_user_id` FOREIGN KEY (`user_id`) REFERENCES `dt_users` (`id`) ON DELETE CASCADE, + CONSTRAINT `fk_dt_system_logs_job_id` FOREIGN KEY (`job_id`) REFERENCES `dt_translation_jobs` (`id`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- TranslationCache +DROP TABLE IF EXISTS `dt_translation_cache`; +CREATE 
TABLE IF NOT EXISTS `dt_translation_cache` ( + `id` INTEGER NOT NULL AUTO_INCREMENT, + `source_text_hash` VARCHAR(64) NOT NULL COMMENT '來源文字hash', + `source_language` VARCHAR(50) NOT NULL COMMENT '來源語言', + `target_language` VARCHAR(50) NOT NULL COMMENT '目標語言', + `source_text` TEXT NOT NULL COMMENT '來源文字', + `translated_text` TEXT NOT NULL COMMENT '翻譯文字', + `created_at` DATETIME COMMENT '建立時間', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_cache` (`source_text_hash`, `source_language`, `target_language`), + INDEX `idx_languages` (`source_language`, `target_language`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- APIUsageStats +DROP TABLE IF EXISTS `dt_api_usage_stats`; +CREATE TABLE IF NOT EXISTS `dt_api_usage_stats` ( + `id` INTEGER NOT NULL AUTO_INCREMENT, + `user_id` INTEGER NOT NULL COMMENT '使用者ID', + `job_id` INTEGER COMMENT '任務ID', + `api_endpoint` VARCHAR(200) NOT NULL COMMENT 'API端點', + `prompt_tokens` INTEGER DEFAULT 0 COMMENT 'Prompt token數', + `completion_tokens` INTEGER DEFAULT 0 COMMENT 'Completion token數', + `total_tokens` INTEGER DEFAULT 0 COMMENT '總token數', + `prompt_unit_price` NUMERIC(10, 8) DEFAULT 0.0 COMMENT '單價', + `prompt_price_unit` VARCHAR(20) DEFAULT 'USD' COMMENT '價格單位', + `cost` NUMERIC(10, 4) DEFAULT 0.0 COMMENT '成本', + `response_time_ms` INTEGER DEFAULT 0 COMMENT '回應時間(毫秒)', + `success` BOOL DEFAULT 1 COMMENT '是否成功', + `error_message` TEXT COMMENT '錯誤訊息', + `created_at` DATETIME COMMENT '建立時間', + PRIMARY KEY (`id`), + CONSTRAINT `fk_dt_api_usage_stats_user_id` FOREIGN KEY (`user_id`) REFERENCES `dt_users` (`id`) ON DELETE CASCADE, + CONSTRAINT `fk_dt_api_usage_stats_job_id` FOREIGN KEY (`job_id`) REFERENCES `dt_translation_jobs` (`id`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- Notification +DROP TABLE IF EXISTS `dt_notifications`; +CREATE TABLE IF NOT EXISTS `dt_notifications` ( + `id` INTEGER NOT NULL AUTO_INCREMENT, + `notification_uuid` VARCHAR(36) NOT NULL 
COMMENT '通知唯一識別碼', + `user_id` INTEGER NOT NULL COMMENT '使用者ID', + `type` VARCHAR(20) NOT NULL DEFAULT 'info' COMMENT '通知類型', + `title` VARCHAR(255) NOT NULL COMMENT '通知標題', + `message` TEXT NOT NULL COMMENT '通知內容', + `job_uuid` VARCHAR(36) COMMENT '關聯任務UUID', + `link` VARCHAR(500) COMMENT '相關連結', + `is_read` BOOL NOT NULL DEFAULT 0 COMMENT '是否已讀', + `read_at` DATETIME COMMENT '閱讀時間', + `created_at` DATETIME NOT NULL COMMENT '建立時間', + `expires_at` DATETIME COMMENT '過期時間', + `extra_data` JSON COMMENT '額外數據', + PRIMARY KEY (`id`), + CONSTRAINT `fk_dt_notifications_user_id` FOREIGN KEY (`user_id`) REFERENCES `dt_users` (`id`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; diff --git a/start.py b/start.py new file mode 100644 index 0000000..030214a --- /dev/null +++ b/start.py @@ -0,0 +1,297 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +PANJIT Document Translator - 統一啟動入口 +適用於 1Panel 環境部署 + +此腳本會: +1. 啟動 Flask Web 服務 +2. 啟動 Celery Worker(翻譯任務處理) +3. 
# Remainder of the start.py module header (the header text opens before this span):
#   3. Start Celery Beat (scheduled tasks)
#
# Author: PANJIT IT Team
# Created: 2025-10-03

import os
import sys
import signal
import subprocess
import time
from pathlib import Path
from multiprocessing import Process

# Add the project root to the import path.  It is resolved to an absolute path
# because it is also used as the working directory of every child process.
project_root = Path(__file__).resolve().parent
sys.path.insert(0, str(project_root))


class Colors:
    """ANSI escape sequences used to colour the console output."""
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'


# Registry of (service name, Popen handle) pairs for every child started here.
processes = []


def log_info(message):
    """Print an informational message."""
    print(f"{Colors.BLUE}[INFO]{Colors.ENDC} {message}")


def log_success(message):
    """Print a success message."""
    print(f"{Colors.GREEN}[SUCCESS]{Colors.ENDC} {message}")


def log_warning(message):
    """Print a warning message."""
    print(f"{Colors.YELLOW}[WARNING]{Colors.ENDC} {message}")


def log_error(message):
    """Print an error message."""
    print(f"{Colors.RED}[ERROR]{Colors.ENDC} {message}")


def show_banner():
    """Print the start-up banner."""
    print(f"""
{Colors.BOLD}╔═══════════════════════════════════════════════════════════╗
║               PANJIT Document Translator V2               ║
║                      正在啟動服務...                      ║
╚═══════════════════════════════════════════════════════════╝{Colors.ENDC}
    """)


def check_environment():
    """Validate the runtime environment and create the working directories.

    Exits with status 1 when the interpreter is older than Python 3.10 or a
    required project file is missing.  All paths are resolved against
    ``project_root`` so the result does not depend on the caller's working
    directory.
    """
    log_info("檢查環境配置...")

    # Check the Python version.
    if sys.version_info < (3, 10):
        log_error(f"Python 版本過低: {sys.version}")
        log_error("需要 Python 3.10 或更高版本")
        sys.exit(1)

    log_success(f"Python 版本: {sys.version.split()[0]} ✓")

    # Check that the required files exist.
    required_files = ['app.py', 'celery_app.py', 'requirements.txt']
    for file in required_files:
        if not (project_root / file).exists():
            log_error(f"找不到必要檔案: {file}")
            sys.exit(1)

    log_success("必要檔案檢查完成 ✓")

    # NOTE(review): this only reports whether .env exists.  Nothing in this
    # file loads it, so HOST / PORT / FLASK_ENV read below come from the real
    # process environment -- confirm the platform (or app.py) exports them.
    if not (project_root / '.env').exists():
        log_warning("找不到 .env 檔案,將使用預設配置")
    else:
        log_success("找到 .env 配置檔案 ✓")

    # Create the runtime directories if they are missing.
    directories = ['uploads', 'logs', 'static']
    for directory in directories:
        (project_root / directory).mkdir(parents=True, exist_ok=True)

    log_success("目錄結構檢查完成 ✓")
    print()


def _spawn(name, cmd):
    """Start *cmd* as a child process and register it in ``processes``.

    The child inherits this script's stdout/stderr.  Capturing them with
    ``subprocess.PIPE`` without ever reading the pipe would block the child as
    soon as the OS pipe buffer fills up.  The child runs with ``project_root``
    as its working directory so relative paths such as ``logs/...`` resolve
    the same way no matter where the launcher was invoked from.
    """
    process = subprocess.Popen(cmd, cwd=str(project_root))
    processes.append((name, process))
    return process


def _stop_all_processes():
    """Terminate every registered child that is still running.

    Each child gets SIGTERM and five seconds to exit; after that it is killed
    and reaped so no zombie process is left behind.
    """
    for name, process in processes:
        if process and process.poll() is None:
            log_info(f"停止 {name} (PID: {process.pid})...")
            try:
                process.terminate()
                process.wait(timeout=5)
                log_success(f"{name} 已停止 ✓")
            except subprocess.TimeoutExpired:
                log_warning(f"{name} 未響應,強制終止...")
                process.kill()
                process.wait()  # reap the killed child
                log_success(f"{name} 已強制終止 ✓")
            except Exception as e:
                # Best effort: keep shutting down the remaining services.
                log_error(f"停止 {name} 時發生錯誤: {e}")

    log_success("\n所有服務已停止")


def start_flask_app():
    """Start the Flask web service and return its ``Popen`` handle.

    Uses gunicorn when ``FLASK_ENV`` is ``production``; otherwise runs
    ``app.py`` with the interpreter that is executing this script.  Exits with
    status 1 when the process cannot be started.
    """
    log_info("啟動 Flask Web 服務...")

    # Read the configuration from environment variables.
    host = os.environ.get('HOST', '0.0.0.0')
    port = int(os.environ.get('PORT', 12010))

    # Production uses gunicorn.
    if os.environ.get('FLASK_ENV') == 'production':
        workers = int(os.environ.get('GUNICORN_WORKERS', 4))
        cmd = [
            'gunicorn',
            '--bind', f'{host}:{port}',
            '--workers', str(workers),
            '--timeout', '300',
            '--access-logfile', 'logs/access.log',
            '--error-logfile', 'logs/error.log',
            '--log-level', 'info',
            'app:app'  # use the ``app`` object defined in app.py
        ]
    else:
        # Development uses Flask's built-in server, run with the same
        # interpreter whose version check_environment() validated.
        cmd = [sys.executable, 'app.py']

    try:
        process = _spawn('Flask App', cmd)
        log_success(f"Flask 服務已啟動 (PID: {process.pid}) ✓")
        log_info(f"服務地址: http://{host}:{port}")
        return process
    except (OSError, subprocess.SubprocessError) as e:
        log_error(f"Flask 服務啟動失敗: {e}")
        sys.exit(1)


def start_celery_worker():
    """Start the Celery worker; return its ``Popen`` handle or ``None`` on failure."""
    log_info("啟動 Celery Worker...")

    # Only reported for diagnostics; the connection itself is not tested here.
    redis_url = os.environ.get('REDIS_URL', 'redis://localhost:6379/0')
    log_info(f"Redis URL: {redis_url}")

    cmd = [
        'celery',
        '-A', 'celery_app.celery',
        'worker',
        '--loglevel=info',
        '--concurrency=2',
        '--logfile=logs/celery_worker.log'
    ]

    try:
        process = _spawn('Celery Worker', cmd)
        log_success(f"Celery Worker 已啟動 (PID: {process.pid}) ✓")
        return process
    except (OSError, subprocess.SubprocessError) as e:
        log_error(f"Celery Worker 啟動失敗: {e}")
        log_warning("如果沒有 Redis 服務,翻譯功能將無法使用")
        return None


def start_celery_beat():
    """Start Celery Beat (scheduled tasks); return its handle or ``None`` on failure."""
    log_info("啟動 Celery Beat...")

    cmd = [
        'celery',
        '-A', 'celery_app.celery',
        'beat',
        '--loglevel=info',
        '--logfile=logs/celery_beat.log'
    ]

    try:
        process = _spawn('Celery Beat', cmd)
        log_success(f"Celery Beat 已啟動 (PID: {process.pid}) ✓")
        return process
    except (OSError, subprocess.SubprocessError) as e:
        log_error(f"Celery Beat 啟動失敗: {e}")
        log_warning("定時任務將無法執行")
        return None


def signal_handler(signum, frame):
    """Handle SIGINT/SIGTERM: stop every child, then exit with status 0.

    Args:
        signum: Number of the received signal (unused).
        frame: Interrupted stack frame (unused).
    """
    print()
    log_warning("收到終止信號,正在關閉所有服務...")
    _stop_all_processes()
    sys.exit(0)


def monitor_processes():
    """Print the service table, then watch the children until one exits.

    When a child terminates on its own, the remaining services are stopped and
    the script exits with status 1 so a supervisor can tell a crash from a
    requested shutdown.  Ctrl+C is treated as a requested shutdown.
    """
    log_info("開始監控服務狀態...")
    print()
    print("=" * 60)
    print(f"{Colors.BOLD}服務狀態:{Colors.ENDC}")
    for name, process in processes:
        if process:
            status = "運行中 ✓" if process.poll() is None else "已停止 ✗"
            print(f"  • {name:20s} PID: {process.pid:6d}  {status}")
    print("=" * 60)
    print()
    log_success("所有服務已啟動完成!")
    print()
    log_info("按 Ctrl+C 停止所有服務")
    print()

    try:
        while True:
            # Check first, sleep afterwards, so a service that died during
            # start-up is reported immediately.
            for name, process in processes:
                if process and process.poll() is not None:
                    log_error(f"{name} 異常退出 (退出碼: {process.returncode})")
                    log_warning("正在停止其他服務...")
                    _stop_all_processes()
                    sys.exit(1)

            time.sleep(5)

    except KeyboardInterrupt:
        signal_handler(signal.SIGINT, None)


def main():
    """Entry point: start Flask, the Celery worker and Celery Beat, then monitor them."""
    # Register the signal handlers.
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    show_banner()
    check_environment()

    log_info("正在啟動所有服務...")
    print()

    # 1. Flask web service
    start_flask_app()
    time.sleep(2)  # give Flask time to start

    # 2. Celery worker
    start_celery_worker()
    time.sleep(2)  # give the worker time to start

    # 3. Celery Beat
    start_celery_beat()
    time.sleep(2)  # give Beat time to start

    print()

    monitor_processes()


if __name__ == '__main__':
    try:
        main()
    except Exception as e:
        log_error(f"啟動失敗: {e}")
        import traceback
        traceback.print_exc()
        # Do not leave already-started children running after a failed start.
        _stop_all_processes()
        sys.exit(1)