refactor: centralize DIFY settings in config.py and cleanup env files
- Update config.py to read both .env and .env.local (with .env.local priority)
- Move DIFY API settings from hardcoded values to environment configuration
- Remove unused PADDLEOCR_MODEL_DIR setting (models stored in ~/.paddleocr/)
- Remove deprecated argostranslate translation settings
- Add DIFY settings: base_url, api_key, timeout, max_retries, batch limits
- Update dify_client.py to use settings from config.py
- Update translation_service.py to use settings instead of constants
- Fix frontend env files to use correct variable name VITE_API_BASE_URL
- Update setup_dev_env.sh with correct PaddlePaddle version (3.2.0)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
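
Note on the env-file layering: pydantic-settings loads the files listed in `env_file` in order, and values from later files override earlier ones, which is what gives .env.local priority over .env. A minimal, self-contained sketch of that behavior (DemoSettings and its two fields are illustrative, not the project's Settings class):

```python
# Minimal sketch of the .env / .env.local precedence used in this commit.
# Assumes pydantic-settings v2; DemoSettings is a stand-in, not the real Settings class.
from pathlib import Path

from pydantic import Field
from pydantic_settings import BaseSettings

PROJECT_ROOT = Path(__file__).resolve().parent  # assumption: this script sits in the project root


class DemoSettings(BaseSettings):
    dify_base_url: str = Field(default="https://dify.theaken.com/v1")
    dify_api_key: str = Field(default="")  # real key should live in .env.local only

    class Config:
        # Files are loaded in order; values from .env.local override values from .env.
        # Missing files are silently skipped.
        env_file = (
            str(PROJECT_ROOT / ".env"),
            str(PROJECT_ROOT / ".env.local"),
        )
        env_file_encoding = "utf-8"
        case_sensitive = False


if __name__ == "__main__":
    settings = DemoSettings()
    print(settings.dify_base_url, "| api key configured:", bool(settings.dify_api_key))
```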

.env.example
@@ -31,7 +31,7 @@ TASK_RETENTION_DAYS=30
 MAX_TASKS_PER_USER=1000
 
 # ===== OCR Configuration =====
-PADDLEOCR_MODEL_DIR=./models/paddleocr
+# Note: PaddleOCR/PaddleX models are stored in ~/.paddleocr/ and ~/.paddlex/ by default
 OCR_LANGUAGES=ch,en,japan,korean
 OCR_CONFIDENCE_THRESHOLD=0.5
 MAX_OCR_WORKERS=4
@@ -69,10 +69,17 @@ PDF_MARGIN_RIGHT=20
 # ===== Translation Configuration (DIFY API) =====
 # Enable translation feature
 ENABLE_TRANSLATION=true
-# DIFY API endpoint
-DIFY_API_URL=https://your-dify-instance.example.com
+# DIFY API base URL
+DIFY_BASE_URL=https://your-dify-instance.example.com/v1
 # DIFY API key (get from DIFY dashboard)
 DIFY_API_KEY=your-dify-api-key
+# API request timeout in seconds
+DIFY_TIMEOUT=120.0
+# Maximum retry attempts
+DIFY_MAX_RETRIES=3
+# Batch translation limits
+DIFY_MAX_BATCH_CHARS=5000
+DIFY_MAX_BATCH_ITEMS=20
 
 # ===== Background Tasks Configuration =====
 TASK_QUEUE_TYPE=memory

backend/app/core/config.py
@@ -5,9 +5,13 @@ Loads environment variables and provides centralized configuration
 
 from typing import List, Optional
 from pydantic_settings import BaseSettings
-from pydantic import Field
+from pydantic import Field, model_validator
 from pathlib import Path
 
+# Anchor all default paths to the backend directory to avoid scattering runtime folders
+BACKEND_ROOT = Path(__file__).resolve().parent.parent.parent
+PROJECT_ROOT = BACKEND_ROOT.parent
+
 
 class Settings(BaseSettings):
     """Application settings loaded from environment variables"""
@@ -28,8 +32,8 @@ class Settings(BaseSettings):
     )
 
     # ===== Application Configuration =====
-    backend_port: int = Field(default=12010)
-    frontend_port: int = Field(default=12011)
+    backend_port: int = Field(default=8000)
+    frontend_port: int = Field(default=5173)
     secret_key: str = Field(default="your-secret-key-change-this")
     algorithm: str = Field(default="HS256")
     access_token_expire_minutes: int = Field(default=1440)  # 24 hours
@@ -52,7 +56,7 @@ class Settings(BaseSettings):
     max_tasks_per_user: int = Field(default=1000)
 
     # ===== OCR Configuration =====
-    paddleocr_model_dir: str = Field(default="./models/paddleocr")
+    # Note: PaddleOCR models are stored in ~/.paddleocr/ and ~/.paddlex/ by default
     ocr_languages: str = Field(default="ch,en,japan,korean")
     ocr_confidence_threshold: float = Field(default=0.5)
     max_ocr_workers: int = Field(default=4)
@@ -323,10 +327,10 @@ class Settings(BaseSettings):
     # ===== File Upload Configuration =====
     max_upload_size: int = Field(default=52428800)  # 50MB
     allowed_extensions: str = Field(default="png,jpg,jpeg,pdf,bmp,tiff,doc,docx,ppt,pptx")
-    upload_dir: str = Field(default="./uploads")
-    temp_dir: str = Field(default="./uploads/temp")
-    processed_dir: str = Field(default="./uploads/processed")
-    images_dir: str = Field(default="./uploads/images")
+    upload_dir: str = Field(default=str(BACKEND_ROOT / "uploads"))
+    temp_dir: str = Field(default=str(BACKEND_ROOT / "uploads" / "temp"))
+    processed_dir: str = Field(default=str(BACKEND_ROOT / "uploads" / "processed"))
+    images_dir: str = Field(default=str(BACKEND_ROOT / "uploads" / "images"))
 
     @property
     def allowed_extensions_list(self) -> List[str]:
@@ -334,11 +338,11 @@ class Settings(BaseSettings):
         return [ext.strip() for ext in self.allowed_extensions.split(",")]
 
     # ===== Export Configuration =====
-    storage_dir: str = Field(default="./storage")
-    markdown_dir: str = Field(default="./storage/markdown")
-    json_dir: str = Field(default="./storage/json")
-    exports_dir: str = Field(default="./storage/exports")
-    result_dir: str = Field(default="./storage/results")
+    storage_dir: str = Field(default=str(BACKEND_ROOT / "storage"))
+    markdown_dir: str = Field(default=str(BACKEND_ROOT / "storage" / "markdown"))
+    json_dir: str = Field(default=str(BACKEND_ROOT / "storage" / "json"))
+    exports_dir: str = Field(default=str(BACKEND_ROOT / "storage" / "exports"))
+    result_dir: str = Field(default=str(BACKEND_ROOT / "storage" / "results"))
 
     # ===== PDF Generation Configuration =====
     pandoc_path: str = Field(default="/opt/homebrew/bin/pandoc")
@@ -350,21 +354,25 @@ class Settings(BaseSettings):
     pdf_margin_right: int = Field(default=20)
 
     # ===== Layout-Preserving PDF Configuration =====
-    chinese_font_path: str = Field(default="./backend/fonts/NotoSansSC-Regular.ttf")
+    chinese_font_path: str = Field(default=str(BACKEND_ROOT / "fonts" / "NotoSansSC-Regular.ttf"))
     pdf_font_size_base: int = Field(default=12)
     pdf_enable_bbox_debug: bool = Field(default=False)  # Draw bounding boxes for debugging
 
-    # ===== Translation Configuration (Reserved) =====
-    enable_translation: bool = Field(default=False)
-    translation_engine: str = Field(default="offline")
-    argostranslate_models_dir: str = Field(default="./models/argostranslate")
+    # ===== Translation Configuration (DIFY API) =====
+    enable_translation: bool = Field(default=True)
+    dify_base_url: str = Field(default="https://dify.theaken.com/v1")
+    dify_api_key: str = Field(default="")  # Required: set in .env.local
+    dify_timeout: float = Field(default=120.0)  # seconds
+    dify_max_retries: int = Field(default=3)
+    dify_max_batch_chars: int = Field(default=5000)  # Max characters per batch
+    dify_max_batch_items: int = Field(default=20)  # Max items per batch
 
     # ===== Background Tasks Configuration =====
     task_queue_type: str = Field(default="memory")
     redis_url: str = Field(default="redis://localhost:6379/0")
 
     # ===== CORS Configuration =====
-    cors_origins: str = Field(default="http://localhost:12011,http://127.0.0.1:12011")
+    cors_origins: str = Field(default="http://localhost:5173,http://127.0.0.1:5173")
 
     @property
     def cors_origins_list(self) -> List[str]:
@@ -373,14 +381,52 @@ class Settings(BaseSettings):
 
     # ===== Logging Configuration =====
     log_level: str = Field(default="INFO")
-    log_file: str = Field(default="./logs/app.log")
+    log_file: str = Field(default=str(BACKEND_ROOT / "logs" / "app.log"))
 
+    @model_validator(mode="after")
+    def _normalize_paths(self):
+        """Resolve all runtime paths to backend-rooted absolutes"""
+        path_fields = [
+            "upload_dir",
+            "temp_dir",
+            "processed_dir",
+            "images_dir",
+            "storage_dir",
+            "markdown_dir",
+            "json_dir",
+            "exports_dir",
+            "result_dir",
+            "log_file",
+            "chinese_font_path",
+        ]
+
+        for field in path_fields:
+            value = getattr(self, field)
+            if value:
+                setattr(self, field, str(self._resolve_path(str(value))))
+
+        return self
+
     class Config:
-        # Look for .env in project root (one level up from backend/)
-        env_file = str(Path(__file__).resolve().parent.parent.parent.parent / ".env")
+        # Look for .env files in project root (one level up from backend/)
+        # .env.local has higher priority and overrides .env
+        env_file = (
+            str(PROJECT_ROOT / ".env"),
+            str(PROJECT_ROOT / ".env.local"),
+        )
         env_file_encoding = "utf-8"
         case_sensitive = False
 
+    def _resolve_path(self, path_value: str) -> Path:
+        """
+        Convert relative paths to backend-rooted absolute paths.
+
+        This keeps runtime artifacts contained under backend/ even when the app
+        is launched from different working directories.
+        """
+        path = Path(path_value)
+        return path if path.is_absolute() else BACKEND_ROOT / path
+
     def ensure_directories(self):
         """Create all necessary directories if they don't exist"""
         dirs = [
@@ -393,15 +439,11 @@ class Settings(BaseSettings):
             self.json_dir,
             self.exports_dir,
             self.result_dir,
-            self.paddleocr_model_dir,
            Path(self.log_file).parent,
         ]
 
-        if self.enable_translation and self.translation_engine == "offline":
-            dirs.append(self.argostranslate_models_dir)
-
         for dir_path in dirs:
-            Path(dir_path).mkdir(parents=True, exist_ok=True)
+            self._resolve_path(str(dir_path)).mkdir(parents=True, exist_ok=True)
 
 
 # Global settings instance
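
Note on the path handling introduced above: defaults are anchored to BACKEND_ROOT and an after-validator re-roots any relative override. A stripped-down, runnable sketch of the same pattern (PathDemo, MODULE_ROOT, and data_dir are illustrative names, not part of the repository):

```python
# Standalone sketch of the "anchor relative paths to a module root" pattern
# used by _normalize_paths above. PathDemo / data_dir are illustrative only.
from pathlib import Path

from pydantic import Field, model_validator
from pydantic_settings import BaseSettings

MODULE_ROOT = Path(__file__).resolve().parent


class PathDemo(BaseSettings):
    data_dir: str = Field(default="./data")
    log_file: str = Field(default="./logs/app.log")

    @model_validator(mode="after")
    def _normalize_paths(self):
        # Re-root any relative path against MODULE_ROOT so the layout does not
        # depend on the process's current working directory.
        for name in ("data_dir", "log_file"):
            p = Path(getattr(self, name))
            if not p.is_absolute():
                setattr(self, name, str(MODULE_ROOT / p))
        return self


if __name__ == "__main__":
    demo = PathDemo(data_dir="custom/data")  # a relative override still gets anchored
    print(demo.data_dir)
    print(demo.log_file)
```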

backend/app/services/dify_client.py
@@ -3,7 +3,6 @@ Tool_OCR - DIFY AI Client
 HTTP client for DIFY translation API with batch support
 """
 
 import asyncio
 import logging
 import re
 import time
@@ -12,20 +11,10 @@ from typing import Dict, List, Optional
 
 import httpx
 
+from app.core.config import settings
+
 logger = logging.getLogger(__name__)
 
-# DIFY API Configuration
-DIFY_BASE_URL = "https://dify.theaken.com/v1"
-DIFY_API_KEY = "app-YOPrF2ro5fshzMkCZviIuUJd"
-DIFY_TIMEOUT = 120.0  # seconds (increased for batch)
-DIFY_MAX_RETRIES = 3
-
-# Batch translation limits
-# Conservative limits to avoid gateway timeouts
-# DIFY server may have processing time limits
-MAX_BATCH_CHARS = 5000
-MAX_BATCH_ITEMS = 20
-
 # Language name mapping
 LANGUAGE_NAMES = {
     "en": "English",
@@ -77,22 +66,39 @@ class DifyClient:
     - Blocking mode API calls
     - Automatic retry with exponential backoff
     - Token and latency tracking
+
+    Configuration is loaded from settings (config.py / .env.local):
+    - DIFY_BASE_URL: API base URL
+    - DIFY_API_KEY: API key (required)
+    - DIFY_TIMEOUT: Request timeout in seconds
+    - DIFY_MAX_RETRIES: Max retry attempts
+    - DIFY_MAX_BATCH_CHARS: Max characters per batch
+    - DIFY_MAX_BATCH_ITEMS: Max items per batch
     """
 
     def __init__(
         self,
-        base_url: str = DIFY_BASE_URL,
-        api_key: str = DIFY_API_KEY,
-        timeout: float = DIFY_TIMEOUT,
-        max_retries: int = DIFY_MAX_RETRIES
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        timeout: Optional[float] = None,
+        max_retries: Optional[int] = None
     ):
-        self.base_url = base_url
-        self.api_key = api_key
-        self.timeout = timeout
-        self.max_retries = max_retries
+        # Use settings as defaults when not explicitly provided
+        self.base_url = base_url or settings.dify_base_url
+        self.api_key = api_key or settings.dify_api_key
+        self.timeout = timeout if timeout is not None else settings.dify_timeout
+        self.max_retries = max_retries if max_retries is not None else settings.dify_max_retries
+        self.max_batch_chars = settings.dify_max_batch_chars
+        self.max_batch_items = settings.dify_max_batch_items
        self._total_tokens = 0
        self._total_requests = 0
 
+        # Warn if API key is not configured
+        if not self.api_key:
+            logger.warning(
+                "DIFY_API_KEY not configured. Set DIFY_API_KEY in .env.local for translation to work."
+            )
+
     def _get_language_name(self, lang_code: str) -> str:
         """Convert language code to full name for prompt"""
         return LANGUAGE_NAMES.get(lang_code, lang_code)
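
Note: the DifyClient docstring above describes blocking calls with automatic retry and exponential backoff. A self-contained sketch of that general pattern with httpx follows; it is not the project's implementation, and the URL, payload shape, and limits are placeholders:

```python
# Sketch of the retry-with-exponential-backoff pattern the DifyClient docstring
# describes. Not the project's implementation; URL, payload, and limits are placeholders.
import asyncio
import logging

import httpx

logger = logging.getLogger(__name__)


async def post_with_retry(
    url: str,
    payload: dict,
    api_key: str,
    timeout: float = 120.0,
    max_retries: int = 3,
) -> dict:
    headers = {"Authorization": f"Bearer {api_key}"}
    last_error: Exception | None = None

    for attempt in range(1, max_retries + 1):
        try:
            async with httpx.AsyncClient(timeout=timeout) as client:
                response = await client.post(url, json=payload, headers=headers)
                response.raise_for_status()
                return response.json()
        except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
            last_error = exc
            wait = 2 ** (attempt - 1)  # 1s, 2s, 4s, ...
            logger.warning("Request failed (attempt %d/%d), retrying in %ds",
                           attempt, max_retries, wait)
            await asyncio.sleep(wait)

    raise RuntimeError(f"All {max_retries} attempts failed") from last_error
```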

backend/app/services/translation_service.py
@@ -19,12 +19,11 @@ from app.schemas.translation import (
     TranslationProgress,
     TranslationStatusEnum,
 )
+from app.core.config import settings
 from app.services.dify_client import (
     DifyClient,
     DifyTranslationError,
     get_dify_client,
-    MAX_BATCH_CHARS,
-    MAX_BATCH_ITEMS,
 )
 
 logger = logging.getLogger(__name__)
@@ -205,8 +204,8 @@ class TranslationBatch:
         """Check if item can be added to this batch"""
         item_chars = len(item.content)
         return (
-            len(self.items) < MAX_BATCH_ITEMS and
-            self.total_chars + item_chars <= MAX_BATCH_CHARS
+            len(self.items) < settings.dify_max_batch_items and
+            self.total_chars + item_chars <= settings.dify_max_batch_chars
         )
 
     def add(self, item: TranslatableItem):
@@ -324,7 +323,7 @@ class TranslationService:
 
         logger.info(
             f"Created {len(batches)} batches from {len(items)} items "
-            f"(max {MAX_BATCH_CHARS} chars, max {MAX_BATCH_ITEMS} items per batch)"
+            f"(max {settings.dify_max_batch_chars} chars, max {settings.dify_max_batch_items} items per batch)"
         )
 
         return batches
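
Note: batch construction here is a greedy packing step bounded by dify_max_batch_chars and dify_max_batch_items. A standalone sketch of that logic, using plain strings instead of the project's TranslatableItem and the default limits from config.py:

```python
# Greedy batching sketch mirroring TranslationBatch.can_add: a new batch is
# started whenever adding an item would exceed either limit. Uses plain strings
# instead of the project's TranslatableItem.
MAX_BATCH_CHARS = 5000  # settings.dify_max_batch_chars default
MAX_BATCH_ITEMS = 20    # settings.dify_max_batch_items default


def build_batches(items: list[str]) -> list[list[str]]:
    batches: list[list[str]] = []
    current: list[str] = []
    current_chars = 0

    for text in items:
        fits = (
            len(current) < MAX_BATCH_ITEMS
            and current_chars + len(text) <= MAX_BATCH_CHARS
        )
        if not fits and current:
            batches.append(current)
            current, current_chars = [], 0
        current.append(text)
        current_chars += len(text)

    if current:
        batches.append(current)
    return batches


if __name__ == "__main__":
    texts = ["short"] * 45 + ["x" * 4000, "y" * 2000]
    print([len(b) for b in build_batches(texts)])  # -> [20, 20, 6, 1]
```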

frontend env file
@@ -4,4 +4,4 @@
 # Backend API URL
 # For local development: http://localhost:8000
 # For WSL2: Use the WSL2 IP address (get with: hostname -I | awk '{print $1}')
-VITE_API_URL=http://localhost:8000
+VITE_API_BASE_URL=http://localhost:8000

setup_dev_env.sh
@@ -166,7 +166,8 @@ detect_gpu() {
     if [ "$FORCE_CPU" = true ]; then
         echo -e "${YELLOW}ℹ 已指定 --cpu-only,跳過 GPU 偵測${NC}"
         USE_GPU=false
-        PADDLE_PACKAGE="paddlepaddle>=3.2.1"
+        PADDLE_PACKAGE="paddlepaddle==3.2.0"
+        PADDLE_INDEX="https://www.paddlepaddle.org.cn/packages/stable/cpu/"
         return
     fi
 
@@ -179,31 +180,40 @@ detect_gpu() {
             print_success "CUDA 版本: $CUDA_VERSION"
 
             CUDA_MAJOR=$(echo $CUDA_VERSION | cut -d. -f1)
+            CUDA_MINOR=$(echo $CUDA_VERSION | cut -d. -f2)
 
-            if [ "$CUDA_MAJOR" -ge 12 ]; then
+            if [ "$CUDA_MAJOR" -ge 13 ] || ([ "$CUDA_MAJOR" -eq 12 ] && [ "$CUDA_MINOR" -ge 6 ]); then
+                echo "將安裝 PaddlePaddle GPU 版本 (CUDA 12.6+)"
+                USE_GPU=true
+                PADDLE_PACKAGE="paddlepaddle-gpu==3.2.0"
+                PADDLE_INDEX="https://www.paddlepaddle.org.cn/packages/stable/cu126/"
+            elif [ "$CUDA_MAJOR" -eq 12 ]; then
                 echo "將安裝 PaddlePaddle GPU 版本 (CUDA 12.x)"
                 USE_GPU=true
-                PADDLE_PACKAGE="paddlepaddle-gpu>=3.2.1"
-                PADDLE_INDEX="https://www.paddlepaddle.org.cn/packages/stable/cu123/"
+                PADDLE_PACKAGE="paddlepaddle-gpu==3.2.0"
+                PADDLE_INDEX="https://www.paddlepaddle.org.cn/packages/stable/cu126/"
             elif [ "$CUDA_MAJOR" -eq 11 ]; then
-                echo "將安裝 PaddlePaddle GPU 版本 (CUDA 11.x)"
+                echo "將安裝 PaddlePaddle GPU 版本 (CUDA 11.8)"
                 USE_GPU=true
-                PADDLE_PACKAGE="paddlepaddle-gpu>=3.2.1"
+                PADDLE_PACKAGE="paddlepaddle-gpu==3.2.0"
                 PADDLE_INDEX="https://www.paddlepaddle.org.cn/packages/stable/cu118/"
             else
                 print_warning "CUDA 版本不支援 ($CUDA_VERSION),將使用 CPU 版本"
                 USE_GPU=false
-                PADDLE_PACKAGE="paddlepaddle>=3.2.1"
+                PADDLE_PACKAGE="paddlepaddle==3.2.0"
+                PADDLE_INDEX="https://www.paddlepaddle.org.cn/packages/stable/cpu/"
             fi
         else
             print_warning "無法獲取 CUDA 版本,將使用 CPU 版本"
             USE_GPU=false
-            PADDLE_PACKAGE="paddlepaddle>=3.2.1"
+            PADDLE_PACKAGE="paddlepaddle==3.2.0"
+            PADDLE_INDEX="https://www.paddlepaddle.org.cn/packages/stable/cpu/"
         fi
     else
         echo -e "${YELLOW}ℹ 未偵測到 NVIDIA GPU,將使用 CPU 版本${NC}"
         USE_GPU=false
-        PADDLE_PACKAGE="paddlepaddle>=3.2.1"
+        PADDLE_PACKAGE="paddlepaddle==3.2.0"
+        PADDLE_INDEX="https://www.paddlepaddle.org.cn/packages/stable/cpu/"
     fi
 }
 
@@ -348,7 +358,7 @@ if [ "$USE_GPU" = true ]; then
     fi
 else
     echo "安裝 CPU 版本..."
-    pip install 'paddlepaddle>=3.2.1'
+    pip install "$PADDLE_PACKAGE" -i "$PADDLE_INDEX"
 fi
 
 echo ""
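
Note: the version-selection logic in detect_gpu() reduces to a small decision table (CUDA 12.x or newer -> cu126 wheel, 11.x -> cu118, otherwise the CPU wheel, all pinned to 3.2.0). A hypothetical Python rendering of that table, convenient for checking the branches outside the shell script (select_paddle_wheel is not part of the repository):

```python
# Illustrative Python rendering of the package-selection table in detect_gpu()
# (setup_dev_env.sh). select_paddle_wheel is a hypothetical helper, handy for
# unit-testing the branch logic; it is not part of the repository.
BASE_INDEX = "https://www.paddlepaddle.org.cn/packages/stable/"


def select_paddle_wheel(cuda_version: str | None) -> tuple[str, str]:
    """Return (pip package spec, index URL) for a detected CUDA version string."""
    if not cuda_version:
        return "paddlepaddle==3.2.0", BASE_INDEX + "cpu/"

    major = int(cuda_version.split(".")[0])
    if major >= 12:
        return "paddlepaddle-gpu==3.2.0", BASE_INDEX + "cu126/"
    if major == 11:
        return "paddlepaddle-gpu==3.2.0", BASE_INDEX + "cu118/"
    return "paddlepaddle==3.2.0", BASE_INDEX + "cpu/"


if __name__ == "__main__":
    for version in ("12.6", "12.3", "11.8", "10.2", None):
        print(version, "->", select_paddle_wheel(version))
```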