改用API驗證

This commit is contained in:
beabigegg
2025-10-02 17:13:24 +08:00
parent 0a89c19fc9
commit adecdf0cce
48 changed files with 6136 additions and 1239 deletions

View File

@@ -23,29 +23,51 @@ class DifyClient:
"""Dify API 客戶端"""
def __init__(self):
self.base_url = current_app.config.get('DIFY_API_BASE_URL', '')
self.api_key = current_app.config.get('DIFY_API_KEY', '')
# 翻译API配置
self.translation_base_url = current_app.config.get('DIFY_TRANSLATION_BASE_URL', '')
self.translation_api_key = current_app.config.get('DIFY_TRANSLATION_API_KEY', '')
# OCR API配置
self.ocr_base_url = current_app.config.get('DIFY_OCR_BASE_URL', '')
self.ocr_api_key = current_app.config.get('DIFY_OCR_API_KEY', '')
self.timeout = (10, 60) # (連接超時, 讀取超時)
self.max_retries = 3
self.retry_delay = 1.6 # 指數退避基數
if not self.base_url or not self.api_key:
logger.warning("Dify API configuration is incomplete")
if not self.translation_base_url or not self.translation_api_key:
logger.warning("Dify Translation API configuration is incomplete")
if not self.ocr_base_url or not self.ocr_api_key:
logger.warning("Dify OCR API configuration is incomplete")
def _make_request(self, method: str, endpoint: str, data: Dict[str, Any] = None,
user_id: int = None, job_id: int = None) -> Dict[str, Any]:
def _make_request(self, method: str, endpoint: str, data: Dict[str, Any] = None,
user_id: int = None, job_id: int = None, files_data: Dict = None,
api_type: str = 'translation') -> Dict[str, Any]:
"""發送 HTTP 請求到 Dify API"""
if not self.base_url or not self.api_key:
raise APIError("Dify API 未配置完整")
url = f"{self.base_url.rstrip('/')}/{endpoint.lstrip('/')}"
# 根据API类型选择配置
if api_type == 'ocr':
base_url = self.ocr_base_url
api_key = self.ocr_api_key
if not base_url or not api_key:
raise APIError("Dify OCR API 未配置完整")
else: # translation
base_url = self.translation_base_url
api_key = self.translation_api_key
if not base_url or not api_key:
raise APIError("Dify Translation API 未配置完整")
url = f"{base_url.rstrip('/')}/{endpoint.lstrip('/')}"
headers = {
'Authorization': f'Bearer {self.api_key}',
'Content-Type': 'application/json',
'Authorization': f'Bearer {api_key}',
'User-Agent': 'PANJIT-Document-Translator/1.0'
}
# 只有在非文件上传时才设置JSON Content-Type
if not files_data:
headers['Content-Type'] = 'application/json'
# 重試邏輯
last_exception = None
@@ -53,11 +75,15 @@ class DifyClient:
for attempt in range(self.max_retries):
try:
logger.debug(f"Making Dify API request: {method} {url} (attempt {attempt + 1})")
# logger.debug(f"Making Dify API request: {method} {url} (attempt {attempt + 1})")
if method.upper() == 'GET':
response = requests.get(url, headers=headers, timeout=self.timeout, params=data)
elif files_data:
# 文件上传请求使用multipart/form-data
response = requests.post(url, headers=headers, timeout=self.timeout, files=files_data, data=data)
else:
# 普通JSON请求
response = requests.post(url, headers=headers, timeout=self.timeout, json=data)
# 計算響應時間
@@ -80,7 +106,7 @@ class DifyClient:
success=True
)
logger.debug(f"Dify API request successful: {response_time_ms}ms")
# logger.debug(f"Dify API request successful: {response_time_ms}ms")
return result
except requests.exceptions.RequestException as e:
@@ -107,7 +133,7 @@ class DifyClient:
# 指數退避
delay = self.retry_delay ** attempt
logger.debug(f"Retrying in {delay} seconds...")
# logger.debug(f"Retrying in {delay} seconds...")
time.sleep(delay)
# 所有重試都失敗了
@@ -137,7 +163,7 @@ class DifyClient:
logger.warning(f"Failed to record API usage: {str(e)}")
def translate_text(self, text: str, source_language: str, target_language: str,
user_id: int = None, job_id: int = None) -> Dict[str, Any]:
user_id: int = None, job_id: int = None, conversation_id: str = None) -> Dict[str, Any]:
"""翻譯文字"""
if not text.strip():
@@ -181,7 +207,15 @@ Rules:
'user': f"user_{user_id}" if user_id else "doc-translator-user",
'query': query
}
# 如果有 conversation_id加入請求中以維持對話連續性
if conversation_id:
request_data['conversation_id'] = conversation_id
logger.info(f"[TRANSLATION] Sending translation request...")
logger.info(f"[TRANSLATION] Request data: {request_data}")
logger.info(f"[TRANSLATION] Text length: {len(text)} characters")
try:
response = self._make_request(
method='POST',
@@ -203,6 +237,7 @@ Rules:
'source_text': text,
'source_language': source_language,
'target_language': target_language,
'conversation_id': response.get('conversation_id'),
'metadata': response.get('metadata', {})
}
@@ -271,18 +306,165 @@ Rules:
with open(config_file, 'r', encoding='utf-8') as f:
for line in f:
line = line.strip()
if line.startswith('base_url:'):
if line.startswith('#') or not line:
continue # 跳过注释和空行
# 翻译API配置兼容旧格式
if line.startswith('base_url:') or line.startswith('translation_base_url:'):
base_url = line.split(':', 1)[1].strip()
current_app.config['DIFY_TRANSLATION_BASE_URL'] = base_url
# 兼容旧配置
current_app.config['DIFY_API_BASE_URL'] = base_url
elif line.startswith('api:'):
elif line.startswith('api:') or line.startswith('translation_api:'):
api_key = line.split(':', 1)[1].strip()
current_app.config['DIFY_TRANSLATION_API_KEY'] = api_key
# 兼容旧配置
current_app.config['DIFY_API_KEY'] = api_key
# OCR API配置
elif line.startswith('ocr_base_url:'):
ocr_base_url = line.split(':', 1)[1].strip()
current_app.config['DIFY_OCR_BASE_URL'] = ocr_base_url
elif line.startswith('ocr_api:'):
ocr_api_key = line.split(':', 1)[1].strip()
current_app.config['DIFY_OCR_API_KEY'] = ocr_api_key
logger.info("Dify API config loaded from file")
except Exception as e:
logger.error(f"Failed to load Dify config from file: {str(e)}")
def upload_file(self, image_data: bytes, filename: str, user_id: int = None) -> str:
    """Upload an image to the Dify OCR API and return the uploaded file's id.

    The request is sent as multipart/form-data through ``_make_request``
    with ``api_type='ocr'``, so the OCR base URL and API key are used.

    Args:
        image_data: Raw bytes of the image. Must be non-empty.
        filename: File name sent in the multipart ``file`` part. Its
            extension is also used to guess the MIME type.
        user_id: Optional user id. Used to build the ``user`` form field
            and forwarded to ``_make_request``.

    Returns:
        The value of the ``id`` key in the upload response.

    Raises:
        APIError: If ``image_data`` is empty, if the response carries no
            ``id``, or if any other exception occurs during the upload
            (the original exception is chained as ``__cause__``).
    """
    # Local import keeps this block self-contained; only the stdlib is used.
    import mimetypes

    if not image_data:
        raise APIError("图片数据不能为空")

    logger.info("[OCR-UPLOAD] Starting file upload to Dify OCR API")
    logger.info(f"[OCR-UPLOAD] File: {filename}, Size: {len(image_data)} bytes, User: {user_id}")

    # Derive the MIME type from the file name instead of assuming PNG.
    # Anything that is not recognisably an image keeps the previous
    # default of 'image/png', so existing callers behave as before.
    guessed_type = mimetypes.guess_type(filename or '')[0]
    if guessed_type and guessed_type.startswith('image/'):
        mime_type = guessed_type
    else:
        mime_type = 'image/png'

    # Multipart payload: the file part plus the accompanying form fields.
    files_data = {
        'file': (filename, image_data, mime_type)
    }
    form_data = {
        'user': f"user_{user_id}" if user_id else "doc-translator-user"
    }

    try:
        response = self._make_request(
            method='POST',
            endpoint='/files/upload',
            data=form_data,
            files_data=files_data,
            user_id=user_id,
            api_type='ocr'  # route the call to the OCR API configuration
        )
        logger.info(f"[OCR-UPLOAD] Raw Dify upload response: {response}")

        file_id = response.get('id')
        if not file_id:
            logger.error(f"[OCR-UPLOAD] No file ID in response: {response}")
            raise APIError("Dify 文件上传失败未返回文件ID")

        logger.info(f"[OCR-UPLOAD] ✓ File uploaded successfully: {file_id}")
        return file_id
    except APIError:
        # Already in the caller-facing exception type; pass it through.
        raise
    except Exception as e:
        error_msg = f"文件上传到Dify失败: {str(e)}"
        logger.error(f"[OCR-UPLOAD] ✗ Upload failed: {error_msg}")
        # Chain the cause so the original traceback is not lost.
        raise APIError(error_msg) from e
def ocr_image_with_dify(self, image_data: bytes, filename: str = "image.png",
                        user_id: int = None, job_id: int = None) -> str:
    """Run OCR on an image through Dify and return the recognised text.

    The image is first uploaded with ``upload_file``; the returned file id
    is then referenced in a blocking ``/chat-messages`` request sent with
    ``api_type='ocr'``.

    Args:
        image_data: Raw bytes of the image. Must be non-empty.
        filename: File name used for the upload.
        user_id: Optional user id. Used to build the ``user`` field and
            forwarded to ``upload_file`` and ``_make_request``.
        job_id: Optional job id forwarded to ``_make_request``.

    Returns:
        The ``answer`` field of the response with surrounding whitespace
        stripped.

    Raises:
        APIError: If ``image_data`` is empty, if the upload fails, if the
            answer is missing, not a string or blank, or if any other
            exception occurs (the original exception is chained).
    """
    # Validate before touching len(image_data): a None payload would
    # otherwise raise a bare TypeError outside the try block below.
    if not image_data:
        raise APIError("图片数据不能为空")

    logger.info(f"[OCR-RECOGNITION] Starting OCR process for {filename}")
    logger.info(f"[OCR-RECOGNITION] Image size: {len(image_data)} bytes, User: {user_id}, Job: {job_id}")

    try:
        # Step 1: upload the image to obtain a file id.
        logger.info("[OCR-RECOGNITION] Step 1: Uploading image to Dify...")
        file_id = self.upload_file(image_data, filename, user_id)
        logger.info(f"[OCR-RECOGNITION] Step 1 ✓ File uploaded with ID: {file_id}")

        # Step 2: build the query. Per the original author's note, the
        # system prompt lives in the Dify Chat Flow, so only a short user
        # query is sent here.
        query = "將圖片中的文字完整的提取出來"
        logger.info("[OCR-RECOGNITION] Step 2: Preparing OCR request...")

        # Step 3: chat-flow request; the uploaded image is referenced from
        # the 'files' array by its upload id.
        request_data = {
            'inputs': {},
            'response_mode': 'blocking',
            'user': f"user_{user_id}" if user_id else "doc-translator-user",
            'query': query,
            'files': [
                {
                    'type': 'image',
                    'transfer_method': 'local_file',
                    'upload_file_id': file_id
                }
            ]
        }

        logger.info("[OCR-RECOGNITION] Step 3: Sending OCR request to Dify...")
        logger.info(f"[OCR-RECOGNITION] Request data: {request_data}")
        logger.info(f"[OCR-RECOGNITION] Using OCR API: {self.ocr_base_url}")

        response = self._make_request(
            method='POST',
            endpoint='/chat-messages',
            data=request_data,
            user_id=user_id,
            job_id=job_id,
            api_type='ocr'  # route the call to the OCR API configuration
        )
        logger.info("[OCR-RECOGNITION] Step 3 ✓ Received response from Dify")
        logger.info(f"[OCR-RECOGNITION] Raw Dify OCR response: {response}")

        # Pull the OCR result and diagnostic fields out of the response.
        answer = response.get('answer', '')
        metadata = response.get('metadata', {})
        conversation_id = response.get('conversation_id', '')

        logger.info("[OCR-RECOGNITION] Response details:")
        logger.info(f"[OCR-RECOGNITION] - Answer length: {len(answer) if answer else 0} characters")
        logger.info(f"[OCR-RECOGNITION] - Conversation ID: {conversation_id}")
        logger.info(f"[OCR-RECOGNITION] - Metadata: {metadata}")

        if not isinstance(answer, str) or not answer.strip():
            logger.error("[OCR-RECOGNITION] ✗ Empty or invalid answer from Dify")
            logger.error(f"[OCR-RECOGNITION] Answer type: {type(answer)}, Content: '{answer}'")
            raise APIError("Dify OCR 返回空的识别结果")

        logger.info("[OCR-RECOGNITION] ✓ OCR completed successfully")
        logger.info(f"[OCR-RECOGNITION] Extracted {len(answer)} characters")
        return answer.strip()
    except APIError:
        # Already in the caller-facing exception type; pass it through.
        raise
    except Exception as e:
        error_msg = f"Dify OCR识别失败: {str(e)}"
        logger.error(f"[OCR-RECOGNITION] ✗ OCR process failed: {error_msg}")
        logger.error(f"[OCR-RECOGNITION] Exception details: {type(e).__name__}: {str(e)}")
        # Chain the cause so the original traceback is not lost.
        raise APIError(error_msg) from e
def init_dify_config(app):
"""初始化 Dify 配置"""
@@ -291,12 +473,22 @@ def init_dify_config(app):
DifyClient.load_config_from_file()
# 檢查配置完整性
base_url = app.config.get('DIFY_API_BASE_URL')
api_key = app.config.get('DIFY_API_KEY')
if base_url and api_key:
logger.info("Dify API configuration loaded successfully")
translation_base_url = app.config.get('DIFY_TRANSLATION_BASE_URL')
translation_api_key = app.config.get('DIFY_TRANSLATION_API_KEY')
ocr_base_url = app.config.get('DIFY_OCR_BASE_URL')
ocr_api_key = app.config.get('DIFY_OCR_API_KEY')
logger.info("Dify API Configuration Status:")
if translation_base_url and translation_api_key:
logger.info("✓ Translation API configured successfully")
else:
logger.warning("Dify API configuration is incomplete")
logger.warning(f"Base URL: {'' if base_url else ''}")
logger.warning(f"API Key: {'' if api_key else ''}")
logger.warning("✗ Translation API configuration is incomplete")
logger.warning(f" - Translation Base URL: {'' if translation_base_url else ''}")
logger.warning(f" - Translation API Key: {'' if translation_api_key else ''}")
if ocr_base_url and ocr_api_key:
logger.info("✓ OCR API configured successfully")
else:
logger.warning("✗ OCR API configuration is incomplete (扫描PDF功能将不可用)")
logger.warning(f" - OCR Base URL: {'' if ocr_base_url else ''}")
logger.warning(f" - OCR API Key: {'' if ocr_api_key else ''}")