改用API驗證

2025-10-02 17:13:24 +08:00
parent 0a89c19fc9
commit adecdf0cce
48 changed files with 6136 additions and 1239 deletions
--- a/app/services/dify_client.py
+++ b/app/services/dify_client.py
@@ -23,29 +23,51 @@ class DifyClient:
    """Dify API 客戶端"""
    
    def __init__(self):
-        self.base_url = current_app.config.get('DIFY_API_BASE_URL', '')
-        self.api_key = current_app.config.get('DIFY_API_KEY', '')
+        # Read a separate base URL / API key pair for translation and for OCR
+        # from current_app.config. A missing pair only logs a warning here;
+        # _make_request raises APIError when the incomplete pair is used.
+        # Translation API configuration
+        self.translation_base_url = current_app.config.get('DIFY_TRANSLATION_BASE_URL', '')
+        self.translation_api_key = current_app.config.get('DIFY_TRANSLATION_API_KEY', '')
+
+        # OCR API configuration
+        self.ocr_base_url = current_app.config.get('DIFY_OCR_BASE_URL', '')
+        self.ocr_api_key = current_app.config.get('DIFY_OCR_API_KEY', '')
+
        self.timeout = (10, 60)  # (連接超時, 讀取超時)
        self.max_retries = 3
        self.retry_delay = 1.6  # 指數退避基數
-        
-        if not self.base_url or not self.api_key:
-            logger.warning("Dify API configuration is incomplete")
+
+        if not self.translation_base_url or not self.translation_api_key:
+            logger.warning("Dify Translation API configuration is incomplete")
+
+        if not self.ocr_base_url or not self.ocr_api_key:
+            logger.warning("Dify OCR API configuration is incomplete")
    
-    def _make_request(self, method: str, endpoint: str, data: Dict[str, Any] = None, 
-                     user_id: int = None, job_id: int = None) -> Dict[str, Any]:
+    def _make_request(self, method: str, endpoint: str, data: Dict[str, Any] = None,
+                     user_id: int = None, job_id: int = None, files_data: Dict = None,
+                     api_type: str = 'translation') -> Dict[str, Any]:
        """發送 HTTP 請求到 Dify API"""
-        
-        if not self.base_url or not self.api_key:
-            raise APIError("Dify API 未配置完整")
-        
-        url = f"{self.base_url.rstrip('/')}/{endpoint.lstrip('/')}"
-        
+
+        # 根据API类型选择配置
+        if api_type == 'ocr':
+            base_url = self.ocr_base_url
+            api_key = self.ocr_api_key
+            if not base_url or not api_key:
+                raise APIError("Dify OCR API 未配置完整")
+        else:  # translation
+            base_url = self.translation_base_url
+            api_key = self.translation_api_key
+            if not base_url or not api_key:
+                raise APIError("Dify Translation API 未配置完整")
+
+        url = f"{base_url.rstrip('/')}/{endpoint.lstrip('/')}"
+
        headers = {
-            'Authorization': f'Bearer {self.api_key}',
-            'Content-Type': 'application/json',
+            'Authorization': f'Bearer {api_key}',
            'User-Agent': 'PANJIT-Document-Translator/1.0'
        }
+
+        # 只有在非文件上传时才设置JSON Content-Type
+        if not files_data:
+            headers['Content-Type'] = 'application/json'
        
        # 重試邏輯
        last_exception = None
@@ -53,11 +75,15 @@ class DifyClient:
        
        for attempt in range(self.max_retries):
            try:
-                logger.debug(f"Making Dify API request: {method} {url} (attempt {attempt + 1})")
+                # logger.debug(f"Making Dify API request: {method} {url} (attempt {attempt + 1})")
                
                if method.upper() == 'GET':
                    response = requests.get(url, headers=headers, timeout=self.timeout, params=data)
+                elif files_data:
+                    # 文件上传请求，使用multipart/form-data
+                    response = requests.post(url, headers=headers, timeout=self.timeout, files=files_data, data=data)
                else:
+                    # 普通JSON请求
                    response = requests.post(url, headers=headers, timeout=self.timeout, json=data)
                
                # 計算響應時間
@@ -80,7 +106,7 @@ class DifyClient:
                        success=True
                    )
                
-                logger.debug(f"Dify API request successful: {response_time_ms}ms")
+                # logger.debug(f"Dify API request successful: {response_time_ms}ms")
                return result
            
            except requests.exceptions.RequestException as e:
@@ -107,7 +133,7 @@ class DifyClient:
                
                # 指數退避
                delay = self.retry_delay ** attempt
-                logger.debug(f"Retrying in {delay} seconds...")
+                # logger.debug(f"Retrying in {delay} seconds...")
                time.sleep(delay)
        
        # 所有重試都失敗了
@@ -137,7 +163,7 @@ class DifyClient:
            logger.warning(f"Failed to record API usage: {str(e)}")
    
    def translate_text(self, text: str, source_language: str, target_language: str,
-                      user_id: int = None, job_id: int = None) -> Dict[str, Any]:
+                      user_id: int = None, job_id: int = None, conversation_id: str = None) -> Dict[str, Any]:
        """翻譯文字"""
        
        if not text.strip():
@@ -181,7 +207,15 @@ Rules:
            'user': f"user_{user_id}" if user_id else "doc-translator-user",
            'query': query
        }
+
+        # 如果有 conversation_id，加入請求中以維持對話連續性
+        if conversation_id:
+            request_data['conversation_id'] = conversation_id
        
+        logger.info(f"[TRANSLATION] Sending translation request...")
+        logger.info(f"[TRANSLATION] Request data: {request_data}")
+        logger.info(f"[TRANSLATION] Text length: {len(text)} characters")
+
        try:
            response = self._make_request(
                method='POST',
@@ -203,6 +237,7 @@ Rules:
                'source_text': text,
                'source_language': source_language,
                'target_language': target_language,
+                'conversation_id': response.get('conversation_id'),
                'metadata': response.get('metadata', {})
            }
        
@@ -271,18 +306,165 @@ Rules:
            with open(config_file, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
-                    if line.startswith('base_url:'):
+                    if line.startswith('#') or not line:
+                        continue  # 跳过注释和空行
+
+                    # 翻译API配置（兼容旧格式）
+                    if line.startswith('base_url:') or line.startswith('translation_base_url:'):
                        base_url = line.split(':', 1)[1].strip()
+                        current_app.config['DIFY_TRANSLATION_BASE_URL'] = base_url
+                        # 兼容旧配置
                        current_app.config['DIFY_API_BASE_URL'] = base_url
-                    elif line.startswith('api:'):
+                    elif line.startswith('api:') or line.startswith('translation_api:'):
                        api_key = line.split(':', 1)[1].strip()
+                        current_app.config['DIFY_TRANSLATION_API_KEY'] = api_key
+                        # 兼容旧配置
                        current_app.config['DIFY_API_KEY'] = api_key
+
+                    # OCR API配置
+                    elif line.startswith('ocr_base_url:'):
+                        ocr_base_url = line.split(':', 1)[1].strip()
+                        current_app.config['DIFY_OCR_BASE_URL'] = ocr_base_url
+                    elif line.startswith('ocr_api:'):
+                        ocr_api_key = line.split(':', 1)[1].strip()
+                        current_app.config['DIFY_OCR_API_KEY'] = ocr_api_key
            
            logger.info("Dify API config loaded from file")
        
        except Exception as e:
            logger.error(f"Failed to load Dify config from file: {str(e)}")

+    def upload_file(self, image_data: bytes, filename: str, user_id: int = None) -> str:
+        """Upload an image to the Dify OCR API and return its file ID.
+
+        Args:
+            image_data: Raw bytes of the image; must be non-empty.
+            filename: File name sent in the multipart form. Its extension is
+                also used to choose the MIME type of the upload.
+            user_id: Optional user ID; sent to Dify as ``user_<id>`` and
+                passed through to ``_make_request``.
+
+        Returns:
+            The value of the ``id`` field in the upload response.
+
+        Raises:
+            APIError: If ``image_data`` is empty, the response carries no
+                ``id``, or the request fails for any other reason.
+        """
+        import mimetypes
+
+        if not image_data:
+            raise APIError("图片数据不能为空")
+
+        logger.info("[OCR-UPLOAD] Starting file upload to Dify OCR API")
+        logger.info(f"[OCR-UPLOAD] File: {filename}, Size: {len(image_data)} bytes, User: {user_id}")
+
+        # Derive the MIME type from the file name instead of always claiming
+        # PNG; keep PNG as the fallback for unknown or non-image extensions.
+        guessed_type, _ = mimetypes.guess_type(filename or "")
+        if guessed_type and guessed_type.startswith('image/'):
+            mime_type = guessed_type
+        else:
+            mime_type = 'image/png'
+
+        # Multipart payload: the file part plus the form field naming the user.
+        files_data = {
+            'file': (filename, image_data, mime_type)
+        }
+
+        form_data = {
+            'user': f"user_{user_id}" if user_id else "doc-translator-user"
+        }
+
+        try:
+            response = self._make_request(
+                method='POST',
+                endpoint='/files/upload',
+                data=form_data,
+                files_data=files_data,
+                user_id=user_id,
+                api_type='ocr'  # route to the OCR base URL / API key
+            )
+
+            logger.info(f"[OCR-UPLOAD] Raw Dify upload response: {response}")
+
+            file_id = response.get('id')
+            if not file_id:
+                logger.error(f"[OCR-UPLOAD] No file ID in response: {response}")
+                raise APIError("Dify 文件上传失败：未返回文件ID")
+
+            logger.info(f"[OCR-UPLOAD] ✓ File uploaded successfully: {file_id}")
+
+            return file_id
+
+        except APIError:
+            # Already the error type callers expect; do not re-wrap.
+            raise
+        except Exception as e:
+            error_msg = f"文件上传到Dify失败: {str(e)}"
+            logger.error(f"[OCR-UPLOAD] ✗ Upload failed: {error_msg}")
+            # Chain the cause so the original traceback is preserved.
+            raise APIError(error_msg) from e
+
+    def ocr_image_with_dify(self, image_data: bytes, filename: str = "image.png",
+                           user_id: int = None, job_id: int = None) -> str:
+        """Run OCR on an image through the Dify OCR chat app.
+
+        The image is first uploaded via ``upload_file``; the returned file ID
+        is then attached to a blocking ``/chat-messages`` request.
+
+        Args:
+            image_data: Raw bytes of the image; must be non-empty.
+            filename: File name used for the upload.
+            user_id: Optional user ID; sent to Dify as ``user_<id>`` and
+                passed through to ``_make_request``.
+            job_id: Optional job ID passed through to ``_make_request``.
+
+        Returns:
+            The ``answer`` text from the response, with surrounding
+            whitespace stripped.
+
+        Raises:
+            APIError: If ``image_data`` is empty, the upload or chat request
+                fails, or the response has no non-blank string ``answer``.
+        """
+        # Validate before the first len() call so that None surfaces as an
+        # APIError (the same message upload_file uses) rather than a TypeError.
+        if not image_data:
+            raise APIError("图片数据不能为空")
+
+        logger.info(f"[OCR-RECOGNITION] Starting OCR process for {filename}")
+        logger.info(f"[OCR-RECOGNITION] Image size: {len(image_data)} bytes, User: {user_id}, Job: {job_id}")
+
+        try:
+            # Step 1: upload the image to obtain a file ID.
+            logger.info("[OCR-RECOGNITION] Step 1: Uploading image to Dify...")
+            file_id = self.upload_file(image_data, filename, user_id)
+            logger.info(f"[OCR-RECOGNITION] Step 1 ✓ File uploaded with ID: {file_id}")
+
+            # Step 2: build the OCR request.
+            # The system prompt is configured in the Dify Chat Flow, so only a
+            # short user query is sent here.
+            query = "將圖片中的文字完整的提取出來"
+            logger.info("[OCR-RECOGNITION] Step 2: Preparing OCR request...")
+
+            # Step 3: Chat Flow request; the uploaded image is referenced from
+            # the ``files`` array by its upload ID.
+            request_data = {
+                'inputs': {},
+                'response_mode': 'blocking',
+                'user': f"user_{user_id}" if user_id else "doc-translator-user",
+                'query': query,
+                'files': [
+                    {
+                        'type': 'image',
+                        'transfer_method': 'local_file',
+                        'upload_file_id': file_id
+                    }
+                ]
+            }
+
+            logger.info("[OCR-RECOGNITION] Step 3: Sending OCR request to Dify...")
+            logger.info(f"[OCR-RECOGNITION] Request data: {request_data}")
+            logger.info(f"[OCR-RECOGNITION] Using OCR API: {self.ocr_base_url}")
+
+            response = self._make_request(
+                method='POST',
+                endpoint='/chat-messages',
+                data=request_data,
+                user_id=user_id,
+                job_id=job_id,
+                api_type='ocr'  # route to the OCR base URL / API key
+            )
+
+            logger.info("[OCR-RECOGNITION] Step 3 ✓ Received response from Dify")
+            # The raw response contains the full extracted document text, so
+            # keep it out of INFO-level logs.
+            logger.debug(f"[OCR-RECOGNITION] Raw Dify OCR response: {response}")
+
+            # Pull the OCR result and bookkeeping fields out of the response.
+            answer = response.get('answer', '')
+            metadata = response.get('metadata', {})
+            conversation_id = response.get('conversation_id', '')
+
+            logger.info("[OCR-RECOGNITION] Response details:")
+            logger.info(f"[OCR-RECOGNITION] - Answer length: {len(answer) if answer else 0} characters")
+            logger.info(f"[OCR-RECOGNITION] - Conversation ID: {conversation_id}")
+            logger.info(f"[OCR-RECOGNITION] - Metadata: {metadata}")
+
+            if not isinstance(answer, str) or not answer.strip():
+                logger.error("[OCR-RECOGNITION] ✗ Empty or invalid answer from Dify")
+                logger.error(f"[OCR-RECOGNITION] Answer type: {type(answer)}, Content: '{answer}'")
+                raise APIError("Dify OCR 返回空的识别结果")
+
+            logger.info("[OCR-RECOGNITION] ✓ OCR completed successfully")
+            logger.info(f"[OCR-RECOGNITION] Extracted {len(answer)} characters")
+
+            return answer.strip()
+
+        except APIError:
+            # Already the error type callers expect; do not re-wrap.
+            raise
+        except Exception as e:
+            error_msg = f"Dify OCR识别失败: {str(e)}"
+            logger.error(f"[OCR-RECOGNITION] ✗ OCR process failed: {error_msg}")
+            logger.error(f"[OCR-RECOGNITION] Exception details: {type(e).__name__}: {str(e)}")
+            # Chain the cause so the original traceback is preserved.
+            raise APIError(error_msg) from e
+

 def init_dify_config(app):
    """初始化 Dify 配置"""
@@ -291,12 +473,22 @@ def init_dify_config(app):
        DifyClient.load_config_from_file()
        
        # 檢查配置完整性
-        base_url = app.config.get('DIFY_API_BASE_URL')
-        api_key = app.config.get('DIFY_API_KEY')
-        
-        if base_url and api_key:
-            logger.info("Dify API configuration loaded successfully")
+        translation_base_url = app.config.get('DIFY_TRANSLATION_BASE_URL')
+        translation_api_key = app.config.get('DIFY_TRANSLATION_API_KEY')
+        ocr_base_url = app.config.get('DIFY_OCR_BASE_URL')
+        ocr_api_key = app.config.get('DIFY_OCR_API_KEY')
+
+        logger.info("Dify API Configuration Status:")
+        if translation_base_url and translation_api_key:
+            logger.info("✓ Translation API configured successfully")
        else:
-            logger.warning("Dify API configuration is incomplete")
-            logger.warning(f"Base URL: {'✓' if base_url else '✗'}")
-            logger.warning(f"API Key: {'✓' if api_key else '✗'}")
+            logger.warning("✗ Translation API configuration is incomplete")
+            logger.warning(f"  - Translation Base URL: {'✓' if translation_base_url else '✗'}")
+            logger.warning(f"  - Translation API Key: {'✓' if translation_api_key else '✗'}")
+
+        if ocr_base_url and ocr_api_key:
+            logger.info("✓ OCR API configured successfully")
+        else:
+            logger.warning("✗ OCR API configuration is incomplete (扫描PDF功能将不可用)")
+            logger.warning(f"  - OCR Base URL: {'✓' if ocr_base_url else '✗'}")
+            logger.warning(f"  - OCR API Key: {'✓' if ocr_api_key else '✗'}")