4th_fix time error

beabigegg
2025-09-03 09:05:51 +08:00
parent e6e5332705
commit cce3fd4925
26 changed files with 2551 additions and 82 deletions

View File

@@ -18,6 +18,7 @@ from app.utils.logger import get_logger
 from app.models.user import User
 from app.models.job import TranslationJob
 from app.models.stats import APIUsageStats
+from app.utils.timezone import format_taiwan_time
 from app.models.log import SystemLog
 from app.models.cache import TranslationCache
 from sqlalchemy import func, desc
@@ -75,8 +76,8 @@ def get_system_stats():
         'daily_stats': daily_stats,
         'user_rankings': user_rankings_data,
         'period': 'month',
-        'start_date': datetime.utcnow().isoformat(),
-        'end_date': datetime.utcnow().isoformat()
+        'start_date': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"),
+        'end_date': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S")
     }
 ))
@@ -359,7 +360,7 @@ def get_system_health():
     try:
         from datetime import datetime
         status = {
-            'timestamp': datetime.utcnow().isoformat(),
+            'timestamp': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"),
             'status': 'healthy',
             'services': {}
         }
@@ -400,7 +401,7 @@ def get_system_health():
     except Exception as e:
         logger.error(f"Get system health error: {str(e)}")
         return jsonify({
-            'timestamp': datetime.utcnow().isoformat(),
+            'timestamp': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"),
            'status': 'error',
            'error': str(e)
        }), 500
@@ -434,7 +435,7 @@ def get_system_metrics():
         recent_counts = {status: count for status, count in recent_jobs}
 
         metrics_data = {
-            'timestamp': datetime.utcnow().isoformat(),
+            'timestamp': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"),
             'jobs': {
                 'pending': job_counts.get('PENDING', 0),
                 'processing': job_counts.get('PROCESSING', 0),
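
The new code everywhere calls format_taiwan_time (and the health blueprint below also imports now_taiwan) from app.utils.timezone, but that module itself is not visible in this view. A minimal sketch of what it presumably provides, assuming the helpers treat naive datetimes as UTC and convert to Asia/Taipei via the standard-library zoneinfo:

# app/utils/timezone.py - hypothetical sketch; the real module is among the
# 26 changed files but is not shown in this diff.
from datetime import datetime, timezone
from zoneinfo import ZoneInfo

TAIPEI = ZoneInfo("Asia/Taipei")

def now_taiwan() -> datetime:
    # Current aware time in Asia/Taipei
    return datetime.now(TAIPEI)

def format_taiwan_time(dt: datetime, fmt: str = "%Y-%m-%d %H:%M:%S") -> str:
    # datetime.utcnow() returns naive datetimes, so treat naive input as UTC
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(TAIPEI).strftime(fmt)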

View File

@@ -13,6 +13,7 @@ from flask import Blueprint, jsonify
 from app.utils.helpers import create_response
 from app.utils.logger import get_logger
 from app.models.job import TranslationJob
+from app.utils.timezone import format_taiwan_time, now_taiwan
 
 health_bp = Blueprint('health', __name__, url_prefix='/health')
 logger = get_logger(__name__)
@@ -23,7 +24,7 @@ def health_check():
     """System health check"""
     try:
         status = {
-            'timestamp': datetime.utcnow().isoformat(),
+            'timestamp': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"),
             'status': 'healthy',
             'services': {}
         }
@@ -108,7 +109,7 @@ def health_check():
     except Exception as e:
         logger.error(f"Health check error: {str(e)}")
         return jsonify({
-            'timestamp': datetime.utcnow().isoformat(),
+            'timestamp': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"),
             'status': 'error',
             'error': str(e)
         }), 500
@@ -131,7 +132,7 @@ def get_metrics():
         # System metrics
         metrics_data = {
-            'timestamp': datetime.utcnow().isoformat(),
+            'timestamp': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"),
             'jobs': {
                 'pending': job_counts.get('PENDING', 0),
                 'processing': job_counts.get('PROCESSING', 0),
@@ -217,6 +218,6 @@ def ping():
     """Simple ping check"""
     return jsonify({
         'status': 'ok',
-        'timestamp': datetime.utcnow().isoformat(),
+        'timestamp': format_taiwan_time(datetime.utcnow(), "%Y-%m-%d %H:%M:%S"),
         'message': 'pong'
     })
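
A quick way to eyeball the new format is to hit the ping endpoint; create_app and the exact /health/ping route path are assumptions here, since neither appears in this diff:

# Hypothetical smoke test; assumes an application factory create_app() and
# that ping() is routed at /health/ping under the blueprint's url_prefix.
from app import create_app

app = create_app()
with app.test_client() as client:
    resp = client.get("/health/ping")
    print(resp.get_json())
    # e.g. {'message': 'pong', 'status': 'ok', 'timestamp': '2025-09-03 09:05:51'}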

View File

@@ -58,7 +58,7 @@ class Config:
     CELERY_RESULT_SERIALIZER = 'json'
     CELERY_ACCEPT_CONTENT = ['json']
     CELERY_TIMEZONE = 'Asia/Taipei'
-    CELERY_ENABLE_UTC = True
+    CELERY_ENABLE_UTC = False  # Changed to False so that Celery uses the local timezone
 
     # LDAP configuration
     LDAP_SERVER = os.environ.get('LDAP_SERVER')
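
One consequence of flipping CELERY_ENABLE_UTC worth noting: with enable_utc off and the timezone set to Asia/Taipei, Celery interprets scheduled times (for example beat crontab entries) in Taipei local time instead of UTC. A small illustration under those settings; the app and task names are hypothetical:

# Illustration only - with these settings the crontab below fires at
# 02:00 Asia/Taipei rather than 02:00 UTC.
from celery import Celery
from celery.schedules import crontab

celery_app = Celery('document_translator')  # hypothetical app name
celery_app.conf.timezone = 'Asia/Taipei'
celery_app.conf.enable_utc = False
celery_app.conf.beat_schedule = {
    'nightly-cache-cleanup': {               # hypothetical task
        'task': 'app.tasks.cleanup_cache',
        'schedule': crontab(hour=2, minute=0),
    },
}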

View File

@@ -14,6 +14,7 @@ from datetime import datetime, timedelta
 from sqlalchemy.sql import func
 from sqlalchemy import event
 from app import db
+from app.utils.timezone import format_taiwan_time
 
 
 class TranslationJob(db.Model):
@@ -80,10 +81,10 @@ class TranslationJob(db.Model):
             'error_message': self.error_message,
             'total_tokens': self.total_tokens,
             'total_cost': float(self.total_cost) if self.total_cost else 0.0,
-            'processing_started_at': self.processing_started_at.isoformat() if self.processing_started_at else None,
-            'completed_at': self.completed_at.isoformat() if self.completed_at else None,
-            'created_at': self.created_at.isoformat() if self.created_at else None,
-            'updated_at': self.updated_at.isoformat() if self.updated_at else None
+            'processing_started_at': format_taiwan_time(self.processing_started_at, "%Y-%m-%d %H:%M:%S") if self.processing_started_at else None,
+            'completed_at': format_taiwan_time(self.completed_at, "%Y-%m-%d %H:%M:%S") if self.completed_at else None,
+            'created_at': format_taiwan_time(self.created_at, "%Y-%m-%d %H:%M:%S") if self.created_at else None,
+            'updated_at': format_taiwan_time(self.updated_at, "%Y-%m-%d %H:%M:%S") if self.updated_at else None
         }
 
         if include_files:
@@ -256,7 +257,7 @@ class JobFile(db.Model):
             'filename': self.filename,
             'file_path': self.file_path,
             'file_size': self.file_size,
-            'created_at': self.created_at.isoformat() if self.created_at else None
+            'created_at': format_taiwan_time(self.created_at, "%Y-%m-%d %H:%M:%S") if self.created_at else None
         }

View File

@@ -11,6 +11,7 @@ Modified: 2024-01-28
 from datetime import datetime, timedelta
 from sqlalchemy.sql import func
 from app import db
+from app.utils.timezone import format_taiwan_time
 
 
 class APIUsageStats(db.Model):
@@ -51,7 +52,7 @@ class APIUsageStats(db.Model):
             'response_time_ms': self.response_time_ms,
             'success': self.success,
             'error_message': self.error_message,
-            'created_at': self.created_at.isoformat() if self.created_at else None
+            'created_at': format_taiwan_time(self.created_at, "%Y-%m-%d %H:%M:%S") if self.created_at else None
         }
 
     @classmethod

View File

@@ -11,6 +11,7 @@ Modified: 2024-01-28
 from datetime import datetime, timedelta
 from sqlalchemy.sql import func
 from app import db
+from app.utils.timezone import format_taiwan_time
 
 
 class User(db.Model):
@@ -49,9 +50,9 @@ class User(db.Model):
             'email': self.email,
             'department': self.department,
             'is_admin': self.is_admin,
-            'last_login': self.last_login.isoformat() if self.last_login else None,
-            'created_at': self.created_at.isoformat() if self.created_at else None,
-            'updated_at': self.updated_at.isoformat() if self.updated_at else None
+            'last_login': format_taiwan_time(self.last_login, "%Y-%m-%d %H:%M:%S") if self.last_login else None,
+            'created_at': format_taiwan_time(self.created_at, "%Y-%m-%d %H:%M:%S") if self.created_at else None,
+            'updated_at': format_taiwan_time(self.updated_at, "%Y-%m-%d %H:%M:%S") if self.updated_at else None
         }
 
         if include_stats:
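
All three models hand naive datetimes (from datetime.utcnow() or column defaults) to format_taiwan_time, so the helper must treat naive input as UTC. A sanity-check sketch of that assumption; the expected value mirrors this commit's own timestamp (09:05:51 +08:00):

# Sanity-check sketch - assumes format_taiwan_time treats naive datetimes
# as UTC and renders them in UTC+8 (Asia/Taipei observes no DST).
from datetime import datetime
from app.utils.timezone import format_taiwan_time

naive_utc = datetime(2025, 9, 3, 1, 5, 51)  # what datetime.utcnow() returns
assert format_taiwan_time(naive_utc, "%Y-%m-%d %H:%M:%S") == "2025-09-03 09:05:51"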

View File

@@ -577,56 +577,24 @@ def _insert_docx_translations(doc: docx.Document, segs: List[Segment],
                 continue
         else:
-            # Normal paragraph (not in table cell) - enhanced logic from successful version
+            # Normal paragraph (not in table cell) - SIMPLIFIED FOR DEBUGGING
             try:
-                # Check existing translations using the enhanced method
-                last = _find_last_inserted_after(p, limit=max(len(translations), 4))
+                # TEMPORARILY DISABLE existing translation check to force insertion
+                log(f"[DEBUG] Force-inserting translations into paragraph: {seg.text[:30]}...")
-                # Check if all translations already exist
-                existing_texts = []
-                current_check = p
-                for _ in range(len(translations)):
-                    try:
-                        # Get the next sibling paragraph
-                        next_sibling = current_check._element.getnext()
-                        if next_sibling is not None and next_sibling.tag.endswith('}p'):
-                            next_p = Paragraph(next_sibling, p._parent)
-                            if _is_our_insert_block(next_p):
-                                existing_texts.append(_p_text_with_breaks(next_p))
-                                current_check = next_p
-                            else:
-                                break
-                        else:
-                            break
-                    except Exception:
-                        break
+                # Force all translations to be added
+                to_add = translations
-                # Skip if all translations already exist in order
-                if len(existing_texts) >= len(translations):
-                    if all(_normalize_text(e) == _normalize_text(t) for e, t in zip(existing_texts[:len(translations)], translations)):
-                        skip_cnt += 1
-                        log(f"[SKIP] Paragraph already has translations: {seg.text[:30]}...")
-                        continue
-                # Determine which translations need to be added
-                to_add = []
-                for t in translations:
-                    if not any(_normalize_text(t) == _normalize_text(e) for e in existing_texts):
-                        to_add.append(t)
-                if not to_add:
-                    skip_cnt += 1
-                    log(f"[SKIP] All translations for this paragraph already exist: {seg.text[:30]}...")
-                    continue
-                # Use enhanced insertion with proper positioning
-                anchor = last if last else p
+                # Use simple positioning - always insert after current paragraph
+                anchor = p
                 for block in to_add:
                     try:
+                        log(f"[DEBUG] Attempting to insert: {block[:50]}...")
                         anchor = _append_after(anchor, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
+                        log(f"[SUCCESS] _append_after insert succeeded")
                     except Exception as e:
-                        log(f"[ERROR] Paragraph insert failed: {e}, trying simplified insert")
+                        log(f"[ERROR] _append_after failed: {e}, trying simplified insert")
                         try:
                             # Fallback: simple append
                             if hasattr(p._parent, 'add_paragraph'):
@@ -640,7 +608,7 @@ def _insert_docx_translations(doc: docx.Document, segs: List[Segment],
                                 continue
 
                 ok_cnt += 1
-                log(f"[SUCCESS] Paragraph inserted {len(to_add)} translations (interleaved format)")
+                log(f"[SUCCESS] Force-inserted {len(to_add)} translations into paragraph")
             except Exception as e:
                 log(f"[ERROR] Paragraph handling failed: {e}, skipping this paragraph")
@@ -686,6 +654,39 @@ class DocumentProcessor:
             self.logger.error(f"Failed to extract DOCX segments from {file_path}: {str(e)}")
             raise FileProcessingError(f"DOCX file analysis failed: {str(e)}")
 
+    def _rematch_segments_to_document(self, doc: docx.Document, old_segments: List[Segment]) -> List[Segment]:
+        """Re-match segments from old document instance to new document instance."""
+        try:
+            # Extract fresh segments from the current document instance
+            fresh_segments = _collect_docx_segments(doc)
+
+            # Match old segments with fresh segments based on text content
+            matched_segments = []
+            for old_seg in old_segments:
+                # Find matching segment in fresh segments
+                matched = False
+                for fresh_seg in fresh_segments:
+                    if (old_seg.kind == fresh_seg.kind and
+                            old_seg.ctx == fresh_seg.ctx and
+                            _normalize_text(old_seg.text) == _normalize_text(fresh_seg.text)):
+                        matched_segments.append(fresh_seg)
+                        matched = True
+                        break
+
+                if not matched:
+                    self.logger.warning(f"Failed to match segment: {old_seg.text[:50]}...")
+                    # Still add the old segment but it might not work for insertion
+                    matched_segments.append(old_seg)
+
+            self.logger.debug(f"Re-matched {len(matched_segments)} segments to current document")
+            return matched_segments
+
+        except Exception as e:
+            self.logger.error(f"Failed to re-match segments: {str(e)}")
+            # Return original segments as fallback
+            return old_segments
+
     def insert_docx_translations(self, file_path: str, segments: List[Segment],
                                  translation_map: Dict[Tuple[str, str], str],
                                  target_languages: List[str], output_path: str) -> Tuple[int, int]:
@@ -693,11 +694,15 @@ class DocumentProcessor:
         try:
             doc = docx.Document(file_path)
 
+            # CRITICAL FIX: Re-match segments with the current document instance
+            # The original segments were extracted from a different document instance
+            matched_segments = self._rematch_segments_to_document(doc, segments)
+
             def log_func(msg: str):
                 self.logger.debug(msg)
 
             ok_count, skip_count = _insert_docx_translations(
-                doc, segments, translation_map, target_languages, log_func
+                doc, matched_segments, translation_map, target_languages, log_func
             )
 
             # Save the modified document
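
The re-match is necessary because Segment objects carry python-docx Paragraph references that are bound to the lxml element tree of the Document instance that parsed them; a second docx.Document(...) call builds a fresh tree, so references from the first parse cannot anchor insertions into it. A minimal illustration (the file path is hypothetical):

# Opening the same file twice yields two independent element trees, so a
# Paragraph extracted from doc_a cannot be used to insert XML into doc_b.
import docx

doc_a = docx.Document("sample.docx")  # hypothetical file
doc_b = docx.Document("sample.docx")
assert doc_a.paragraphs[0]._element is not doc_b.paragraphs[0]._element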

View File

@@ -74,8 +74,11 @@ class DocxParser(DocumentParser):
     def generate_translated_document(self, translations: Dict[str, List[str]],
                                      target_language: str, output_dir: Path) -> str:
-        """Generate the translated DOCX file - using the enhanced translation-insertion logic"""
+        """Generate the translated DOCX file - using the enhanced translation-insertion logic (reads from cache)"""
         try:
+            from sqlalchemy import text as sql_text
+            from app import db
+
             # Generate the output filename
             output_filename = generate_filename(
                 self.file_path.name,
@@ -88,16 +91,29 @@ class DocxParser(DocumentParser):
             # Extract segment information
             segments = self.extract_segments_with_context()
 
-            # Build the translation map
+            # Build the translation map - read from cache instead of the passed-in translations argument
             translation_map = {}
-            translated_texts = translations.get(target_language, [])
-            # Map text segments to their translations
-            text_index = 0
+            logger.info(f"Building translation map for {len(segments)} segments in language {target_language}")
             for seg in segments:
-                if text_index < len(translated_texts):
-                    translation_map[(target_language, seg.text)] = translated_texts[text_index]
-                    text_index += 1
+                # Look up each segment's translation in the translation cache
+                result = db.session.execute(sql_text("""
+                    SELECT translated_text
+                    FROM dt_translation_cache
+                    WHERE source_text = :text AND target_language = :lang
+                    ORDER BY created_at DESC
+                    LIMIT 1
+                """), {'text': seg.text, 'lang': target_language})
+                row = result.fetchone()
+                if row and row[0]:
+                    translation_map[(target_language, seg.text)] = row[0]
+                    logger.debug(f"Found translation for: {seg.text[:50]}...")
+                else:
+                    logger.warning(f"No translation found for: {seg.text[:50]}...")
+            logger.info(f"Translation map built with {len(translation_map)} mappings")
 
             # Use the enhanced translation-insertion logic
             ok_count, skip_count = self.processor.insert_docx_translations(
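
The raw-SQL lookup above issues one query per segment against dt_translation_cache. If the TranslationCache model imported in the admin module maps that table, a hedged ORM equivalent of the same lookup might read as follows; the column names are assumed from the SQL:

# Hedged ORM sketch of the per-segment cache lookup, assuming TranslationCache
# maps dt_translation_cache with source_text, target_language, translated_text
# and created_at columns.
from app.models.cache import TranslationCache

row = (TranslationCache.query
       .filter_by(source_text=seg.text, target_language=target_language)
       .order_by(TranslationCache.created_at.desc())
       .first())
if row:
    translation_map[(target_language, seg.text)] = row.translated_text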