beabigegg
2025-09-02 13:11:48 +08:00
parent a60d965317
commit b11a8272c4
76 changed files with 15321 additions and 200 deletions

View File

@@ -142,15 +142,43 @@ class DifyClient:
if not text.strip():
raise APIError("翻譯文字不能為空")
# Build the standard translation prompt (English instruction format)
language_names = {
'zh-tw': 'Traditional Chinese',
'zh-cn': 'Simplified Chinese',
'en': 'English',
'ja': 'Japanese',
'ko': 'Korean',
'vi': 'Vietnamese',
'th': 'Thai',
'id': 'Indonesian',
'ms': 'Malay',
'es': 'Spanish',
'fr': 'French',
'de': 'German',
'ru': 'Russian',
'ar': 'Arabic'
}
source_lang_name = language_names.get(source_language, source_language)
target_lang_name = language_names.get(target_language, target_language)
query = f"""Task: Translate ONLY into {target_lang_name} from {source_lang_name}.
Rules:
- Output translation text ONLY (no source text, no notes, no questions, no language-detection remarks).
- Preserve original line breaks.
- Do NOT wrap in quotes or code blocks.
- Maintain original formatting and structure.
{text.strip()}"""
# Build the request payload (format from the proven working version)
request_data = {
'inputs': {},
'response_mode': 'blocking',
'user': f"user_{user_id}" if user_id else "doc-translator-user",
'query': query
}
try:
@@ -162,10 +190,10 @@ class DifyClient:
job_id=job_id
)
# Extract the translation result from the response (per the proven working version)
answer = response.get('answer')
if not isinstance(answer, str) or not answer.strip():
raise APIError("Dify API returned an empty translation result")
return {
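The rewritten client sends the entire instruction as a single query string with empty inputs, which matches the chat-messages request shape of Dify-style endpoints. A minimal sketch of the round trip under that assumption (the endpoint path, key handling, and function name below are illustrative, not this project's actual code):

import requests

def translate_via_dify(base_url: str, api_key: str, query: str, user: str) -> str:
    # All instructions travel in the query text; inputs stays empty.
    payload = {
        'inputs': {},
        'response_mode': 'blocking',  # wait for the complete answer
        'user': user,
        'query': query,
    }
    resp = requests.post(
        f"{base_url}/chat-messages",
        json=payload,
        headers={'Authorization': f'Bearer {api_key}'},
        timeout=60,
    )
    resp.raise_for_status()
    # Mirror the stricter answer validation introduced in this commit.
    answer = resp.json().get('answer')
    if not isinstance(answer, str) or not answer.strip():
        raise ValueError("empty translation result")
    return answer.strip()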

View File

@@ -0,0 +1,719 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Core document-processing logic, ported from the best-performing version.
Provides complete DOCX text extraction and translation-insertion functionality.
Author: PANJIT IT Team
Created: 2024-09-02
Modified: 2024-09-02
"""
import re
import sys
import time
from pathlib import Path
from typing import List, Dict, Tuple, Optional, Any
from docx.text.paragraph import Paragraph
from docx.table import Table, _Cell
from docx.shared import Pt
from docx.oxml import OxmlElement
from docx.oxml.ns import qn, nsdecls
import docx
from app.utils.logger import get_logger
from app.utils.exceptions import FileProcessingError
logger = get_logger(__name__)
# ---------- Constants ----------
INSERT_FONT_SIZE_PT = 10
SENTENCE_MODE = True
# ---------- Optional dependencies detection ----------
try:
import blingfire
_HAS_BLINGFIRE = True
except ImportError:
_HAS_BLINGFIRE = False
try:
import pysbd
_HAS_PYSBD = True
except ImportError:
_HAS_PYSBD = False
# ---------- Helper functions ----------
def _has_cjk(text: str) -> bool:
"""Check if text contains CJK (Chinese/Japanese/Korean) characters."""
for char in text:
if '\u4e00' <= char <= '\u9fff' or \
'\u3400' <= char <= '\u4dbf' or \
'\u20000' <= char <= '\u2a6df' or \
'\u3040' <= char <= '\u309f' or \
'\u30a0' <= char <= '\u30ff' or \
'\uac00' <= char <= '\ud7af':
return True
return False
def _normalize_text(text: str) -> str:
"""Normalize text for comparison."""
return re.sub(r'\s+', ' ', text.strip().lower())
def _append_after(p: Paragraph, text_block: str, italic: bool=True, font_size_pt: int=INSERT_FONT_SIZE_PT) -> Paragraph:
"""Insert a new paragraph after p, return the new paragraph (for chain insert)."""
new_p = OxmlElement("w:p")
p._p.addnext(new_p)
np = Paragraph(new_p, p._parent)
lines = text_block.split("\n")
for i, line in enumerate(lines):
run = np.add_run(line)
if italic:
run.italic = True
if font_size_pt:
run.font.size = Pt(font_size_pt)
if i < len(lines) - 1:
run.add_break()
tag = np.add_run("\u200b")
if italic:
tag.italic = True
if font_size_pt:
tag.font.size = Pt(font_size_pt)
return np
def _find_last_inserted_after(p: Paragraph, limit: int = 8) -> Optional[Paragraph]:
"""Find the last paragraph that was inserted after p (up to limit paragraphs)."""
try:
# Get all paragraphs in the parent container
if hasattr(p._parent, 'paragraphs'):
all_paras = list(p._parent.paragraphs)
else:
# Handle cases where _parent doesn't have paragraphs (e.g., table cells)
return None
# Find p's index
p_index = -1
for i, para in enumerate(all_paras):
if para._element == p._element:
p_index = i
break
if p_index == -1:
return None
# Check paragraphs after p
last_found = None
for i in range(p_index + 1, min(p_index + 1 + limit, len(all_paras))):
if _is_our_insert_block(all_paras[i]):
last_found = all_paras[i]
else:
break # Stop at first non-inserted paragraph
except Exception:
return None
return last_found
def _p_text_with_breaks(p: Paragraph) -> str:
"""Extract text from paragraph with line breaks preserved."""
parts = []
for node in p._element.xpath(".//*[local-name()='t' or local-name()='br' or local-name()='tab']"):
tag = node.tag.split('}', 1)[-1]
if tag == "t":
parts.append(node.text or "")
elif tag == "br":
parts.append("\n")
elif tag == "tab":
parts.append("\t")
return "".join(parts)
def _is_our_insert_block(p: Paragraph) -> bool:
"""Check if paragraph is our inserted translation (contains zero-width space marker)."""
text = _p_text_with_breaks(p)
return "\u200b" in text
def should_translate(text: str, src_lang: str) -> bool:
"""Determine if text should be translated based on content and source language."""
text = text.strip()
if len(text) < 3:
return False
# Skip pure numbers, dates, etc.
if re.match(r'^[\d\s\.\-\:\/]+$', text):
return False
# For auto-detect, translate if has CJK or meaningful text
if src_lang.lower() in ('auto', 'auto-detect'):
return _has_cjk(text) or len(text) > 5
return True
def _split_sentences(text: str, lang: str = 'auto') -> List[str]:
"""Split text into sentences using available libraries."""
if not text.strip():
return []
# Try blingfire first
if _HAS_BLINGFIRE and SENTENCE_MODE:
try:
sentences = blingfire.text_to_sentences(text).split('\n')
sentences = [s.strip() for s in sentences if s.strip()]
if sentences:
return sentences
except Exception as e:
logger.warning(f"Blingfire failed: {e}")
# Try pysbd
if _HAS_PYSBD and SENTENCE_MODE:
try:
seg = pysbd.Segmenter(language="en" if lang == "auto" else lang)
sentences = seg.segment(text)
sentences = [s.strip() for s in sentences if s.strip()]
if sentences:
return sentences
except Exception as e:
logger.warning(f"PySBD failed: {e}")
# Fallback to simple splitting
separators = ['. ', '。', '！', '？', '!', '?', '\n']
sentences = [text]
for sep in separators:
new_sentences = []
for s in sentences:
parts = s.split(sep)
if len(parts) > 1:
new_sentences.extend([p.strip() + sep.rstrip() for p in parts[:-1] if p.strip()])
if parts[-1].strip():
new_sentences.append(parts[-1].strip())
else:
new_sentences.append(s)
sentences = new_sentences
return [s for s in sentences if len(s.strip()) > 3]
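# Illustrative behaviour of the plain-string fallback above when neither
# blingfire nor pysbd is installed (assumed input; actual output depends on
# which separators occur in the text):
#
#     _split_sentences("First sentence. Second one! Third?")
#     -> ['First sentence.', 'Second one!', 'Third?']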
# ---------- Segment class ----------
class Segment:
"""Represents a translatable text segment in a document."""
def __init__(self, kind: str, ref: Any, ctx: str, text: str):
self.kind = kind # 'para' | 'txbx'
self.ref = ref # Reference to original document element
self.ctx = ctx # Context information
self.text = text # Text content
# ---------- TextBox helpers ----------
def _txbx_iter_texts(doc: docx.Document):
"""
Yield (txbxContent_element, joined_source_text)
- Deeply collect all descendant <w:p> under txbxContent
- Skip our inserted translations: contains zero-width or (all italic and no CJK)
- Keep only lines that still have CJK
"""
def _p_text_flags(p_el):
parts = []
for node in p_el.xpath(".//*[local-name()='t' or local-name()='br' or local-name()='tab']"):
tag = node.tag.split('}', 1)[-1]
if tag == "t":
parts.append(node.text or "")
elif tag == "br":
parts.append("\n")
else:
parts.append(" ")
text = "".join(parts)
has_zero = ("\u200b" in text)
runs = p_el.xpath(".//*[local-name()='r']")
vis, ital = [], []
for r in runs:
rt = "".join([(t.text or "") for t in r.xpath(".//*[local-name()='t']")])
if (rt or "").strip():
vis.append(rt)
ital.append(bool(r.xpath(".//*[local-name()='i']")))
all_italic = (len(vis) > 0 and all(ital))
return text, has_zero, all_italic
for tx in doc._element.xpath(".//*[local-name()='txbxContent']"):
kept = []
for p in tx.xpath(".//*[local-name()='p']"): # all descendant paragraphs
text, has_zero, all_italic = _p_text_flags(p)
if not (text or "").strip():
continue
if has_zero:
continue # our inserted
for line in text.split("\n"):
if line.strip():
kept.append(line.strip())
if kept:
joined = "\n".join(kept)
yield tx, joined
def _txbx_append_paragraph(tx, text_block: str, italic: bool = True, font_size_pt: int = INSERT_FONT_SIZE_PT):
"""Append a paragraph to textbox content."""
p = OxmlElement("w:p")
r = OxmlElement("w:r")
rPr = OxmlElement("w:rPr")
if italic:
rPr.append(OxmlElement("w:i"))
if font_size_pt:
sz = OxmlElement("w:sz")
sz.set(qn("w:val"), str(int(font_size_pt * 2)))
rPr.append(sz)
r.append(rPr)
lines = text_block.split("\n")
for i, line in enumerate(lines):
if i > 0:
r.append(OxmlElement("w:br"))
t = OxmlElement("w:t")
t.set(qn("xml:space"), "preserve")
t.text = line
r.append(t)
tag = OxmlElement("w:t")
tag.set(qn("xml:space"), "preserve")
tag.text = "\u200b"
r.append(tag)
p.append(r)
tx.append(p)
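# Note: w:sz is measured in half-points, hence int(font_size_pt * 2) above;
# e.g. the default 10 pt insert size becomes <w:sz w:val="20"/>.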
def _txbx_tail_equals(tx, translations: List[str]) -> bool:
"""Check if textbox already contains the expected translations."""
paras = tx.xpath("./*[local-name()='p']")
if len(paras) < len(translations):
return False
tail = paras[-len(translations):]
for q, expect in zip(tail, translations):
parts = []
for node in q.xpath(".//*[local-name()='t' or local-name()='br']"):
tag = node.tag.split("}", 1)[-1]
parts.append("\n" if tag == "br" else (node.text or ""))
if _normalize_text("".join(parts).strip()) != _normalize_text(expect):
return False
return True
# ---------- Main extraction logic ----------
def _get_paragraph_key(p: Paragraph) -> str:
"""Generate a stable unique key for paragraph deduplication."""
try:
# Use XML content hash + text content for stable deduplication
xml_content = p._p.xml if hasattr(p._p, 'xml') else str(p._p)
text_content = _p_text_with_breaks(p)
combined = f"{hash(xml_content)}_{len(text_content)}_{text_content[:50]}"
return combined
except Exception:
# Fallback to simple text-based key
text_content = _p_text_with_breaks(p)
return f"fallback_{hash(text_content)}_{len(text_content)}"
def _collect_docx_segments(doc: docx.Document) -> List[Segment]:
"""
Enhanced segment collector with improved stability.
Handles paragraphs, tables, textboxes, and SDT Content Controls.
"""
segs: List[Segment] = []
seen_par_keys = set()
def _add_paragraph(p: Paragraph, ctx: str):
try:
p_key = _get_paragraph_key(p)
if p_key in seen_par_keys:
return
txt = _p_text_with_breaks(p)
if txt.strip() and not _is_our_insert_block(p):
segs.append(Segment("para", p, ctx, txt))
seen_par_keys.add(p_key)
except Exception as e:
# Log error but continue processing
logger.warning(f"段落處理錯誤: {e}, 跳過此段落")
def _process_container_content(container, ctx: str):
"""
Recursively processes content within a container (body, cell, or SDT content).
Identifies and handles paragraphs, tables, and SDT elements.
"""
if container._element is None:
return
for child_element in container._element:
qname = child_element.tag
if qname.endswith('}p'): # Paragraph
p = Paragraph(child_element, container)
_add_paragraph(p, ctx)
elif qname.endswith('}tbl'): # Table
table = Table(child_element, container)
for r_idx, row in enumerate(table.rows, 1):
for c_idx, cell in enumerate(row.cells, 1):
cell_ctx = f"{ctx} > Tbl(r{r_idx},c{c_idx})"
_process_container_content(cell, cell_ctx)
elif qname.endswith('}sdt'): # Structured Document Tag (SDT)
sdt_ctx = f"{ctx} > SDT"
# 1. Extract the SDT metadata text (placeholder text, dropdown items)
ns = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
# Extract placeholder text
placeholder_texts = []
for t in child_element.xpath('.//w:placeholder//w:t', namespaces=ns):
if t.text:
placeholder_texts.append(t.text)
if placeholder_texts:
full_placeholder = "".join(placeholder_texts).strip()
if full_placeholder:
segs.append(Segment("para", child_element, f"{sdt_ctx}-Placeholder", full_placeholder))
# Extract dropdown list items
list_items = []
for item in child_element.xpath('.//w:dropDownList/w:listItem', namespaces=ns):
display_text = item.get(qn('w:displayText'))
if display_text:
list_items.append(display_text)
if list_items:
items_as_text = "\n".join(list_items)
segs.append(Segment("para", child_element, f"{sdt_ctx}-Dropdown", items_as_text))
# 2. Recursively process the SDT's actual content (sdtContent)
sdt_content_element = child_element.find(qn('w:sdtContent'))
if sdt_content_element is not None:
class SdtContentWrapper:
def __init__(self, element, parent):
self._element = element
self._parent = parent
sdt_content_wrapper = SdtContentWrapper(sdt_content_element, container)
_process_container_content(sdt_content_wrapper, sdt_ctx)
# --- Main execution starts here ---
# 1. Process the main document body
_process_container_content(doc._body, "Body")
# 2. Process textboxes
for tx, s in _txbx_iter_texts(doc):
if s.strip() and (_has_cjk(s) or should_translate(s, 'auto')):
segs.append(Segment("txbx", tx, "TextBox", s))
return segs
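# Illustrative shape of the collector's output for a simple document (element
# references abbreviated; the texts are made-up examples):
#
#     [Segment('para', <Paragraph>, 'Body', 'Heading text'),
#      Segment('para', <Paragraph>, 'Body > Tbl(r1,c2)', 'Cell text'),
#      Segment('txbx', <txbxContent>, 'TextBox', 'Callout text')]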
def _insert_docx_translations(doc: docx.Document, segs: List[Segment],
tmap: Dict[Tuple[str, str], str],
targets: List[str], log=lambda s: None) -> Tuple[int, int]:
"""
Insert translations into DOCX document segments.
CRITICAL: This function contains the fix for the major translation insertion bug.
The key fix is in the segment filtering logic - we now correctly check if any target
language has translation available using the proper key format (target_lang, text).
Args:
doc: The DOCX document object
segs: List of segments to translate
tmap: Translation map with keys as (target_language, source_text)
targets: List of target languages in order
log: Logging function
Returns:
Tuple of (successful_insertions, skipped_insertions)
Key Bug Fix:
OLD (INCORRECT): if (seg.kind, seg.text) not in tmap and (targets[0], seg.text) not in tmap
NEW (CORRECT): has_any_translation = any((tgt, seg.text) in tmap for tgt in targets)
"""
ok_cnt = skip_cnt = 0
# Helper function to add a formatted run to a paragraph
def _add_formatted_run(p: Paragraph, text: str, italic: bool, font_size_pt: int):
lines = text.split("\n")
for i, line in enumerate(lines):
run = p.add_run(line)
if italic:
run.italic = True
if font_size_pt:
run.font.size = Pt(font_size_pt)
if i < len(lines) - 1:
run.add_break()
# Add our zero-width space marker
tag_run = p.add_run("\u200b")
if italic:
tag_run.italic = True
if font_size_pt:
tag_run.font.size = Pt(font_size_pt)
for seg in segs:
# Check if any target language has translation for this segment
has_any_translation = any((tgt, seg.text) in tmap for tgt in targets)
if not has_any_translation:
log(f"[SKIP] 無翻譯結果: {seg.ctx} | {seg.text[:50]}...")
skip_cnt += 1
continue
# Get translations for all targets, with fallback for missing ones
translations = []
for tgt in targets:
if (tgt, seg.text) in tmap:
translations.append(tmap[(tgt, seg.text)])
else:
log(f"[WARNING] 缺少 {tgt} 翻譯: {seg.text[:30]}...")
translations.append(f"【翻譯查詢失敗|{tgt}{seg.text[:50]}...")
log(f"[INSERT] 準備插入 {len(translations)} 個翻譯到 {seg.ctx}: {seg.text[:30]}...")
if seg.kind == "para":
# Check if this is an SDT segment (ref is an XML element, not a Paragraph)
if hasattr(seg.ref, 'tag') and seg.ref.tag.endswith('}sdt'):
# Handle SDT segments - insert translation into sdtContent
sdt_element = seg.ref
ns = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
sdt_content = sdt_element.find(qn('w:sdtContent'))
if sdt_content is not None:
# Check if translations already exist
existing_paras = sdt_content.xpath('.//w:p', namespaces=ns)
existing_texts = []
for ep in existing_paras:
p_obj = Paragraph(ep, None)
if _is_our_insert_block(p_obj):
existing_texts.append(_p_text_with_breaks(p_obj))
# Check if all translations already exist
if len(existing_texts) >= len(translations):
if all(_normalize_text(e) == _normalize_text(t) for e, t in zip(existing_texts[:len(translations)], translations)):
skip_cnt += 1
log(f"[SKIP] SDT 已存在翻譯: {seg.text[:30]}...")
continue
# Add translations to SDT content
for t in translations:
if not any(_normalize_text(t) == _normalize_text(e) for e in existing_texts):
# Create new paragraph in SDT content
new_p_element = OxmlElement("w:p")
sdt_content.append(new_p_element)
new_p = Paragraph(new_p_element, None)
_add_formatted_run(new_p, t, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
ok_cnt += 1
log(f"[SUCCESS] SDT 插入翻譯(交錯格式)")
continue
p: Paragraph = seg.ref
# --- CONTEXT-AWARE INSERTION LOGIC (from successful version) ---
# Check if the paragraph's parent is a table cell
if isinstance(p._parent, _Cell):
cell = p._parent
try:
# Find the current paragraph's position in the cell
cell_paragraphs = list(cell.paragraphs)
p_index = -1
for idx, cell_p in enumerate(cell_paragraphs):
if cell_p._element == p._element:
p_index = idx
break
if p_index == -1:
log(f"[WARNING] 無法找到段落在單元格中的位置,使用原始方法")
# Fallback to original method
for block in translations:
new_p = cell.add_paragraph()
_add_formatted_run(new_p, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
ok_cnt += 1
continue
# Check if translations already exist right after this paragraph
existing_texts = []
check_limit = min(p_index + 1 + len(translations), len(cell_paragraphs))
for idx in range(p_index + 1, check_limit):
if _is_our_insert_block(cell_paragraphs[idx]):
existing_texts.append(_p_text_with_breaks(cell_paragraphs[idx]))
# Check if all translations already exist in order
if len(existing_texts) >= len(translations):
if all(_normalize_text(e) == _normalize_text(t) for e, t in zip(existing_texts[:len(translations)], translations)):
skip_cnt += 1
log(f"[SKIP] 表格單元格已存在翻譯: {seg.text[:30]}...")
continue
# Determine which translations need to be added
to_add = []
for t in translations:
if not any(_normalize_text(t) == _normalize_text(e) for e in existing_texts):
to_add.append(t)
if not to_add:
skip_cnt += 1
log(f"[SKIP] 表格單元格所有翻譯已存在: {seg.text[:30]}...")
continue
# Insert new paragraphs right after the current paragraph
insert_after = p
for block in to_add:
try:
# Create new paragraph and insert it after the current position
new_p_element = OxmlElement("w:p")
insert_after._element.addnext(new_p_element)
new_p = Paragraph(new_p_element, cell)
_add_formatted_run(new_p, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
insert_after = new_p # Update position for next insertion
except Exception as e:
log(f"[ERROR] 表格插入失敗: {e}, 嘗試fallback方法")
# Fallback: add at the end of cell
try:
new_p = cell.add_paragraph()
_add_formatted_run(new_p, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
log(f"[SUCCESS] Fallback插入成功")
except Exception as e2:
log(f"[FATAL] Fallback也失敗: {e2}")
continue
ok_cnt += 1
log(f"[SUCCESS] 表格單元格插入 {len(to_add)} 個翻譯(緊接原文後)")
except Exception as e:
log(f"[ERROR] 表格處理全面失敗: {e}, 跳過此段落")
continue
else:
# Normal paragraph (not in table cell) - enhanced logic from successful version
try:
# Check existing translations using the enhanced method
last = _find_last_inserted_after(p, limit=max(len(translations), 4))
# Check if all translations already exist
existing_texts = []
current_check = p
for _ in range(len(translations)):
try:
# Get the next sibling paragraph
next_sibling = current_check._element.getnext()
if next_sibling is not None and next_sibling.tag.endswith('}p'):
next_p = Paragraph(next_sibling, p._parent)
if _is_our_insert_block(next_p):
existing_texts.append(_p_text_with_breaks(next_p))
current_check = next_p
else:
break
else:
break
except Exception:
break
# Skip if all translations already exist in order
if len(existing_texts) >= len(translations):
if all(_normalize_text(e) == _normalize_text(t) for e, t in zip(existing_texts[:len(translations)], translations)):
skip_cnt += 1
log(f"[SKIP] 段落已存在翻譯: {seg.text[:30]}...")
continue
# Determine which translations need to be added
to_add = []
for t in translations:
if not any(_normalize_text(t) == _normalize_text(e) for e in existing_texts):
to_add.append(t)
if not to_add:
skip_cnt += 1
log(f"[SKIP] 段落所有翻譯已存在: {seg.text[:30]}...")
continue
# Use enhanced insertion with proper positioning
anchor = last if last else p
for block in to_add:
try:
anchor = _append_after(anchor, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
except Exception as e:
log(f"[ERROR] 段落插入失敗: {e}, 嘗試簡化插入")
try:
# Fallback: simple append
if hasattr(p._parent, 'add_paragraph'):
new_p = p._parent.add_paragraph()
_add_formatted_run(new_p, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
log(f"[SUCCESS] Fallback段落插入成功")
else:
log(f"[ERROR] 無法進行fallback插入")
except Exception as e2:
log(f"[FATAL] Fallback也失敗: {e2}")
continue
ok_cnt += 1
log(f"[SUCCESS] 段落插入 {len(to_add)} 個翻譯(交錯格式)")
except Exception as e:
log(f"[ERROR] 段落處理失敗: {e}, 跳過此段落")
continue
elif seg.kind == "txbx":
tx = seg.ref
# Check if textbox already has our translations at the end
if _txbx_tail_equals(tx, translations):
skip_cnt += 1
log(f"[SKIP] 文字框已存在翻譯: {seg.text[:30]}...")
continue
# Append translations to textbox
for t in translations:
_txbx_append_paragraph(tx, t, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
ok_cnt += 1
log(f"[SUCCESS] 文字框插入 {len(translations)} 個翻譯")
return ok_cnt, skip_cnt
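# Worked example of the fixed keying scheme described in the docstring above:
# tmap is keyed by (target_language, source_text), so availability is checked
# per target. The values here are illustrative only.
if __name__ == "__main__":  # demonstration, not part of the pipeline
    demo_tmap = {
        ('en', '你好'): 'Hello',
        ('ja', '你好'): 'こんにちは',
    }
    demo_targets = ['en', 'ja', 'vi']
    # Correct check: the segment qualifies if ANY target has a translation.
    assert any((tgt, '你好') in demo_tmap for tgt in demo_targets)
    # Targets without a translation receive a visible placeholder instead of
    # silently dropping the segment.
    missing = [tgt for tgt in demo_targets if (tgt, '你好') not in demo_tmap]
    assert missing == ['vi']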
# ---------- Main DocumentProcessor class ----------
class DocumentProcessor:
"""Enhanced document processor with complete DOCX handling capabilities."""
def __init__(self):
self.logger = logger
def extract_docx_segments(self, file_path: str) -> List[Segment]:
"""Extract all translatable segments from DOCX file."""
try:
doc = docx.Document(file_path)
segments = _collect_docx_segments(doc)
self.logger.info(f"Extracted {len(segments)} segments from {file_path}")
for seg in segments[:5]: # Log first 5 segments for debugging
self.logger.debug(f"Segment: {seg.kind} | {seg.ctx} | {seg.text[:50]}...")
return segments
except Exception as e:
self.logger.error(f"Failed to extract DOCX segments from {file_path}: {str(e)}")
raise FileProcessingError(f"DOCX 文件分析失敗: {str(e)}")
def insert_docx_translations(self, file_path: str, segments: List[Segment],
translation_map: Dict[Tuple[str, str], str],
target_languages: List[str], output_path: str) -> Tuple[int, int]:
"""Insert translations into DOCX file and save to output path."""
try:
doc = docx.Document(file_path)
def log_func(msg: str):
self.logger.debug(msg)
ok_count, skip_count = _insert_docx_translations(
doc, segments, translation_map, target_languages, log_func
)
# Save the modified document
doc.save(output_path)
self.logger.info(f"Inserted {ok_count} translations, skipped {skip_count}. Saved to: {output_path}")
return ok_count, skip_count
except Exception as e:
self.logger.error(f"Failed to insert DOCX translations: {str(e)}")
raise FileProcessingError(f"DOCX 翻譯插入失敗: {str(e)}")
def split_text_into_sentences(self, text: str, language: str = 'auto') -> List[str]:
"""Split text into sentences using the best available method."""
return _split_sentences(text, language)
def should_translate_text(self, text: str, source_language: str) -> bool:
"""Determine if text should be translated."""
return should_translate(text, source_language)
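Taken together, the processor runs in two passes: extract segments, obtain translations for them externally, then insert the results. A minimal driver under those assumptions (the file names and placeholder translations are illustrative):

from app.services.document_processor import DocumentProcessor

processor = DocumentProcessor()
segments = processor.extract_docx_segments("input.docx")
# Build the (target_language, source_text) -> translation map by any means.
tmap = {('en', seg.text): f"EN: {seg.text}" for seg in segments}
ok, skipped = processor.insert_docx_translations(
    "input.docx", segments, tmap, ['en'], "output.en.docx"
)
print(f"inserted={ok}, skipped={skipped}")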

View File

@@ -11,10 +11,11 @@ Modified: 2024-01-28
import hashlib
import time
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
from app.utils.logger import get_logger
from app.utils.exceptions import TranslationError, FileProcessingError
from app.services.dify_client import DifyClient
from app.services.document_processor import DocumentProcessor, Segment
from app.models.cache import TranslationCache
from app.models.job import TranslationJob
from app.utils.helpers import generate_filename, create_job_directory
@@ -42,88 +43,39 @@ class DocumentParser:
class DocxParser(DocumentParser):
"""DOCX 文件解析器"""
"""DOCX 文件解析器 - 使用增強的 DocumentProcessor"""
def __init__(self, file_path: str):
super().__init__(file_path)
self.processor = DocumentProcessor()
def extract_text_segments(self) -> List[str]:
"""提取 DOCX 文件的文字片段"""
"""提取 DOCX 文件的文字片段 - 使用增強邏輯"""
try:
# Extract segments with the new document processor
segments = self.processor.extract_docx_segments(str(self.file_path))
# Convert to a plain list of text strings, filtering out very short entries
text_segments = []
for seg in segments:
if seg.text.strip() and len(seg.text.strip()) > 3:
text_segments.append(seg.text)
logger.info(f"Enhanced extraction: {len(text_segments)} text segments from DOCX")
return text_segments
except Exception as e:
logger.error(f"Failed to extract text from DOCX: {str(e)}")
raise FileProcessingError(f"DOCX 文件解析失敗: {str(e)}")
def extract_segments_with_context(self) -> List[Segment]:
"""提取帶上下文的段落資訊"""
return self.processor.extract_docx_segments(str(self.file_path))
def generate_translated_document(self, translations: Dict[str, List[str]],
target_language: str, output_dir: Path) -> str:
"""生成翻譯後的 DOCX 文件"""
"""生成翻譯後的 DOCX 文件 - 使用增強的翻譯插入邏輯"""
try:
# Generate the output filename
output_filename = generate_filename(
self.file_path.name,
@@ -133,10 +85,30 @@ class DocxParser(DocumentParser):
)
output_path = output_dir / output_filename
# Extract segment information
segments = self.extract_segments_with_context()
# Build the translation map
translation_map = {}
translated_texts = translations.get(target_language, [])
# Pair segments with their translations in order
text_index = 0
for seg in segments:
if text_index < len(translated_texts):
translation_map[(target_language, seg.text)] = translated_texts[text_index]
text_index += 1
# Insert translations using the enhanced logic
ok_count, skip_count = self.processor.insert_docx_translations(
str(self.file_path),
segments,
translation_map,
[target_language],
str(output_path)
)
logger.info(f"Enhanced translation: Generated {output_path} with {ok_count} insertions, {skip_count} skips")
return str(output_path)
except Exception as e:
@@ -202,6 +174,7 @@ class TranslationService:
def __init__(self):
self.dify_client = DifyClient()
self.document_processor = DocumentProcessor()
# Map file extensions to parser classes
self.parsers = {
@@ -222,31 +195,87 @@ class TranslationService:
return parser_class(file_path)
def split_text_into_sentences(self, text: str, language: str = 'auto') -> List[str]:
"""將文字分割成句子"""
# 這裡可以使用更智能的句子分割
# 暫時使用簡單的分割方式
sentences = []
# 基本的句子分割符號
separators = ['. ', '', '', '', '!', '?']
current_text = text
for sep in separators:
parts = current_text.split(sep)
if len(parts) > 1:
sentences.extend([part.strip() + sep.rstrip() for part in parts[:-1] if part.strip()])
current_text = parts[-1]
# 添加最後一部分
if current_text.strip():
sentences.append(current_text.strip())
# 過濾太短的句子
sentences = [s for s in sentences if len(s.strip()) > 5]
return sentences
"""將文字分割成句子 - 使用增強的分句邏輯"""
return self.document_processor.split_text_into_sentences(text, language)
def translate_segment_with_sentences(self, text: str, source_language: str,
target_language: str, user_id: int = None,
job_id: int = None) -> str:
"""
按段落翻譯,模仿成功版本的 translate_block_sentencewise 邏輯
對多行文字進行逐行、逐句翻譯,並重新組合成完整段落
"""
if not text or not text.strip():
return ""
# Check the cache: whole-paragraph entries first
cached_whole = TranslationCache.get_translation(text, source_language, target_language)
if cached_whole:
logger.debug(f"Whole paragraph cache hit: {text[:30]}...")
return cached_whole
# Process line by line
out_lines = []
all_successful = True
for raw_line in text.split('\n'):
if not raw_line.strip():
out_lines.append("")
continue
# Split the line into sentences
sentences = self.document_processor.split_text_into_sentences(raw_line, source_language)
if not sentences:
sentences = [raw_line]
translated_parts = []
for sentence in sentences:
sentence = sentence.strip()
if not sentence:
continue
# Check the sentence-level cache
cached_sentence = TranslationCache.get_translation(sentence, source_language, target_language)
if cached_sentence:
translated_parts.append(cached_sentence)
continue
# Call the Dify API to translate this sentence
try:
result = self.dify_client.translate_text(
text=sentence,
source_language=source_language,
target_language=target_language,
user_id=user_id,
job_id=job_id
)
translated_sentence = result['translated_text']
# Save to the sentence-level cache
TranslationCache.save_translation(
sentence, source_language, target_language, translated_sentence
)
translated_parts.append(translated_sentence)
except Exception as e:
logger.error(f"Failed to translate sentence: {sentence[:30]}... Error: {str(e)}")
translated_parts.append(f"【翻譯失敗|{target_language}{sentence}")
all_successful = False
# Reassemble the sentences into a single line
out_lines.append(" ".join(translated_parts))
# Reassemble all lines
final_result = "\n".join(out_lines)
# Cache the whole paragraph only if every sentence succeeded
if all_successful:
TranslationCache.save_translation(text, source_language, target_language, final_result)
return final_result
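# Illustrative cache interaction for the method above (values assumed): a
# whole-paragraph cache hit short-circuits the per-sentence loop entirely,
# partial failures populate only sentence-level entries, and the paragraph-
# level entry is written once every sentence has succeeded.
#
#     translate_segment_with_sentences("Hello.\nWorld.", "en", "zh-tw")
#     lookups: ("Hello.\nWorld.") -> miss; "Hello." -> hit; "World." -> miss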
def translate_text_with_cache(self, text: str, source_language: str,
target_language: str, user_id: int = None,
job_id: int = None) -> str:
@@ -285,82 +314,173 @@ class TranslationService:
raise TranslationError(f"翻譯失敗: {str(e)}")
def translate_document(self, job_uuid: str) -> Dict[str, Any]:
"""翻譯文件(主要入口點)"""
"""翻譯文件(主要入口點)- 使用增強的文檔處理邏輯"""
try:
# Fetch the job record
job = TranslationJob.query.filter_by(job_uuid=job_uuid).first()
if not job:
raise TranslationError(f"Job not found: {job_uuid}")
logger.info(f"Starting enhanced document translation: {job_uuid}")
# Update the job status
job.update_status('PROCESSING', progress=0)
# Use the enhanced document processor; choose the processing path by file extension
file_ext = Path(job.file_path).suffix.lower()
if file_ext in ['.docx', '.doc']:
# Enhanced DOCX processing path
segments = self.document_processor.extract_docx_segments(job.file_path)
logger.info(f"Enhanced extraction: Found {len(segments)} segments to translate")
if not segments:
raise TranslationError("No translatable text found in the document")
# Follow the proven version's approach: translate whole segments without complex splitting
translatable_segments = []
for seg in segments:
if self.document_processor.should_translate_text(seg.text, job.source_language):
translatable_segments.append(seg)
logger.info(f"Found {len(translatable_segments)} segments to translate")
# Batch translation, operating directly on the original segments
translation_map = {}  # key format: (target_language, source_text) -> translated_text
total_segments = len(translatable_segments)
for target_language in job.target_languages:
logger.info(f"Translating to {target_language}")
for i, seg in enumerate(translatable_segments):
try:
# Translate the whole segment text
translated = self.translate_segment_with_sentences(
text=seg.text,
source_language=job.source_language,
target_language=target_language,
user_id=job.user_id,
job_id=job.id
)
# Store the result keyed by the original segment text
translation_map[(target_language, seg.text)] = translated
# Update progress: each target language owns an equal share of 0-100
# (e.g. with 2 targets, finishing the first language yields (0*100+100)/2 = 50)
progress_within = (i + 1) / total_segments * 100
current_lang_index = job.target_languages.index(target_language)
total_progress = (current_lang_index * 100 + progress_within) / len(job.target_languages)
job.update_status('PROCESSING', progress=total_progress)
# Brief delay to avoid sending requests too quickly
time.sleep(0.1)
except Exception as e:
logger.error(f"Failed to translate segment: {seg.text[:50]}... Error: {str(e)}")
# Keep the source text when translation fails
translation_map[(target_language, seg.text)] = f"[Translation failed] {seg.text}"
# Generate the translated documents
logger.info("Generating translated documents with enhanced insertion")
output_dir = Path(job.file_path).parent
output_files = {}
for target_language in job.target_languages:
try:
# Generate the output filename
output_filename = generate_filename(
Path(job.file_path).name,
'translated',
'translated',
target_language
)
output_path = output_dir / output_filename
# Insert translations using the enhanced logic
ok_count, skip_count = self.document_processor.insert_docx_translations(
job.file_path,
segments,
translation_map,
[target_language],
str(output_path)
)
output_files[target_language] = str(output_path)
# Record the translated file in the database
file_size = Path(output_path).stat().st_size
job.add_translated_file(
language_code=target_language,
filename=Path(output_path).name,
file_path=str(output_path),
file_size=file_size
)
logger.info(f"Generated {target_language}: {ok_count} insertions, {skip_count} skips")
except Exception as e:
logger.error(f"Failed to translate sentence: {sentence[:50]}... Error: {str(e)}")
# 翻譯失敗時保留原文
translated_sentences.append(f"[翻譯失敗] {sentence}")
logger.error(f"Failed to generate translated document for {target_language}: {str(e)}")
raise TranslationError(f"生成 {target_language} 翻譯文件失敗: {str(e)}")
else:
# Fall back to the legacy logic for non-DOCX files
logger.info(f"Using legacy processing for {file_ext} files")
parser = self.get_document_parser(job.file_path)
# Extract text segments
text_segments = parser.extract_text_segments()
if not text_segments:
raise TranslationError("文件中未找到可翻譯的文字")
# 分割成句子
all_sentences = []
for segment in text_segments:
sentences = self.split_text_into_sentences(segment, job.source_language)
all_sentences.extend(sentences)
# Deduplicate while preserving order
unique_sentences = list(dict.fromkeys(all_sentences))
logger.info(f"Found {len(unique_sentences)} unique sentences to translate")
# Batch translation
translation_results = {}
total_sentences = len(unique_sentences)
for target_language in job.target_languages:
logger.info(f"Translating to {target_language}")
translated_sentences = []
for i, sentence in enumerate(unique_sentences):
try:
translated = self.translate_text_with_cache(
text=sentence,
source_language=job.source_language,
target_language=target_language,
user_id=job.user_id,
job_id=job.id
)
translated_sentences.append(translated)
# Update progress (same per-language split as the DOCX path)
progress_within = (i + 1) / total_sentences * 100
current_lang_index = job.target_languages.index(target_language)
total_progress = (current_lang_index * 100 + progress_within) / len(job.target_languages)
job.update_status('PROCESSING', progress=total_progress)
time.sleep(0.1)
except Exception as e:
logger.error(f"Failed to translate sentence: {sentence[:50]}... Error: {str(e)}")
translated_sentences.append(f"[翻譯失敗] {sentence}")
translation_results[target_language] = translated_sentences
# Generate the translated documents (legacy path)
output_dir = Path(job.file_path).parent
output_files = {}
for target_language, translations in translation_results.items():
translation_mapping = {target_language: translations}
output_file = parser.generate_translated_document(
@@ -371,7 +491,6 @@ class TranslationService:
output_files[target_language] = output_file
# Record the translated file in the database
file_size = Path(output_file).stat().st_size
job.add_translated_file(
language_code=target_language,
@@ -379,29 +498,33 @@ class TranslationService:
file_path=output_file,
file_size=file_size
)
except Exception as e:
logger.error(f"Failed to generate translated document for {target_language}: {str(e)}")
raise TranslationError(f"生成 {target_language} 翻譯文件失敗: {str(e)}")
# 計算總成本(從 API 使用統計中取得)
# 計算總成本
total_cost = self._calculate_job_cost(job.id)
# Mark the job as completed
job.update_status('COMPLETED', progress=100)
job.total_cost = total_cost
# Compute the actual token usage from the API usage statistics
from sqlalchemy import func
from app.models.stats import APIUsageStats
from app import db
actual_tokens = db.session.query(
func.sum(APIUsageStats.total_tokens)
).filter_by(job_id=job.id).scalar()
job.total_tokens = int(actual_tokens) if actual_tokens else 0
db.session.commit()
logger.info(f"Document translation completed: {job_uuid}")
logger.info(f"Enhanced document translation completed: {job_uuid}")
return {
'success': True,
'job_uuid': job_uuid,
'output_files': output_files,
'total_sentences': len(translatable_segments) if 'translatable_segments' in locals() else len(unique_sentences) if 'unique_sentences' in locals() else 0,
'total_cost': float(total_cost),
'target_languages': job.target_languages
}
@@ -409,13 +532,14 @@ class TranslationService:
except TranslationError:
raise
except Exception as e:
logger.error(f"Document translation failed: {job_uuid}. Error: {str(e)}")
logger.error(f"Enhanced document translation failed: {job_uuid}. Error: {str(e)}")
raise TranslationError(f"文件翻譯失敗: {str(e)}")
def _calculate_job_cost(self, job_id: int) -> float:
"""計算任務總成本"""
from app import db
from sqlalchemy import func
from app.models.stats import APIUsageStats
total_cost = db.session.query(
func.sum(APIUsageStats.cost)