7th_fix doc

2025-09-03 19:00:29 +08:00
parent 086c4f25b9
commit 82aaa315bb
5 changed files with 629 additions and 12 deletions
--- a/app/services/document_processor.py
+++ b/app/services/document_processor.py
@@ -169,7 +169,9 @@ def _is_our_insert_block(p: Paragraph) -> bool:
 def should_translate(text: str, src_lang: str) -> bool:
    """Determine if text should be translated based on content and source language."""
    text = text.strip()
-    if len(text) < 3:
+    
+    # 只要有字就翻譯 - 最小長度設為1
+    if len(text) < 1:
        return False
    
    # Skip pure numbers, dates, etc.
@@ -678,10 +680,42 @@ def _insert_docx_translations(doc: docx.Document, segs: List[Segment],
                if _is_our_insert_block(cell_paragraphs[i]):
                    cell._element.remove(cell_paragraphs[i]._element)
            
-            # 添加新的翻譯到儲存格
-            for t in translations:
-                new_p = cell.add_paragraph()
-                _add_formatted_run(new_p, t, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
+            # 檢查是否為簡單的短文本儲存格（只有原文，沒有複雜結構）
+            cell_content = cell.text.strip()
+            is_simple_cell = len(cell_content) <= 10 and cell_content == seg.text.strip()
+            
+            if is_simple_cell:
+                # 對於簡單短文本，直接替換內容而不是添加段落
+                log(f"[INFO] 簡單儲存格內容替換: '{seg.text.strip()}' -> '{translations[0] if translations else 'N/A'}'")
+                
+                # 清空所有段落內容
+                for para in cell.paragraphs:
+                    para.clear()
+                
+                # 在第一個段落中添加原文和翻譯
+                first_para = cell.paragraphs[0] if cell.paragraphs else cell.add_paragraph()
+                
+                # 添加原文
+                run_orig = first_para.add_run(seg.text.strip())
+                
+                # 添加換行和翻譯
+                for t in translations:
+                    first_para.add_run('\n')
+                    run_trans = first_para.add_run(t)
+                    run_trans.italic = True
+                    if INSERT_FONT_SIZE_PT:
+                        run_trans.font.size = Pt(INSERT_FONT_SIZE_PT)
+                
+                # 添加標記
+                tag_run = first_para.add_run("\u200b")
+                tag_run.italic = True
+                if INSERT_FONT_SIZE_PT:
+                    tag_run.font.size = Pt(INSERT_FONT_SIZE_PT)
+            else:
+                # 對於複雜儲存格，使用原有的添加段落方式
+                for t in translations:
+                    new_p = cell.add_paragraph()
+                    _add_formatted_run(new_p, t, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
            
            ok_cnt += 1
            log(f"[SUCCESS] 表格儲存格插入 {len(translations)} 個翻譯")
--- a/app/services/translation_service.py
+++ b/app/services/translation_service.py
@@ -307,15 +307,11 @@ class ExcelParser(DocumentParser):
        return None
    
    def _should_translate(self, text: str, src_lang: str) -> bool:
-        """判斷文字是否需要翻譯（修正中文長度判斷）"""
+        """判斷文字是否需要翻譯（只要有字就翻譯）"""
        text = text.strip()
        
-        # 檢查是否包含中日韓文字
-        has_cjk = self._has_cjk(text)
-        
-        # 對於包含CJK字符的文字，放寬長度限制為2個字符
-        min_length = 2 if has_cjk else 3
-        if len(text) < min_length:
+        # 只要有字就翻譯 - 最小長度設為1
+        if len(text) < 1:
            return False
        
        # Skip pure numbers, dates, etc.