4th_fix time error

2025-09-03 09:05:51 +08:00
parent e6e5332705
commit cce3fd4925
26 changed files with 2551 additions and 82 deletions
--- a/debug_paragraph_structure.py
+++ b/debug_paragraph_structure.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+調試段落結構問題
+"""
+
+import sys
+import os
+import tempfile
+import shutil
+from pathlib import Path
+
+# Force UTF-8 on stdout/stderr (e.g. Windows consoles) so the non-ASCII text and emoji printed below can be encoded
+if sys.stdout.encoding != 'utf-8':
+    sys.stdout.reconfigure(encoding='utf-8')
+if sys.stderr.encoding != 'utf-8':
+    sys.stderr.reconfigure(encoding='utf-8')
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))  # put the ./app directory next to this script first on the import path
+
+from app import create_app, db
+from app.services.document_processor import DocumentProcessor, _append_after
+from sqlalchemy import text as sql_text
+
+def debug_paragraph_structure():
+    """Debug paragraph-structure problems: copy a sample .docx, insert test translations after its first segments, save and re-read it."""
+    
+    app = create_app()
+    
+    with app.app_context():
+        print("=== 調試段落結構問題 ===")
+        
+        # Original document (hard-coded absolute Windows path)
+        original_path = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\9c6548ac-2f59-45f4-aade-0a9b3895bbfd\original_-OR026_9c6548ac.docx"
+        
+        # Create a test copy under the system temp directory; all further work uses the copy
+        test_dir = Path(tempfile.gettempdir()) / "debug_paragraph"
+        test_dir.mkdir(exist_ok=True)
+        test_path = test_dir / "debug_paragraph.docx"
+        
+        shutil.copy2(original_path, test_path)
+        print(f"✅ 創建測試副本: {test_path}")
+        
+        # Create the processor
+        processor = DocumentProcessor()
+        
+        # Extract segments from the test copy
+        segments = processor.extract_docx_segments(str(test_path))
+        
+        # Only look at the first 3 segments
+        debug_segments = segments[:3]
+        
+        # Load the document. NOTE(review): presumably a separate Document object from the one extract_docx_segments parsed, so seg.ref elements may never match it - confirm
+        try:
+            from docx import Document
+            doc = Document(str(test_path))
+            
+            print(f"\n📊 文檔分析:")
+            print(f"總段落數: {len(doc.paragraphs)}")
+            
+            print(f"\n🔍 前3個段落詳細分析:")
+            
+            for i, seg in enumerate(debug_segments):
+                if seg.kind == "para":
+                    p = seg.ref
+                    
+                    print(f"\n段落 {i+1}:")
+                    print(f"  文本: {seg.text[:50]}...")
+                    print(f"  段落類型: {type(p)}")
+                    print(f"  段落父元素類型: {type(p._parent)}")
+                    print(f"  段落XML標籤: {p._p.tag if hasattr(p._p, 'tag') else 'N/A'}")
+                    
+                    # Find this paragraph's index in doc.paragraphs by comparing underlying elements (stays -1 if not found)
+                    try:
+                        all_paras = list(doc.paragraphs)
+                        current_index = -1
+                        for idx, doc_p in enumerate(all_paras):
+                            if doc_p._element == p._element:
+                                current_index = idx
+                                break
+                        print(f"  在文檔中的位置: {current_index} (總共{len(all_paras)}段)")
+                        
+                        # Test insertion via _append_after
+                        print(f"  測試插入翻譯...")
+                        
+                        test_translation = f"TEST TRANSLATION {i+1}: This is a test."
+                        
+                        try:
+                            before_count = len(doc.paragraphs)
+                            
+                            # Record the next paragraph's text (first 30 chars) before inserting
+                            next_para_before = None
+                            if current_index + 1 < len(all_paras):
+                                next_para_before = all_paras[current_index + 1].text[:30]
+                            
+                            new_para = _append_after(p, test_translation, italic=True, font_size_pt=12)
+                            
+                            after_count = len(doc.paragraphs)
+                            
+                            print(f"    插入前段落數: {before_count}")
+                            print(f"    插入後段落數: {after_count}")
+                            print(f"    段落數變化: +{after_count - before_count}")
+                            
+                            if new_para:
+                                print(f"    新段落文本: {new_para.text}")
+                                print(f"    新段落類型: {type(new_para)}")
+                            
+                            # Check the insertion position: the text at current_index + 1 is compared before vs. after the insert
+                            updated_paras = list(doc.paragraphs)
+                            if current_index + 1 < len(updated_paras):
+                                next_para_after = updated_paras[current_index + 1].text[:30]
+                                print(f"    插入前下一段: {next_para_before}")
+                                print(f"    插入後下一段: {next_para_after}")
+                                
+                                if next_para_after != next_para_before:
+                                    print(f"    ✅ 插入成功：下一段內容已改變")
+                                else:
+                                    print(f"    ❌ 插入失敗：下一段內容未變")
+                            
+                        except Exception as e:
+                            print(f"    ❌ _append_after失敗: {e}")
+                            
+                            # Fallback: try a plain doc.add_paragraph test instead
+                            try:
+                                simple_para = doc.add_paragraph(f"SIMPLE TEST {i+1}")
+                                print(f"    替代測試: doc.add_paragraph成功")
+                                print(f"    新段落文本: {simple_para.text}")
+                            except Exception as e2:
+                                print(f"    替代測試也失敗: {e2}")
+                    except Exception as outer_e:
+                        print(f"  ❌ 段落分析失敗: {outer_e}")
+            
+            # Save the modified document, then re-read it to verify
+            output_path = test_dir / "debug_paragraph_modified.docx"
+            doc.save(str(output_path))
+            print(f"\n✅ 修改後文檔已保存: {output_path}")
+            
+            # Re-read the saved file for verification
+            doc2 = Document(str(output_path))
+            print(f"保存後重讀段落數: {len(doc2.paragraphs)}")
+            
+            print(f"\n📄 前10段內容:")
+            for i, para in enumerate(doc2.paragraphs[:10]):
+                if para.text.strip():
+                    lang_info = ""
+                    if "TEST TRANSLATION" in para.text:
+                        lang_info = "🆕 測試翻譯"
+                    elif "SIMPLE TEST" in para.text:
+                        lang_info = "🆕 簡單測試"
+                    elif any('\u4e00' <= c <= '\u9fff' for c in para.text):
+                        lang_info = "🇨🇳 中文"
+                    else:
+                        lang_info = "❓ 其他"
+                    
+                    print(f"  段落 {i+1}: {lang_info} - {para.text.strip()[:60]}...")
+                    
+        except Exception as e:
+            print(f"❌ 調試失敗: {e}")
+
+if __name__ == "__main__":  # run the debug routine only when executed as a script
+    debug_paragraph_structure()