#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Debug paragraph-structure problems.

Copies a .docx file into a temporary directory, extracts its segments with
``DocumentProcessor``, tries to insert a test translation after each of the
first three paragraph segments via ``_append_after``, then saves the result
and re-reads it to print what ended up in the document.
"""

import sys
import os
import tempfile
import shutil
from pathlib import Path

# Fix encoding for Windows console.
# The comparison is case-insensitive because the encoding may be reported as
# "UTF-8"; the hasattr() guard covers replaced streams that do not provide
# reconfigure().
for _stream in (sys.stdout, sys.stderr):
    _encoding = getattr(_stream, 'encoding', None) or ''
    if _encoding.lower() != 'utf-8' and hasattr(_stream, 'reconfigure'):
        _stream.reconfigure(encoding='utf-8')

sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))

# NOTE(review): `db` and `sql_text` are not referenced in this script; they are
# kept in case importing them is relied on for side effects -- confirm.
from app import create_app, db
from app.services.document_processor import DocumentProcessor, _append_after
from sqlalchemy import text as sql_text

# Document analysed when no path is passed to debug_paragraph_structure().
DEFAULT_ORIGINAL_PATH = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\9c6548ac-2f59-45f4-aade-0a9b3895bbfd\original_-OR026_9c6548ac.docx"


def _find_paragraph_index(paragraphs, target):
    """Return the index in *paragraphs* whose XML element equals *target*'s, or -1."""
    for idx, candidate in enumerate(paragraphs):
        if candidate._element == target._element:
            return idx
    return -1


def _label_paragraph(text):
    """Return the display label used in the final listing for *text*."""
    if "TEST TRANSLATION" in text:
        return "🆕 測試翻譯"
    if "SIMPLE TEST" in text:
        return "🆕 簡單測試"
    # CJK Unified Ideographs range.
    if any('\u4e00' <= c <= '\u9fff' for c in text):
        return "🇨🇳 中文"
    return "❓ 其他"


def _probe_insertion(doc, paragraph, ordinal, all_paras, current_index):
    """Insert a test translation after *paragraph* and report what changed.

    Args:
        doc: The loaded python-docx document that owns the paragraphs.
        paragraph: The paragraph object to insert after.
        ordinal: 1-based number of the segment, used in the test text.
        all_paras: Snapshot of ``doc.paragraphs`` taken before insertion.
        current_index: Position of *paragraph* in *all_paras*, or -1 when it
            was not found there.

    Any exception raised while inserting is caught and reported, after which a
    plain ``doc.add_paragraph`` is attempted as a fallback test.
    """
    test_translation = f"TEST TRANSLATION {ordinal}: This is a test."
    # With current_index == -1 the expression `current_index + 1` is 0, which
    # would silently compare against the first paragraph of the document.
    located = current_index >= 0

    try:
        before_count = len(doc.paragraphs)

        # Remember the paragraph that follows the target before inserting.
        next_para_before = None
        if located and current_index + 1 < len(all_paras):
            next_para_before = all_paras[current_index + 1].text[:30]

        new_para = _append_after(paragraph, test_translation, italic=True, font_size_pt=12)

        after_count = len(doc.paragraphs)

        print(f" 插入前段落數: {before_count}")
        print(f" 插入後段落數: {after_count}")
        print(f" 段落數變化: +{after_count - before_count}")

        if new_para:
            print(f" 新段落文本: {new_para.text}")
            print(f" 新段落類型: {type(new_para)}")

            if not located:
                # The neighbour check is meaningless without a known position.
                print(" ⚠️ 段落不在 doc.paragraphs 中,無法檢查插入位置")
                return

            # Check the insertion position: the paragraph following the
            # target should now be different from the one recorded above.
            updated_paras = list(doc.paragraphs)
            if current_index + 1 < len(updated_paras):
                next_para_after = updated_paras[current_index + 1].text[:30]
                print(f" 插入前下一段: {next_para_before}")
                print(f" 插入後下一段: {next_para_after}")

                if next_para_after != next_para_before:
                    print(" ✅ 插入成功:下一段內容已改變")
                else:
                    print(" ❌ 插入失敗:下一段內容未變")

    except Exception as e:
        print(f" ❌ _append_after失敗: {e}")

        # Fall back to a plain append to see whether the document itself
        # still accepts new paragraphs.
        try:
            simple_para = doc.add_paragraph(f"SIMPLE TEST {ordinal}")
            print(" 替代測試: doc.add_paragraph成功")
            print(f" 新段落文本: {simple_para.text}")
        except Exception as e2:
            print(f" 替代測試也失敗: {e2}")


def debug_paragraph_structure(original_path=None):
    """Debug paragraph-structure problems in a .docx file.

    Args:
        original_path: Path of the document to analyse. Defaults to
            ``DEFAULT_ORIGINAL_PATH`` when omitted or ``None``.

    Raises:
        OSError: Propagated from ``shutil.copy2`` when the source document
            cannot be copied (for example because it does not exist).

    Errors raised after the document has been copied and its segments
    extracted are caught and printed rather than propagated.
    """
    source_path = DEFAULT_ORIGINAL_PATH if original_path is None else original_path

    app = create_app()

    with app.app_context():
        print("=== 調試段落結構問題 ===")

        # Work on a copy so the original upload is never modified.
        test_dir = Path(tempfile.gettempdir()) / "debug_paragraph"
        test_dir.mkdir(exist_ok=True)
        test_path = test_dir / "debug_paragraph.docx"

        shutil.copy2(source_path, test_path)
        print(f"✅ 創建測試副本: {test_path}")

        # Extract segments with the project's processor.
        processor = DocumentProcessor()
        segments = processor.extract_docx_segments(str(test_path))

        # Only look at the first three segments.
        debug_segments = segments[:3]

        try:
            from docx import Document
            doc = Document(str(test_path))

            print("\n📊 文檔分析:")
            print(f"總段落數: {len(doc.paragraphs)}")

            print("\n🔍 前3個段落詳細分析:")

            for i, seg in enumerate(debug_segments):
                if seg.kind != "para":
                    continue

                # Presumably seg.ref is the python-docx paragraph object for
                # this segment -- confirm against DocumentProcessor.
                p = seg.ref

                print(f"\n段落 {i+1}:")
                print(f" 文本: {seg.text[:50]}...")
                print(f" 段落類型: {type(p)}")
                print(f" 段落父元素類型: {type(p._parent)}")
                print(f" 段落XML標籤: {p._p.tag if hasattr(p._p, 'tag') else 'N/A'}")

                try:
                    # Locate the paragraph inside the document.
                    all_paras = list(doc.paragraphs)
                    current_index = _find_paragraph_index(all_paras, p)

                    print(f" 在文檔中的位置: {current_index} (總共{len(all_paras)}段)")

                    # Exercise _append_after on this paragraph.
                    print(" 測試插入翻譯...")
                    _probe_insertion(doc, p, i + 1, all_paras, current_index)

                except Exception as outer_e:
                    print(f" ❌ 段落分析失敗: {outer_e}")

            # Save, then re-read the file to verify what was persisted.
            output_path = test_dir / "debug_paragraph_modified.docx"
            doc.save(str(output_path))
            print(f"\n✅ 修改後文檔已保存: {output_path}")

            doc2 = Document(str(output_path))
            print(f"保存後重讀段落數: {len(doc2.paragraphs)}")

            print("\n📄 前10段內容:")
            for i, para in enumerate(doc2.paragraphs[:10]):
                stripped = para.text.strip()
                if stripped:
                    lang_info = _label_paragraph(para.text)
                    print(f" 段落 {i+1}: {lang_info} - {stripped[:60]}...")

        except Exception as e:
            print(f"❌ 調試失敗: {e}")


if __name__ == "__main__":
    # An optional first command-line argument overrides the default document.
    debug_paragraph_structure(sys.argv[1] if len(sys.argv) > 1 else None)