4th_fix time error
This commit is contained in:
@@ -577,56 +577,24 @@ def _insert_docx_translations(doc: docx.Document, segs: List[Segment],
|
||||
continue
|
||||
|
||||
else:
|
||||
# Normal paragraph (not in table cell) - enhanced logic from successful version
|
||||
# Normal paragraph (not in table cell) - SIMPLIFIED FOR DEBUGGING
|
||||
try:
|
||||
# Check existing translations using the enhanced method
|
||||
last = _find_last_inserted_after(p, limit=max(len(translations), 4))
|
||||
# TEMPORARILY DISABLE existing translation check to force insertion
|
||||
log(f"[DEBUG] 強制插入翻譯到段落: {seg.text[:30]}...")
|
||||
|
||||
# Check if all translations already exist
|
||||
existing_texts = []
|
||||
current_check = p
|
||||
for _ in range(len(translations)):
|
||||
try:
|
||||
# Get the next sibling paragraph
|
||||
next_sibling = current_check._element.getnext()
|
||||
if next_sibling is not None and next_sibling.tag.endswith('}p'):
|
||||
next_p = Paragraph(next_sibling, p._parent)
|
||||
if _is_our_insert_block(next_p):
|
||||
existing_texts.append(_p_text_with_breaks(next_p))
|
||||
current_check = next_p
|
||||
else:
|
||||
break
|
||||
else:
|
||||
break
|
||||
except Exception:
|
||||
break
|
||||
# Force all translations to be added
|
||||
to_add = translations
|
||||
|
||||
# Skip if all translations already exist in order
|
||||
if len(existing_texts) >= len(translations):
|
||||
if all(_normalize_text(e) == _normalize_text(t) for e, t in zip(existing_texts[:len(translations)], translations)):
|
||||
skip_cnt += 1
|
||||
log(f"[SKIP] 段落已存在翻譯: {seg.text[:30]}...")
|
||||
continue
|
||||
|
||||
# Determine which translations need to be added
|
||||
to_add = []
|
||||
for t in translations:
|
||||
if not any(_normalize_text(t) == _normalize_text(e) for e in existing_texts):
|
||||
to_add.append(t)
|
||||
|
||||
if not to_add:
|
||||
skip_cnt += 1
|
||||
log(f"[SKIP] 段落所有翻譯已存在: {seg.text[:30]}...")
|
||||
continue
|
||||
|
||||
# Use enhanced insertion with proper positioning
|
||||
anchor = last if last else p
|
||||
# Use simple positioning - always insert after current paragraph
|
||||
anchor = p
|
||||
|
||||
for block in to_add:
|
||||
try:
|
||||
log(f"[DEBUG] 嘗試插入: {block[:50]}...")
|
||||
anchor = _append_after(anchor, block, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
|
||||
log(f"[SUCCESS] _append_after成功插入")
|
||||
except Exception as e:
|
||||
log(f"[ERROR] 段落插入失敗: {e}, 嘗試簡化插入")
|
||||
log(f"[ERROR] _append_after失敗: {e}, 嘗試簡化插入")
|
||||
try:
|
||||
# Fallback: simple append
|
||||
if hasattr(p._parent, 'add_paragraph'):
|
||||
@@ -640,7 +608,7 @@ def _insert_docx_translations(doc: docx.Document, segs: List[Segment],
|
||||
continue
|
||||
|
||||
ok_cnt += 1
|
||||
log(f"[SUCCESS] 段落插入 {len(to_add)} 個翻譯(交錯格式)")
|
||||
log(f"[SUCCESS] 段落強制插入 {len(to_add)} 個翻譯")
|
||||
|
||||
except Exception as e:
|
||||
log(f"[ERROR] 段落處理失敗: {e}, 跳過此段落")
|
||||
@@ -686,6 +654,39 @@ class DocumentProcessor:
|
||||
self.logger.error(f"Failed to extract DOCX segments from {file_path}: {str(e)}")
|
||||
raise FileProcessingError(f"DOCX 文件分析失敗: {str(e)}")
|
||||
|
||||
def _rematch_segments_to_document(self, doc: docx.Document, old_segments: List[Segment]) -> List[Segment]:
    """Re-bind segments extracted earlier to the given document instance.

    Fresh segments are collected from ``doc`` and each old segment is
    paired with a fresh one that has the same ``kind``, the same ``ctx``
    and the same normalized text.

    Each fresh segment is handed out at most once while unused candidates
    remain, so repeated identical paragraphs are paired with distinct
    fresh segments in document order instead of all collapsing onto the
    first occurrence. When there are more old duplicates than fresh ones,
    the first matching fresh segment is reused.

    Args:
        doc: The document instance the returned segments should refer to.
        old_segments: Segments obtained from another document instance.

    Returns:
        A list with one entry per element of ``old_segments``, in the same
        order. An entry is the matched fresh segment, or the original old
        segment when no match was found (a warning is logged). If anything
        raises, the error is logged and ``old_segments`` is returned as is.
    """
    try:
        # Extract fresh segments from the current document instance.
        fresh_segments = _collect_docx_segments(doc)

        # Bucket fresh segments by normalized text so every text is
        # normalized once instead of once per (old, fresh) pair.
        buckets = {}
        for fresh_seg in fresh_segments:
            buckets.setdefault(_normalize_text(fresh_seg.text), []).append(fresh_seg)

        # id() of fresh segments already paired; identity is used because
        # segment objects are not known to be hashable.
        consumed = set()
        matched_segments = []

        for old_seg in old_segments:
            candidates = [
                fresh_seg
                for fresh_seg in buckets.get(_normalize_text(old_seg.text), ())
                if old_seg.kind == fresh_seg.kind and old_seg.ctx == fresh_seg.ctx
            ]

            if not candidates:
                self.logger.warning(f"Failed to match segment: {old_seg.text[:50]}...")
                # Still add the old segment but it might not work for insertion
                matched_segments.append(old_seg)
                continue

            # Prefer a candidate that has not been paired yet; fall back to
            # the first candidate when all of them are already taken.
            chosen = next(
                (seg for seg in candidates if id(seg) not in consumed),
                candidates[0],
            )
            consumed.add(id(chosen))
            matched_segments.append(chosen)

        self.logger.debug(f"Re-matched {len(matched_segments)} segments to current document")
        return matched_segments

    except Exception as e:
        self.logger.error(f"Failed to re-match segments: {str(e)}")
        # Return original segments as fallback
        return old_segments
|
||||
|
||||
def insert_docx_translations(self, file_path: str, segments: List[Segment],
|
||||
translation_map: Dict[Tuple[str, str], str],
|
||||
target_languages: List[str], output_path: str) -> Tuple[int, int]:
|
||||
@@ -693,11 +694,15 @@ class DocumentProcessor:
|
||||
try:
|
||||
doc = docx.Document(file_path)
|
||||
|
||||
# CRITICAL FIX: Re-match segments with the current document instance
|
||||
# The original segments were extracted from a different document instance
|
||||
matched_segments = self._rematch_segments_to_document(doc, segments)
|
||||
|
||||
def log_func(msg: str):
|
||||
self.logger.debug(msg)
|
||||
|
||||
ok_count, skip_count = _insert_docx_translations(
|
||||
doc, segments, translation_map, target_languages, log_func
|
||||
doc, matched_segments, translation_map, target_languages, log_func
|
||||
)
|
||||
|
||||
# Save the modified document
|
||||
|
@@ -74,8 +74,11 @@ class DocxParser(DocumentParser):
|
||||
|
||||
def generate_translated_document(self, translations: Dict[str, List[str]],
|
||||
target_language: str, output_dir: Path) -> str:
|
||||
"""生成翻譯後的 DOCX 文件 - 使用增強的翻譯插入邏輯"""
|
||||
"""生成翻譯後的 DOCX 文件 - 使用增強的翻譯插入邏輯(從快取讀取)"""
|
||||
try:
|
||||
from sqlalchemy import text as sql_text
|
||||
from app import db
|
||||
|
||||
# 生成輸出檔名
|
||||
output_filename = generate_filename(
|
||||
self.file_path.name,
|
||||
@@ -88,16 +91,29 @@ class DocxParser(DocumentParser):
|
||||
# 提取段落資訊
|
||||
segments = self.extract_segments_with_context()
|
||||
|
||||
# 建立翻譯映射
|
||||
# 建立翻譯映射 - 從快取讀取而非使用傳入的translations參數
|
||||
translation_map = {}
|
||||
translated_texts = translations.get(target_language, [])
|
||||
|
||||
# 對應文字段落與翻譯
|
||||
text_index = 0
|
||||
logger.info(f"Building translation map for {len(segments)} segments in language {target_language}")
|
||||
|
||||
for seg in segments:
|
||||
if text_index < len(translated_texts):
|
||||
translation_map[(target_language, seg.text)] = translated_texts[text_index]
|
||||
text_index += 1
|
||||
# 從翻譯快取中查詢每個段落的翻譯
|
||||
result = db.session.execute(sql_text("""
|
||||
SELECT translated_text
|
||||
FROM dt_translation_cache
|
||||
WHERE source_text = :text AND target_language = :lang
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 1
|
||||
"""), {'text': seg.text, 'lang': target_language})
|
||||
|
||||
row = result.fetchone()
|
||||
if row and row[0]:
|
||||
translation_map[(target_language, seg.text)] = row[0]
|
||||
logger.debug(f"Found translation for: {seg.text[:50]}...")
|
||||
else:
|
||||
logger.warning(f"No translation found for: {seg.text[:50]}...")
|
||||
|
||||
logger.info(f"Translation map built with {len(translation_map)} mappings")
|
||||
|
||||
# 使用增強的翻譯插入邏輯
|
||||
ok_count, skip_count = self.processor.insert_docx_translations(
|
||||
|
Reference in New Issue
Block a user