7th_fix doc

This commit is contained in:
beabigegg
2025-09-03 19:00:29 +08:00
parent 086c4f25b9
commit 82aaa315bb
5 changed files with 629 additions and 12 deletions

View File

@@ -169,7 +169,9 @@ def _is_our_insert_block(p: Paragraph) -> bool:
def should_translate(text: str, src_lang: str) -> bool:
"""Determine if text should be translated based on content and source language."""
text = text.strip()
if len(text) < 3:
# 只要有字就翻譯 - 最小長度設為1
if len(text) < 1:
return False
# Skip pure numbers, dates, etc.
@@ -678,10 +680,42 @@ def _insert_docx_translations(doc: docx.Document, segs: List[Segment],
if _is_our_insert_block(cell_paragraphs[i]):
cell._element.remove(cell_paragraphs[i]._element)
# 添加新的翻譯到儲存格
for t in translations:
new_p = cell.add_paragraph()
_add_formatted_run(new_p, t, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
# 檢查是否為簡單的短文本儲存格(只有原文,沒有複雜結構)
cell_content = cell.text.strip()
is_simple_cell = len(cell_content) <= 10 and cell_content == seg.text.strip()
if is_simple_cell:
# 對於簡單短文本,直接替換內容而不是添加段落
log(f"[INFO] 簡單儲存格內容替換: '{seg.text.strip()}' -> '{translations[0] if translations else 'N/A'}'")
# 清空所有段落內容
for para in cell.paragraphs:
para.clear()
# 在第一個段落中添加原文和翻譯
first_para = cell.paragraphs[0] if cell.paragraphs else cell.add_paragraph()
# 添加原文
run_orig = first_para.add_run(seg.text.strip())
# 添加換行和翻譯
for t in translations:
first_para.add_run('\n')
run_trans = first_para.add_run(t)
run_trans.italic = True
if INSERT_FONT_SIZE_PT:
run_trans.font.size = Pt(INSERT_FONT_SIZE_PT)
# 添加標記
tag_run = first_para.add_run("\u200b")
tag_run.italic = True
if INSERT_FONT_SIZE_PT:
tag_run.font.size = Pt(INSERT_FONT_SIZE_PT)
else:
# 對於複雜儲存格,使用原有的添加段落方式
for t in translations:
new_p = cell.add_paragraph()
_add_formatted_run(new_p, t, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
ok_cnt += 1
log(f"[SUCCESS] 表格儲存格插入 {len(translations)} 個翻譯")

View File

@@ -307,15 +307,11 @@ class ExcelParser(DocumentParser):
return None
def _should_translate(self, text: str, src_lang: str) -> bool:
"""判斷文字是否需要翻譯(修正中文長度判斷"""
"""判斷文字是否需要翻譯(只要有字就翻譯)"""
text = text.strip()
# 檢查是否包含中日韓文字
has_cjk = self._has_cjk(text)
# 對於包含CJK字符的文字放寬長度限制為2個字符
min_length = 2 if has_cjk else 3
if len(text) < min_length:
# 只要有字就翻譯 - 最小長度設為1
if len(text) < 1:
return False
# Skip pure numbers, dates, etc.