7th_fix doc
This commit is contained in:
@@ -169,7 +169,9 @@ def _is_our_insert_block(p: Paragraph) -> bool:
|
||||
def should_translate(text: str, src_lang: str) -> bool:
|
||||
"""Determine if text should be translated based on content and source language."""
|
||||
text = text.strip()
|
||||
if len(text) < 3:
|
||||
|
||||
# 只要有字就翻譯 - 最小長度設為1
|
||||
if len(text) < 1:
|
||||
return False
|
||||
|
||||
# Skip pure numbers, dates, etc.
|
||||
@@ -678,10 +680,42 @@ def _insert_docx_translations(doc: docx.Document, segs: List[Segment],
|
||||
if _is_our_insert_block(cell_paragraphs[i]):
|
||||
cell._element.remove(cell_paragraphs[i]._element)
|
||||
|
||||
# 添加新的翻譯到儲存格
|
||||
for t in translations:
|
||||
new_p = cell.add_paragraph()
|
||||
_add_formatted_run(new_p, t, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
|
||||
# 檢查是否為簡單的短文本儲存格(只有原文,沒有複雜結構)
|
||||
cell_content = cell.text.strip()
|
||||
is_simple_cell = len(cell_content) <= 10 and cell_content == seg.text.strip()
|
||||
|
||||
if is_simple_cell:
|
||||
# 對於簡單短文本,直接替換內容而不是添加段落
|
||||
log(f"[INFO] 簡單儲存格內容替換: '{seg.text.strip()}' -> '{translations[0] if translations else 'N/A'}'")
|
||||
|
||||
# 清空所有段落內容
|
||||
for para in cell.paragraphs:
|
||||
para.clear()
|
||||
|
||||
# 在第一個段落中添加原文和翻譯
|
||||
first_para = cell.paragraphs[0] if cell.paragraphs else cell.add_paragraph()
|
||||
|
||||
# 添加原文
|
||||
run_orig = first_para.add_run(seg.text.strip())
|
||||
|
||||
# 添加換行和翻譯
|
||||
for t in translations:
|
||||
first_para.add_run('\n')
|
||||
run_trans = first_para.add_run(t)
|
||||
run_trans.italic = True
|
||||
if INSERT_FONT_SIZE_PT:
|
||||
run_trans.font.size = Pt(INSERT_FONT_SIZE_PT)
|
||||
|
||||
# 添加標記
|
||||
tag_run = first_para.add_run("\u200b")
|
||||
tag_run.italic = True
|
||||
if INSERT_FONT_SIZE_PT:
|
||||
tag_run.font.size = Pt(INSERT_FONT_SIZE_PT)
|
||||
else:
|
||||
# 對於複雜儲存格,使用原有的添加段落方式
|
||||
for t in translations:
|
||||
new_p = cell.add_paragraph()
|
||||
_add_formatted_run(new_p, t, italic=True, font_size_pt=INSERT_FONT_SIZE_PT)
|
||||
|
||||
ok_cnt += 1
|
||||
log(f"[SUCCESS] 表格儲存格插入 {len(translations)} 個翻譯")
|
||||
|
@@ -307,15 +307,11 @@ class ExcelParser(DocumentParser):
|
||||
return None
|
||||
|
||||
def _should_translate(self, text: str, src_lang: str) -> bool:
|
||||
"""判斷文字是否需要翻譯(修正中文長度判斷)"""
|
||||
"""判斷文字是否需要翻譯(只要有字就翻譯)"""
|
||||
text = text.strip()
|
||||
|
||||
# 檢查是否包含中日韓文字
|
||||
has_cjk = self._has_cjk(text)
|
||||
|
||||
# 對於包含CJK字符的文字,放寬長度限制為2個字符
|
||||
min_length = 2 if has_cjk else 3
|
||||
if len(text) < min_length:
|
||||
# 只要有字就翻譯 - 最小長度設為1
|
||||
if len(text) < 1:
|
||||
return False
|
||||
|
||||
# Skip pure numbers, dates, etc.
|
||||
|
263
debug_chaoweng_issue.py
Normal file
263
debug_chaoweng_issue.py
Normal file
@@ -0,0 +1,263 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
深度診斷"超温"翻譯問題
|
||||
檢查從提取到插入的完整流程
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
# 設定編碼
|
||||
sys.stdout.reconfigure(encoding='utf-8')
|
||||
|
||||
from pathlib import Path
|
||||
import docx
|
||||
from docx.table import Table
|
||||
from app import create_app
|
||||
from app.services.document_processor import should_translate
|
||||
from app.services.translation_service import TranslationService
|
||||
|
||||
def debug_chaoweng_extraction():
    """Check whether "超温" is picked up during the document extraction stage.

    Creates the application via ``create_app()``, runs
    ``DocumentProcessor.extract_docx_segments`` on the hard-coded original
    .docx and prints every segment whose text contains "超温", together with
    the verdict of ``should_translate(seg.text, 'zh')``.

    Returns:
        A list of ``(index, segment)`` tuples for the matching segments. The
        list is empty when nothing matches or when the original file does
        not exist, so callers always receive a list.
    """
    banner = "=" * 80
    print(banner)
    print("診斷步驟1: 檢查文件提取階段")
    print(banner)

    app = create_app()

    with app.app_context():
        # Local import, performed inside the application context.
        from app.services.document_processor import DocumentProcessor

        # Upload directory of the job under investigation (machine-specific).
        base_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\17e05695-406f-47af-96eb-a0e23843770e")
        original_file = base_dir / "original_-OR026_17e05695.docx"

        if not original_file.exists():
            print("❌ 原始檔案不存在")
            # Return an empty list instead of an implicit None so the return
            # type is the same on every path.
            return []

        processor = DocumentProcessor()

        # Extract every segment from the document.
        segments = processor.extract_docx_segments(str(original_file))
        print(f"提取到 {len(segments)} 個segments")

        # Collect and describe the segments that contain "超温".
        chaoweng_segments = []
        for i, seg in enumerate(segments):
            if "超温" not in seg.text:
                continue

            chaoweng_segments.append((i, seg))
            print(f"\nSegment {i}:")
            print(f" 種類: {seg.kind}")
            print(f" 上下文: {seg.ctx}")
            print(f" 內容: {repr(seg.text)}")
            print(f" 長度: {len(seg.text.strip())}")

            # Would the filter let this segment through to translation?
            should_trans = should_translate(seg.text, 'zh')
            print(f" should_translate: {should_trans}")

            if seg.kind == "table_cell":
                print(" 🎯 這是表格儲存格segment")
            else:
                print(" ⚠️ 不是表格儲存格類型")

        if chaoweng_segments:
            print(f"✅ 找到 {len(chaoweng_segments)} 個包含'超温'的segments")
        else:
            print("❌ 沒有找到包含'超温'的segments")

        return chaoweng_segments
|
||||
|
||||
def debug_chaoweng_translation(chaoweng_segments):
    """Translate each "超温" segment (zh -> en) and report the outcome.

    Args:
        chaoweng_segments: Sequence of ``(index, segment)`` pairs. A falsy
            value (empty or ``None``) skips this stage after printing a notice.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("診斷步驟2: 檢查翻譯階段")
    print(rule)

    if not chaoweng_segments:
        print("❌ 沒有segments可以測試翻譯")
        return

    flask_app = create_app()

    with flask_app.app_context():
        translator = TranslationService()

        for seg_idx, seg in chaoweng_segments:
            print(f"\n測試 Segment {seg_idx} 的翻譯:")
            print(f"原文: {repr(seg.text)}")

            try:
                # Table cells use the table-cell entry point; every other
                # segment kind uses the sentence-based one.
                if seg.kind == "table_cell":
                    print("使用 translate_word_table_cell() 方法")
                    translate = translator.translate_word_table_cell
                else:
                    print("使用 translate_segment_with_sentences() 方法")
                    translate = translator.translate_segment_with_sentences

                translated = translate(
                    text=seg.text,
                    source_language="zh",
                    target_language="en",
                    user_id=None,
                )

                print(f"翻譯結果: {repr(translated[:100])}...")

                # Classify the result: failure marker, unchanged text, or success.
                if "【翻譯失敗" in translated:
                    verdict = "❌ 翻譯失敗"
                elif translated == seg.text:
                    verdict = "❌ 翻譯結果與原文相同,可能未翻譯"
                else:
                    verdict = "✅ 翻譯成功"
                print(verdict)

            except Exception as e:
                print(f"❌ 翻譯過程發生錯誤: {e}")
|
||||
|
||||
def debug_chaoweng_cache():
    """Print the ``dt_translation_cache`` rows related to "超温".

    Runs two queries inside the application context: an exact match on
    ``source_text`` and a ``LIKE`` match limited to source texts of at most
    10 characters (newest first, at most 10 rows).
    """
    rule = "=" * 80
    print("\n" + rule)
    print("診斷步驟3: 檢查翻譯快取")
    print(rule)

    flask_app = create_app()

    with flask_app.app_context():
        from sqlalchemy import text as sql_text
        from app import db

        # 1. Rows whose source text is exactly "超温".
        print("1. 搜尋精確的'超温'記錄:")
        exact_query = sql_text("""
            SELECT id, source_text, target_language, translated_text, created_at
            FROM dt_translation_cache
            WHERE source_text = '超温'
            ORDER BY created_at DESC
        """)
        exact_rows = db.session.execute(exact_query).fetchall()

        if not exact_rows:
            print(" ❌ 沒有找到精確的'超温'記錄")
        else:
            for rec_id, source_text, target_language, translated_text, _created in exact_rows:
                print(f" ROW {rec_id}: '{source_text}' -> {target_language} -> '{translated_text}'")

        # 2. Short rows that contain "超温" but may carry extra characters.
        print("\n2. 搜尋包含'超温'的記錄:")
        like_query = sql_text("""
            SELECT id, source_text, target_language, translated_text, created_at
            FROM dt_translation_cache
            WHERE source_text LIKE '%超温%'
            AND CHAR_LENGTH(source_text) <= 10
            ORDER BY created_at DESC
            LIMIT 10
        """)
        like_rows = db.session.execute(like_query).fetchall()

        if not like_rows:
            print(" ❌ 沒有找到包含'超温'的短記錄")
        else:
            for rec_id, source_text, target_language, translated_text, _created in like_rows:
                print(f" ROW {rec_id}: '{source_text}' -> {target_language} -> '{translated_text[:30]}...'")
|
||||
|
||||
def debug_chaoweng_insertion():
    """Inspect the translated .docx files for "超温" cells and nearby translations.

    For each translated file (English, Vietnamese) every table cell is
    scanned. Cells containing "超温" are printed with their paragraph
    breakdown, and a paragraph counts as a probable translation when it
    contains one of the language-specific keyword fragments.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("診斷步驟4: 檢查已翻譯文件的插入狀況")
    print(rule)

    # Translated outputs of the job under investigation (machine-specific path).
    base_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\17e05695-406f-47af-96eb-a0e23843770e")
    translated_files = [
        ("英文", base_dir / "translated_original_-OR026_17e05695_en_translat.docx"),
        ("越南文", base_dir / "translated_original_-OR026_17e05695_vi_translat.docx"),
    ]

    # Lower-case fragments that hint at a translation, keyed by language label.
    keyword_hints = {
        "英文": ('over', 'heat', 'temp', 'hot'),
        "越南文": ('quá', 'nóng', 'nhiệt'),
    }

    for lang, file_path in translated_files:
        if not file_path.exists():
            print(f"❌ {lang}翻譯檔案不存在")
            continue

        print(f"\n檢查{lang}翻譯檔案:")
        hints = keyword_hints[lang]

        try:
            doc = docx.Document(str(file_path))

            found_chaoweng = False
            found_translation = False

            for table_no, table in enumerate(doc.tables, start=1):
                for row_no, row in enumerate(table.rows, start=1):
                    for col_no, cell in enumerate(row.cells, start=1):
                        cell_text = cell.text.strip()
                        if "超温" not in cell_text:
                            continue

                        found_chaoweng = True
                        print(f" 🔍 表格{table_no} 行{row_no} 列{col_no}:")
                        print(f" 內容: {repr(cell_text[:100])}")

                        # Paragraph structure of the matching cell.
                        print(f" 段落數: {len(cell.paragraphs)}")
                        for para_no, para in enumerate(cell.paragraphs, start=1):
                            p_text = para.text.strip()
                            if not p_text:
                                continue

                            print(f" 段落{para_no}: {repr(p_text)}")

                            # Look for traces of a translation in this paragraph.
                            lowered = p_text.lower()
                            if any(word in lowered for word in hints):
                                found_translation = True
                                print(f" 🎯 可能的{lang}翻譯")

            print(f" 原文'超温': {'✅ 找到' if found_chaoweng else '❌ 未找到'}")
            print(f" {lang}翻譯: {'✅ 找到' if found_translation else '❌ 未找到'}")

        except Exception as e:
            print(f"❌ 讀取{lang}翻譯檔案失敗: {e}")
|
||||
|
||||
def main():
    """Run the four diagnostic stages in order, then print the list of likely causes.

    Any exception raised by a stage is caught and printed with its traceback.
    """
    print("🔍 深度診斷'超温'翻譯問題")
    print("檢查完整的提取->翻譯->插入流程")

    try:
        # Stage 1: extraction. Stage 2: translation of what was extracted.
        found_segments = debug_chaoweng_extraction()
        debug_chaoweng_translation(found_segments)

        # Stage 3: translation cache. Stage 4: inserted results.
        debug_chaoweng_cache()
        debug_chaoweng_insertion()

        rule = "=" * 80
        closing_lines = (
            "\n" + rule,
            "診斷完成!",
            "可能的問題:",
            "1. 提取階段: segments沒有正確提取'超温'",
            "2. 翻譯階段: 翻譯邏輯沒有處理該segment",
            "3. 快取階段: 翻譯沒有正確存儲",
            "4. 插入階段: 翻譯沒有正確插入到文件",
            rule,
        )
        for line in closing_lines:
            print(line)

    except Exception as e:
        print(f"❌ 診斷過程發生錯誤: {e}")
        import traceback
        print(f"錯誤詳情: {traceback.format_exc()}")


if __name__ == "__main__":
    main()
|
204
debug_table_translation.py
Normal file
204
debug_table_translation.py
Normal file
@@ -0,0 +1,204 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
檢查docx文件表格翻譯問題
|
||||
特別分析"超温"文字的翻譯狀況
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
# 設定編碼
|
||||
sys.stdout.reconfigure(encoding='utf-8')
|
||||
|
||||
from pathlib import Path
|
||||
import docx
|
||||
from docx.table import Table
|
||||
from app import create_app
|
||||
|
||||
def analyze_docx_table_translation():
    """Inspect the tables of the original and translated .docx files for "超温".

    Step 1 prints every non-empty table cell of the original document and
    records whether any cell contains "超温". Step 2 walks the English and
    Vietnamese output documents, reporting cells that still contain the
    source text (with a per-paragraph breakdown and marker detection) and
    cells that look like a translation.

    Returns ``None``; returns early when the original file is missing or
    cannot be read.
    """
    rule = "=" * 80
    print(rule)
    print("檢查docx表格翻譯問題")
    print("任務ID: 17e05695-406f-47af-96eb-a0e23843770e")
    print(rule)

    # Files of the job under investigation (machine-specific path).
    base_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\17e05695-406f-47af-96eb-a0e23843770e")
    original_file = base_dir / "original_-OR026_17e05695.docx"
    translated_en = base_dir / "translated_original_-OR026_17e05695_en_translat.docx"
    translated_vi = base_dir / "translated_original_-OR026_17e05695_vi_translat.docx"

    if not original_file.exists():
        print(f"❌ 原始檔案不存在: {original_file}")
        return

    print(f"✅ 原始檔案: {original_file.name}")

    # 1. Look for "超温" in the tables of the original document.
    print("\n1. 分析原始文件表格內容")
    print("-" * 60)

    try:
        doc = docx.Document(str(original_file))
        tables_found = 0
        target_text_found = False

        for table_no, table in enumerate(doc.tables, start=1):
            tables_found += 1
            print(f"表格 {table_no}:")

            for row_no, row in enumerate(table.rows, start=1):
                for col_no, cell in enumerate(row.cells, start=1):
                    cell_text = cell.text.strip()
                    if not cell_text:
                        continue

                    print(f" 行{row_no} 列{col_no}: {repr(cell_text)}")

                    if "超温" in cell_text:
                        print(" 🎯 找到目標文字 '超温'")
                        target_text_found = True

                        # Paragraph structure of the matching cell.
                        print(f" 儲存格段落數: {len(cell.paragraphs)}")
                        for para_no, para in enumerate(cell.paragraphs, start=1):
                            print(f" 段落{para_no}: {repr(para.text)}")

        print(f"\n總表格數: {tables_found}")
        print(f"是否找到'超温': {'✅' if target_text_found else '❌'}")

    except Exception as e:
        print(f"❌ 讀取原始文件失敗: {e}")
        return

    # 2. Check the corresponding content in each translated version.
    for lang, trans_file in [("英文", translated_en), ("越南文", translated_vi)]:
        if not trans_file.exists():
            print(f"\n❌ {lang}翻譯檔案不存在")
            continue

        print(f"\n2. 檢查{lang}翻譯結果")
        print("-" * 60)

        try:
            trans_doc = docx.Document(str(trans_file))
            translation_found = False

            for table_no, table in enumerate(trans_doc.tables, start=1):
                print(f"{lang}表格 {table_no}:")

                for row_no, row in enumerate(table.rows, start=1):
                    for col_no, cell in enumerate(row.cells, start=1):
                        cell_text = cell.text.strip()
                        if not cell_text:
                            continue

                        # NOTE(review): a cell that still contains "超温" never
                        # reaches the keyword branch below, so translation_found
                        # only reflects cells without the source text; confirm
                        # this is intended.
                        if "超温" in cell_text:
                            print(f" 行{row_no} 列{col_no}: {repr(cell_text)}")
                            print(" ⚠️ 仍包含原文'超温',可能未翻譯")

                            # Detailed paragraph breakdown of the cell.
                            print(f" 儲存格段落數: {len(cell.paragraphs)}")
                            for para_no, para in enumerate(cell.paragraphs, start=1):
                                p_text = para.text.strip()
                                print(f" 段落{para_no}: {repr(p_text)}")

                                # Match the failure marker by its opening
                                # prefix "【翻譯失敗"; a literal ending in the
                                # mismatched bracket "」" would not match a
                                # marker that starts with that prefix.
                                if "【翻譯失敗" in p_text or "translation:" in p_text.lower():
                                    print(" 🔍 發現翻譯標記")
                                elif "\u200b" in p_text:  # zero-width-space insert marker
                                    print(" 🔍 發現翻譯插入標記")

                        # Cells without the source text that look like a translation.
                        elif any(keyword in cell_text.lower() for keyword in ['overheating', 'over-heating', 'quá nóng']):
                            print(f" 行{row_no} 列{col_no}: {repr(cell_text)}")
                            print(" ✅ 可能的翻譯結果")
                            translation_found = True

            print(f"{lang}翻譯狀態: {'✅ 找到翻譯' if translation_found else '❌ 未找到翻譯'}")

        except Exception as e:
            print(f"❌ 讀取{lang}翻譯檔案失敗: {e}")
|
||||
|
||||
def check_translation_cache():
    """Print ``dt_translation_cache`` rows related to "超温" and recent en/vi rows.

    Both queries select ``id, source_text, target_language, translated_text,
    created_at`` in that order. The rows are unpacked by name so that each
    printed label shows the column it describes.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("檢查翻譯快取")
    print(rule)

    app = create_app()

    with app.app_context():
        from sqlalchemy import text as sql_text
        from app import db

        print("\n1. 搜尋'超温'相關的快取記錄")
        print("-" * 60)

        # Cache rows whose source text contains "超温", newest first.
        cache_results = db.session.execute(sql_text("""
            SELECT id, source_text, target_language, translated_text, created_at
            FROM dt_translation_cache
            WHERE source_text LIKE '%超温%'
            ORDER BY created_at DESC
            LIMIT 10
        """)).fetchall()

        if cache_results:
            print(f"找到 {len(cache_results)} 條相關記錄:")
            for rec_id, source_text, target_language, translated_text, created_at in cache_results:
                # Labels follow the SELECT column order: source text first,
                # then target language, then the translated text.
                print(f"ROW {rec_id}: {source_text} -> {target_language}")
                print(f" 目標語言: {target_language}")
                print(f" 翻譯結果: {repr(translated_text[:50])}...")
                print(f" 時間: {created_at}")
                print()
        else:
            print("❌ 未找到包含'超温'的快取記錄")

        # Recent English / Vietnamese cache rows (last 24 hours).
        print("\n2. 檢查近期的翻譯記錄")
        print("-" * 60)

        recent_results = db.session.execute(sql_text("""
            SELECT id, source_text, target_language, translated_text, created_at
            FROM dt_translation_cache
            WHERE created_at >= DATE_SUB(NOW(), INTERVAL 1 DAY)
            AND (target_language = 'en' OR target_language = 'vi')
            ORDER BY created_at DESC
            LIMIT 20
        """)).fetchall()

        print(f"近24小時內的英文/越南文翻譯記錄 (共{len(recent_results)}條):")
        for rec_id, source_text, target_language, translated_text, _created_at in recent_results:
            print(f"ROW {rec_id}: {repr(source_text[:20])}... -> {target_language} -> {repr(translated_text[:30])}...")
|
||||
|
||||
def main():
    """Run the table analysis and the cache check, then print the diagnosis guide.

    Any exception raised along the way is caught and printed with its traceback.
    """
    print("🔍 診斷docx表格翻譯問題")
    print("重點檢查: '超温' 文字翻譯狀況")

    try:
        # Document tables first, translation cache second.
        analyze_docx_table_translation()
        check_translation_cache()

        rule = "=" * 80
        closing_lines = (
            "\n" + rule,
            "診斷總結",
            rule,
            "請根據以上結果判斷問題類型:",
            "1. 解析問題: 原始文件中找不到'超温'",
            "2. 翻譯問題: 快取中沒有'超温'的翻譯記錄",
            "3. 插入問題: 有翻譯記錄但未插入到文件中",
            "4. 版面問題: 翻譯已插入但格式或位置導致看不到",
            rule,
        )
        for line in closing_lines:
            print(line)

    except Exception as e:
        print(f"❌ 診斷過程發生錯誤: {e}")
        import traceback
        print(f"錯誤詳情: {traceback.format_exc()}")


if __name__ == "__main__":
    main()
|
120
test_single_char_translation.py
Normal file
120
test_single_char_translation.py
Normal file
@@ -0,0 +1,120 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
測試單字符翻譯功能
|
||||
確認長度過濾已改為1,單個字符也能翻譯
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
# 設定編碼
|
||||
sys.stdout.reconfigure(encoding='utf-8')
|
||||
|
||||
from app import create_app
|
||||
from app.services.translation_service import TranslationService
|
||||
from app.services.document_processor import should_translate
|
||||
|
||||
def test_length_filtering():
    """Print the translate / skip verdict of both length filters for short inputs.

    The same sample texts are passed, with source language ``'auto'``, first
    to ``should_translate`` and then to ``TranslationService._should_translate``.
    """
    rule = "=" * 80
    print(rule)
    print("測試長度過濾邏輯 - 應該只要有字就翻譯")
    print(rule)

    # (text, description) samples.
    test_cases = [
        ("", "空字符串"),
        (" ", "只有空格"),
        ("a", "單個英文字母"),
        ("1", "單個數字"),
        ("中", "單個中文字"),
        ("超", "單字中文"),
        ("温", "單字中文"),
        ("超温", "雙字中文"),
        ("A", "單個大寫英文"),
        ("の", "單個日文"),
        ("가", "單個韓文"),
    ]

    def _report(predicate):
        # Print one verdict line per sample for the given filter callable.
        for text, desc in test_cases:
            status = "✅ 會翻譯" if predicate(text, 'auto') else "❌ 不翻譯"
            print(f"{desc:12} '{text}' -> {status}")

    print("1. 測試 document_processor.should_translate()")
    print("-" * 60)
    _report(should_translate)

    # Same samples against the TranslationService filter.
    flask_app = create_app()
    with flask_app.app_context():
        service = TranslationService()

        print("\n2. 測試 translation_service._should_translate()")
        print("-" * 60)
        _report(service._should_translate)
|
||||
|
||||
def test_actual_translation():
    """Translate a few single characters (zh -> en) and print each result.

    Uses ``TranslationService.translate_excel_cell``; a failure for one
    character is printed and does not stop the remaining ones.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("測試實際翻譯功能")
    print(rule)

    flask_app = create_app()

    with flask_app.app_context():
        translator = TranslationService()

        print("測試單字符英文翻譯:")
        print("-" * 60)

        # Single-character samples.
        for char in ("超", "温", "中", "文"):
            try:
                # Exercised through the Excel-cell entry point.
                # user_id=None: avoids the foreign-key constraint problem,
                # per the original author's note.
                result = translator.translate_excel_cell(
                    text=char,
                    source_language="zh",
                    target_language="en",
                    user_id=None,
                )
                print(f"'{char}' -> '{result[:30]}'")
            except Exception as e:
                print(f"'{char}' -> ❌ 翻譯失敗: {str(e)[:50]}...")
|
||||
|
||||
def main():
    """Run the length-filter check and print the summary.

    Any exception is caught and printed with its traceback.
    """
    print("🧪 測試單字符翻譯功能")
    print("驗證: 長度過濾已改為1,只要有字就翻譯")

    try:
        # Length-filter logic.
        test_length_filtering()

        # The live translation check stays disabled; the original author
        # notes it may fail on a foreign-key constraint.
        # test_actual_translation()

        rule = "=" * 80
        closing_lines = (
            "\n" + rule,
            "✅ 長度過濾測試完成!",
            "總結:",
            " • document_processor.should_translate(): 最小長度 = 1",
            " • translation_service._should_translate(): 最小長度 = 1",
            " • 單個字符現在應該能夠正常翻譯",
            " • '超温'、'存放' 等短詞不會再被過濾",
            rule,
        )
        for line in closing_lines:
            print(line)

    except Exception as e:
        print(f"❌ 測試過程發生錯誤: {e}")
        import traceback
        print(f"錯誤詳情: {traceback.format_exc()}")


if __name__ == "__main__":
    main()
|
Reference in New Issue
Block a user