Files
Document_Translator/test_logic_validation.py
2025-09-03 15:07:34 +08:00

162 lines
5.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Verify that the cell-translation logic fixes are in place.

No actual translation is performed; only the logic improvements are checked.
"""
import sys
import os
# Put this script's own directory at the front of the module search path.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# Set the output encoding: the messages printed below contain CJK text and emoji.
sys.stdout.reconfigure(encoding='utf-8')
from pathlib import Path
def test_excel_translation_logic():
    """Report whether translation_service.py contains the cell-based translation changes.

    Reads ``app/services/translation_service.py`` as UTF-8 text and prints a
    pass/fail line for each expected code marker. The checks are plain
    substring tests; nothing is imported or executed.

    The file is looked up relative to the current working directory first and,
    if it is not there, relative to this script's own directory, so the report
    does not depend on where the script is launched from. When the file is
    found in neither place a single error line is printed instead.

    Returns:
        None. All results are written to stdout.
    """
    print("=" * 80)
    print("驗證Excel翻譯邏輯修改")
    print("=" * 80)

    relative_path = Path("app/services/translation_service.py")
    service_file = relative_path
    if not service_file.exists():
        # Not under the current working directory: try next to this script.
        service_file = Path(__file__).resolve().parent / relative_path
    if not service_file.exists():
        print("❌ 找不到translation_service.py檔案")
        return

    content = service_file.read_text(encoding='utf-8')

    # Each entry: (heading, substrings that must ALL be present,
    #              message when present, message when missing).
    checks = (
        (
            "1. 檢查是否新增Excel儲存格翻譯方法",
            ("def translate_excel_cell(",),
            " ✅ 已新增 translate_excel_cell() 方法",
            " ❌ 未找到 translate_excel_cell() 方法",
        ),
        (
            "\n2. 檢查主翻譯邏輯是否支援Excel專用處理",
            ("elif file_ext in ['.xlsx', '.xls']:",),
            " ✅ 主翻譯邏輯已支援Excel專用處理路徑",
            " ❌ 主翻譯邏輯未支援Excel專用處理",
        ),
        (
            "\n3. 檢查Excel是否使用儲存格為單位翻譯",
            ("translate_excel_cell(", "Using cell-based processing for Excel"),
            " ✅ Excel已改用儲存格為單位翻譯",
            " ❌ Excel仍使用句子切片邏輯",
        ),
        (
            "\n4. 檢查Word表格儲存格翻譯方法",
            ("def translate_word_table_cell(",),
            " ✅ 已新增 translate_word_table_cell() 方法",
            " ❌ 未找到 translate_word_table_cell() 方法",
        ),
        (
            "\n5. 檢查Word表格處理邏輯",
            ('seg.kind == "table_cell"',),
            " ✅ Word翻譯已支援表格儲存格專用處理",
            " ❌ Word翻譯未支援表格儲存格處理",
        ),
    )
    for heading, markers, found_message, missing_message in checks:
        print(heading)
        if all(marker in content for marker in markers):
            print(found_message)
        else:
            print(missing_message)
def test_document_processor_logic():
    """Report whether document_processor.py contains the table-cell extraction changes.

    Reads ``app/services/document_processor.py`` as UTF-8 text and prints a
    pass/fail line for each expected code marker. The checks are plain
    substring tests; nothing is imported or executed.

    The file is looked up relative to the current working directory first and,
    if it is not there, relative to this script's own directory, so the report
    does not depend on where the script is launched from. When the file is
    found in neither place a single error line is printed instead.

    Returns:
        None. All results are written to stdout.
    """
    print("\n" + "=" * 80)
    print("驗證文件處理器邏輯修改")
    print("=" * 80)

    relative_path = Path("app/services/document_processor.py")
    processor_file = relative_path
    if not processor_file.exists():
        # Not under the current working directory: try next to this script.
        processor_file = Path(__file__).resolve().parent / relative_path
    if not processor_file.exists():
        print("❌ 找不到document_processor.py檔案")
        return

    content = processor_file.read_text(encoding='utf-8')

    # Each entry: (heading, substrings that must ALL be present,
    #              message when present, message when missing).
    checks = (
        (
            "1. 檢查是否新增儲存格文字提取方法",
            ("_get_cell_full_text(",),
            " ✅ 已新增 _get_cell_full_text() 方法",
            " ❌ 未找到 _get_cell_full_text() 方法",
        ),
        (
            "\n2. 檢查表格處理是否改用儲存格為單位",
            ("table_cell", "cell_text = _get_cell_full_text(cell)"),
            " ✅ 表格處理已改用儲存格為單位提取",
            " ❌ 表格仍使用段落切片提取",
        ),
        (
            "\n3. 檢查翻譯插入區塊識別",
            ("_is_our_insert_block_text(",),
            " ✅ 已新增文字版本的插入區塊識別",
            " ❌ 未找到文字版本插入區塊識別",
        ),
    )
    for heading, markers, found_message, missing_message in checks:
        print(heading)
        if all(marker in content for marker in markers):
            print(found_message)
        else:
            print(missing_message)
def test_key_improvements():
    """Print a numbered summary of the key improvements.

    Emits a banner followed by one three-line entry (name, description,
    benefit) per improvement. The list is static; nothing is inspected.
    """
    banner = "=" * 80
    print("\n" + banner)
    print("關鍵改進總結")
    print(banner)

    improvements = [
        {
            "name": "Excel翻譯不再切片",
            "description": "Excel儲存格內容作為完整單位翻譯避免快取對應錯誤",
            "benefit": "解決D2-D8, F2-F6等欄位翻譯缺失問題",
        },
        {
            "name": "Word表格儲存格完整翻譯",
            "description": "Word表格儲存格內所有段落合併為一個翻譯單位",
            "benefit": "保持儲存格內容完整性,避免部分段落漏翻譯",
        },
        {
            "name": "專用翻譯方法",
            "description": "為Excel和Word表格分別建立專用翻譯方法",
            "benefit": "針對不同文件格式優化翻譯策略",
        },
        {
            "name": "智能邏輯分流",
            "description": "根據文件類型和內容類型自動選擇合適的翻譯邏輯",
            "benefit": "提高翻譯準確性和覆蓋率",
        },
    ]

    for number, entry in enumerate(improvements, start=1):
        title = entry["name"]
        description = entry["description"]
        benefit = entry["benefit"]
        # One print call with a newline separator yields the same three lines.
        print(
            f"\n{number}. {title}",
            f" 描述: {description}",
            f" 效益: {benefit}",
            sep="\n",
        )
def main():
    """Run every validation step and print a closing summary.

    Calls the Excel check, the document-processor check and the improvement
    summary in that order, then prints the list of problems the changes
    address. Any exception raised along the way is caught and reported on
    stdout together with its traceback; it is not re-raised.
    """
    import traceback

    print("🔍 驗證儲存格翻譯邏輯修復狀況")
    print("檢查程式碼層面的改進,無需實際翻譯測試")

    separator = "=" * 80
    closing_lines = (
        "\n" + separator,
        "✅ 邏輯驗證完成!",
        "🎯 主要解決問題:",
        " • Excel: D2-D8, F2-F6 翻譯缺失 (切片導致快取對應失敗)",
        " • Word表格: 儲存格部分段落漏翻譯 (段落切片不完整)",
        " • 泰文翻譯: D4, H2 翻譯缺失 (同樣的切片問題)",
        separator,
    )

    try:
        # Excel translation logic
        test_excel_translation_logic()
        # Document processor logic
        test_document_processor_logic()
        # Summary of the key improvements
        test_key_improvements()

        for line in closing_lines:
            print(line)
    except Exception as error:
        print(f"❌ 驗證過程中發生錯誤: {error!s}")
        print(f"錯誤詳情: {traceback.format_exc()}")
# Run the validation only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()