# Commit: "5th_fix excel problem"
# New file: test_logic_validation.py (162 lines added; diff hunk @@ -0,0 +1,162 @@)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Verify the status of the cell-translation logic fix.

No actual translation is performed; only the logic changes are checked.
"""

import os
import sys
from pathlib import Path

# Put this script's own directory at the front of the module search path.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Set the stdout encoding to UTF-8 (the messages printed below contain CJK
# text and emoji). Skip the call when stdout has been replaced by a stream
# that has no reconfigure() method, instead of failing at import time.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding='utf-8')


def test_excel_translation_logic():
    """Report whether translation_service.py contains the cell-based changes.

    Reads ``app/services/translation_service.py`` (a relative path, so it is
    resolved against the current working directory) and prints one pass/fail
    line per marker substring searched for in the file text. Nothing is
    translated, asserted or returned; the outcome is only written to stdout.
    If the file does not exist, a single "not found" line is printed instead.
    """
    print("=" * 80)
    print("驗證Excel翻譯邏輯修改")
    print("=" * 80)

    # Look for the new Excel handling logic in translation_service.py.
    service_file = Path("app/services/translation_service.py")
    if not service_file.exists():
        print("❌ 找不到translation_service.py檔案")
        return

    content = service_file.read_text(encoding='utf-8')

    # One row per check: (heading, marker found?, pass message, fail message).
    checks = [
        (
            "1. 檢查是否新增Excel儲存格翻譯方法",
            "def translate_excel_cell(" in content,
            " ✅ 已新增 translate_excel_cell() 方法",
            " ❌ 未找到 translate_excel_cell() 方法",
        ),
        (
            "\n2. 檢查主翻譯邏輯是否支援Excel專用處理",
            "elif file_ext in ['.xlsx', '.xls']:" in content,
            " ✅ 主翻譯邏輯已支援Excel專用處理路徑",
            " ❌ 主翻譯邏輯未支援Excel專用處理",
        ),
        (
            "\n3. 檢查Excel是否使用儲存格為單位翻譯",
            "translate_excel_cell(" in content
            and "Using cell-based processing for Excel" in content,
            " ✅ Excel已改用儲存格為單位翻譯",
            " ❌ Excel仍使用句子切片邏輯",
        ),
        (
            "\n4. 檢查Word表格儲存格翻譯方法",
            "def translate_word_table_cell(" in content,
            " ✅ 已新增 translate_word_table_cell() 方法",
            " ❌ 未找到 translate_word_table_cell() 方法",
        ),
        (
            "\n5. 檢查Word表格處理邏輯",
            'seg.kind == "table_cell"' in content,
            " ✅ Word翻譯已支援表格儲存格專用處理",
            " ❌ Word翻譯未支援表格儲存格處理",
        ),
    ]
    for heading, found, passed_msg, failed_msg in checks:
        print(heading)
        print(passed_msg if found else failed_msg)


def test_document_processor_logic():
    """Report whether document_processor.py contains the table-cell changes.

    Reads ``app/services/document_processor.py`` (a relative path, so it is
    resolved against the current working directory) and prints one pass/fail
    line per marker substring searched for in the file text. Nothing is
    asserted or returned; the outcome is only written to stdout. If the file
    does not exist, a single "not found" line is printed instead.
    """
    print("\n" + "=" * 80)
    print("驗證文件處理器邏輯修改")
    print("=" * 80)

    # Look for the table-cell handling logic in document_processor.py.
    processor_file = Path("app/services/document_processor.py")
    if not processor_file.exists():
        print("❌ 找不到document_processor.py檔案")
        return

    content = processor_file.read_text(encoding='utf-8')

    # One row per check: (heading, marker found?, pass message, fail message).
    checks = [
        (
            "1. 檢查是否新增儲存格文字提取方法",
            "_get_cell_full_text(" in content,
            " ✅ 已新增 _get_cell_full_text() 方法",
            " ❌ 未找到 _get_cell_full_text() 方法",
        ),
        (
            "\n2. 檢查表格處理是否改用儲存格為單位",
            "table_cell" in content
            and "cell_text = _get_cell_full_text(cell)" in content,
            " ✅ 表格處理已改用儲存格為單位提取",
            " ❌ 表格仍使用段落切片提取",
        ),
        (
            "\n3. 檢查翻譯插入區塊識別",
            "_is_our_insert_block_text(" in content,
            " ✅ 已新增文字版本的插入區塊識別",
            " ❌ 未找到文字版本插入區塊識別",
        ),
    ]
    for heading, found, passed_msg, failed_msg in checks:
        print(heading)
        print(passed_msg if found else failed_msg)


def test_key_improvements():
    """Print a numbered summary of the key improvements.

    The summary is a fixed list defined in this function; each entry is
    printed as a numbered name followed by its description and benefit lines.
    Nothing is checked, asserted or returned.
    """
    print("\n" + "=" * 80)
    print("關鍵改進總結")
    print("=" * 80)

    improvements = [
        {
            "name": "Excel翻譯不再切片",
            "description": "Excel儲存格內容作為完整單位翻譯,避免快取對應錯誤",
            "benefit": "解決D2-D8, F2-F6等欄位翻譯缺失問題",
        },
        {
            "name": "Word表格儲存格完整翻譯",
            "description": "Word表格儲存格內所有段落合併為一個翻譯單位",
            "benefit": "保持儲存格內容完整性,避免部分段落漏翻譯",
        },
        {
            "name": "專用翻譯方法",
            "description": "為Excel和Word表格分別建立專用翻譯方法",
            "benefit": "針對不同文件格式優化翻譯策略",
        },
        {
            "name": "智能邏輯分流",
            "description": "根據文件類型和內容類型自動選擇合適的翻譯邏輯",
            "benefit": "提高翻譯準確性和覆蓋率",
        },
    ]

    # Numbering starts at 1; each entry is preceded by a blank line.
    for i, improvement in enumerate(improvements, 1):
        print(f"\n{i}. {improvement['name']}")
        print(f" 描述: {improvement['description']}")
        print(f" 效益: {improvement['benefit']}")


def main():
    """Run every code-level check in order and print a closing summary.

    Calls the three check functions, then prints the list of problems the fix
    addresses. Any exception raised along the way is caught here, reported on
    stdout together with its traceback, and not re-raised.
    """
    print("🔍 驗證儲存格翻譯邏輯修復狀況")
    print("檢查程式碼層面的改進,無需實際翻譯測試")

    try:
        # Check the Excel translation logic.
        test_excel_translation_logic()

        # Check the document processor logic.
        test_document_processor_logic()

        # Summarize the key improvements.
        test_key_improvements()

        print("\n" + "=" * 80)
        print("✅ 邏輯驗證完成!")
        print("🎯 主要解決問題:")
        print(" • Excel: D2-D8, F2-F6 翻譯缺失 (切片導致快取對應失敗)")
        print(" • Word表格: 儲存格部分段落漏翻譯 (段落切片不完整)")
        print(" • 泰文翻譯: D4, H2 翻譯缺失 (同樣的切片問題)")
        print("=" * 80)

    except Exception as e:
        # Top-level boundary of the script: report the failure and its
        # traceback on stdout rather than letting the exception propagate.
        print(f"❌ 驗證過程中發生錯誤: {e}")
        import traceback
        print(f"錯誤詳情: {traceback.format_exc()}")


# Run the checks only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
# (Commit-page footer links "Reference in New Issue" and "Block a user" are web UI text, not part of this file.)