5th_fix excel problem

This commit is contained in:
beabigegg
2025-09-03 15:07:34 +08:00
parent cce3fd4925
commit 5fd0671b4f
28 changed files with 4484 additions and 97 deletions

120
test_excel_fix.py Normal file
View File

@@ -0,0 +1,120 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Manual check of the Excel translation fix.
"""
import sys
import os
# Put the directory containing this file at the front of sys.path
# (presumably so the project-local `app` package imported below resolves
# regardless of the current working directory -- confirm against repo layout).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# Set the stdout encoding to UTF-8; the script prints non-ASCII text.
sys.stdout.reconfigure(encoding='utf-8')
from pathlib import Path
from app.services.translation_service import ExcelParser
def test_excel_translation_fix():
    """Manually exercise the Excel translation fix and print a report.

    The routine runs three stages, each printed under its own heading:

    1. Call the parser's ``_should_translate`` (with ``'auto'``) and
       ``_has_cjk`` on a fixed list of sample cell texts.
    2. Extract the text segments from the workbook, report whether the A1
       text is among them, and list the first ten segments.
    3. Inside an application context obtained from ``create_app()``, look up
       the most recent ``ja`` row in ``dt_translation_cache`` for each of
       the first ten segments.

    Nothing is asserted; results are only printed. Returns ``None``, and
    returns early when the hard-coded workbook file does not exist.
    """
    banner = "=" * 80
    rule = "-" * 60
    preview_limit = 10  # only the first ten segments are listed / looked up

    print(banner)
    print("測試Excel翻譯修正效果")
    print(banner)

    # Sample workbook location (absolute, machine-specific path).
    upload_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\f0b78200-2c5e-41a4-bac8-1536f92529e9")
    workbook_path = upload_dir / "original_panjit_f0b78200.xlsx"
    if not workbook_path.exists():
        print(f"原始文件不存在: {workbook_path}")
        return

    # Parser instance for the workbook under inspection.
    parser = ExcelParser(str(workbook_path))

    # ---- Stage 1: per-text translate / CJK decisions --------------------
    print("\n1. 測試修正後的should_translate函數")
    print(rule)
    samples = (
        "製程",  # A1 cell, previously left untranslated
        "主要特點",  # C1 cell
        "優勢亮點",  # D1 cell
        "AB",  # two Latin letters
        "123",  # digits only
        "工藝",  # two Chinese characters
        "Epoxy 膠黏(導電/導熱銀膠)",  # B3 cell
    )
    for sample in samples:
        translatable = parser._should_translate(sample, 'auto')
        contains_cjk = parser._has_cjk(sample)
        print(f"'{sample}': should_translate={translatable}, has_cjk={contains_cjk}, len={len(sample)}")

    # ---- Stage 2: extracted segments -------------------------------------
    print("\n2. 測試提取的文字片段")
    print(rule)
    segments = parser.extract_text_segments()
    print(f"修正後提取到 {len(segments)} 個文字片段")

    # Is the A1 text part of the extracted segments?
    a1_text = "製程"
    if a1_text not in segments:
        print(f"❌ A1內容 '{a1_text}' 仍未被包含在提取列表中")
    else:
        print(f"✅ A1內容 '{a1_text}' 已被包含在提取列表中")

    # List the leading segments using repr() so control characters are visible.
    print("\n前10個提取片段:")
    for position, segment in enumerate(segments[:preview_limit], start=1):
        print(f" {position:2d}. {segment!r}")

    # ---- Stage 3: simulated translation-cache mapping --------------------
    print("\n3. 測試翻譯快取映射邏輯(模擬)")
    print(rule)

    # Imported here rather than at module level, as in the original script.
    from app import create_app
    app = create_app()
    with app.app_context():
        from sqlalchemy import text as sql_text
        from app import db

        target_language = 'ja'  # Japanese
        cached = {}
        hits = 0
        # Newest cache row for one (source_text, target_language) pair.
        lookup_sql = (
            "\n"
            "SELECT translated_text\n"
            "FROM dt_translation_cache\n"
            "WHERE source_text = :text AND target_language = :lang\n"
            "ORDER BY created_at DESC\n"
            "LIMIT 1\n"
        )

        print(f"查詢翻譯快取中的 {target_language} 翻譯...")
        for source in segments[:preview_limit]:
            params = {'text': source, 'lang': target_language}
            row = db.session.execute(sql_text(lookup_sql), params).fetchone()
            if not (row and row[0]):
                print(f"❌ 未找到翻譯: '{source[:30]}...'")
                continue
            cached[source] = row[0]
            print(f"'{source[:20]}...' -> '{row[0][:20]}...'")
            hits += 1

        print(f"\n翻譯映射結果: {hits}/{min(preview_limit, len(segments))} 個片段找到翻譯")

        # Dedicated check for the A1 text (only found if it was among the
        # segments looked up above).
        if a1_text in cached:
            print(f"✅ A1內容 '{a1_text}' 的翻譯: '{cached[a1_text]}'")
        else:
            print(f"❌ A1內容 '{a1_text}' 沒有找到翻譯")

    print("\n" + banner)
    print("測試完成!")
    print(banner)
# Run the diagnostic only when this file is executed directly as a script.
if __name__ == "__main__":
    test_excel_translation_fix()