Files
Document_Translator/analyze_latest_excel_test.py
2025-09-03 15:07:34 +08:00

220 lines
7.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
分析最新Excel測試結果 - 檢查修正是否真正生效
"""
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# 設定編碼
sys.stdout.reconfigure(encoding='utf-8')
from pathlib import Path
import openpyxl
from app.services.translation_service import ExcelParser
def _print_section(title):
    """Print a numbered section title followed by a 60-character rule."""
    print(f"\n{title}")
    print("-" * 60)


def _report_should_translate(parser):
    """Print the parser's translate/skip decision for a fixed set of sample strings."""
    test_texts = [
        ("製程", "A1儲存格"),
        ("主要特點", "標題文字"),
        ("AB", "2個英文字母"),
        ("123", "純數字"),
        ("工藝", "2個中文字符"),
        ("Epoxy 膠黏(導電/導熱銀膠)", "複合文字"),
    ]
    for text, desc in test_texts:
        should_translate = parser._should_translate(text, 'auto')
        has_cjk = parser._has_cjk(text)
        # NOTE(review): the 2/3 thresholds presumably mirror the parser's own
        # minimum-length rule; they are only displayed here — confirm.
        min_length = 2 if has_cjk else 3
        print(f" '{text}' ({desc}):")
        print(f" 長度: {len(text)}, CJK: {has_cjk}, 最小長度: {min_length}")
        print(f" 應翻譯: {should_translate}")
        print()


def _report_segments(segments, a1_content):
    """Print every extracted segment and whether the A1 text is among them."""
    print(f"✅ 總共提取 {len(segments)} 個文字片段")
    if a1_content in segments:
        print(f"✅ A1內容 '{a1_content}' 已被提取")
        index = segments.index(a1_content)
        print(f" 在列表中的位置: 第{index+1}")
    else:
        print(f"❌ A1內容 '{a1_content}' 仍未被提取")

    print(f"\n 所有提取的片段:")
    for position, segment in enumerate(segments, start=1):
        print(f" {position:2d}. {segment!r}")
        if segment == a1_content:
            print(f" ⬆️ 這是A1的內容")


def _report_a1(wb_orig, wb_trans, wb_orig_vals):
    """Compare cell A1 of the active sheet between the original and translated workbooks.

    ``wb_orig_vals`` is the original workbook loaded with ``data_only=True``,
    or ``None`` when that load failed.
    """
    a1_orig = wb_orig.active['A1'].value
    a1_trans = wb_trans.active['A1'].value

    print(f" A1原始值: {a1_orig!r}")
    if wb_orig_vals is not None:
        a1_orig_display = wb_orig_vals.active['A1'].value
        print(f" A1顯示值: {a1_orig_display!r}")
    print(f" A1翻譯值: {a1_trans!r}")

    if isinstance(a1_trans, str) and '\n' in a1_trans:
        # A string containing '\n' always splits into at least two parts,
        # so indexing [0] and [1] is safe.
        lines = a1_trans.split('\n')
        print(f" ✅ A1已翻譯格式: 原文+換行+譯文")
        print(f" 原文行: {lines[0]!r}")
        print(f" 譯文行: {lines[1]!r}")
    elif a1_orig == a1_trans:
        print(f" ❌ A1未翻譯 - 內容相同")
    else:
        print(f" ⚠️ A1內容有變化但格式不明")


def _report_other_cells(wb_orig, wb_trans):
    """Compare a fixed list of cells on the active sheets, skipping cells empty in the original."""
    important_cells = ['B1', 'C1', 'D1', 'A2', 'B2', 'C2']
    sheet_orig = wb_orig.active
    sheet_trans = wb_trans.active
    for cell_name in important_cells:
        orig_val = sheet_orig[cell_name].value
        trans_val = sheet_trans[cell_name].value
        if not orig_val:
            # Only cells that have content in the original are reported.
            continue

        print(f"\n {cell_name}儲存格:")
        print(f" 原始: {orig_val!r}")
        print(f" 翻譯: {trans_val!r}")
        if isinstance(trans_val, str) and '\n' in trans_val:
            lines = trans_val.split('\n')
            print(f" 狀態: ✅ 已翻譯 (雙行格式)")
            print(f" 原文: {lines[0]!r}")
            print(f" 譯文: {lines[1]!r}")
        elif orig_val == trans_val:
            print(f" 狀態: ❌ 未翻譯")
        else:
            print(f" 狀態: ⚠️ 內容有變化")


def _report_cache_status(segments, a1_content):
    """Query ``dt_translation_cache`` for the A1 text and for the first ten segments."""
    from app import create_app
    app = create_app()
    with app.app_context():
        from sqlalchemy import text as sql_text
        from app import db

        target_language = 'ja'
        print(f"查詢 '{a1_content}' 在翻譯快取中的狀況...")

        # Exact-match lookup for the A1 text (newest three rows).
        result = db.session.execute(sql_text("""
            SELECT source_text, translated_text, created_at
            FROM dt_translation_cache
            WHERE source_text = :text AND target_language = :lang
            ORDER BY created_at DESC
            LIMIT 3
        """), {'text': a1_content, 'lang': target_language})
        rows = result.fetchall()
        if rows:
            print(f"✅ 找到 {len(rows)} 筆精確匹配的翻譯記錄:")
            for position, (src, trans, created_at) in enumerate(rows, start=1):
                print(f" {position}. 原文: {src!r}")
                print(f" 譯文: {trans!r}")
                print(f" 時間: {created_at}")
        else:
            print(f"❌ 未找到精確匹配的翻譯記錄")

        # Per-segment lookup, limited to the first ten segments.
        print(f"\n檢查所有提取片段的翻譯快取狀況:")
        sample = segments[:10]
        found_count = 0
        for position, segment in enumerate(sample, start=1):
            result = db.session.execute(sql_text("""
                SELECT translated_text
                FROM dt_translation_cache
                WHERE source_text = :text AND target_language = :lang
                ORDER BY created_at DESC
                LIMIT 1
            """), {'text': segment, 'lang': target_language})
            row = result.fetchone()
            if row:
                found_count += 1
                print(f"{position:2d}. '{segment[:20]}...' -> '{row[0][:20]}...'")
            else:
                print(f"{position:2d}. '{segment[:20]}...' -> 無翻譯記錄")

        checked = len(sample)
        # Guard against an empty segment list, which would otherwise divide by zero.
        hit_rate = found_count / checked * 100 if checked else 0.0
        print(f"\n翻譯快取命中率: {found_count}/{checked} = {hit_rate:.1f}%")


def analyze_latest_excel_test() -> None:
    """Print a step-by-step analysis of one hard-coded Excel translation test run.

    The report covers five sections: the parser's translate/skip decisions for
    sample strings, the text segments extracted from the original workbook, a
    comparison of cell A1, a comparison of a few other cells, and the state of
    the translation cache table for the extracted segments.

    Returns early (after printing a message) when either the original or the
    translated workbook file does not exist. All output goes to stdout.
    """
    from contextlib import ExitStack, closing

    print("=" * 80)
    print("分析最新Excel測試結果")
    print("UUID: 185bb457-b703-4e98-94a2-fde072b895c4")
    print("=" * 80)

    # Locations of the files produced by the test run under inspection.
    test_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\185bb457-b703-4e98-94a2-fde072b895c4")
    original_file = test_dir / "original_panjit_185bb457.xlsx"
    translated_file = test_dir / "original_panjit_185bb457_ja_translated.xlsx"

    if not original_file.exists():
        print(f"原始文件不存在: {original_file}")
        return
    if not translated_file.exists():
        print(f"翻譯文件不存在: {translated_file}")
        return

    print(f"\n✅ 檔案確認:")
    print(f" 原始文件: {original_file.name}")
    print(f" 翻譯文件: {translated_file.name}")

    # 1. Exercise the parser's translate/skip decision.
    _print_section("1. 測試ExcelParser的_should_translate函數")
    parser = ExcelParser(str(original_file))
    _report_should_translate(parser)

    # 2. Inspect the segments actually extracted from the workbook.
    _print_section("2. 檢查實際提取的文字片段")
    segments = parser.extract_text_segments()
    a1_content = "製程"
    _report_segments(segments, a1_content)

    # 3 + 4. Compare cells. ExitStack closes every workbook that was opened,
    # even if one of the comparisons raises.
    _print_section("3. 檢查A1儲存格內容")
    with ExitStack() as stack:
        wb_orig = stack.enter_context(
            closing(openpyxl.load_workbook(str(original_file), data_only=False)))
        wb_trans = stack.enter_context(
            closing(openpyxl.load_workbook(str(translated_file), data_only=False)))
        try:
            wb_orig_vals = stack.enter_context(
                closing(openpyxl.load_workbook(str(original_file), data_only=True)))
        except Exception as exc:
            # Best effort: the display-value view is optional, so report and continue.
            print(f" ⚠️ 無法載入顯示值工作簿: {exc}")
            wb_orig_vals = None

        _report_a1(wb_orig, wb_trans, wb_orig_vals)

        _print_section("4. 檢查其他重要儲存格")
        _report_other_cells(wb_orig, wb_trans)

    # 5. Look the segments up in the translation cache table.
    _print_section("5. 檢查翻譯快取狀況")
    _report_cache_status(segments, a1_content)

    print("\n" + "=" * 80)
    print("分析完成!")
    print("=" * 80)
# Run the analysis only when this file is executed directly as a script.
if __name__ == "__main__":
    analyze_latest_excel_test()