#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Analyze the latest Excel test result - check whether the fix really took effect.
"""

import os
import sys
from pathlib import Path

# Put this script's own directory first on the import path so the ``app``
# package imported below is looked up there before anywhere else.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Force UTF-8 output so the CJK text and emoji printed by this script do not
# fail on consoles with a narrower default encoding.  The stream may have been
# replaced by an object without ``reconfigure`` (or be ``None``), so only call
# the method when it is actually available.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding='utf-8')

import openpyxl  # noqa: E402  (must follow the sys.path tweak above)
from app.services.translation_service import ExcelParser  # noqa: E402


def _preview(value, limit=20):
    """Return the first ``limit`` characters of ``value`` rendered as text.

    ``str()`` is applied first so that a non-string value (for example a
    ``None`` read back from the database) is displayed instead of raising
    ``TypeError`` when sliced.
    """
    return str(value)[:limit]


def _report_should_translate(parser):
    """Print the parser's ``_should_translate`` verdict for fixed sample texts.

    Args:
        parser: Object exposing ``_should_translate(text, lang)`` and
            ``_has_cjk(text)`` (an ``ExcelParser`` in this script).
    """
    samples = [
        ("製程", "A1儲存格"),
        ("主要特點", "標題文字"),
        ("AB", "2個英文字母"),
        ("123", "純數字"),
        ("工藝", "2個中文字符"),
        ("Epoxy 膠黏(導電/導熱銀膠)", "複合文字"),
    ]

    for text, desc in samples:
        should_translate = parser._should_translate(text, 'auto')
        has_cjk = parser._has_cjk(text)
        # Shown for reference only; presumably mirrors the parser's own
        # minimum-length rule -- verify against ExcelParser.
        min_length = 2 if has_cjk else 3

        print(f" '{text}' ({desc}):")
        print(f" 長度: {len(text)}, CJK: {has_cjk}, 最小長度: {min_length}")
        print(f" 應翻譯: {should_translate}")
        print()


def _report_segments(segments, a1_content):
    """Print every extracted segment and whether ``a1_content`` is among them.

    Args:
        segments: Sequence of extracted text segments.
        a1_content: Text expected to have been extracted from cell A1.
    """
    print(f"✅ 總共提取 {len(segments)} 個文字片段")

    # Special check for A1.
    if a1_content in segments:
        print(f"✅ A1內容 '{a1_content}' 已被提取")
        index = segments.index(a1_content)
        print(f" 在列表中的位置: 第{index+1}個")
    else:
        print(f"❌ A1內容 '{a1_content}' 仍未被提取")

    # List every extracted segment, flagging the one that matches A1.
    print("\n 所有提取的片段:")
    for position, segment in enumerate(segments, start=1):
        print(f" {position:2d}. {segment!r}")
        if segment == a1_content:
            print(" ⬆️ 這是A1的內容!")


def _report_a1(wb_orig, wb_trans, wb_orig_vals):
    """Compare cell A1 of the active sheet in the original and translated books.

    Args:
        wb_orig: Original workbook loaded with ``data_only=False``.
        wb_trans: Translated workbook loaded with ``data_only=False``.
        wb_orig_vals: Original workbook loaded with ``data_only=True``, or
            ``None`` when that load failed.
    """
    a1_orig = wb_orig.active['A1'].value
    a1_trans = wb_trans.active['A1'].value

    print(f" A1原始值: {a1_orig!r}")
    if wb_orig_vals is not None:
        print(f" A1顯示值: {wb_orig_vals.active['A1'].value!r}")
    print(f" A1翻譯值: {a1_trans!r}")

    # A translated cell is expected to hold "source text + newline + translation".
    if isinstance(a1_trans, str) and '\n' in a1_trans:
        lines = a1_trans.split('\n')
        # split() on a string containing '\n' always yields >= 2 items, so a
        # length check can never fail; what can be wrong is an empty
        # translation line, which is what is reported as a format anomaly.
        if lines[1].strip():
            print(" ✅ A1已翻譯!格式: 原文+換行+譯文")
            print(f" 原文行: {lines[0]!r}")
            print(f" 譯文行: {lines[1]!r}")
        else:
            print(" ❌ A1格式異常")
    elif a1_orig == a1_trans:
        print(" ❌ A1未翻譯 - 內容相同")
    else:
        print(" ⚠️ A1內容有變化但格式不明")


def _report_cells(wb_orig, wb_trans, cell_names):
    """Compare the named cells of the active sheets and print their status.

    Cells whose original value is empty/falsy are skipped.

    Args:
        wb_orig: Original workbook.
        wb_trans: Translated workbook.
        cell_names: Iterable of cell coordinates such as ``'B1'``.
    """
    for cell_name in cell_names:
        orig_val = wb_orig.active[cell_name].value
        trans_val = wb_trans.active[cell_name].value

        # Only cells that have content are inspected.
        if not orig_val:
            continue

        print(f"\n {cell_name}儲存格:")
        print(f" 原始: {orig_val!r}")
        print(f" 翻譯: {trans_val!r}")

        if isinstance(trans_val, str) and '\n' in trans_val:
            # The '\n' test above guarantees at least two items here.
            lines = trans_val.split('\n')
            print(" 狀態: ✅ 已翻譯 (雙行格式)")
            print(f" 原文: {lines[0]!r}")
            print(f" 譯文: {lines[1]!r}")
        elif orig_val == trans_val:
            print(" 狀態: ❌ 未翻譯")
        else:
            print(" 狀態: ⚠️ 內容有變化")


def _report_cache(segments, a1_content, target_language='ja', sample_size=10):
    """Query ``dt_translation_cache`` for ``a1_content`` and the first segments.

    Args:
        segments: Sequence of extracted text segments.
        a1_content: Source text to look up as an exact match.
        target_language: Value matched against the ``target_language`` column.
        sample_size: How many leading segments to look up.
    """
    # Imported lazily, as in the original script, so the Flask application is
    # only created when this section actually runs.
    from app import create_app
    app = create_app()

    with app.app_context():
        from sqlalchemy import text as sql_text
        from app import db

        print(f"查詢 '{a1_content}' 在翻譯快取中的狀況...")

        # Exact-match lookup, newest rows first.
        result = db.session.execute(sql_text("""
            SELECT source_text, translated_text, created_at
            FROM dt_translation_cache
            WHERE source_text = :text AND target_language = :lang
            ORDER BY created_at DESC
            LIMIT 3
        """), {'text': a1_content, 'lang': target_language})

        rows = result.fetchall()
        if rows:
            print(f"✅ 找到 {len(rows)} 筆精確匹配的翻譯記錄:")
            for position, (src, trans, created_at) in enumerate(rows, start=1):
                print(f" {position}. 原文: {src!r}")
                print(f" 譯文: {trans!r}")
                print(f" 時間: {created_at}")
        else:
            print("❌ 未找到精確匹配的翻譯記錄")

        # Cache status of the leading segments (only the first few are checked).
        print("\n檢查所有提取片段的翻譯快取狀況:")
        # The statement is loop-invariant, so build it once.
        lookup = sql_text("""
            SELECT translated_text
            FROM dt_translation_cache
            WHERE source_text = :text AND target_language = :lang
            ORDER BY created_at DESC
            LIMIT 1
        """)
        sample = segments[:sample_size]
        found_count = 0
        for position, segment in enumerate(sample, start=1):
            row = db.session.execute(
                lookup, {'text': segment, 'lang': target_language}
            ).fetchone()
            if row:
                found_count += 1
                print(f" ✅ {position:2d}. '{_preview(segment)}...' -> '{_preview(row[0])}...'")
            else:
                print(f" ❌ {position:2d}. '{_preview(segment)}...' -> 無翻譯記錄")

        checked = len(sample)
        if checked:
            print(f"\n翻譯快取命中率: {found_count}/{checked} = {found_count/checked*100:.1f}%")
        else:
            # Nothing was extracted: report it instead of dividing by zero.
            print("\n翻譯快取命中率: 0/0 (沒有可檢查的片段)")


def analyze_latest_excel_test() -> None:
    """Analyze the latest Excel translation test run in detail.

    The run is identified by a hard-coded upload directory.  The function
    prints a report to stdout covering, in order:

    1. the parser's ``_should_translate`` verdict for sample texts,
    2. the text segments actually extracted from the original workbook,
    3. a comparison of cell A1 between original and translated workbooks,
    4. the same comparison for a handful of other cells,
    5. the state of the translation cache table for the extracted text.

    Returns early (after printing a message) when either workbook file is
    missing.  Workbooks that were opened are always closed, even if a later
    step raises.
    """
    print("=" * 80)
    print("分析最新Excel測試結果")
    print("UUID: 185bb457-b703-4e98-94a2-fde072b895c4")
    print("=" * 80)

    # File locations of the run under inspection.
    test_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\185bb457-b703-4e98-94a2-fde072b895c4")
    original_file = test_dir / "original_panjit_185bb457.xlsx"
    translated_file = test_dir / "original_panjit_185bb457_ja_translated.xlsx"

    if not original_file.exists():
        print(f"原始文件不存在: {original_file}")
        return

    if not translated_file.exists():
        print(f"翻譯文件不存在: {translated_file}")
        return

    print("\n✅ 檔案確認:")
    print(f" 原始文件: {original_file.name}")
    print(f" 翻譯文件: {translated_file.name}")

    a1_content = "製程"

    # 1. Exercise ExcelParser._should_translate.
    print("\n1. 測試ExcelParser的_should_translate函數")
    print("-" * 60)
    parser = ExcelParser(str(original_file))
    _report_should_translate(parser)

    # 2. Inspect the segments that are actually extracted.
    print("\n2. 檢查實際提取的文字片段")
    print("-" * 60)
    segments = parser.extract_text_segments()
    _report_segments(segments, a1_content)

    # 3. Compare cell A1 in the original and translated workbooks.
    print("\n3. 檢查A1儲存格內容")
    print("-" * 60)

    opened = []  # every workbook that must be closed on the way out
    try:
        wb_orig = openpyxl.load_workbook(str(original_file), data_only=False)
        opened.append(wb_orig)
        wb_trans = openpyxl.load_workbook(str(translated_file), data_only=False)
        opened.append(wb_trans)

        # The data_only view is a best-effort extra; a failure here must not
        # abort the report, but it should not be swallowed silently either.
        try:
            wb_orig_vals = openpyxl.load_workbook(str(original_file), data_only=True)
        except Exception as exc:
            print(f" ⚠️ 無法以 data_only 模式載入原始文件: {exc}")
            wb_orig_vals = None
        else:
            opened.append(wb_orig_vals)

        _report_a1(wb_orig, wb_trans, wb_orig_vals)

        # 4. Check a few other important cells.
        print("\n4. 檢查其他重要儲存格")
        print("-" * 60)
        _report_cells(wb_orig, wb_trans, ['B1', 'C1', 'D1', 'A2', 'B2', 'C2'])

        # 5. Check the translation cache.
        print("\n5. 檢查翻譯快取狀況")
        print("-" * 60)
        _report_cache(segments, a1_content, target_language='ja')
    finally:
        for workbook in opened:
            workbook.close()

    print("\n" + "=" * 80)
    print("分析完成!")
    print("=" * 80)


if __name__ == "__main__":
    # Run the report only when the file is executed directly, not on import.
    analyze_latest_excel_test()