5th_fix excel problem
This commit is contained in:
195
debug_new_excel_upload.py
Normal file
195
debug_new_excel_upload.py
Normal file
@@ -0,0 +1,195 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
調試新上傳的Excel檔案翻譯問題
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
# Force UTF-8 on stdout so the CJK text and emoji printed by this script can
# be encoded (presumably needed on Windows consoles - confirm).  Streams that
# replace sys.stdout (e.g. io.StringIO) have no reconfigure(), so guard the
# call instead of crashing at import time.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding='utf-8')
|
||||
|
||||
from pathlib import Path
|
||||
from app.services.translation_service import ExcelParser
|
||||
|
||||
def _select_original_workbook(excel_dir):
    """Return the workbook to inspect in *excel_dir*, or ``None`` if none exists.

    ``*.xlsx`` also matches translated outputs (``*_translated.xlsx``) and
    Excel lock files (``~$name.xlsx``), so those are skipped when any other
    workbook is present.  Candidates are sorted so the choice is deterministic.
    """
    workbooks = sorted(excel_dir.glob("*.xlsx"))
    originals = [
        path for path in workbooks
        if not path.name.endswith("_translated.xlsx")
        and not path.name.startswith("~$")
    ]
    candidates = originals or workbooks
    return candidates[0] if candidates else None


def _display_text_for_translation(raw_value, cached_value):
    """Pick the text of a cell that would be sent for translation.

    Args:
        raw_value: Cell value loaded with ``data_only=False`` (formulas kept).
        cached_value: Cell value loaded with ``data_only=True``, or ``None``
            when that workbook could not be loaded.

    Returns:
        The raw value for a non-blank, non-formula string; otherwise the
        cached value when it is a non-blank string; otherwise ``None``.
    """
    cached_text = None
    if isinstance(cached_value, str) and cached_value.strip():
        cached_text = cached_value

    if isinstance(raw_value, str):
        if raw_value.startswith("="):
            # Formula cell: only its cached result is translatable text.
            return cached_text
        if raw_value.strip():
            return raw_value
    return cached_text


def debug_new_excel_upload(excel_dir=None):
    """Print a step-by-step diagnosis of why an uploaded workbook is not translated.

    The report covers five steps: instantiating ``ExcelParser``, checking
    ``_has_cjk`` / ``_should_translate`` on the sample text, listing the
    extracted text segments, querying ``dt_translation_cache`` for the sample
    text, and inspecting cell A1 of the workbook directly with openpyxl.

    Args:
        excel_dir: Directory (``str`` or ``Path``) that holds the uploaded
            ``.xlsx`` file.  Defaults to the upload directory this script was
            originally written against.

    Returns:
        None.  All findings are printed to stdout.  The function returns early
        when no workbook is found or the parser cannot be created.
    """
    banner = "=" * 80
    rule = "-" * 60

    print(banner)
    print("調試新上傳Excel檔案翻譯問題")
    print(banner)

    if excel_dir is None:
        excel_dir = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\686d4ac5-3a45-4582-870b-893dd6a83b50"
    excel_dir = Path(excel_dir)

    original_file = _select_original_workbook(excel_dir)
    if original_file is None:
        print(f"在目錄中找不到Excel檔案: {excel_dir}")
        return
    print(f"找到Excel檔案: {original_file}")

    # Report any already-translated outputs that sit next to the upload.
    translated_files = list(excel_dir.glob("*_translated.xlsx"))
    print(f"翻譯後檔案數量: {len(translated_files)}")
    for translated in translated_files:
        print(f" 翻譯檔案: {translated.name}")

    # ---- Step 1: parser construction ------------------------------------
    print("\n1. 測試ExcelParser實例化")
    print(rule)
    try:
        parser = ExcelParser(str(original_file))
        print("✅ ExcelParser實例化成功")
    except Exception as e:
        # Top-level boundary of a debug script: report and stop.
        print(f"❌ ExcelParser實例化失敗: {e}")
        return

    # ---- Step 2: should-translate decision ------------------------------
    print("\n2. 測試修正後的_should_translate函數")
    print(rule)

    test_content = "製程"  # Sample text; the script treats it as the content of A1.

    print(f"測試文字: '{test_content}'")
    print(f"文字長度: {len(test_content)}")

    has_cjk = parser._has_cjk(test_content)
    print(f"包含CJK字符: {has_cjk}")

    should_translate = parser._should_translate(test_content, 'auto')
    print(f"應該翻譯: {should_translate}")

    # Recompute the individual checks by hand for comparison.
    # NOTE(review): presumably mirrors the rules inside _should_translate -
    # confirm against the parser implementation.
    text = test_content.strip()
    min_length = 2 if has_cjk else 3
    print(f"最小長度要求: {min_length}")
    print(f"是否滿足長度要求: {len(text) >= min_length}")

    import re
    is_pure_number_date = re.match(r'^[\d\s\.\-\:\/ ]+$', text)
    print(f"是否為純數字/日期格式: {bool(is_pure_number_date)}")

    # ---- Step 3: segment extraction -------------------------------------
    print("\n3. 測試文字片段提取")
    print(rule)

    segments = parser.extract_text_segments()
    print(f"提取到的文字片段總數: {len(segments)}")

    if test_content in segments:
        print(f"✅ A1內容 '{test_content}' 已被提取")
        index = segments.index(test_content)
        print(f" 在列表中的索引: {index}")
    else:
        print(f"❌ A1內容 '{test_content}' 未被提取")

    print("\n前10個提取片段:")
    for position, segment in enumerate(segments[:10], start=1):
        print(f" {position:2d}. {segment!r}")
        if segment == test_content:
            print(" ⬆️ 這是A1的內容")

    # ---- Step 4: translation cache --------------------------------------
    print("\n4. 檢查翻譯快取")
    print(rule)

    from app import create_app
    app = create_app()

    with app.app_context():
        from sqlalchemy import text as sql_text
        from app import db

        target_language = 'ja'  # Look up the Japanese translation.

        print(f"查詢 '{test_content}' 的日文翻譯...")

        result = db.session.execute(sql_text("""
            SELECT source_text, translated_text, created_at
            FROM dt_translation_cache
            WHERE source_text = :text AND target_language = :lang
            ORDER BY created_at DESC
            LIMIT 5
        """), {'text': test_content, 'lang': target_language})

        rows = result.fetchall()
        if rows:
            print(f"✅ 找到 {len(rows)} 筆翻譯記錄:")
            for position, (src, trans, created_at) in enumerate(rows, start=1):
                print(f" {position}. 原文: {src!r}")
                print(f" 譯文: {trans!r}")
                print(f" 時間: {created_at}")
        else:
            print("❌ 未找到翻譯記錄")

            # No exact hit: look for cached entries that contain the text.
            print("\n檢查是否有類似的記錄...")
            result2 = db.session.execute(sql_text("""
                SELECT source_text, translated_text
                FROM dt_translation_cache
                WHERE source_text LIKE :text AND target_language = :lang
                LIMIT 10
            """), {'text': f'%{test_content}%', 'lang': target_language})

            similar_rows = result2.fetchall()
            if similar_rows:
                print(f"找到 {len(similar_rows)} 筆類似記錄:")
                for src, trans in similar_rows:
                    print(f" 原文: {src!r} -> 譯文: {trans!r}")
            else:
                print("沒有找到類似記錄")

    # ---- Step 5: raw A1 inspection --------------------------------------
    print("\n5. 檢查原始檔案A1儲存格內容")
    print(rule)

    import openpyxl
    # data_only=False keeps formulas; data_only=True yields cached results.
    wb = openpyxl.load_workbook(str(original_file), data_only=False)
    wb_vals = None
    try:
        try:
            wb_vals = openpyxl.load_workbook(str(original_file), data_only=True)
        except Exception as e:
            # Best effort: continue without cached values, but say so.
            print(f"⚠️ 無法以 data_only=True 載入活頁簿: {e}")

        ws = wb.active
        ws_vals = wb_vals.active if wb_vals else None

        a1_value = ws['A1'].value
        a1_display_value = ws_vals['A1'].value if ws_vals else None
        is_formula = isinstance(a1_value, str) and a1_value.startswith('=')

        print("A1儲存格:")
        print(f" 原始值: {a1_value!r}")
        print(f" 顯示值: {a1_display_value!r}")
        print(f" 是否為公式: {is_formula}")

        # Simulates the project's get_display_text_for_translation helper.
        display_text = _display_text_for_translation(a1_value, a1_display_value)

        print(f" 用於翻譯的文字: {display_text!r}")
        print(f" 是否應該翻譯: {parser._should_translate(display_text, 'auto') if display_text else False}")
    finally:
        # Release the file handles even if the inspection above fails.
        wb.close()
        if wb_vals:
            wb_vals.close()

    print("\n" + banner)
    print("調試完成!")
    print(banner)
|
||||
|
||||
# Run the diagnosis only when executed as a script, not when imported.
if __name__ == "__main__":
    debug_new_excel_upload()
|
Reference in New Issue
Block a user