#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Debug translation problems for a newly uploaded Excel file.

Run as a script. It walks through five diagnostic steps for one workbook in
an upload directory and prints the findings to stdout:

1. instantiate ``ExcelParser`` on the workbook,
2. probe ``_has_cjk`` / ``_should_translate`` with the expected A1 text,
3. list the text segments the parser extracts,
4. look the A1 text up in the ``dt_translation_cache`` table,
5. read cell A1 directly with openpyxl and show which text would be translated.
"""

import os
import re
import sys

# Put this script's own directory on the import path before importing ``app``
# below (presumably the ``app`` package lives next to this script).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Set the stdout encoding to UTF-8; the script prints CJK text and emoji.
sys.stdout.reconfigure(encoding='utf-8')

from pathlib import Path

from app.services.translation_service import ExcelParser

# Upload directory inspected when the caller does not supply one.
DEFAULT_EXCEL_DIR = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\686d4ac5-3a45-4582-870b-893dd6a83b50")


def _print_section(title):
    """Print a numbered section heading followed by a dashed rule."""
    print(f"\n{title}")
    print("-" * 60)


def _pick_original_workbook(excel_files, translated_files):
    """Return the workbook to debug, or ``None`` when there is no candidate.

    Args:
        excel_files: Paths matching ``*.xlsx`` in the upload directory.
        translated_files: Paths matching ``*_translated.xlsx`` in that directory.

    Returns:
        The first path (in sorted order) that is not an Excel lock file,
        preferring workbooks that are not translation outputs. Falls back to a
        translated workbook when nothing else is available.
    """
    # "~$name.xlsx" files are lock files Excel leaves next to an open
    # workbook; they match "*.xlsx" but are not readable workbooks.
    candidates = sorted(p for p in excel_files if not p.name.startswith("~$"))
    if not candidates:
        return None

    # "*.xlsx" also matches "*_translated.xlsx", so without this filter the
    # "original" could silently be a translation output.
    translated = set(translated_files)
    originals = [p for p in candidates if p not in translated]
    return (originals or candidates)[0]


def _select_display_text(raw_value, cached_value):
    """Pick the text that would be sent for translation for one cell.

    Mirrors the selection this script attributes to
    ``get_display_text_for_translation``.

    Args:
        raw_value: Cell value read with ``data_only=False`` (formulas appear as
            strings starting with ``=``).
        cached_value: Cell value read with ``data_only=True``, or ``None`` when
            that workbook could not be loaded.

    Returns:
        ``raw_value`` when it is a non-blank, non-formula string; otherwise
        ``cached_value`` when that is a non-blank string; otherwise ``None``.
    """
    if (
        isinstance(raw_value, str)
        and raw_value.strip()
        and not raw_value.startswith("=")
    ):
        return raw_value
    if isinstance(cached_value, str) and cached_value.strip():
        return cached_value
    return None


def _report_translation_cache(source_text, target_language):
    """Print exact and substring matches for ``source_text`` in the cache table.

    Args:
        source_text: Text to look up in ``dt_translation_cache.source_text``.
        target_language: Value matched against ``target_language``.
    """
    # Imported lazily: these are only needed for this step.
    from app import create_app
    app = create_app()

    with app.app_context():
        from sqlalchemy import text as sql_text
        from app import db

        print(f"查詢 '{source_text}' 的日文翻譯...")

        # Adjacent literals are concatenated so the statement text stays
        # exactly as written while remaining readable here.
        exact_sql = (
            " SELECT source_text, translated_text, created_at"
            " FROM dt_translation_cache"
            " WHERE source_text = :text AND target_language = :lang"
            " ORDER BY created_at DESC LIMIT 5 "
        )
        rows = db.session.execute(
            sql_text(exact_sql),
            {'text': source_text, 'lang': target_language},
        ).fetchall()

        if rows:
            print(f"✅ 找到 {len(rows)} 筆翻譯記錄:")
            for i, (src, trans, created_at) in enumerate(rows):
                print(f" {i+1}. 原文: {repr(src)}")
                print(f" 譯文: {repr(trans)}")
                print(f" 時間: {created_at}")
        else:
            print("❌ 未找到翻譯記錄")

        # Also look for cache entries that merely contain the text.
        print("\n檢查是否有類似的記錄...")
        similar_sql = (
            " SELECT source_text, translated_text"
            " FROM dt_translation_cache"
            " WHERE source_text LIKE :text AND target_language = :lang"
            " LIMIT 10 "
        )
        similar_rows = db.session.execute(
            sql_text(similar_sql),
            {'text': f'%{source_text}%', 'lang': target_language},
        ).fetchall()

        if similar_rows:
            print(f"找到 {len(similar_rows)} 筆類似記錄:")
            for src, trans in similar_rows:
                print(f" 原文: {repr(src)} -> 譯文: {repr(trans)}")
        else:
            print("沒有找到類似記錄")


def _report_a1_cell(parser, workbook_path):
    """Print the raw and cached values of cell A1 and the text chosen for translation.

    Args:
        parser: ``ExcelParser`` instance used for the ``_should_translate`` check.
        workbook_path: Path of the workbook to open with openpyxl.
    """
    import openpyxl

    # data_only=False yields formulas as written; data_only=True yields the
    # values cached in the file.
    wb = openpyxl.load_workbook(str(workbook_path), data_only=False)
    wb_vals = None
    try:
        try:
            wb_vals = openpyxl.load_workbook(str(workbook_path), data_only=True)
        except Exception as exc:
            # Best effort: carry on with the formula view only, but say why
            # the cached-value view is missing instead of hiding the error.
            print(f"⚠️ 無法以 data_only=True 載入活頁簿: {exc}")

        ws = wb.active
        ws_vals = wb_vals.active if wb_vals is not None else None

        a1_value = ws['A1'].value
        a1_display_value = ws_vals['A1'].value if ws_vals is not None else None

        print("A1儲存格:")
        print(f" 原始值: {repr(a1_value)}")
        print(f" 顯示值: {repr(a1_display_value)}")
        print(f" 是否為公式: {isinstance(a1_value, str) and a1_value.startswith('=')}")

        display_text = _select_display_text(a1_value, a1_display_value)

        print(f" 用於翻譯的文字: {repr(display_text)}")
        print(f" 是否應該翻譯: {parser._should_translate(display_text, 'auto') if display_text else False}")
    finally:
        # Close both workbooks even when one of the steps above raised.
        wb.close()
        if wb_vals is not None:
            wb_vals.close()


def debug_new_excel_upload(excel_dir=None):
    """Run the full diagnostic pass for one uploaded workbook and print the results.

    Args:
        excel_dir: Directory containing the uploaded ``.xlsx`` file. Defaults
            to ``DEFAULT_EXCEL_DIR`` when omitted.

    Returns:
        None. Returns early (after printing a message) when no workbook is
        found or when ``ExcelParser`` cannot be instantiated.
    """
    print("=" * 80)
    print("調試新上傳Excel檔案翻譯問題")
    print("=" * 80)

    excel_dir = DEFAULT_EXCEL_DIR if excel_dir is None else Path(excel_dir)

    # Locate the workbook to debug and any translation outputs beside it.
    excel_files = list(excel_dir.glob("*.xlsx"))
    translated_files = list(excel_dir.glob("*_translated.xlsx"))

    original_file = _pick_original_workbook(excel_files, translated_files)
    if original_file is None:
        print(f"在目錄中找不到Excel檔案: {excel_dir}")
        return

    print(f"找到Excel檔案: {original_file}")

    print(f"翻譯後檔案數量: {len(translated_files)}")
    for tf in translated_files:
        print(f" 翻譯檔案: {tf.name}")

    _print_section("1. 測試ExcelParser實例化")

    try:
        parser = ExcelParser(str(original_file))
        print("✅ ExcelParser實例化成功")
    except Exception as e:
        print(f"❌ ExcelParser實例化失敗: {e}")
        return

    _print_section("2. 測試修正後的_should_translate函數")

    # Text expected in cell A1 of the workbook under investigation.
    test_content = "製程"

    print(f"測試文字: '{test_content}'")
    print(f"文字長度: {len(test_content)}")

    has_cjk = parser._has_cjk(test_content)
    print(f"包含CJK字符: {has_cjk}")

    should_translate = parser._should_translate(test_content, 'auto')
    print(f"應該翻譯: {should_translate}")

    # Re-derive by hand the individual checks that _should_translate is
    # presumed to apply, so a rejection can be traced to a specific rule.
    text = test_content.strip()
    min_length = 2 if has_cjk else 3
    print(f"最小長度要求: {min_length}")
    print(f"是否滿足長度要求: {len(text) >= min_length}")

    is_pure_number_date = re.match(r'^[\d\s\.\-\:\/ ]+$', text)
    print(f"是否為純數字/日期格式: {bool(is_pure_number_date)}")

    _print_section("3. 測試文字片段提取")

    segments = parser.extract_text_segments()
    print(f"提取到的文字片段總數: {len(segments)}")

    # Check whether the A1 text made it into the extracted segments.
    if test_content in segments:
        print(f"✅ A1內容 '{test_content}' 已被提取")
        index = segments.index(test_content)
        print(f" 在列表中的索引: {index}")
    else:
        print(f"❌ A1內容 '{test_content}' 未被提取")

    # Show the first ten segments, flagging the one equal to the A1 text.
    print("\n前10個提取片段:")
    for i, segment in enumerate(segments[:10]):
        safe_segment = repr(segment)
        print(f" {i+1:2d}. {safe_segment}")

        if segment == test_content:
            print(" ⬆️ 這是A1的內容")

    _print_section("4. 檢查翻譯快取")

    # 'ja' = Japanese, the target language being debugged.
    _report_translation_cache(test_content, 'ja')

    _print_section("5. 檢查原始檔案A1儲存格內容")

    _report_a1_cell(parser, original_file)

    print("\n" + "=" * 80)
    print("調試完成!")
    print("=" * 80)


if __name__ == "__main__":
    debug_new_excel_upload()