#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Debug helper for Excel translation problems.

Re-runs a text-extraction pass over an original workbook, then compares a few
cells of the original workbook with the same cells of the translated workbook
and prints what it finds. The helper functions below are local copies of the
extraction/filter logic (marked in the original file as ported from the main
program).
"""

import os
import re
import sys
from pathlib import Path
from typing import List, Optional

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

try:
    import openpyxl
except ImportError:
    # Keep the pure helpers (should_translate, has_cjk, ...) importable without
    # openpyxl; debug_excel_translation_process() raises ImportError when it is
    # actually called without the package.
    openpyxl = None

# Set the stdout encoding to UTF-8 (this script prints CJK text). Guarded
# because stdout may have been replaced by an object without reconfigure().
_reconfigure = getattr(sys.stdout, "reconfigure", None)
if callable(_reconfigure):
    _reconfigure(encoding='utf-8')

# Default input location used when no explicit paths are passed.
_DEFAULT_EXCEL_DIR = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\f0b78200-2c5e-41a4-bac8-1536f92529e9")
_DEFAULT_ORIGINAL_NAME = "original_panjit_f0b78200.xlsx"
_DEFAULT_TRANSLATED_NAME = "original_panjit_f0b78200_ja_translated.xlsx"

# Cells whose original/translated values are printed side by side.
_IMPORTANT_CELLS = ('A1', 'B1', 'C1', 'D1', 'B3', 'C3', 'D3')

# Only the first N extracted cells are echoed in detail.
_MAX_DETAILED_CELLS = 20

# Text made only of digits, whitespace and . - : / (numbers, dates, times).
_NON_TEXT_RE = re.compile(r'^[\d\s\.\-\:\/]+$')

# Inclusive code point ranges treated as CJK text.
# NOTE: code points above U+FFFF need the 8-digit \U escape. The original
# literal '\u20000' is the two-character string U+2000 + '0', which made the
# range match punctuation and symbols instead of Extension B.
# NOTE(review): if the main program carries the same literal, it has the same
# bug - confirm and fix there too.
_CJK_RANGES = (
    ('\u4e00', '\u9fff'),          # CJK Unified Ideographs
    ('\u3400', '\u4dbf'),          # CJK Unified Ideographs Extension A
    ('\U00020000', '\U0002a6df'),  # CJK Unified Ideographs Extension B
    ('\u3040', '\u309f'),          # Hiragana
    ('\u30a0', '\u30ff'),          # Katakana
    ('\uac00', '\ud7af'),          # Hangul Syllables block
)


def _extract_segments(wb, wb_vals) -> List[str]:
    """Collect every translatable text in *wb*, printing progress as it goes.

    *wb* is the workbook loaded with formulas, *wb_vals* the same workbook
    loaded with ``data_only=True`` (or ``None`` when that load failed).
    Returns the texts in scan order (sheet, then row, then column), duplicates
    included.
    """
    segs: List[str] = []
    for ws in wb.worksheets:
        print(f"\n處理工作表: {ws.title}")
        has_vals_sheet = wb_vals is not None and ws.title in wb_vals.sheetnames
        ws_vals = wb_vals[ws.title] if has_vals_sheet else None
        max_row, max_col = ws.max_row, ws.max_column
        print(f"工作表大小: {max_row} x {max_col}")

        for r in range(1, max_row + 1):
            for c in range(1, max_col + 1):
                src_text = get_display_text_for_translation(ws, ws_vals, r, c)
                if not src_text or not should_translate(src_text, 'auto'):
                    continue
                segs.append(src_text)

                # Echo only the first few hits; repr() keeps control and
                # special characters visible and safe to print.
                if len(segs) <= _MAX_DETAILED_CELLS:
                    cell_name = f"{openpyxl.utils.get_column_letter(c)}{r}"
                    print(f" {cell_name}: {src_text!r}")
    return segs


def _report_cells(orig_ws, trans_ws, cell_names) -> None:
    """Print original vs. translated value for each cell in *cell_names*."""
    for cell_name in cell_names:
        orig_val = orig_ws[cell_name].value
        trans_val = trans_ws[cell_name].value

        print(f"\n儲存格 {cell_name}:")
        print(f" 原始: {repr(orig_val)}")
        print(f" 翻譯: {repr(trans_val)}")

        # Expected output format is "original text + newline + translation".
        if isinstance(trans_val, str) and '\n' in trans_val:
            lines = trans_val.split('\n')
            print(f" 格式: 雙行格式,共 {len(lines)} 行")
            for i, line in enumerate(lines):
                print(f" 行{i+1}: {repr(line)}")
        else:
            print(" 格式: 單行格式")


def debug_excel_translation_process(
    original_file: Optional[Path] = None,
    translated_file: Optional[Path] = None,
) -> None:
    """Print a step-by-step report of the Excel translation process.

    Args:
        original_file: Path of the source workbook. Defaults to the
            hard-coded upload used while debugging.
        translated_file: Path of the translated workbook. Defaults to the
            hard-coded translated file next to the default original.

    Raises:
        ImportError: If openpyxl is not installed.

    The function returns early (after printing a message) when either file
    does not exist. All workbooks that were opened are closed again, also
    when an error interrupts the report.
    """
    if openpyxl is None:
        raise ImportError(
            "openpyxl is required to run debug_excel_translation_process()"
        )

    print("=" * 80)
    print("Excel 翻譯過程調試")
    print("=" * 80)

    # Resolve the file paths.
    if original_file is None:
        original_file = _DEFAULT_EXCEL_DIR / _DEFAULT_ORIGINAL_NAME
    if translated_file is None:
        translated_file = _DEFAULT_EXCEL_DIR / _DEFAULT_TRANSLATED_NAME
    original_file = Path(original_file)
    translated_file = Path(translated_file)

    if not original_file.exists():
        print(f"原始文件不存在: {original_file}")
        return

    if not translated_file.exists():
        print(f"翻譯文件不存在: {translated_file}")
        return

    print("\n1. 分析原始文件提取過程")
    print("-" * 50)

    wb = wb_vals = wb_trans = None
    try:
        # Simulate ExcelParser.extract_text_segments(): one load with the
        # formulas, one with the values stored for them.
        wb = openpyxl.load_workbook(str(original_file), data_only=False)
        try:
            wb_vals = openpyxl.load_workbook(str(original_file), data_only=True)
        except Exception as exc:
            # Best effort: continue without stored values, but say so, because
            # formula cells then yield no text at all.
            print(f"警告: 無法以 data_only=True 載入工作簿,公式儲存格將被略過: {exc}")
            wb_vals = None

        print(f"工作簿載入成功,共 {len(wb.worksheets)} 個工作表")

        segs = _extract_segments(wb, wb_vals)
        print(f"\n提取結果: 共提取到 {len(segs)} 個文字片段")

        # De-duplicate while keeping first-seen order.
        unique_segments = list(dict.fromkeys(segs))
        print(f"去重後: {len(unique_segments)} 個唯一文字片段")

        print("\n2. 分析翻譯結果寫入過程")
        print("-" * 50)

        # Inspect what actually ended up in the translated file.
        wb_trans = openpyxl.load_workbook(str(translated_file), data_only=False)
        _report_cells(wb.active, wb_trans.active, _IMPORTANT_CELLS)

        print("\n3. 檢查 A1 儲存格特殊情況")
        print("-" * 50)

        # A1 gets a dedicated check: was it eligible and was it extracted?
        a1_orig = wb.active['A1'].value
        a1_trans = wb_trans.active['A1'].value

        print(f"A1 原始值: {repr(a1_orig)}")
        print(f"A1 翻譯值: {repr(a1_trans)}")
        print(f"A1 是否需要翻譯: {should_translate(str(a1_orig) if a1_orig else '', 'auto')}")
        print(f"A1 是否在提取列表中: {str(a1_orig) in unique_segments if a1_orig else False}")
    finally:
        for book in (wb, wb_trans, wb_vals):
            if book is not None:
                book.close()


def get_display_text_for_translation(ws, ws_vals, r: int, c: int):
    """Return the text of cell (r, c) that should be sent to translation.

    Args:
        ws: Worksheet loaded with formulas.
        ws_vals: The same worksheet loaded with ``data_only=True``, or
            ``None`` when it is unavailable.
        r: 1-based row index.
        c: 1-based column index.

    Returns:
        * For a formula cell (string starting with ``=``): the value read from
          *ws_vals* if that is a non-blank string, otherwise ``None``.
        * For a non-blank string cell: the string itself.
        * Otherwise: the value read from *ws_vals* if that is a non-blank
          string, else ``None``.
    """
    val = ws.cell(row=r, column=c).value

    if isinstance(val, str) and val.startswith("="):
        # Never translate the formula source, only the value stored for it.
        if ws_vals is None:
            return None
        shown = ws_vals.cell(row=r, column=c).value
        return shown if isinstance(shown, str) and shown.strip() else None

    if isinstance(val, str) and val.strip():
        return val

    if ws_vals is not None:
        shown = ws_vals.cell(row=r, column=c).value
        if isinstance(shown, str) and shown.strip():
            return shown
    return None


def should_translate(text: str, src_lang: str) -> bool:
    """Decide whether *text* is worth translating.

    Returns ``False`` for text shorter than 3 characters after stripping and
    for text consisting only of digits, whitespace and ``. - : /``. When
    *src_lang* is ``auto`` / ``auto-detect`` (case-insensitive) the text must
    additionally contain CJK characters or be longer than 5 characters. Any
    other source language accepts the remaining text.
    """
    text = text.strip()
    if len(text) < 3:
        return False

    # Skip pure numbers, dates, etc.
    if _NON_TEXT_RE.match(text):
        return False

    # For auto-detect, translate if it has CJK or is long enough to be
    # meaningful text.
    if src_lang.lower() in ('auto', 'auto-detect'):
        return has_cjk(text) or len(text) > 5

    return True


def has_cjk(text: str) -> bool:
    """Return True if *text* contains a Chinese, Japanese or Korean character.

    Covers CJK Unified Ideographs (base block, Extension A, Extension B),
    Hiragana, Katakana and the Hangul Syllables block.
    """
    return any(low <= char <= high for char in text for low, high in _CJK_RANGES)


if __name__ == "__main__":
    debug_excel_translation_process()