8th_fix combine

This commit is contained in:
beabigegg
2025-09-03 19:43:49 +08:00
parent 82aaa315bb
commit e3dc5506bc
2 changed files with 197 additions and 2 deletions

View File

@@ -827,4 +827,38 @@ class DocumentProcessor:
def should_translate_text(self, text: str, source_language: str) -> bool:
    """Return whether ``text`` should be translated.

    Thin wrapper that forwards both arguments unchanged to the
    ``should_translate`` helper available in this module's scope.

    Args:
        text: Candidate text.
        source_language: Source-language identifier passed through to
            ``should_translate``.

    Returns:
        Whatever ``should_translate(text, source_language)`` returns.
    """
    # The statement used to appear twice; the second copy was unreachable.
    return should_translate(text, source_language)
def insert_docx_combined_translations(self, file_path: str, segments: List[Segment],
                                      translation_map: Dict[Tuple[str, str], str],
                                      target_languages: List[str], output_path: str) -> Tuple[int, int]:
    """Write one DOCX that contains the translations for every target language.

    The source document is reopened from ``file_path``, the previously
    extracted ``segments`` are re-bound to that fresh document instance, and
    ``_insert_docx_translations`` is called once with the full list of
    target languages so all translations end up in a single file (intended
    layout: the original text followed by one translation per language).

    Args:
        file_path: Path of the source DOCX.
        segments: Segments extracted earlier; re-matched against the newly
            opened document before insertion.
        translation_map: Translations keyed by a ``(str, str)`` tuple, passed
            through unchanged to ``_insert_docx_translations``.
        target_languages: Languages to insert, in the order given.
        output_path: Where the combined document is saved.

    Returns:
        ``(ok_count, skip_count)`` as reported by
        ``_insert_docx_translations``.

    Raises:
        FileProcessingError: If any step fails; the original exception is
            attached as ``__cause__``.
    """
    try:
        doc = docx.Document(file_path)

        # Segments hold references into a previously loaded document, so they
        # are re-matched against the instance that was just opened.
        matched_segments = self._rematch_segments_to_document(doc, segments)

        def log_func(msg: str):
            self.logger.debug(msg)

        # _insert_docx_translations already supports several target languages
        # in a single document, so one call is enough.
        ok_count, skip_count = _insert_docx_translations(
            doc, matched_segments, translation_map, target_languages, log_func
        )

        # Persist the combined document.
        doc.save(output_path)

        self.logger.info(f"Generated combined multi-language file: {output_path}")
        self.logger.info(f"Inserted {ok_count} translations, skipped {skip_count}")
        return ok_count, skip_count
    except Exception as e:
        self.logger.error(f"Failed to create combined DOCX translations: {str(e)}")
        # Chain explicitly so the root cause stays visible in tracebacks.
        raise FileProcessingError(f"組合多語言 DOCX 檔案生成失敗: {str(e)}") from e

View File

@@ -871,6 +871,45 @@ class TranslationService:
except Exception as e:
logger.error(f"Failed to generate translated document for {target_language}: {str(e)}")
raise TranslationError(f"生成 {target_language} 翻譯文件失敗: {str(e)}")
# 生成組合多語言檔案 - 包含所有翻譯在一個文件中
if len(job.target_languages) > 1:
try:
# 生成組合檔案的檔名
combined_filename = generate_filename(
Path(job.file_path).name,
'translated',
'combined',
'multilang'
)
combined_output_path = output_dir / combined_filename
# 使用新的組合翻譯插入方法
combined_ok_count, combined_skip_count = self.document_processor.insert_docx_combined_translations(
job.file_path,
segments,
translation_map,
job.target_languages,
str(combined_output_path)
)
output_files['combined'] = str(combined_output_path)
# 記錄組合翻譯檔案到資料庫
file_size = Path(combined_output_path).stat().st_size
job.add_translated_file(
language_code='combined',
filename=Path(combined_output_path).name,
file_path=str(combined_output_path),
file_size=file_size
)
logger.info(f"Generated combined multi-language file: {combined_ok_count} insertions, {combined_skip_count} skips")
except Exception as e:
logger.error(f"Failed to generate combined multi-language document: {str(e)}")
# 不要因為組合檔案失敗而讓整個任務失敗,只記錄警告
logger.warning("Combined multi-language file generation failed, but individual files were successful")
elif file_ext in ['.xlsx', '.xls']:
# Excel 文件使用儲存格為單位的翻譯邏輯
@@ -941,6 +980,48 @@ class TranslationService:
file_path=output_file,
file_size=file_size
)
# 生成組合多語言Excel檔案
if len(job.target_languages) > 1:
try:
# 生成組合檔案的檔名
combined_filename = generate_filename(
Path(job.file_path).name,
'translated',
'combined',
'multilang'
)
combined_output_path = output_dir / combined_filename
# 為Excel組合檔案建立翻譯映射
combined_translation_mapping = {}
for lang in job.target_languages:
combined_translation_mapping[lang] = translation_results[lang]
# 使用修改過的generate_combined_excel_document方法
combined_output_file = self._generate_combined_excel_document(
parser,
combined_translation_mapping,
job.target_languages,
combined_output_path
)
output_files['combined'] = combined_output_file
# 記錄組合翻譯檔案到資料庫
file_size = Path(combined_output_file).stat().st_size
job.add_translated_file(
language_code='combined',
filename=Path(combined_output_file).name,
file_path=combined_output_file,
file_size=file_size
)
logger.info(f"Generated combined multi-language Excel file")
except Exception as e:
logger.error(f"Failed to generate combined multi-language Excel document: {str(e)}")
logger.warning("Combined multi-language Excel file generation failed, but individual files were successful")
else:
# 對於其他文件格式,使用原有邏輯
@@ -1065,4 +1146,84 @@ class TranslationService:
func.sum(APIUsageStats.cost)
).filter_by(job_id=job_id).scalar()
return float(total_cost) if total_cost else 0.0
return float(total_cost) if total_cost else 0.0
def _generate_combined_excel_document(self, parser, translation_mapping: Dict[str, List[str]],
                                      target_languages: List[str], output_path: Path) -> str:
    """Generate one Excel workbook whose cells contain every target language.

    For each cell that the parser considers translatable, the cell value is
    replaced by the source text followed by one line per target language.
    Translations are looked up in the ``dt_translation_cache`` table; a
    language with no cached translation gets a ``【翻譯缺失|<lang>】``
    placeholder line instead.

    Args:
        parser: Excel parser; this method uses its ``file_path``,
            ``extract_text_segments()``,
            ``_get_display_text_for_translation()`` and
            ``_should_translate()``.
        translation_mapping: Per-language translation lists. NOTE(review):
            this argument is currently not read; all translations come from
            the cache table. Confirm whether that is intended.
        target_languages: Languages to append, in output order.
        output_path: Destination of the combined workbook.

    Returns:
        ``str(output_path)`` after the workbook has been saved.

    Raises:
        FileProcessingError: If any step fails; the original exception is
            attached as ``__cause__``.
    """
    try:
        import openpyxl
        from openpyxl.styles import Alignment, Font
        from sqlalchemy import text as sql_text
        from app import db

        # Load the original workbook; a second copy opened with
        # data_only=True is passed to the parser alongside the first.
        # Failing to open the second copy is tolerated.
        wb = openpyxl.load_workbook(str(parser.file_path), data_only=False)
        try:
            wb_vals = openpyxl.load_workbook(str(parser.file_path), data_only=True)
        except Exception:
            wb_vals = None

        # Collect the source segments and build (language, text) -> translation.
        original_segments = parser.extract_text_segments()
        combined_tmap = {}

        logger.info(f"Building combined translation map for {len(original_segments)} segments")

        # Loop-invariant statement, built once.
        # NOTE(review): ASC picks the oldest cached row - confirm that the
        # oldest (not the newest) translation is the one wanted.
        cache_query = sql_text("""
            SELECT translated_text
            FROM dt_translation_cache
            WHERE source_text = :text AND target_language = :lang
            ORDER BY created_at ASC
            LIMIT 1
        """)

        # dict.fromkeys de-duplicates while keeping order, so a text that
        # occurs in many cells is only queried once per language.
        for original_text in dict.fromkeys(original_segments):
            for target_lang in target_languages:
                result = db.session.execute(
                    cache_query, {'text': original_text, 'lang': target_lang}
                )
                row = result.fetchone()
                if row and row[0]:
                    combined_tmap[(target_lang, original_text)] = row[0]

        logger.info(f"Built combined translation map with {len(combined_tmap)} mappings")

        # Style objects are identical for every rewritten cell.
        wrapped_alignment = Alignment(wrap_text=True, vertical='top')
        combined_font = Font(size=10)

        # Walk every worksheet and write the combined text into each
        # translatable cell.
        for ws in wb.worksheets:
            logger.info(f"Processing combined worksheet: {ws.title}")
            ws_vals = wb_vals[ws.title] if wb_vals and ws.title in wb_vals.sheetnames else None
            max_row, max_col = ws.max_row, ws.max_column

            for r in range(1, max_row + 1):
                for c in range(1, max_col + 1):
                    cell = ws.cell(row=r, column=c)
                    src_text = parser._get_display_text_for_translation(ws, ws_vals, r, c)

                    if not src_text or not parser._should_translate(src_text, 'auto'):
                        continue

                    # One entry per target language; a missing translation
                    # becomes a visible placeholder (closing bracket restored).
                    translations = [
                        combined_tmap.get(
                            (target_lang, src_text),
                            f"【翻譯缺失|{target_lang}】",
                        )
                        for target_lang in target_languages
                    ]

                    # Empty only when no target languages were given; the
                    # cell is then left untouched.
                    if translations:
                        # Layout: original, then one translation per line.
                        cell.value = src_text + '\n' + '\n'.join(translations)
                        cell.alignment = wrapped_alignment
                        cell.font = combined_font

        # Save the combined workbook.
        wb.save(str(output_path))

        logger.info(f"Generated combined Excel file: {output_path}")
        return str(output_path)
    except Exception as e:
        logger.error(f"Failed to generate combined Excel document: {str(e)}")
        # Chain explicitly so the root cause stays visible in tracebacks.
        raise FileProcessingError(f"組合 Excel 檔案生成失敗: {str(e)}") from e