def should_translate_text(self, text: str, source_language: str) -> bool:
    """Determine if text should be translated.

    Thin wrapper that forwards both arguments to ``should_translate``.
    """
    return should_translate(text, source_language)


def insert_docx_combined_translations(self, file_path: str, segments: List[Segment],
                                      translation_map: Dict[Tuple[str, str], str],
                                      target_languages: List[str], output_path: str) -> Tuple[int, int]:
    r"""Write every target-language translation into one combined DOCX file.

    The source document is opened fresh, the given segments are re-matched
    against that new document instance, and ``_insert_docx_translations`` is
    called once with the full list of target languages so that each original
    text is followed by all of its translations (original\nlang-1\nlang-2 ...).

    Args:
        file_path: Path of the source DOCX to open.
        segments: Segments previously extracted from the source document.
        translation_map: Translations keyed by a pair of strings; passed
            through unchanged to ``_insert_docx_translations``.
            NOTE(review): key layout is presumably (target_language,
            source_text) -- confirm against ``_insert_docx_translations``.
        target_languages: Languages to insert, in output order.
        output_path: Where the combined document is saved.

    Returns:
        The ``(ok_count, skip_count)`` pair reported by
        ``_insert_docx_translations``.

    Raises:
        FileProcessingError: If any step fails; the original exception is
            attached as ``__cause__``.
    """
    try:
        doc = docx.Document(file_path)

        # Re-match segments with the current document instance
        matched_segments = self._rematch_segments_to_document(doc, segments)

        # _insert_docx_translations already supports several target languages
        # in a single document; the logger's debug method serves as its
        # message callback.
        ok_count, skip_count = _insert_docx_translations(
            doc, matched_segments, translation_map, target_languages, self.logger.debug
        )

        # Save the combined document
        doc.save(output_path)

        self.logger.info(f"Generated combined multi-language file: {output_path}")
        self.logger.info(f"Inserted {ok_count} translations, skipped {skip_count}")
        return ok_count, skip_count

    except Exception as e:
        # exc_info keeps the traceback in the log; "from e" keeps the root
        # cause attached to the wrapped error for callers.
        self.logger.error(f"Failed to create combined DOCX translations: {str(e)}", exc_info=True)
        raise FileProcessingError(f"組合多語言 DOCX 檔案生成失敗: {str(e)}") from e
language_code='combined', + filename=Path(combined_output_path).name, + file_path=str(combined_output_path), + file_size=file_size + ) + + logger.info(f"Generated combined multi-language file: {combined_ok_count} insertions, {combined_skip_count} skips") + + except Exception as e: + logger.error(f"Failed to generate combined multi-language document: {str(e)}") + # 不要因為組合檔案失敗而讓整個任務失敗,只記錄警告 + logger.warning("Combined multi-language file generation failed, but individual files were successful") elif file_ext in ['.xlsx', '.xls']: # Excel 文件使用儲存格為單位的翻譯邏輯 @@ -941,6 +980,48 @@ class TranslationService: file_path=output_file, file_size=file_size ) + + # 生成組合多語言Excel檔案 + if len(job.target_languages) > 1: + try: + # 生成組合檔案的檔名 + combined_filename = generate_filename( + Path(job.file_path).name, + 'translated', + 'combined', + 'multilang' + ) + combined_output_path = output_dir / combined_filename + + # 為Excel組合檔案建立翻譯映射 + combined_translation_mapping = {} + for lang in job.target_languages: + combined_translation_mapping[lang] = translation_results[lang] + + # 使用修改過的generate_combined_excel_document方法 + combined_output_file = self._generate_combined_excel_document( + parser, + combined_translation_mapping, + job.target_languages, + combined_output_path + ) + + output_files['combined'] = combined_output_file + + # 記錄組合翻譯檔案到資料庫 + file_size = Path(combined_output_file).stat().st_size + job.add_translated_file( + language_code='combined', + filename=Path(combined_output_file).name, + file_path=combined_output_file, + file_size=file_size + ) + + logger.info(f"Generated combined multi-language Excel file") + + except Exception as e: + logger.error(f"Failed to generate combined multi-language Excel document: {str(e)}") + logger.warning("Combined multi-language Excel file generation failed, but individual files were successful") else: # 對於其他文件格式,使用原有邏輯 @@ -1065,4 +1146,84 @@ class TranslationService: func.sum(APIUsageStats.cost) ).filter_by(job_id=job_id).scalar() - return 
float(total_cost) if total_cost else 0.0 \ No newline at end of file + return float(total_cost) if total_cost else 0.0 + + def _generate_combined_excel_document(self, parser, translation_mapping: Dict[str, List[str]], + target_languages: List[str], output_path: Path) -> str: + """生成包含所有翻譯語言的組合Excel檔案""" + try: + import openpyxl + from openpyxl.styles import Alignment, Font + from sqlalchemy import text as sql_text + from app import db + + # 載入原始工作簿 + wb = openpyxl.load_workbook(str(parser.file_path), data_only=False) + try: + wb_vals = openpyxl.load_workbook(str(parser.file_path), data_only=True) + except Exception: + wb_vals = None + + # 取得原始文字段落以建立翻譯映射 + original_segments = parser.extract_text_segments() + combined_tmap = {} + + logger.info(f"Building combined translation map for {len(original_segments)} segments") + + for original_text in original_segments: + # 從翻譯快取中查詢所有語言的翻譯 + for target_lang in target_languages: + result = db.session.execute(sql_text(""" + SELECT translated_text + FROM dt_translation_cache + WHERE source_text = :text AND target_language = :lang + ORDER BY created_at ASC + LIMIT 1 + """), {'text': original_text, 'lang': target_lang}) + + row = result.fetchone() + if row and row[0]: + combined_tmap[(target_lang, original_text)] = row[0] + + logger.info(f"Built combined translation map with {len(combined_tmap)} mappings") + + # 處理每個工作表,插入組合翻譯 + for ws in wb.worksheets: + logger.info(f"Processing combined worksheet: {ws.title}") + ws_vals = wb_vals[ws.title] if wb_vals and ws.title in wb_vals.sheetnames else None + max_row, max_col = ws.max_row, ws.max_column + + for r in range(1, max_row + 1): + for c in range(1, max_col + 1): + cell = ws.cell(row=r, column=c) + src_text = parser._get_display_text_for_translation(ws, ws_vals, r, c) + + if not src_text or not parser._should_translate(src_text, 'auto'): + continue + + # 收集所有語言的翻譯 + translations = [] + for target_lang in target_languages: + if (target_lang, src_text) in combined_tmap: + 
translations.append(combined_tmap[(target_lang, src_text)]) + else: + translations.append(f"【翻譯缺失|{target_lang}】") + + # 組合翻譯文字:原文\n英文\n越南文 + if translations: + combined_text = src_text + '\n' + '\n'.join(translations) + + # 設置儲存格值 + cell.value = combined_text + cell.alignment = Alignment(wrap_text=True, vertical='top') + cell.font = Font(size=10) + + # 儲存組合檔案 + wb.save(str(output_path)) + + logger.info(f"Generated combined Excel file: {output_path}") + return str(output_path) + + except Exception as e: + logger.error(f"Failed to generate combined Excel document: {str(e)}") + raise FileProcessingError(f"組合 Excel 檔案生成失敗: {str(e)}") \ No newline at end of file