"""Smart Slide Voiceover System (智慧簡報旁白生成系統).

A desktop application for batch converting Excel scripts to professional
voiceover audio files.
"""

import sys
import asyncio
import os
from pathlib import Path
from dataclasses import dataclass
from typing import Optional

import pandas as pd
import edge_tts
from PyQt6.QtWidgets import (
    QApplication,
    QMainWindow,
    QWidget,
    QVBoxLayout,
    QHBoxLayout,
    QPushButton,
    QLabel,
    QLineEdit,
    QFileDialog,
    QProgressBar,
    QTextEdit,
    QComboBox,
    QMessageBox,
    QGroupBox,
)
from PyQt6.QtCore import QThread, pyqtSignal, Qt


# =============================================================================
# Voice Registry - every selectable voice, annotated with bilingual support
# =============================================================================

@dataclass
class VoiceInfo:
    """Catalogue entry describing one selectable text-to-speech voice."""

    voice_id: str     # identifier handed to edge-tts, e.g. "zh-TW-HsiaoChenNeural"
    language: str     # locale tag that VOICE_GROUPS matches against, e.g. "zh-TW"
    gender: str       # gender label shown in the voice dropdown
    bilingual: str    # mixed-language capability label shown in the dropdown
    description: str  # short style description shown in the dropdown


VOICE_REGISTRY = [
    # Chinese (Taiwan) - annotated as supporting Chinese-English mixing
    VoiceInfo(
        voice_id="zh-TW-HsiaoChenNeural",
        language="zh-TW",
        gender="女",
        bilingual="中英混雜 ✓",
        description="知性專業 (預設)",
    ),
    VoiceInfo(
        voice_id="zh-TW-HsiaoYuNeural",
        language="zh-TW",
        gender="女",
        bilingual="中英混雜 ✓",
        description="活潑年輕",
    ),
    VoiceInfo(
        voice_id="zh-TW-YunJheNeural",
        language="zh-TW",
        gender="男",
        bilingual="中英混雜 ✓",
        description="成熟穩重",
    ),
    # Chinese (Mainland) - annotated as supporting Chinese-English mixing
    VoiceInfo(
        voice_id="zh-CN-XiaoxiaoNeural",
        language="zh-CN",
        gender="女",
        bilingual="中英混雜 ✓",
        description="甜美親切",
    ),
    VoiceInfo(
        voice_id="zh-CN-YunyangNeural",
        language="zh-CN",
        gender="男",
        bilingual="中英混雜 ✓",
        description="新聞播報風格",
    ),
    # Vietnamese - annotated as supporting Vietnamese-English mixing
    VoiceInfo(
        voice_id="vi-VN-HoaiMyNeural",
        language="vi-VN",
        gender="女",
        bilingual="越英混雜 ✓",
        description="溫柔清晰 (預設)",
    ),
    VoiceInfo(
        voice_id="vi-VN-NamMinhNeural",
        language="vi-VN",
        gender="男",
        bilingual="越英混雜 ✓",
        description="專業沉穩",
    ),
    # English (US) - English only
    VoiceInfo(
        voice_id="en-US-JennyNeural",
        language="en-US",
        gender="女",
        bilingual="純英文",
        description="標準美式 (預設)",
    ),
    VoiceInfo(
        voice_id="en-US-AriaNeural",
        language="en-US",
        gender="女",
        bilingual="純英文",
        description="自然對話",
    ),
    VoiceInfo(
        voice_id="en-US-GuyNeural",
        language="en-US",
        gender="男",
        bilingual="純英文",
        description="專業旁白",
    ),
]

# Default voice for each lower-cased language code found in the Excel "Lang" column
DEFAULT_VOICE_MAP = {
    "zh": "zh-TW-HsiaoChenNeural",
    "zh-tw": "zh-TW-HsiaoChenNeural",
    "zh-cn": "zh-CN-XiaoxiaoNeural",
    "vi": "vi-VN-HoaiMyNeural",
    "en": "en-US-JennyNeural",
}

# Voice groups for dropdown
VOICE_GROUPS = {
    "中文語音 (適合中英混雜簡報)": ["zh-TW", "zh-CN"],
    "越南語音 (適合越英混雜簡報)": ["vi-VN"],
    "英文語音 (適合純英文簡報)": ["en-US"],
}


# =============================================================================
# Excel Input Module
# =============================================================================

@dataclass
class ScriptRow:
    """One narration entry read from the Excel script."""

    filename: str  # value of the "Filename" column; used to name the output file
    text: str      # value of the "Text" column; the narration to synthesize
    lang: str      # lower-cased "Lang" column value, "zh" when absent or empty


def _cell_to_str(value) -> str:
    """Return a spreadsheet cell as stripped text; missing cells become ''."""
    # pd.isna covers None, NaN and NaT; it is only meaningful for scalars.
    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return ""
    # A numeric column that contains a blank is read as float, so a cell
    # holding 1 arrives as 1.0; render it as "1" rather than "1.0".
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value).strip()


def load_excel(file_path: str) -> list[ScriptRow]:
    """Load and parse an Excel script into ScriptRow objects.

    Column names are matched case-insensitively after stripping whitespace.
    Rows whose Filename or Text cell is empty are skipped; an empty or absent
    Lang cell defaults to "zh".

    Args:
        file_path: Path of the .xlsx file to read.

    Returns:
        The usable rows, in sheet order.

    Raises:
        ValueError: If the Filename or Text column is missing.
    """
    df = pd.read_excel(file_path, engine='openpyxl')

    # Headers may be numbers or dates, so normalize through str() instead of
    # the .str accessor, which only accepts string-valued indexes.
    df.columns = [str(col).strip().lower() for col in df.columns]

    for column, label in (("filename", "Filename"), ("text", "Text")):
        if column not in df.columns:
            raise ValueError(f"Excel 檔案缺少必要欄位: {label}")

    rows = []
    for _, row in df.iterrows():
        filename = _cell_to_str(row.get("filename"))
        text = _cell_to_str(row.get("text"))
        if not filename or not text:
            continue  # both fields are required to produce an audio file

        lang = _cell_to_str(row.get("lang")).lower() or "zh"
        rows.append(ScriptRow(filename=filename, text=text, lang=lang))

    return rows


# =============================================================================
# TTS Engine Module
# =============================================================================

async def synthesize_speech(text: str, voice_id: str, output_path: str) -> None:
    """Generate speech for *text* with edge-tts and save it to *output_path*."""
    communicate = edge_tts.Communicate(text, voice_id)
    await communicate.save(output_path)


def get_voice_for_lang(lang: str) -> str:
    """Return the default voice ID for a language code.

    The lookup is case-insensitive and accepts "_" as a separator. A
    region-qualified code that has no entry of its own (for example "en-US"
    or "vi_VN") falls back to its primary language ("en", "vi"). Codes that
    match nothing get the Taiwanese Mandarin default voice.
    """
    code = (lang or "").strip().lower().replace("_", "-")
    if code in DEFAULT_VOICE_MAP:
        return DEFAULT_VOICE_MAP[code]
    primary = code.split("-", 1)[0]
    return DEFAULT_VOICE_MAP.get(primary, "zh-TW-HsiaoChenNeural")


# =============================================================================
# TTS Worker Thread
# =============================================================================

class TTSWorker(QThread):
    """Worker thread for batch TTS processing."""

    progress = pyqtSignal(int, int)  # current, total
    log_message = pyqtSignal(str)
    finished_batch = pyqtSignal(int, int)  # success_count, fail_count

    def __init__(self, rows: list[ScriptRow], output_dir: str,
                 selected_voice: Optional[str] = None):
        """Store the batch to process.

        Args:
            rows: Script rows to synthesize, in order.
            output_dir: Directory that receives the .mp3 files.
            selected_voice: Voice ID applied to every row, or None to pick a
                voice per row from its language code.
        """
        super().__init__()
        self.rows = rows
        self.output_dir = output_dir
        self.selected_voice = selected_voice
        self._stop_flag = False

    def stop(self):
        """Request graceful stop after current file."""
        self._stop_flag = True

    def _output_path_for(self, filename: str) -> str:
        """Return the .mp3 path for *filename* inside the output directory."""
        # Avoid "name.mp3.mp3" when the sheet already includes the extension.
        stem = filename[:-4] if filename.lower().endswith(".mp3") else filename
        return os.path.join(self.output_dir, f"{stem}.mp3")

    def run(self):
        """Execute batch TTS processing in the worker thread.

        Emits log_message and progress while running, and finished_batch with
        the success and failure counts at the end.
        """
        total = len(self.rows)

        # An exception escaping run() would skip finished_batch and leave the
        # window's controls disabled, so report a directory failure instead.
        try:
            Path(self.output_dir).mkdir(parents=True, exist_ok=True)
        except OSError as e:
            self.log_message.emit(f"錯誤: 無法建立輸出資料夾 {self.output_dir} - {e}")
            self.finished_batch.emit(0, total)
            return

        # This thread has no event loop of its own; create one for edge-tts.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

        success_count = 0
        fail_count = 0

        try:
            for i, row in enumerate(self.rows):
                if self._stop_flag:
                    self.log_message.emit(f"已停止處理 (完成 {i}/{total})")
                    break

                # An explicit dropdown choice overrides the per-row language.
                voice_id = self.selected_voice or get_voice_for_lang(row.lang)
                output_path = self._output_path_for(row.filename)

                self.log_message.emit(f"正在處理: {row.filename}")

                try:
                    loop.run_until_complete(
                        self._synthesize_with_retry(row.text, voice_id, output_path)
                    )
                except Exception as e:
                    # One bad row must not abort the batch; log and continue.
                    self.log_message.emit(f"錯誤: {row.filename} - {str(e)}")
                    fail_count += 1
                else:
                    self.log_message.emit(f"完成: {row.filename}")
                    success_count += 1

                self.progress.emit(i + 1, total)

                # Rate limit delay (0.5s between requests)
                if not self._stop_flag and i < total - 1:
                    loop.run_until_complete(asyncio.sleep(0.5))
        finally:
            loop.close()

        self.finished_batch.emit(success_count, fail_count)

    async def _synthesize_with_retry(self, text: str, voice_id: str,
                                     output_path: str, max_retries: int = 1):
        """Synthesize one file, retrying after a one second pause on failure.

        Args:
            text: Narration text.
            voice_id: Voice ID passed to edge-tts.
            output_path: Destination audio file.
            max_retries: Extra attempts after the first one; values below
                zero are treated as zero.

        Raises:
            Exception: Whatever the final attempt raised.
        """
        attempts = max(0, max_retries) + 1
        for attempt in range(attempts):
            try:
                await synthesize_speech(text, voice_id, output_path)
                return
            except Exception:
                if attempt == attempts - 1:
                    raise
                await asyncio.sleep(1)  # Wait before retry


# =============================================================================
# Main Window GUI
# =============================================================================

class MainWindow(QMainWindow):
    """Main window: file and voice selection, batch controls, progress, log."""

    # Item data marking the non-selectable group headers in the voice dropdown
    _SEPARATOR_DATA = "separator"

    def __init__(self):
        super().__init__()
        self.worker = None
        self._closing = False  # set once the window has been asked to close
        self.init_ui()

    def init_ui(self):
        """Build the window layout from its five sections, top to bottom."""
        self.setWindowTitle("智慧簡報旁白生成系統 - Smart Slide Voiceover System")
        self.setMinimumSize(700, 500)

        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        layout = QVBoxLayout(central_widget)

        layout.addWidget(self._build_file_group())
        layout.addWidget(self._build_voice_group())
        layout.addLayout(self._build_button_row())
        layout.addLayout(self._build_progress_row())
        layout.addWidget(self._build_log_group(), 1)  # log takes spare height

    def _build_file_group(self) -> QGroupBox:
        """Create the group holding the Excel and output-folder pickers."""
        file_group = QGroupBox("檔案設定")
        file_layout = QVBoxLayout(file_group)

        # Excel file browser
        excel_layout = QHBoxLayout()
        excel_layout.addWidget(QLabel("Excel 講稿:"))
        self.file_path_edit = QLineEdit()
        self.file_path_edit.setReadOnly(True)
        self.file_path_edit.setPlaceholderText("請選擇 .xlsx 檔案...")
        excel_layout.addWidget(self.file_path_edit, 1)
        self.browse_btn = QPushButton("瀏覽...")
        self.browse_btn.clicked.connect(self.browse_file)
        excel_layout.addWidget(self.browse_btn)
        file_layout.addLayout(excel_layout)

        # Output directory
        output_layout = QHBoxLayout()
        output_layout.addWidget(QLabel("輸出資料夾:"))
        self.output_path_edit = QLineEdit()
        self.output_path_edit.setPlaceholderText("預設: Excel 檔案所在目錄/output")
        output_layout.addWidget(self.output_path_edit, 1)
        self.output_browse_btn = QPushButton("瀏覽...")
        self.output_browse_btn.clicked.connect(self.browse_output)
        output_layout.addWidget(self.output_browse_btn)
        file_layout.addLayout(output_layout)

        return file_group

    def _build_voice_group(self) -> QGroupBox:
        """Create the group holding the voice dropdown."""
        voice_group = QGroupBox("語音設定")
        voice_layout = QHBoxLayout(voice_group)

        voice_layout.addWidget(QLabel("選擇語音:"))
        self.voice_combo = QComboBox()
        self.voice_combo.setMinimumWidth(350)
        self._populate_voice_combo()
        voice_layout.addWidget(self.voice_combo, 1)

        return voice_group

    def _build_button_row(self) -> QHBoxLayout:
        """Create the start/stop button row."""
        btn_layout = QHBoxLayout()

        self.start_btn = QPushButton("開始")
        self.start_btn.setMinimumHeight(40)
        self.start_btn.clicked.connect(self.start_processing)
        btn_layout.addWidget(self.start_btn)

        self.stop_btn = QPushButton("停止")
        self.stop_btn.setMinimumHeight(40)
        self.stop_btn.setEnabled(False)  # only meaningful while a batch runs
        self.stop_btn.clicked.connect(self.stop_processing)
        btn_layout.addWidget(self.stop_btn)

        return btn_layout

    def _build_progress_row(self) -> QHBoxLayout:
        """Create the progress bar row with its "current/total" label."""
        progress_layout = QHBoxLayout()
        progress_layout.addWidget(QLabel("進度:"))
        self.progress_bar = QProgressBar()
        self.progress_bar.setValue(0)
        progress_layout.addWidget(self.progress_bar, 1)
        self.progress_label = QLabel("0/0")
        progress_layout.addWidget(self.progress_label)
        return progress_layout

    def _build_log_group(self) -> QGroupBox:
        """Create the read-only log console group."""
        log_group = QGroupBox("處理日誌")
        log_layout = QVBoxLayout(log_group)
        self.log_console = QTextEdit()
        self.log_console.setReadOnly(True)
        self.log_console.setMinimumHeight(150)
        log_layout.addWidget(self.log_console)
        return log_group

    def _populate_voice_combo(self):
        """Populate voice dropdown with grouped options."""
        # "Auto" comes first; its data is None, meaning "choose per row".
        self.voice_combo.addItem("自動 (依 Excel Lang 欄位決定)", None)

        for group_name, lang_codes in VOICE_GROUPS.items():
            self.voice_combo.addItem(f"─── {group_name} ───", self._SEPARATOR_DATA)
            # Disable the header item so it cannot be chosen as a voice.
            idx = self.voice_combo.count() - 1
            self.voice_combo.model().item(idx).setEnabled(False)

            for voice in VOICE_REGISTRY:
                if voice.language in lang_codes:
                    display = f" {voice.voice_id} ({voice.gender}) - {voice.bilingual} - {voice.description}"
                    self.voice_combo.addItem(display, voice.voice_id)

    def browse_file(self):
        """Open file dialog to select Excel file."""
        file_path, _ = QFileDialog.getOpenFileName(
            self,
            "選擇 Excel 講稿檔案",
            "",
            "Excel Files (*.xlsx);;All Files (*)"
        )
        if file_path:
            self.file_path_edit.setText(file_path)
            # Suggest "<excel folder>/output" unless a folder is already set.
            if not self.output_path_edit.text():
                default_output = os.path.join(os.path.dirname(file_path), "output")
                self.output_path_edit.setText(default_output)

    def browse_output(self):
        """Open dialog to select output directory."""
        dir_path = QFileDialog.getExistingDirectory(self, "選擇輸出資料夾")
        if dir_path:
            self.output_path_edit.setText(dir_path)

    def start_processing(self):
        """Validate the inputs, load the script and start the worker thread."""
        file_path = self.file_path_edit.text()
        if not file_path:
            QMessageBox.warning(self, "警告", "請先選擇 Excel 講稿檔案")
            return

        if not os.path.exists(file_path):
            QMessageBox.warning(self, "警告", "選擇的檔案不存在")
            return

        output_dir = self.output_path_edit.text()
        if not output_dir:
            output_dir = os.path.join(os.path.dirname(file_path), "output")
            self.output_path_edit.setText(output_dir)

        selected_voice = self.voice_combo.currentData()
        if selected_voice == self._SEPARATOR_DATA:
            # Group headers are disabled, but never pass the sentinel on as
            # if it were a voice id; treat it as automatic selection.
            selected_voice = None

        try:
            rows = load_excel(file_path)
            if not rows:
                QMessageBox.warning(self, "警告", "Excel 檔案中沒有有效的資料")
                return
        except Exception as e:
            QMessageBox.critical(self, "錯誤", f"載入 Excel 失敗:\n{str(e)}")
            return

        # Clear log and reset progress
        self.log_console.clear()
        self.progress_bar.setValue(0)
        self.progress_label.setText(f"0/{len(rows)}")

        # Lock the inputs while the batch runs
        self.start_btn.setEnabled(False)
        self.stop_btn.setEnabled(True)
        self.browse_btn.setEnabled(False)
        self.output_browse_btn.setEnabled(False)
        self.voice_combo.setEnabled(False)

        self.log_console.append(f"開始處理 {len(rows)} 筆資料...")
        if selected_voice:
            self.log_console.append(f"使用語音: {selected_voice}")
        else:
            self.log_console.append("使用自動語音選擇 (依 Lang 欄位)")
        self.log_console.append("")

        self.worker = TTSWorker(rows, output_dir, selected_voice)
        self.worker.progress.connect(self.on_progress)
        self.worker.log_message.connect(self.on_log)
        self.worker.finished_batch.connect(self.on_finished)
        self.worker.start()

    def stop_processing(self):
        """Request stop of current processing."""
        if self.worker:
            self.worker.stop()
            self.stop_btn.setEnabled(False)
            self.log_console.append("\n正在停止...")

    def on_progress(self, current: int, total: int):
        """Update the progress bar and the "current/total" label."""
        percent = int((current / total) * 100) if total > 0 else 0
        self.progress_bar.setValue(percent)
        self.progress_label.setText(f"{current}/{total}")

    def on_log(self, message: str):
        """Append message to log console."""
        self.log_console.append(message)
        # Auto-scroll to bottom
        scrollbar = self.log_console.verticalScrollBar()
        scrollbar.setValue(scrollbar.maximum())

    def on_finished(self, success_count: int, fail_count: int):
        """Handle batch completion: unlock the UI and report the result."""
        # finished_batch is emitted from inside run(); wait for the thread to
        # actually exit before the last reference to it is dropped.
        if self.worker is not None:
            self.worker.wait()

        if self._closing:
            # The window is going away; do not pop a dialog over it.
            self.worker = None
            return

        # Reset UI state
        self.start_btn.setEnabled(True)
        self.stop_btn.setEnabled(False)
        self.browse_btn.setEnabled(True)
        self.output_browse_btn.setEnabled(True)
        self.voice_combo.setEnabled(True)

        total = success_count + fail_count
        self.log_console.append("")
        self.log_console.append("===== 處理完成 =====")
        self.log_console.append(f"成功: {success_count} / {total}")
        if fail_count > 0:
            self.log_console.append(f"失敗: {fail_count}")

        if fail_count == 0:
            QMessageBox.information(
                self,
                "完成",
                f"所有 {success_count} 個音檔已成功生成!\n\n"
                f"輸出位置: {self.output_path_edit.text()}"
            )
        else:
            QMessageBox.warning(
                self,
                "完成 (有錯誤)",
                f"處理完成\n\n成功: {success_count}\n失敗: {fail_count}\n\n"
                "請查看日誌了解詳情。"
            )

        self.worker = None

    def closeEvent(self, event):
        """Stop a running batch and wait for it before the window closes."""
        worker = self.worker
        if worker is not None and worker.isRunning():
            # Destroying a QThread that is still running is an error, so ask
            # it to stop and block until the file in progress is finished.
            self._closing = True
            worker.stop()
            worker.wait()
        super().closeEvent(event)


# =============================================================================
# Application Entry Point
# =============================================================================

def main():
    """Create the application and main window, then run the Qt event loop."""
    app = QApplication(sys.argv)

    # Set application style
    app.setStyle("Fusion")

    window = MainWindow()
    window.show()

    sys.exit(app.exec())


if __name__ == "__main__":
    main()