feat: 新增智慧簡報旁白生成系統 (Smart Slide Voiceover System)

- 新增 Excel 輸入模組：解析 .xlsx 格式講稿檔案
- 新增 TTS 引擎模組：整合 edge-tts 調用 Azure Neural Voice
- 新增 PyQt6 圖形介面：檔案選擇、語音選擇、進度監控
- 新增執行緒模型：QThread + Asyncio 確保 UI 響應性
- 支援 10 種 Neural Voice (中文/越南/英文)
- 支援中英混雜、越英混雜發音

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-27 15:42:11 +08:00
commit 33ea22f259
25 changed files with 1943 additions and 0 deletions
--- a/main.py
+++ b/main.py
@@ -0,0 +1,474 @@
+"""
+Smart Slide Voiceover System
+智慧簡報旁白生成系統
+
+A desktop application for batch converting Excel scripts to professional voiceover audio files.
+"""
+
+import sys
+import asyncio
+import os
+from pathlib import Path
+from dataclasses import dataclass
+from typing import Optional
+
+import pandas as pd
+import edge_tts
+from PyQt6.QtWidgets import (
+    QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
+    QPushButton, QLabel, QLineEdit, QFileDialog, QProgressBar,
+    QTextEdit, QComboBox, QMessageBox, QGroupBox
+)
+from PyQt6.QtCore import QThread, pyqtSignal, Qt
+
+
+# =============================================================================
+# Voice Registry - All available voices with bilingual support annotations
+# =============================================================================
+
+@dataclass
+class VoiceInfo:
+    """One selectable TTS voice plus the labels shown for it in the voice dropdown."""
+    # Voice identifier; stored as the dropdown item's data and passed to edge-tts.
+    voice_id: str
+    # Locale tag such as "zh-TW"; compared against the VOICE_GROUPS values.
+    language: str
+    # Gender label shown in the dropdown entry.
+    gender: str
+    # Mixed-language support label shown in the dropdown entry.
+    bilingual: str
+    # Short description of the voice shown in the dropdown entry.
+    description: str
+
+# Every voice offered in the dropdown, in display order within its group.
+# A voice is only listed if its `language` appears in one of the VOICE_GROUPS values.
+VOICE_REGISTRY = [
+    # Chinese (Taiwan) - supports Chinese-English mixing
+    VoiceInfo("zh-TW-HsiaoChenNeural", "zh-TW", "女", "中英混雜 ✓", "知性專業 (預設)"),
+    VoiceInfo("zh-TW-HsiaoYuNeural", "zh-TW", "女", "中英混雜 ✓", "活潑年輕"),
+    VoiceInfo("zh-TW-YunJheNeural", "zh-TW", "男", "中英混雜 ✓", "成熟穩重"),
+    # Chinese (Mainland) - supports Chinese-English mixing
+    VoiceInfo("zh-CN-XiaoxiaoNeural", "zh-CN", "女", "中英混雜 ✓", "甜美親切"),
+    VoiceInfo("zh-CN-YunyangNeural", "zh-CN", "男", "中英混雜 ✓", "新聞播報風格"),
+    # Vietnamese - supports Vietnamese-English mixing
+    VoiceInfo("vi-VN-HoaiMyNeural", "vi-VN", "女", "越英混雜 ✓", "溫柔清晰 (預設)"),
+    VoiceInfo("vi-VN-NamMinhNeural", "vi-VN", "男", "越英混雜 ✓", "專業沉穩"),
+    # English (US) - English only
+    VoiceInfo("en-US-JennyNeural", "en-US", "女", "純英文", "標準美式 (預設)"),
+    VoiceInfo("en-US-AriaNeural", "en-US", "女", "純英文", "自然對話"),
+    VoiceInfo("en-US-GuyNeural", "en-US", "男", "純英文", "專業旁白"),
+]
+
+# Default voice ID per language code, used when the dropdown is left on "auto".
+# Keys are lowercase because the Lang cell is lowercased when the Excel file is
+# loaded and again before the lookup.
+DEFAULT_VOICE_MAP = {
+    "zh": "zh-TW-HsiaoChenNeural",
+    "zh-tw": "zh-TW-HsiaoChenNeural",
+    "zh-cn": "zh-CN-XiaoxiaoNeural",
+    "vi": "vi-VN-HoaiMyNeural",
+    "en": "en-US-JennyNeural",
+}
+
+# Dropdown sections: section title -> locale tags whose voices are listed under it.
+# Each title is rendered as a disabled separator row in the voice dropdown.
+VOICE_GROUPS = {
+    "中文語音 (適合中英混雜簡報)": ["zh-TW", "zh-CN"],
+    "越南語音 (適合越英混雜簡報)": ["vi-VN"],
+    "英文語音 (適合純英文簡報)": ["en-US"],
+}
+
+
+# =============================================================================
+# Excel Input Module
+# =============================================================================
+
+@dataclass
+class ScriptRow:
+    """One usable row of the Excel script, as returned by load_excel."""
+    # Name used for the output audio file inside the output directory.
+    filename: str
+    # Text to be synthesized into speech.
+    text: str
+    # Lowercased language code; picks the default voice when none is selected.
+    lang: str
+
+def load_excel(file_path: str) -> list[ScriptRow]:
+    """Load an Excel script and return its usable rows.
+
+    Column headers are matched case-insensitively after trimming whitespace.
+    The ``Filename`` and ``Text`` columns are required; ``Lang`` is optional
+    and falls back to ``"zh"`` when the column or the cell is missing.
+    Rows whose file name or text is blank are skipped.
+
+    Args:
+        file_path: Path to the ``.xlsx`` file.
+
+    Returns:
+        The usable rows, in sheet order.
+
+    Raises:
+        ValueError: If the ``Filename`` or ``Text`` column is missing.
+    """
+    df = pd.read_excel(file_path, engine='openpyxl')
+
+    # Headers can be non-string (numbers, dates), so coerce each one to str
+    # before normalizing; the `.str` accessor would raise on such headers.
+    df.columns = [str(col).strip().lower() for col in df.columns]
+
+    # Check required columns
+    for column, label in (('filename', 'Filename'), ('text', 'Text')):
+        if column not in df.columns:
+            raise ValueError(f"Excel 檔案缺少必要欄位: {label}")
+
+    def cell_to_str(value) -> str:
+        """Convert one cell to trimmed text; blank cells become ''."""
+        if pd.isna(value):
+            return ''
+        # A numeric column containing blanks is read as float, so a file name
+        # typed as 1 arrives as 1.0; render whole numbers without the ".0".
+        if isinstance(value, float) and value.is_integer():
+            return str(int(value))
+        return str(value).strip()
+
+    rows = []
+    for _, row in df.iterrows():
+        filename = cell_to_str(row.get('filename'))
+        text = cell_to_str(row.get('text'))
+
+        # Skip rows with empty required fields
+        if not filename or not text:
+            continue
+
+        # Default language fallback (missing column or blank cell)
+        lang = cell_to_str(row.get('lang')).lower() or 'zh'
+
+        rows.append(ScriptRow(filename=filename, text=text, lang=lang))
+
+    return rows
+
+
+# =============================================================================
+# TTS Engine Module
+# =============================================================================
+
+async def synthesize_speech(text: str, voice_id: str, output_path: str) -> None:
+    """Generate speech audio with edge-tts and store it at ``output_path``.
+
+    The audio is first written to ``<output_path>.part`` and moved into place
+    only after the download succeeded, so a failed or interrupted request
+    never leaves a truncated file under the final name (and never destroys a
+    previously generated one).
+
+    Args:
+        text: Text to synthesize.
+        voice_id: edge-tts voice identifier.
+        output_path: Destination path of the audio file.
+
+    Raises:
+        Exception: Whatever edge-tts or the file system raises; the partial
+            file is removed before the error propagates.
+    """
+    partial_path = f"{output_path}.part"
+    communicate = edge_tts.Communicate(text, voice_id)
+    try:
+        await communicate.save(partial_path)
+        os.replace(partial_path, output_path)
+    except BaseException:
+        # Best-effort cleanup: the partial file may not exist if the request
+        # failed before anything was written.
+        try:
+            os.remove(partial_path)
+        except OSError:
+            pass
+        raise
+
+
+def get_voice_for_lang(lang: str) -> str:
+    """Return the default voice ID for a language code.
+
+    The code is matched case-insensitively, ignoring surrounding whitespace
+    and treating ``_`` like ``-``. If the full code is unknown, its primary
+    subtag is tried (``"en-US"`` -> ``"en"``), so region-qualified codes get a
+    voice of the right language instead of the global fallback.
+
+    Args:
+        lang: Language code such as ``"zh"``, ``"zh-TW"``, ``"en_US"``.
+
+    Returns:
+        The mapped voice ID, or ``"zh-TW-HsiaoChenNeural"`` when neither the
+        full code nor its primary subtag is in DEFAULT_VOICE_MAP.
+    """
+    normalized = lang.strip().lower().replace("_", "-")
+    primary = normalized.split("-", 1)[0]
+    for key in (normalized, primary):
+        if key in DEFAULT_VOICE_MAP:
+            return DEFAULT_VOICE_MAP[key]
+    return "zh-TW-HsiaoChenNeural"
+
+
+# =============================================================================
+# TTS Worker Thread
+# =============================================================================
+
+class TTSWorker(QThread):
+    """Worker thread that synthesizes one audio file per script row.
+
+    Signals:
+        progress(current, total): Emitted after each row has been attempted.
+        log_message(text): Status line intended for the log console.
+        finished_batch(success_count, fail_count): Emitted once when the run
+            ends: after the last row, after a stop request, or when the
+            output directory cannot be created.
+    """
+
+    progress = pyqtSignal(int, int)  # current, total
+    log_message = pyqtSignal(str)
+    finished_batch = pyqtSignal(int, int)  # success_count, fail_count
+
+    def __init__(self, rows: list[ScriptRow], output_dir: str,
+                 selected_voice: Optional[str] = None):
+        """Store the batch parameters.
+
+        Args:
+            rows: Script rows to synthesize, in order.
+            output_dir: Directory that receives the audio files.
+            selected_voice: Voice ID applied to every row; when falsy, the
+                voice is chosen per row from the row's language code.
+        """
+        super().__init__()
+        self.rows = rows
+        self.output_dir = output_dir
+        self.selected_voice = selected_voice
+        self._stop_flag = False
+
+    def stop(self):
+        """Request graceful stop after current file."""
+        self._stop_flag = True
+
+    @staticmethod
+    def _output_filename(name: str) -> str:
+        """Return the audio file name for a row without doubling ``.mp3``."""
+        if name.lower().endswith(".mp3"):
+            return name
+        return f"{name}.mp3"
+
+    def run(self):
+        """Execute batch TTS processing in worker thread."""
+        total = len(self.rows)
+
+        # Create output directory if needed. An exception escaping run() makes
+        # PyQt abort the process and finished_batch would never be emitted
+        # (leaving the UI locked), so report the failure through the signals.
+        try:
+            Path(self.output_dir).mkdir(parents=True, exist_ok=True)
+        except OSError as e:
+            self.log_message.emit(f"錯誤: 無法建立輸出資料夾 - {e}")
+            self.finished_batch.emit(0, total)
+            return
+
+        # Create new event loop for this thread
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+
+        success_count = 0
+        fail_count = 0
+
+        try:
+            for i, row in enumerate(self.rows):
+                if self._stop_flag:
+                    self.log_message.emit(f"已停止處理 (完成 {i}/{total})")
+                    break
+
+                # An explicitly selected voice overrides the per-row language
+                voice_id = self.selected_voice or get_voice_for_lang(row.lang)
+
+                # NOTE(review): row.filename is used as given, so a value
+                # containing path separators resolves relative to output_dir.
+                output_path = os.path.join(
+                    self.output_dir, self._output_filename(row.filename)
+                )
+
+                self.log_message.emit(f"正在處理: {row.filename}")
+
+                try:
+                    # Run async TTS with retry
+                    loop.run_until_complete(
+                        self._synthesize_with_retry(row.text, voice_id, output_path)
+                    )
+                except Exception as e:
+                    # Some exceptions carry no message; fall back to the type
+                    # name so the log line is never empty after the dash.
+                    detail = str(e) or type(e).__name__
+                    self.log_message.emit(f"錯誤: {row.filename} - {detail}")
+                    fail_count += 1
+                else:
+                    self.log_message.emit(f"完成: {row.filename}")
+                    success_count += 1
+
+                self.progress.emit(i + 1, total)
+
+                # Rate limit delay (0.5s between requests)
+                if not self._stop_flag and i < total - 1:
+                    loop.run_until_complete(asyncio.sleep(0.5))
+
+        finally:
+            loop.close()
+
+        self.finished_batch.emit(success_count, fail_count)
+
+    async def _synthesize_with_retry(self, text: str, voice_id: str,
+                                      output_path: str, max_retries: int = 1):
+        """Synthesize one file, retrying after a one-second pause on failure.
+
+        Args:
+            text: Text to synthesize.
+            voice_id: Voice identifier passed to the TTS engine.
+            output_path: Destination path of the audio file.
+            max_retries: Extra attempts after the first one; negative values
+                are treated as 0 so at least one attempt is always made.
+
+        Raises:
+            Exception: The error of the last failed attempt.
+        """
+        retries = max(max_retries, 0)
+        last_error = None
+        for attempt in range(retries + 1):
+            try:
+                await synthesize_speech(text, voice_id, output_path)
+                return
+            except Exception as e:
+                last_error = e
+                if attempt < retries:
+                    await asyncio.sleep(1)  # Wait before retry
+        raise last_error
+
+
+# =============================================================================
+# Main Window GUI
+# =============================================================================
+
+class MainWindow(QMainWindow):
+    """Main window: file and voice selection, start/stop, progress and log.
+
+    Batch processing runs in a TTSWorker thread; this class only reacts to
+    its signals and keeps the widgets' enabled state consistent.
+    """
+
+    def __init__(self):
+        """Build the window; no worker exists until processing is started."""
+        super().__init__()
+        self.worker = None
+        self.init_ui()
+
+    def init_ui(self):
+        """Create and lay out all widgets and connect their signals."""
+        self.setWindowTitle("智慧簡報旁白生成系統 - Smart Slide Voiceover System")
+        self.setMinimumSize(700, 500)
+
+        central_widget = QWidget()
+        self.setCentralWidget(central_widget)
+        layout = QVBoxLayout(central_widget)
+
+        # File selection group
+        file_group = QGroupBox("檔案設定")
+        file_layout = QVBoxLayout(file_group)
+
+        # Excel file browser (path is read-only; it is set via the dialog)
+        excel_layout = QHBoxLayout()
+        excel_layout.addWidget(QLabel("Excel 講稿:"))
+        self.file_path_edit = QLineEdit()
+        self.file_path_edit.setReadOnly(True)
+        self.file_path_edit.setPlaceholderText("請選擇 .xlsx 檔案...")
+        excel_layout.addWidget(self.file_path_edit, 1)
+        self.browse_btn = QPushButton("瀏覽...")
+        self.browse_btn.clicked.connect(self.browse_file)
+        excel_layout.addWidget(self.browse_btn)
+        file_layout.addLayout(excel_layout)
+
+        # Output directory (editable by hand or via the dialog)
+        output_layout = QHBoxLayout()
+        output_layout.addWidget(QLabel("輸出資料夾:"))
+        self.output_path_edit = QLineEdit()
+        self.output_path_edit.setPlaceholderText("預設: Excel 檔案所在目錄/output")
+        output_layout.addWidget(self.output_path_edit, 1)
+        self.output_browse_btn = QPushButton("瀏覽...")
+        self.output_browse_btn.clicked.connect(self.browse_output)
+        output_layout.addWidget(self.output_browse_btn)
+        file_layout.addLayout(output_layout)
+
+        layout.addWidget(file_group)
+
+        # Voice selection group
+        voice_group = QGroupBox("語音設定")
+        voice_layout = QHBoxLayout(voice_group)
+        voice_layout.addWidget(QLabel("選擇語音:"))
+        self.voice_combo = QComboBox()
+        self.voice_combo.setMinimumWidth(350)
+        self._populate_voice_combo()
+        voice_layout.addWidget(self.voice_combo, 1)
+        layout.addWidget(voice_group)
+
+        # Control buttons
+        btn_layout = QHBoxLayout()
+        self.start_btn = QPushButton("開始")
+        self.start_btn.setMinimumHeight(40)
+        self.start_btn.clicked.connect(self.start_processing)
+        btn_layout.addWidget(self.start_btn)
+
+        self.stop_btn = QPushButton("停止")
+        self.stop_btn.setMinimumHeight(40)
+        self.stop_btn.setEnabled(False)
+        self.stop_btn.clicked.connect(self.stop_processing)
+        btn_layout.addWidget(self.stop_btn)
+        layout.addLayout(btn_layout)
+
+        # Progress bar
+        progress_layout = QHBoxLayout()
+        progress_layout.addWidget(QLabel("進度:"))
+        self.progress_bar = QProgressBar()
+        self.progress_bar.setValue(0)
+        progress_layout.addWidget(self.progress_bar, 1)
+        self.progress_label = QLabel("0/0")
+        progress_layout.addWidget(self.progress_label)
+        layout.addLayout(progress_layout)
+
+        # Log console
+        log_group = QGroupBox("處理日誌")
+        log_layout = QVBoxLayout(log_group)
+        self.log_console = QTextEdit()
+        self.log_console.setReadOnly(True)
+        self.log_console.setMinimumHeight(150)
+        log_layout.addWidget(self.log_console)
+        layout.addWidget(log_group, 1)
+
+    def _populate_voice_combo(self):
+        """Populate voice dropdown with grouped options.
+
+        Each item's data is the voice ID; the "auto" item carries ``None`` and
+        the disabled group headers carry the string ``"separator"``.
+        """
+        # Add "Auto" option first
+        self.voice_combo.addItem("自動 (依 Excel Lang 欄位決定)", None)
+
+        # Add voices grouped by language
+        for group_name, lang_codes in VOICE_GROUPS.items():
+            self.voice_combo.addItem(f"─── {group_name} ───", "separator")
+            # Make separator non-selectable
+            idx = self.voice_combo.count() - 1
+            self.voice_combo.model().item(idx).setEnabled(False)
+
+            for voice in VOICE_REGISTRY:
+                if voice.language in lang_codes:
+                    display = f"  {voice.voice_id} ({voice.gender}) - {voice.bilingual} - {voice.description}"
+                    self.voice_combo.addItem(display, voice.voice_id)
+
+    def browse_file(self):
+        """Open file dialog to select Excel file."""
+        file_path, _ = QFileDialog.getOpenFileName(
+            self, "選擇 Excel 講稿檔案", "",
+            "Excel Files (*.xlsx);;All Files (*)"
+        )
+        if file_path:
+            self.file_path_edit.setText(file_path)
+            # Auto-set output directory, but never overwrite a user choice
+            if not self.output_path_edit.text():
+                default_output = os.path.join(os.path.dirname(file_path), "output")
+                self.output_path_edit.setText(default_output)
+
+    def browse_output(self):
+        """Open dialog to select output directory."""
+        dir_path = QFileDialog.getExistingDirectory(self, "選擇輸出資料夾")
+        if dir_path:
+            self.output_path_edit.setText(dir_path)
+
+    def start_processing(self):
+        """Validate the inputs, load the Excel file and start the worker."""
+        # Validate file selection
+        file_path = self.file_path_edit.text()
+        if not file_path:
+            QMessageBox.warning(self, "警告", "請先選擇 Excel 講稿檔案")
+            return
+
+        if not os.path.exists(file_path):
+            QMessageBox.warning(self, "警告", "選擇的檔案不存在")
+            return
+
+        # Get output directory. The field is hand-editable, so strip stray
+        # whitespace; a blank value falls back to "<excel dir>/output".
+        output_dir = self.output_path_edit.text().strip()
+        if not output_dir:
+            output_dir = os.path.join(os.path.dirname(file_path), "output")
+            self.output_path_edit.setText(output_dir)
+
+        # Get selected voice (None means "auto")
+        selected_voice = self.voice_combo.currentData()
+
+        # Load Excel
+        try:
+            rows = load_excel(file_path)
+            if not rows:
+                QMessageBox.warning(self, "警告", "Excel 檔案中沒有有效的資料")
+                return
+        except Exception as e:
+            QMessageBox.critical(self, "錯誤", f"載入 Excel 失敗:\n{str(e)}")
+            return
+
+        # Clear log and reset progress
+        self.log_console.clear()
+        self.progress_bar.setValue(0)
+        self.progress_label.setText(f"0/{len(rows)}")
+
+        # Update UI state: lock the inputs while the batch is running
+        self.start_btn.setEnabled(False)
+        self.stop_btn.setEnabled(True)
+        self.browse_btn.setEnabled(False)
+        self.output_browse_btn.setEnabled(False)
+        self.voice_combo.setEnabled(False)
+
+        # Log start
+        self.log_console.append(f"開始處理 {len(rows)} 筆資料...")
+        if selected_voice:
+            self.log_console.append(f"使用語音: {selected_voice}")
+        else:
+            self.log_console.append("使用自動語音選擇 (依 Lang 欄位)")
+        self.log_console.append("")
+
+        # Create and start worker
+        self.worker = TTSWorker(rows, output_dir, selected_voice)
+        self.worker.progress.connect(self.on_progress)
+        self.worker.log_message.connect(self.on_log)
+        self.worker.finished_batch.connect(self.on_finished)
+        self.worker.start()
+
+    def stop_processing(self):
+        """Request stop of current processing."""
+        if self.worker:
+            self.worker.stop()
+            self.stop_btn.setEnabled(False)
+            self.log_console.append("\n正在停止...")
+
+    def on_progress(self, current: int, total: int):
+        """Update progress bar."""
+        percent = int((current / total) * 100) if total > 0 else 0
+        self.progress_bar.setValue(percent)
+        self.progress_label.setText(f"{current}/{total}")
+
+    def on_log(self, message: str):
+        """Append message to log console."""
+        self.log_console.append(message)
+        # Auto-scroll to bottom
+        scrollbar = self.log_console.verticalScrollBar()
+        scrollbar.setValue(scrollbar.maximum())
+
+    def on_finished(self, success_count: int, fail_count: int):
+        """Handle batch completion: unlock the UI and report the result."""
+        # finished_batch is emitted from inside run(), so the thread may still
+        # be returning. Wait for it before the last reference is dropped;
+        # destroying a QThread that is still running crashes the process.
+        worker = self.worker
+        if worker is not None:
+            worker.wait()
+        self.worker = None
+
+        # Reset UI state
+        self.start_btn.setEnabled(True)
+        self.stop_btn.setEnabled(False)
+        self.browse_btn.setEnabled(True)
+        self.output_browse_btn.setEnabled(True)
+        self.voice_combo.setEnabled(True)
+
+        # Show completion message
+        total = success_count + fail_count
+        self.log_console.append("")
+        self.log_console.append("===== 處理完成 =====")
+        self.log_console.append(f"成功: {success_count} / {total}")
+        if fail_count > 0:
+            self.log_console.append(f"失敗: {fail_count}")
+
+        # Show dialog
+        if fail_count == 0:
+            QMessageBox.information(
+                self, "完成",
+                f"所有 {success_count} 個音檔已成功生成！\n\n"
+                f"輸出位置: {self.output_path_edit.text()}"
+            )
+        else:
+            QMessageBox.warning(
+                self, "完成 (有錯誤)",
+                f"處理完成\n\n成功: {success_count}\n失敗: {fail_count}\n\n"
+                f"請查看日誌了解詳情。"
+            )
+
+    def closeEvent(self, event):
+        """Stop a running batch before the window is closed.
+
+        Without this, closing the window mid-batch would destroy a QThread
+        that is still running. The wait lasts until the file currently being
+        synthesized has finished.
+        """
+        worker = self.worker
+        if worker is not None and worker.isRunning():
+            worker.stop()
+            # The window is going away; do not pop up the completion dialog.
+            worker.finished_batch.disconnect(self.on_finished)
+            worker.wait()
+        self.worker = None
+        super().closeEvent(event)
+
+
+# =============================================================================
+# Application Entry Point
+# =============================================================================
+
+def main():
+    """Create the Qt application, show the main window and run the event loop.
+
+    The process exits with the status returned by the event loop.
+    """
+    qt_app = QApplication(sys.argv)
+    qt_app.setStyle("Fusion")  # application-wide widget style
+
+    main_window = MainWindow()
+    main_window.show()
+
+    exit_status = qt_app.exec()
+    sys.exit(exit_status)
+
+
+# Start the GUI only when this file is run as a script, not when imported.
+if __name__ == "__main__":
+    main()