feat: 新增智慧簡報旁白生成系統 (Smart Slide Voiceover System)

- 新增 Excel 輸入模組:解析 .xlsx 格式講稿檔案
- 新增 TTS 引擎模組:整合 edge-tts 調用 Azure Neural Voice
- 新增 PyQt6 圖形介面:檔案選擇、語音選擇、進度監控
- 新增執行緒模型:QThread + Asyncio 確保 UI 響應性
- 支援 10 種 Neural Voice (中文/越南/英文)
- 支援中英混雜、越英混雜發音

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
beabigegg
2025-12-27 15:42:11 +08:00
commit 33ea22f259
25 changed files with 1943 additions and 0 deletions

474
main.py Normal file
View File

@@ -0,0 +1,474 @@
"""
Smart Slide Voiceover System
智慧簡報旁白生成系統
A desktop application for batch converting Excel scripts to professional voiceover audio files.
"""
import sys
import asyncio
import os
from pathlib import Path
from dataclasses import dataclass
from typing import Optional
import pandas as pd
import edge_tts
from PyQt6.QtWidgets import (
QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
QPushButton, QLabel, QLineEdit, QFileDialog, QProgressBar,
QTextEdit, QComboBox, QMessageBox, QGroupBox
)
from PyQt6.QtCore import QThread, pyqtSignal, Qt
# =============================================================================
# Voice Registry - All available voices with bilingual support annotations
# =============================================================================
@dataclass
class VoiceInfo:
    """Descriptive record for one selectable text-to-speech voice.

    Attributes:
        voice_id: Voice name handed to edge-tts when this voice is selected.
        language: Locale tag (e.g. "zh-TW") used to group voices in the
            dropdown.
        gender: Short gender label shown in parentheses in the dropdown.
        bilingual: Display note describing mixed-language support.
        description: Short display description of the voice's style.
    """

    voice_id: str
    language: str
    gender: str
    bilingual: str
    description: str
# Every voice offered in the dropdown, in display order.
# The gender label is rendered in parentheses next to the voice id, so it must
# not be left blank (a blank label shows up as "()" in the UI).
VOICE_REGISTRY = [
    # Chinese (Taiwan) - supports Chinese-English mixing
    VoiceInfo("zh-TW-HsiaoChenNeural", "zh-TW", "女", "中英混雜 ✓", "知性專業 (預設)"),
    VoiceInfo("zh-TW-HsiaoYuNeural", "zh-TW", "女", "中英混雜 ✓", "活潑年輕"),
    VoiceInfo("zh-TW-YunJheNeural", "zh-TW", "男", "中英混雜 ✓", "成熟穩重"),
    # Chinese (Mainland) - supports Chinese-English mixing
    VoiceInfo("zh-CN-XiaoxiaoNeural", "zh-CN", "女", "中英混雜 ✓", "甜美親切"),
    VoiceInfo("zh-CN-YunyangNeural", "zh-CN", "男", "中英混雜 ✓", "新聞播報風格"),
    # Vietnamese - supports Vietnamese-English mixing
    VoiceInfo("vi-VN-HoaiMyNeural", "vi-VN", "女", "越英混雜 ✓", "溫柔清晰 (預設)"),
    VoiceInfo("vi-VN-NamMinhNeural", "vi-VN", "男", "越英混雜 ✓", "專業沉穩"),
    # English (US) - English only
    VoiceInfo("en-US-JennyNeural", "en-US", "女", "純英文", "標準美式 (預設)"),
    VoiceInfo("en-US-AriaNeural", "en-US", "女", "純英文", "自然對話"),
    VoiceInfo("en-US-GuyNeural", "en-US", "男", "純英文", "專業旁白"),
]
# Default voice mapping by language code
DEFAULT_VOICE_MAP = {
    # Keys are lower-case language codes; values are edge-tts voice names.
    "zh": "zh-TW-HsiaoChenNeural",
    "zh-tw": "zh-TW-HsiaoChenNeural",
    "zh-cn": "zh-CN-XiaoxiaoNeural",
    "vi": "vi-VN-HoaiMyNeural",
    # Region-qualified spelling, so a sheet that says "vi-VN" does not fall
    # through to the Chinese fallback voice.
    "vi-vn": "vi-VN-HoaiMyNeural",
    "en": "en-US-JennyNeural",
    # Same for "en-US".
    "en-us": "en-US-JennyNeural",
}
# Voice groups for dropdown
# Dropdown section heading -> locale tags whose voices are listed under it.
# Insertion order is the order in which the sections appear in the dropdown.
VOICE_GROUPS = {
    "中文語音 (適合中英混雜簡報)": [
        "zh-TW",
        "zh-CN",
    ],
    "越南語音 (適合越英混雜簡報)": [
        "vi-VN",
    ],
    "英文語音 (適合純英文簡報)": [
        "en-US",
    ],
}
# =============================================================================
# Excel Input Module
# =============================================================================
@dataclass
class ScriptRow:
    """One line of the narration script, as read from the Excel sheet.

    Attributes:
        filename: Base name of the audio file to produce; the worker appends
            ".mp3" to it.
        text: The text to be spoken.
        lang: Lower-case language code used to pick a default voice when the
            user has not forced a specific one.
    """

    filename: str
    text: str
    lang: str
def load_excel(file_path: str) -> list[ScriptRow]:
    """Load an .xlsx script and return its usable rows.

    Column headers are matched case-insensitively after trimming whitespace.
    ``filename`` and ``text`` are required columns; ``lang`` is optional and
    defaults to ``"zh"`` when the column or the cell is missing.  Rows whose
    filename or text cell is empty are skipped.

    Args:
        file_path: Path to the Excel workbook.

    Returns:
        The valid rows, in sheet order.

    Raises:
        ValueError: If the ``filename`` or ``text`` column is absent.
    """
    df = pd.read_excel(file_path, engine='openpyxl')

    # Normalize headers via str() so numeric or otherwise non-string headers
    # cannot break the normalization step.
    df.columns = [str(col).strip().lower() for col in df.columns]

    if 'filename' not in df.columns:
        raise ValueError("Excel 檔案缺少必要欄位: Filename")
    if 'text' not in df.columns:
        raise ValueError("Excel 檔案缺少必要欄位: Text")

    def cell_to_text(value) -> str:
        """Render one cell as trimmed text; missing cells become ''."""
        if pd.isna(value):
            return ''
        # A numeric column containing blanks is read as float, so the cell
        # "1" arrives as 1.0; render it as "1" rather than "1.0".
        if isinstance(value, float) and value.is_integer():
            return str(int(value))
        return str(value).strip()

    rows = []
    for _, row in df.iterrows():
        filename = cell_to_text(row.get('filename'))
        text = cell_to_text(row.get('text'))
        # Skip rows with empty required fields.
        if not filename or not text:
            continue
        # Missing column or blank cell -> default language.
        lang = cell_to_text(row.get('lang')).lower() or 'zh'
        rows.append(ScriptRow(filename=filename, text=text, lang=lang))
    return rows
# =============================================================================
# TTS Engine Module
# =============================================================================
async def synthesize_speech(text: str, voice_id: str, output_path: str) -> None:
    """Synthesize ``text`` with edge-tts and write the audio to ``output_path``.

    Args:
        text: The text to be spoken.
        voice_id: edge-tts voice name.
        output_path: Destination file for the generated audio.
    """
    await edge_tts.Communicate(text, voice_id).save(output_path)
def get_voice_for_lang(lang: str) -> str:
    """Return the default voice ID for a language code.

    The lookup is forgiving about how the code is written: case, surrounding
    whitespace and ``_`` vs ``-`` are ignored, and a region-qualified code that
    has no entry of its own (e.g. ``"en-GB"``) falls back to its primary
    language (``"en"``).  Anything still unknown, including an empty or
    ``None`` code, gets the zh-TW default voice.

    Args:
        lang: Language code such as ``"zh"``, ``"en-US"`` or ``"vi_VN"``.

    Returns:
        An edge-tts voice name.
    """
    fallback = "zh-TW-HsiaoChenNeural"
    code = (lang or "").strip().lower().replace("_", "-")
    if code in DEFAULT_VOICE_MAP:
        return DEFAULT_VOICE_MAP[code]
    # "en-gb" -> "en": better the right language than the Chinese fallback.
    primary = code.split("-", 1)[0]
    return DEFAULT_VOICE_MAP.get(primary, fallback)
# =============================================================================
# TTS Worker Thread
# =============================================================================
class TTSWorker(QThread):
    """Worker thread that synthesizes one MP3 per script row.

    The thread creates its own asyncio event loop to drive the coroutine-based
    TTS call, and reports back only through the signals below.

    Signals:
        progress(current, total): emitted after each processed row.
        log_message(text): status line intended for the log console.
        finished_batch(success_count, fail_count): emitted once when the
            batch ends, whether it completed, was stopped, or could not start.
    """

    progress = pyqtSignal(int, int)  # current, total
    log_message = pyqtSignal(str)
    finished_batch = pyqtSignal(int, int)  # success_count, fail_count

    def __init__(self, rows: list[ScriptRow], output_dir: str,
                 selected_voice: Optional[str] = None):
        """Store the batch parameters.

        Args:
            rows: Script rows to synthesize, in order.
            output_dir: Directory that receives the ``<filename>.mp3`` files.
            selected_voice: Voice ID forced for every row, or ``None`` to pick
                a default voice per row from its language code.
        """
        super().__init__()
        self.rows = rows
        self.output_dir = output_dir
        self.selected_voice = selected_voice
        self._stop_flag = False

    def stop(self):
        """Request graceful stop after current file."""
        self._stop_flag = True

    def run(self):
        """Execute batch TTS processing in worker thread."""
        total = len(self.rows)

        # A failure here must still end with finished_batch, otherwise the
        # window never re-enables its controls.
        try:
            Path(self.output_dir).mkdir(parents=True, exist_ok=True)
        except OSError as e:
            self.log_message.emit(f"錯誤: 無法建立輸出資料夾 - {e}")
            self.finished_batch.emit(0, total)
            return

        # Create new event loop for this thread
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

        success_count = 0
        fail_count = 0
        try:
            for i, row in enumerate(self.rows):
                if self._stop_flag:
                    self.log_message.emit(f"已停止處理 (完成 {i}/{total})")
                    break

                # An explicitly selected voice overrides the per-row language.
                if self.selected_voice:
                    voice_id = self.selected_voice
                else:
                    voice_id = get_voice_for_lang(row.lang)

                output_path = os.path.join(self.output_dir, f"{row.filename}.mp3")
                self.log_message.emit(f"正在處理: {row.filename}")
                try:
                    # Run async TTS with retry
                    loop.run_until_complete(
                        self._synthesize_with_retry(row.text, voice_id, output_path)
                    )
                    self.log_message.emit(f"完成: {row.filename}")
                    success_count += 1
                except Exception as e:
                    # One bad row must not abort the batch: log it and go on.
                    self.log_message.emit(f"錯誤: {row.filename} - {str(e)}")
                    fail_count += 1

                self.progress.emit(i + 1, total)

                # Rate limit delay (0.5s between requests)
                if not self._stop_flag and i < total - 1:
                    loop.run_until_complete(asyncio.sleep(0.5))
        finally:
            loop.close()

        self.finished_batch.emit(success_count, fail_count)

    async def _synthesize_with_retry(self, text: str, voice_id: str,
                                     output_path: str, max_retries: int = 1):
        """Synthesize one file, retrying after a one-second pause on failure.

        Args:
            text: The text to be spoken.
            voice_id: edge-tts voice name.
            output_path: Destination audio file.
            max_retries: Extra attempts after the first one; negative values
                are treated as 0 so at least one attempt is always made.

        Raises:
            Exception: The error of the last attempt, once all have failed.
        """
        attempts = max(max_retries, 0) + 1
        for attempt in range(attempts):
            try:
                await synthesize_speech(text, voice_id, output_path)
                return
            except Exception:
                if attempt < attempts - 1:
                    await asyncio.sleep(1)  # Wait before retry
                    continue
                # Out of attempts: do not leave a truncated or empty file that
                # could be mistaken for a finished voiceover.
                try:
                    os.remove(output_path)
                except OSError:
                    pass  # nothing to clean up; the synthesis error matters more
                raise
# =============================================================================
# Main Window GUI
# =============================================================================
class MainWindow(QMainWindow):
    """Main application window.

    Lets the user pick the Excel script, the output folder and a voice, then
    runs the batch in a ``TTSWorker`` thread while showing progress and a log.
    """

    def __init__(self):
        super().__init__()
        # The running TTSWorker, or None when no batch is in progress.
        self.worker = None
        self.init_ui()

    def init_ui(self):
        """Build all widgets and wire their signals."""
        self.setWindowTitle("智慧簡報旁白生成系統 - Smart Slide Voiceover System")
        self.setMinimumSize(700, 500)

        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        layout = QVBoxLayout(central_widget)

        # File selection group
        file_group = QGroupBox("檔案設定")
        file_layout = QVBoxLayout(file_group)

        # Excel file browser
        excel_layout = QHBoxLayout()
        excel_layout.addWidget(QLabel("Excel 講稿:"))
        self.file_path_edit = QLineEdit()
        self.file_path_edit.setReadOnly(True)
        self.file_path_edit.setPlaceholderText("請選擇 .xlsx 檔案...")
        excel_layout.addWidget(self.file_path_edit, 1)
        self.browse_btn = QPushButton("瀏覽...")
        self.browse_btn.clicked.connect(self.browse_file)
        excel_layout.addWidget(self.browse_btn)
        file_layout.addLayout(excel_layout)

        # Output directory
        output_layout = QHBoxLayout()
        output_layout.addWidget(QLabel("輸出資料夾:"))
        self.output_path_edit = QLineEdit()
        self.output_path_edit.setPlaceholderText("預設: Excel 檔案所在目錄/output")
        output_layout.addWidget(self.output_path_edit, 1)
        self.output_browse_btn = QPushButton("瀏覽...")
        self.output_browse_btn.clicked.connect(self.browse_output)
        output_layout.addWidget(self.output_browse_btn)
        file_layout.addLayout(output_layout)
        layout.addWidget(file_group)

        # Voice selection group
        voice_group = QGroupBox("語音設定")
        voice_layout = QHBoxLayout(voice_group)
        voice_layout.addWidget(QLabel("選擇語音:"))
        self.voice_combo = QComboBox()
        self.voice_combo.setMinimumWidth(350)
        self._populate_voice_combo()
        voice_layout.addWidget(self.voice_combo, 1)
        layout.addWidget(voice_group)

        # Control buttons
        btn_layout = QHBoxLayout()
        self.start_btn = QPushButton("開始")
        self.start_btn.setMinimumHeight(40)
        self.start_btn.clicked.connect(self.start_processing)
        btn_layout.addWidget(self.start_btn)
        self.stop_btn = QPushButton("停止")
        self.stop_btn.setMinimumHeight(40)
        self.stop_btn.setEnabled(False)
        self.stop_btn.clicked.connect(self.stop_processing)
        btn_layout.addWidget(self.stop_btn)
        layout.addLayout(btn_layout)

        # Progress bar
        progress_layout = QHBoxLayout()
        progress_layout.addWidget(QLabel("進度:"))
        self.progress_bar = QProgressBar()
        self.progress_bar.setValue(0)
        progress_layout.addWidget(self.progress_bar, 1)
        self.progress_label = QLabel("0/0")
        progress_layout.addWidget(self.progress_label)
        layout.addLayout(progress_layout)

        # Log console
        log_group = QGroupBox("處理日誌")
        log_layout = QVBoxLayout(log_group)
        self.log_console = QTextEdit()
        self.log_console.setReadOnly(True)
        self.log_console.setMinimumHeight(150)
        log_layout.addWidget(self.log_console)
        layout.addWidget(log_group, 1)

    def _populate_voice_combo(self):
        """Populate voice dropdown with grouped options.

        Each item's user data is the voice ID; the first item carries ``None``
        and means "choose per row from the Lang column".
        """
        # Add "Auto" option first
        self.voice_combo.addItem("自動 (依 Excel Lang 欄位決定)", None)

        # Add voices grouped by language
        for group_name, lang_codes in VOICE_GROUPS.items():
            self.voice_combo.addItem(f"─── {group_name} ───", "separator")
            # Make separator non-selectable
            idx = self.voice_combo.count() - 1
            self.voice_combo.model().item(idx).setEnabled(False)

            for voice in VOICE_REGISTRY:
                if voice.language in lang_codes:
                    # Drop the parentheses entirely when no gender label is
                    # available instead of rendering an empty "()".
                    gender = f" ({voice.gender})" if voice.gender else ""
                    display = f" {voice.voice_id}{gender} - {voice.bilingual} - {voice.description}"
                    self.voice_combo.addItem(display, voice.voice_id)

    def browse_file(self):
        """Open file dialog to select Excel file."""
        file_path, _ = QFileDialog.getOpenFileName(
            self, "選擇 Excel 講稿檔案", "",
            "Excel Files (*.xlsx);;All Files (*)"
        )
        if file_path:
            self.file_path_edit.setText(file_path)
            # Auto-set output directory, but never overwrite a user choice.
            if not self.output_path_edit.text():
                default_output = os.path.join(os.path.dirname(file_path), "output")
                self.output_path_edit.setText(default_output)

    def browse_output(self):
        """Open dialog to select output directory."""
        dir_path = QFileDialog.getExistingDirectory(self, "選擇輸出資料夾")
        if dir_path:
            self.output_path_edit.setText(dir_path)

    def start_processing(self):
        """Validate the inputs, load the script and start the worker thread."""
        # Validate file selection
        file_path = self.file_path_edit.text()
        if not file_path:
            QMessageBox.warning(self, "警告", "請先選擇 Excel 講稿檔案")
            return
        if not os.path.exists(file_path):
            QMessageBox.warning(self, "警告", "選擇的檔案不存在")
            return

        # Get output directory
        output_dir = self.output_path_edit.text()
        if not output_dir:
            output_dir = os.path.join(os.path.dirname(file_path), "output")
            self.output_path_edit.setText(output_dir)

        # Get selected voice (None means automatic, per-row selection)
        selected_voice = self.voice_combo.currentData()

        # Load Excel
        try:
            rows = load_excel(file_path)
            if not rows:
                QMessageBox.warning(self, "警告", "Excel 檔案中沒有有效的資料")
                return
        except Exception as e:
            QMessageBox.critical(self, "錯誤", f"載入 Excel 失敗:\n{str(e)}")
            return

        # Clear log and reset progress
        self.log_console.clear()
        self.progress_bar.setValue(0)
        self.progress_label.setText(f"0/{len(rows)}")

        # Update UI state: lock the inputs while the batch is running
        self.start_btn.setEnabled(False)
        self.stop_btn.setEnabled(True)
        self.browse_btn.setEnabled(False)
        self.output_browse_btn.setEnabled(False)
        self.voice_combo.setEnabled(False)

        # Log start
        self.log_console.append(f"開始處理 {len(rows)} 筆資料...")
        if selected_voice:
            self.log_console.append(f"使用語音: {selected_voice}")
        else:
            self.log_console.append("使用自動語音選擇 (依 Lang 欄位)")
        self.log_console.append("")

        # Create and start worker
        self.worker = TTSWorker(rows, output_dir, selected_voice)
        self.worker.progress.connect(self.on_progress)
        self.worker.log_message.connect(self.on_log)
        self.worker.finished_batch.connect(self.on_finished)
        self.worker.start()

    def stop_processing(self):
        """Request stop of current processing."""
        if self.worker:
            self.worker.stop()
            self.stop_btn.setEnabled(False)
            self.log_console.append("\n正在停止...")

    def on_progress(self, current: int, total: int):
        """Update progress bar."""
        percent = int((current / total) * 100) if total > 0 else 0
        self.progress_bar.setValue(percent)
        self.progress_label.setText(f"{current}/{total}")

    def on_log(self, message: str):
        """Append message to log console."""
        self.log_console.append(message)
        # Auto-scroll to bottom
        scrollbar = self.log_console.verticalScrollBar()
        scrollbar.setValue(scrollbar.maximum())

    def on_finished(self, success_count: int, fail_count: int):
        """Handle batch completion."""
        # The signal is emitted from inside run(), so the thread may still be
        # winding down. Wait for it before the reference is released below:
        # a QThread object must not be destroyed while its thread is running.
        if self.worker is not None:
            self.worker.wait()

        # Reset UI state
        self.start_btn.setEnabled(True)
        self.stop_btn.setEnabled(False)
        self.browse_btn.setEnabled(True)
        self.output_browse_btn.setEnabled(True)
        self.voice_combo.setEnabled(True)

        # Show completion message
        total = success_count + fail_count
        self.log_console.append("")
        self.log_console.append("===== 處理完成 =====")
        self.log_console.append(f"成功: {success_count} / {total}")
        if fail_count > 0:
            self.log_console.append(f"失敗: {fail_count}")

        # Show dialog
        if fail_count == 0:
            QMessageBox.information(
                self, "完成",
                f"所有 {success_count} 個音檔已成功生成!\n\n"
                f"輸出位置: {self.output_path_edit.text()}"
            )
        else:
            QMessageBox.warning(
                self, "完成 (有錯誤)",
                f"處理完成\n\n成功: {success_count}\n失敗: {fail_count}\n\n"
                f"請查看日誌了解詳情。"
            )
        self.worker = None

    def closeEvent(self, event):
        """Stop a running batch before the window goes away.

        Closing the window while the worker thread is still running would
        destroy a live QThread. Ask the worker to stop and block until the
        file it is currently processing is done.
        """
        worker = self.worker
        if worker is not None and worker.isRunning():
            # No completion dialog for a window that is being closed.
            worker.finished_batch.disconnect(self.on_finished)
            worker.stop()
            worker.wait()
            self.worker = None
        super().closeEvent(event)
# =============================================================================
# Application Entry Point
# =============================================================================
def main():
    """Create the Qt application, show the main window and run the event loop.

    The process exits with the event loop's return code.
    """
    app = QApplication(sys.argv)
    # Set application style
    app.setStyle("Fusion")

    window = MainWindow()
    window.show()
    sys.exit(app.exec())
# Launch the GUI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()