From 43c413c5ced4c593fab649490b537f8e221df378 Mon Sep 17 00:00:00 2001 From: egg Date: Fri, 12 Dec 2025 08:25:25 +0800 Subject: [PATCH] feat: Upgrade Whisper model to medium and increase beam size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Change default model from small to medium for better accuracy - Increase beam_size from 5 to 8 for improved transcription quality - Add Whisper environment variables to start.sh for centralized config 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- sidecar/transcriber.py | 2 +- start.sh | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/sidecar/transcriber.py b/sidecar/transcriber.py index a7cc776..afe0334 100644 --- a/sidecar/transcriber.py +++ b/sidecar/transcriber.py @@ -368,7 +368,7 @@ class Transcriber: segments, info = self.model.transcribe( audio_path, language="zh", # Use "nan" for Taiwanese/Hokkien, "zh" for Mandarin - beam_size=5, + beam_size=8, vad_filter=True, word_timestamps=add_punctuation, # Anti-hallucination settings diff --git a/start.sh b/start.sh index 5595fc1..0210bf8 100755 --- a/start.sh +++ b/start.sh @@ -22,6 +22,11 @@ SIDECAR_DIR="$PROJECT_DIR/sidecar" # Port 設定 BACKEND_PORT=8000 +# Whisper 語音轉文字設定 +export WHISPER_MODEL="medium" # 模型大小: tiny, base, small, medium, large +export WHISPER_DEVICE="cpu" # 執行裝置: cpu, cuda +export WHISPER_COMPUTE="int8" # 運算精度: int8, float16, float32 + # PID 檔案 PID_FILE="$PROJECT_DIR/.running_pids"