fix: Improve Whisper model status verification and PyInstaller builds

- Add robust model cache verification (check model.bin + config.json)
- Add new status messages: model_cached, incomplete_cache, model_error
- Forward model status events to frontend for better UI feedback
- Add clean_build_cache() to remove stale spec files before build
- Add --clean flag to PyInstaller commands
- Change sidecar from --onefile to --onedir for faster startup
- Add missing hidden imports: onnxruntime, wave, huggingface_hub.utils

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-17 20:33:59 +08:00
parent 012cdaf5f3
commit d75789f23e
5 changed files with 136 additions and 15 deletions
--- a/sidecar/build.py
+++ b/sidecar/build.py
@@ -1,29 +1,66 @@
 #!/usr/bin/env python3
 """
 Build script for creating standalone transcriber executable using PyInstaller.
+Uses --onedir mode for faster startup compared to --onefile.
 """

 import subprocess
 import sys
 import os
+import shutil
+
+
+def clean_build_cache(script_dir):
+    """Clean old build artifacts that may cause stale spec file issues."""
+    dirs_to_clean = [
+        os.path.join(script_dir, "build"),
+        os.path.join(script_dir, "__pycache__"),
+    ]
+    files_to_clean = [
+        os.path.join(script_dir, "build", "transcriber.spec"),
+    ]
+
+    for f in files_to_clean:
+        if os.path.exists(f):
+            print(f"Removing old spec file: {f}")
+            os.remove(f)
+
+    for d in dirs_to_clean:
+        pycache = os.path.join(d)
+        if os.path.exists(pycache) and "__pycache__" in pycache:
+            print(f"Removing cache: {pycache}")
+            shutil.rmtree(pycache)


 def build():
    """Build the transcriber executable."""
-    # PyInstaller command
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+
+    # Clean old build cache to avoid stale spec file issues
+    clean_build_cache(script_dir)
+
+    # PyInstaller command with --onedir for faster startup
    cmd = [
        sys.executable, "-m", "PyInstaller",
-        "--onefile",
+        "--onedir",
+        "--clean",  # Clean PyInstaller cache before building
        "--name", "transcriber",
        "--distpath", "dist",
        "--workpath", "build",
        "--specpath", "build",
+        # Core dependencies
        "--hidden-import", "faster_whisper",
        "--hidden-import", "opencc",
        "--hidden-import", "numpy",
        "--hidden-import", "ctranslate2",
        "--hidden-import", "huggingface_hub",
+        "--hidden-import", "huggingface_hub.utils",
        "--hidden-import", "tokenizers",
+        # ONNX Runtime for VAD
+        "--hidden-import", "onnxruntime",
+        # Audio processing
+        "--hidden-import", "wave",
+        # Collect data files
        "--collect-data", "faster_whisper",
        "--collect-data", "opencc",
        "transcriber.py"
@@ -32,10 +69,12 @@ def build():
    print("Building transcriber executable...")
    print(f"Command: {' '.join(cmd)}")

-    result = subprocess.run(cmd, cwd=os.path.dirname(os.path.abspath(__file__)))
+    result = subprocess.run(cmd, cwd=script_dir)

    if result.returncode == 0:
-        print("\nBuild successful! Executable created at: dist/transcriber")
+        print("\nBuild successful!")
+        print("Executable created at: dist/transcriber/transcriber.exe (Windows) or dist/transcriber/transcriber (Linux)")
+        print("\nNote: The Whisper model will be downloaded on first run if not cached.")
    else:
        print("\nBuild failed!")
        sys.exit(1)
--- a/sidecar/transcriber.py
+++ b/sidecar/transcriber.py
@@ -60,12 +60,31 @@ def check_and_download_whisper_model(model_size: str) -> bool:
    repo_cache_name = f"models--Systran--faster-whisper-{model_size}"
    model_cache_path = cache_dir / repo_cache_name

-    # Check if model files exist
+    # Check if model files exist - verify essential files are present
    if model_cache_path.exists():
        snapshots_dir = model_cache_path / "snapshots"
-        if snapshots_dir.exists() and any(snapshots_dir.iterdir()):
-            # Model is cached, no download needed
-            return True
+        if snapshots_dir.exists():
+            # Check for actual model files, not just any file
+            for snapshot in snapshots_dir.iterdir():
+                if snapshot.is_dir():
+                    # Essential faster-whisper model files
+                    required_files = ["model.bin", "config.json"]
+                    has_all_files = all(
+                        (snapshot / f).exists() for f in required_files
+                    )
+                    if has_all_files:
+                        print(json.dumps({
+                            "status": "model_cached",
+                            "model": model_size,
+                            "path": str(snapshot)
+                        }), flush=True)
+                        return True
+            # Snapshots exist but no valid model found
+            print(json.dumps({
+                "status": "incomplete_cache",
+                "model": model_size,
+                "message": "Model cache incomplete, will re-download"
+            }), flush=True)

    # Model not cached, need to download
    print(json.dumps({
@@ -491,20 +510,29 @@ class Transcriber:

        try:
            # Check if model needs to be downloaded (with progress reporting)
-            check_and_download_whisper_model(model_size)
+            download_ok = check_and_download_whisper_model(model_size)
+            if not download_ok:
+                print(json.dumps({
+                    "status": "model_error",
+                    "error": "Failed to download model"
+                }), flush=True)
+                raise RuntimeError("Failed to download Whisper model")

            # Now load the model
            print(json.dumps({"status": "loading_model", "model": model_size}), flush=True)
            self.model = WhisperModel(model_size, device=device, compute_type=compute_type)
            self.converter = opencc.OpenCC("s2twp")
-            print(json.dumps({"status": "model_loaded"}), flush=True)
+            print(json.dumps({"status": "model_loaded", "model": model_size}), flush=True)

            # Pre-load VAD model at startup (not when streaming starts)
            if ONNX_AVAILABLE:
                self.vad_model = SileroVAD()

        except Exception as e:
-            print(json.dumps({"error": f"Failed to load model: {e}"}), flush=True)
+            print(json.dumps({
+                "status": "model_error",
+                "error": f"Failed to load model: {e}"
+            }), flush=True)
            raise

    def transcribe_file(self, audio_path: str, add_punctuation: bool = False) -> str: