feat: Add embedded backend packaging for all-in-one deployment

- Add backend/run_server.py entry point for embedded deployment
- Add backend/build.py PyInstaller script for backend packaging
- Modify config.py to support frozen executable paths (see the sketch below)
- Extend client/config.json with backend configuration section
- Add backend sidecar management in Electron main process
- Add Whisper model download progress reporting
- Update build-client.bat with --embedded-backend flag
- Update DEPLOYMENT.md with all-in-one deployment documentation

This enables packaging the frontend and backend into a single executable
for simplified enterprise deployment. The change is backward compatible
with the existing separate deployment mode (backend.embedded: false).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
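As a point of reference for the "frozen executable paths" item above: the commit's actual config.py changes are not reproduced in this excerpt, but PyInstaller-frozen builds are conventionally detected through `sys.frozen` and `sys._MEIPASS`. The sketch below is a hedged illustration of that pattern; the function name and layout are assumptions, not code from the commit.

```python
# Illustrative sketch only; not taken from this commit's config.py.
# PyInstaller sets sys.frozen on the bundled interpreter and exposes the
# unpacked resource directory as sys._MEIPASS, so path resolution usually
# branches on those markers.
import sys
from pathlib import Path


def resource_root() -> Path:
    """Directory against which bundled resources (models, configs) are resolved."""
    if getattr(sys, "frozen", False):
        # Running inside a PyInstaller bundle.
        return Path(getattr(sys, "_MEIPASS", Path(sys.executable).parent))
    # Running from source: resolve relative to this file.
    return Path(__file__).resolve().parent
```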
Author: egg
Date: 2025-12-17 10:06:29 +08:00
Parent: b1633fdcff
Commit: 58f379bc0c

11 changed files with 1003 additions and 17 deletions


@@ -31,6 +31,8 @@ try:
     from faster_whisper import WhisperModel
     import opencc
     import numpy as np
+    from huggingface_hub import snapshot_download, hf_hub_download
+    from huggingface_hub.utils import tqdm as hf_tqdm
 except ImportError as e:
     print(json.dumps({"error": f"Missing dependency: {e}"}), file=sys.stderr)
     sys.exit(1)
@@ -43,6 +45,152 @@ except ImportError:
     ONNX_AVAILABLE = False

+
+def check_and_download_whisper_model(model_size: str) -> bool:
+    """
+    Check if Whisper model is cached, download with progress if not.
+
+    Returns:
+        True if model is ready (cached or downloaded), False on error
+    """
+    # faster-whisper model repository mapping
+    repo_id = f"Systran/faster-whisper-{model_size}"
+
+    # Check if model is already cached
+    cache_dir = Path.home() / ".cache" / "huggingface" / "hub"
+    repo_cache_name = f"models--Systran--faster-whisper-{model_size}"
+    model_cache_path = cache_dir / repo_cache_name
+
+    # Check if model files exist
+    if model_cache_path.exists():
+        snapshots_dir = model_cache_path / "snapshots"
+        if snapshots_dir.exists() and any(snapshots_dir.iterdir()):
+            # Model is cached, no download needed
+            return True
+
+    # Model not cached, need to download
+    print(json.dumps({
+        "status": "downloading_model",
+        "model": model_size,
+        "repo": repo_id,
+        "progress": 0
+    }), flush=True)
+
+    try:
+        # Custom progress callback class
+        class DownloadProgressCallback:
+            def __init__(self):
+                self.total_files = 0
+                self.downloaded_files = 0
+                self.current_file_progress = 0
+                self.last_reported_percent = -5  # Report every 5%
+
+            def __call__(self, progress: float, total: float, filename: str = ""):
+                if total > 0:
+                    percent = int((progress / total) * 100)
+                    # Report every 5% or at completion
+                    if percent >= self.last_reported_percent + 5 or percent == 100:
+                        self.last_reported_percent = percent
+                        downloaded_mb = progress / (1024 * 1024)
+                        total_mb = total / (1024 * 1024)
+                        print(json.dumps({
+                            "status": "downloading_model",
+                            "model": model_size,
+                            "progress": percent,
+                            "downloaded_mb": round(downloaded_mb, 1),
+                            "total_mb": round(total_mb, 1),
+                            "file": filename
+                        }), flush=True)
+
+        # Use huggingface_hub to download with a simple approach
+        # We'll monitor the download by checking file sizes
+        import threading
+        import time
+
+        download_complete = False
+        download_error = None
+
+        def download_thread():
+            nonlocal download_complete, download_error
+            try:
+                snapshot_download(
+                    repo_id,
+                    local_dir=None,  # Use default cache
+                    local_dir_use_symlinks=False,
+                )
+                download_complete = True
+            except Exception as e:
+                download_error = str(e)
+
+        # Start download in background thread
+        thread = threading.Thread(target=download_thread)
+        thread.start()
+
+        # Monitor progress by checking cache directory
+        last_size = 0
+        last_report_time = time.time()
+        estimated_size_mb = {
+            "tiny": 77,
+            "base": 145,
+            "small": 488,
+            "medium": 1530,
+            "large": 3100,
+            "large-v2": 3100,
+            "large-v3": 3100,
+        }.get(model_size, 1530)  # Default to medium size
+
+        while thread.is_alive():
+            time.sleep(1)
+            try:
+                # Check current download size
+                current_size = 0
+                if model_cache_path.exists():
+                    for file in model_cache_path.rglob("*"):
+                        if file.is_file():
+                            current_size += file.stat().st_size
+
+                current_mb = current_size / (1024 * 1024)
+                progress = min(99, int((current_mb / estimated_size_mb) * 100))
+
+                # Report progress every 5 seconds or if significant change
+                now = time.time()
+                if now - last_report_time >= 5 or (current_mb - last_size / (1024 * 1024)) > 50:
+                    if current_size > last_size:
+                        print(json.dumps({
+                            "status": "downloading_model",
+                            "model": model_size,
+                            "progress": progress,
+                            "downloaded_mb": round(current_mb, 1),
+                            "total_mb": estimated_size_mb
+                        }), flush=True)
+                        last_size = current_size
+                        last_report_time = now
+            except Exception:
+                pass
+
+        thread.join()
+
+        if download_error:
+            print(json.dumps({
+                "status": "download_error",
+                "error": download_error
+            }), flush=True)
+            return False
+
+        print(json.dumps({
+            "status": "model_downloaded",
+            "model": model_size
+        }), flush=True)
+        return True
+
+    except Exception as e:
+        print(json.dumps({
+            "status": "download_error",
+            "error": str(e)
+        }), flush=True)
+        return False
+
+
 class ChinesePunctuator:
     """Rule-based Chinese punctuation processor."""
@@ -342,17 +490,21 @@ class Transcriber:
         self.vad_model: Optional[SileroVAD] = None

         try:
-            print(json.dumps({"status": "loading_model", "model": model_size}), file=sys.stderr)
+            # Check if model needs to be downloaded (with progress reporting)
+            check_and_download_whisper_model(model_size)
+
+            # Now load the model
+            print(json.dumps({"status": "loading_model", "model": model_size}), flush=True)
             self.model = WhisperModel(model_size, device=device, compute_type=compute_type)
             self.converter = opencc.OpenCC("s2twp")
-            print(json.dumps({"status": "model_loaded"}), file=sys.stderr)
+            print(json.dumps({"status": "model_loaded"}), flush=True)

             # Pre-load VAD model at startup (not when streaming starts)
             if ONNX_AVAILABLE:
                 self.vad_model = SileroVAD()
         except Exception as e:
-            print(json.dumps({"error": f"Failed to load model: {e}"}), file=sys.stderr)
+            print(json.dumps({"error": f"Failed to load model: {e}"}), flush=True)
             raise

     def transcribe_file(self, audio_path: str, add_punctuation: bool = False) -> str:
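Both the download helper and the updated Transcriber init report status as one JSON object per line on stdout with flush=True. The consumer in this commit is the Electron sidecar manager, whose code is not shown in this excerpt; as a language-agnostic illustration, a supervising process can stream that protocol along the lines of the hedged Python sketch below. The backend/run_server.py path comes from the commit message; everything else is assumed.

```python
# Sketch only: stand-in for the sidecar manager, showing how the
# line-delimited JSON status messages emitted above can be consumed.
import json
import subprocess
import sys

proc = subprocess.Popen(
    [sys.executable, "backend/run_server.py"],  # entry point named in the commit message
    stdout=subprocess.PIPE,
    text=True,
)

for line in proc.stdout:
    try:
        msg = json.loads(line)
    except json.JSONDecodeError:
        continue  # Skip any non-JSON output.
    status = msg.get("status")
    if status == "downloading_model":
        print(f"Downloading {msg.get('model')}: {msg.get('progress')}% "
              f"({msg.get('downloaded_mb', '?')} / {msg.get('total_mb', '?')} MB)")
    elif status == "download_error":
        print(f"Backend error: {msg.get('error')}")
        break
    elif status == "model_loaded":
        print("Backend ready")
        break
```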