feat: Add embedded backend packaging for all-in-one deployment

- Add backend/run_server.py entry point for embedded deployment
- Add backend/build.py PyInstaller script for backend packaging
- Modify config.py to support frozen executable paths (see the sketch below)
- Extend client/config.json with backend configuration section
- Add backend sidecar management in Electron main process
- Add Whisper model download progress reporting
- Update build-client.bat with --embedded-backend flag
- Update DEPLOYMENT.md with all-in-one deployment documentation

This enables packaging the frontend and backend into a single executable
for simplified enterprise deployment. The change is backward compatible
with the existing separate deployment mode (backend.embedded: false).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
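As a point of reference for the "frozen executable paths" item above: the commit's actual config.py changes are not reproduced in this excerpt, but PyInstaller-frozen builds are conventionally detected through `sys.frozen` and `sys._MEIPASS`. The sketch below is a hedged illustration of that pattern; the function name and layout are assumptions, not code from the commit.

```python
# Illustrative sketch only; not taken from this commit's config.py.
# PyInstaller sets sys.frozen on the bundled interpreter and exposes the
# unpacked resource directory as sys._MEIPASS, so path resolution usually
# branches on those markers.
import sys
from pathlib import Path


def resource_root() -> Path:
    """Directory against which bundled resources (models, configs) are resolved."""
    if getattr(sys, "frozen", False):
        # Running inside a PyInstaller bundle.
        return Path(getattr(sys, "_MEIPASS", Path(sys.executable).parent))
    # Running from source: resolve relative to this file.
    return Path(__file__).resolve().parent
```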
Author: egg
Date: 2025-12-17 10:06:29 +08:00
Parent: b1633fdcff
Commit: 58f379bc0c

11 changed files with 1003 additions and 17 deletions


@@ -31,6 +31,8 @@ try:
     from faster_whisper import WhisperModel
     import opencc
     import numpy as np
+    from huggingface_hub import snapshot_download, hf_hub_download
+    from huggingface_hub.utils import tqdm as hf_tqdm
 except ImportError as e:
     print(json.dumps({"error": f"Missing dependency: {e}"}), file=sys.stderr)
     sys.exit(1)
@@ -43,6 +45,152 @@ except ImportError:
     ONNX_AVAILABLE = False

+
+def check_and_download_whisper_model(model_size: str) -> bool:
+    """
+    Check if Whisper model is cached, download with progress if not.
+
+    Returns:
+        True if model is ready (cached or downloaded), False on error
+    """
+    # faster-whisper model repository mapping
+    repo_id = f"Systran/faster-whisper-{model_size}"
+
+    # Check if model is already cached
+    cache_dir = Path.home() / ".cache" / "huggingface" / "hub"
+    repo_cache_name = f"models--Systran--faster-whisper-{model_size}"
+    model_cache_path = cache_dir / repo_cache_name
+
+    # Check if model files exist
+    if model_cache_path.exists():
+        snapshots_dir = model_cache_path / "snapshots"
+        if snapshots_dir.exists() and any(snapshots_dir.iterdir()):
+            # Model is cached, no download needed
+            return True
+
+    # Model not cached, need to download
+    print(json.dumps({
+        "status": "downloading_model",
+        "model": model_size,
+        "repo": repo_id,
+        "progress": 0
+    }), flush=True)
+
+    try:
+        # Custom progress callback class
+        class DownloadProgressCallback:
+            def __init__(self):
+                self.total_files = 0
+                self.downloaded_files = 0
+                self.current_file_progress = 0
+                self.last_reported_percent = -5  # Report every 5%
+
+            def __call__(self, progress: float, total: float, filename: str = ""):
+                if total > 0:
+                    percent = int((progress / total) * 100)
+                    # Report every 5% or at completion
+                    if percent >= self.last_reported_percent + 5 or percent == 100:
+                        self.last_reported_percent = percent
+                        downloaded_mb = progress / (1024 * 1024)
+                        total_mb = total / (1024 * 1024)
+                        print(json.dumps({
+                            "status": "downloading_model",
+                            "model": model_size,
+                            "progress": percent,
+                            "downloaded_mb": round(downloaded_mb, 1),
+                            "total_mb": round(total_mb, 1),
+                            "file": filename
+                        }), flush=True)
+
+        # Use huggingface_hub to download with a simple approach
+        # We'll monitor the download by checking file sizes
+        import threading
+        import time
+
+        download_complete = False
+        download_error = None
+
+        def download_thread():
+            nonlocal download_complete, download_error
+            try:
+                snapshot_download(
+                    repo_id,
+                    local_dir=None,  # Use default cache
+                    local_dir_use_symlinks=False,
+                )
+                download_complete = True
+            except Exception as e:
+                download_error = str(e)
+
+        # Start download in background thread
+        thread = threading.Thread(target=download_thread)
+        thread.start()
+
+        # Monitor progress by checking cache directory
+        last_size = 0
+        last_report_time = time.time()
+        estimated_size_mb = {
+            "tiny": 77,
+            "base": 145,
+            "small": 488,
+            "medium": 1530,
+            "large": 3100,
+            "large-v2": 3100,
+            "large-v3": 3100,
+        }.get(model_size, 1530)  # Default to medium size
+
+        while thread.is_alive():
+            time.sleep(1)
+            try:
+                # Check current download size
+                current_size = 0
+                if model_cache_path.exists():
+                    for file in model_cache_path.rglob("*"):
+                        if file.is_file():
+                            current_size += file.stat().st_size
+
+                current_mb = current_size / (1024 * 1024)
+                progress = min(99, int((current_mb / estimated_size_mb) * 100))
+
+                # Report progress every 5 seconds or if significant change
+                now = time.time()
+                if now - last_report_time >= 5 or (current_mb - last_size / (1024 * 1024)) > 50:
+                    if current_size > last_size:
+                        print(json.dumps({
+                            "status": "downloading_model",
+                            "model": model_size,
+                            "progress": progress,
+                            "downloaded_mb": round(current_mb, 1),
+                            "total_mb": estimated_size_mb
+                        }), flush=True)
+                        last_size = current_size
+                        last_report_time = now
+            except Exception:
+                pass
+
+        thread.join()
+
+        if download_error:
+            print(json.dumps({
+                "status": "download_error",
+                "error": download_error
+            }), flush=True)
+            return False
+
+        print(json.dumps({
+            "status": "model_downloaded",
+            "model": model_size
+        }), flush=True)
+        return True
+
+    except Exception as e:
+        print(json.dumps({
+            "status": "download_error",
+            "error": str(e)
+        }), flush=True)
+        return False
+
+
 class ChinesePunctuator:
     """Rule-based Chinese punctuation processor."""
@@ -342,17 +490,21 @@ class Transcriber:
         self.vad_model: Optional[SileroVAD] = None

         try:
-            print(json.dumps({"status": "loading_model", "model": model_size}), file=sys.stderr)
+            # Check if model needs to be downloaded (with progress reporting)
+            check_and_download_whisper_model(model_size)
+
+            # Now load the model
+            print(json.dumps({"status": "loading_model", "model": model_size}), flush=True)
             self.model = WhisperModel(model_size, device=device, compute_type=compute_type)
             self.converter = opencc.OpenCC("s2twp")
-            print(json.dumps({"status": "model_loaded"}), file=sys.stderr)
+            print(json.dumps({"status": "model_loaded"}), flush=True)

             # Pre-load VAD model at startup (not when streaming starts)
             if ONNX_AVAILABLE:
                 self.vad_model = SileroVAD()
         except Exception as e:
-            print(json.dumps({"error": f"Failed to load model: {e}"}), file=sys.stderr)
+            print(json.dumps({"error": f"Failed to load model: {e}"}), flush=True)
             raise

     def transcribe_file(self, audio_path: str, add_punctuation: bool = False) -> str:
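Both the download helper and the updated Transcriber init report status as one JSON object per line on stdout with flush=True. The consumer in this commit is the Electron sidecar manager, whose code is not shown in this excerpt; as a language-agnostic illustration, a supervising process can stream that protocol along the lines of the hedged Python sketch below. The backend/run_server.py path comes from the commit message; everything else is assumed.

```python
# Sketch only: stand-in for the sidecar manager, showing how the
# line-delimited JSON status messages emitted above can be consumed.
import json
import subprocess
import sys

proc = subprocess.Popen(
    [sys.executable, "backend/run_server.py"],  # entry point named in the commit message
    stdout=subprocess.PIPE,
    text=True,
)

for line in proc.stdout:
    try:
        msg = json.loads(line)
    except json.JSONDecodeError:
        continue  # Skip any non-JSON output.
    status = msg.get("status")
    if status == "downloading_model":
        print(f"Downloading {msg.get('model')}: {msg.get('progress')}% "
              f"({msg.get('downloaded_mb', '?')} / {msg.get('total_mb', '?')} MB)")
    elif status == "download_error":
        print(f"Backend error: {msg.get('error')}")
        break
    elif status == "model_loaded":
        print("Backend ready")
        break
```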