feat: Add embedded backend packaging for all-in-one deployment
- Add backend/run_server.py entry point for embedded deployment
- Add backend/build.py PyInstaller script for backend packaging
- Modify config.py to support frozen executable paths
- Extend client/config.json with backend configuration section
- Add backend sidecar management in Electron main process
- Add Whisper model download progress reporting
- Update build-client.bat with --embedded-backend flag
- Update DEPLOYMENT.md with all-in-one deployment documentation

This enables packaging the frontend and backend into a single executable
for simplified enterprise deployment. Backward compatible with the existing
separate deployment mode (backend.embedded: false).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
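Editor's note: the config.py change for frozen executable paths is not among the hunks shown below. For reference, a minimal sketch of the standard PyInstaller pattern such a change typically relies on — sys.frozen and sys._MEIPASS are PyInstaller's actual runtime attributes, but the function name and layout here are illustrative, not the committed code:

import sys
from pathlib import Path

def app_base_dir() -> Path:
    """Resolve the base directory for bundled resources in source and frozen runs."""
    if getattr(sys, "frozen", False):
        # One-file PyInstaller builds unpack to a temp dir exposed as sys._MEIPASS;
        # one-folder builds fall back to the executable's own directory.
        return Path(getattr(sys, "_MEIPASS", Path(sys.executable).parent))
    # Running from source: resolve relative to this file
    return Path(__file__).resolve().parent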
@@ -31,6 +31,8 @@ try:
     from faster_whisper import WhisperModel
     import opencc
     import numpy as np
+    from huggingface_hub import snapshot_download, hf_hub_download
+    from huggingface_hub.utils import tqdm as hf_tqdm
 except ImportError as e:
     print(json.dumps({"error": f"Missing dependency: {e}"}), file=sys.stderr)
     sys.exit(1)
@@ -43,6 +45,152 @@ except ImportError:
     ONNX_AVAILABLE = False
 
 
+def check_and_download_whisper_model(model_size: str) -> bool:
+    """
+    Check if Whisper model is cached, download with progress if not.
+
+    Returns:
+        True if model is ready (cached or downloaded), False on error
+    """
+    # faster-whisper model repository mapping
+    repo_id = f"Systran/faster-whisper-{model_size}"
+
+    # Check if model is already cached
+    cache_dir = Path.home() / ".cache" / "huggingface" / "hub"
+    repo_cache_name = f"models--Systran--faster-whisper-{model_size}"
+    model_cache_path = cache_dir / repo_cache_name
+
+    # Check if model files exist
+    if model_cache_path.exists():
+        snapshots_dir = model_cache_path / "snapshots"
+        if snapshots_dir.exists() and any(snapshots_dir.iterdir()):
+            # Model is cached, no download needed
+            return True
+
+    # Model not cached, need to download
+    print(json.dumps({
+        "status": "downloading_model",
+        "model": model_size,
+        "repo": repo_id,
+        "progress": 0
+    }), flush=True)
+
+    try:
+        # Custom progress callback class
+        class DownloadProgressCallback:
+            def __init__(self):
+                self.total_files = 0
+                self.downloaded_files = 0
+                self.current_file_progress = 0
+                self.last_reported_percent = -5  # Report every 5%
+
+            def __call__(self, progress: float, total: float, filename: str = ""):
+                if total > 0:
+                    percent = int((progress / total) * 100)
+                    # Report every 5% or at completion
+                    if percent >= self.last_reported_percent + 5 or percent == 100:
+                        self.last_reported_percent = percent
+                        downloaded_mb = progress / (1024 * 1024)
+                        total_mb = total / (1024 * 1024)
+                        print(json.dumps({
+                            "status": "downloading_model",
+                            "model": model_size,
+                            "progress": percent,
+                            "downloaded_mb": round(downloaded_mb, 1),
+                            "total_mb": round(total_mb, 1),
+                            "file": filename
+                        }), flush=True)
+
+        # Use huggingface_hub to download with a simple approach
+        # We'll monitor the download by checking file sizes
+        import threading
+        import time
+
+        download_complete = False
+        download_error = None
+
+        def download_thread():
+            nonlocal download_complete, download_error
+            try:
+                snapshot_download(
+                    repo_id,
+                    local_dir=None,  # Use default cache
+                    local_dir_use_symlinks=False,
+                )
+                download_complete = True
+            except Exception as e:
+                download_error = str(e)
+
+        # Start download in background thread
+        thread = threading.Thread(target=download_thread)
+        thread.start()
+
+        # Monitor progress by checking cache directory
+        last_size = 0
+        last_report_time = time.time()
+        estimated_size_mb = {
+            "tiny": 77,
+            "base": 145,
+            "small": 488,
+            "medium": 1530,
+            "large": 3100,
+            "large-v2": 3100,
+            "large-v3": 3100,
+        }.get(model_size, 1530)  # Default to medium size
+
+        while thread.is_alive():
+            time.sleep(1)
+            try:
+                # Check current download size
+                current_size = 0
+                if model_cache_path.exists():
+                    for file in model_cache_path.rglob("*"):
+                        if file.is_file():
+                            current_size += file.stat().st_size
+
+                current_mb = current_size / (1024 * 1024)
+                progress = min(99, int((current_mb / estimated_size_mb) * 100))
+
+                # Report progress every 5 seconds or if significant change
+                now = time.time()
+                if now - last_report_time >= 5 or (current_mb - last_size / (1024 * 1024)) > 50:
+                    if current_size > last_size:
+                        print(json.dumps({
+                            "status": "downloading_model",
+                            "model": model_size,
+                            "progress": progress,
+                            "downloaded_mb": round(current_mb, 1),
+                            "total_mb": estimated_size_mb
+                        }), flush=True)
+                        last_size = current_size
+                        last_report_time = now
+            except Exception:
+                pass
+
+        thread.join()
+
+        if download_error:
+            print(json.dumps({
+                "status": "download_error",
+                "error": download_error
+            }), flush=True)
+            return False
+
+        print(json.dumps({
+            "status": "model_downloaded",
+            "model": model_size
+        }), flush=True)
+
+        return True
+
+    except Exception as e:
+        print(json.dumps({
+            "status": "download_error",
+            "error": str(e)
+        }), flush=True)
+        return False
+
+
 class ChinesePunctuator:
     """Rule-based Chinese punctuation processor."""
 
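Editor's note: the function above reports progress as newline-delimited JSON on stdout — a "downloading_model" object every ~5 seconds (progress is estimated from on-disk size and capped at 99 until snapshot_download returns), then "model_downloaded" or "download_error". The Electron main process presumably consumes this stream from the sidecar; here is a minimal Python harness illustrating the same parsing. The backend/run_server.py path comes from the commit message; everything else is an assumption:

import json
import subprocess
import sys

# Spawn the backend sidecar and watch its stdout for status objects
proc = subprocess.Popen(
    [sys.executable, "backend/run_server.py"],
    stdout=subprocess.PIPE,
    text=True,
)
for line in proc.stdout:
    try:
        msg = json.loads(line)
    except json.JSONDecodeError:
        continue  # skip any non-JSON output on stdout
    if msg.get("status") == "downloading_model":
        print(f"{msg['model']}: {msg.get('progress', 0)}%")
    elif msg.get("status") in ("model_loaded", "download_error"):
        break  # treat terminal statuses, not progress == 100, as completion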
@@ -342,17 +490,21 @@ class Transcriber:
         self.vad_model: Optional[SileroVAD] = None
 
         try:
-            print(json.dumps({"status": "loading_model", "model": model_size}), file=sys.stderr)
+            # Check if model needs to be downloaded (with progress reporting)
+            check_and_download_whisper_model(model_size)
+
+            # Now load the model
+            print(json.dumps({"status": "loading_model", "model": model_size}), flush=True)
             self.model = WhisperModel(model_size, device=device, compute_type=compute_type)
             self.converter = opencc.OpenCC("s2twp")
-            print(json.dumps({"status": "model_loaded"}), file=sys.stderr)
+            print(json.dumps({"status": "model_loaded"}), flush=True)
 
             # Pre-load VAD model at startup (not when streaming starts)
             if ONNX_AVAILABLE:
                 self.vad_model = SileroVAD()
 
         except Exception as e:
-            print(json.dumps({"error": f"Failed to load model: {e}"}), file=sys.stderr)
+            print(json.dumps({"error": f"Failed to load model: {e}"}), flush=True)
             raise
 
     def transcribe_file(self, audio_path: str, add_punctuation: bool = False) -> str:
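Editor's note: per the commit message, embedded mode is opt-in through the new backend section of client/config.json, with backend.embedded: false preserving the existing separate deployment. Only the embedded key is confirmed by this commit; below is a sketch of a reader where the other fields and their defaults are purely assumed:

import json
from pathlib import Path

def load_backend_config(config_path: str = "client/config.json") -> dict:
    """Read the backend section; absent keys fall back to separate-deployment defaults."""
    cfg = json.loads(Path(config_path).read_text(encoding="utf-8"))
    backend = cfg.get("backend", {})
    return {
        # embedded: false keeps the pre-existing separate deployment mode
        "embedded": backend.get("embedded", False),
        # host/port are hypothetical fields for a separately deployed backend
        "host": backend.get("host", "127.0.0.1"),
        "port": backend.get("port", 8000),
    }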