fix: Improve Whisper model status verification and PyInstaller builds
- Add robust model cache verification (check model.bin + config.json)
- Add new status messages: model_cached, incomplete_cache, model_error
- Forward model status events to frontend for better UI feedback
- Add clean_build_cache() to remove stale spec files before build
- Add --clean flag to PyInstaller commands
- Change sidecar from --onefile to --onedir for faster startup
- Add missing hidden imports: onnxruntime, wave, huggingface_hub.utils

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
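None of the PyInstaller changes appear in the diff below, so here is a rough sketch of what clean_build_cache() and the rebuilt sidecar command could look like. The build-script layout, the sidecar name, and the entry-point filename are assumptions; only the flags and hidden-import names come from the commit message.

```python
# Hypothetical sketch of the build-side changes; the paths and names
# (build-script layout, "whisper-sidecar", transcriber.py) are assumptions.
import shutil
import subprocess
from pathlib import Path

def clean_build_cache(project_dir: Path) -> None:
    """Remove stale PyInstaller spec files and old build output before rebuilding."""
    for spec in project_dir.glob("*.spec"):
        spec.unlink()
    for stale_dir in ("build", "dist"):
        shutil.rmtree(project_dir / stale_dir, ignore_errors=True)

def build_sidecar(project_dir: Path) -> None:
    clean_build_cache(project_dir)
    subprocess.run(
        [
            "pyinstaller",
            "--clean",    # also wipe PyInstaller's own cache
            "--onedir",   # unpack at build time instead of on every launch
            "--name", "whisper-sidecar",
            "--hidden-import", "onnxruntime",
            "--hidden-import", "wave",
            "--hidden-import", "huggingface_hub.utils",
            "transcriber.py",
        ],
        cwd=project_dir,
        check=True,
    )
```

The switch from --onefile to --onedir trades a single-file artifact for skipping the self-extraction step on every launch, which is where the faster startup comes from.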
@@ -60,12 +60,31 @@ def check_and_download_whisper_model(model_size: str) -> bool:
     repo_cache_name = f"models--Systran--faster-whisper-{model_size}"
     model_cache_path = cache_dir / repo_cache_name
 
-    # Check if model files exist
+    # Check if model files exist - verify essential files are present
     if model_cache_path.exists():
         snapshots_dir = model_cache_path / "snapshots"
-        if snapshots_dir.exists() and any(snapshots_dir.iterdir()):
-            # Model is cached, no download needed
-            return True
+        if snapshots_dir.exists():
+            # Check for actual model files, not just any file
+            for snapshot in snapshots_dir.iterdir():
+                if snapshot.is_dir():
+                    # Essential faster-whisper model files
+                    required_files = ["model.bin", "config.json"]
+                    has_all_files = all(
+                        (snapshot / f).exists() for f in required_files
+                    )
+                    if has_all_files:
+                        print(json.dumps({
+                            "status": "model_cached",
+                            "model": model_size,
+                            "path": str(snapshot)
+                        }), flush=True)
+                        return True
+            # Snapshots exist but no valid model found
+            print(json.dumps({
+                "status": "incomplete_cache",
+                "model": model_size,
+                "message": "Model cache incomplete, will re-download"
+            }), flush=True)
 
     # Model not cached, need to download
     print(json.dumps({
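The hunk cuts off just as the not-cached status message begins, and the download step itself falls outside the context window. Purely as an illustration, a download half consistent with the check above might look like the sketch below; the "downloading" status string and the use of huggingface_hub.snapshot_download are assumptions, while the Systran/faster-whisper-{model_size} repo id follows from the cache-directory name in this hunk.

```python
# Hedged sketch of the elided download step; the "downloading" status name
# is an assumption, since the real status string is truncated in the hunk.
import json
from huggingface_hub import snapshot_download

def download_whisper_model(model_size: str) -> bool:
    repo_id = f"Systran/faster-whisper-{model_size}"
    print(json.dumps({"status": "downloading", "model": model_size}), flush=True)
    try:
        # Populates the same HF cache layout the check above inspects
        snapshot_download(repo_id=repo_id)
    except Exception as e:
        print(json.dumps({"status": "model_error", "error": str(e)}), flush=True)
        return False
    return True
```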
@@ -491,20 +510,29 @@ class Transcriber:
 
         try:
             # Check if model needs to be downloaded (with progress reporting)
-            check_and_download_whisper_model(model_size)
+            download_ok = check_and_download_whisper_model(model_size)
+            if not download_ok:
+                print(json.dumps({
+                    "status": "model_error",
+                    "error": "Failed to download model"
+                }), flush=True)
+                raise RuntimeError("Failed to download Whisper model")
 
             # Now load the model
             print(json.dumps({"status": "loading_model", "model": model_size}), flush=True)
             self.model = WhisperModel(model_size, device=device, compute_type=compute_type)
             self.converter = opencc.OpenCC("s2twp")
-            print(json.dumps({"status": "model_loaded"}), flush=True)
+            print(json.dumps({"status": "model_loaded", "model": model_size}), flush=True)
 
             # Pre-load VAD model at startup (not when streaming starts)
             if ONNX_AVAILABLE:
                 self.vad_model = SileroVAD()
 
         except Exception as e:
-            print(json.dumps({"error": f"Failed to load model: {e}"}), flush=True)
+            print(json.dumps({
+                "status": "model_error",
+                "error": f"Failed to load model: {e}"
+            }), flush=True)
             raise
 
     def transcribe_file(self, audio_path: str, add_punctuation: bool = False) -> str:
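Taken together, the two hunks define a small newline-delimited JSON protocol on stdout: model_cached, incomplete_cache, loading_model, model_loaded, and model_error, one JSON object per line. A minimal consumer, the kind of thing the "forward model status events to frontend" bullet refers to, could dispatch on the status field like this; the sidecar path and the print handlers are placeholders, not part of this commit.

```python
# Minimal NDJSON status reader; "./whisper-sidecar" is a placeholder path.
import json
import subprocess

proc = subprocess.Popen(
    ["./whisper-sidecar"], stdout=subprocess.PIPE, text=True
)
for line in proc.stdout:
    try:
        event = json.loads(line)
    except json.JSONDecodeError:
        continue  # skip any non-JSON log lines
    status = event.get("status")
    if status in ("model_cached", "model_loaded"):
        print(f"ready: {event.get('model')}")
    elif status == "incomplete_cache":
        print("cache incomplete, re-downloading")
    elif status == "model_error":
        print(f"failed: {event.get('error')}")
        break
```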