fix: Improve Whisper model status verification and PyInstaller builds
- Add robust model cache verification (check model.bin + config.json)
- Add new status messages: model_cached, incomplete_cache, model_error
- Forward model status events to frontend for better UI feedback
- Add clean_build_cache() to remove stale spec files before build
- Add --clean flag to PyInstaller commands
- Change sidecar from --onefile to --onedir for faster startup
- Add missing hidden imports: onnxruntime, wave, huggingface_hub.utils

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -10,14 +10,40 @@ import os
|
|||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
|
|
||||||
|
def clean_build_cache(script_dir):
    """Remove stale build artifacts left in *script_dir* by earlier builds.

    Deletes the generated ``build/backend.spec`` file and the
    ``__pycache__`` directory directly under *script_dir*, printing one line
    for each item removed. Everything else under ``build/`` is left in place.
    Missing items are skipped silently, so the call is safe to repeat.

    Args:
        script_dir: Directory that contains the build script.
    """
    spec_file = os.path.join(script_dir, "build", "backend.spec")
    # isfile (not exists) so an unexpected directory with that name does not
    # make os.remove raise.
    if os.path.isfile(spec_file):
        print(f"Removing old spec file: {spec_file}")
        os.remove(spec_file)

    # Target the bytecode cache by its exact path. A substring test on the
    # full path would also match build/ whenever the checkout location itself
    # contains "__pycache__", deleting the whole build directory by accident.
    pycache_dir = os.path.join(script_dir, "__pycache__")
    if os.path.isdir(pycache_dir):
        print(f"Removing cache: {pycache_dir}")
        shutil.rmtree(pycache_dir)
|
||||||
|
|
||||||
|
|
||||||
def build():
|
def build():
|
||||||
"""Build the backend executable."""
|
"""Build the backend executable."""
|
||||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
|
# Clean old build cache to avoid stale spec file issues
|
||||||
|
clean_build_cache(script_dir)
|
||||||
|
|
||||||
# PyInstaller command with --onedir for faster startup
|
# PyInstaller command with --onedir for faster startup
|
||||||
cmd = [
|
cmd = [
|
||||||
sys.executable, "-m", "PyInstaller",
|
sys.executable, "-m", "PyInstaller",
|
||||||
"--onedir",
|
"--onedir",
|
||||||
|
"--clean", # Clean PyInstaller cache before building
|
||||||
"--name", "backend",
|
"--name", "backend",
|
||||||
"--distpath", "dist",
|
"--distpath", "dist",
|
||||||
"--workpath", "build",
|
"--workpath", "build",
|
||||||
@@ -39,9 +65,11 @@ def build():
|
|||||||
"--hidden-import", "starlette",
|
"--hidden-import", "starlette",
|
||||||
"--hidden-import", "pydantic",
|
"--hidden-import", "pydantic",
|
||||||
"--hidden-import", "pydantic_core",
|
"--hidden-import", "pydantic_core",
|
||||||
# Database
|
# Database - MySQL
|
||||||
"--hidden-import", "mysql.connector",
|
"--hidden-import", "mysql.connector",
|
||||||
"--hidden-import", "mysql.connector.pooling",
|
"--hidden-import", "mysql.connector.pooling",
|
||||||
|
# Database - SQLite (built-in, but ensure it's included)
|
||||||
|
"--hidden-import", "sqlite3",
|
||||||
# HTTP client
|
# HTTP client
|
||||||
"--hidden-import", "httpx",
|
"--hidden-import", "httpx",
|
||||||
"--hidden-import", "httpcore",
|
"--hidden-import", "httpcore",
|
||||||
@@ -56,7 +84,9 @@ def build():
|
|||||||
"--hidden-import", "python_multipart",
|
"--hidden-import", "python_multipart",
|
||||||
# Environment loading
|
# Environment loading
|
||||||
"--hidden-import", "dotenv",
|
"--hidden-import", "dotenv",
|
||||||
# Application modules
|
# Timezone data
|
||||||
|
"--hidden-import", "tzdata",
|
||||||
|
# Application modules - only include modules that exist
|
||||||
"--hidden-import", "app",
|
"--hidden-import", "app",
|
||||||
"--hidden-import", "app.main",
|
"--hidden-import", "app.main",
|
||||||
"--hidden-import", "app.config",
|
"--hidden-import", "app.config",
|
||||||
|
|||||||
@@ -423,6 +423,21 @@ function startSidecar() {
|
|||||||
if (msg.status === "model_loaded" && mainWindow) {
|
if (msg.status === "model_loaded" && mainWindow) {
|
||||||
mainWindow.webContents.send("model-download-progress", msg);
|
mainWindow.webContents.send("model-download-progress", msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Forward model cached status (model was already downloaded)
|
||||||
|
if (msg.status === "model_cached" && mainWindow) {
|
||||||
|
mainWindow.webContents.send("model-download-progress", msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Forward incomplete cache status
|
||||||
|
if (msg.status === "incomplete_cache" && mainWindow) {
|
||||||
|
mainWindow.webContents.send("model-download-progress", msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Forward model error status
|
||||||
|
if (msg.status === "model_error" && mainWindow) {
|
||||||
|
mainWindow.webContents.send("model-download-progress", msg);
|
||||||
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.log("Sidecar output:", line);
|
console.log("Sidecar output:", line);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -319,16 +319,25 @@
|
|||||||
whisperStatusEl.textContent = `⬇️ Downloading ${progress.model}: ${percent}% (${downloadedMb}/${totalMb} MB)`;
|
whisperStatusEl.textContent = `⬇️ Downloading ${progress.model}: ${percent}% (${downloadedMb}/${totalMb} MB)`;
|
||||||
whisperStatusEl.style.color = '#ff9800';
|
whisperStatusEl.style.color = '#ff9800';
|
||||||
} else if (progress.status === 'model_downloaded') {
|
} else if (progress.status === 'model_downloaded') {
|
||||||
whisperStatusEl.textContent = `✅ ${progress.model} downloaded`;
|
whisperStatusEl.textContent = `✅ ${progress.model} downloaded, loading...`;
|
||||||
whisperStatusEl.style.color = '#28a745';
|
whisperStatusEl.style.color = '#28a745';
|
||||||
|
} else if (progress.status === 'model_cached') {
|
||||||
|
whisperStatusEl.textContent = `✅ ${progress.model} cached, loading...`;
|
||||||
|
whisperStatusEl.style.color = '#28a745';
|
||||||
|
} else if (progress.status === 'incomplete_cache') {
|
||||||
|
whisperStatusEl.textContent = `⚠️ ${progress.model} cache incomplete, re-downloading...`;
|
||||||
|
whisperStatusEl.style.color = '#ff9800';
|
||||||
} else if (progress.status === 'loading_model') {
|
} else if (progress.status === 'loading_model') {
|
||||||
whisperStatusEl.textContent = `⏳ Loading ${progress.model}...`;
|
whisperStatusEl.textContent = `⏳ Loading ${progress.model}...`;
|
||||||
whisperStatusEl.style.color = '#ffc107';
|
whisperStatusEl.style.color = '#ffc107';
|
||||||
} else if (progress.status === 'model_loaded') {
|
} else if (progress.status === 'model_loaded') {
|
||||||
whisperStatusEl.textContent = `✅ Ready`;
|
whisperStatusEl.textContent = `✅ Model ready`;
|
||||||
whisperStatusEl.style.color = '#28a745';
|
whisperStatusEl.style.color = '#28a745';
|
||||||
// Trigger a status refresh
|
// Trigger a status refresh
|
||||||
updateWhisperStatus();
|
updateWhisperStatus();
|
||||||
|
} else if (progress.status === 'model_error') {
|
||||||
|
whisperStatusEl.textContent = `❌ Error: ${progress.error || 'Model load failed'}`;
|
||||||
|
whisperStatusEl.style.color = '#dc3545';
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -1,29 +1,66 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
Build script for creating standalone transcriber executable using PyInstaller.
|
Build script for creating standalone transcriber executable using PyInstaller.
|
||||||
|
Uses --onedir mode for faster startup compared to --onefile.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
|
||||||
|
def clean_build_cache(script_dir):
    """Remove stale build artifacts left in *script_dir* by earlier builds.

    Deletes the generated ``build/transcriber.spec`` file and the
    ``__pycache__`` directory directly under *script_dir*, printing one line
    for each item removed. Everything else under ``build/`` is left in place.
    Missing items are skipped silently, so the call is safe to repeat.

    Args:
        script_dir: Directory that contains the build script.
    """
    spec_file = os.path.join(script_dir, "build", "transcriber.spec")
    # isfile (not exists) so an unexpected directory with that name does not
    # make os.remove raise.
    if os.path.isfile(spec_file):
        print(f"Removing old spec file: {spec_file}")
        os.remove(spec_file)

    # Target the bytecode cache by its exact path. A substring test on the
    # full path would also match build/ whenever the checkout location itself
    # contains "__pycache__", deleting the whole build directory by accident.
    pycache_dir = os.path.join(script_dir, "__pycache__")
    if os.path.isdir(pycache_dir):
        print(f"Removing cache: {pycache_dir}")
        shutil.rmtree(pycache_dir)
|
||||||
|
|
||||||
|
|
||||||
def build():
|
def build():
|
||||||
"""Build the transcriber executable."""
|
"""Build the transcriber executable."""
|
||||||
# PyInstaller command
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
|
# Clean old build cache to avoid stale spec file issues
|
||||||
|
clean_build_cache(script_dir)
|
||||||
|
|
||||||
|
# PyInstaller command with --onedir for faster startup
|
||||||
cmd = [
|
cmd = [
|
||||||
sys.executable, "-m", "PyInstaller",
|
sys.executable, "-m", "PyInstaller",
|
||||||
"--onefile",
|
"--onedir",
|
||||||
|
"--clean", # Clean PyInstaller cache before building
|
||||||
"--name", "transcriber",
|
"--name", "transcriber",
|
||||||
"--distpath", "dist",
|
"--distpath", "dist",
|
||||||
"--workpath", "build",
|
"--workpath", "build",
|
||||||
"--specpath", "build",
|
"--specpath", "build",
|
||||||
|
# Core dependencies
|
||||||
"--hidden-import", "faster_whisper",
|
"--hidden-import", "faster_whisper",
|
||||||
"--hidden-import", "opencc",
|
"--hidden-import", "opencc",
|
||||||
"--hidden-import", "numpy",
|
"--hidden-import", "numpy",
|
||||||
"--hidden-import", "ctranslate2",
|
"--hidden-import", "ctranslate2",
|
||||||
"--hidden-import", "huggingface_hub",
|
"--hidden-import", "huggingface_hub",
|
||||||
|
"--hidden-import", "huggingface_hub.utils",
|
||||||
"--hidden-import", "tokenizers",
|
"--hidden-import", "tokenizers",
|
||||||
|
# ONNX Runtime for VAD
|
||||||
|
"--hidden-import", "onnxruntime",
|
||||||
|
# Audio processing
|
||||||
|
"--hidden-import", "wave",
|
||||||
|
# Collect data files
|
||||||
"--collect-data", "faster_whisper",
|
"--collect-data", "faster_whisper",
|
||||||
"--collect-data", "opencc",
|
"--collect-data", "opencc",
|
||||||
"transcriber.py"
|
"transcriber.py"
|
||||||
@@ -32,10 +69,12 @@ def build():
|
|||||||
print("Building transcriber executable...")
|
print("Building transcriber executable...")
|
||||||
print(f"Command: {' '.join(cmd)}")
|
print(f"Command: {' '.join(cmd)}")
|
||||||
|
|
||||||
result = subprocess.run(cmd, cwd=os.path.dirname(os.path.abspath(__file__)))
|
result = subprocess.run(cmd, cwd=script_dir)
|
||||||
|
|
||||||
if result.returncode == 0:
|
if result.returncode == 0:
|
||||||
print("\nBuild successful! Executable created at: dist/transcriber")
|
print("\nBuild successful!")
|
||||||
|
print("Executable created at: dist/transcriber/transcriber.exe (Windows) or dist/transcriber/transcriber (Linux)")
|
||||||
|
print("\nNote: The Whisper model will be downloaded on first run if not cached.")
|
||||||
else:
|
else:
|
||||||
print("\nBuild failed!")
|
print("\nBuild failed!")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|||||||
@@ -60,12 +60,31 @@ def check_and_download_whisper_model(model_size: str) -> bool:
|
|||||||
repo_cache_name = f"models--Systran--faster-whisper-{model_size}"
|
repo_cache_name = f"models--Systran--faster-whisper-{model_size}"
|
||||||
model_cache_path = cache_dir / repo_cache_name
|
model_cache_path = cache_dir / repo_cache_name
|
||||||
|
|
||||||
# Check if model files exist
|
# Check if model files exist - verify essential files are present
|
||||||
if model_cache_path.exists():
|
if model_cache_path.exists():
|
||||||
snapshots_dir = model_cache_path / "snapshots"
|
snapshots_dir = model_cache_path / "snapshots"
|
||||||
if snapshots_dir.exists() and any(snapshots_dir.iterdir()):
|
if snapshots_dir.exists():
|
||||||
# Model is cached, no download needed
|
# Check for actual model files, not just any file
|
||||||
return True
|
for snapshot in snapshots_dir.iterdir():
|
||||||
|
if snapshot.is_dir():
|
||||||
|
# Essential faster-whisper model files
|
||||||
|
required_files = ["model.bin", "config.json"]
|
||||||
|
has_all_files = all(
|
||||||
|
(snapshot / f).exists() for f in required_files
|
||||||
|
)
|
||||||
|
if has_all_files:
|
||||||
|
print(json.dumps({
|
||||||
|
"status": "model_cached",
|
||||||
|
"model": model_size,
|
||||||
|
"path": str(snapshot)
|
||||||
|
}), flush=True)
|
||||||
|
return True
|
||||||
|
# Snapshots exist but no valid model found
|
||||||
|
print(json.dumps({
|
||||||
|
"status": "incomplete_cache",
|
||||||
|
"model": model_size,
|
||||||
|
"message": "Model cache incomplete, will re-download"
|
||||||
|
}), flush=True)
|
||||||
|
|
||||||
# Model not cached, need to download
|
# Model not cached, need to download
|
||||||
print(json.dumps({
|
print(json.dumps({
|
||||||
@@ -491,20 +510,29 @@ class Transcriber:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
# Check if model needs to be downloaded (with progress reporting)
|
# Check if model needs to be downloaded (with progress reporting)
|
||||||
check_and_download_whisper_model(model_size)
|
download_ok = check_and_download_whisper_model(model_size)
|
||||||
|
if not download_ok:
|
||||||
|
print(json.dumps({
|
||||||
|
"status": "model_error",
|
||||||
|
"error": "Failed to download model"
|
||||||
|
}), flush=True)
|
||||||
|
raise RuntimeError("Failed to download Whisper model")
|
||||||
|
|
||||||
# Now load the model
|
# Now load the model
|
||||||
print(json.dumps({"status": "loading_model", "model": model_size}), flush=True)
|
print(json.dumps({"status": "loading_model", "model": model_size}), flush=True)
|
||||||
self.model = WhisperModel(model_size, device=device, compute_type=compute_type)
|
self.model = WhisperModel(model_size, device=device, compute_type=compute_type)
|
||||||
self.converter = opencc.OpenCC("s2twp")
|
self.converter = opencc.OpenCC("s2twp")
|
||||||
print(json.dumps({"status": "model_loaded"}), flush=True)
|
print(json.dumps({"status": "model_loaded", "model": model_size}), flush=True)
|
||||||
|
|
||||||
# Pre-load VAD model at startup (not when streaming starts)
|
# Pre-load VAD model at startup (not when streaming starts)
|
||||||
if ONNX_AVAILABLE:
|
if ONNX_AVAILABLE:
|
||||||
self.vad_model = SileroVAD()
|
self.vad_model = SileroVAD()
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(json.dumps({"error": f"Failed to load model: {e}"}), flush=True)
|
print(json.dumps({
|
||||||
|
"status": "model_error",
|
||||||
|
"error": f"Failed to load model: {e}"
|
||||||
|
}), flush=True)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def transcribe_file(self, audio_path: str, add_punctuation: bool = False) -> str:
|
def transcribe_file(self, audio_path: str, add_punctuation: bool = False) -> str:
|
||||||
|
|||||||
Reference in New Issue
Block a user