diff --git a/backend/build.py b/backend/build.py
index 5ab9e1d..f48268e 100644
--- a/backend/build.py
+++ b/backend/build.py
@@ -10,14 +10,40 @@
 import os
 import shutil
 
+def clean_build_cache(script_dir):
+    """Clean old build artifacts that may cause stale spec file issues."""
+    dirs_to_clean = [
+        os.path.join(script_dir, "build"),
+        os.path.join(script_dir, "__pycache__"),
+    ]
+    files_to_clean = [
+        os.path.join(script_dir, "build", "backend.spec"),
+    ]
+
+    for f in files_to_clean:
+        if os.path.exists(f):
+            print(f"Removing old spec file: {f}")
+            os.remove(f)
+
+    for d in dirs_to_clean:
+        # Only remove __pycache__ directories; the build dir itself is kept
+        if os.path.exists(d) and "__pycache__" in d:
+            print(f"Removing cache: {d}")
+            shutil.rmtree(d)
+
+
 def build():
     """Build the backend executable."""
     script_dir = os.path.dirname(os.path.abspath(__file__))
 
+    # Clean old build cache to avoid stale spec file issues
+    clean_build_cache(script_dir)
+
     # PyInstaller command with --onedir for faster startup
     cmd = [
         sys.executable, "-m", "PyInstaller",
         "--onedir",
+        "--clean",  # Clean PyInstaller cache before building
         "--name", "backend",
         "--distpath", "dist",
         "--workpath", "build",
@@ -39,9 +65,11 @@
         "--hidden-import", "starlette",
         "--hidden-import", "pydantic",
         "--hidden-import", "pydantic_core",
-        # Database
+        # Database - MySQL
         "--hidden-import", "mysql.connector",
         "--hidden-import", "mysql.connector.pooling",
+        # Database - SQLite (built-in, but ensure it's included)
+        "--hidden-import", "sqlite3",
         # HTTP client
         "--hidden-import", "httpx",
         "--hidden-import", "httpcore",
@@ -56,7 +84,9 @@
         "--hidden-import", "python_multipart",
         # Environment loading
         "--hidden-import", "dotenv",
-        # Application modules
+        # Timezone data
+        "--hidden-import", "tzdata",
+        # Application modules - only include modules that exist
         "--hidden-import", "app",
         "--hidden-import", "app.main",
         "--hidden-import", "app.config",
diff --git a/client/src/main.js b/client/src/main.js
index 7c61708..929f281 100644
--- a/client/src/main.js
+++ b/client/src/main.js
@@ -423,6 +423,21 @@ function startSidecar() {
       if (msg.status === "model_loaded" && mainWindow) {
         mainWindow.webContents.send("model-download-progress", msg);
       }
+
+      // Forward model cached status (model was already downloaded)
+      if (msg.status === "model_cached" && mainWindow) {
+        mainWindow.webContents.send("model-download-progress", msg);
+      }
+
+      // Forward incomplete cache status
+      if (msg.status === "incomplete_cache" && mainWindow) {
+        mainWindow.webContents.send("model-download-progress", msg);
+      }
+
+      // Forward model error status
+      if (msg.status === "model_error" && mainWindow) {
+        mainWindow.webContents.send("model-download-progress", msg);
+      }
     } catch (e) {
       console.log("Sidecar output:", line);
     }
diff --git a/client/src/pages/meeting-detail.html b/client/src/pages/meeting-detail.html
index 2699fb5..ab1a75d 100644
--- a/client/src/pages/meeting-detail.html
+++ b/client/src/pages/meeting-detail.html
@@ -319,16 +319,25 @@
         whisperStatusEl.textContent = `⬇️ Downloading ${progress.model}: ${percent}% (${downloadedMb}/${totalMb} MB)`;
         whisperStatusEl.style.color = '#ff9800';
       } else if (progress.status === 'model_downloaded') {
-        whisperStatusEl.textContent = `✅ ${progress.model} downloaded`;
+        whisperStatusEl.textContent = `✅ ${progress.model} downloaded, loading...`;
         whisperStatusEl.style.color = '#28a745';
+      } else if (progress.status === 'model_cached') {
+        whisperStatusEl.textContent = `✅ ${progress.model} cached, loading...`;
+        whisperStatusEl.style.color = '#28a745';
+      } else if (progress.status === 'incomplete_cache') {
+        whisperStatusEl.textContent = `⚠️ ${progress.model} cache incomplete, re-downloading...`;
+        whisperStatusEl.style.color = '#ff9800';
       } else if (progress.status === 'loading_model') {
         whisperStatusEl.textContent = `⏳ Loading ${progress.model}...`;
         whisperStatusEl.style.color = '#ffc107';
       } else if (progress.status === 'model_loaded') {
-        whisperStatusEl.textContent = `✅ Ready`;
+        whisperStatusEl.textContent = `✅ Model ready`;
         whisperStatusEl.style.color = '#28a745';
         // Trigger a status refresh
         updateWhisperStatus();
+      } else if (progress.status === 'model_error') {
+        whisperStatusEl.textContent = `❌ Error: ${progress.error || 'Model load failed'}`;
+        whisperStatusEl.style.color = '#dc3545';
       }
     });
 
diff --git a/sidecar/build.py b/sidecar/build.py
index 5632a5d..9de586d 100644
--- a/sidecar/build.py
+++ b/sidecar/build.py
@@ -1,29 +1,66 @@
 #!/usr/bin/env python3
 """
 Build script for creating standalone transcriber executable using PyInstaller.
+Uses --onedir mode for faster startup compared to --onefile.
 """
 
 import subprocess
 import sys
 import os
+import shutil
+
+
+def clean_build_cache(script_dir):
+    """Clean old build artifacts that may cause stale spec file issues."""
+    dirs_to_clean = [
+        os.path.join(script_dir, "build"),
+        os.path.join(script_dir, "__pycache__"),
+    ]
+    files_to_clean = [
+        os.path.join(script_dir, "build", "transcriber.spec"),
+    ]
+
+    for f in files_to_clean:
+        if os.path.exists(f):
+            print(f"Removing old spec file: {f}")
+            os.remove(f)
+
+    for d in dirs_to_clean:
+        # Only remove __pycache__ directories; the build dir itself is kept
+        if os.path.exists(d) and "__pycache__" in d:
+            print(f"Removing cache: {d}")
+            shutil.rmtree(d)
 
 
 def build():
     """Build the transcriber executable."""
-    # PyInstaller command
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+
+    # Clean old build cache to avoid stale spec file issues
+    clean_build_cache(script_dir)
+
+    # PyInstaller command with --onedir for faster startup
     cmd = [
         sys.executable, "-m", "PyInstaller",
-        "--onefile",
+        "--onedir",
+        "--clean",  # Clean PyInstaller cache before building
         "--name", "transcriber",
        "--distpath", "dist",
         "--workpath", "build",
         "--specpath", "build",
+        # Core dependencies
         "--hidden-import", "faster_whisper",
         "--hidden-import", "opencc",
         "--hidden-import", "numpy",
         "--hidden-import", "ctranslate2",
         "--hidden-import", "huggingface_hub",
+        "--hidden-import", "huggingface_hub.utils",
         "--hidden-import", "tokenizers",
+        # ONNX Runtime for VAD
+        "--hidden-import", "onnxruntime",
+        # Audio processing
+        "--hidden-import", "wave",
+        # Collect data files
         "--collect-data", "faster_whisper",
         "--collect-data", "opencc",
         "transcriber.py"
@@ -32,10 +69,12 @@ def build():
     print("Building transcriber executable...")
     print(f"Command: {' '.join(cmd)}")
 
-    result = subprocess.run(cmd, cwd=os.path.dirname(os.path.abspath(__file__)))
+    result = subprocess.run(cmd, cwd=script_dir)
 
     if result.returncode == 0:
-        print("\nBuild successful! Executable created at: dist/transcriber")
Executable created at: dist/transcriber") + print("\nBuild successful!") + print("Executable created at: dist/transcriber/transcriber.exe (Windows) or dist/transcriber/transcriber (Linux)") + print("\nNote: The Whisper model will be downloaded on first run if not cached.") else: print("\nBuild failed!") sys.exit(1) diff --git a/sidecar/transcriber.py b/sidecar/transcriber.py index ccbcd95..ec25ef6 100644 --- a/sidecar/transcriber.py +++ b/sidecar/transcriber.py @@ -60,12 +60,31 @@ def check_and_download_whisper_model(model_size: str) -> bool: repo_cache_name = f"models--Systran--faster-whisper-{model_size}" model_cache_path = cache_dir / repo_cache_name - # Check if model files exist + # Check if model files exist - verify essential files are present if model_cache_path.exists(): snapshots_dir = model_cache_path / "snapshots" - if snapshots_dir.exists() and any(snapshots_dir.iterdir()): - # Model is cached, no download needed - return True + if snapshots_dir.exists(): + # Check for actual model files, not just any file + for snapshot in snapshots_dir.iterdir(): + if snapshot.is_dir(): + # Essential faster-whisper model files + required_files = ["model.bin", "config.json"] + has_all_files = all( + (snapshot / f).exists() for f in required_files + ) + if has_all_files: + print(json.dumps({ + "status": "model_cached", + "model": model_size, + "path": str(snapshot) + }), flush=True) + return True + # Snapshots exist but no valid model found + print(json.dumps({ + "status": "incomplete_cache", + "model": model_size, + "message": "Model cache incomplete, will re-download" + }), flush=True) # Model not cached, need to download print(json.dumps({ @@ -491,20 +510,29 @@ class Transcriber: try: # Check if model needs to be downloaded (with progress reporting) - check_and_download_whisper_model(model_size) + download_ok = check_and_download_whisper_model(model_size) + if not download_ok: + print(json.dumps({ + "status": "model_error", + "error": "Failed to download model" + }), flush=True) + raise RuntimeError("Failed to download Whisper model") # Now load the model print(json.dumps({"status": "loading_model", "model": model_size}), flush=True) self.model = WhisperModel(model_size, device=device, compute_type=compute_type) self.converter = opencc.OpenCC("s2twp") - print(json.dumps({"status": "model_loaded"}), flush=True) + print(json.dumps({"status": "model_loaded", "model": model_size}), flush=True) # Pre-load VAD model at startup (not when streaming starts) if ONNX_AVAILABLE: self.vad_model = SileroVAD() except Exception as e: - print(json.dumps({"error": f"Failed to load model: {e}"}), flush=True) + print(json.dumps({ + "status": "model_error", + "error": f"Failed to load model: {e}" + }), flush=True) raise def transcribe_file(self, audio_path: str, add_punctuation: bool = False) -> str: