diff --git a/backend/app/main.py b/backend/app/main.py
index 75293b7..0c6c781 100644
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -1,9 +1,19 @@
+import os
+from pathlib import Path
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles
+from fastapi.responses import FileResponse
 from contextlib import asynccontextmanager
 
 from .database import init_db_pool, init_tables
-from .routers import auth, meetings, ai, export
+from .routers import auth, meetings, ai, export, sidecar
+from .sidecar_manager import get_sidecar_manager
+
+# Determine client directory path
+BACKEND_DIR = Path(__file__).parent.parent
+PROJECT_DIR = BACKEND_DIR.parent
+CLIENT_DIR = PROJECT_DIR / "client" / "src"
 
 
 @asynccontextmanager
@@ -11,8 +21,25 @@ async def lifespan(app: FastAPI):
     # Startup
     init_db_pool()
     init_tables()
+
+    # Only start sidecar in browser mode (not when Electron manages it)
+    # Set BROWSER_MODE=true in start-browser.sh to enable
+    browser_mode = os.environ.get("BROWSER_MODE", "").lower() == "true"
+    sidecar_mgr = get_sidecar_manager()
+
+    if browser_mode and sidecar_mgr.is_available():
+        print("[Backend] Browser mode: Starting sidecar...")
+        await sidecar_mgr.start()
+    elif browser_mode:
+        print("[Backend] Browser mode: Sidecar not available (transcription disabled)")
+    else:
+        print("[Backend] Electron mode: Sidecar managed by Electron")
+
     yield
-    # Shutdown (cleanup if needed)
+
+    # Shutdown - only stop if we started it
+    if browser_mode:
+        sidecar_mgr.stop()
 
 
 app = FastAPI(
@@ -36,9 +63,42 @@ app.include_router(auth.router, prefix="/api", tags=["Authentication"])
 app.include_router(meetings.router, prefix="/api", tags=["Meetings"])
 app.include_router(ai.router, prefix="/api", tags=["AI"])
 app.include_router(export.router, prefix="/api", tags=["Export"])
+app.include_router(sidecar.router, prefix="/api", tags=["Sidecar"])
 
 
 @app.get("/api/health")
 async def health_check():
     """Health check endpoint."""
     return {"status": "healthy", "service": "meeting-assistant"}
+
+
+# ========================================
+# Browser Mode: Serve static files
+# ========================================
+
+# Check if client directory exists for browser mode
+if CLIENT_DIR.exists():
+    # Serve static assets (CSS, JS, etc.)
+    app.mount("/styles", StaticFiles(directory=CLIENT_DIR / "styles"), name="styles")
+    app.mount("/services", StaticFiles(directory=CLIENT_DIR / "services"), name="services")
+    app.mount("/config", StaticFiles(directory=CLIENT_DIR / "config"), name="config")
+
+    @app.get("/")
+    async def serve_login():
+        """Serve login page."""
+        return FileResponse(CLIENT_DIR / "pages" / "login.html")
+
+    @app.get("/login")
+    async def serve_login_page():
+        """Serve login page."""
+        return FileResponse(CLIENT_DIR / "pages" / "login.html")
+
+    @app.get("/meetings")
+    async def serve_meetings_page():
+        """Serve meetings list page."""
+        return FileResponse(CLIENT_DIR / "pages" / "meetings.html")
+
+    @app.get("/meeting-detail")
+    async def serve_meeting_detail_page():
+        """Serve meeting detail page."""
+        return FileResponse(CLIENT_DIR / "pages" / "meeting-detail.html")
diff --git a/backend/app/routers/sidecar.py b/backend/app/routers/sidecar.py
new file mode 100644
index 0000000..12c14c0
--- /dev/null
+++ b/backend/app/routers/sidecar.py
@@ -0,0 +1,359 @@
+"""
+Sidecar API Router
+
+Provides HTTP endpoints for browser-based clients to access
+the Whisper transcription sidecar functionality.
+"""
+
+import os
+import tempfile
+import base64
+import binascii
+from typing import Optional
+from fastapi import APIRouter, HTTPException, UploadFile, File, WebSocket, WebSocketDisconnect
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel
+
+from ..sidecar_manager import get_sidecar_manager
+
+router = APIRouter(prefix="/sidecar", tags=["Sidecar"])
+
+
+class TranscribeRequest(BaseModel):
+    """Request for transcribing audio from base64 data."""
+    audio_data: str  # Base64 encoded audio (webm/opus)
+
+
+class AudioChunkRequest(BaseModel):
+    """Request for sending an audio chunk in streaming mode."""
+    data: str  # Base64 encoded PCM audio
+
+
+@router.get("/status")
+async def get_sidecar_status():
+    """
+    Get the current status of the sidecar transcription engine.
+
+    Returns:
+        Status object with ready state, whisper model info, etc.
+    """
+    manager = get_sidecar_manager()
+    return manager.get_status()
+
+
+@router.post("/start")
+async def start_sidecar():
+    """
+    Start the sidecar transcription engine.
+
+    This is typically called automatically on backend startup,
+    but can be used to restart the sidecar if needed.
+    """
+    manager = get_sidecar_manager()
+
+    if not manager.is_available():
+        raise HTTPException(
+            status_code=503,
+            detail="Sidecar not available. Check if sidecar/transcriber.py and sidecar/venv exist."
+        )
+
+    success = await manager.start()
+    if not success:
+        raise HTTPException(
+            status_code=503,
+            detail="Failed to start sidecar. Check backend logs for details."
+        )
+
+    return {"status": "started", "ready": manager.ready}
+
+
+@router.post("/stop")
+async def stop_sidecar():
+    """Stop the sidecar transcription engine."""
+    manager = get_sidecar_manager()
+    manager.stop()
+    return {"status": "stopped"}
+
+
+@router.post("/transcribe")
+async def transcribe_audio(request: TranscribeRequest):
+    """
+    Transcribe base64-encoded audio data.
+
+    The audio should be in webm/opus format (as recorded by MediaRecorder).
+    """
+    manager = get_sidecar_manager()
+
+    if not manager.ready:
+        raise HTTPException(
+            status_code=503,
+            detail="Sidecar not ready. Please wait for model to load."
+        )
+
+    try:
+        # Decode base64 audio
+        audio_data = base64.b64decode(request.audio_data)
+
+        # Save to temp file
+        with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as f:
+            f.write(audio_data)
+            temp_path = f.name
+
+        try:
+            # Transcribe
+            result = await manager.transcribe_file(temp_path)
+
+            if result.get("error"):
+                raise HTTPException(status_code=500, detail=result["error"])
+
+            return {
+                "result": result.get("result", ""),
+                "file": result.get("file", "")
+            }
+
+        finally:
+            # Clean up temp file
+            os.unlink(temp_path)
+
+    except HTTPException:
+        # Propagate as-is; re-wrapping below would replace the detail text.
+        raise
+    except binascii.Error:
+        raise HTTPException(status_code=400, detail="Invalid base64 audio data")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.post("/transcribe-file")
+async def transcribe_audio_file(file: UploadFile = File(...)):
+    """
+    Transcribe an uploaded audio file.
+
+    Accepts common audio formats: mp3, wav, m4a, webm, ogg, flac, aac
+    """
+    manager = get_sidecar_manager()
+
+    if not manager.ready:
+        raise HTTPException(
+            status_code=503,
+            detail="Sidecar not ready. Please wait for model to load."
+        )
+
+    # Validate file extension
+    allowed_extensions = {".mp3", ".wav", ".m4a", ".webm", ".ogg", ".flac", ".aac"}
+    ext = os.path.splitext(file.filename or "")[1].lower()
+    if ext not in allowed_extensions:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Unsupported audio format. Allowed: {', '.join(sorted(allowed_extensions))}"
+        )
+
+    try:
+        # Save uploaded file
+        with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as f:
+            content = await file.read()
+            f.write(content)
+            temp_path = f.name
+
+        try:
+            result = await manager.transcribe_file(temp_path)
+
+            if result.get("error"):
+                raise HTTPException(status_code=500, detail=result["error"])
+
+            return {
+                "result": result.get("result", ""),
+                "filename": file.filename
+            }
+
+        finally:
+            os.unlink(temp_path)
+
+    except HTTPException:
+        # Propagate as-is; re-wrapping below would replace the detail text.
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.post("/stream/start")
+async def start_streaming():
+    """
+    Start a streaming transcription session.
+
+    Returns a session ID that should be used for subsequent audio chunks.
+    """
+    manager = get_sidecar_manager()
+
+    if not manager.ready:
+        raise HTTPException(
+            status_code=503,
+            detail="Sidecar not ready. Please wait for model to load."
+        )
+
+    result = await manager.start_stream()
+
+    if result.get("error"):
+        raise HTTPException(status_code=500, detail=result["error"])
+
+    return result
+
+
+@router.post("/stream/chunk")
+async def send_audio_chunk(request: AudioChunkRequest):
+    """
+    Send an audio chunk for streaming transcription.
+
+    The audio should be base64-encoded PCM data (16-bit, 16kHz, mono).
+
+    Returns a transcription segment if speech end was detected,
+    or null if more audio is needed.
+    """
+    manager = get_sidecar_manager()
+
+    if not manager.ready:
+        raise HTTPException(
+            status_code=503,
+            detail="Sidecar not ready"
+        )
+
+    result = await manager.send_audio_chunk(request.data)
+
+    # Result may be None if no segment ready yet
+    if result is None:
+        return {"segment": None}
+
+    if result.get("error"):
+        raise HTTPException(status_code=500, detail=result["error"])
+
+    return {"segment": result}
+
+
+@router.post("/stream/stop")
+async def stop_streaming():
+    """
+    Stop the streaming transcription session.
+
+    Returns any final transcription segments and session statistics.
+    """
+    manager = get_sidecar_manager()
+
+    result = await manager.stop_stream()
+
+    if result.get("error"):
+        raise HTTPException(status_code=500, detail=result["error"])
+
+    return result
+
+
+@router.post("/segment-audio")
+async def segment_audio_file(file: UploadFile = File(...), max_chunk_seconds: int = 300):
+    """
+    Segment an audio file using VAD for natural speech boundaries.
+
+    This is used for processing large audio files before cloud transcription.
+
+    Args:
+        file: The audio file to segment
+        max_chunk_seconds: Maximum duration per chunk (default 300s / 5 minutes)
+
+    Returns:
+        List of segment metadata with file paths
+    """
+    manager = get_sidecar_manager()
+
+    if not manager.ready:
+        raise HTTPException(
+            status_code=503,
+            detail="Sidecar not ready. Please wait for model to load."
+        )
+
+    try:
+        # Save uploaded file
+        ext = os.path.splitext(file.filename or "")[1].lower() or ".wav"
+        with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as f:
+            content = await file.read()
+            f.write(content)
+            temp_path = f.name
+
+        try:
+            result = await manager.segment_audio(temp_path, max_chunk_seconds)
+
+            if result.get("error"):
+                raise HTTPException(status_code=500, detail=result["error"])
+
+            return result
+
+        finally:
+            # Keep temp file for now - segments reference it
+            # Will be cleaned up by the transcription process
+            pass
+
+    except HTTPException:
+        # Propagate as-is; re-wrapping below would replace the detail text.
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.websocket("/ws")
+async def websocket_endpoint(websocket: WebSocket):
+    """
+    WebSocket endpoint for real-time streaming transcription.
+
+    Protocol:
+    1. Client connects
+    2. Client sends: {"action": "start_stream"}
+    3. Server responds: {"status": "streaming", "session_id": "..."}
+    4. Client sends: {"action": "audio_chunk", "data": "<base64-encoded PCM>"}
+    5. Server responds: {"segment": {...}} when speech detected, or {"segment": null}
+    6. Client sends: {"action": "stop_stream"}
+    7. Server responds: {"status": "stream_stopped", ...}
+    """
+    await websocket.accept()
+
+    manager = get_sidecar_manager()
+
+    if not manager.ready:
+        await websocket.send_json({"error": "Sidecar not ready"})
+        await websocket.close()
+        return
+
+    try:
+        while True:
+            data = await websocket.receive_json()
+            action = data.get("action")
+
+            if action == "start_stream":
+                result = await manager.start_stream()
+                await websocket.send_json(result)
+
+            elif action == "audio_chunk":
+                audio_data = data.get("data")
+                if audio_data:
+                    result = await manager.send_audio_chunk(audio_data)
+                    await websocket.send_json({"segment": result})
+                else:
+                    await websocket.send_json({"error": "No audio data"})
+
+            elif action == "stop_stream":
+                result = await manager.stop_stream()
+                await websocket.send_json(result)
+                break
+
+            elif action == "ping":
+                await websocket.send_json({"status": "pong"})
+
+            else:
+                await websocket.send_json({"error": f"Unknown action: {action}"})
+
+    except WebSocketDisconnect:
+        pass  # Client went away; the session is cleaned up below.
+    except Exception as e:
+        await websocket.send_json({"error": str(e)})
+        await websocket.close()
+    finally:
+        # Clean up streaming session if it is still active, whichever
+        # way the loop was left (disconnect or unexpected error).
+        if manager._is_streaming():
+            await manager.stop_stream()
diff --git a/backend/app/sidecar_manager.py b/backend/app/sidecar_manager.py
new file mode 100644
index 0000000..3a2713a
--- /dev/null
+++ b/backend/app/sidecar_manager.py
@@ -0,0 +1,316 @@
+"""
+Sidecar Process Manager
+
+Manages the Python sidecar process for speech-to-text transcription.
+Provides an interface for the backend to communicate with the sidecar
+via subprocess stdin/stdout.
+"""
+
+import asyncio
+import json
+import os
+import subprocess
+import sys
+import tempfile
+import base64
+from pathlib import Path
+from typing import Optional, Dict, Any, Callable
+from threading import Thread, Lock
+import queue
+
+
+class SidecarManager:
+    """
+    Manages the Whisper transcription sidecar process.
+
+    The sidecar is a Python process running transcriber.py that handles
+    speech-to-text conversion using faster-whisper.
+    """
+
+    def __init__(self):
+        self.process: Optional[subprocess.Popen] = None
+        self.ready = False
+        self.whisper_info: Optional[Dict] = None
+        # Created lazily inside the running event loop (see send_command).
+        self._lock: Optional[asyncio.Lock] = None
+        self._response_queue = queue.Queue()
+        self._reader_thread: Optional[Thread] = None
+        self._progress_callbacks: list[Callable] = []
+        self._last_status: Dict[str, Any] = {}
+
+        # Paths
+        self.project_dir = Path(__file__).parent.parent.parent
+        self.sidecar_dir = self.project_dir / "sidecar"
+        self.transcriber_path = self.sidecar_dir / "transcriber.py"
+        self.venv_python = self.sidecar_dir / "venv" / "bin" / "python"
+
+    def is_available(self) -> bool:
+        """Check if sidecar is available (files exist)."""
+        return self.transcriber_path.exists() and self.venv_python.exists()
+
+    def get_status(self) -> Dict[str, Any]:
+        """Get current sidecar status."""
+        return {
+            "ready": self.ready,
+            "streaming": self._is_streaming(),
+            "whisper": self.whisper_info,
+            "available": self.is_available(),
+            "browserMode": False,
+            **self._last_status
+        }
+
+    def _is_streaming(self) -> bool:
+        """Check if currently in streaming mode."""
+        return self._last_status.get("streaming", False)
+
+    async def start(self) -> bool:
+        """Start the sidecar process."""
+        if self.process and self.process.poll() is None:
+            return True  # Already running
+
+        if not self.is_available():
+            print(f"[Sidecar] Not available: transcriber={self.transcriber_path.exists()}, venv={self.venv_python.exists()}")
+            return False
+
+        try:
+            # Get Whisper configuration from environment
+            env = os.environ.copy()
+            env["WHISPER_MODEL"] = os.getenv("WHISPER_MODEL", "medium")
+            env["WHISPER_DEVICE"] = os.getenv("WHISPER_DEVICE", "cpu")
+            env["WHISPER_COMPUTE"] = os.getenv("WHISPER_COMPUTE", "int8")
+
+            print(f"[Sidecar] Starting with model={env['WHISPER_MODEL']}, device={env['WHISPER_DEVICE']}, compute={env['WHISPER_COMPUTE']}")
+
+            self.process = subprocess.Popen(
+                [str(self.venv_python), str(self.transcriber_path), "--server"],
+                stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                env=env,
+                cwd=str(self.sidecar_dir),
+                bufsize=1,  # Line buffered
+                text=True
+            )
+
+            # Start reader threads
+            self._reader_thread = Thread(target=self._read_stdout, daemon=True)
+            self._reader_thread.start()
+
+            stderr_thread = Thread(target=self._read_stderr, daemon=True)
+            stderr_thread.start()
+
+            # Wait for ready signal
+            try:
+                response = await asyncio.wait_for(
+                    asyncio.get_running_loop().run_in_executor(
+                        None, self._wait_for_ready
+                    ),
+                    timeout=120.0  # 2 minutes for model download
+                )
+                if response and response.get("status") == "ready":
+                    self.ready = True
+                    print("[Sidecar] Ready")
+                    return True
+            except asyncio.TimeoutError:
+                print("[Sidecar] Timeout waiting for ready")
+                self.stop()
+                return False
+
+        except Exception as e:
+            print(f"[Sidecar] Start error: {e}")
+            return False
+
+        return False
+
+    def _wait_for_ready(self) -> Optional[Dict]:
+        """Wait for the ready signal from sidecar."""
+        while True:
+            try:
+                response = self._response_queue.get(timeout=1.0)
+                status = response.get("status", "")
+
+                # Track progress events
+                if status in ["downloading_model", "model_downloaded", "model_cached",
+                              "loading_model", "model_loaded", "model_error"]:
+                    self._last_status = response
+                    self._notify_progress(response)
+
+                    if status == "model_loaded":
+                        # Extract whisper info
+                        self.whisper_info = {
+                            "model": os.getenv("WHISPER_MODEL", "medium"),
+                            "device": os.getenv("WHISPER_DEVICE", "cpu"),
+                            "compute": os.getenv("WHISPER_COMPUTE", "int8"),
+                            "configSource": "environment"
+                        }
+                    elif status == "model_error":
+                        self.whisper_info = {"error": response.get("error", "Unknown error")}
+
+                if status == "ready":
+                    return response
+
+            except queue.Empty:
+                # Snapshot: stop() may set self.process to None concurrently.
+                proc = self.process
+                if proc is None or proc.poll() is not None:
+                    return None  # Process died or was stopped
+                continue
+
+    def _read_stdout(self):
+        """Read stdout from sidecar process."""
+        if not self.process or not self.process.stdout:
+            return
+
+        for line in self.process.stdout:
+            line = line.strip()
+            if not line:
+                continue
+            try:
+                data = json.loads(line)
+                self._response_queue.put(data)
+            except json.JSONDecodeError:
+                print(f"[Sidecar] Invalid JSON: {line[:100]}")
+
+    def _read_stderr(self):
+        """Read stderr from sidecar process."""
+        if not self.process or not self.process.stderr:
+            return
+
+        for line in self.process.stderr:
+            line = line.strip()
+            if line:
+                # Try to parse as JSON (some status messages go to stderr)
+                try:
+                    data = json.loads(line)
+                    if "status" in data or "warning" in data:
+                        self._notify_progress(data)
+                except json.JSONDecodeError:
+                    print(f"[Sidecar stderr] {line}")
+
+    def _notify_progress(self, data: Dict):
+        """Notify all progress callbacks."""
+        for callback in self._progress_callbacks:
+            try:
+                callback(data)
+            except Exception as e:
+                print(f"[Sidecar] Progress callback error: {e}")
+
+    def add_progress_callback(self, callback: Callable):
+        """Add a callback for progress updates."""
+        self._progress_callbacks.append(callback)
+
+    def remove_progress_callback(self, callback: Callable):
+        """Remove a progress callback."""
+        if callback in self._progress_callbacks:
+            self._progress_callbacks.remove(callback)
+
+    async def send_command(self, command: Dict) -> Optional[Dict]:
+        """Send a command to the sidecar and wait for response."""
+        if not self.process or self.process.poll() is not None:
+            return {"error": "Sidecar not running"}
+
+        # An asyncio.Lock, not threading.Lock: the lock is held across an
+        # await, and a blocking lock would stall the event loop (and with it
+        # the coroutine that has to release the lock).
+        if self._lock is None:
+            self._lock = asyncio.Lock()
+
+        async with self._lock:
+            try:
+                # Clear queue before sending
+                while not self._response_queue.empty():
+                    try:
+                        self._response_queue.get_nowait()
+                    except queue.Empty:
+                        break
+
+                # Send command
+                cmd_json = json.dumps(command) + "\n"
+                self.process.stdin.write(cmd_json)
+                self.process.stdin.flush()
+
+                # Wait for response
+                try:
+                    response = await asyncio.wait_for(
+                        asyncio.get_running_loop().run_in_executor(
+                            None, lambda: self._response_queue.get(timeout=60.0)
+                        ),
+                        timeout=65.0
+                    )
+                    return response
+                except (asyncio.TimeoutError, queue.Empty):
+                    return {"error": "Command timeout"}
+
+            except Exception as e:
+                return {"error": f"Command error: {e}"}
+
+    async def transcribe_file(self, audio_path: str) -> Dict:
+        """Transcribe an audio file."""
+        return await self.send_command({
+            "action": "transcribe",
+            "file": audio_path
+        }) or {"error": "No response"}
+
+    async def start_stream(self) -> Dict:
+        """Start a streaming transcription session."""
+        result = await self.send_command({"action": "start_stream"})
+        if result and result.get("status") == "streaming":
+            self._last_status["streaming"] = True
+        return result or {"error": "No response"}
+
+    async def send_audio_chunk(self, base64_audio: str) -> Optional[Dict]:
+        """Send an audio chunk for streaming transcription."""
+        return await self.send_command({
+            "action": "audio_chunk",
+            "data": base64_audio
+        })
+
+    async def stop_stream(self) -> Dict:
+        """Stop the streaming session."""
+        result = await self.send_command({"action": "stop_stream"})
+        self._last_status["streaming"] = False
+        return result or {"error": "No response"}
+
+    async def segment_audio(self, file_path: str, max_chunk_seconds: int = 300) -> Dict:
+        """Segment an audio file using VAD."""
+        return await self.send_command({
+            "action": "segment_audio",
+            "file_path": file_path,
+            "max_chunk_seconds": max_chunk_seconds
+        }) or {"error": "No response"}
+
+    def stop(self):
+        """Stop the sidecar process."""
+        self.ready = False
+        self._last_status = {}
+
+        if self.process:
+            try:
+                # Try graceful shutdown
+                self.process.stdin.write('{"action": "quit"}\n')
+                self.process.stdin.flush()
+                self.process.wait(timeout=5.0)
+            except Exception:
+                pass  # Best effort; the finally block escalates to terminate/kill
+            finally:
+                if self.process.poll() is None:
+                    self.process.terminate()
+                    try:
+                        self.process.wait(timeout=2.0)
+                    except subprocess.TimeoutExpired:
+                        self.process.kill()
+                self.process = None
+
+        print("[Sidecar] Stopped")
+
+
+# Global instance
+_sidecar_manager: Optional[SidecarManager] = None
+
+
+def get_sidecar_manager() -> SidecarManager:
+    """Get or create the global sidecar manager instance."""
+    global _sidecar_manager
+    if _sidecar_manager is None:
+        _sidecar_manager = SidecarManager()
+    return _sidecar_manager
diff --git a/backend/build.py b/backend/build.py
index f48268e..874c62d 100644
--- a/backend/build.py
+++ b/backend/build.py
@@ -96,6 +96,8 @@ def build():
         "--hidden-import", "app.routers.meetings",
         "--hidden-import", "app.routers.ai",
         "--hidden-import", "app.routers.export",
+        "--hidden-import", "app.routers.sidecar",
+        "--hidden-import", "app.sidecar_manager",
         "--hidden-import", "app.models",
         "--hidden-import", "app.models.schemas",
         # Collect package data
diff --git a/client/src/main.js b/client/src/main.js
index 5875a99..fcacbba 100644
--- a/client/src/main.js
+++ b/client/src/main.js
@@ -1,4 +1,4 @@
-const { app, BrowserWindow, ipcMain, session } = require("electron");
+const { app, BrowserWindow, ipcMain, session, shell } = require("electron");
 const path = require("path");
 const fs = require("fs");
 const { spawn } = require("child_process");
@@ -724,3 +724,33 @@ ipcMain.handle("transcribe-audio", async (event, audioFilePath) => {
     }, 60000);
   });
 });
+
+// === Browser Mode Handler ===
+// Opens the current page in the system's default browser
+// This is useful when Electron's audio access is blocked by security software
+
+ipcMain.handle("open-in-browser", async () => {
+  const backendConfig = appConfig?.backend || {};
+  const host = backendConfig.host || "127.0.0.1";
+  const port = backendConfig.port || 8000;
+
+  // Determine the current page URL
+  let currentPage = "login";
+  if (mainWindow) {
+    const currentUrl = mainWindow.webContents.getURL();
+    if (currentUrl.includes("meetings.html")) {
+      currentPage = "meetings";
+    } else if (currentUrl.includes("meeting-detail.html")) {
+      currentPage = "meeting-detail";
+    }
+  }
+
+  const browserUrl = `http://${host}:${port}/${currentPage}`;
+
+  try {
+    await shell.openExternal(browserUrl);
+    return { success: true, url: browserUrl };
+  } catch (error) {
+    return { error: error.message };
+  }
+});
diff --git a/client/src/pages/login.html b/client/src/pages/login.html
index fbafc0b..64c00d6 100644
--- a/client/src/pages/login.html
+++ b/client/src/pages/login.html
@@ -26,6 +26,8 @@