feat: Add browser mode fallback for Kaspersky audio blocking

- Add sidecar management to backend (sidecar_manager.py)
- Add sidecar API router for browser mode (/api/sidecar/*)
- Add browser-api.js polyfill for running in Chrome/Edge
- Add "Open in Browser" button when audio access fails
- Update build scripts with new sidecar modules
- Add start-browser.sh for development browser mode

Browser mode allows users to open the app in their system browser
when Electron's audio access is blocked by security software.
The backend manages the sidecar process in browser mode (BROWSER_MODE=true).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
egg committed on 2025-12-22 16:41:25 +08:00
commit 7d3fc72bd2 (parent e7a06e2b8f)
12 changed files with 1374 additions and 3 deletions


@@ -1,9 +1,19 @@
import os
from pathlib import Path
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from contextlib import asynccontextmanager
from .database import init_db_pool, init_tables
from .routers import auth, meetings, ai, export, sidecar
from .sidecar_manager import get_sidecar_manager
# Determine client directory path
BACKEND_DIR = Path(__file__).parent.parent
PROJECT_DIR = BACKEND_DIR.parent
CLIENT_DIR = PROJECT_DIR / "client" / "src"
@asynccontextmanager
@@ -11,8 +21,25 @@ async def lifespan(app: FastAPI):
# Startup
init_db_pool()
init_tables()
# Only start sidecar in browser mode (not when Electron manages it)
# Set BROWSER_MODE=true in start-browser.sh to enable
browser_mode = os.environ.get("BROWSER_MODE", "").lower() == "true"
sidecar_mgr = get_sidecar_manager()
if browser_mode and sidecar_mgr.is_available():
print("[Backend] Browser mode: Starting sidecar...")
await sidecar_mgr.start()
elif browser_mode:
print("[Backend] Browser mode: Sidecar not available (transcription disabled)")
else:
print("[Backend] Electron mode: Sidecar managed by Electron")
yield
# Shutdown - only stop if we started it
if browser_mode:
sidecar_mgr.stop()
app = FastAPI(
@@ -36,9 +63,42 @@ app.include_router(auth.router, prefix="/api", tags=["Authentication"])
app.include_router(meetings.router, prefix="/api", tags=["Meetings"])
app.include_router(ai.router, prefix="/api", tags=["AI"])
app.include_router(export.router, prefix="/api", tags=["Export"])
app.include_router(sidecar.router, prefix="/api", tags=["Sidecar"])
@app.get("/api/health") @app.get("/api/health")
async def health_check(): async def health_check():
"""Health check endpoint.""" """Health check endpoint."""
return {"status": "healthy", "service": "meeting-assistant"} return {"status": "healthy", "service": "meeting-assistant"}
# ========================================
# Browser Mode: Serve static files
# ========================================
# Check if client directory exists for browser mode
if CLIENT_DIR.exists():
# Serve static assets (CSS, JS, etc.)
app.mount("/styles", StaticFiles(directory=CLIENT_DIR / "styles"), name="styles")
app.mount("/services", StaticFiles(directory=CLIENT_DIR / "services"), name="services")
app.mount("/config", StaticFiles(directory=CLIENT_DIR / "config"), name="config")
@app.get("/")
async def serve_login():
"""Serve login page."""
return FileResponse(CLIENT_DIR / "pages" / "login.html")
@app.get("/login")
async def serve_login_page():
"""Serve login page."""
return FileResponse(CLIENT_DIR / "pages" / "login.html")
@app.get("/meetings")
async def serve_meetings_page():
"""Serve meetings list page."""
return FileResponse(CLIENT_DIR / "pages" / "meetings.html")
@app.get("/meeting-detail")
async def serve_meeting_detail_page():
"""Serve meeting detail page."""
return FileResponse(CLIENT_DIR / "pages" / "meeting-detail.html")
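Once the backend is started with BROWSER_MODE=true, the routes above can be sanity-checked from any HTTP client. A minimal sketch, assuming the default 127.0.0.1:8000 bind and that the httpx package is installed (both assumptions, not part of this commit):

# Hypothetical smoke test for browser mode (assumes httpx and a backend
# started with BROWSER_MODE=true on http://127.0.0.1:8000).
import httpx

BASE = "http://127.0.0.1:8000"

def smoke_test() -> None:
    # Health endpoint responds in both Electron and browser mode.
    print("health:", httpx.get(f"{BASE}/api/health").json())
    # In browser mode, "/" serves client/src/pages/login.html.
    print("login page status:", httpx.get(f"{BASE}/").status_code)
    # The new sidecar status endpoint reports whether Whisper is ready.
    print("sidecar:", httpx.get(f"{BASE}/api/sidecar/status").json())

if __name__ == "__main__":
    smoke_test()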


@@ -0,0 +1,346 @@
"""
Sidecar API Router
Provides HTTP endpoints for browser-based clients to access
the Whisper transcription sidecar functionality.
"""
import os
import tempfile
import base64
from typing import Optional
from fastapi import APIRouter, HTTPException, UploadFile, File, WebSocket, WebSocketDisconnect
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from ..sidecar_manager import get_sidecar_manager
router = APIRouter(prefix="/sidecar", tags=["Sidecar"])
class TranscribeRequest(BaseModel):
"""Request for transcribing audio from base64 data."""
audio_data: str # Base64 encoded audio (webm/opus)
class AudioChunkRequest(BaseModel):
"""Request for sending an audio chunk in streaming mode."""
data: str # Base64 encoded PCM audio
@router.get("/status")
async def get_sidecar_status():
"""
Get the current status of the sidecar transcription engine.
Returns:
Status object with ready state, whisper model info, etc.
"""
manager = get_sidecar_manager()
return manager.get_status()
@router.post("/start")
async def start_sidecar():
"""
Start the sidecar transcription engine.
This is typically called automatically on backend startup,
but can be used to restart the sidecar if needed.
"""
manager = get_sidecar_manager()
if not manager.is_available():
raise HTTPException(
status_code=503,
detail="Sidecar not available. Check if sidecar/transcriber.py and sidecar/venv exist."
)
success = await manager.start()
if not success:
raise HTTPException(
status_code=503,
detail="Failed to start sidecar. Check backend logs for details."
)
return {"status": "started", "ready": manager.ready}
@router.post("/stop")
async def stop_sidecar():
"""Stop the sidecar transcription engine."""
manager = get_sidecar_manager()
manager.stop()
return {"status": "stopped"}
@router.post("/transcribe")
async def transcribe_audio(request: TranscribeRequest):
"""
Transcribe base64-encoded audio data.
The audio should be in webm/opus format (as recorded by MediaRecorder).
"""
manager = get_sidecar_manager()
if not manager.ready:
raise HTTPException(
status_code=503,
detail="Sidecar not ready. Please wait for model to load."
)
try:
# Decode base64 audio
audio_data = base64.b64decode(request.audio_data)
# Save to temp file
with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as f:
f.write(audio_data)
temp_path = f.name
try:
# Transcribe
result = await manager.transcribe_file(temp_path)
if result.get("error"):
raise HTTPException(status_code=500, detail=result["error"])
return {
"result": result.get("result", ""),
"file": result.get("file", "")
}
finally:
# Clean up temp file
os.unlink(temp_path)
except base64.binascii.Error:
raise HTTPException(status_code=400, detail="Invalid base64 audio data")
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
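For testing outside the browser polyfill, the same endpoint can be called directly. A minimal sketch, assuming httpx is installed and a MediaRecorder-style webm/opus recording saved at sample.webm (both hypothetical); the payload mirrors TranscribeRequest above:

# Hypothetical client for POST /api/sidecar/transcribe.
import base64
import httpx

def transcribe_webm(path: str = "sample.webm") -> str:
    with open(path, "rb") as f:
        audio_b64 = base64.b64encode(f.read()).decode("ascii")
    resp = httpx.post(
        "http://127.0.0.1:8000/api/sidecar/transcribe",
        json={"audio_data": audio_b64},
        timeout=120.0,  # CPU transcription can take a while
    )
    resp.raise_for_status()
    return resp.json()["result"]

if __name__ == "__main__":
    print(transcribe_webm())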
@router.post("/transcribe-file")
async def transcribe_audio_file(file: UploadFile = File(...)):
"""
Transcribe an uploaded audio file.
Accepts common audio formats: mp3, wav, m4a, webm, ogg, flac, aac
"""
manager = get_sidecar_manager()
if not manager.ready:
raise HTTPException(
status_code=503,
detail="Sidecar not ready. Please wait for model to load."
)
# Validate file extension
allowed_extensions = {".mp3", ".wav", ".m4a", ".webm", ".ogg", ".flac", ".aac"}
ext = os.path.splitext(file.filename or "")[1].lower()
if ext not in allowed_extensions:
raise HTTPException(
status_code=400,
detail=f"Unsupported audio format. Allowed: {', '.join(allowed_extensions)}"
)
try:
# Save uploaded file
with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as f:
content = await file.read()
f.write(content)
temp_path = f.name
try:
result = await manager.transcribe_file(temp_path)
if result.get("error"):
raise HTTPException(status_code=500, detail=result["error"])
return {
"result": result.get("result", ""),
"filename": file.filename
}
finally:
os.unlink(temp_path)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
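The multipart variant can be exercised the same way. A minimal sketch, again assuming httpx and a hypothetical meeting.mp3 on disk:

# Hypothetical client for POST /api/sidecar/transcribe-file.
import httpx

def transcribe_upload(path: str = "meeting.mp3") -> str:
    with open(path, "rb") as f:
        resp = httpx.post(
            "http://127.0.0.1:8000/api/sidecar/transcribe-file",
            files={"file": (path, f, "audio/mpeg")},
            timeout=300.0,  # large files take longer
        )
    resp.raise_for_status()
    return resp.json()["result"]

if __name__ == "__main__":
    print(transcribe_upload())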
@router.post("/stream/start")
async def start_streaming():
"""
Start a streaming transcription session.
Returns a session ID that should be used for subsequent audio chunks.
"""
manager = get_sidecar_manager()
if not manager.ready:
raise HTTPException(
status_code=503,
detail="Sidecar not ready. Please wait for model to load."
)
result = await manager.start_stream()
if result.get("error"):
raise HTTPException(status_code=500, detail=result["error"])
return result
@router.post("/stream/chunk")
async def send_audio_chunk(request: AudioChunkRequest):
"""
Send an audio chunk for streaming transcription.
The audio should be base64-encoded PCM data (16-bit, 16kHz, mono).
Returns a transcription segment if speech end was detected,
or null if more audio is needed.
"""
manager = get_sidecar_manager()
if not manager.ready:
raise HTTPException(
status_code=503,
detail="Sidecar not ready"
)
result = await manager.send_audio_chunk(request.data)
# Result may be None if no segment ready yet
if result is None:
return {"segment": None}
if result.get("error"):
raise HTTPException(status_code=500, detail=result["error"])
return {"segment": result}
@router.post("/stream/stop")
async def stop_streaming():
"""
Stop the streaming transcription session.
Returns any final transcription segments and session statistics.
"""
manager = get_sidecar_manager()
result = await manager.stop_stream()
if result.get("error"):
raise HTTPException(status_code=500, detail=result["error"])
return result
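Taken together, the three endpoints above form the HTTP streaming flow: start a session, post base64-encoded 16-bit / 16 kHz / mono PCM chunks, then stop. A minimal sketch, assuming httpx; it sends one second of silence, so the returned segment is expected to be null:

# Hypothetical walkthrough of the HTTP streaming flow (assumes httpx).
import base64
import httpx

BASE = "http://127.0.0.1:8000/api/sidecar"

def stream_one_second_of_silence() -> None:
    with httpx.Client(timeout=60.0) as client:
        session = client.post(f"{BASE}/stream/start").json()
        print("session:", session)

        # One second of 16 kHz mono, 16-bit PCM: 16000 samples * 2 bytes.
        silence = base64.b64encode(bytes(16000 * 2)).decode("ascii")
        reply = client.post(f"{BASE}/stream/chunk", json={"data": silence}).json()
        print("segment:", reply["segment"])  # likely None for silence

        final = client.post(f"{BASE}/stream/stop").json()
        print("final:", final)

if __name__ == "__main__":
    stream_one_second_of_silence()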
@router.post("/segment-audio")
async def segment_audio_file(file: UploadFile = File(...), max_chunk_seconds: int = 300):
"""
Segment an audio file using VAD for natural speech boundaries.
This is used for processing large audio files before cloud transcription.
Args:
file: The audio file to segment
max_chunk_seconds: Maximum duration per chunk (default 300s / 5 minutes)
Returns:
List of segment metadata with file paths
"""
manager = get_sidecar_manager()
if not manager.ready:
raise HTTPException(
status_code=503,
detail="Sidecar not ready. Please wait for model to load."
)
try:
# Save uploaded file
ext = os.path.splitext(file.filename or "")[1].lower() or ".wav"
with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as f:
content = await file.read()
f.write(content)
temp_path = f.name
try:
result = await manager.segment_audio(temp_path, max_chunk_seconds)
if result.get("error"):
raise HTTPException(status_code=500, detail=result["error"])
return result
finally:
# Keep temp file for now - segments reference it
# Will be cleaned up by the transcription process
pass
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@router.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
"""
WebSocket endpoint for real-time streaming transcription.
Protocol:
1. Client connects
2. Client sends: {"action": "start_stream"}
3. Server responds: {"status": "streaming", "session_id": "..."}
4. Client sends: {"action": "audio_chunk", "data": "<base64_pcm>"}
5. Server responds: {"segment": {...}} when speech detected, or {"segment": null}
6. Client sends: {"action": "stop_stream"}
7. Server responds: {"status": "stream_stopped", ...}
"""
await websocket.accept()
manager = get_sidecar_manager()
if not manager.ready:
await websocket.send_json({"error": "Sidecar not ready"})
await websocket.close()
return
try:
while True:
data = await websocket.receive_json()
action = data.get("action")
if action == "start_stream":
result = await manager.start_stream()
await websocket.send_json(result)
elif action == "audio_chunk":
audio_data = data.get("data")
if audio_data:
result = await manager.send_audio_chunk(audio_data)
await websocket.send_json({"segment": result})
else:
await websocket.send_json({"error": "No audio data"})
elif action == "stop_stream":
result = await manager.stop_stream()
await websocket.send_json(result)
break
elif action == "ping":
await websocket.send_json({"status": "pong"})
else:
await websocket.send_json({"error": f"Unknown action: {action}"})
except WebSocketDisconnect:
# Clean up streaming session if active
if manager._is_streaming():
await manager.stop_stream()
except Exception as e:
await websocket.send_json({"error": str(e)})
await websocket.close()
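The WebSocket protocol described in the docstring can be exercised with any WebSocket client. A minimal sketch, assuming the third-party websockets package (not part of this commit) and the same base64 PCM chunk format as the HTTP API:

# Hypothetical WebSocket client for /api/sidecar/ws.
import asyncio
import base64
import json
import websockets

async def ws_session() -> None:
    uri = "ws://127.0.0.1:8000/api/sidecar/ws"
    async with websockets.connect(uri) as ws:
        await ws.send(json.dumps({"action": "start_stream"}))
        print("start:", json.loads(await ws.recv()))

        silence = base64.b64encode(bytes(16000 * 2)).decode("ascii")
        await ws.send(json.dumps({"action": "audio_chunk", "data": silence}))
        print("chunk:", json.loads(await ws.recv()))

        await ws.send(json.dumps({"action": "stop_stream"}))
        print("stop:", json.loads(await ws.recv()))

asyncio.run(ws_session())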


@@ -0,0 +1,307 @@
"""
Sidecar Process Manager
Manages the Python sidecar process for speech-to-text transcription.
Provides an interface for the backend to communicate with the sidecar
via subprocess stdin/stdout.
"""
import asyncio
import json
import os
import subprocess
import sys
import tempfile
import base64
from pathlib import Path
from typing import Optional, Dict, Any, Callable
from threading import Thread, Lock
import queue
class SidecarManager:
"""
Manages the Whisper transcription sidecar process.
The sidecar is a Python process running transcriber.py that handles
speech-to-text conversion using faster-whisper.
"""
def __init__(self):
self.process: Optional[subprocess.Popen] = None
self.ready = False
self.whisper_info: Optional[Dict] = None
self._lock = Lock()
self._response_queue = queue.Queue()
self._reader_thread: Optional[Thread] = None
self._progress_callbacks: list[Callable] = []
self._last_status: Dict[str, Any] = {}
# Paths
self.project_dir = Path(__file__).parent.parent.parent
self.sidecar_dir = self.project_dir / "sidecar"
self.transcriber_path = self.sidecar_dir / "transcriber.py"
self.venv_python = self.sidecar_dir / "venv" / "bin" / "python"
def is_available(self) -> bool:
"""Check if sidecar is available (files exist)."""
return self.transcriber_path.exists() and self.venv_python.exists()
def get_status(self) -> Dict[str, Any]:
"""Get current sidecar status."""
return {
"ready": self.ready,
"streaming": self._is_streaming(),
"whisper": self.whisper_info,
"available": self.is_available(),
"browserMode": False,
**self._last_status
}
def _is_streaming(self) -> bool:
"""Check if currently in streaming mode."""
return self._last_status.get("streaming", False)
async def start(self) -> bool:
"""Start the sidecar process."""
if self.process and self.process.poll() is None:
return True # Already running
if not self.is_available():
print(f"[Sidecar] Not available: transcriber={self.transcriber_path.exists()}, venv={self.venv_python.exists()}")
return False
try:
# Get Whisper configuration from environment
env = os.environ.copy()
env["WHISPER_MODEL"] = os.getenv("WHISPER_MODEL", "medium")
env["WHISPER_DEVICE"] = os.getenv("WHISPER_DEVICE", "cpu")
env["WHISPER_COMPUTE"] = os.getenv("WHISPER_COMPUTE", "int8")
print(f"[Sidecar] Starting with model={env['WHISPER_MODEL']}, device={env['WHISPER_DEVICE']}, compute={env['WHISPER_COMPUTE']}")
self.process = subprocess.Popen(
[str(self.venv_python), str(self.transcriber_path), "--server"],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
env=env,
cwd=str(self.sidecar_dir),
bufsize=1, # Line buffered
text=True
)
# Start reader threads
self._reader_thread = Thread(target=self._read_stdout, daemon=True)
self._reader_thread.start()
stderr_thread = Thread(target=self._read_stderr, daemon=True)
stderr_thread.start()
# Wait for ready signal
try:
response = await asyncio.wait_for(
asyncio.get_event_loop().run_in_executor(
None, self._wait_for_ready
),
timeout=120.0 # 2 minutes for model download
)
if response and response.get("status") == "ready":
self.ready = True
print("[Sidecar] Ready")
return True
except asyncio.TimeoutError:
print("[Sidecar] Timeout waiting for ready")
self.stop()
return False
except Exception as e:
print(f"[Sidecar] Start error: {e}")
return False
return False
def _wait_for_ready(self) -> Optional[Dict]:
"""Wait for the ready signal from sidecar."""
while True:
try:
response = self._response_queue.get(timeout=1.0)
status = response.get("status", "")
# Track progress events
if status in ["downloading_model", "model_downloaded", "model_cached",
"loading_model", "model_loaded", "model_error"]:
self._last_status = response
self._notify_progress(response)
if status == "model_loaded":
# Extract whisper info
self.whisper_info = {
"model": os.getenv("WHISPER_MODEL", "medium"),
"device": os.getenv("WHISPER_DEVICE", "cpu"),
"compute": os.getenv("WHISPER_COMPUTE", "int8"),
"configSource": "environment"
}
elif status == "model_error":
self.whisper_info = {"error": response.get("error", "Unknown error")}
if status == "ready":
return response
except queue.Empty:
if self.process and self.process.poll() is not None:
return None # Process died
continue
def _read_stdout(self):
"""Read stdout from sidecar process."""
if not self.process or not self.process.stdout:
return
for line in self.process.stdout:
line = line.strip()
if not line:
continue
try:
data = json.loads(line)
self._response_queue.put(data)
except json.JSONDecodeError as e:
print(f"[Sidecar] Invalid JSON: {line[:100]}")
def _read_stderr(self):
"""Read stderr from sidecar process."""
if not self.process or not self.process.stderr:
return
for line in self.process.stderr:
line = line.strip()
if line:
# Try to parse as JSON (some status messages go to stderr)
try:
data = json.loads(line)
if "status" in data or "warning" in data:
self._notify_progress(data)
except json.JSONDecodeError:
print(f"[Sidecar stderr] {line}")
def _notify_progress(self, data: Dict):
"""Notify all progress callbacks."""
for callback in self._progress_callbacks:
try:
callback(data)
except Exception as e:
print(f"[Sidecar] Progress callback error: {e}")
def add_progress_callback(self, callback: Callable):
"""Add a callback for progress updates."""
self._progress_callbacks.append(callback)
def remove_progress_callback(self, callback: Callable):
"""Remove a progress callback."""
if callback in self._progress_callbacks:
self._progress_callbacks.remove(callback)
async def send_command(self, command: Dict) -> Optional[Dict]:
"""Send a command to the sidecar and wait for response."""
if not self.process or self.process.poll() is not None:
return {"error": "Sidecar not running"}
with self._lock:
try:
# Clear queue before sending
while not self._response_queue.empty():
try:
self._response_queue.get_nowait()
except queue.Empty:
break
# Send command
cmd_json = json.dumps(command) + "\n"
self.process.stdin.write(cmd_json)
self.process.stdin.flush()
# Wait for response
try:
response = await asyncio.wait_for(
asyncio.get_event_loop().run_in_executor(
None, lambda: self._response_queue.get(timeout=60.0)
),
timeout=65.0
)
return response
except (asyncio.TimeoutError, queue.Empty):
return {"error": "Command timeout"}
except Exception as e:
return {"error": f"Command error: {e}"}
async def transcribe_file(self, audio_path: str) -> Dict:
"""Transcribe an audio file."""
return await self.send_command({
"action": "transcribe",
"file": audio_path
}) or {"error": "No response"}
async def start_stream(self) -> Dict:
"""Start a streaming transcription session."""
result = await self.send_command({"action": "start_stream"})
if result and result.get("status") == "streaming":
self._last_status["streaming"] = True
return result or {"error": "No response"}
async def send_audio_chunk(self, base64_audio: str) -> Optional[Dict]:
"""Send an audio chunk for streaming transcription."""
return await self.send_command({
"action": "audio_chunk",
"data": base64_audio
})
async def stop_stream(self) -> Dict:
"""Stop the streaming session."""
result = await self.send_command({"action": "stop_stream"})
self._last_status["streaming"] = False
return result or {"error": "No response"}
async def segment_audio(self, file_path: str, max_chunk_seconds: int = 300) -> Dict:
"""Segment an audio file using VAD."""
return await self.send_command({
"action": "segment_audio",
"file_path": file_path,
"max_chunk_seconds": max_chunk_seconds
}) or {"error": "No response"}
def stop(self):
"""Stop the sidecar process."""
self.ready = False
self._last_status = {}
if self.process:
try:
# Try graceful shutdown
self.process.stdin.write('{"action": "quit"}\n')
self.process.stdin.flush()
self.process.wait(timeout=5.0)
except:
pass
finally:
if self.process.poll() is None:
self.process.terminate()
try:
self.process.wait(timeout=2.0)
except:
self.process.kill()
self.process = None
print("[Sidecar] Stopped")
# Global instance
_sidecar_manager: Optional[SidecarManager] = None
def get_sidecar_manager() -> SidecarManager:
"""Get or create the global sidecar manager instance."""
global _sidecar_manager
if _sidecar_manager is None:
_sidecar_manager = SidecarManager()
return _sidecar_manager
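Other backend code can reuse the same singleton directly. A minimal sketch of transcribing a file in-process, assuming it runs inside the backend package (so app.sidecar_manager is importable), that sidecar/venv and sidecar/transcriber.py exist, and a hypothetical meeting.wav input:

# Hypothetical in-process use of the sidecar manager.
import asyncio
from app.sidecar_manager import get_sidecar_manager

async def demo(path: str) -> None:
    manager = get_sidecar_manager()
    # Model download/load events arrive via progress callbacks.
    manager.add_progress_callback(lambda event: print("[progress]", event))

    if not manager.is_available():
        print("sidecar files missing; transcription disabled")
        return

    if await manager.start():
        result = await manager.transcribe_file(path)
        print(result.get("result") or result.get("error"))
        manager.stop()

if __name__ == "__main__":
    asyncio.run(demo("meeting.wav"))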


@@ -96,6 +96,8 @@ def build():
"--hidden-import", "app.routers.meetings", "--hidden-import", "app.routers.meetings",
"--hidden-import", "app.routers.ai", "--hidden-import", "app.routers.ai",
"--hidden-import", "app.routers.export", "--hidden-import", "app.routers.export",
"--hidden-import", "app.routers.sidecar",
"--hidden-import", "app.sidecar_manager",
"--hidden-import", "app.models", "--hidden-import", "app.models",
"--hidden-import", "app.models.schemas", "--hidden-import", "app.models.schemas",
# Collect package data # Collect package data


@@ -1,4 +1,4 @@
const { app, BrowserWindow, ipcMain, session, shell } = require("electron");
const path = require("path");
const fs = require("fs");
const { spawn } = require("child_process");
@@ -724,3 +724,33 @@ ipcMain.handle("transcribe-audio", async (event, audioFilePath) => {
}, 60000);
});
});
// === Browser Mode Handler ===
// Opens the current page in the system's default browser
// This is useful when Electron's audio access is blocked by security software
ipcMain.handle("open-in-browser", async () => {
const backendConfig = appConfig?.backend || {};
const host = backendConfig.host || "127.0.0.1";
const port = backendConfig.port || 8000;
// Determine the current page URL
let currentPage = "login";
if (mainWindow) {
const currentUrl = mainWindow.webContents.getURL();
if (currentUrl.includes("meetings.html")) {
currentPage = "meetings";
} else if (currentUrl.includes("meeting-detail.html")) {
currentPage = "meeting-detail";
}
}
const browserUrl = `http://${host}:${port}/${currentPage}`;
try {
await shell.openExternal(browserUrl);
return { success: true, url: browserUrl };
} catch (error) {
return { error: error.message };
}
});


@@ -26,6 +26,8 @@
</div>
<script type="module">
// Browser mode polyfill (must be first)
import '../services/browser-api.js';
import { initApp } from '../services/init.js';
import { login } from '../services/api.js';


@@ -305,6 +305,35 @@
color: #dc3545;
margin-top: 4px;
}
/* Browser Mode Hint */
.browser-mode-hint {
display: flex;
align-items: center;
justify-content: space-between;
padding: 10px 15px;
background: #fff3cd;
border: 1px solid #ffc107;
border-radius: 6px;
margin-top: 10px;
font-size: 12px;
color: #856404;
}
.browser-mode-hint.hidden {
display: none;
}
.browser-mode-btn {
padding: 6px 12px;
background: #007bff;
color: white;
border: none;
border-radius: 4px;
font-size: 12px;
cursor: pointer;
white-space: nowrap;
}
.browser-mode-btn:hover {
background: #0056b3;
}
</style>
</head>
<body>
@@ -368,6 +397,11 @@
<span id="audio-status" class="audio-status">準備就緒</span> <span id="audio-status" class="audio-status">準備就緒</span>
</div> </div>
</div> </div>
<!-- Browser Mode Hint (shown when audio access fails) -->
<div id="browser-mode-hint" class="browser-mode-hint hidden">
<span>無法存取麥克風?安全軟體可能阻擋了 Electron。請嘗試在瀏覽器中開啟。</span>
<button id="open-browser-btn" class="browser-mode-btn">在瀏覽器中開啟</button>
</div>
</div>
</div>
@@ -443,6 +477,8 @@
</div>
<script type="module">
// Browser mode polyfill (must be first)
import '../services/browser-api.js';
import { initApp } from '../services/init.js';
import {
getMeeting,
@@ -501,6 +537,8 @@
const testRecordBtn = document.getElementById('test-record-btn');
const testPlayBtn = document.getElementById('test-play-btn');
const audioStatusEl = document.getElementById('audio-status');
const browserModeHint = document.getElementById('browser-mode-hint');
const openBrowserBtn = document.getElementById('open-browser-btn');
// Audio Device State
const audioDeviceState = {
@@ -663,6 +701,11 @@
} else {
setAudioStatus('無法存取麥克風', 'error');
}
// Show browser mode hint when audio access fails (only in Electron)
if (window.electronAPI && window.electronAPI.openInBrowser) {
browserModeHint.classList.remove('hidden');
}
}
}
@@ -939,6 +982,31 @@
}
});
// Browser mode button - opens in system browser when audio is blocked
if (openBrowserBtn && window.electronAPI && window.electronAPI.openInBrowser) {
openBrowserBtn.addEventListener('click', async () => {
try {
openBrowserBtn.disabled = true;
openBrowserBtn.textContent = '開啟中...';
const result = await window.electronAPI.openInBrowser();
if (result.error) {
console.error('Failed to open browser:', result.error);
openBrowserBtn.textContent = '開啟失敗';
} else {
openBrowserBtn.textContent = '已開啟';
}
setTimeout(() => {
openBrowserBtn.disabled = false;
openBrowserBtn.textContent = '在瀏覽器中開啟';
}, 2000);
} catch (error) {
console.error('Error opening browser:', error);
openBrowserBtn.disabled = false;
openBrowserBtn.textContent = '在瀏覽器中開啟';
}
});
}
// Listen for device changes (hot-plug)
navigator.mediaDevices.addEventListener('devicechange', () => {
console.log('Audio devices changed');


@@ -67,6 +67,8 @@
</div>
<script type="module">
// Browser mode polyfill (must be first)
import '../services/browser-api.js';
import { initApp } from '../services/init.js';
import { getMeetings, createMeeting, clearToken } from '../services/api.js';


@@ -40,4 +40,8 @@ contextBridge.exposeInMainWorld("electronAPI", {
onTranscriptionResult: (callback) => {
ipcRenderer.on("transcription-result", (event, text) => callback(text));
},
// === Browser Mode ===
// Open current page in system browser (useful when Electron audio is blocked)
openInBrowser: () => ipcRenderer.invoke("open-in-browser"),
});


@@ -0,0 +1,288 @@
/**
* Browser API Implementation
*
* Provides a compatible interface for pages that normally use electronAPI
* when running in browser mode. Uses HTTP API to communicate with the
* backend sidecar for transcription functionality.
*/
// Check if we're running in Electron or browser
const isElectron = typeof window !== 'undefined' && window.electronAPI !== undefined;
// Base URL for API calls (relative in browser mode)
const API_BASE = '';
// Progress listeners
const progressListeners = [];
const segmentListeners = [];
const streamStopListeners = [];
// WebSocket for streaming
let streamingSocket = null;
// Browser mode API implementation
const browserAPI = {
// Navigate to a page
navigate: (page) => {
const pageMap = {
'login': '/login',
'meetings': '/meetings',
'meeting-detail': '/meeting-detail'
};
window.location.href = pageMap[page] || `/${page}`;
},
// Get sidecar status
getSidecarStatus: async () => {
try {
const response = await fetch(`${API_BASE}/api/sidecar/status`);
if (response.ok) {
return await response.json();
}
return {
ready: false,
streaming: false,
whisper: null,
browserMode: true,
message: '無法取得轉寫引擎狀態'
};
} catch (error) {
console.error('[Browser Mode] getSidecarStatus error:', error);
return {
ready: false,
streaming: false,
whisper: null,
browserMode: true,
available: false,
message: '無法連接到後端服務'
};
}
},
// Model download progress listener
onModelDownloadProgress: (callback) => {
progressListeners.push(callback);
// Start polling for status updates
if (progressListeners.length === 1) {
startProgressPolling();
}
},
// Save audio file and return path (for browser mode, we handle differently)
saveAudioFile: async (arrayBuffer) => {
// In browser mode, we don't save to file system
// Instead, we'll convert to base64 and return it
// The transcribeAudio function will handle the base64 data
const base64 = arrayBufferToBase64(arrayBuffer);
return `base64:${base64}`;
},
// Transcribe audio
transcribeAudio: async (filePath) => {
try {
let response;
if (filePath.startsWith('base64:')) {
// Handle base64 encoded audio from saveAudioFile
const base64Data = filePath.substring(7);
response = await fetch(`${API_BASE}/api/sidecar/transcribe`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ audio_data: base64Data })
});
} else {
// Handle actual file path (shouldn't happen in browser mode)
throw new Error('File path transcription not supported in browser mode');
}
if (!response.ok) {
const error = await response.json();
throw new Error(error.detail || 'Transcription failed');
}
return await response.json();
} catch (error) {
console.error('[Browser Mode] transcribeAudio error:', error);
throw error;
}
},
// Transcription segment listener (for streaming mode)
onTranscriptionSegment: (callback) => {
segmentListeners.push(callback);
},
// Stream stopped listener
onStreamStopped: (callback) => {
streamStopListeners.push(callback);
},
// Start recording stream (WebSocket-based)
startRecordingStream: async () => {
try {
// Use HTTP endpoint for starting stream
const response = await fetch(`${API_BASE}/api/sidecar/stream/start`, {
method: 'POST'
});
if (!response.ok) {
const error = await response.json();
return { error: error.detail || 'Failed to start stream' };
}
const result = await response.json();
if (result.status === 'streaming') {
return { status: 'streaming', session_id: result.session_id };
}
return result;
} catch (error) {
console.error('[Browser Mode] startRecordingStream error:', error);
return { error: error.message };
}
},
// Stream audio chunk
streamAudioChunk: async (base64Audio) => {
try {
const response = await fetch(`${API_BASE}/api/sidecar/stream/chunk`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ data: base64Audio })
});
if (!response.ok) {
const error = await response.json();
return { error: error.detail || 'Failed to send chunk' };
}
const result = await response.json();
// If we got a segment, notify listeners
if (result.segment && result.segment.text) {
segmentListeners.forEach(cb => {
try {
cb(result.segment);
} catch (e) {
console.error('[Browser Mode] Segment listener error:', e);
}
});
}
return result;
} catch (error) {
console.error('[Browser Mode] streamAudioChunk error:', error);
return { error: error.message };
}
},
// Stop recording stream
stopRecordingStream: async () => {
try {
const response = await fetch(`${API_BASE}/api/sidecar/stream/stop`, {
method: 'POST'
});
if (!response.ok) {
const error = await response.json();
return { error: error.detail || 'Failed to stop stream' };
}
const result = await response.json();
// Notify stream stop listeners
streamStopListeners.forEach(cb => {
try {
cb(result);
} catch (e) {
console.error('[Browser Mode] Stream stop listener error:', e);
}
});
return result;
} catch (error) {
console.error('[Browser Mode] stopRecordingStream error:', error);
return { error: error.message };
}
},
// Get backend status
getBackendStatus: async () => {
try {
const response = await fetch('/api/health');
if (response.ok) {
return { ready: true };
}
return { ready: false };
} catch {
return { ready: false };
}
}
};
// Helper function to convert ArrayBuffer to base64
function arrayBufferToBase64(buffer) {
const bytes = new Uint8Array(buffer);
let binary = '';
for (let i = 0; i < bytes.byteLength; i++) {
binary += String.fromCharCode(bytes[i]);
}
return btoa(binary);
}
// Poll for sidecar status/progress updates
let progressPollingInterval = null;
let lastStatus = {};
function startProgressPolling() {
if (progressPollingInterval) return;
progressPollingInterval = setInterval(async () => {
try {
const response = await fetch(`${API_BASE}/api/sidecar/status`);
if (response.ok) {
const status = await response.json();
// Check for status changes to report
const currentStatus = status.status || (status.ready ? 'ready' : 'loading');
if (currentStatus !== lastStatus.status) {
// Notify progress listeners
progressListeners.forEach(cb => {
try {
cb(status);
} catch (e) {
console.error('[Browser Mode] Progress listener error:', e);
}
});
}
lastStatus = status;
// Stop polling once ready
if (status.ready) {
clearInterval(progressPollingInterval);
progressPollingInterval = null;
}
}
} catch (error) {
console.error('[Browser Mode] Progress polling error:', error);
}
}, 2000);
}
// Export the appropriate API based on environment
export const electronAPI = isElectron ? window.electronAPI : browserAPI;
// Also set it on window for pages that access it directly
if (!isElectron && typeof window !== 'undefined') {
window.electronAPI = browserAPI;
console.log('[Browser Mode] Running in browser mode with full transcription support');
console.log('[Browser Mode] 透過後端 Sidecar 提供即時語音轉寫功能');
}


@@ -290,6 +290,8 @@ pyinstaller ^
--hidden-import=app.routers.meetings ^
--hidden-import=app.routers.ai ^
--hidden-import=app.routers.export ^
--hidden-import=app.routers.sidecar ^
--hidden-import=app.sidecar_manager ^
--collect-data=pydantic ^
--collect-data=uvicorn ^
run_server.py

start-browser.sh (new executable file, 260 lines)

@@ -0,0 +1,260 @@
#!/bin/bash
#
# Meeting Assistant - Browser Mode Startup Script
# Run Meeting Assistant in a browser (full functionality, including real-time transcription)
#
# In this mode:
# - The backend starts automatically and manages the sidecar (Whisper transcription engine)
# - The frontend runs in a Chrome/Edge browser
# - All features remain available
#
set -e
# Color definitions
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Project paths
PROJECT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
BACKEND_DIR="$PROJECT_DIR/backend"
SIDECAR_DIR="$PROJECT_DIR/sidecar"
# Server Configuration (can be overridden by .env)
BACKEND_HOST="${BACKEND_HOST:-0.0.0.0}"
BACKEND_PORT="${BACKEND_PORT:-8000}"
# Whisper Configuration (can be overridden by .env)
export WHISPER_MODEL="${WHISPER_MODEL:-medium}"
export WHISPER_DEVICE="${WHISPER_DEVICE:-cpu}"
export WHISPER_COMPUTE="${WHISPER_COMPUTE:-int8}"
# Browser mode flag - tells backend to manage sidecar
export BROWSER_MODE="true"
# Helper functions: logging
log_info() {
echo -e "${BLUE}[INFO]${NC} $1"
}
log_success() {
echo -e "${GREEN}[OK]${NC} $1"
}
log_warn() {
echo -e "${YELLOW}[WARN]${NC} $1"
}
log_error() {
echo -e "${RED}[ERROR]${NC} $1"
}
# Load environment variables from .env file if it exists
if [ -f "$BACKEND_DIR/.env" ]; then
log_info "Loading backend environment from $BACKEND_DIR/.env"
export $(grep -v '^#' "$BACKEND_DIR/.env" | grep -v '^$' | xargs)
fi
# Function: check whether a port is in use
check_port() {
local port=$1
if lsof -i :$port > /dev/null 2>&1; then
return 0 # port is in use
else
return 1 # port is available
fi
}
# Function: open the browser
open_browser() {
local url=$1
log_info "Opening browser at $url"
# Try different browser commands
if command -v xdg-open &> /dev/null; then
xdg-open "$url" &
elif command -v wslview &> /dev/null; then
wslview "$url" &
elif command -v explorer.exe &> /dev/null; then
# WSL: use Windows browser
explorer.exe "$url" &
elif command -v open &> /dev/null; then
# macOS
open "$url" &
else
log_warn "Could not find a browser to open. Please manually visit: $url"
fi
}
# Function: check the environment
check_environment() {
local all_ok=true
# Check the backend virtual environment
if [ ! -d "$BACKEND_DIR/venv" ]; then
log_error "Backend virtual environment not found"
log_error "Please run: cd $BACKEND_DIR && python3 -m venv venv && source venv/bin/activate && pip install -r requirements.txt"
all_ok=false
fi
# Check the sidecar virtual environment
if [ ! -d "$SIDECAR_DIR/venv" ]; then
log_warn "Sidecar virtual environment not found"
log_warn "即時語音轉寫功能將無法使用"
log_warn "To enable: cd $SIDECAR_DIR && python3 -m venv venv && source venv/bin/activate && pip install -r requirements.txt"
else
log_success "Sidecar environment found - 即時語音轉寫功能可用"
fi
if [ "$all_ok" = false ]; then
exit 1
fi
}
# Function: start the backend (including the sidecar)
start_backend() {
log_info "Checking backend status..."
# Check if backend is already running
if check_port $BACKEND_PORT; then
# Verify it's our backend by checking health endpoint
if curl -s http://localhost:$BACKEND_PORT/api/health > /dev/null 2>&1; then
log_success "Backend is already running on port $BACKEND_PORT"
return 0
else
log_warn "Port $BACKEND_PORT is in use but not by our backend"
log_error "Please stop the process using port $BACKEND_PORT and try again"
exit 1
fi
fi
log_info "Starting backend server (with Sidecar management)..."
log_info "Whisper config: model=$WHISPER_MODEL, device=$WHISPER_DEVICE, compute=$WHISPER_COMPUTE"
cd "$BACKEND_DIR"
source venv/bin/activate
# Start uvicorn in background
nohup uvicorn app.main:app --host $BACKEND_HOST --port $BACKEND_PORT > "$PROJECT_DIR/backend-browser.log" 2>&1 &
local backend_pid=$!
# Wait for backend to be ready
log_info "Waiting for backend and sidecar to start..."
log_info "(This may take a minute if Whisper model needs to download)"
local max_wait=120 # 2 minutes for model download
local waited=0
while [ $waited -lt $max_wait ]; do
sleep 2
waited=$((waited + 2))
if curl -s http://localhost:$BACKEND_PORT/api/health > /dev/null 2>&1; then
log_success "Backend started (PID: $backend_pid)"
# Check sidecar status
local sidecar_status=$(curl -s http://localhost:$BACKEND_PORT/api/sidecar/status 2>/dev/null)
if echo "$sidecar_status" | grep -q '"ready":true'; then
log_success "Sidecar (Whisper) ready"
elif echo "$sidecar_status" | grep -q '"available":false'; then
log_warn "Sidecar not available - transcription disabled"
else
log_info "Sidecar loading... (model may be downloading)"
fi
return 0
fi
# Show progress every 10 seconds
if [ $((waited % 10)) -eq 0 ]; then
log_info "Still waiting... ($waited seconds)"
fi
done
log_error "Backend failed to start. Check $PROJECT_DIR/backend-browser.log for details"
exit 1
}
# Function: stop services
stop_services() {
log_info "Stopping services..."
pkill -f "uvicorn app.main:app" 2>/dev/null || true
sleep 1
log_success "Services stopped"
}
# Main
main() {
echo ""
echo "=========================================="
echo " Meeting Assistant - Browser Mode"
echo "=========================================="
echo ""
# Check environment
check_environment
# Start backend (which manages sidecar)
start_backend
# Give it a moment
sleep 1
# Open browser
local url="http://localhost:$BACKEND_PORT"
open_browser "$url"
echo ""
echo "=========================================="
log_success "Browser mode started!"
echo "=========================================="
echo ""
echo " Access URL: $url"
echo " API Docs: $url/docs"
echo ""
echo " Features:"
echo " - 即時語音轉寫(透過後端 Sidecar"
echo " - 上傳音訊轉寫"
echo " - AI 摘要"
echo " - 匯出 Excel"
echo ""
echo " To stop: $0 stop"
echo ""
log_info "Press Ctrl+C to exit (backend will keep running)"
echo ""
# Keep script running
trap 'echo ""; log_info "Exiting (backend still running)"; exit 0' INT TERM
while true; do
sleep 60
done
}
# Command handling
case "${1:-start}" in
start)
main
;;
stop)
stop_services
;;
restart)
stop_services
sleep 2
main
;;
status)
if check_port $BACKEND_PORT; then
log_success "Backend running on port $BACKEND_PORT"
curl -s http://localhost:$BACKEND_PORT/api/sidecar/status | python3 -m json.tool 2>/dev/null || echo "(Could not parse sidecar status)"
else
log_warn "Backend not running"
fi
;;
*)
echo "Usage: $0 {start|stop|restart|status}"
exit 1
;;
esac