- Add sidecar management to backend (sidecar_manager.py)
- Add sidecar API router for browser mode (/api/sidecar/*)
- Add browser-api.js polyfill for running in Chrome/Edge
- Add "Open in Browser" button when audio access fails
- Update build scripts with new sidecar modules
- Add start-browser.sh for development browser mode

Browser mode allows users to open the app in their system browser when Electron's audio access is blocked by security software. The backend manages the sidecar process in browser mode (BROWSER_MODE=true).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
347 lines
9.8 KiB
Python
"""
|
|
Sidecar API Router
|
|
|
|
Provides HTTP endpoints for browser-based clients to access
|
|
the Whisper transcription sidecar functionality.
|
|
"""
|
|
|
|
import os
|
|
import tempfile
|
|
import base64
|
|
from typing import Optional
|
|
from fastapi import APIRouter, HTTPException, UploadFile, File, WebSocket, WebSocketDisconnect
|
|
from fastapi.responses import JSONResponse
|
|
from pydantic import BaseModel
|
|
|
|
from ..sidecar_manager import get_sidecar_manager
|
|
|
|
router = APIRouter(prefix="/sidecar", tags=["Sidecar"])
|
|
|
|
|
|
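# Editor's illustrative sketch (not part of the original file): the commit message
# exposes these endpoints as /api/sidecar/*, which is consistent with including this
# router under an "/api" prefix when the FastAPI app is assembled. The factory below
# is an assumption for illustration, not the project's actual app wiring.
def _example_create_app():
    """Build a minimal FastAPI app that serves this router under /api."""
    from fastapi import FastAPI

    app = FastAPI()
    app.include_router(router, prefix="/api")  # routes become /api/sidecar/...
    return app

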
class TranscribeRequest(BaseModel):
    """Request for transcribing audio from base64 data."""
    audio_data: str  # Base64 encoded audio (webm/opus)


class AudioChunkRequest(BaseModel):
    """Request for sending an audio chunk in streaming mode."""
    data: str  # Base64 encoded PCM audio


@router.get("/status")
async def get_sidecar_status():
    """
    Get the current status of the sidecar transcription engine.

    Returns:
        Status object with ready state, whisper model info, etc.
    """
    manager = get_sidecar_manager()
    return manager.get_status()


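# Editor's illustrative sketch (not part of the original file): a browser-mode client
# can poll /status until the Whisper model has loaded. The base URL and the "ready"
# field are assumptions -- the real payload shape comes from sidecar_manager.get_status().
def _example_wait_until_ready(base_url: str = "http://localhost:8000/api/sidecar",
                              timeout: float = 120.0) -> dict:
    """Poll the status endpoint until the sidecar reports it is ready."""
    import time

    import requests  # assumed available in the client environment

    deadline = time.time() + timeout
    while time.time() < deadline:
        status = requests.get(f"{base_url}/status", timeout=5).json()
        if status.get("ready"):
            return status
        time.sleep(1)
    raise TimeoutError("Sidecar did not become ready in time")

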
@router.post("/start")
|
|
async def start_sidecar():
|
|
"""
|
|
Start the sidecar transcription engine.
|
|
|
|
This is typically called automatically on backend startup,
|
|
but can be used to restart the sidecar if needed.
|
|
"""
|
|
manager = get_sidecar_manager()
|
|
|
|
if not manager.is_available():
|
|
raise HTTPException(
|
|
status_code=503,
|
|
detail="Sidecar not available. Check if sidecar/transcriber.py and sidecar/venv exist."
|
|
)
|
|
|
|
success = await manager.start()
|
|
if not success:
|
|
raise HTTPException(
|
|
status_code=503,
|
|
detail="Failed to start sidecar. Check backend logs for details."
|
|
)
|
|
|
|
return {"status": "started", "ready": manager.ready}
|
|
|
|
|
|
@router.post("/stop")
|
|
async def stop_sidecar():
|
|
"""Stop the sidecar transcription engine."""
|
|
manager = get_sidecar_manager()
|
|
manager.stop()
|
|
return {"status": "stopped"}
|
|
|
|
|
|
@router.post("/transcribe")
|
|
async def transcribe_audio(request: TranscribeRequest):
|
|
"""
|
|
Transcribe base64-encoded audio data.
|
|
|
|
The audio should be in webm/opus format (as recorded by MediaRecorder).
|
|
"""
|
|
manager = get_sidecar_manager()
|
|
|
|
if not manager.ready:
|
|
raise HTTPException(
|
|
status_code=503,
|
|
detail="Sidecar not ready. Please wait for model to load."
|
|
)
|
|
|
|
try:
|
|
# Decode base64 audio
|
|
audio_data = base64.b64decode(request.audio_data)
|
|
|
|
# Save to temp file
|
|
with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as f:
|
|
f.write(audio_data)
|
|
temp_path = f.name
|
|
|
|
try:
|
|
# Transcribe
|
|
result = await manager.transcribe_file(temp_path)
|
|
|
|
if result.get("error"):
|
|
raise HTTPException(status_code=500, detail=result["error"])
|
|
|
|
return {
|
|
"result": result.get("result", ""),
|
|
"file": result.get("file", "")
|
|
}
|
|
|
|
finally:
|
|
# Clean up temp file
|
|
os.unlink(temp_path)
|
|
|
|
except base64.binascii.Error:
|
|
raise HTTPException(status_code=400, detail="Invalid base64 audio data")
|
|
except Exception as e:
|
|
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
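# Editor's illustrative sketch (not part of the original file): calling /transcribe with
# a MediaRecorder webm recording that has been saved to disk. The base URL is an
# assumption; the request and response fields mirror the endpoint above.
def _example_transcribe_webm(path: str,
                             base_url: str = "http://localhost:8000/api/sidecar") -> str:
    """Base64-encode a webm recording and return the transcribed text."""
    import requests  # assumed available in the client environment

    with open(path, "rb") as f:
        audio_b64 = base64.b64encode(f.read()).decode("ascii")

    resp = requests.post(f"{base_url}/transcribe", json={"audio_data": audio_b64}, timeout=300)
    resp.raise_for_status()
    return resp.json()["result"]

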
@router.post("/transcribe-file")
|
|
async def transcribe_audio_file(file: UploadFile = File(...)):
|
|
"""
|
|
Transcribe an uploaded audio file.
|
|
|
|
Accepts common audio formats: mp3, wav, m4a, webm, ogg, flac, aac
|
|
"""
|
|
manager = get_sidecar_manager()
|
|
|
|
if not manager.ready:
|
|
raise HTTPException(
|
|
status_code=503,
|
|
detail="Sidecar not ready. Please wait for model to load."
|
|
)
|
|
|
|
# Validate file extension
|
|
allowed_extensions = {".mp3", ".wav", ".m4a", ".webm", ".ogg", ".flac", ".aac"}
|
|
ext = os.path.splitext(file.filename or "")[1].lower()
|
|
if ext not in allowed_extensions:
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail=f"Unsupported audio format. Allowed: {', '.join(allowed_extensions)}"
|
|
)
|
|
|
|
try:
|
|
# Save uploaded file
|
|
with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as f:
|
|
content = await file.read()
|
|
f.write(content)
|
|
temp_path = f.name
|
|
|
|
try:
|
|
result = await manager.transcribe_file(temp_path)
|
|
|
|
if result.get("error"):
|
|
raise HTTPException(status_code=500, detail=result["error"])
|
|
|
|
return {
|
|
"result": result.get("result", ""),
|
|
"filename": file.filename
|
|
}
|
|
|
|
finally:
|
|
os.unlink(temp_path)
|
|
|
|
except Exception as e:
|
|
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
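# Editor's illustrative sketch (not part of the original file): uploading an audio file
# as multipart/form-data instead of base64. The base URL is an assumption.
def _example_transcribe_upload(path: str,
                               base_url: str = "http://localhost:8000/api/sidecar") -> dict:
    """Upload an audio file to /transcribe-file and return the JSON response."""
    import requests  # assumed available in the client environment

    with open(path, "rb") as f:
        resp = requests.post(f"{base_url}/transcribe-file", files={"file": f}, timeout=600)
    resp.raise_for_status()
    return resp.json()

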
@router.post("/stream/start")
|
|
async def start_streaming():
|
|
"""
|
|
Start a streaming transcription session.
|
|
|
|
Returns a session ID that should be used for subsequent audio chunks.
|
|
"""
|
|
manager = get_sidecar_manager()
|
|
|
|
if not manager.ready:
|
|
raise HTTPException(
|
|
status_code=503,
|
|
detail="Sidecar not ready. Please wait for model to load."
|
|
)
|
|
|
|
result = await manager.start_stream()
|
|
|
|
if result.get("error"):
|
|
raise HTTPException(status_code=500, detail=result["error"])
|
|
|
|
return result
|
|
|
|
|
|
@router.post("/stream/chunk")
|
|
async def send_audio_chunk(request: AudioChunkRequest):
|
|
"""
|
|
Send an audio chunk for streaming transcription.
|
|
|
|
The audio should be base64-encoded PCM data (16-bit, 16kHz, mono).
|
|
|
|
Returns a transcription segment if speech end was detected,
|
|
or null if more audio is needed.
|
|
"""
|
|
manager = get_sidecar_manager()
|
|
|
|
if not manager.ready:
|
|
raise HTTPException(
|
|
status_code=503,
|
|
detail="Sidecar not ready"
|
|
)
|
|
|
|
result = await manager.send_audio_chunk(request.data)
|
|
|
|
# Result may be None if no segment ready yet
|
|
if result is None:
|
|
return {"segment": None}
|
|
|
|
if result.get("error"):
|
|
raise HTTPException(status_code=500, detail=result["error"])
|
|
|
|
return {"segment": result}
|
|
|
|
|
|
@router.post("/stream/stop")
|
|
async def stop_streaming():
|
|
"""
|
|
Stop the streaming transcription session.
|
|
|
|
Returns any final transcription segments and session statistics.
|
|
"""
|
|
manager = get_sidecar_manager()
|
|
|
|
result = await manager.stop_stream()
|
|
|
|
if result.get("error"):
|
|
raise HTTPException(status_code=500, detail=result["error"])
|
|
|
|
return result
|
|
|
|
|
|
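# Editor's illustrative sketch (not part of the original file): the HTTP streaming flow
# above -- start a session, post base64 PCM chunks (16-bit, 16 kHz, mono), then stop.
# The chunk iterable and the base URL are assumptions.
def _example_stream_pcm(chunks, base_url: str = "http://localhost:8000/api/sidecar"):
    """Send raw PCM chunks through the /stream/* endpoints and yield segments."""
    import requests  # assumed available in the client environment

    requests.post(f"{base_url}/stream/start", timeout=30).raise_for_status()
    try:
        for pcm_bytes in chunks:  # each item: raw 16-bit / 16 kHz / mono PCM bytes
            payload = {"data": base64.b64encode(pcm_bytes).decode("ascii")}
            resp = requests.post(f"{base_url}/stream/chunk", json=payload, timeout=30)
            resp.raise_for_status()
            segment = resp.json().get("segment")
            if segment is not None:
                yield segment
    finally:
        requests.post(f"{base_url}/stream/stop", timeout=30)

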
@router.post("/segment-audio")
|
|
async def segment_audio_file(file: UploadFile = File(...), max_chunk_seconds: int = 300):
|
|
"""
|
|
Segment an audio file using VAD for natural speech boundaries.
|
|
|
|
This is used for processing large audio files before cloud transcription.
|
|
|
|
Args:
|
|
file: The audio file to segment
|
|
max_chunk_seconds: Maximum duration per chunk (default 300s / 5 minutes)
|
|
|
|
Returns:
|
|
List of segment metadata with file paths
|
|
"""
|
|
manager = get_sidecar_manager()
|
|
|
|
if not manager.ready:
|
|
raise HTTPException(
|
|
status_code=503,
|
|
detail="Sidecar not ready. Please wait for model to load."
|
|
)
|
|
|
|
try:
|
|
# Save uploaded file
|
|
ext = os.path.splitext(file.filename or "")[1].lower() or ".wav"
|
|
with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as f:
|
|
content = await file.read()
|
|
f.write(content)
|
|
temp_path = f.name
|
|
|
|
try:
|
|
result = await manager.segment_audio(temp_path, max_chunk_seconds)
|
|
|
|
if result.get("error"):
|
|
raise HTTPException(status_code=500, detail=result["error"])
|
|
|
|
return result
|
|
|
|
finally:
|
|
# Keep temp file for now - segments reference it
|
|
# Will be cleaned up by the transcription process
|
|
pass
|
|
|
|
except Exception as e:
|
|
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
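# Editor's illustrative sketch (not part of the original file): /segment-audio takes the
# upload as multipart form data and max_chunk_seconds as a query parameter. The base URL
# is an assumption.
def _example_segment_audio(path: str, max_chunk_seconds: int = 300,
                           base_url: str = "http://localhost:8000/api/sidecar") -> dict:
    """Ask the sidecar to split a long recording at natural speech boundaries."""
    import requests  # assumed available in the client environment

    with open(path, "rb") as f:
        resp = requests.post(
            f"{base_url}/segment-audio",
            params={"max_chunk_seconds": max_chunk_seconds},
            files={"file": f},
            timeout=600,
        )
    resp.raise_for_status()
    return resp.json()

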
@router.websocket("/ws")
|
|
async def websocket_endpoint(websocket: WebSocket):
|
|
"""
|
|
WebSocket endpoint for real-time streaming transcription.
|
|
|
|
Protocol:
|
|
1. Client connects
|
|
2. Client sends: {"action": "start_stream"}
|
|
3. Server responds: {"status": "streaming", "session_id": "..."}
|
|
4. Client sends: {"action": "audio_chunk", "data": "<base64_pcm>"}
|
|
5. Server responds: {"segment": {...}} when speech detected, or {"segment": null}
|
|
6. Client sends: {"action": "stop_stream"}
|
|
7. Server responds: {"status": "stream_stopped", ...}
|
|
"""
|
|
await websocket.accept()
|
|
|
|
manager = get_sidecar_manager()
|
|
|
|
if not manager.ready:
|
|
await websocket.send_json({"error": "Sidecar not ready"})
|
|
await websocket.close()
|
|
return
|
|
|
|
try:
|
|
while True:
|
|
data = await websocket.receive_json()
|
|
action = data.get("action")
|
|
|
|
if action == "start_stream":
|
|
result = await manager.start_stream()
|
|
await websocket.send_json(result)
|
|
|
|
elif action == "audio_chunk":
|
|
audio_data = data.get("data")
|
|
if audio_data:
|
|
result = await manager.send_audio_chunk(audio_data)
|
|
await websocket.send_json({"segment": result})
|
|
else:
|
|
await websocket.send_json({"error": "No audio data"})
|
|
|
|
elif action == "stop_stream":
|
|
result = await manager.stop_stream()
|
|
await websocket.send_json(result)
|
|
break
|
|
|
|
elif action == "ping":
|
|
await websocket.send_json({"status": "pong"})
|
|
|
|
else:
|
|
await websocket.send_json({"error": f"Unknown action: {action}"})
|
|
|
|
except WebSocketDisconnect:
|
|
# Clean up streaming session if active
|
|
if manager._is_streaming():
|
|
await manager.stop_stream()
|
|
except Exception as e:
|
|
await websocket.send_json({"error": str(e)})
|
|
await websocket.close()
|
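# Editor's illustrative sketch (not part of the original file): a client for the WebSocket
# protocol documented above, using the third-party "websockets" package (an assumption
# about the client environment). The URL assumes the router is mounted under /api.
async def _example_ws_stream(chunks, url: str = "ws://localhost:8000/api/sidecar/ws"):
    """Stream PCM chunks over the WebSocket endpoint and collect segments."""
    import json

    import websockets  # assumed available in the client environment

    segments = []
    async with websockets.connect(url) as ws:
        await ws.send(json.dumps({"action": "start_stream"}))
        print(json.loads(await ws.recv()))  # e.g. {"status": "streaming", "session_id": ...}

        for pcm_bytes in chunks:  # raw 16-bit / 16 kHz / mono PCM bytes
            await ws.send(json.dumps({
                "action": "audio_chunk",
                "data": base64.b64encode(pcm_bytes).decode("ascii"),
            }))
            reply = json.loads(await ws.recv())
            if reply.get("segment"):
                segments.append(reply["segment"])

        await ws.send(json.dumps({"action": "stop_stream"}))
        print(json.loads(await ws.recv()))  # e.g. {"status": "stream_stopped", ...}
    return segments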