This commit implements comprehensive external Azure AD authentication with complete task management, file download, and admin monitoring systems.

## Core Features Implemented (80% Complete)

### 1. Token Auto-Refresh Mechanism ✅
- Backend: POST /api/v2/auth/refresh endpoint
- Frontend: Auto-refresh 5 minutes before expiration
- Auto-retry on 401 errors with seamless token refresh

### 2. File Download System ✅
- Three format support: JSON / Markdown / PDF
- Endpoints: GET /api/v2/tasks/{id}/download/{format}
- File access control with ownership validation
- Frontend download buttons in TaskHistoryPage

### 3. Complete Task Management ✅
Backend Endpoints:
- POST /api/v2/tasks/{id}/start - Start task
- POST /api/v2/tasks/{id}/cancel - Cancel task
- POST /api/v2/tasks/{id}/retry - Retry failed task
- GET /api/v2/tasks - List with filters (status, filename, date range)
- GET /api/v2/tasks/stats - User statistics

Frontend Features:
- Status-based action buttons (Start/Cancel/Retry)
- Advanced search and filtering (status, filename, date range)
- Pagination and sorting
- Task statistics dashboard (5 stat cards)

### 4. Admin Monitoring System ✅ (Backend)
Admin APIs:
- GET /api/v2/admin/stats - System statistics
- GET /api/v2/admin/users - User list with stats
- GET /api/v2/admin/users/top - User leaderboard
- GET /api/v2/admin/audit-logs - Audit log query system
- GET /api/v2/admin/audit-logs/user/{id}/summary

Admin Features:
- Email-based admin check (ymirliu@panjit.com.tw)
- Comprehensive system metrics (users, tasks, sessions, activity)
- Audit logging service for security tracking

### 5. User Isolation & Security ✅
- Row-level security on all task queries
- File access control with ownership validation
- Strict user_id filtering on all operations
- Session validation and expiry checking
- Admin privilege verification

## New Files Created

Backend:
- backend/app/models/user_v2.py - User model for external auth
- backend/app/models/task.py - Task model with user isolation
- backend/app/models/session.py - Session management
- backend/app/models/audit_log.py - Audit log model
- backend/app/services/external_auth_service.py - External API client
- backend/app/services/task_service.py - Task CRUD with isolation
- backend/app/services/file_access_service.py - File access control
- backend/app/services/admin_service.py - Admin operations
- backend/app/services/audit_service.py - Audit logging
- backend/app/routers/auth_v2.py - V2 auth endpoints
- backend/app/routers/tasks.py - Task management endpoints
- backend/app/routers/admin.py - Admin endpoints
- backend/alembic/versions/5e75a59fb763_*.py - DB migration

Frontend:
- frontend/src/services/apiV2.ts - Complete V2 API client
- frontend/src/types/apiV2.ts - V2 type definitions
- frontend/src/pages/TaskHistoryPage.tsx - Task history UI

Modified Files:
- backend/app/core/deps.py - Added get_current_admin_user_v2
- backend/app/main.py - Registered admin router
- frontend/src/pages/LoginPage.tsx - V2 login integration
- frontend/src/components/Layout.tsx - User display and logout
- frontend/src/App.tsx - Added /tasks route

## Documentation
- openspec/changes/.../PROGRESS_UPDATE.md - Detailed progress report

## Pending Items (20%)
1. Database migration execution for audit_logs table
2. Frontend admin dashboard page
3. Frontend audit log viewer

## Testing Status
- Manual testing: ✅ Authentication flow verified
- Unit tests: ⏳ Pending
- Integration tests: ⏳ Pending

## Security Enhancements
- ✅ User isolation (row-level security)
- ✅ File access control
- ✅ Token expiry validation
- ✅ Admin privilege verification
- ✅ Audit logging infrastructure
- ⏳ Token encryption (noted, low priority)
- ⏳ Rate limiting (noted, low priority)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
171 lines
4.6 KiB
Python
171 lines
4.6 KiB
Python
"""
|
|
Tool_OCR - FastAPI Application Entry Point
|
|
Main application setup with CORS, routes, and startup/shutdown events
|
|
"""
|
|
|
|
from fastapi import FastAPI
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from contextlib import asynccontextmanager
|
|
import logging
|
|
import asyncio
|
|
from pathlib import Path
|
|
|
|
from app.core.config import settings
|
|
from app.services.background_tasks import task_manager
|
|
|
|
# The file handler below opens settings.log_file, so its parent directory
# has to exist before logging is configured.
Path(settings.log_file).parent.mkdir(parents=True, exist_ok=True)

# Configure root logging: every record goes both to the log file and to the
# stream handler, using one shared format. The level is looked up on the
# logging module by the name held in settings.log_level.
logging.basicConfig(
    level=getattr(logging, settings.log_level),
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[logging.FileHandler(settings.log_file), logging.StreamHandler()],
)

# Module-level logger used throughout this file
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run startup work before the application serves and shutdown work after.

    Startup: verifies the configured directories and launches the task
    manager's cleanup scheduler as an asyncio background task.
    Shutdown: cancels that background task and waits for it to finish.

    Args:
        app: The FastAPI application this lifespan is attached to (not used
            inside the body).
    """
    # ---- Startup ----
    logger.info("Starting Tool_OCR application...")

    # Make sure every directory the settings object knows about exists
    settings.ensure_directories()
    logger.info("All directories created/verified")

    # Run the cleanup scheduler concurrently with request handling; keep the
    # task handle so it can be cancelled at shutdown.
    scheduler_job = asyncio.create_task(task_manager.start_cleanup_scheduler())
    logger.info("Started cleanup scheduler for expired files")

    # TODO: Initialize database connection pool
    # TODO: Load PaddleOCR models

    logger.info("Application startup complete")

    # Control returns to the framework here; code below runs at shutdown.
    yield

    # ---- Shutdown ----
    logger.info("Shutting down Tool_OCR application...")

    # Request cancellation, then await the task so the cancellation is
    # actually delivered before shutdown continues.
    scheduler_job.cancel()
    try:
        await scheduler_job
    except asyncio.CancelledError:
        logger.info("Cleanup scheduler stopped")

    # TODO: Close database connections
|
|
|
|
|
|
# Build the FastAPI application object; `lifespan` supplies the
# startup/shutdown handling defined in this module.
app = FastAPI(
    lifespan=lifespan,
    title="Tool_OCR",
    version="0.1.0",
    description="OCR Batch Processing System with Structure Extraction",
)

# Attach CORS handling: origins come from settings, while every method and
# header is allowed and credentials are permitted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.cors_origins_list,
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)
|
|
|
|
|
|
# Health check endpoint
@app.get("/health")
async def health_check():
    """Report service health together with GPU status.

    Returns:
        A dict with "status", "service" and "version", plus a "gpu" entry.
        On success "gpu" describes availability, device and (when reported)
        memory usage; if querying the OCR service raises, "gpu" instead
        holds ``{"available": False, "error": <message>}``.
    """
    from app.services.ocr_service import OCRService

    payload = {
        "status": "healthy",
        "service": "Tool_OCR",
        "version": "0.1.0",
    }

    try:
        # A throwaway OCRService is created per request just to read the GPU
        # status. In production, this should be a singleton service.
        status = OCRService().get_gpu_status()

        gpu_section = {
            "available": status.get("gpu_available", False),
            "enabled": status.get("gpu_enabled", False),
            "device_name": status.get("device_name", "N/A"),
            "device_count": status.get("device_count", 0),
            "compute_capability": status.get("compute_capability", "N/A"),
        }

        # Memory figures are included only when a total is reported
        if status.get("memory_total_mb"):
            memory_fields = (
                ("total_mb", "memory_total_mb"),
                ("allocated_mb", "memory_allocated_mb"),
                ("utilization_percent", "memory_utilization"),
            )
            gpu_section["memory"] = {
                out_key: round(status.get(src_key, 0), 2)
                for out_key, src_key in memory_fields
            }

        # Explain why the GPU is unavailable, when a reason was given
        if not status.get("gpu_available") and status.get("reason"):
            gpu_section["reason"] = status.get("reason")

        payload["gpu"] = gpu_section

    except Exception as e:
        # Best effort: a GPU probe failure must not fail the health check
        logger.warning(f"Failed to get GPU status: {e}")
        payload["gpu"] = {
            "available": False,
            "error": str(e),
        }

    return payload
|
|
|
|
|
|
# Root endpoint
@app.get("/")
async def root():
    """Return basic API information.

    Returns:
        A dict with the API name, its version, and the paths of the
        documentation and health-check endpoints.
    """
    api_info = {
        "message": "Tool_OCR API",
        "version": "0.1.0",
        "docs_url": "/docs",
        "health_check": "/health",
    }
    return api_info
|
|
|
|
|
|
# Include API routers (imported here, after `app` has been created)
from app.routers import auth, ocr, export, translation
# V2 routers with external authentication
from app.routers import auth_v2, tasks, admin

# Legacy V1 routers, registered in the same order as before.
# NOTE: translation is RESERVED for Phase 5.
for _router_module in (auth, ocr, export, translation):
    app.include_router(_router_module.router)

# New V2 routers with external authentication
for _router_module in (auth_v2, tasks, admin):
    app.include_router(_router_module.router)
|
|
|
|
|
|
if __name__ == "__main__":
    # Running this file directly starts a uvicorn server for the app.
    import uvicorn

    # Bind on all interfaces with auto-reload on; port and log level come
    # from settings (uvicorn is given the level name in lower case).
    server_options = {
        "host": "0.0.0.0",
        "port": settings.backend_port,
        "reload": True,
        "log_level": settings.log_level.lower(),
    }
    uvicorn.run("app.main:app", **server_options)
|