refactor: complete V1 to V2 migration and remove legacy architecture
Remove all V1 architecture components and promote V2 to primary:

- Delete all paddle_ocr_* table models (export, ocr, translation, user)
- Delete legacy routers (auth, export, ocr, translation)
- Delete legacy schemas and services
- Promote user_v2.py to user.py as primary user model
- Update all imports and dependencies to use V2 models only
- Update main.py version to 2.0.0

Database changes:

- Fix SQLAlchemy reserved word: rename audit_log.metadata to extra_data
- Add migration to drop all paddle_ocr_* tables
- Update alembic env to only import V2 models

Frontend fixes:

- Fix Select component exports in TaskHistoryPage.tsx
- Update to use simplified Select API with options prop
- Fix AxiosInstance TypeScript import syntax

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
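The two backend items above that are easiest to get wrong are the SQLAlchemy reserved-word rename and the table-drop migration. As a point of reference, a minimal sketch of the `metadata` → `extra_data` rename is shown below; the table name, column type, and surrounding model fields are assumptions for illustration, not the project's actual audit_log model.

```python
# Minimal sketch, assuming a declarative audit-log model; names and column
# types are illustrative. SQLAlchemy reserves the attribute name "metadata"
# on declarative classes (it refers to the table MetaData object), so the
# mapped attribute is renamed to extra_data.
from sqlalchemy import JSON, Column, Integer
from sqlalchemy.orm import declarative_base

Base = declarative_base()


class AuditLog(Base):
    __tablename__ = "audit_log"

    id = Column(Integer, primary_key=True)
    extra_data = Column(JSON, nullable=True)  # previously exposed as "metadata"
```

The migration that drops the legacy tables could look roughly like the following; the revision identifiers and exact table names are hypothetical and would come from the repository's Alembic history.

```python
# Hypothetical Alembic migration sketch; revision IDs and table names are
# assumptions based on the commit message, not copied from the repository.
from alembic import op

revision = "drop_paddle_ocr_tables"
down_revision = None
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Drop dependent tables before the user table to avoid FK violations.
    for table in (
        "paddle_ocr_export",
        "paddle_ocr_translation",
        "paddle_ocr_ocr",
        "paddle_ocr_user",
    ):
        op.drop_table(table)


def downgrade() -> None:
    # The legacy V1 schema is intentionally not recreated on downgrade.
    pass
```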
@@ -1,7 +1,7 @@
"""
Tool_OCR - API Routers
Tool_OCR - API Routers (V2)
"""

from app.routers import auth, ocr, export, translation
from app.routers import auth, tasks, admin

__all__ = ["auth", "ocr", "export", "translation"]
__all__ = ["auth", "tasks", "admin"]

@@ -10,8 +10,8 @@ from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, status, Query
from sqlalchemy.orm import Session

from app.core.deps import get_db, get_current_admin_user_v2
from app.models.user_v2 import User
from app.core.deps import get_db, get_current_admin_user
from app.models.user import User
from app.services.admin_service import admin_service
from app.services.audit_service import audit_service

@@ -23,7 +23,7 @@ router = APIRouter(prefix="/api/v2/admin", tags=["Admin"])
@router.get("/stats", summary="Get system statistics")
async def get_system_stats(
    db: Session = Depends(get_db),
    admin_user: User = Depends(get_current_admin_user_v2)
    admin_user: User = Depends(get_current_admin_user)
):
    """
    Get overall system statistics
@@ -47,7 +47,7 @@ async def list_users(
    page: int = Query(1, ge=1),
    page_size: int = Query(50, ge=1, le=100),
    db: Session = Depends(get_db),
    admin_user: User = Depends(get_current_admin_user_v2)
    admin_user: User = Depends(get_current_admin_user)
):
    """
    Get list of all users with statistics
@@ -79,7 +79,7 @@ async def get_top_users(
    metric: str = Query("tasks", regex="^(tasks|completed_tasks)$"),
    limit: int = Query(10, ge=1, le=50),
    db: Session = Depends(get_db),
    admin_user: User = Depends(get_current_admin_user_v2)
    admin_user: User = Depends(get_current_admin_user)
):
    """
    Get audit logs with filtering
@@ -115,7 +115,7 @@ async def get_audit_logs(
    page: int = Query(1, ge=1),
    page_size: int = Query(100, ge=1, le=500),
    db: Session = Depends(get_db),
    admin_user: User = Depends(get_current_admin_user_v2)
    admin_user: User = Depends(get_current_admin_user)
):
    """
    Get audit logs with filtering
@@ -169,7 +169,7 @@ async def get_user_activity_summary(
    user_id: int,
    days: int = Query(30, ge=1, le=365),
    db: Session = Depends(get_db),
    admin_user: User = Depends(get_current_admin_user_v2)
    admin_user: User = Depends(get_current_admin_user)
):
    """
    Get user activity summary for the last N days

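The admin router hunks above only swap the dependency (get_current_admin_user_v2 → get_current_admin_user) and the user model import; the endpoint bodies are unchanged. For readers unfamiliar with the pattern, a hypothetical sketch of an admin-guard dependency of this shape follows; the real implementation lives in app/core/deps.py and the is_admin flag is an assumption.

```python
# Hypothetical sketch of an admin-guard FastAPI dependency; not the
# repository's actual app/core/deps.py implementation.
from fastapi import Depends, HTTPException, status

from app.core.deps import get_current_user  # assumed to resolve the JWT-authenticated user
from app.models.user import User


async def get_current_admin_user(
    current_user: User = Depends(get_current_user),
) -> User:
    # Reject non-admin users before the endpoint body runs.
    if not getattr(current_user, "is_admin", False):
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Admin privileges required",
        )
    return current_user
```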
@@ -1,70 +1,347 @@
|
||||
"""
|
||||
Tool_OCR - Authentication Router
|
||||
JWT login endpoint
|
||||
Tool_OCR - External Authentication Router (V2)
|
||||
Handles authentication via external Microsoft Azure AD API
|
||||
"""
|
||||
|
||||
from datetime import timedelta
|
||||
from datetime import datetime, timedelta
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from fastapi import APIRouter, Depends, HTTPException, status, Request
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.deps import get_db
|
||||
from app.core.security import verify_password, create_access_token
|
||||
from app.core.deps import get_db, get_current_user
|
||||
from app.core.security import create_access_token
|
||||
from app.models.user import User
|
||||
from app.schemas.auth import LoginRequest, Token
|
||||
|
||||
from app.models.session import Session as UserSession
|
||||
from app.schemas.auth import LoginRequest, Token, UserResponse
|
||||
from app.services.external_auth_service import external_auth_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/v1/auth", tags=["Authentication"])
|
||||
router = APIRouter(prefix="/api/v2/auth", tags=["Authentication V2"])
|
||||
|
||||
|
||||
@router.post("/login", response_model=Token, summary="User login")
|
||||
def get_client_ip(request: Request) -> str:
|
||||
"""Extract client IP address from request"""
|
||||
# Check X-Forwarded-For header (for proxies)
|
||||
forwarded = request.headers.get("X-Forwarded-For")
|
||||
if forwarded:
|
||||
return forwarded.split(",")[0].strip()
|
||||
# Check X-Real-IP header
|
||||
real_ip = request.headers.get("X-Real-IP")
|
||||
if real_ip:
|
||||
return real_ip
|
||||
# Fallback to direct client
|
||||
return request.client.host if request.client else "unknown"
|
||||
|
||||
|
||||
def get_user_agent(request: Request) -> str:
|
||||
"""Extract user agent from request"""
|
||||
return request.headers.get("User-Agent", "unknown")[:500]
|
||||
|
||||
|
||||
@router.post("/login", response_model=Token, summary="External API login")
|
||||
async def login(
|
||||
login_data: LoginRequest,
|
||||
request: Request,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
User login with username and password
|
||||
User login via external Microsoft Azure AD API
|
||||
|
||||
Returns JWT access token for authentication
|
||||
Returns JWT access token and stores session information
|
||||
|
||||
- **username**: User's username
|
||||
- **username**: User's email address
|
||||
- **password**: User's password
|
||||
"""
|
||||
# Query user by username
|
||||
user = db.query(User).filter(User.username == login_data.username).first()
|
||||
# Call external authentication API
|
||||
success, auth_response, error_msg = await external_auth_service.authenticate_user(
|
||||
username=login_data.username,
|
||||
password=login_data.password
|
||||
)
|
||||
|
||||
# Verify user exists and password is correct
|
||||
if not user or not verify_password(login_data.password, user.password_hash):
|
||||
logger.warning(f"Failed login attempt for username: {login_data.username}")
|
||||
if not success or not auth_response:
|
||||
logger.warning(
|
||||
f"External auth failed for user {login_data.username}: {error_msg}"
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Incorrect username or password",
|
||||
detail=error_msg or "Authentication failed",
|
||||
headers={"WWW-Authenticate": "Bearer"},
|
||||
)
|
||||
|
||||
# Check if user is active
|
||||
if not user.is_active:
|
||||
logger.warning(f"Inactive user login attempt: {login_data.username}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="User account is inactive"
|
||||
)
|
||||
# Extract user info from external API response
|
||||
user_info = auth_response.user_info
|
||||
email = user_info.email
|
||||
display_name = user_info.name
|
||||
|
||||
# Create access token
|
||||
access_token_expires = timedelta(minutes=settings.access_token_expire_minutes)
|
||||
access_token = create_access_token(
|
||||
data={"sub": str(user.id), "username": user.username},
|
||||
expires_delta=access_token_expires
|
||||
# Find or create user in database
|
||||
user = db.query(User).filter(User.email == email).first()
|
||||
|
||||
if not user:
|
||||
# Create new user
|
||||
user = User(
|
||||
email=email,
|
||||
display_name=display_name,
|
||||
is_active=True,
|
||||
last_login=datetime.utcnow()
|
||||
)
|
||||
db.add(user)
|
||||
db.commit()
|
||||
db.refresh(user)
|
||||
logger.info(f"Created new user: {email} (ID: {user.id})")
|
||||
else:
|
||||
# Update existing user
|
||||
user.display_name = display_name
|
||||
user.last_login = datetime.utcnow()
|
||||
|
||||
# Check if user is active
|
||||
if not user.is_active:
|
||||
logger.warning(f"Inactive user login attempt: {email}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="User account is inactive"
|
||||
)
|
||||
|
||||
db.commit()
|
||||
db.refresh(user)
|
||||
logger.info(f"Updated existing user: {email} (ID: {user.id})")
|
||||
|
||||
# Parse token expiration
|
||||
try:
|
||||
expires_at = datetime.fromisoformat(auth_response.expires_at.replace('Z', '+00:00'))
|
||||
issued_at = datetime.fromisoformat(auth_response.issued_at.replace('Z', '+00:00'))
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to parse token timestamps: {e}")
|
||||
expires_at = datetime.utcnow() + timedelta(seconds=auth_response.expires_in)
|
||||
issued_at = datetime.utcnow()
|
||||
|
||||
# Create session in database
|
||||
# TODO: Implement token encryption before storing
|
||||
session = UserSession(
|
||||
user_id=user.id,
|
||||
access_token=auth_response.access_token, # Should be encrypted
|
||||
id_token=auth_response.id_token, # Should be encrypted
|
||||
token_type=auth_response.token_type,
|
||||
expires_at=expires_at,
|
||||
issued_at=issued_at,
|
||||
ip_address=get_client_ip(request),
|
||||
user_agent=get_user_agent(request)
|
||||
)
|
||||
db.add(session)
|
||||
db.commit()
|
||||
db.refresh(session)
|
||||
|
||||
logger.info(
|
||||
f"Created session {session.id} for user {user.email} "
|
||||
f"(expires: {expires_at})"
|
||||
)
|
||||
|
||||
logger.info(f"Successful login: {user.username} (ID: {user.id})")
|
||||
# Create internal JWT token for API access
|
||||
# This token contains user ID and session ID
|
||||
internal_token_expires = timedelta(minutes=settings.access_token_expire_minutes)
|
||||
internal_access_token = create_access_token(
|
||||
data={
|
||||
"sub": str(user.id),
|
||||
"email": user.email,
|
||||
"session_id": session.id
|
||||
},
|
||||
expires_delta=internal_token_expires
|
||||
)
|
||||
|
||||
return {
|
||||
"access_token": access_token,
|
||||
"access_token": internal_access_token,
|
||||
"token_type": "bearer",
|
||||
"expires_in": settings.access_token_expire_minutes * 60 # Convert to seconds
|
||||
"expires_in": int(internal_token_expires.total_seconds()),
|
||||
"user": {
|
||||
"id": user.id,
|
||||
"email": user.email,
|
||||
"display_name": user.display_name
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@router.post("/logout", summary="User logout")
|
||||
async def logout(
|
||||
session_id: Optional[int] = None,
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""
|
||||
User logout - invalidates session
|
||||
|
||||
- **session_id**: Session ID to logout (optional, logs out all if not provided)
|
||||
"""
|
||||
# TODO: Implement proper current_user dependency from JWT token
|
||||
# For now, this is a placeholder
|
||||
|
||||
if session_id:
|
||||
# Logout specific session
|
||||
session = db.query(UserSession).filter(
|
||||
UserSession.id == session_id,
|
||||
UserSession.user_id == current_user.id
|
||||
).first()
|
||||
|
||||
if session:
|
||||
db.delete(session)
|
||||
db.commit()
|
||||
logger.info(f"Logged out session {session_id} for user {current_user.email}")
|
||||
return {"message": "Logged out successfully"}
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Session not found"
|
||||
)
|
||||
else:
|
||||
# Logout all sessions
|
||||
sessions = db.query(UserSession).filter(
|
||||
UserSession.user_id == current_user.id
|
||||
).all()
|
||||
|
||||
count = len(sessions)
|
||||
for session in sessions:
|
||||
db.delete(session)
|
||||
|
||||
db.commit()
|
||||
logger.info(f"Logged out all {count} sessions for user {current_user.email}")
|
||||
return {"message": f"Logged out {count} sessions"}
|
||||
|
||||
|
||||
@router.get("/me", response_model=UserResponse, summary="Get current user")
|
||||
async def get_me(
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""
|
||||
Get current authenticated user information
|
||||
"""
|
||||
# TODO: Implement proper current_user dependency from JWT token
|
||||
return {
|
||||
"id": current_user.id,
|
||||
"email": current_user.email,
|
||||
"display_name": current_user.display_name,
|
||||
"created_at": current_user.created_at,
|
||||
"last_login": current_user.last_login,
|
||||
"is_active": current_user.is_active
|
||||
}
|
||||
|
||||
|
||||
@router.get("/sessions", summary="List user sessions")
|
||||
async def list_sessions(
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""
|
||||
List all active sessions for current user
|
||||
"""
|
||||
sessions = db.query(UserSession).filter(
|
||||
UserSession.user_id == current_user.id
|
||||
).order_by(UserSession.created_at.desc()).all()
|
||||
|
||||
return {
|
||||
"sessions": [
|
||||
{
|
||||
"id": s.id,
|
||||
"token_type": s.token_type,
|
||||
"expires_at": s.expires_at,
|
||||
"issued_at": s.issued_at,
|
||||
"ip_address": s.ip_address,
|
||||
"user_agent": s.user_agent,
|
||||
"created_at": s.created_at,
|
||||
"last_accessed_at": s.last_accessed_at,
|
||||
"is_expired": s.is_expired,
|
||||
"time_until_expiry": s.time_until_expiry
|
||||
}
|
||||
for s in sessions
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
@router.post("/refresh", response_model=Token, summary="Refresh access token")
|
||||
async def refresh_token(
|
||||
request: Request,
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""
|
||||
Refresh access token before expiration
|
||||
|
||||
Re-authenticates with external API using stored session.
|
||||
Note: Since external API doesn't provide refresh tokens,
|
||||
we re-issue internal JWT tokens with extended expiry.
|
||||
"""
|
||||
try:
|
||||
# Find user's most recent session
|
||||
session = db.query(UserSession).filter(
|
||||
UserSession.user_id == current_user.id
|
||||
).order_by(UserSession.created_at.desc()).first()
|
||||
|
||||
if not session:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="No active session found"
|
||||
)
|
||||
|
||||
# Check if token is expiring soon (within TOKEN_REFRESH_BUFFER)
|
||||
if not external_auth_service.is_token_expiring_soon(session.expires_at):
|
||||
# Token still valid for a while, just issue new internal JWT
|
||||
internal_token_expires = timedelta(minutes=settings.access_token_expire_minutes)
|
||||
internal_access_token = create_access_token(
|
||||
data={
|
||||
"sub": str(current_user.id),
|
||||
"email": current_user.email,
|
||||
"session_id": session.id
|
||||
},
|
||||
expires_delta=internal_token_expires
|
||||
)
|
||||
|
||||
logger.info(f"Refreshed internal token for user {current_user.email}")
|
||||
|
||||
return {
|
||||
"access_token": internal_access_token,
|
||||
"token_type": "bearer",
|
||||
"expires_in": int(internal_token_expires.total_seconds()),
|
||||
"user": {
|
||||
"id": current_user.id,
|
||||
"email": current_user.email,
|
||||
"display_name": current_user.display_name
|
||||
}
|
||||
}
|
||||
|
||||
# External token expiring soon - would need re-authentication
|
||||
# For now, we extend internal token and log a warning
|
||||
logger.warning(
|
||||
f"External token expiring soon for user {current_user.email}. "
|
||||
"User should re-authenticate."
|
||||
)
|
||||
|
||||
internal_token_expires = timedelta(minutes=settings.access_token_expire_minutes)
|
||||
internal_access_token = create_access_token(
|
||||
data={
|
||||
"sub": str(current_user.id),
|
||||
"email": current_user.email,
|
||||
"session_id": session.id
|
||||
},
|
||||
expires_delta=internal_token_expires
|
||||
)
|
||||
|
||||
return {
|
||||
"access_token": internal_access_token,
|
||||
"token_type": "bearer",
|
||||
"expires_in": int(internal_token_expires.total_seconds()),
|
||||
"user": {
|
||||
"id": current_user.id,
|
||||
"email": current_user.email,
|
||||
"display_name": current_user.display_name
|
||||
}
|
||||
}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.exception(f"Token refresh failed for user {current_user.id}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Token refresh failed: {str(e)}"
|
||||
)
|
||||
|
||||
@@ -1,347 +0,0 @@
|
||||
"""
|
||||
Tool_OCR - External Authentication Router (V2)
|
||||
Handles authentication via external Microsoft Azure AD API
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status, Request
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.deps import get_db, get_current_user_v2
|
||||
from app.core.security import create_access_token
|
||||
from app.models.user_v2 import User
|
||||
from app.models.session import Session as UserSession
|
||||
from app.schemas.auth import LoginRequest, Token, UserResponse
|
||||
from app.services.external_auth_service import external_auth_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/v2/auth", tags=["Authentication V2"])
|
||||
|
||||
|
||||
def get_client_ip(request: Request) -> str:
|
||||
"""Extract client IP address from request"""
|
||||
# Check X-Forwarded-For header (for proxies)
|
||||
forwarded = request.headers.get("X-Forwarded-For")
|
||||
if forwarded:
|
||||
return forwarded.split(",")[0].strip()
|
||||
# Check X-Real-IP header
|
||||
real_ip = request.headers.get("X-Real-IP")
|
||||
if real_ip:
|
||||
return real_ip
|
||||
# Fallback to direct client
|
||||
return request.client.host if request.client else "unknown"
|
||||
|
||||
|
||||
def get_user_agent(request: Request) -> str:
|
||||
"""Extract user agent from request"""
|
||||
return request.headers.get("User-Agent", "unknown")[:500]
|
||||
|
||||
|
||||
@router.post("/login", response_model=Token, summary="External API login")
|
||||
async def login(
|
||||
login_data: LoginRequest,
|
||||
request: Request,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
User login via external Microsoft Azure AD API
|
||||
|
||||
Returns JWT access token and stores session information
|
||||
|
||||
- **username**: User's email address
|
||||
- **password**: User's password
|
||||
"""
|
||||
# Call external authentication API
|
||||
success, auth_response, error_msg = await external_auth_service.authenticate_user(
|
||||
username=login_data.username,
|
||||
password=login_data.password
|
||||
)
|
||||
|
||||
if not success or not auth_response:
|
||||
logger.warning(
|
||||
f"External auth failed for user {login_data.username}: {error_msg}"
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail=error_msg or "Authentication failed",
|
||||
headers={"WWW-Authenticate": "Bearer"},
|
||||
)
|
||||
|
||||
# Extract user info from external API response
|
||||
user_info = auth_response.user_info
|
||||
email = user_info.email
|
||||
display_name = user_info.name
|
||||
|
||||
# Find or create user in database
|
||||
user = db.query(User).filter(User.email == email).first()
|
||||
|
||||
if not user:
|
||||
# Create new user
|
||||
user = User(
|
||||
email=email,
|
||||
display_name=display_name,
|
||||
is_active=True,
|
||||
last_login=datetime.utcnow()
|
||||
)
|
||||
db.add(user)
|
||||
db.commit()
|
||||
db.refresh(user)
|
||||
logger.info(f"Created new user: {email} (ID: {user.id})")
|
||||
else:
|
||||
# Update existing user
|
||||
user.display_name = display_name
|
||||
user.last_login = datetime.utcnow()
|
||||
|
||||
# Check if user is active
|
||||
if not user.is_active:
|
||||
logger.warning(f"Inactive user login attempt: {email}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="User account is inactive"
|
||||
)
|
||||
|
||||
db.commit()
|
||||
db.refresh(user)
|
||||
logger.info(f"Updated existing user: {email} (ID: {user.id})")
|
||||
|
||||
# Parse token expiration
|
||||
try:
|
||||
expires_at = datetime.fromisoformat(auth_response.expires_at.replace('Z', '+00:00'))
|
||||
issued_at = datetime.fromisoformat(auth_response.issued_at.replace('Z', '+00:00'))
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to parse token timestamps: {e}")
|
||||
expires_at = datetime.utcnow() + timedelta(seconds=auth_response.expires_in)
|
||||
issued_at = datetime.utcnow()
|
||||
|
||||
# Create session in database
|
||||
# TODO: Implement token encryption before storing
|
||||
session = UserSession(
|
||||
user_id=user.id,
|
||||
access_token=auth_response.access_token, # Should be encrypted
|
||||
id_token=auth_response.id_token, # Should be encrypted
|
||||
token_type=auth_response.token_type,
|
||||
expires_at=expires_at,
|
||||
issued_at=issued_at,
|
||||
ip_address=get_client_ip(request),
|
||||
user_agent=get_user_agent(request)
|
||||
)
|
||||
db.add(session)
|
||||
db.commit()
|
||||
db.refresh(session)
|
||||
|
||||
logger.info(
|
||||
f"Created session {session.id} for user {user.email} "
|
||||
f"(expires: {expires_at})"
|
||||
)
|
||||
|
||||
# Create internal JWT token for API access
|
||||
# This token contains user ID and session ID
|
||||
internal_token_expires = timedelta(minutes=settings.access_token_expire_minutes)
|
||||
internal_access_token = create_access_token(
|
||||
data={
|
||||
"sub": str(user.id),
|
||||
"email": user.email,
|
||||
"session_id": session.id
|
||||
},
|
||||
expires_delta=internal_token_expires
|
||||
)
|
||||
|
||||
return {
|
||||
"access_token": internal_access_token,
|
||||
"token_type": "bearer",
|
||||
"expires_in": int(internal_token_expires.total_seconds()),
|
||||
"user": {
|
||||
"id": user.id,
|
||||
"email": user.email,
|
||||
"display_name": user.display_name
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@router.post("/logout", summary="User logout")
|
||||
async def logout(
|
||||
session_id: Optional[int] = None,
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user_v2)
|
||||
):
|
||||
"""
|
||||
User logout - invalidates session
|
||||
|
||||
- **session_id**: Session ID to logout (optional, logs out all if not provided)
|
||||
"""
|
||||
# TODO: Implement proper current_user dependency from JWT token
|
||||
# For now, this is a placeholder
|
||||
|
||||
if session_id:
|
||||
# Logout specific session
|
||||
session = db.query(UserSession).filter(
|
||||
UserSession.id == session_id,
|
||||
UserSession.user_id == current_user.id
|
||||
).first()
|
||||
|
||||
if session:
|
||||
db.delete(session)
|
||||
db.commit()
|
||||
logger.info(f"Logged out session {session_id} for user {current_user.email}")
|
||||
return {"message": "Logged out successfully"}
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Session not found"
|
||||
)
|
||||
else:
|
||||
# Logout all sessions
|
||||
sessions = db.query(UserSession).filter(
|
||||
UserSession.user_id == current_user.id
|
||||
).all()
|
||||
|
||||
count = len(sessions)
|
||||
for session in sessions:
|
||||
db.delete(session)
|
||||
|
||||
db.commit()
|
||||
logger.info(f"Logged out all {count} sessions for user {current_user.email}")
|
||||
return {"message": f"Logged out {count} sessions"}
|
||||
|
||||
|
||||
@router.get("/me", response_model=UserResponse, summary="Get current user")
|
||||
async def get_me(
|
||||
current_user: User = Depends(get_current_user_v2)
|
||||
):
|
||||
"""
|
||||
Get current authenticated user information
|
||||
"""
|
||||
# TODO: Implement proper current_user dependency from JWT token
|
||||
return {
|
||||
"id": current_user.id,
|
||||
"email": current_user.email,
|
||||
"display_name": current_user.display_name,
|
||||
"created_at": current_user.created_at,
|
||||
"last_login": current_user.last_login,
|
||||
"is_active": current_user.is_active
|
||||
}
|
||||
|
||||
|
||||
@router.get("/sessions", summary="List user sessions")
|
||||
async def list_sessions(
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user_v2)
|
||||
):
|
||||
"""
|
||||
List all active sessions for current user
|
||||
"""
|
||||
sessions = db.query(UserSession).filter(
|
||||
UserSession.user_id == current_user.id
|
||||
).order_by(UserSession.created_at.desc()).all()
|
||||
|
||||
return {
|
||||
"sessions": [
|
||||
{
|
||||
"id": s.id,
|
||||
"token_type": s.token_type,
|
||||
"expires_at": s.expires_at,
|
||||
"issued_at": s.issued_at,
|
||||
"ip_address": s.ip_address,
|
||||
"user_agent": s.user_agent,
|
||||
"created_at": s.created_at,
|
||||
"last_accessed_at": s.last_accessed_at,
|
||||
"is_expired": s.is_expired,
|
||||
"time_until_expiry": s.time_until_expiry
|
||||
}
|
||||
for s in sessions
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
@router.post("/refresh", response_model=Token, summary="Refresh access token")
|
||||
async def refresh_token(
|
||||
request: Request,
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user_v2)
|
||||
):
|
||||
"""
|
||||
Refresh access token before expiration
|
||||
|
||||
Re-authenticates with external API using stored session.
|
||||
Note: Since external API doesn't provide refresh tokens,
|
||||
we re-issue internal JWT tokens with extended expiry.
|
||||
"""
|
||||
try:
|
||||
# Find user's most recent session
|
||||
session = db.query(UserSession).filter(
|
||||
UserSession.user_id == current_user.id
|
||||
).order_by(UserSession.created_at.desc()).first()
|
||||
|
||||
if not session:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="No active session found"
|
||||
)
|
||||
|
||||
# Check if token is expiring soon (within TOKEN_REFRESH_BUFFER)
|
||||
if not external_auth_service.is_token_expiring_soon(session.expires_at):
|
||||
# Token still valid for a while, just issue new internal JWT
|
||||
internal_token_expires = timedelta(minutes=settings.access_token_expire_minutes)
|
||||
internal_access_token = create_access_token(
|
||||
data={
|
||||
"sub": str(current_user.id),
|
||||
"email": current_user.email,
|
||||
"session_id": session.id
|
||||
},
|
||||
expires_delta=internal_token_expires
|
||||
)
|
||||
|
||||
logger.info(f"Refreshed internal token for user {current_user.email}")
|
||||
|
||||
return {
|
||||
"access_token": internal_access_token,
|
||||
"token_type": "bearer",
|
||||
"expires_in": int(internal_token_expires.total_seconds()),
|
||||
"user": {
|
||||
"id": current_user.id,
|
||||
"email": current_user.email,
|
||||
"display_name": current_user.display_name
|
||||
}
|
||||
}
|
||||
|
||||
# External token expiring soon - would need re-authentication
|
||||
# For now, we extend internal token and log a warning
|
||||
logger.warning(
|
||||
f"External token expiring soon for user {current_user.email}. "
|
||||
"User should re-authenticate."
|
||||
)
|
||||
|
||||
internal_token_expires = timedelta(minutes=settings.access_token_expire_minutes)
|
||||
internal_access_token = create_access_token(
|
||||
data={
|
||||
"sub": str(current_user.id),
|
||||
"email": current_user.email,
|
||||
"session_id": session.id
|
||||
},
|
||||
expires_delta=internal_token_expires
|
||||
)
|
||||
|
||||
return {
|
||||
"access_token": internal_access_token,
|
||||
"token_type": "bearer",
|
||||
"expires_in": int(internal_token_expires.total_seconds()),
|
||||
"user": {
|
||||
"id": current_user.id,
|
||||
"email": current_user.email,
|
||||
"display_name": current_user.display_name
|
||||
}
|
||||
}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.exception(f"Token refresh failed for user {current_user.id}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Token refresh failed: {str(e)}"
|
||||
)
|
||||
@@ -1,338 +0,0 @@
|
||||
"""
|
||||
Tool_OCR - Export Router
|
||||
Export results in multiple formats
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.deps import get_db, get_current_active_user
|
||||
from app.models.user import User
|
||||
from app.models.ocr import OCRBatch, OCRFile, OCRResult, FileStatus
|
||||
from app.models.export import ExportRule
|
||||
from app.schemas.export import (
|
||||
ExportRequest,
|
||||
ExportRuleCreate,
|
||||
ExportRuleUpdate,
|
||||
ExportRuleResponse,
|
||||
CSSTemplateResponse,
|
||||
)
|
||||
from app.services.export_service import ExportService, ExportError
|
||||
from app.services.pdf_generator import PDFGenerator
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/v1/export", tags=["Export"])
|
||||
|
||||
# Initialize services
|
||||
export_service = ExportService()
|
||||
pdf_generator = PDFGenerator()
|
||||
|
||||
|
||||
@router.post("", summary="Export OCR results")
|
||||
async def export_results(
|
||||
request: ExportRequest,
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_active_user)
|
||||
):
|
||||
"""
|
||||
Export OCR results in specified format
|
||||
|
||||
Supports multiple export formats: txt, json, excel, markdown, pdf, zip
|
||||
|
||||
- **batch_id**: Batch ID to export
|
||||
- **format**: Export format (txt, json, excel, markdown, pdf, zip)
|
||||
- **rule_id**: Optional export rule ID to apply filters
|
||||
- **css_template**: CSS template for PDF export (default, academic, business)
|
||||
- **include_formats**: Formats to include in ZIP export
|
||||
"""
|
||||
# Verify batch ownership
|
||||
batch = db.query(OCRBatch).filter(
|
||||
OCRBatch.id == request.batch_id,
|
||||
OCRBatch.user_id == current_user.id
|
||||
).first()
|
||||
|
||||
if not batch:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Batch not found"
|
||||
)
|
||||
|
||||
# Get completed results
|
||||
results = db.query(OCRResult).join(OCRFile).filter(
|
||||
OCRFile.batch_id == request.batch_id,
|
||||
OCRFile.status == FileStatus.COMPLETED
|
||||
).all()
|
||||
|
||||
if not results:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="No completed results found for this batch"
|
||||
)
|
||||
|
||||
# Apply export rule if specified
|
||||
if request.rule_id:
|
||||
try:
|
||||
results = export_service.apply_export_rule(db, results, request.rule_id)
|
||||
except ExportError as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=str(e)
|
||||
)
|
||||
|
||||
try:
|
||||
# Generate export based on format
|
||||
export_dir = Path(f"uploads/batches/{batch.id}/exports")
|
||||
export_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if request.format == "txt":
|
||||
output_path = export_dir / f"batch_{batch.id}_export.txt"
|
||||
export_service.export_to_txt(results, output_path)
|
||||
|
||||
elif request.format == "json":
|
||||
output_path = export_dir / f"batch_{batch.id}_export.json"
|
||||
export_service.export_to_json(results, output_path)
|
||||
|
||||
elif request.format == "excel":
|
||||
output_path = export_dir / f"batch_{batch.id}_export.xlsx"
|
||||
export_service.export_to_excel(results, output_path)
|
||||
|
||||
elif request.format == "markdown":
|
||||
output_path = export_dir / f"batch_{batch.id}_export.md"
|
||||
export_service.export_to_markdown(results, output_path, combine=True)
|
||||
|
||||
elif request.format == "zip":
|
||||
output_path = export_dir / f"batch_{batch.id}_export.zip"
|
||||
include_formats = request.include_formats or ["markdown", "json"]
|
||||
export_service.export_batch_to_zip(db, batch.id, output_path, include_formats)
|
||||
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail=f"Unsupported export format: {request.format}"
|
||||
)
|
||||
|
||||
logger.info(f"Exported batch {batch.id} to {request.format} format: {output_path}")
|
||||
|
||||
# Return file for download
|
||||
return FileResponse(
|
||||
path=str(output_path),
|
||||
filename=output_path.name,
|
||||
media_type="application/octet-stream"
|
||||
)
|
||||
|
||||
except ExportError as e:
|
||||
logger.error(f"Export error: {e}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=str(e)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected export error: {e}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Export failed"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/pdf/{file_id}", summary="Generate PDF for single file")
|
||||
async def generate_pdf(
|
||||
file_id: int,
|
||||
css_template: str = "default",
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_active_user)
|
||||
):
|
||||
"""
|
||||
Generate layout-preserved PDF for a single file
|
||||
|
||||
- **file_id**: File ID
|
||||
- **css_template**: CSS template (default, academic, business)
|
||||
"""
|
||||
# Get file and verify ownership
|
||||
ocr_file = db.query(OCRFile).join(OCRBatch).filter(
|
||||
OCRFile.id == file_id,
|
||||
OCRBatch.user_id == current_user.id
|
||||
).first()
|
||||
|
||||
if not ocr_file:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="File not found"
|
||||
)
|
||||
|
||||
# Get result
|
||||
result = db.query(OCRResult).filter(OCRResult.file_id == file_id).first()
|
||||
if not result:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="OCR result not found"
|
||||
)
|
||||
|
||||
try:
|
||||
# Generate PDF
|
||||
export_dir = Path(f"uploads/batches/{ocr_file.batch_id}/exports")
|
||||
export_dir.mkdir(parents=True, exist_ok=True)
|
||||
output_path = export_dir / f"file_{file_id}_export.pdf"
|
||||
|
||||
export_service.export_to_pdf(
|
||||
result=result,
|
||||
output_path=output_path,
|
||||
css_template=css_template,
|
||||
metadata={"title": ocr_file.original_filename}
|
||||
)
|
||||
|
||||
logger.info(f"Generated PDF for file {file_id}: {output_path}")
|
||||
|
||||
return FileResponse(
|
||||
path=str(output_path),
|
||||
filename=f"{Path(ocr_file.original_filename).stem}.pdf",
|
||||
media_type="application/pdf"
|
||||
)
|
||||
|
||||
except ExportError as e:
|
||||
logger.error(f"PDF generation error: {e}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=str(e)
|
||||
)
|
||||
|
||||
|
||||
@router.get("/rules", response_model=List[ExportRuleResponse], summary="List export rules")
|
||||
async def list_export_rules(
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_active_user)
|
||||
):
|
||||
"""
|
||||
List all export rules for current user
|
||||
|
||||
Returns list of saved export rules
|
||||
"""
|
||||
rules = db.query(ExportRule).filter(ExportRule.user_id == current_user.id).all()
|
||||
return rules
|
||||
|
||||
|
||||
@router.post("/rules", response_model=ExportRuleResponse, summary="Create export rule")
|
||||
async def create_export_rule(
|
||||
rule: ExportRuleCreate,
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_active_user)
|
||||
):
|
||||
"""
|
||||
Create new export rule
|
||||
|
||||
Saves custom export configuration for reuse
|
||||
|
||||
- **rule_name**: Rule name
|
||||
- **description**: Optional description
|
||||
- **config_json**: Rule configuration (filters, formatting, export_options)
|
||||
- **css_template**: Optional custom CSS for PDF export
|
||||
"""
|
||||
# Create rule
|
||||
new_rule = ExportRule(
|
||||
user_id=current_user.id,
|
||||
rule_name=rule.rule_name,
|
||||
description=rule.description,
|
||||
config_json=rule.config_json,
|
||||
css_template=rule.css_template
|
||||
)
|
||||
|
||||
db.add(new_rule)
|
||||
db.commit()
|
||||
db.refresh(new_rule)
|
||||
|
||||
logger.info(f"Created export rule {new_rule.id} for user {current_user.id}")
|
||||
|
||||
return new_rule
|
||||
|
||||
|
||||
@router.put("/rules/{rule_id}", response_model=ExportRuleResponse, summary="Update export rule")
|
||||
async def update_export_rule(
|
||||
rule_id: int,
|
||||
rule: ExportRuleUpdate,
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_active_user)
|
||||
):
|
||||
"""
|
||||
Update existing export rule
|
||||
|
||||
- **rule_id**: Rule ID to update
|
||||
- **rule_name**: Optional new rule name
|
||||
- **description**: Optional new description
|
||||
- **config_json**: Optional new configuration
|
||||
- **css_template**: Optional new CSS template
|
||||
"""
|
||||
# Get rule and verify ownership
|
||||
db_rule = db.query(ExportRule).filter(
|
||||
ExportRule.id == rule_id,
|
||||
ExportRule.user_id == current_user.id
|
||||
).first()
|
||||
|
||||
if not db_rule:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Export rule not found"
|
||||
)
|
||||
|
||||
# Update fields
|
||||
update_data = rule.dict(exclude_unset=True)
|
||||
for field, value in update_data.items():
|
||||
setattr(db_rule, field, value)
|
||||
|
||||
db.commit()
|
||||
db.refresh(db_rule)
|
||||
|
||||
logger.info(f"Updated export rule {rule_id}")
|
||||
|
||||
return db_rule
|
||||
|
||||
|
||||
@router.delete("/rules/{rule_id}", summary="Delete export rule")
|
||||
async def delete_export_rule(
|
||||
rule_id: int,
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_active_user)
|
||||
):
|
||||
"""
|
||||
Delete export rule
|
||||
|
||||
- **rule_id**: Rule ID to delete
|
||||
"""
|
||||
# Get rule and verify ownership
|
||||
db_rule = db.query(ExportRule).filter(
|
||||
ExportRule.id == rule_id,
|
||||
ExportRule.user_id == current_user.id
|
||||
).first()
|
||||
|
||||
if not db_rule:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Export rule not found"
|
||||
)
|
||||
|
||||
db.delete(db_rule)
|
||||
db.commit()
|
||||
|
||||
logger.info(f"Deleted export rule {rule_id}")
|
||||
|
||||
return {"message": "Export rule deleted successfully"}
|
||||
|
||||
|
||||
@router.get("/css-templates", response_model=List[CSSTemplateResponse], summary="List CSS templates")
|
||||
async def list_css_templates():
|
||||
"""
|
||||
List available CSS templates for PDF generation
|
||||
|
||||
Returns list of predefined CSS templates with descriptions
|
||||
"""
|
||||
templates = pdf_generator.get_available_templates()
|
||||
|
||||
return [
|
||||
{"name": name, "description": desc}
|
||||
for name, desc in templates.items()
|
||||
]
|
||||
@@ -1,244 +0,0 @@
|
||||
"""
|
||||
Tool_OCR - OCR Router
|
||||
File upload, OCR processing, and status endpoints
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status, UploadFile, File, BackgroundTasks
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.deps import get_db, get_current_active_user
|
||||
from app.models.user import User
|
||||
from app.models.ocr import OCRBatch, OCRFile, OCRResult, BatchStatus, FileStatus
|
||||
from app.schemas.ocr import (
|
||||
OCRBatchResponse,
|
||||
BatchStatusResponse,
|
||||
FileStatusResponse,
|
||||
OCRResultDetailResponse,
|
||||
UploadBatchResponse,
|
||||
ProcessRequest,
|
||||
ProcessResponse,
|
||||
)
|
||||
from app.services.file_manager import FileManager, FileManagementError
|
||||
from app.services.ocr_service import OCRService
|
||||
from app.services.background_tasks import process_batch_files_with_retry
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/v1", tags=["OCR"])
|
||||
|
||||
# Initialize services
|
||||
file_manager = FileManager()
|
||||
ocr_service = OCRService()
|
||||
|
||||
|
||||
@router.post("/upload", response_model=UploadBatchResponse, summary="Upload files for OCR")
|
||||
async def upload_files(
|
||||
files: List[UploadFile] = File(..., description="Files to upload (PNG, JPG, PDF)"),
|
||||
batch_name: str = None,
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_active_user)
|
||||
):
|
||||
"""
|
||||
Upload files for OCR processing
|
||||
|
||||
Creates a new batch and uploads files to it
|
||||
|
||||
- **files**: List of files to upload (PNG, JPG, JPEG, PDF)
|
||||
- **batch_name**: Optional name for the batch
|
||||
"""
|
||||
if not files:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="No files provided"
|
||||
)
|
||||
|
||||
try:
|
||||
# Create batch
|
||||
batch = file_manager.create_batch(db, current_user.id, batch_name)
|
||||
|
||||
# Upload files
|
||||
uploaded_files = file_manager.add_files_to_batch(db, batch.id, files)
|
||||
|
||||
logger.info(f"Uploaded {len(uploaded_files)} files to batch {batch.id} for user {current_user.id}")
|
||||
|
||||
# Refresh batch to get updated counts
|
||||
db.refresh(batch)
|
||||
|
||||
# Return response matching frontend expectations
|
||||
return {
|
||||
"batch_id": batch.id,
|
||||
"files": uploaded_files
|
||||
}
|
||||
|
||||
except FileManagementError as e:
|
||||
logger.error(f"File upload error: {e}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail=str(e)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error during upload: {e}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to upload files"
|
||||
)
|
||||
|
||||
|
||||
# NOTE: process_batch_files function moved to app.services.background_tasks
|
||||
# Now using process_batch_files_with_retry with retry logic
|
||||
|
||||
@router.post("/ocr/process", response_model=ProcessResponse, summary="Trigger OCR processing")
|
||||
async def process_ocr(
|
||||
request: ProcessRequest,
|
||||
background_tasks: BackgroundTasks,
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_active_user)
|
||||
):
|
||||
"""
|
||||
Trigger OCR processing for a batch
|
||||
|
||||
Starts background processing of all files in the batch
|
||||
|
||||
- **batch_id**: Batch ID to process
|
||||
- **lang**: Language code (ch, en, japan, korean)
|
||||
- **detect_layout**: Enable layout detection
|
||||
"""
|
||||
# Verify batch ownership
|
||||
batch = db.query(OCRBatch).filter(
|
||||
OCRBatch.id == request.batch_id,
|
||||
OCRBatch.user_id == current_user.id
|
||||
).first()
|
||||
|
||||
if not batch:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Batch not found"
|
||||
)
|
||||
|
||||
if batch.status != BatchStatus.PENDING:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail=f"Batch is already {batch.status.value}"
|
||||
)
|
||||
|
||||
# Start background processing with retry logic
|
||||
background_tasks.add_task(
|
||||
process_batch_files_with_retry,
|
||||
batch_id=batch.id,
|
||||
lang=request.lang,
|
||||
detect_layout=request.detect_layout,
|
||||
db=SessionLocal() # Create new session for background task
|
||||
)
|
||||
|
||||
logger.info(f"Started OCR processing for batch {batch.id}")
|
||||
|
||||
return {
|
||||
"message": "OCR processing started",
|
||||
"batch_id": batch.id,
|
||||
"total_files": batch.total_files,
|
||||
"status": "processing"
|
||||
}
|
||||
|
||||
|
||||
@router.get("/batch/{batch_id}/status", response_model=BatchStatusResponse, summary="Get batch status")
|
||||
async def get_batch_status(
|
||||
batch_id: int,
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_active_user)
|
||||
):
|
||||
"""
|
||||
Get batch processing status
|
||||
|
||||
Returns batch information and all files in the batch
|
||||
|
||||
- **batch_id**: Batch ID
|
||||
"""
|
||||
# Verify batch ownership
|
||||
batch = db.query(OCRBatch).filter(
|
||||
OCRBatch.id == batch_id,
|
||||
OCRBatch.user_id == current_user.id
|
||||
).first()
|
||||
|
||||
if not batch:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Batch not found"
|
||||
)
|
||||
|
||||
# Get all files in batch
|
||||
files = db.query(OCRFile).filter(OCRFile.batch_id == batch_id).all()
|
||||
|
||||
return {
|
||||
"batch": batch,
|
||||
"files": files
|
||||
}
|
||||
|
||||
|
||||
@router.get("/ocr/result/{file_id}", response_model=OCRResultDetailResponse, summary="Get OCR result")
|
||||
async def get_ocr_result(
|
||||
file_id: int,
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_active_user)
|
||||
):
|
||||
"""
|
||||
Get OCR result for a file
|
||||
|
||||
Returns flattened file and OCR result information for frontend preview
|
||||
|
||||
- **file_id**: File ID
|
||||
"""
|
||||
# Get file
|
||||
ocr_file = db.query(OCRFile).join(OCRBatch).filter(
|
||||
OCRFile.id == file_id,
|
||||
OCRBatch.user_id == current_user.id
|
||||
).first()
|
||||
|
||||
if not ocr_file:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="File not found"
|
||||
)
|
||||
|
||||
# Get result if exists
|
||||
result = db.query(OCRResult).filter(OCRResult.file_id == file_id).first()
|
||||
|
||||
# Read markdown content if result exists
|
||||
markdown_content = None
|
||||
if result and result.markdown_path:
|
||||
markdown_file = Path(result.markdown_path)
|
||||
if markdown_file.exists():
|
||||
try:
|
||||
markdown_content = markdown_file.read_text(encoding='utf-8')
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to read markdown file {result.markdown_path}: {e}")
|
||||
|
||||
# Build JSON data from result if available
|
||||
json_data = None
|
||||
if result:
|
||||
json_data = {
|
||||
"total_text_regions": result.total_text_regions,
|
||||
"average_confidence": result.average_confidence,
|
||||
"detected_language": result.detected_language,
|
||||
"layout_data": result.layout_data,
|
||||
"images_metadata": result.images_metadata,
|
||||
}
|
||||
|
||||
# Return flattened structure matching frontend expectations
|
||||
return {
|
||||
"file_id": ocr_file.id,
|
||||
"filename": ocr_file.filename,
|
||||
"status": ocr_file.status.value,
|
||||
"markdown_content": markdown_content,
|
||||
"json_data": json_data,
|
||||
"confidence": result.average_confidence if result else None,
|
||||
"processing_time": ocr_file.processing_time,
|
||||
}
|
||||
|
||||
|
||||
# Import SessionLocal for background tasks
|
||||
from app.core.database import SessionLocal
|
||||
@@ -10,8 +10,8 @@ from fastapi import APIRouter, Depends, HTTPException, status, Query
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session

from app.core.deps import get_db, get_current_user_v2
from app.models.user_v2 import User
from app.core.deps import get_db, get_current_user
from app.models.user import User
from app.models.task import TaskStatus
from app.schemas.task import (
    TaskCreate,
@@ -34,7 +34,7 @@ router = APIRouter(prefix="/api/v2/tasks", tags=["Tasks"])
async def create_task(
    task_data: TaskCreate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user_v2)
    current_user: User = Depends(get_current_user)
):
    """
    Create a new OCR task
@@ -72,7 +72,7 @@ async def list_tasks(
    order_by: str = Query("created_at"),
    order_desc: bool = Query(True),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user_v2)
    current_user: User = Depends(get_current_user)
):
    """
    List user's tasks with pagination and filtering
@@ -134,7 +134,7 @@ async def list_tasks(
@router.get("/stats", response_model=TaskStatsResponse)
async def get_task_stats(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user_v2)
    current_user: User = Depends(get_current_user)
):
    """
    Get task statistics for current user
@@ -157,7 +157,7 @@ async def get_task_stats(
async def get_task(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user_v2)
    current_user: User = Depends(get_current_user)
):
    """
    Get task details by ID
@@ -184,7 +184,7 @@ async def update_task(
    task_id: str,
    task_update: TaskUpdate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user_v2)
    current_user: User = Depends(get_current_user)
):
    """
    Update task status and results
@@ -253,7 +253,7 @@ async def update_task(
async def delete_task(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user_v2)
    current_user: User = Depends(get_current_user)
):
    """
    Delete a task
@@ -280,7 +280,7 @@ async def delete_task(
async def download_json(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user_v2)
    current_user: User = Depends(get_current_user)
):
    """
    Download task result as JSON file
@@ -327,7 +327,7 @@ async def download_json(
async def download_markdown(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user_v2)
    current_user: User = Depends(get_current_user)
):
    """
    Download task result as Markdown file
@@ -374,7 +374,7 @@ async def download_markdown(
async def download_pdf(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user_v2)
    current_user: User = Depends(get_current_user)
):
    """
    Download task result as searchable PDF file
@@ -421,7 +421,7 @@ async def download_pdf(
async def start_task(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user_v2)
    current_user: User = Depends(get_current_user)
):
    """
    Start processing a pending task
@@ -459,7 +459,7 @@ async def start_task(
async def cancel_task(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user_v2)
    current_user: User = Depends(get_current_user)
):
    """
    Cancel a pending or processing task
@@ -513,7 +513,7 @@ async def cancel_task(
async def retry_task(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user_v2)
    current_user: User = Depends(get_current_user)
):
    """
    Retry a failed task

@@ -1,189 +0,0 @@
|
||||
"""
|
||||
Tool_OCR - Translation Router (RESERVED)
|
||||
Stub endpoints for future translation feature
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.deps import get_db, get_current_active_user
|
||||
from app.models.user import User
|
||||
from app.schemas.translation import (
|
||||
TranslationRequest,
|
||||
TranslationResponse,
|
||||
TranslationFeatureStatus,
|
||||
LanguageInfo,
|
||||
)
|
||||
from app.services.translation_service import StubTranslationService
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/v1/translate", tags=["Translation (RESERVED)"])
|
||||
|
||||
|
||||
@router.get("/status", response_model=TranslationFeatureStatus, summary="Get translation feature status")
|
||||
async def get_translation_status():
|
||||
"""
|
||||
Get translation feature status
|
||||
|
||||
Returns current implementation status and roadmap for translation feature.
|
||||
This is a RESERVED feature that will be implemented in Phase 5.
|
||||
|
||||
**Status**: RESERVED - Not yet implemented
|
||||
**Phase**: Phase 5 (Post-production)
|
||||
**Priority**: Implemented after production deployment and user feedback
|
||||
"""
|
||||
return StubTranslationService.get_feature_status()
|
||||
|
||||
|
||||
@router.get("/languages", response_model=List[LanguageInfo], summary="Get supported languages")
|
||||
async def get_supported_languages():
|
||||
"""
|
||||
Get list of languages planned for translation support
|
||||
|
||||
Returns list of languages that will be supported when translation
|
||||
feature is implemented.
|
||||
|
||||
**Status**: RESERVED - Planning phase
|
||||
"""
|
||||
return StubTranslationService.get_supported_languages()
|
||||
|
||||
|
||||
@router.post("/document", response_model=TranslationResponse, summary="Translate document (RESERVED)")
|
||||
async def translate_document(
|
||||
request: TranslationRequest,
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_active_user)
|
||||
):
|
||||
"""
|
||||
Translate OCR document (RESERVED - NOT IMPLEMENTED)
|
||||
|
||||
This endpoint is reserved for future translation functionality.
|
||||
Returns 501 Not Implemented status.
|
||||
|
||||
**Expected Functionality** (when implemented):
|
||||
- Translate markdown documents while preserving structure
|
||||
- Support multiple translation engines (offline, ERNIE, Google, DeepL)
|
||||
- Maintain layout and formatting
|
||||
- Handle technical terminology
|
||||
|
||||
**Planned Features**:
|
||||
- Offline translation (Argos Translate)
|
||||
- Cloud API integration (ERNIE, Google, DeepL)
|
||||
- Batch translation support
|
||||
- Translation memory
|
||||
- Glossary support
|
||||
|
||||
**Current Status**: RESERVED for Phase 5 implementation
|
||||
|
||||
---
|
||||
|
||||
**Request Parameters** (planned):
|
||||
- **file_id**: ID of OCR result file to translate
|
||||
- **source_lang**: Source language code (zh, en, ja, ko)
|
||||
- **target_lang**: Target language code (zh, en, ja, ko)
|
||||
- **engine_type**: Translation engine (offline, ernie, google, deepl)
|
||||
- **preserve_structure**: Whether to preserve markdown structure
|
||||
- **engine_config**: Engine-specific configuration
|
||||
|
||||
**Response** (planned):
|
||||
- **task_id**: Translation task ID for tracking progress
|
||||
- **status**: Translation status
|
||||
- **translated_file_path**: Path to translated file (when completed)
|
||||
"""
|
||||
logger.info(f"Translation request received from user {current_user.id} (stub endpoint)")
|
||||
|
||||
# Return 501 Not Implemented with informative message
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_501_NOT_IMPLEMENTED,
|
||||
detail={
|
||||
"error": "Translation feature not implemented",
|
||||
"message": "This feature is reserved for future development (Phase 5)",
|
||||
"status": "RESERVED",
|
||||
"roadmap": {
|
||||
"phase": "Phase 5",
|
||||
"priority": "Implemented after production deployment",
|
||||
"planned_features": [
|
||||
"Offline translation (Argos Translate)",
|
||||
"Cloud API integration (ERNIE, Google, DeepL)",
|
||||
"Structure-preserving markdown translation",
|
||||
"Batch translation support"
|
||||
]
|
||||
},
|
||||
"request_received": {
|
||||
"file_id": request.file_id,
|
||||
"source_lang": request.source_lang,
|
||||
"target_lang": request.target_lang,
|
||||
"engine_type": request.engine_type
|
||||
},
|
||||
"action": "Please check back in a future release or contact support for updates"
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@router.get("/task/{task_id}", summary="Get translation task status (RESERVED)")
|
||||
async def get_translation_task_status(
|
||||
task_id: int,
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_active_user)
|
||||
):
|
||||
"""
|
||||
Get translation task status (RESERVED - NOT IMPLEMENTED)
|
||||
|
||||
This endpoint would track translation task progress.
|
||||
Returns 501 Not Implemented status.
|
||||
|
||||
**Planned Functionality**:
|
||||
- Real-time translation progress
|
||||
- Status updates (pending, processing, completed, failed)
|
||||
- Estimated completion time
|
||||
- Error reporting
|
||||
|
||||
**Current Status**: RESERVED for Phase 5 implementation
|
||||
"""
|
||||
logger.info(f"Translation status check for task {task_id} from user {current_user.id} (stub endpoint)")
|
||||
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_501_NOT_IMPLEMENTED,
|
||||
detail={
|
||||
"error": "Translation feature not implemented",
|
||||
"message": "Translation task tracking is reserved for Phase 5",
|
||||
"task_id": task_id,
|
||||
"status": "RESERVED"
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@router.delete("/task/{task_id}", summary="Cancel translation task (RESERVED)")
|
||||
async def cancel_translation_task(
|
||||
task_id: int,
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_active_user)
|
||||
):
|
||||
"""
|
||||
Cancel ongoing translation task (RESERVED - NOT IMPLEMENTED)
|
||||
|
||||
This endpoint would allow cancellation of translation tasks.
|
||||
Returns 501 Not Implemented status.
|
||||
|
||||
**Planned Functionality**:
|
||||
- Cancel in-progress translations
|
||||
- Clean up temporary files
|
||||
- Refund credits (if applicable)
|
||||
|
||||
**Current Status**: RESERVED for Phase 5 implementation
|
||||
"""
|
||||
logger.info(f"Translation cancellation request for task {task_id} from user {current_user.id} (stub endpoint)")
|
||||
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_501_NOT_IMPLEMENTED,
|
||||
detail={
|
||||
"error": "Translation feature not implemented",
|
||||
"message": "This feature is reserved for Phase 5",
|
||||
"status": "RESERVED"
|
||||
}
|
||||
)
|
||||