first
This commit is contained in:
7
backend/app/routers/__init__.py
Normal file
7
backend/app/routers/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""
|
||||
Tool_OCR - API Routers
|
||||
"""
|
||||
|
||||
from app.routers import auth, ocr, export, translation
|
||||
|
||||
__all__ = ["auth", "ocr", "export", "translation"]
|
||||
70
backend/app/routers/auth.py
Normal file
70
backend/app/routers/auth.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""
|
||||
Tool_OCR - Authentication Router
|
||||
JWT login endpoint
|
||||
"""
|
||||
|
||||
from datetime import timedelta
|
||||
import logging
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.deps import get_db
|
||||
from app.core.security import verify_password, create_access_token
|
||||
from app.models.user import User
|
||||
from app.schemas.auth import LoginRequest, Token
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/v1/auth", tags=["Authentication"])
|
||||
|
||||
|
||||
@router.post("/login", response_model=Token, summary="User login")
async def login(
    login_data: LoginRequest,
    db: Session = Depends(get_db)
):
    """
    User login with username and password

    Returns JWT access token for authentication

    - **username**: User's username
    - **password**: User's password

    Responds with 401 when the username/password pair does not match and
    with 403 when the matching account is not active.
    """
    username = login_data.username
    account = db.query(User).filter(User.username == username).first()

    # Unknown username and wrong password deliberately share one response.
    authenticated = bool(account) and verify_password(
        login_data.password, account.password_hash
    )
    if not authenticated:
        logger.warning(f"Failed login attempt for username: {username}")
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Incorrect username or password",
            headers={"WWW-Authenticate": "Bearer"},
        )

    # The active flag is only checked after the credentials were accepted.
    if not account.is_active:
        logger.warning(f"Inactive user login attempt: {username}")
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="User account is inactive"
        )

    # Token lifetime is configured in minutes; the response reports seconds.
    lifetime_minutes = settings.access_token_expire_minutes
    claims = {"sub": str(account.id), "username": account.username}
    token = create_access_token(
        data=claims,
        expires_delta=timedelta(minutes=lifetime_minutes)
    )

    logger.info(f"Successful login: {account.username} (ID: {account.id})")

    return {
        "access_token": token,
        "token_type": "bearer",
        "expires_in": lifetime_minutes * 60
    }
|
||||
338
backend/app/routers/export.py
Normal file
338
backend/app/routers/export.py
Normal file
@@ -0,0 +1,338 @@
|
||||
"""
|
||||
Tool_OCR - Export Router
|
||||
Export results in multiple formats
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.deps import get_db, get_current_active_user
|
||||
from app.models.user import User
|
||||
from app.models.ocr import OCRBatch, OCRFile, OCRResult, FileStatus
|
||||
from app.models.export import ExportRule
|
||||
from app.schemas.export import (
|
||||
ExportRequest,
|
||||
ExportRuleCreate,
|
||||
ExportRuleUpdate,
|
||||
ExportRuleResponse,
|
||||
CSSTemplateResponse,
|
||||
)
|
||||
from app.services.export_service import ExportService, ExportError
|
||||
from app.services.pdf_generator import PDFGenerator
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/v1/export", tags=["Export"])
|
||||
|
||||
# Initialize services
|
||||
export_service = ExportService()
|
||||
pdf_generator = PDFGenerator()
|
||||
|
||||
|
||||
@router.post("", summary="Export OCR results")
async def export_results(
    request: ExportRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Export OCR results in specified format

    Supported export formats: txt, json, excel, markdown, zip.
    Any other format (including pdf) is rejected with 400; per-file PDFs are
    produced by the `/pdf/{file_id}` endpoint of this router.

    - **batch_id**: Batch ID to export
    - **format**: Export format (txt, json, excel, markdown, zip)
    - **rule_id**: Optional export rule ID to apply filters
    - **css_template**: CSS template for PDF export (not used by this endpoint)
    - **include_formats**: Formats to include in ZIP export
      (defaults to markdown and json)

    Responds with 404 when the batch does not belong to the caller, has no
    completed results, or the export rule cannot be applied; 400 for an
    unsupported format; 500 when the export itself fails.
    """
    # Verify batch ownership: another user's batch looks like a missing one.
    batch = db.query(OCRBatch).filter(
        OCRBatch.id == request.batch_id,
        OCRBatch.user_id == current_user.id
    ).first()

    if not batch:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Batch not found"
        )

    # Only results whose file finished processing are exported.
    results = db.query(OCRResult).join(OCRFile).filter(
        OCRFile.batch_id == request.batch_id,
        OCRFile.status == FileStatus.COMPLETED
    ).all()

    if not results:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No completed results found for this batch"
        )

    # Apply export rule if specified
    if request.rule_id:
        try:
            results = export_service.apply_export_rule(db, results, request.rule_id)
        except ExportError as e:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=str(e)
            ) from e

    try:
        # Generate export based on format
        export_dir = Path(f"uploads/batches/{batch.id}/exports")
        export_dir.mkdir(parents=True, exist_ok=True)

        if request.format == "txt":
            output_path = export_dir / f"batch_{batch.id}_export.txt"
            export_service.export_to_txt(results, output_path)

        elif request.format == "json":
            output_path = export_dir / f"batch_{batch.id}_export.json"
            export_service.export_to_json(results, output_path)

        elif request.format == "excel":
            output_path = export_dir / f"batch_{batch.id}_export.xlsx"
            export_service.export_to_excel(results, output_path)

        elif request.format == "markdown":
            output_path = export_dir / f"batch_{batch.id}_export.md"
            export_service.export_to_markdown(results, output_path, combine=True)

        elif request.format == "zip":
            output_path = export_dir / f"batch_{batch.id}_export.zip"
            include_formats = request.include_formats or ["markdown", "json"]
            # The ZIP export is given the batch id, not the filtered `results`.
            export_service.export_batch_to_zip(db, batch.id, output_path, include_formats)

        else:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Unsupported export format: {request.format}"
            )

        logger.info(f"Exported batch {batch.id} to {request.format} format: {output_path}")

        # Return file for download
        return FileResponse(
            path=str(output_path),
            filename=output_path.name,
            media_type="application/octet-stream"
        )

    except HTTPException:
        # HTTPException is an Exception subclass: without this clause the 400
        # raised above would be swallowed by the generic handler below and
        # reported to the client as a 500 "Export failed".
        raise
    except ExportError as e:
        logger.error(f"Export error: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        ) from e
    except Exception as e:
        # Keep the traceback in the log; the client only gets a generic message.
        logger.exception(f"Unexpected export error: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Export failed"
        ) from e
|
||||
|
||||
|
||||
@router.get("/pdf/{file_id}", summary="Generate PDF for single file")
async def generate_pdf(
    file_id: int,
    css_template: str = "default",
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Generate layout-preserved PDF for a single file

    - **file_id**: File ID
    - **css_template**: CSS template (default, academic, business)

    Responds with 404 when the file is not in one of the caller's batches or
    has no OCR result, and with 500 when the export service reports an error.
    """
    # Joining through the batch restricts the lookup to the caller's own files.
    ownership_query = db.query(OCRFile).join(OCRBatch).filter(
        OCRFile.id == file_id,
        OCRBatch.user_id == current_user.id
    )
    ocr_file = ownership_query.first()
    if not ocr_file:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="File not found"
        )

    ocr_result = db.query(OCRResult).filter(OCRResult.file_id == file_id).first()
    if not ocr_result:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="OCR result not found"
        )

    try:
        # The PDF is written next to the batch's other exports.
        target_dir = Path(f"uploads/batches/{ocr_file.batch_id}/exports")
        target_dir.mkdir(parents=True, exist_ok=True)
        pdf_path = target_dir / f"file_{file_id}_export.pdf"

        export_service.export_to_pdf(
            result=ocr_result,
            output_path=pdf_path,
            css_template=css_template,
            metadata={"title": ocr_file.original_filename}
        )

        logger.info(f"Generated PDF for file {file_id}: {pdf_path}")

        # The download is named after the uploaded file, with a .pdf suffix.
        download_name = f"{Path(ocr_file.original_filename).stem}.pdf"
        return FileResponse(
            path=str(pdf_path),
            filename=download_name,
            media_type="application/pdf"
        )

    except ExportError as e:
        logger.error(f"PDF generation error: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )
|
||||
|
||||
|
||||
@router.get("/rules", response_model=List[ExportRuleResponse], summary="List export rules")
async def list_export_rules(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    List all export rules for current user

    Returns list of saved export rules
    """
    # Only rules owned by the authenticated user are selected.
    owned_rules = db.query(ExportRule).filter(
        ExportRule.user_id == current_user.id
    )
    return owned_rules.all()
|
||||
|
||||
|
||||
@router.post("/rules", response_model=ExportRuleResponse, summary="Create export rule")
async def create_export_rule(
    rule: ExportRuleCreate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Create new export rule

    Saves custom export configuration for reuse

    - **rule_name**: Rule name
    - **description**: Optional description
    - **config_json**: Rule configuration (filters, formatting, export_options)
    - **css_template**: Optional custom CSS for PDF export
    """
    # The owner always comes from the authenticated user, never the payload.
    columns = {
        "user_id": current_user.id,
        "rule_name": rule.rule_name,
        "description": rule.description,
        "config_json": rule.config_json,
        "css_template": rule.css_template,
    }
    created = ExportRule(**columns)

    db.add(created)
    db.commit()
    # Reload the row after commit before it is logged and returned.
    db.refresh(created)

    logger.info(f"Created export rule {created.id} for user {current_user.id}")

    return created
|
||||
|
||||
|
||||
@router.put("/rules/{rule_id}", response_model=ExportRuleResponse, summary="Update export rule")
async def update_export_rule(
    rule_id: int,
    rule: ExportRuleUpdate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Update existing export rule

    - **rule_id**: Rule ID to update
    - **rule_name**: Optional new rule name
    - **description**: Optional new description
    - **config_json**: Optional new configuration
    - **css_template**: Optional new CSS template

    Responds with 404 when no rule with this ID belongs to the caller.
    """
    # The id and the owner must both match.
    stored_rule = db.query(ExportRule).filter(
        ExportRule.id == rule_id,
        ExportRule.user_id == current_user.id
    ).first()
    if not stored_rule:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Export rule not found"
        )

    # exclude_unset limits the update to fields present in the request body.
    changes = rule.dict(exclude_unset=True)
    for attribute in changes:
        setattr(stored_rule, attribute, changes[attribute])

    db.commit()
    db.refresh(stored_rule)

    logger.info(f"Updated export rule {rule_id}")

    return stored_rule
|
||||
|
||||
|
||||
@router.delete("/rules/{rule_id}", summary="Delete export rule")
async def delete_export_rule(
    rule_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Delete export rule

    - **rule_id**: Rule ID to delete

    Responds with 404 when no rule with this ID belongs to the caller.
    """
    # Ownership is part of the lookup, so other users' rules are never matched.
    owned_rule_query = db.query(ExportRule).filter(
        ExportRule.id == rule_id,
        ExportRule.user_id == current_user.id
    )
    doomed_rule = owned_rule_query.first()
    if not doomed_rule:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Export rule not found"
        )

    db.delete(doomed_rule)
    db.commit()

    logger.info(f"Deleted export rule {rule_id}")

    return {"message": "Export rule deleted successfully"}
|
||||
|
||||
|
||||
@router.get("/css-templates", response_model=List[CSSTemplateResponse], summary="List CSS templates")
async def list_css_templates():
    """
    List available CSS templates for PDF generation

    Returns list of predefined CSS templates with descriptions
    """
    # The generator's name -> description mapping becomes a list of objects.
    catalogue = []
    for template_name, template_desc in pdf_generator.get_available_templates().items():
        catalogue.append({"name": template_name, "description": template_desc})
    return catalogue
|
||||
244
backend/app/routers/ocr.py
Normal file
244
backend/app/routers/ocr.py
Normal file
@@ -0,0 +1,244 @@
|
||||
"""
|
||||
Tool_OCR - OCR Router
|
||||
File upload, OCR processing, and status endpoints
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status, UploadFile, File, BackgroundTasks
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.deps import get_db, get_current_active_user
|
||||
from app.models.user import User
|
||||
from app.models.ocr import OCRBatch, OCRFile, OCRResult, BatchStatus, FileStatus
|
||||
from app.schemas.ocr import (
|
||||
OCRBatchResponse,
|
||||
BatchStatusResponse,
|
||||
FileStatusResponse,
|
||||
OCRResultDetailResponse,
|
||||
UploadBatchResponse,
|
||||
ProcessRequest,
|
||||
ProcessResponse,
|
||||
)
|
||||
from app.services.file_manager import FileManager, FileManagementError
|
||||
from app.services.ocr_service import OCRService
|
||||
from app.services.background_tasks import process_batch_files_with_retry
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/v1", tags=["OCR"])
|
||||
|
||||
# Initialize services
|
||||
file_manager = FileManager()
|
||||
ocr_service = OCRService()
|
||||
|
||||
|
||||
@router.post("/upload", response_model=UploadBatchResponse, summary="Upload files for OCR")
async def upload_files(
    files: List[UploadFile] = File(..., description="Files to upload (PNG, JPG, PDF)"),
    batch_name: str = None,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Upload files for OCR processing

    Creates a new batch and uploads files to it

    - **files**: List of files to upload (PNG, JPG, JPEG, PDF)
    - **batch_name**: Optional name for the batch

    Responds with 400 when no files are sent or the file manager rejects the
    upload, and with 500 for any other failure.
    """
    if not files:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="No files provided"
        )

    owner_id = current_user.id
    try:
        # A fresh batch is created for every upload request.
        new_batch = file_manager.create_batch(db, owner_id, batch_name)
        stored_files = file_manager.add_files_to_batch(db, new_batch.id, files)

        logger.info(f"Uploaded {len(stored_files)} files to batch {new_batch.id} for user {owner_id}")

        # Reload the batch row after the files were added.
        db.refresh(new_batch)

        # Response shape expected by the frontend.
        return {
            "batch_id": new_batch.id,
            "files": stored_files
        }

    except FileManagementError as upload_error:
        logger.error(f"File upload error: {upload_error}")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(upload_error)
        )
    except Exception as unexpected:
        logger.error(f"Unexpected error during upload: {unexpected}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to upload files"
        )
|
||||
|
||||
|
||||
# NOTE: process_batch_files function moved to app.services.background_tasks
|
||||
# Now using process_batch_files_with_retry with retry logic
|
||||
|
||||
@router.post("/ocr/process", response_model=ProcessResponse, summary="Trigger OCR processing")
async def process_ocr(
    request: ProcessRequest,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Trigger OCR processing for a batch

    Starts background processing of all files in the batch

    - **batch_id**: Batch ID to process
    - **lang**: Language code (ch, en, japan, korean)
    - **detect_layout**: Enable layout detection

    Responds with 404 when the batch does not belong to the caller and with
    400 when the batch is not in the pending state.
    """
    # The batch must exist and be owned by the caller.
    owned_batch_query = db.query(OCRBatch).filter(
        OCRBatch.id == request.batch_id,
        OCRBatch.user_id == current_user.id
    )
    batch = owned_batch_query.first()
    if not batch:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Batch not found"
        )

    # Only pending batches may be started.
    # NOTE(review): the status is not changed in this function, so this guard
    # presumably relies on the background task updating it - confirm.
    if batch.status != BatchStatus.PENDING:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Batch is already {batch.status.value}"
        )

    # The task gets its own session instead of the request-scoped `db`.
    # NOTE(review): this function never closes that session; presumably the
    # task does - confirm.
    task_session = SessionLocal()
    task_arguments = {
        "batch_id": batch.id,
        "lang": request.lang,
        "detect_layout": request.detect_layout,
        "db": task_session,
    }
    background_tasks.add_task(process_batch_files_with_retry, **task_arguments)

    logger.info(f"Started OCR processing for batch {batch.id}")

    return {
        "message": "OCR processing started",
        "batch_id": batch.id,
        "total_files": batch.total_files,
        "status": "processing"
    }
|
||||
|
||||
|
||||
@router.get("/batch/{batch_id}/status", response_model=BatchStatusResponse, summary="Get batch status")
async def get_batch_status(
    batch_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Get batch processing status

    Returns batch information and all files in the batch

    - **batch_id**: Batch ID

    Responds with 404 when the batch does not belong to the caller.
    """
    # The id and the owner must both match.
    owned_batch = db.query(OCRBatch).filter(
        OCRBatch.id == batch_id,
        OCRBatch.user_id == current_user.id
    ).first()
    if not owned_batch:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Batch not found"
        )

    # Every file of the batch is included, whatever its status.
    batch_files = db.query(OCRFile).filter(OCRFile.batch_id == batch_id).all()

    return {"batch": owned_batch, "files": batch_files}
|
||||
|
||||
|
||||
@router.get("/ocr/result/{file_id}", response_model=OCRResultDetailResponse, summary="Get OCR result")
async def get_ocr_result(
    file_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Get OCR result for a file

    Returns flattened file and OCR result information for frontend preview

    - **file_id**: File ID

    Responds with 404 when the file is not in one of the caller's batches.
    A file without an OCR result still gets a response; the result-derived
    fields are then null.
    """
    # Joining through the batch restricts the lookup to the caller's own files.
    ocr_file = db.query(OCRFile).join(OCRBatch).filter(
        OCRFile.id == file_id,
        OCRBatch.user_id == current_user.id
    ).first()
    if not ocr_file:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="File not found"
        )

    ocr_result = db.query(OCRResult).filter(OCRResult.file_id == file_id).first()

    markdown_text = None
    result_summary = None
    confidence = None
    if ocr_result:
        # Markdown is best effort: a missing or unreadable file yields None.
        markdown_source = Path(ocr_result.markdown_path) if ocr_result.markdown_path else None
        if markdown_source is not None and markdown_source.exists():
            try:
                markdown_text = markdown_source.read_text(encoding='utf-8')
            except Exception as read_error:
                logger.warning(f"Failed to read markdown file {ocr_result.markdown_path}: {read_error}")

        result_summary = {
            "total_text_regions": ocr_result.total_text_regions,
            "average_confidence": ocr_result.average_confidence,
            "detected_language": ocr_result.detected_language,
            "layout_data": ocr_result.layout_data,
            "images_metadata": ocr_result.images_metadata,
        }
        confidence = ocr_result.average_confidence

    # Flattened structure expected by the frontend.
    return {
        "file_id": ocr_file.id,
        "filename": ocr_file.filename,
        "status": ocr_file.status.value,
        "markdown_content": markdown_text,
        "json_data": result_summary,
        "confidence": confidence,
        "processing_time": ocr_file.processing_time,
    }
|
||||
|
||||
|
||||
# Import SessionLocal for background tasks
|
||||
from app.core.database import SessionLocal
|
||||
189
backend/app/routers/translation.py
Normal file
189
backend/app/routers/translation.py
Normal file
@@ -0,0 +1,189 @@
|
||||
"""
|
||||
Tool_OCR - Translation Router (RESERVED)
|
||||
Stub endpoints for future translation feature
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.deps import get_db, get_current_active_user
|
||||
from app.models.user import User
|
||||
from app.schemas.translation import (
|
||||
TranslationRequest,
|
||||
TranslationResponse,
|
||||
TranslationFeatureStatus,
|
||||
LanguageInfo,
|
||||
)
|
||||
from app.services.translation_service import StubTranslationService
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/v1/translate", tags=["Translation (RESERVED)"])
|
||||
|
||||
|
||||
@router.get("/status", response_model=TranslationFeatureStatus, summary="Get translation feature status")
async def get_translation_status():
    """
    Get translation feature status

    Returns current implementation status and roadmap for translation feature.
    This is a RESERVED feature that will be implemented in Phase 5.

    **Status**: RESERVED - Not yet implemented
    **Phase**: Phase 5 (Post-production)
    **Priority**: Implemented after production deployment and user feedback
    """
    # The stub service supplies the payload; nothing is computed here.
    feature_status = StubTranslationService.get_feature_status()
    return feature_status
|
||||
|
||||
|
||||
@router.get("/languages", response_model=List[LanguageInfo], summary="Get supported languages")
async def get_supported_languages():
    """
    Get list of languages planned for translation support

    Returns list of languages that will be supported when translation
    feature is implemented.

    **Status**: RESERVED - Planning phase
    """
    # The stub service supplies the list; nothing is computed here.
    planned_languages = StubTranslationService.get_supported_languages()
    return planned_languages
|
||||
|
||||
|
||||
@router.post("/document", response_model=TranslationResponse, summary="Translate document (RESERVED)")
async def translate_document(
    request: TranslationRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Translate OCR document (RESERVED - NOT IMPLEMENTED)

    This endpoint is reserved for future translation functionality.
    Returns 501 Not Implemented status.

    **Expected Functionality** (when implemented):
    - Translate markdown documents while preserving structure
    - Support multiple translation engines (offline, ERNIE, Google, DeepL)
    - Maintain layout and formatting
    - Handle technical terminology

    **Planned Features**:
    - Offline translation (Argos Translate)
    - Cloud API integration (ERNIE, Google, DeepL)
    - Batch translation support
    - Translation memory
    - Glossary support

    **Current Status**: RESERVED for Phase 5 implementation

    ---

    **Request Parameters** (planned):
    - **file_id**: ID of OCR result file to translate
    - **source_lang**: Source language code (zh, en, ja, ko)
    - **target_lang**: Target language code (zh, en, ja, ko)
    - **engine_type**: Translation engine (offline, ernie, google, deepl)
    - **preserve_structure**: Whether to preserve markdown structure
    - **engine_config**: Engine-specific configuration

    **Response** (planned):
    - **task_id**: Translation task ID for tracking progress
    - **status**: Translation status
    - **translated_file_path**: Path to translated file (when completed)
    """
    logger.info(f"Translation request received from user {current_user.id} (stub endpoint)")

    # Echo the main request fields back in the 501 payload.
    echoed_request = {
        "file_id": request.file_id,
        "source_lang": request.source_lang,
        "target_lang": request.target_lang,
        "engine_type": request.engine_type
    }
    roadmap = {
        "phase": "Phase 5",
        "priority": "Implemented after production deployment",
        "planned_features": [
            "Offline translation (Argos Translate)",
            "Cloud API integration (ERNIE, Google, DeepL)",
            "Structure-preserving markdown translation",
            "Batch translation support"
        ]
    }
    not_implemented_detail = {
        "error": "Translation feature not implemented",
        "message": "This feature is reserved for future development (Phase 5)",
        "status": "RESERVED",
        "roadmap": roadmap,
        "request_received": echoed_request,
        "action": "Please check back in a future release or contact support for updates"
    }

    # This stub always answers 501; `db` is accepted but not used.
    raise HTTPException(
        status_code=status.HTTP_501_NOT_IMPLEMENTED,
        detail=not_implemented_detail
    )
|
||||
|
||||
|
||||
@router.get("/task/{task_id}", summary="Get translation task status (RESERVED)")
async def get_translation_task_status(
    task_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Get translation task status (RESERVED - NOT IMPLEMENTED)

    This endpoint would track translation task progress.
    Returns 501 Not Implemented status.

    **Planned Functionality**:
    - Real-time translation progress
    - Status updates (pending, processing, completed, failed)
    - Estimated completion time
    - Error reporting

    **Current Status**: RESERVED for Phase 5 implementation
    """
    logger.info(f"Translation status check for task {task_id} from user {current_user.id} (stub endpoint)")

    # This stub always answers 501 and echoes the requested task id.
    not_implemented_detail = {
        "error": "Translation feature not implemented",
        "message": "Translation task tracking is reserved for Phase 5",
        "task_id": task_id,
        "status": "RESERVED"
    }
    raise HTTPException(
        status_code=status.HTTP_501_NOT_IMPLEMENTED,
        detail=not_implemented_detail
    )
|
||||
|
||||
|
||||
@router.delete("/task/{task_id}", summary="Cancel translation task (RESERVED)")
async def cancel_translation_task(
    task_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Cancel ongoing translation task (RESERVED - NOT IMPLEMENTED)

    This endpoint would allow cancellation of translation tasks.
    Returns 501 Not Implemented status.

    **Planned Functionality**:
    - Cancel in-progress translations
    - Clean up temporary files
    - Refund credits (if applicable)

    **Current Status**: RESERVED for Phase 5 implementation
    """
    logger.info(f"Translation cancellation request for task {task_id} from user {current_user.id} (stub endpoint)")

    # This stub always answers 501; nothing is cancelled or modified.
    not_implemented_detail = {
        "error": "Translation feature not implemented",
        "message": "This feature is reserved for Phase 5",
        "status": "RESERVED"
    }
    raise HTTPException(
        status_code=status.HTTP_501_NOT_IMPLEMENTED,
        detail=not_implemented_detail
    )
|
||||
Reference in New Issue
Block a user