first
This commit is contained in:
338
backend/app/routers/export.py
Normal file
338
backend/app/routers/export.py
Normal file
@@ -0,0 +1,338 @@
|
||||
"""
|
||||
Tool_OCR - Export Router
|
||||
Export results in multiple formats
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.deps import get_db, get_current_active_user
|
||||
from app.models.user import User
|
||||
from app.models.ocr import OCRBatch, OCRFile, OCRResult, FileStatus
|
||||
from app.models.export import ExportRule
|
||||
from app.schemas.export import (
|
||||
ExportRequest,
|
||||
ExportRuleCreate,
|
||||
ExportRuleUpdate,
|
||||
ExportRuleResponse,
|
||||
CSSTemplateResponse,
|
||||
)
|
||||
from app.services.export_service import ExportService, ExportError
|
||||
from app.services.pdf_generator import PDFGenerator
|
||||
|
||||
|
||||
# Module-level logger, named after this module
logger = logging.getLogger(__name__)

# Every route declared below is registered under the /api/v1/export prefix
router = APIRouter(
    prefix="/api/v1/export",
    tags=["Export"],
)

# Service instances created once at import time and reused by the handlers below
export_service = ExportService()
pdf_generator = PDFGenerator()
|
||||
|
||||
|
||||
@router.post("", summary="Export OCR results")
async def export_results(
    request: ExportRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Export OCR results in specified format

    Formats handled by this endpoint: txt, json, excel, markdown, zip.
    Any other format value is rejected with HTTP 400. This handler has no
    "pdf" branch; single-file PDFs are produced by GET /pdf/{file_id}.

    - **batch_id**: Batch ID to export
    - **format**: Export format (txt, json, excel, markdown, zip)
    - **rule_id**: Optional export rule ID to apply filters
    - **css_template**: Accepted in the request body but not read by this handler
    - **include_formats**: Formats to include in ZIP export (defaults to markdown and json)

    Returns the generated file as a download (application/octet-stream).

    Raises:
        HTTPException 404: batch not found for the current user, no completed
            results in the batch, or applying the export rule raised ExportError.
        HTTPException 400: unsupported export format.
        HTTPException 500: the export itself failed.
    """
    # Verify batch ownership: the batch must exist AND belong to the caller
    batch = db.query(OCRBatch).filter(
        OCRBatch.id == request.batch_id,
        OCRBatch.user_id == current_user.id
    ).first()

    if not batch:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Batch not found"
        )

    # Only results whose file finished processing are exported
    results = db.query(OCRResult).join(OCRFile).filter(
        OCRFile.batch_id == request.batch_id,
        OCRFile.status == FileStatus.COMPLETED
    ).all()

    if not results:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No completed results found for this batch"
        )

    # Apply export rule if specified
    # NOTE(review): rule ownership is not checked here; presumably
    # ExportService.apply_export_rule does it - confirm.
    if request.rule_id:
        try:
            results = export_service.apply_export_rule(db, results, request.rule_id)
        except ExportError as e:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=str(e)
            ) from e

    # Reject unknown formats BEFORE entering the try block below. Raising the
    # 400 inside it would let the generic `except Exception` handler turn it
    # into a 500, and the export directory would already have been created.
    supported_formats = ("txt", "json", "excel", "markdown", "zip")
    if request.format not in supported_formats:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Unsupported export format: {request.format}"
        )

    try:
        # Generate export based on format
        export_dir = Path(f"uploads/batches/{batch.id}/exports")
        export_dir.mkdir(parents=True, exist_ok=True)

        if request.format == "txt":
            output_path = export_dir / f"batch_{batch.id}_export.txt"
            export_service.export_to_txt(results, output_path)

        elif request.format == "json":
            output_path = export_dir / f"batch_{batch.id}_export.json"
            export_service.export_to_json(results, output_path)

        elif request.format == "excel":
            output_path = export_dir / f"batch_{batch.id}_export.xlsx"
            export_service.export_to_excel(results, output_path)

        elif request.format == "markdown":
            output_path = export_dir / f"batch_{batch.id}_export.md"
            export_service.export_to_markdown(results, output_path, combine=True)

        else:
            # Only "zip" can reach this branch after the validation above.
            # NOTE(review): the zip exporter receives db and batch.id rather
            # than the (possibly rule-filtered) `results` - confirm whether
            # rule_id is meant to affect zip exports.
            output_path = export_dir / f"batch_{batch.id}_export.zip"
            include_formats = request.include_formats or ["markdown", "json"]
            export_service.export_batch_to_zip(db, batch.id, output_path, include_formats)

        logger.info(f"Exported batch {batch.id} to {request.format} format: {output_path}")

        # Return file for download
        return FileResponse(
            path=str(output_path),
            filename=output_path.name,
            media_type="application/octet-stream"
        )

    except HTTPException:
        # Never mask a deliberate HTTP error as a generic 500
        raise
    except ExportError as e:
        # logger.exception records the traceback along with the message
        logger.exception(f"Export error: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        ) from e
    except Exception as e:
        # Boundary handler: log the details, return a generic message to the client
        logger.exception(f"Unexpected export error: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Export failed"
        ) from e
|
||||
|
||||
|
||||
@router.get("/pdf/{file_id}", summary="Generate PDF for single file")
async def generate_pdf(
    file_id: int,
    css_template: str = "default",
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Generate layout-preserved PDF for a single file

    - **file_id**: File ID
    - **css_template**: CSS template (default, academic, business)

    Returns the generated PDF as a download, named after the stem of the
    file's original filename.

    Raises:
        HTTPException 404: the file does not exist in a batch owned by the
            current user, or it has no OCR result.
        HTTPException 500: PDF generation failed.
    """
    # Get file and verify ownership: the join restricts the lookup to files
    # whose batch belongs to the caller
    ocr_file = db.query(OCRFile).join(OCRBatch).filter(
        OCRFile.id == file_id,
        OCRBatch.user_id == current_user.id
    ).first()

    if not ocr_file:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="File not found"
        )

    # Get result
    result = db.query(OCRResult).filter(OCRResult.file_id == file_id).first()
    if not result:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="OCR result not found"
        )

    try:
        # Generate PDF into the exports directory of the file's batch
        export_dir = Path(f"uploads/batches/{ocr_file.batch_id}/exports")
        export_dir.mkdir(parents=True, exist_ok=True)
        output_path = export_dir / f"file_{file_id}_export.pdf"

        export_service.export_to_pdf(
            result=result,
            output_path=output_path,
            css_template=css_template,
            metadata={"title": ocr_file.original_filename}
        )

        logger.info(f"Generated PDF for file {file_id}: {output_path}")

        return FileResponse(
            path=str(output_path),
            filename=f"{Path(ocr_file.original_filename).stem}.pdf",
            media_type="application/pdf"
        )

    except ExportError as e:
        # logger.exception records the traceback along with the message
        logger.exception(f"PDF generation error: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        ) from e
    except Exception as e:
        # Mirror export_results: log unexpected failures (filesystem errors,
        # bad filename values, ...) and return a generic 500 message
        logger.exception(f"Unexpected PDF generation error: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="PDF generation failed"
        ) from e
|
||||
|
||||
|
||||
@router.get("/rules", response_model=List[ExportRuleResponse], summary="List export rules")
async def list_export_rules(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    List the export rules saved by the current user

    Returns every export rule whose user_id matches the authenticated user
    """
    owned_by_caller = ExportRule.user_id == current_user.id
    return db.query(ExportRule).filter(owned_by_caller).all()
|
||||
|
||||
|
||||
@router.post("/rules", response_model=ExportRuleResponse, summary="Create export rule")
async def create_export_rule(
    rule: ExportRuleCreate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Create a new export rule owned by the current user

    Stores a custom export configuration so it can be reused later

    - **rule_name**: Rule name
    - **description**: Optional description
    - **config_json**: Rule configuration (filters, formatting, export_options)
    - **css_template**: Optional custom CSS for PDF export

    Returns the persisted rule after it has been refreshed from the database
    """
    # Collect the column values first; the owner is always the caller
    rule_fields = {
        "user_id": current_user.id,
        "rule_name": rule.rule_name,
        "description": rule.description,
        "config_json": rule.config_json,
        "css_template": rule.css_template,
    }
    new_rule = ExportRule(**rule_fields)

    # Persist, then refresh the instance from the database before using its id
    db.add(new_rule)
    db.commit()
    db.refresh(new_rule)

    logger.info(f"Created export rule {new_rule.id} for user {current_user.id}")
    return new_rule
|
||||
|
||||
|
||||
@router.put("/rules/{rule_id}", response_model=ExportRuleResponse, summary="Update export rule")
async def update_export_rule(
    rule_id: int,
    rule: ExportRuleUpdate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Update an existing export rule owned by the current user

    Only the fields explicitly set in the request body are written

    - **rule_id**: Rule ID to update
    - **rule_name**: Optional new rule name
    - **description**: Optional new description
    - **config_json**: Optional new configuration
    - **css_template**: Optional new CSS template

    Responds with 404 when no rule with this ID belongs to the current user
    """
    # The lookup is scoped to the caller, so a rule owned by someone else
    # is reported as not found
    ownership_criteria = (
        ExportRule.id == rule_id,
        ExportRule.user_id == current_user.id,
    )
    db_rule = db.query(ExportRule).filter(*ownership_criteria).first()

    if not db_rule:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Export rule not found"
        )

    # Copy over only the fields the client actually sent
    for field, value in rule.dict(exclude_unset=True).items():
        setattr(db_rule, field, value)

    db.commit()
    db.refresh(db_rule)

    logger.info(f"Updated export rule {rule_id}")
    return db_rule
|
||||
|
||||
|
||||
@router.delete("/rules/{rule_id}", summary="Delete export rule")
async def delete_export_rule(
    rule_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Delete an export rule owned by the current user

    - **rule_id**: Rule ID to delete

    Responds with 404 when no rule with this ID belongs to the current user;
    otherwise returns a confirmation message
    """
    # The lookup is scoped to the caller, so a rule owned by someone else
    # is reported as not found
    ownership_criteria = (
        ExportRule.id == rule_id,
        ExportRule.user_id == current_user.id,
    )
    db_rule = db.query(ExportRule).filter(*ownership_criteria).first()

    if not db_rule:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Export rule not found"
        )

    db.delete(db_rule)
    db.commit()

    logger.info(f"Deleted export rule {rule_id}")
    return {"message": "Export rule deleted successfully"}
|
||||
|
||||
|
||||
@router.get("/css-templates", response_model=List[CSSTemplateResponse], summary="List CSS templates")
async def list_css_templates():
    """
    List the CSS templates available for PDF generation

    Returns one entry (name and description) per template reported by the
    PDF generator
    """
    template_entries = []
    for name, desc in pdf_generator.get_available_templates().items():
        template_entries.append({"name": name, "description": desc})
    return template_entries
|
||||
Reference in New Issue
Block a user