"""
Tool_OCR - Export Router
Export results in multiple formats
"""

import logging
from typing import List
from pathlib import Path

from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session

from app.core.deps import get_db, get_current_active_user
from app.models.user import User
from app.models.ocr import OCRBatch, OCRFile, OCRResult, FileStatus
from app.models.export import ExportRule
from app.schemas.export import (
    ExportRequest,
    ExportRuleCreate,
    ExportRuleUpdate,
    ExportRuleResponse,
    CSSTemplateResponse,
)
from app.services.export_service import ExportService, ExportError
from app.services.pdf_generator import PDFGenerator


logger = logging.getLogger(__name__)

router = APIRouter(prefix="/api/v1/export", tags=["Export"])

# Initialize services (module-level instances shared by every request)
export_service = ExportService()
pdf_generator = PDFGenerator()


def _ensure_export_dir(batch_id) -> Path:
    """Return the export directory for *batch_id*, creating it if missing."""
    export_dir = Path(f"uploads/batches/{batch_id}/exports")
    export_dir.mkdir(parents=True, exist_ok=True)
    return export_dir


@router.post("", summary="Export OCR results")
async def export_results(
    request: ExportRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Export OCR results in specified format

    Supports multiple export formats: txt, json, excel, markdown, zip.
    PDF output is generated per file via `GET /api/v1/export/pdf/{file_id}`;
    requesting `pdf` here is rejected as an unsupported format.

    - **batch_id**: Batch ID to export
    - **format**: Export format (txt, json, excel, markdown, zip)
    - **rule_id**: Optional export rule ID to apply filters
    - **css_template**: CSS template for PDF export (default, academic, business)
    - **include_formats**: Formats to include in ZIP export

    Errors:

    - **404**: batch not found for the current user, no completed results,
      or the export rule could not be applied
    - **400**: unsupported export format
    - **500**: the export itself failed
    """
    # Verify batch ownership: the batch must exist AND belong to the caller.
    batch = db.query(OCRBatch).filter(
        OCRBatch.id == request.batch_id,
        OCRBatch.user_id == current_user.id
    ).first()

    if not batch:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Batch not found"
        )

    # Get completed results
    results = db.query(OCRResult).join(OCRFile).filter(
        OCRFile.batch_id == request.batch_id,
        OCRFile.status == FileStatus.COMPLETED
    ).all()

    if not results:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No completed results found for this batch"
        )

    # Apply export rule if specified
    if request.rule_id:
        try:
            results = export_service.apply_export_rule(db, results, request.rule_id)
        except ExportError as e:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=str(e)
            ) from e

    try:
        # Generate export based on format
        export_dir = _ensure_export_dir(batch.id)

        if request.format == "txt":
            output_path = export_dir / f"batch_{batch.id}_export.txt"
            export_service.export_to_txt(results, output_path)

        elif request.format == "json":
            output_path = export_dir / f"batch_{batch.id}_export.json"
            export_service.export_to_json(results, output_path)

        elif request.format == "excel":
            output_path = export_dir / f"batch_{batch.id}_export.xlsx"
            export_service.export_to_excel(results, output_path)

        elif request.format == "markdown":
            output_path = export_dir / f"batch_{batch.id}_export.md"
            export_service.export_to_markdown(results, output_path, combine=True)

        elif request.format == "zip":
            output_path = export_dir / f"batch_{batch.id}_export.zip"
            include_formats = request.include_formats or ["markdown", "json"]
            export_service.export_batch_to_zip(db, batch.id, output_path, include_formats)

        else:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Unsupported export format: {request.format}"
            )

        logger.info(f"Exported batch {batch.id} to {request.format} format: {output_path}")

        # Return file for download
        return FileResponse(
            path=str(output_path),
            filename=output_path.name,
            media_type="application/octet-stream"
        )

    except HTTPException:
        # HTTPException subclasses Exception; without this clause the 400
        # raised above would be swallowed by the generic handler below and
        # reported to the client as a 500 "Export failed".
        raise

    except ExportError as e:
        logger.error(f"Export error: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        ) from e

    except Exception as e:
        # Boundary handler: keep the traceback in the log, but do not leak
        # internal details to the client.
        logger.exception(f"Unexpected export error: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Export failed"
        ) from e


@router.get("/pdf/{file_id}", summary="Generate PDF for single file")
async def generate_pdf(
    file_id: int,
    css_template: str = "default",
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Generate layout-preserved PDF for a single file

    - **file_id**: File ID
    - **css_template**: CSS template (default, academic, business)

    Errors:

    - **404**: file not found for the current user, or it has no OCR result
    - **500**: PDF generation failed
    """
    # Get file and verify ownership (ownership is checked through the batch)
    ocr_file = db.query(OCRFile).join(OCRBatch).filter(
        OCRFile.id == file_id,
        OCRBatch.user_id == current_user.id
    ).first()

    if not ocr_file:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="File not found"
        )

    # Get result
    result = db.query(OCRResult).filter(OCRResult.file_id == file_id).first()

    if not result:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="OCR result not found"
        )

    try:
        # Generate PDF
        export_dir = _ensure_export_dir(ocr_file.batch_id)
        output_path = export_dir / f"file_{file_id}_export.pdf"

        export_service.export_to_pdf(
            result=result,
            output_path=output_path,
            css_template=css_template,
            metadata={"title": ocr_file.original_filename}
        )

        logger.info(f"Generated PDF for file {file_id}: {output_path}")

        # The download is named after the uploaded file, not the on-disk export.
        return FileResponse(
            path=str(output_path),
            filename=f"{Path(ocr_file.original_filename).stem}.pdf",
            media_type="application/pdf"
        )

    except ExportError as e:
        logger.error(f"PDF generation error: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        ) from e


@router.get("/rules", response_model=List[ExportRuleResponse], summary="List export rules")
async def list_export_rules(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    List all export rules for current user

    Returns list of saved export rules
    """
    rules = db.query(ExportRule).filter(ExportRule.user_id == current_user.id).all()
    return rules


@router.post("/rules", response_model=ExportRuleResponse, summary="Create export rule")
async def create_export_rule(
    rule: ExportRuleCreate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Create new export rule

    Saves custom export configuration for reuse

    - **rule_name**: Rule name
    - **description**: Optional description
    - **config_json**: Rule configuration (filters, formatting, export_options)
    - **css_template**: Optional custom CSS for PDF export
    """
    # Create rule owned by the authenticated user
    new_rule = ExportRule(
        user_id=current_user.id,
        rule_name=rule.rule_name,
        description=rule.description,
        config_json=rule.config_json,
        css_template=rule.css_template
    )

    db.add(new_rule)
    db.commit()
    # Reload the row after commit before returning it.
    db.refresh(new_rule)

    logger.info(f"Created export rule {new_rule.id} for user {current_user.id}")

    return new_rule


@router.put("/rules/{rule_id}", response_model=ExportRuleResponse, summary="Update export rule")
async def update_export_rule(
    rule_id: int,
    rule: ExportRuleUpdate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Update existing export rule

    - **rule_id**: Rule ID to update
    - **rule_name**: Optional new rule name
    - **description**: Optional new description
    - **config_json**: Optional new configuration
    - **css_template**: Optional new CSS template

    Errors:

    - **404**: rule not found for the current user
    """
    # Get rule and verify ownership
    db_rule = db.query(ExportRule).filter(
        ExportRule.id == rule_id,
        ExportRule.user_id == current_user.id
    ).first()

    if not db_rule:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Export rule not found"
        )

    # Update fields: only those the client actually sent, so omitted fields
    # keep their stored values.
    update_data = rule.dict(exclude_unset=True)
    for field, value in update_data.items():
        setattr(db_rule, field, value)

    db.commit()
    db.refresh(db_rule)

    logger.info(f"Updated export rule {rule_id}")

    return db_rule


@router.delete("/rules/{rule_id}", summary="Delete export rule")
async def delete_export_rule(
    rule_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """
    Delete export rule

    - **rule_id**: Rule ID to delete

    Errors:

    - **404**: rule not found for the current user
    """
    # Get rule and verify ownership
    db_rule = db.query(ExportRule).filter(
        ExportRule.id == rule_id,
        ExportRule.user_id == current_user.id
    ).first()

    if not db_rule:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Export rule not found"
        )

    db.delete(db_rule)
    db.commit()

    logger.info(f"Deleted export rule {rule_id}")

    return {"message": "Export rule deleted successfully"}


@router.get("/css-templates", response_model=List[CSSTemplateResponse], summary="List CSS templates")
async def list_css_templates():
    """
    List available CSS templates for PDF generation

    Returns list of predefined CSS templates with descriptions
    """
    # get_available_templates() is consumed as a mapping of name -> description.
    templates = pdf_generator.get_available_templates()
    return [
        {"name": name, "description": desc}
        for name, desc in templates.items()
    ]