""" Tool_OCR - OCR Schemas """ from datetime import datetime from typing import Optional, Dict, List, Any from pydantic import BaseModel, Field from app.models.ocr import BatchStatus, FileStatus class OCRFileResponse(BaseModel): """OCR file response schema""" id: int batch_id: int filename: str original_filename: str file_size: int file_format: str status: FileStatus error: Optional[str] = Field(None, validation_alias='error_message') # Map from error_message to error created_at: datetime processing_time: Optional[float] = None class Config: from_attributes = True populate_by_name = True class OCRResultResponse(BaseModel): """OCR result response schema""" id: int file_id: int markdown_path: Optional[str] = None markdown_content: Optional[str] = None # Added for frontend preview json_path: Optional[str] = None images_dir: Optional[str] = None detected_language: Optional[str] = None total_text_regions: int average_confidence: Optional[float] = None layout_data: Optional[Dict[str, Any]] = None images_metadata: Optional[List[Dict[str, Any]]] = None created_at: datetime class Config: from_attributes = True class OCRBatchResponse(BaseModel): """OCR batch response schema""" id: int user_id: int batch_name: Optional[str] = None status: BatchStatus total_files: int completed_files: int failed_files: int progress_percentage: float created_at: datetime started_at: Optional[datetime] = None completed_at: Optional[datetime] = None class Config: from_attributes = True class BatchStatusResponse(BaseModel): """Batch status with file details""" batch: OCRBatchResponse files: List[OCRFileResponse] class FileStatusResponse(BaseModel): """File status with result details""" file: OCRFileResponse result: Optional[OCRResultResponse] = None class OCRResultDetailResponse(BaseModel): """OCR result detail response for frontend preview - flattened structure""" file_id: int filename: str status: str markdown_content: Optional[str] = None json_data: Optional[Dict[str, Any]] = None confidence: Optional[float] = None processing_time: Optional[float] = None class Config: from_attributes = True class UploadBatchResponse(BaseModel): """Upload response schema matching frontend expectations""" batch_id: int = Field(..., description="Batch ID") files: List[OCRFileResponse] = Field(..., description="Uploaded files") class Config: json_schema_extra = { "example": { "batch_id": 1, "files": [ { "id": 1, "batch_id": 1, "filename": "doc_1.png", "original_filename": "document.png", "file_size": 1024000, "file_format": "png", "status": "pending", "error_message": None, "created_at": "2025-01-01T00:00:00", "processing_time": None } ] } } class ProcessRequest(BaseModel): """OCR process request schema""" batch_id: int = Field(..., description="Batch ID to process") lang: str = Field(default="ch", description="Language code (ch, en, japan, korean)") detect_layout: bool = Field(default=True, description="Enable layout detection") class Config: json_schema_extra = { "example": { "batch_id": 1, "lang": "ch", "detect_layout": True } } class ProcessResponse(BaseModel): """OCR process response schema""" message: str batch_id: int total_files: int status: str class Config: json_schema_extra = { "example": { "message": "OCR processing started", "batch_id": 1, "total_files": 5, "status": "processing" } }