Files
OCR/backend/app/schemas/ocr.py
beabigegg da700721fa first
2025-11-12 22:53:17 +08:00

152 lines
4.1 KiB
Python

"""
Tool_OCR - OCR Schemas
"""
from datetime import datetime
from typing import Optional, Dict, List, Any
from pydantic import BaseModel, Field
from app.models.ocr import BatchStatus, FileStatus
class OCRFileResponse(BaseModel):
    """Response schema describing a single file that belongs to an OCR batch.

    The model can be validated from arbitrary objects via attribute access
    (``from_attributes``). The ``error`` field may be supplied either under
    its own name or under its ``error_message`` validation alias
    (``populate_by_name``).
    """

    # Pydantic v2 configuration. A plain ``model_config`` mapping replaces the
    # nested ``class Config``, which is deprecated in Pydantic v2.
    model_config = {
        "from_attributes": True,
        "populate_by_name": True,
    }

    id: int
    batch_id: int
    filename: str
    original_filename: str
    file_size: int
    file_format: str
    status: FileStatus
    # Read from ``error_message`` on input; exposed under the field name ``error``.
    error: Optional[str] = Field(None, validation_alias='error_message')
    created_at: datetime
    processing_time: Optional[float] = None
class OCRResultResponse(BaseModel):
    """Response schema for the OCR result produced for one file.

    Only ``id``, ``file_id``, ``total_text_regions`` and ``created_at`` are
    required; every other field defaults to ``None``. The model can be
    validated from arbitrary objects via attribute access
    (``from_attributes``).
    """

    # Pydantic v2 configuration. A plain ``model_config`` mapping replaces the
    # nested ``class Config``, which is deprecated in Pydantic v2.
    model_config = {"from_attributes": True}

    id: int
    file_id: int
    markdown_path: Optional[str] = None
    # Added for frontend preview.
    markdown_content: Optional[str] = None
    json_path: Optional[str] = None
    images_dir: Optional[str] = None
    detected_language: Optional[str] = None
    total_text_regions: int
    average_confidence: Optional[float] = None
    layout_data: Optional[Dict[str, Any]] = None
    images_metadata: Optional[List[Dict[str, Any]]] = None
    created_at: datetime
class OCRBatchResponse(BaseModel):
    """Response schema summarising an OCR batch and its progress counters.

    ``batch_name``, ``started_at`` and ``completed_at`` are optional and
    default to ``None``; all other fields are required. The model can be
    validated from arbitrary objects via attribute access
    (``from_attributes``).
    """

    # Pydantic v2 configuration. A plain ``model_config`` mapping replaces the
    # nested ``class Config``, which is deprecated in Pydantic v2.
    model_config = {"from_attributes": True}

    id: int
    user_id: int
    batch_name: Optional[str] = None
    status: BatchStatus
    total_files: int
    completed_files: int
    failed_files: int
    progress_percentage: float
    created_at: datetime
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
class BatchStatusResponse(BaseModel):
    """Batch status payload: the batch summary plus its per-file entries."""

    # Summary of the batch itself.
    batch: OCRBatchResponse
    # One entry per file in the batch.
    files: List[OCRFileResponse]
class FileStatusResponse(BaseModel):
    """File status payload: the file entry plus its OCR result, if any."""

    file: OCRFileResponse
    # ``None`` when no result is supplied for the file.
    result: Optional[OCRResultResponse] = Field(default=None)
class OCRResultDetailResponse(BaseModel):
    """OCR result detail response for frontend preview - flattened structure.

    ``file_id``, ``filename`` and ``status`` are required; the remaining
    fields default to ``None``. ``status`` is a plain string here rather
    than the ``FileStatus`` enum. The model can be validated from arbitrary
    objects via attribute access (``from_attributes``).
    """

    # Pydantic v2 configuration. A plain ``model_config`` mapping replaces the
    # nested ``class Config``, which is deprecated in Pydantic v2.
    model_config = {"from_attributes": True}

    file_id: int
    filename: str
    status: str
    markdown_content: Optional[str] = None
    json_data: Optional[Dict[str, Any]] = None
    confidence: Optional[float] = None
    processing_time: Optional[float] = None
class UploadBatchResponse(BaseModel):
    """Upload response schema matching frontend expectations.

    Carries the batch identifier together with the list of uploaded file
    entries. The schema example below is attached to the generated JSON
    schema via ``json_schema_extra``.
    """

    # Pydantic v2 configuration. A plain ``model_config`` mapping replaces the
    # nested ``class Config``, which is deprecated in Pydantic v2.
    model_config = {
        "json_schema_extra": {
            "example": {
                "batch_id": 1,
                "files": [
                    {
                        "id": 1,
                        "batch_id": 1,
                        "filename": "doc_1.png",
                        "original_filename": "document.png",
                        "file_size": 1024000,
                        "file_format": "png",
                        "status": "pending",
                        # OCRFileResponse serializes this field as ``error``;
                        # ``error_message`` is only its validation alias.
                        "error": None,
                        "created_at": "2025-01-01T00:00:00",
                        "processing_time": None,
                    }
                ],
            }
        }
    }

    batch_id: int = Field(..., description="Batch ID")
    files: List[OCRFileResponse] = Field(..., description="Uploaded files")
class ProcessRequest(BaseModel):
    """OCR process request schema.

    ``batch_id`` is required. ``lang`` defaults to ``"ch"`` and
    ``detect_layout`` defaults to ``True``.
    """

    # Pydantic v2 configuration. A plain ``model_config`` mapping replaces the
    # nested ``class Config``, which is deprecated in Pydantic v2.
    model_config = {
        "json_schema_extra": {
            "example": {
                "batch_id": 1,
                "lang": "ch",
                "detect_layout": True,
            }
        }
    }

    batch_id: int = Field(..., description="Batch ID to process")
    # NOTE(review): the description lists example codes; no validation
    # restricts ``lang`` to them here.
    lang: str = Field(default="ch", description="Language code (ch, en, japan, korean)")
    detect_layout: bool = Field(default=True, description="Enable layout detection")
class ProcessResponse(BaseModel):
    """OCR process response schema.

    All four fields are required. ``status`` is a plain string here rather
    than an enum.
    """

    # Pydantic v2 configuration. A plain ``model_config`` mapping replaces the
    # nested ``class Config``, which is deprecated in Pydantic v2.
    model_config = {
        "json_schema_extra": {
            "example": {
                "message": "OCR processing started",
                "batch_id": 1,
                "total_files": 5,
                "status": "processing",
            }
        }
    }

    message: str
    batch_id: int
    total_files: int
    status: str