152 lines
4.1 KiB
Python
152 lines
4.1 KiB
Python
"""
|
|
Tool_OCR - OCR Schemas
|
|
"""
|
|
|
|
from datetime import datetime
|
|
from typing import Optional, Dict, List, Any
|
|
from pydantic import BaseModel, Field
|
|
|
|
from app.models.ocr import BatchStatus, FileStatus
|
|
|
|
|
|
class OCRFileResponse(BaseModel):
    """OCR file response schema"""

    # Settings are declared through ``model_config`` instead of a nested
    # ``class Config``: the keys used here are Pydantic v2 settings, and v2
    # deprecates the class-based form.
    #   from_attributes  - allow the model to be validated from an object's
    #                      attributes, not only from a mapping
    #   populate_by_name - accept the field name ``error`` on input as well
    #                      as its validation alias
    model_config = {
        "from_attributes": True,
        "populate_by_name": True,
    }

    id: int
    batch_id: int
    filename: str
    original_filename: str
    file_size: int
    file_format: str
    status: FileStatus
    # Read from ``error_message`` on input; the field itself is named ``error``.
    error: Optional[str] = Field(None, validation_alias='error_message')
    created_at: datetime
    processing_time: Optional[float] = None
|
|
|
|
|
|
class OCRResultResponse(BaseModel):
    """OCR result response schema"""

    # ``model_config`` replaces the nested ``class Config``, which Pydantic v2
    # deprecates. ``from_attributes`` allows validation from an object's
    # attributes rather than only from a mapping.
    model_config = {"from_attributes": True}

    id: int
    file_id: int
    markdown_path: Optional[str] = None
    # Added for frontend preview.
    markdown_content: Optional[str] = None
    json_path: Optional[str] = None
    images_dir: Optional[str] = None
    detected_language: Optional[str] = None
    # Required, unlike the other result fields, which all default to None.
    total_text_regions: int
    average_confidence: Optional[float] = None
    layout_data: Optional[Dict[str, Any]] = None
    images_metadata: Optional[List[Dict[str, Any]]] = None
    created_at: datetime
|
|
|
|
|
|
class OCRBatchResponse(BaseModel):
    """OCR batch response schema"""

    # ``model_config`` replaces the nested ``class Config``, which Pydantic v2
    # deprecates. ``from_attributes`` allows validation from an object's
    # attributes rather than only from a mapping.
    model_config = {"from_attributes": True}

    id: int
    user_id: int
    batch_name: Optional[str] = None
    status: BatchStatus

    # Per-batch counters and progress; all required.
    total_files: int
    completed_files: int
    failed_files: int
    progress_percentage: float

    # Only ``created_at`` is required; the other timestamps default to None.
    created_at: datetime
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
|
|
|
|
|
|
class BatchStatusResponse(BaseModel):
    """Batch status with file details"""

    # Both members are required; ``Field(...)`` spells that out explicitly.
    batch: OCRBatchResponse = Field(...)
    files: List[OCRFileResponse] = Field(...)
|
|
|
|
|
|
class FileStatusResponse(BaseModel):
    """File status with result details"""

    # The file record is always required.
    file: OCRFileResponse = Field(...)
    # The result is optional and defaults to None when not supplied.
    result: Optional[OCRResultResponse] = Field(default=None)
|
|
|
|
|
|
class OCRResultDetailResponse(BaseModel):
    """OCR result detail response for frontend preview - flattened structure"""

    # ``model_config`` replaces the nested ``class Config``, which Pydantic v2
    # deprecates. ``from_attributes`` allows validation from an object's
    # attributes rather than only from a mapping.
    model_config = {"from_attributes": True}

    file_id: int
    filename: str
    # Typed as a plain string here, not as the FileStatus enum.
    status: str
    markdown_content: Optional[str] = None
    json_data: Optional[Dict[str, Any]] = None
    confidence: Optional[float] = None
    processing_time: Optional[float] = None
|
|
|
|
|
|
class UploadBatchResponse(BaseModel):
    """Upload response schema matching frontend expectations"""

    # ``model_config`` replaces the nested ``class Config``, which Pydantic v2
    # deprecates. ``json_schema_extra`` is merged into the generated JSON
    # schema, so the example below is what API consumers see.
    model_config = {
        "json_schema_extra": {
            "example": {
                "batch_id": 1,
                "files": [
                    {
                        "id": 1,
                        "batch_id": 1,
                        "filename": "doc_1.png",
                        "original_filename": "document.png",
                        "file_size": 1024000,
                        "file_format": "png",
                        "status": "pending",
                        # The field on OCRFileResponse is ``error``;
                        # ``error_message`` is only its validation (input)
                        # alias, so the example uses the field name.
                        "error": None,
                        "created_at": "2025-01-01T00:00:00",
                        "processing_time": None,
                    }
                ],
            }
        }
    }

    batch_id: int = Field(..., description="Batch ID")
    files: List[OCRFileResponse] = Field(..., description="Uploaded files")
|
|
|
|
|
|
class ProcessRequest(BaseModel):
    """OCR process request schema"""

    # ``model_config`` replaces the nested ``class Config``, which Pydantic v2
    # deprecates. ``json_schema_extra`` is merged into the generated JSON
    # schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "batch_id": 1,
                "lang": "ch",
                "detect_layout": True,
            }
        }
    }

    batch_id: int = Field(..., description="Batch ID to process")
    # Any string is accepted; the codes in the description are not enforced
    # by this schema.
    lang: str = Field(default="ch", description="Language code (ch, en, japan, korean)")
    detect_layout: bool = Field(default=True, description="Enable layout detection")
|
|
|
|
|
|
class ProcessResponse(BaseModel):
    """OCR process response schema"""

    # ``model_config`` replaces the nested ``class Config``, which Pydantic v2
    # deprecates. ``json_schema_extra`` is merged into the generated JSON
    # schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "message": "OCR processing started",
                "batch_id": 1,
                "total_files": 5,
                "status": "processing",
            }
        }
    }

    message: str
    batch_id: int
    total_files: int
    # Typed as a plain string here, not as the BatchStatus enum.
    status: str
|