The database stores times in UTC, but they were serialized without timezone info, causing the frontend to misinterpret them as local time. All datetime fields now include a 'Z' suffix to indicate UTC, enabling proper timezone conversion in the browser. Changes: (1) add a UTCDatetimeBaseModel base class for Pydantic schemas; (2) update model to_dict() methods to append the 'Z' suffix. Affects: tasks, users, sessions, audit logs, translations. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
347 lines
13 KiB
Python
347 lines
13 KiB
Python
"""
|
|
Tool_OCR - Task Management Schemas
|
|
"""
|
|
|
|
from typing import Optional, List
|
|
from datetime import datetime
|
|
from pydantic import BaseModel, Field
|
|
from enum import Enum
|
|
|
|
from app.schemas.base import UTCDatetimeBaseModel
|
|
|
|
|
|
class TaskStatusEnum(str, Enum):
    """Task status enumeration"""

    # The str mixin makes every member a real string, so members compare
    # equal to their lowercase values (e.g. TaskStatusEnum.PENDING == "pending").
    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
|
|
|
|
|
|
class ProcessingTrackEnum(str, Enum):
    """Processing track enumeration for dual-track processing"""

    # Members are strings (str mixin) and compare equal to their values.
    OCR = "ocr"  # PaddleOCR PP-StructureV3 for scanned documents
    DIRECT = "direct"  # PyMuPDF for editable PDFs
    HYBRID = "hybrid"  # Mixed processing
    AUTO = "auto"  # Auto-detect best track
|
|
|
|
|
|
class LayoutModelEnum(str, Enum):
    """Layout detection model selection for OCR track.

    Different models are optimized for different document types:
    - CHINESE: PP-DocLayout_plus-L (83.2% mAP), optimized for complex Chinese documents
    - DEFAULT: PubLayNet-based (~94% mAP), optimized for English academic papers
    - CDLA: CDLA model (~86% mAP), specialized Chinese document layout analysis
    """

    # Members are strings (str mixin) and compare equal to their values.
    CHINESE = "chinese"  # PP-DocLayout_plus-L - Best for Chinese documents (recommended)
    DEFAULT = "default"  # PubLayNet-based - Best for English documents
    CDLA = "cdla"  # CDLA model - Alternative for Chinese layout
|
|
|
|
|
|
class PreprocessingModeEnum(str, Enum):
    """Preprocessing mode for layout detection enhancement.

    - AUTO: Analyze image quality and automatically apply optimal preprocessing
    - MANUAL: Use user-specified preprocessing configuration
    - DISABLED: Skip preprocessing entirely
    """

    # Members are strings (str mixin) and compare equal to their values.
    AUTO = "auto"  # Analyze and apply automatically (default)
    MANUAL = "manual"  # Use specified configuration
    DISABLED = "disabled"  # Skip preprocessing
|
|
|
|
|
|
class PreprocessingContrastEnum(str, Enum):
    """Contrast enhancement method for preprocessing.

    - NONE: No contrast enhancement
    - HISTOGRAM: Standard histogram equalization
    - CLAHE: Contrast Limited Adaptive Histogram Equalization (recommended for most cases)
    - DOCUMENT: Background normalization + CLAHE (recommended for scanned documents)
        Removes uneven illumination before enhancement. Best for scans with
        yellowed paper, shadow, or scanner lighting issues.
    """

    # Members are strings (str mixin) and compare equal to their values.
    NONE = "none"
    HISTOGRAM = "histogram"
    CLAHE = "clahe"
    DOCUMENT = "document"
|
|
|
|
|
|
class PreprocessingConfig(BaseModel):
    """Preprocessing configuration for layout detection enhancement.

    Used to configure image preprocessing before PP-Structure layout detection.
    Preprocessing helps detect tables with faint lines or low contrast borders.
    Original image is preserved for element extraction.
    """

    # Contrast stage: which method to apply and how strongly (0.5..3.0 inclusive).
    contrast: PreprocessingContrastEnum = Field(
        default=PreprocessingContrastEnum.CLAHE,
        description="Contrast enhancement method"
    )
    contrast_strength: float = Field(
        default=1.0,
        ge=0.5,
        le=3.0,
        description="Contrast enhancement strength (0.5=subtle, 1.0=normal, 2.0=strong, 3.0=maximum)"
    )

    # Sharpening stage: on by default, strength bounded to 0.5..2.0 inclusive.
    sharpen: bool = Field(
        default=True,
        description="Enable sharpening for faint lines"
    )
    sharpen_strength: float = Field(
        default=1.0,
        ge=0.5,
        le=2.0,
        description="Sharpening strength (0.5=subtle, 1.0=normal, 1.5=strong, 2.0=maximum)"
    )

    # Optional aggressive steps: binarization is opt-in, artifact removal is opt-out.
    binarize: bool = Field(
        default=False,
        description="Enable binarization (aggressive, for very low contrast). Not recommended for most documents."
    )
    remove_scan_artifacts: bool = Field(
        default=True,
        description="Remove horizontal scan line artifacts. Recommended for scanned documents to prevent misdetection of scanner light bar lines as table borders."
    )
|
|
|
|
|
|
class ImageQualityMetrics(BaseModel):
    """Image quality metrics from auto-analysis."""

    # Both metrics are required (no default); no value range is enforced here.
    contrast: float = Field(..., description="Contrast level (std dev of grayscale)")
    edge_strength: float = Field(..., description="Edge strength (Sobel gradient mean)")
|
|
|
|
|
|
class PreprocessingPreviewRequest(BaseModel):
    """Request for preprocessing preview."""

    # 1-based page index; values below 1 are rejected by validation.
    page: int = Field(default=1, ge=1, description="Page number to preview")
    mode: PreprocessingModeEnum = Field(
        default=PreprocessingModeEnum.AUTO,
        description="Preprocessing mode"
    )
    # Optional; this schema does not itself enforce that it is present when
    # mode is 'manual' - that pairing is only stated in the description.
    config: Optional[PreprocessingConfig] = Field(
        None,
        description="Manual configuration (only used when mode='manual')"
    )
|
|
|
|
|
|
class PreprocessingPreviewResponse(BaseModel):
    """Response for preprocessing preview."""

    # All fields are required; URLs are plain strings (no URL validation here).
    original_url: str = Field(..., description="URL to original image")
    preprocessed_url: str = Field(..., description="URL to preprocessed image")
    quality_metrics: ImageQualityMetrics = Field(..., description="Image quality analysis")
    auto_config: PreprocessingConfig = Field(..., description="Auto-detected configuration")
    mode_used: PreprocessingModeEnum = Field(..., description="Mode that was applied")
|
|
|
|
|
|
class TaskCreate(BaseModel):
    """Task creation request"""

    # Both fields are optional and default to None.
    filename: Optional[str] = Field(None, description="Original filename")
    file_type: Optional[str] = Field(None, description="File MIME type")
|
|
|
|
|
|
class TaskUpdate(BaseModel):
    """Task update request"""

    # Every field is optional and defaults to None.
    # NOTE(review): presumably None means "leave unchanged" (partial update) -
    # confirm against the code that applies this schema.
    status: Optional[TaskStatusEnum] = None
    error_message: Optional[str] = None
    processing_time_ms: Optional[int] = None
    result_json_path: Optional[str] = None
    result_markdown_path: Optional[str] = None
    result_pdf_path: Optional[str] = None
|
|
|
|
|
|
class TaskFileResponse(UTCDatetimeBaseModel):
    """Task file response schema"""

    # Inherits from UTCDatetimeBaseModel (app.schemas.base, not visible here).
    # NOTE(review): presumably that base controls how `created_at` is
    # serialized (UTC marker) - confirm in app.schemas.base.
    id: int
    original_name: Optional[str] = None
    stored_path: Optional[str] = None
    file_size: Optional[int] = None
    mime_type: Optional[str] = None
    file_hash: Optional[str] = None
    created_at: datetime
|
|
|
|
|
|
class TaskResponse(UTCDatetimeBaseModel):
    """Task response schema"""

    # Inherits from UTCDatetimeBaseModel (app.schemas.base, not visible here).
    # NOTE(review): presumably that base controls how the datetime fields
    # below are serialized (UTC marker) - confirm in app.schemas.base.

    # Identity: `id` is an int and `task_id` a str - two distinct identifiers.
    id: int
    user_id: int
    task_id: str

    # Source file description.
    filename: Optional[str] = None
    file_type: Optional[str] = None

    # Processing state and outputs; result paths and error are None until set.
    status: TaskStatusEnum
    result_json_path: Optional[str] = None
    result_markdown_path: Optional[str] = None
    result_pdf_path: Optional[str] = None
    error_message: Optional[str] = None
    processing_time_ms: Optional[int] = None

    # Timestamps; `completed_at` is optional and defaults to None.
    created_at: datetime
    updated_at: datetime
    completed_at: Optional[datetime] = None

    file_deleted: bool = False
|
|
|
|
|
|
class TaskDetailResponse(TaskResponse):
    """Detailed task response with files"""

    # A literal [] default is kept as in the original; Pydantic copies mutable
    # defaults per instance, so instances do not share one list object.
    files: List[TaskFileResponse] = []
    # Dual-track processing field (extracted from result metadata)
    processing_track: Optional[ProcessingTrackEnum] = None
    # Visualization availability (OCR Track only)
    has_visualization: bool = False
|
|
|
|
|
|
class TaskListResponse(BaseModel):
    """Paginated task list response"""

    # All fields are required; this schema enforces no relationship between
    # `total`, `page`, `page_size` and `has_more`.
    tasks: List[TaskResponse]
    total: int
    page: int
    page_size: int
    has_more: bool
|
|
|
|
|
|
class TaskStatsResponse(BaseModel):
    """User task statistics"""

    # One required integer per TaskStatusEnum value, plus an overall total.
    # The schema does not check that the per-status counts sum to `total`.
    total: int
    pending: int
    processing: int
    completed: int
    failed: int
|
|
|
|
|
|
class TaskHistoryQuery(BaseModel):
    """Task history query parameters"""

    # Optional filters; None leaves the corresponding field unset.
    status: Optional[TaskStatusEnum] = None
    filename: Optional[str] = None
    date_from: Optional[datetime] = None
    date_to: Optional[datetime] = None

    # Pagination: page is 1-based, page_size is limited to 1..100.
    page: int = Field(default=1, ge=1)
    page_size: int = Field(default=50, ge=1, le=100)

    # Sorting.
    # NOTE(review): order_by is an unconstrained string - confirm the consumer
    # validates it against known column names before building a query.
    order_by: str = Field(default="created_at")
    order_desc: bool = Field(default=True)
|
|
|
|
|
|
class UploadFileInfo(BaseModel):
    """Uploaded file information"""

    # All three fields are required.
    filename: str
    file_size: int
    file_type: str
|
|
|
|
|
|
class UploadResponse(BaseModel):
    """File upload response"""

    # All fields are required (Field(...) with no default).
    task_id: str = Field(..., description="Created task ID")
    filename: str = Field(..., description="Original filename")
    file_size: int = Field(..., description="File size in bytes")
    file_type: str = Field(..., description="File MIME type")
    status: TaskStatusEnum = Field(..., description="Initial task status")
|
|
|
|
|
|
# ===== Dual-Track Processing Schemas =====
|
|
|
|
class PPStructureV3Params(BaseModel):
    """PP-StructureV3 fine-tuning parameters for OCR track.

    DEPRECATED: This class is deprecated and will be removed in a future version.
    Use `layout_model` parameter in ProcessingOptions instead.
    """

    # Every parameter is optional and defaults to None.

    # Layout detection parameters. Thresholds are limited to 0..1 inclusive;
    # the unclip ratio must be strictly positive.
    layout_detection_threshold: Optional[float] = Field(
        None, ge=0, le=1,
        description="Layout block detection score threshold (lower=more blocks, higher=high confidence only)"
    )
    layout_nms_threshold: Optional[float] = Field(
        None, ge=0, le=1,
        description="Layout NMS IoU threshold (lower=aggressive overlap removal, higher=allow more overlap)"
    )
    # Only the three literal strategy names are accepted (anchored regex).
    layout_merge_bboxes_mode: Optional[str] = Field(
        None, pattern="^(union|large|small)$",
        description="Bbox merging strategy: 'small'=conservative, 'large'=aggressive, 'union'=middle"
    )
    layout_unclip_ratio: Optional[float] = Field(
        None, gt=0,
        description="Layout bbox expansion ratio (larger=looser boxes, smaller=tighter boxes)"
    )

    # Text detection parameters, with the same bounds convention as above.
    text_det_thresh: Optional[float] = Field(
        None, ge=0, le=1,
        description="Text detection score threshold (lower=detect more small/low-contrast text, higher=cleaner)"
    )
    text_det_box_thresh: Optional[float] = Field(
        None, ge=0, le=1,
        description="Text box candidate threshold (lower=more text boxes, higher=fewer false positives)"
    )
    text_det_unclip_ratio: Optional[float] = Field(
        None, gt=0,
        description="Text box expansion ratio (larger=looser boxes, smaller=tighter boxes)"
    )
|
|
|
|
|
|
class ProcessingOptions(BaseModel):
    """Processing options for dual-track OCR"""

    use_dual_track: bool = Field(default=True, description="Enable dual-track processing")
    # NOTE(review): the description mentions only ocr/direct, but the field
    # type accepts every ProcessingTrackEnum value (including hybrid/auto).
    force_track: Optional[ProcessingTrackEnum] = Field(None, description="Force specific track (ocr/direct)")
    language: str = Field(default="ch", description="OCR language code")
    include_layout: bool = Field(default=True, description="Include layout analysis")
    include_images: bool = Field(default=True, description="Extract and save images")
    # Optional; when given it must lie in 0..1 inclusive.
    confidence_threshold: Optional[float] = Field(None, ge=0, le=1, description="OCR confidence threshold")

    # Layout model selection (OCR track only)
    # Defaults to CHINESE, but the Optional type means an explicit None is
    # also accepted by validation.
    layout_model: Optional[LayoutModelEnum] = Field(
        default=LayoutModelEnum.CHINESE,
        description="Layout detection model: 'chinese' (recommended for Chinese docs), 'default' (English docs), 'cdla' (Chinese layout)"
    )

    # Layout preprocessing (OCR track only)
    preprocessing_mode: PreprocessingModeEnum = Field(
        default=PreprocessingModeEnum.AUTO,
        description="Preprocessing mode: 'auto' (analyze and apply), 'manual' (use config), 'disabled'"
    )
    # This schema does not itself require the config when the mode is
    # 'manual'; that pairing is only stated in the description.
    preprocessing_config: Optional[PreprocessingConfig] = Field(
        None,
        description="Manual preprocessing config (only used when preprocessing_mode='manual')"
    )
|
|
|
|
|
|
class AnalyzeRequest(BaseModel):
    """Document analysis request"""

    # These four fields share names, types and defaults with the first four
    # fields of ProcessingOptions.
    use_dual_track: bool = Field(default=True, description="Enable dual-track processing")
    force_track: Optional[ProcessingTrackEnum] = Field(None, description="Force specific track")
    language: str = Field(default="ch", description="OCR language")
    include_layout: bool = Field(default=True, description="Include layout analysis")
|
|
|
|
|
|
class DocumentAnalysisResponse(BaseModel):
    """Document type analysis response"""

    task_id: str
    filename: str
    recommended_track: ProcessingTrackEnum
    # Required; must lie in 0..1 inclusive.
    confidence: float = Field(..., ge=0, le=1, description="Detection confidence")
    reason: str = Field(..., description="Reason for recommendation")
    # default_factory builds a fresh empty dict for each instance.
    document_info: dict = Field(default_factory=dict, description="Document metadata")
    is_editable: bool = Field(..., description="Whether document has extractable text")
    # NOTE(review): no bounds are enforced, so whether this is 0..1 or 0..100
    # cannot be told from the schema - confirm with the producer.
    text_coverage: Optional[float] = Field(None, description="Percentage of text coverage")
    page_count: Optional[int] = Field(None, description="Number of pages")
|
|
|
|
|
|
class ProcessingMetadata(BaseModel):
    """Processing metadata included in responses"""

    processing_track: ProcessingTrackEnum
    # Reported in seconds here, whereas TaskResponse.processing_time_ms is in
    # milliseconds (per the field names).
    processing_time_seconds: float
    language: str
    page_count: int

    # Element counts; all required, no non-negativity constraint is enforced.
    total_elements: int
    total_text_regions: int
    total_tables: int
    total_images: int

    average_confidence: Optional[float] = None
    unified_format: bool = True
|
|
|
|
|
|
class TaskResponseWithMetadata(TaskResponse):
    """Extended task response with processing metadata"""

    # Adds two optional fields on top of everything inherited from
    # TaskResponse; both default to None.
    processing_track: Optional[ProcessingTrackEnum] = None
    processing_metadata: Optional[ProcessingMetadata] = None
|
|
|
|
|
|
class ExportOptions(BaseModel):
    """Export format options"""

    # NOTE(review): `format` is a free-form str; the values listed in the
    # description are not enforced by this schema - confirm the consumer
    # rejects unknown formats.
    format: str = Field(default="json", description="Export format: json, markdown, pdf, unified")
    include_metadata: bool = Field(default=True, description="Include processing metadata")
    include_statistics: bool = Field(default=True, description="Include document statistics")
    legacy_format: bool = Field(default=False, description="Use legacy JSON format for compatibility")
|