Files
OCR/backend/app/schemas/task.py
egg ee49751c38 fix: add UTC timezone indicator to all datetime serialization
Database stores times in UTC but serialized without timezone info,
causing frontend to misinterpret as local time. Now all datetime
fields include 'Z' suffix to indicate UTC, enabling proper timezone
conversion in the browser.

- Add UTCDatetimeBaseModel base class for Pydantic schemas
- Update model to_dict() methods to append 'Z' suffix
- Affects: tasks, users, sessions, audit logs, translations

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 15:48:17 +08:00

347 lines
13 KiB
Python

"""
Tool_OCR - Task Management Schemas
"""
from typing import Optional, List
from datetime import datetime
from pydantic import BaseModel, Field
from enum import Enum
from app.schemas.base import UTCDatetimeBaseModel
class TaskStatusEnum(str, Enum):
    """Task status enumeration"""

    # The ``str`` mixin makes every member compare equal to its plain string
    # value (e.g. ``TaskStatusEnum.PENDING == "pending"``).
    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
class ProcessingTrackEnum(str, Enum):
    """Processing track enumeration for dual-track processing"""

    # Members are string-valued (``str`` mixin), so they compare equal to the
    # lowercase literals below.
    OCR = "ocr"  # PaddleOCR PP-StructureV3 for scanned documents
    DIRECT = "direct"  # PyMuPDF for editable PDFs
    HYBRID = "hybrid"  # Mixed processing
    AUTO = "auto"  # Auto-detect best track
class LayoutModelEnum(str, Enum):
    """Layout detection model selection for OCR track.

    Different models are optimized for different document types:
    - CHINESE: PP-DocLayout_plus-L (83.2% mAP), optimized for complex Chinese documents
    - DEFAULT: PubLayNet-based (~94% mAP), optimized for English academic papers
    - CDLA: CDLA model (~86% mAP), specialized Chinese document layout analysis
    """

    # Members are string-valued (``str`` mixin).
    CHINESE = "chinese"  # PP-DocLayout_plus-L - Best for Chinese documents (recommended)
    DEFAULT = "default"  # PubLayNet-based - Best for English documents
    CDLA = "cdla"  # CDLA model - Alternative for Chinese layout
class PreprocessingModeEnum(str, Enum):
    """Preprocessing mode for layout detection enhancement.

    - AUTO: Analyze image quality and automatically apply optimal preprocessing
    - MANUAL: Use user-specified preprocessing configuration
    - DISABLED: Skip preprocessing entirely
    """

    # Members are string-valued (``str`` mixin).
    AUTO = "auto"  # Analyze and apply automatically (default)
    MANUAL = "manual"  # Use specified configuration
    DISABLED = "disabled"  # Skip preprocessing
class PreprocessingContrastEnum(str, Enum):
    """Contrast enhancement method for preprocessing.

    - NONE: No contrast enhancement
    - HISTOGRAM: Standard histogram equalization
    - CLAHE: Contrast Limited Adaptive Histogram Equalization (recommended for most cases)
    - DOCUMENT: Background normalization + CLAHE (recommended for scanned documents)
      Removes uneven illumination before enhancement. Best for scans with
      yellowed paper, shadow, or scanner lighting issues.
    """

    # Members are string-valued (``str`` mixin).
    NONE = "none"
    HISTOGRAM = "histogram"
    CLAHE = "clahe"
    DOCUMENT = "document"
class PreprocessingConfig(BaseModel):
    """Preprocessing configuration for layout detection enhancement.

    Used to configure image preprocessing before PP-Structure layout detection.
    Preprocessing helps detect tables with faint lines or low contrast borders.
    Original image is preserved for element extraction.
    """

    # Contrast method; CLAHE when the caller does not specify one.
    contrast: PreprocessingContrastEnum = Field(
        default=PreprocessingContrastEnum.CLAHE,
        description="Contrast enhancement method"
    )
    # Validated to the inclusive range [0.5, 3.0]; default 1.0.
    contrast_strength: float = Field(
        default=1.0,
        ge=0.5,
        le=3.0,
        description="Contrast enhancement strength (0.5=subtle, 1.0=normal, 2.0=strong, 3.0=maximum)"
    )
    # Sharpening is on by default.
    sharpen: bool = Field(
        default=True,
        description="Enable sharpening for faint lines"
    )
    # Validated to the inclusive range [0.5, 2.0]; default 1.0.
    sharpen_strength: float = Field(
        default=1.0,
        ge=0.5,
        le=2.0,
        description="Sharpening strength (0.5=subtle, 1.0=normal, 1.5=strong, 2.0=maximum)"
    )
    # Binarization is off by default.
    binarize: bool = Field(
        default=False,
        description="Enable binarization (aggressive, for very low contrast). Not recommended for most documents."
    )
    # Scan-artifact removal is on by default.
    remove_scan_artifacts: bool = Field(
        default=True,
        description="Remove horizontal scan line artifacts. Recommended for scanned documents to prevent misdetection of scanner light bar lines as table borders."
    )
class ImageQualityMetrics(BaseModel):
    """Image quality metrics from auto-analysis."""

    # Both fields are required (``...`` marks them as having no default).
    contrast: float = Field(..., description="Contrast level (std dev of grayscale)")
    edge_strength: float = Field(..., description="Edge strength (Sobel gradient mean)")
class PreprocessingPreviewRequest(BaseModel):
    """Request for preprocessing preview."""

    # Page numbers start at 1 (``ge=1``); the first page is previewed by default.
    page: int = Field(default=1, ge=1, description="Page number to preview")
    # Defaults to automatic preprocessing.
    mode: PreprocessingModeEnum = Field(
        default=PreprocessingModeEnum.AUTO,
        description="Preprocessing mode"
    )
    # Optional; ``None`` when the caller supplies no manual configuration.
    config: Optional[PreprocessingConfig] = Field(
        None,
        description="Manual configuration (only used when mode='manual')"
    )
class PreprocessingPreviewResponse(BaseModel):
    """Response for preprocessing preview."""

    # Every field is required (``...`` marks them as having no default).
    original_url: str = Field(..., description="URL to original image")
    preprocessed_url: str = Field(..., description="URL to preprocessed image")
    quality_metrics: ImageQualityMetrics = Field(..., description="Image quality analysis")
    auto_config: PreprocessingConfig = Field(..., description="Auto-detected configuration")
    mode_used: PreprocessingModeEnum = Field(..., description="Mode that was applied")
class TaskCreate(BaseModel):
    """Task creation request"""

    # Both fields are optional and default to ``None``.
    filename: Optional[str] = Field(None, description="Original filename")
    file_type: Optional[str] = Field(None, description="File MIME type")
class TaskUpdate(BaseModel):
    """Task update request"""

    # Every field is optional and defaults to ``None``.
    # NOTE(review): presumably ``None`` means "leave unchanged" — confirm
    # against the code that applies this update.
    status: Optional[TaskStatusEnum] = None
    error_message: Optional[str] = None
    processing_time_ms: Optional[int] = None
    result_json_path: Optional[str] = None
    result_markdown_path: Optional[str] = None
    result_pdf_path: Optional[str] = None
class TaskFileResponse(UTCDatetimeBaseModel):
    """Task file response schema"""

    # Inherits from UTCDatetimeBaseModel (imported from app.schemas.base, not
    # defined in this module); datetime serialization behavior comes from there.
    id: int  # required
    original_name: Optional[str] = None
    stored_path: Optional[str] = None
    file_size: Optional[int] = None
    mime_type: Optional[str] = None
    file_hash: Optional[str] = None
    created_at: datetime  # required
class TaskResponse(UTCDatetimeBaseModel):
    """Task response schema"""

    # Inherits from UTCDatetimeBaseModel (imported from app.schemas.base, not
    # defined in this module); datetime serialization behavior comes from there.

    # Required identifiers.
    id: int
    user_id: int
    task_id: str
    # Optional file description.
    filename: Optional[str] = None
    file_type: Optional[str] = None
    # Required current status.
    status: TaskStatusEnum
    # Optional result locations and diagnostics; ``None`` when absent.
    result_json_path: Optional[str] = None
    result_markdown_path: Optional[str] = None
    result_pdf_path: Optional[str] = None
    error_message: Optional[str] = None
    processing_time_ms: Optional[int] = None
    # Timestamps: creation and update are required, completion is optional.
    created_at: datetime
    updated_at: datetime
    completed_at: Optional[datetime] = None
    # Defaults to False when not supplied.
    file_deleted: bool = False
class TaskDetailResponse(TaskResponse):
    """Detailed task response with files"""

    # Files attached to the task; an empty list when none are supplied.
    # NOTE(review): the literal ``[]`` default relies on Pydantic copying
    # mutable defaults per instance — confirm for the Pydantic version in use.
    files: List[TaskFileResponse] = []
    # Dual-track processing field (extracted from result metadata)
    processing_track: Optional[ProcessingTrackEnum] = None
    # Visualization availability (OCR Track only)
    has_visualization: bool = False
class TaskListResponse(BaseModel):
    """Paginated task list response"""

    # All fields are required.
    tasks: List[TaskResponse]
    total: int
    page: int
    page_size: int
    has_more: bool
class TaskStatsResponse(BaseModel):
    """User task statistics"""

    # All fields are required integers; the four named after statuses match
    # the member names of TaskStatusEnum.
    total: int
    pending: int
    processing: int
    completed: int
    failed: int
class TaskHistoryQuery(BaseModel):
    """Task history query parameters"""

    # Optional filters; ``None`` when not supplied.
    status: Optional[TaskStatusEnum] = None
    filename: Optional[str] = None
    date_from: Optional[datetime] = None
    date_to: Optional[datetime] = None
    # Pagination: page starts at 1; page_size is limited to 1..100 (default 50).
    page: int = Field(default=1, ge=1)
    page_size: int = Field(default=50, ge=1, le=100)
    # Sorting: by "created_at", descending, unless overridden.
    # NOTE(review): ``order_by`` is an unconstrained string in this schema —
    # confirm the consumer validates it before using it to build a query.
    order_by: str = Field(default="created_at")
    order_desc: bool = Field(default=True)
class UploadFileInfo(BaseModel):
    """Uploaded file information"""

    # All fields are required.
    filename: str
    file_size: int
    file_type: str
class UploadResponse(BaseModel):
    """File upload response"""

    # Every field is required (``...`` marks them as having no default).
    task_id: str = Field(..., description="Created task ID")
    filename: str = Field(..., description="Original filename")
    file_size: int = Field(..., description="File size in bytes")
    file_type: str = Field(..., description="File MIME type")
    status: TaskStatusEnum = Field(..., description="Initial task status")
# ===== Dual-Track Processing Schemas =====
class PPStructureV3Params(BaseModel):
    """PP-StructureV3 fine-tuning parameters for OCR track.

    DEPRECATED: This class is deprecated and will be removed in a future version.
    Use `layout_model` parameter in ProcessingOptions instead.
    """

    # Every parameter is optional and defaults to ``None``.

    # Score thresholds validated to the inclusive range [0, 1].
    layout_detection_threshold: Optional[float] = Field(
        None, ge=0, le=1,
        description="Layout block detection score threshold (lower=more blocks, higher=high confidence only)"
    )
    layout_nms_threshold: Optional[float] = Field(
        None, ge=0, le=1,
        description="Layout NMS IoU threshold (lower=aggressive overlap removal, higher=allow more overlap)"
    )
    # Restricted by regex to exactly "union", "large" or "small".
    layout_merge_bboxes_mode: Optional[str] = Field(
        None, pattern="^(union|large|small)$",
        description="Bbox merging strategy: 'small'=conservative, 'large'=aggressive, 'union'=middle"
    )
    # Must be strictly positive (``gt=0``); no upper bound is enforced.
    layout_unclip_ratio: Optional[float] = Field(
        None, gt=0,
        description="Layout bbox expansion ratio (larger=looser boxes, smaller=tighter boxes)"
    )
    # Score thresholds validated to the inclusive range [0, 1].
    text_det_thresh: Optional[float] = Field(
        None, ge=0, le=1,
        description="Text detection score threshold (lower=detect more small/low-contrast text, higher=cleaner)"
    )
    text_det_box_thresh: Optional[float] = Field(
        None, ge=0, le=1,
        description="Text box candidate threshold (lower=more text boxes, higher=fewer false positives)"
    )
    # Must be strictly positive (``gt=0``); no upper bound is enforced.
    text_det_unclip_ratio: Optional[float] = Field(
        None, gt=0,
        description="Text box expansion ratio (larger=looser boxes, smaller=tighter boxes)"
    )
class ProcessingOptions(BaseModel):
    """Processing options for dual-track OCR"""

    # Dual-track processing is enabled by default.
    use_dual_track: bool = Field(default=True, description="Enable dual-track processing")
    # NOTE(review): the description mentions only ocr/direct, but the type
    # accepts any ProcessingTrackEnum member (including hybrid/auto) — confirm
    # which values the consumer actually honours.
    force_track: Optional[ProcessingTrackEnum] = Field(None, description="Force specific track (ocr/direct)")
    # Defaults to "ch".
    language: str = Field(default="ch", description="OCR language code")
    include_layout: bool = Field(default=True, description="Include layout analysis")
    include_images: bool = Field(default=True, description="Extract and save images")
    # Optional; when given it must lie in the inclusive range [0, 1].
    confidence_threshold: Optional[float] = Field(None, ge=0, le=1, description="OCR confidence threshold")
    # Layout model selection (OCR track only)
    layout_model: Optional[LayoutModelEnum] = Field(
        default=LayoutModelEnum.CHINESE,
        description="Layout detection model: 'chinese' (recommended for Chinese docs), 'default' (English docs), 'cdla' (Chinese layout)"
    )
    # Layout preprocessing (OCR track only)
    preprocessing_mode: PreprocessingModeEnum = Field(
        default=PreprocessingModeEnum.AUTO,
        description="Preprocessing mode: 'auto' (analyze and apply), 'manual' (use config), 'disabled'"
    )
    # Optional; ``None`` when no manual configuration is supplied.
    preprocessing_config: Optional[PreprocessingConfig] = Field(
        None,
        description="Manual preprocessing config (only used when preprocessing_mode='manual')"
    )
class AnalyzeRequest(BaseModel):
    """Document analysis request"""

    # Dual-track processing is enabled by default.
    use_dual_track: bool = Field(default=True, description="Enable dual-track processing")
    # Optional; ``None`` when no track is forced.
    force_track: Optional[ProcessingTrackEnum] = Field(None, description="Force specific track")
    # Defaults to "ch".
    language: str = Field(default="ch", description="OCR language")
    include_layout: bool = Field(default=True, description="Include layout analysis")
class DocumentAnalysisResponse(BaseModel):
    """Document type analysis response"""

    # Required identification of the analysed document.
    task_id: str
    filename: str
    # Required recommendation.
    recommended_track: ProcessingTrackEnum
    # Required; validated to the inclusive range [0, 1].
    confidence: float = Field(..., ge=0, le=1, description="Detection confidence")
    reason: str = Field(..., description="Reason for recommendation")
    # A fresh empty dict is created per instance when omitted.
    document_info: dict = Field(default_factory=dict, description="Document metadata")
    is_editable: bool = Field(..., description="Whether document has extractable text")
    # NOTE(review): no bounds are enforced here, so the scale (0-1 vs 0-100)
    # is not fixed by this schema — confirm with the producer.
    text_coverage: Optional[float] = Field(None, description="Percentage of text coverage")
    page_count: Optional[int] = Field(None, description="Number of pages")
class ProcessingMetadata(BaseModel):
    """Processing metadata included in responses"""

    # Required fields.
    processing_track: ProcessingTrackEnum
    processing_time_seconds: float
    language: str
    page_count: int
    total_elements: int
    total_text_regions: int
    total_tables: int
    total_images: int
    # Optional; ``None`` when no average confidence is supplied.
    average_confidence: Optional[float] = None
    # Defaults to True when not supplied.
    unified_format: bool = True
class TaskResponseWithMetadata(TaskResponse):
    """Extended task response with processing metadata"""

    # Adds two optional fields to TaskResponse; both default to ``None``.
    processing_track: Optional[ProcessingTrackEnum] = None
    processing_metadata: Optional[ProcessingMetadata] = None
class ExportOptions(BaseModel):
    """Export format options"""

    # Defaults to "json".
    # NOTE(review): this is an unconstrained string; the values listed in the
    # description are not enforced by this schema — confirm the consumer
    # rejects unknown formats.
    format: str = Field(default="json", description="Export format: json, markdown, pdf, unified")
    # Metadata and statistics are included by default.
    include_metadata: bool = Field(default=True, description="Include processing metadata")
    include_statistics: bool = Field(default=True, description="Include document statistics")
    # The legacy JSON format is off by default.
    legacy_format: bool = Field(default=False, description="Use legacy JSON format for compatibility")