This commit is contained in:
beabigegg
2025-11-12 22:53:17 +08:00
commit da700721fa
130 changed files with 23393 additions and 0 deletions

View File

@@ -0,0 +1,59 @@
"""
Tool_OCR - API Schemas
Pydantic models for request/response validation
"""
from app.schemas.auth import Token, TokenData, LoginRequest
from app.schemas.user import UserBase, UserCreate, UserResponse
from app.schemas.ocr import (
OCRBatchResponse,
OCRFileResponse,
OCRResultResponse,
BatchStatusResponse,
FileStatusResponse,
ProcessRequest,
ProcessResponse,
)
from app.schemas.export import (
ExportRequest,
ExportRuleCreate,
ExportRuleUpdate,
ExportRuleResponse,
CSSTemplateResponse,
)
from app.schemas.translation import (
TranslationRequest,
TranslationResponse,
TranslationFeatureStatus,
LanguageInfo,
)
# Public API of the schemas package: every name listed here is imported at
# the top of this module and re-exported for ``from app.schemas import ...``.
__all__ = [
    # Authentication schemas
    "Token",
    "TokenData",
    "LoginRequest",
    # User schemas
    "UserBase",
    "UserCreate",
    "UserResponse",
    # OCR schemas
    "OCRBatchResponse",
    "OCRFileResponse",
    "OCRResultResponse",
    "BatchStatusResponse",
    "FileStatusResponse",
    "ProcessRequest",
    "ProcessResponse",
    # Export schemas
    "ExportRequest",
    "ExportRuleCreate",
    "ExportRuleUpdate",
    "ExportRuleResponse",
    "CSSTemplateResponse",
    # Translation schemas (RESERVED)
    "TranslationRequest",
    "TranslationResponse",
    "TranslationFeatureStatus",
    "LanguageInfo",
]

View File

@@ -0,0 +1,42 @@
"""
Tool_OCR - Authentication Schemas
"""
from typing import Optional
from pydantic import BaseModel, Field
class LoginRequest(BaseModel):
    """Login request schema"""

    # Both fields are required; the length limits are enforced by Pydantic
    # during validation.
    username: str = Field(..., min_length=3, max_length=50, description="Username")
    password: str = Field(..., min_length=6, description="Password")

    # Pydantic v2 configuration. The nested ``class Config`` form is
    # deprecated in v2, so the example payload for the generated JSON schema
    # is declared through ``model_config`` instead.
    model_config = {
        "json_schema_extra": {
            "example": {
                "username": "admin",
                "password": "password123"
            }
        }
    }
class Token(BaseModel):
    """JWT token response schema"""

    access_token: str = Field(..., description="JWT access token")
    # Defaults to "bearer" when the caller does not supply a token type.
    token_type: str = Field(default="bearer", description="Token type")
    expires_in: int = Field(..., description="Token expiration time in seconds")

    # Pydantic v2 configuration (replaces the deprecated nested
    # ``class Config``); only contributes an example to the JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "access_token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...",
                "token_type": "bearer",
                "expires_in": 3600
            }
        }
    }
class TokenData(BaseModel):
    """Token payload data"""

    # Both fields are optional and default to None when not provided.
    user_id: Optional[int] = Field(default=None)
    username: Optional[str] = Field(default=None)

View File

@@ -0,0 +1,104 @@
"""
Tool_OCR - Export Schemas
"""
from datetime import datetime
from typing import Optional, Dict, Any, List
from pydantic import BaseModel, Field
class ExportOptions(BaseModel):
    """Export options schema"""

    # Every option is optional; only ``include_metadata`` has a non-None
    # default (True).
    confidence_threshold: Optional[float] = Field(
        default=None,
        description="Minimum confidence threshold",
    )
    include_metadata: Optional[bool] = Field(
        default=True,
        description="Include metadata in export",
    )
    filename_pattern: Optional[str] = Field(
        default=None,
        description="Filename pattern for export",
    )
    css_template: Optional[str] = Field(
        default=None,
        description="CSS template for PDF export",
    )
class ExportRequest(BaseModel):
    """Export request schema"""

    # ``batch_id`` and ``format`` are required; everything else is optional.
    # NOTE(review): ``format`` is a free-form string here — the values listed
    # in its description are not enforced by this schema.
    batch_id: int = Field(..., description="Batch ID to export")
    format: str = Field(..., description="Export format (txt, json, excel, markdown, pdf, zip)")
    rule_id: Optional[int] = Field(None, description="Optional export rule ID to apply")
    css_template: Optional[str] = Field("default", description="CSS template for PDF export")
    include_formats: Optional[List[str]] = Field(None, description="Formats to include in ZIP export")
    options: Optional[ExportOptions] = Field(None, description="Additional export options")

    # Pydantic v2 configuration (replaces the deprecated nested
    # ``class Config``); only contributes an example to the JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "batch_id": 1,
                "format": "pdf",
                "rule_id": None,
                "css_template": "default",
                "include_formats": ["markdown", "json"],
                "options": {
                    "confidence_threshold": 0.8,
                    "include_metadata": True
                }
            }
        }
    }
class ExportRuleCreate(BaseModel):
    """Export rule creation schema"""

    # ``rule_name`` (max 100 chars) and ``config_json`` are required.
    rule_name: str = Field(..., max_length=100, description="Rule name")
    description: Optional[str] = Field(None, description="Rule description")
    # Arbitrary JSON-style mapping; its inner structure is not validated here.
    config_json: Dict[str, Any] = Field(..., description="Rule configuration as JSON")
    css_template: Optional[str] = Field(None, description="Custom CSS template")

    # Pydantic v2 configuration (replaces the deprecated nested
    # ``class Config``); only contributes an example to the JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "rule_name": "High Confidence Only",
                "description": "Export only results with confidence > 0.8",
                "config_json": {
                    "filters": {
                        "confidence_threshold": 0.8
                    },
                    "formatting": {
                        "add_line_numbers": True
                    }
                },
                "css_template": None
            }
        }
    }
class ExportRuleUpdate(BaseModel):
    """Export rule update schema"""

    # All fields are optional and default to None, so a payload may carry
    # any subset of them.
    rule_name: Optional[str] = Field(default=None, max_length=100)
    description: Optional[str] = Field(default=None)
    config_json: Optional[Dict[str, Any]] = Field(default=None)
    css_template: Optional[str] = Field(default=None)
class ExportRuleResponse(BaseModel):
    """Export rule response schema"""

    id: int
    user_id: int
    rule_name: str
    description: Optional[str] = None
    config_json: Dict[str, Any]
    css_template: Optional[str] = None
    created_at: datetime
    updated_at: datetime

    # Pydantic v2 configuration (replaces the deprecated nested
    # ``class Config``). ``from_attributes`` lets the model be validated from
    # an object's attributes — presumably ORM rows; confirm against callers.
    model_config = {"from_attributes": True}
class CSSTemplateResponse(BaseModel):
    """CSS template response schema"""

    # All three fields are required strings.
    name: str = Field(..., description="Template name")
    description: str = Field(..., description="Template description")
    filename: str = Field(..., description="Template filename")

    # Pydantic v2 configuration (replaces the deprecated nested
    # ``class Config``); only contributes an example to the JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "name": "default",
                "description": "通用排版模板,適合大多數文檔",
                "filename": "default.css"
            }
        }
    }

151
backend/app/schemas/ocr.py Normal file
View File

@@ -0,0 +1,151 @@
"""
Tool_OCR - OCR Schemas
"""
from datetime import datetime
from typing import Optional, Dict, List, Any
from pydantic import BaseModel, Field
from app.models.ocr import BatchStatus, FileStatus
class OCRFileResponse(BaseModel):
    """OCR file response schema"""

    id: int
    batch_id: int
    filename: str
    original_filename: str
    file_size: int
    file_format: str
    status: FileStatus
    # Exposed as ``error`` but read from ``error_message`` on input (the
    # validation alias), mapping the source attribute name to the API name.
    error: Optional[str] = Field(None, validation_alias='error_message')
    created_at: datetime
    processing_time: Optional[float] = None

    # Pydantic v2 configuration (replaces the deprecated nested
    # ``class Config``):
    # - ``from_attributes`` allows validation from an object's attributes
    #   (presumably ORM rows; confirm against callers).
    # - ``populate_by_name`` additionally accepts the field name ``error``
    #   itself, not just the ``error_message`` alias.
    model_config = {
        "from_attributes": True,
        "populate_by_name": True,
    }
class OCRResultResponse(BaseModel):
    """OCR result response schema"""

    id: int
    file_id: int
    markdown_path: Optional[str] = None
    # Markdown text carried inline so the frontend can render a preview.
    markdown_content: Optional[str] = None
    json_path: Optional[str] = None
    images_dir: Optional[str] = None
    detected_language: Optional[str] = None
    total_text_regions: int
    average_confidence: Optional[float] = None
    # Free-form structures; their inner schema is not validated here.
    layout_data: Optional[Dict[str, Any]] = None
    images_metadata: Optional[List[Dict[str, Any]]] = None
    created_at: datetime

    # Pydantic v2 configuration (replaces the deprecated nested
    # ``class Config``). ``from_attributes`` lets the model be validated from
    # an object's attributes — presumably ORM rows; confirm against callers.
    model_config = {"from_attributes": True}
class OCRBatchResponse(BaseModel):
    """OCR batch response schema"""

    id: int
    user_id: int
    batch_name: Optional[str] = None
    status: BatchStatus
    # File counters and overall progress for the batch.
    # NOTE(review): the scale of ``progress_percentage`` (0-1 vs 0-100) is
    # not visible here — confirm against the model that supplies it.
    total_files: int
    completed_files: int
    failed_files: int
    progress_percentage: float
    created_at: datetime
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None

    # Pydantic v2 configuration (replaces the deprecated nested
    # ``class Config``). ``from_attributes`` lets the model be validated from
    # an object's attributes — presumably ORM rows; confirm against callers.
    model_config = {"from_attributes": True}
class BatchStatusResponse(BaseModel):
    """Batch status with file details"""

    # Required: the batch summary together with one entry per file.
    batch: OCRBatchResponse = Field(...)
    files: List[OCRFileResponse] = Field(...)
class FileStatusResponse(BaseModel):
    """File status with result details"""

    # Required file record.
    file: OCRFileResponse = Field(...)
    # Optional OCR result; defaults to None when not supplied.
    result: Optional[OCRResultResponse] = Field(default=None)
class OCRResultDetailResponse(BaseModel):
    """OCR result detail response for frontend preview - flattened structure"""

    file_id: int
    filename: str
    # Plain string here, unlike OCRFileResponse which uses the FileStatus enum.
    status: str
    markdown_content: Optional[str] = None
    json_data: Optional[Dict[str, Any]] = None
    confidence: Optional[float] = None
    processing_time: Optional[float] = None

    # Pydantic v2 configuration (replaces the deprecated nested
    # ``class Config``). ``from_attributes`` lets the model be validated from
    # an object's attributes.
    model_config = {"from_attributes": True}
class UploadBatchResponse(BaseModel):
    """Upload response schema matching frontend expectations"""

    batch_id: int = Field(..., description="Batch ID")
    files: List[OCRFileResponse] = Field(..., description="Uploaded files")

    # Pydantic v2 configuration (replaces the deprecated nested
    # ``class Config``); only contributes an example to the JSON schema.
    # The nested file example uses the key ``error`` because that is the
    # field name OCRFileResponse exposes; ``error_message`` is only its
    # validation alias and never appears in serialized output.
    model_config = {
        "json_schema_extra": {
            "example": {
                "batch_id": 1,
                "files": [
                    {
                        "id": 1,
                        "batch_id": 1,
                        "filename": "doc_1.png",
                        "original_filename": "document.png",
                        "file_size": 1024000,
                        "file_format": "png",
                        "status": "pending",
                        "error": None,
                        "created_at": "2025-01-01T00:00:00",
                        "processing_time": None
                    }
                ]
            }
        }
    }
class ProcessRequest(BaseModel):
    """OCR process request schema"""

    batch_id: int = Field(..., description="Batch ID to process")
    # Defaults to "ch"; the codes listed in the description are not enforced
    # by this schema.
    lang: str = Field(default="ch", description="Language code (ch, en, japan, korean)")
    detect_layout: bool = Field(default=True, description="Enable layout detection")

    # Pydantic v2 configuration (replaces the deprecated nested
    # ``class Config``); only contributes an example to the JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "batch_id": 1,
                "lang": "ch",
                "detect_layout": True
            }
        }
    }
class ProcessResponse(BaseModel):
    """OCR process response schema"""

    # All fields are required.
    message: str
    batch_id: int
    total_files: int
    status: str

    # Pydantic v2 configuration (replaces the deprecated nested
    # ``class Config``); only contributes an example to the JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "message": "OCR processing started",
                "batch_id": 1,
                "total_files": 5,
                "status": "processing"
            }
        }
    }

View File

@@ -0,0 +1,124 @@
"""
Tool_OCR - Translation Schemas (RESERVED)
Request/response models for translation endpoints
"""
from typing import Optional, Dict, List, Any
from pydantic import BaseModel, Field
class TranslationRequest(BaseModel):
    """
    Translation request schema (RESERVED)
    Expected format for document translation requests
    """

    # ``file_id`` and both language codes are required. The codes and engine
    # names listed in the descriptions are not enforced by this schema.
    file_id: int = Field(..., description="File ID to translate")
    source_lang: str = Field(..., description="Source language code (zh, en, ja, ko)")
    target_lang: str = Field(..., description="Target language code (zh, en, ja, ko)")
    engine_type: Optional[str] = Field("offline", description="Translation engine (offline, ernie, google, deepl)")
    preserve_structure: bool = Field(True, description="Preserve markdown structure")
    engine_config: Optional[Dict[str, Any]] = Field(None, description="Engine-specific configuration")

    # Pydantic v2 configuration (replaces the deprecated nested
    # ``class Config``); only contributes an example to the JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "file_id": 1,
                "source_lang": "zh",
                "target_lang": "en",
                "engine_type": "offline",
                "preserve_structure": True,
                "engine_config": {}
            }
        }
    }
class TranslationResponse(BaseModel):
    """
    Translation response schema (RESERVED)
    Expected format for translation results
    """

    task_id: int = Field(..., description="Translation task ID")
    file_id: int
    source_lang: str
    target_lang: str
    engine_type: str
    status: str = Field(..., description="Translation status (pending, processing, completed, failed)")
    translated_file_path: Optional[str] = Field(None, description="Path to translated markdown file")
    # Defaults to 0.0; the documented 0.0-1.0 range is not enforced here.
    progress: float = Field(0.0, description="Translation progress (0.0-1.0)")
    error_message: Optional[str] = None

    # Pydantic v2 configuration (replaces the deprecated nested
    # ``class Config``); only contributes an example to the JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "task_id": 1,
                "file_id": 1,
                "source_lang": "zh",
                "target_lang": "en",
                "engine_type": "offline",
                "status": "processing",
                "translated_file_path": None,
                "progress": 0.5,
                "error_message": None
            }
        }
    }
class TranslationStatusResponse(BaseModel):
    """Translation task status response (RESERVED)"""

    # Required task identity and state.
    task_id: int = Field(...)
    status: str = Field(...)
    progress: float = Field(...)
    # Timestamps are typed as plain strings in this schema.
    created_at: str = Field(...)
    completed_at: Optional[str] = Field(default=None)
    error_message: Optional[str] = Field(default=None)
class TranslationConfigRequest(BaseModel):
    """Translation configuration request (RESERVED)"""

    # Required strings with upper length bounds (20 / 20 / 50 characters).
    source_lang: str = Field(..., max_length=20)
    target_lang: str = Field(..., max_length=20)
    engine_type: str = Field(..., max_length=50)
    # Optional free-form mapping; its inner structure is not validated here.
    engine_config: Optional[Dict[str, Any]] = None

    # Pydantic v2 configuration (replaces the deprecated nested
    # ``class Config``); only contributes an example to the JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "source_lang": "zh",
                "target_lang": "en",
                "engine_type": "offline",
                "engine_config": {
                    "model_path": "/path/to/model"
                }
            }
        }
    }
class TranslationConfigResponse(BaseModel):
    """Translation configuration response (RESERVED)"""

    # Required identifiers and settings.
    id: int = Field(...)
    user_id: int = Field(...)
    source_lang: str = Field(...)
    target_lang: str = Field(...)
    engine_type: str = Field(...)
    # Optional free-form mapping; defaults to None.
    engine_config: Optional[Dict[str, Any]] = Field(default=None)
    # Timestamps are typed as plain strings in this schema.
    created_at: str = Field(...)
    updated_at: str = Field(...)
class TranslationFeatureStatus(BaseModel):
    """Translation feature status response"""

    # Required summary of the feature's availability.
    available: bool = Field(description="Whether translation is available")
    status: str = Field(description="Feature status (reserved, planned, implemented)")
    message: str = Field(description="Status message")
    # Collections default to fresh empty containers via ``default_factory``.
    supported_engines: List[str] = Field(
        default_factory=list,
        description="Currently supported engines",
    )
    planned_engines: List[Dict[str, str]] = Field(
        default_factory=list,
        description="Planned engines",
    )
    roadmap: Dict[str, Any] = Field(
        default_factory=dict,
        description="Implementation roadmap",
    )
class LanguageInfo(BaseModel):
    """Language information"""

    # All three fields are required strings.
    code: str = Field(description="Language code (ISO 639-1)")
    name: str = Field(description="Language name")
    status: str = Field(description="Support status (planned, supported)")

View File

@@ -0,0 +1,53 @@
"""
Tool_OCR - User Schemas
"""
from datetime import datetime
from typing import Optional
from pydantic import BaseModel, EmailStr, Field
class UserBase(BaseModel):
    """Base user schema"""

    # Required username, 3-50 characters.
    username: str = Field(min_length=3, max_length=50)
    # Required address, validated by the EmailStr type.
    email: EmailStr = Field(...)
    # Optional full name, at most 100 characters; defaults to None.
    full_name: Optional[str] = Field(default=None, max_length=100)
class UserCreate(UserBase):
    """User creation schema"""

    # Adds a required password (at least 6 characters) to the UserBase fields.
    password: str = Field(..., min_length=6, description="Password (min 6 characters)")

    # Pydantic v2 configuration (replaces the deprecated nested
    # ``class Config``); only contributes an example to the JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "username": "johndoe",
                "email": "john@example.com",
                "full_name": "John Doe",
                "password": "secret123"
            }
        }
    }
class UserResponse(UserBase):
    """User response schema"""

    # Extends the UserBase fields with identity, flags and timestamps; no
    # password field is declared on this schema.
    id: int
    is_active: bool
    is_admin: bool
    created_at: datetime
    updated_at: datetime

    # Pydantic v2 configuration (replaces the deprecated nested
    # ``class Config``). ``from_attributes`` lets the model be validated from
    # an object's attributes — presumably ORM rows; confirm against callers.
    # ``json_schema_extra`` contributes an example to the JSON schema.
    model_config = {
        "from_attributes": True,
        "json_schema_extra": {
            "example": {
                "id": 1,
                "username": "johndoe",
                "email": "john@example.com",
                "full_name": "John Doe",
                "is_active": True,
                "is_admin": False,
                "created_at": "2025-01-01T00:00:00",
                "updated_at": "2025-01-01T00:00:00"
            }
        },
    }