refactor: complete V1 to V2 migration and remove legacy architecture
Remove all V1 architecture components and promote V2 to primary:
- Delete all paddle_ocr_* table models (export, ocr, translation, user)
- Delete legacy routers (auth, export, ocr, translation)
- Delete legacy schemas and services
- Promote user_v2.py to user.py as primary user model
- Update all imports and dependencies to use V2 models only
- Update main.py version to 2.0.0

Database changes:
- Fix SQLAlchemy reserved word: rename audit_log.metadata to extra_data
- Add migration to drop all paddle_ocr_* tables
- Update alembic env to only import V2 models

Frontend fixes:
- Fix Select component exports in TaskHistoryPage.tsx
- Update to use simplified Select API with options prop
- Fix AxiosInstance TypeScript import syntax

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1,59 +1,30 @@
|
||||
"""
|
||||
Tool_OCR - API Schemas
|
||||
Tool_OCR - API Schemas (V2)
|
||||
Pydantic models for request/response validation
|
||||
"""
|
||||
|
||||
from app.schemas.auth import Token, TokenData, LoginRequest
|
||||
from app.schemas.user import UserBase, UserCreate, UserResponse
|
||||
from app.schemas.ocr import (
|
||||
OCRBatchResponse,
|
||||
OCRFileResponse,
|
||||
OCRResultResponse,
|
||||
BatchStatusResponse,
|
||||
FileStatusResponse,
|
||||
ProcessRequest,
|
||||
ProcessResponse,
|
||||
)
|
||||
from app.schemas.export import (
|
||||
ExportRequest,
|
||||
ExportRuleCreate,
|
||||
ExportRuleUpdate,
|
||||
ExportRuleResponse,
|
||||
CSSTemplateResponse,
|
||||
)
|
||||
from app.schemas.translation import (
|
||||
TranslationRequest,
|
||||
TranslationResponse,
|
||||
TranslationFeatureStatus,
|
||||
LanguageInfo,
|
||||
from app.schemas.auth import LoginRequest, Token, UserResponse
|
||||
from app.schemas.task import (
|
||||
TaskCreate,
|
||||
TaskUpdate,
|
||||
TaskResponse,
|
||||
TaskDetailResponse,
|
||||
TaskListResponse,
|
||||
TaskStatsResponse,
|
||||
TaskStatusEnum,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
# Auth
|
||||
"Token",
|
||||
"TokenData",
|
||||
"LoginRequest",
|
||||
# User
|
||||
"UserBase",
|
||||
"UserCreate",
|
||||
"Token",
|
||||
"UserResponse",
|
||||
# OCR
|
||||
"OCRBatchResponse",
|
||||
"OCRFileResponse",
|
||||
"OCRResultResponse",
|
||||
"BatchStatusResponse",
|
||||
"FileStatusResponse",
|
||||
"ProcessRequest",
|
||||
"ProcessResponse",
|
||||
# Export
|
||||
"ExportRequest",
|
||||
"ExportRuleCreate",
|
||||
"ExportRuleUpdate",
|
||||
"ExportRuleResponse",
|
||||
"CSSTemplateResponse",
|
||||
# Translation (RESERVED)
|
||||
"TranslationRequest",
|
||||
"TranslationResponse",
|
||||
"TranslationFeatureStatus",
|
||||
"LanguageInfo",
|
||||
# Task
|
||||
"TaskCreate",
|
||||
"TaskUpdate",
|
||||
"TaskResponse",
|
||||
"TaskDetailResponse",
|
||||
"TaskListResponse",
|
||||
"TaskStatsResponse",
|
||||
"TaskStatusEnum",
|
||||
]
|
||||
|
||||
@@ -1,104 +0,0 @@
|
||||
"""
|
||||
Tool_OCR - Export Schemas
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, Any, List
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ExportOptions(BaseModel):
    """Export options schema"""

    # Every option may be omitted; only ``include_metadata`` has a
    # non-None default (True).
    confidence_threshold: Optional[float] = Field(
        default=None,
        description="Minimum confidence threshold",
    )
    include_metadata: Optional[bool] = Field(
        default=True,
        description="Include metadata in export",
    )
    filename_pattern: Optional[str] = Field(
        default=None,
        description="Filename pattern for export",
    )
    css_template: Optional[str] = Field(
        default=None,
        description="CSS template for PDF export",
    )
|
||||
|
||||
|
||||
class ExportRequest(BaseModel):
    """Export request schema"""

    # Declared through ``model_config`` rather than a nested ``class Config``:
    # the class-based form is deprecated in Pydantic v2 and emits a
    # deprecation warning when the model class is created.
    model_config = {
        "json_schema_extra": {
            "example": {
                "batch_id": 1,
                "format": "pdf",
                "rule_id": None,
                "css_template": "default",
                "include_formats": ["markdown", "json"],
                "options": {
                    "confidence_threshold": 0.8,
                    "include_metadata": True,
                },
            }
        }
    }

    # Required: which batch to export and in which output format.
    batch_id: int = Field(..., description="Batch ID to export")
    format: str = Field(..., description="Export format (txt, json, excel, markdown, pdf, zip)")

    # Optional modifiers; ``css_template`` falls back to "default".
    rule_id: Optional[int] = Field(None, description="Optional export rule ID to apply")
    css_template: Optional[str] = Field("default", description="CSS template for PDF export")
    include_formats: Optional[List[str]] = Field(None, description="Formats to include in ZIP export")
    options: Optional[ExportOptions] = Field(None, description="Additional export options")
|
||||
|
||||
|
||||
class ExportRuleCreate(BaseModel):
    """Export rule creation schema"""

    # Declared through ``model_config`` rather than a nested ``class Config``:
    # the class-based form is deprecated in Pydantic v2 and emits a
    # deprecation warning when the model class is created.
    model_config = {
        "json_schema_extra": {
            "example": {
                "rule_name": "High Confidence Only",
                "description": "Export only results with confidence > 0.8",
                "config_json": {
                    "filters": {
                        "confidence_threshold": 0.8,
                    },
                    "formatting": {
                        "add_line_numbers": True,
                    },
                },
                "css_template": None,
            }
        }
    }

    # ``rule_name`` and ``config_json`` are required; the rest may be omitted.
    rule_name: str = Field(..., max_length=100, description="Rule name")
    description: Optional[str] = Field(None, description="Rule description")
    config_json: Dict[str, Any] = Field(..., description="Rule configuration as JSON")
    css_template: Optional[str] = Field(None, description="Custom CSS template")
|
||||
|
||||
|
||||
class ExportRuleUpdate(BaseModel):
    """Export rule update schema"""

    # Every field defaults to None, so any subset may be supplied.
    rule_name: Optional[str] = Field(default=None, max_length=100)
    description: Optional[str] = None
    config_json: Optional[Dict[str, Any]] = None
    css_template: Optional[str] = None
|
||||
|
||||
|
||||
class ExportRuleResponse(BaseModel):
    """Export rule response schema"""

    # ``from_attributes`` lets the model be validated from an object's
    # attributes instead of a dict. It is set via ``model_config`` because
    # the nested ``class Config`` form is deprecated in Pydantic v2.
    model_config = {"from_attributes": True}

    # Identity of the rule and of the user it is associated with.
    id: int
    user_id: int

    # Rule payload; same field names as the create/update schemas.
    rule_name: str
    description: Optional[str] = None
    config_json: Dict[str, Any]
    css_template: Optional[str] = None

    # Record timestamps.
    created_at: datetime
    updated_at: datetime
|
||||
|
||||
|
||||
class CSSTemplateResponse(BaseModel):
    """CSS template response schema"""

    # Declared through ``model_config`` rather than a nested ``class Config``:
    # the class-based form is deprecated in Pydantic v2 and emits a
    # deprecation warning when the model class is created.
    model_config = {
        "json_schema_extra": {
            "example": {
                "name": "default",
                "description": "通用排版模板,適合大多數文檔",
                "filename": "default.css",
            }
        }
    }

    # All three fields are required.
    name: str = Field(..., description="Template name")
    description: str = Field(..., description="Template description")
    filename: str = Field(..., description="Template filename")
|
||||
@@ -1,151 +0,0 @@
|
||||
"""
|
||||
Tool_OCR - OCR Schemas
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, List, Any
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from app.models.ocr import BatchStatus, FileStatus
|
||||
|
||||
|
||||
class OCRFileResponse(BaseModel):
    """OCR file response schema"""

    # ``from_attributes`` allows validation from an object's attributes;
    # ``populate_by_name`` keeps the field name ``error`` usable on input
    # alongside its validation alias. Both are set via ``model_config``
    # because the nested ``class Config`` form is deprecated in Pydantic v2.
    model_config = {
        "from_attributes": True,
        "populate_by_name": True,
    }

    id: int
    batch_id: int

    # File identity and basic properties.
    filename: str
    original_filename: str
    file_size: int
    file_format: str

    status: FileStatus
    # Read from ``error_message`` on input, exposed as ``error``.
    error: Optional[str] = Field(None, validation_alias='error_message')
    created_at: datetime
    processing_time: Optional[float] = None
|
||||
|
||||
|
||||
class OCRResultResponse(BaseModel):
    """OCR result response schema"""

    # ``from_attributes`` lets the model be validated from an object's
    # attributes instead of a dict. It is set via ``model_config`` because
    # the nested ``class Config`` form is deprecated in Pydantic v2.
    model_config = {"from_attributes": True}

    id: int
    file_id: int

    # Output locations plus the inlined markdown text.
    markdown_path: Optional[str] = None
    markdown_content: Optional[str] = None  # Added for frontend preview
    json_path: Optional[str] = None
    images_dir: Optional[str] = None

    # Recognition summary; only ``total_text_regions`` is required.
    detected_language: Optional[str] = None
    total_text_regions: int
    average_confidence: Optional[float] = None

    # Free-form structured payloads.
    layout_data: Optional[Dict[str, Any]] = None
    images_metadata: Optional[List[Dict[str, Any]]] = None

    created_at: datetime
|
||||
|
||||
|
||||
class OCRBatchResponse(BaseModel):
    """OCR batch response schema"""

    # ``from_attributes`` lets the model be validated from an object's
    # attributes instead of a dict. It is set via ``model_config`` because
    # the nested ``class Config`` form is deprecated in Pydantic v2.
    model_config = {"from_attributes": True}

    id: int
    user_id: int
    batch_name: Optional[str] = None
    status: BatchStatus

    # Progress counters for the batch.
    total_files: int
    completed_files: int
    failed_files: int
    progress_percentage: float

    # Lifecycle timestamps; the latter two may be absent.
    created_at: datetime
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
|
||||
|
||||
|
||||
class BatchStatusResponse(BaseModel):
    """Batch status with file details"""

    # Batch-level summary.
    batch: OCRBatchResponse
    # File-level entries reported alongside the batch.
    files: List[OCRFileResponse]
|
||||
|
||||
|
||||
class FileStatusResponse(BaseModel):
    """File status with result details"""

    # File-level status record.
    file: OCRFileResponse
    # OCR result for the file; may be absent (defaults to None).
    result: Optional[OCRResultResponse] = None
|
||||
|
||||
|
||||
class OCRResultDetailResponse(BaseModel):
    """OCR result detail response for frontend preview - flattened structure"""

    # ``from_attributes`` lets the model be validated from an object's
    # attributes instead of a dict. It is set via ``model_config`` because
    # the nested ``class Config`` form is deprecated in Pydantic v2.
    model_config = {"from_attributes": True}

    # Required file identification and status.
    file_id: int
    filename: str
    status: str

    # Optional result payload and metrics.
    markdown_content: Optional[str] = None
    json_data: Optional[Dict[str, Any]] = None
    confidence: Optional[float] = None
    processing_time: Optional[float] = None
|
||||
|
||||
|
||||
class UploadBatchResponse(BaseModel):
    """Upload response schema matching frontend expectations"""

    # Declared through ``model_config`` rather than a nested ``class Config``:
    # the class-based form is deprecated in Pydantic v2.
    #
    # The file entry in the example uses the key "error" (not
    # "error_message"): ``OCRFileResponse`` only declares ``error_message``
    # as a validation alias, so the field is emitted under its own name.
    model_config = {
        "json_schema_extra": {
            "example": {
                "batch_id": 1,
                "files": [
                    {
                        "id": 1,
                        "batch_id": 1,
                        "filename": "doc_1.png",
                        "original_filename": "document.png",
                        "file_size": 1024000,
                        "file_format": "png",
                        "status": "pending",
                        "error": None,
                        "created_at": "2025-01-01T00:00:00",
                        "processing_time": None,
                    }
                ],
            }
        }
    }

    batch_id: int = Field(..., description="Batch ID")
    files: List[OCRFileResponse] = Field(..., description="Uploaded files")
|
||||
|
||||
|
||||
class ProcessRequest(BaseModel):
    """OCR process request schema"""

    # Declared through ``model_config`` rather than a nested ``class Config``:
    # the class-based form is deprecated in Pydantic v2 and emits a
    # deprecation warning when the model class is created.
    model_config = {
        "json_schema_extra": {
            "example": {
                "batch_id": 1,
                "lang": "ch",
                "detect_layout": True,
            }
        }
    }

    # Only ``batch_id`` is required; the other two have defaults.
    batch_id: int = Field(..., description="Batch ID to process")
    lang: str = Field(default="ch", description="Language code (ch, en, japan, korean)")
    detect_layout: bool = Field(default=True, description="Enable layout detection")
|
||||
|
||||
|
||||
class ProcessResponse(BaseModel):
    """OCR process response schema"""

    # Declared through ``model_config`` rather than a nested ``class Config``:
    # the class-based form is deprecated in Pydantic v2 and emits a
    # deprecation warning when the model class is created.
    model_config = {
        "json_schema_extra": {
            "example": {
                "message": "OCR processing started",
                "batch_id": 1,
                "total_files": 5,
                "status": "processing",
            }
        }
    }

    # All four fields are required.
    message: str
    batch_id: int
    total_files: int
    status: str
|
||||
@@ -1,124 +0,0 @@
|
||||
"""
|
||||
Tool_OCR - Translation Schemas (RESERVED)
|
||||
Request/response models for translation endpoints
|
||||
"""
|
||||
|
||||
from typing import Optional, Dict, List, Any
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class TranslationRequest(BaseModel):
    """
    Translation request schema (RESERVED)

    Expected format for document translation requests
    """

    # Declared through ``model_config`` rather than a nested ``class Config``:
    # the class-based form is deprecated in Pydantic v2 and emits a
    # deprecation warning when the model class is created.
    model_config = {
        "json_schema_extra": {
            "example": {
                "file_id": 1,
                "source_lang": "zh",
                "target_lang": "en",
                "engine_type": "offline",
                "preserve_structure": True,
                "engine_config": {},
            }
        }
    }

    # Required: the file and the language pair.
    file_id: int = Field(..., description="File ID to translate")
    source_lang: str = Field(..., description="Source language code (zh, en, ja, ko)")
    target_lang: str = Field(..., description="Target language code (zh, en, ja, ko)")

    # Optional engine selection and behaviour; defaults are "offline" / True / None.
    engine_type: Optional[str] = Field("offline", description="Translation engine (offline, ernie, google, deepl)")
    preserve_structure: bool = Field(True, description="Preserve markdown structure")
    engine_config: Optional[Dict[str, Any]] = Field(None, description="Engine-specific configuration")
|
||||
|
||||
|
||||
class TranslationResponse(BaseModel):
    """
    Translation response schema (RESERVED)

    Expected format for translation results
    """

    # Declared through ``model_config`` rather than a nested ``class Config``:
    # the class-based form is deprecated in Pydantic v2 and emits a
    # deprecation warning when the model class is created.
    model_config = {
        "json_schema_extra": {
            "example": {
                "task_id": 1,
                "file_id": 1,
                "source_lang": "zh",
                "target_lang": "en",
                "engine_type": "offline",
                "status": "processing",
                "translated_file_path": None,
                "progress": 0.5,
                "error_message": None,
            }
        }
    }

    task_id: int = Field(..., description="Translation task ID")

    # Same field names as the corresponding request fields.
    file_id: int
    source_lang: str
    target_lang: str
    engine_type: str

    status: str = Field(..., description="Translation status (pending, processing, completed, failed)")
    translated_file_path: Optional[str] = Field(None, description="Path to translated markdown file")
    # The 0.0-1.0 range is documented only; no bound is enforced here.
    progress: float = Field(0.0, description="Translation progress (0.0-1.0)")
    error_message: Optional[str] = None
|
||||
|
||||
|
||||
class TranslationStatusResponse(BaseModel):
    """Translation task status response (RESERVED)"""

    # Required task state.
    task_id: int
    status: str
    progress: float

    # Timestamps are typed as plain strings in this schema.
    created_at: str
    completed_at: Optional[str] = None

    error_message: Optional[str] = None
|
||||
|
||||
|
||||
class TranslationConfigRequest(BaseModel):
    """Translation configuration request (RESERVED)"""

    # Declared through ``model_config`` rather than a nested ``class Config``:
    # the class-based form is deprecated in Pydantic v2 and emits a
    # deprecation warning when the model class is created.
    model_config = {
        "json_schema_extra": {
            "example": {
                "source_lang": "zh",
                "target_lang": "en",
                "engine_type": "offline",
                "engine_config": {
                    "model_path": "/path/to/model",
                },
            }
        }
    }

    # Required, length-limited identifiers.
    source_lang: str = Field(..., max_length=20)
    target_lang: str = Field(..., max_length=20)
    engine_type: str = Field(..., max_length=50)

    # Optional engine settings as a free-form mapping.
    engine_config: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class TranslationConfigResponse(BaseModel):
    """Translation configuration response (RESERVED)"""

    # Identity of the configuration and of the associated user.
    id: int
    user_id: int

    # Same field names as the configuration request.
    source_lang: str
    target_lang: str
    engine_type: str
    engine_config: Optional[Dict[str, Any]] = None

    # Timestamps are typed as plain strings in this schema.
    created_at: str
    updated_at: str
|
||||
|
||||
|
||||
class TranslationFeatureStatus(BaseModel):
    """Translation feature status response"""

    # Required status fields.
    available: bool = Field(description="Whether translation is available")
    status: str = Field(description="Feature status (reserved, planned, implemented)")
    message: str = Field(description="Status message")

    # Collections default to fresh empty containers via ``default_factory``.
    supported_engines: List[str] = Field(
        default_factory=list,
        description="Currently supported engines",
    )
    planned_engines: List[Dict[str, str]] = Field(
        default_factory=list,
        description="Planned engines",
    )
    roadmap: Dict[str, Any] = Field(
        default_factory=dict,
        description="Implementation roadmap",
    )
|
||||
|
||||
|
||||
class LanguageInfo(BaseModel):
    """Language information"""

    # All three fields are required.
    code: str = Field(description="Language code (ISO 639-1)")
    name: str = Field(description="Language name")
    status: str = Field(description="Support status (planned, supported)")
|
||||
@@ -1,53 +0,0 @@
|
||||
"""
|
||||
Tool_OCR - User Schemas
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
from pydantic import BaseModel, EmailStr, Field
|
||||
|
||||
|
||||
class UserBase(BaseModel):
    """Base user schema"""

    # Required; length must be between 3 and 50 characters.
    username: str = Field(min_length=3, max_length=50)
    # Required; validated as an e-mail address by ``EmailStr``.
    email: EmailStr
    # Optional; at most 100 characters when given.
    full_name: Optional[str] = Field(default=None, max_length=100)
|
||||
|
||||
|
||||
class UserCreate(UserBase):
    """User creation schema"""

    # Declared through ``model_config`` rather than a nested ``class Config``:
    # the class-based form is deprecated in Pydantic v2 and emits a
    # deprecation warning when the model class is created.
    model_config = {
        "json_schema_extra": {
            "example": {
                "username": "johndoe",
                "email": "john@example.com",
                "full_name": "John Doe",
                "password": "secret123",
            }
        }
    }

    # Added on top of the ``UserBase`` fields; required, at least 6 characters.
    password: str = Field(..., min_length=6, description="Password (min 6 characters)")
|
||||
|
||||
|
||||
class UserResponse(UserBase):
    """User response schema"""

    # ``from_attributes`` lets the model be validated from an object's
    # attributes instead of a dict. Both settings live in ``model_config``
    # because the nested ``class Config`` form is deprecated in Pydantic v2.
    model_config = {
        "from_attributes": True,
        "json_schema_extra": {
            "example": {
                "id": 1,
                "username": "johndoe",
                "email": "john@example.com",
                "full_name": "John Doe",
                "is_active": True,
                "is_admin": False,
                "created_at": "2025-01-01T00:00:00",
                "updated_at": "2025-01-01T00:00:00",
            }
        },
    }

    # Fields added on top of the ``UserBase`` fields.
    id: int
    is_active: bool
    is_admin: bool
    created_at: datetime
    updated_at: datetime
|
||||
Reference in New Issue
Block a user