refactor: complete V1 to V2 migration and remove legacy architecture
Remove all V1 architecture components and promote V2 to primary:
- Delete all paddle_ocr_* table models (export, ocr, translation, user)
- Delete legacy routers (auth, export, ocr, translation)
- Delete legacy schemas and services
- Promote user_v2.py to user.py as primary user model
- Update all imports and dependencies to use V2 models only
- Update main.py version to 2.0.0

Database changes:
- Fix SQLAlchemy reserved word: rename audit_log.metadata to extra_data
- Add migration to drop all paddle_ocr_* tables
- Update alembic env to only import V2 models

Frontend fixes:
- Fix Select component exports in TaskHistoryPage.tsx
- Update to use simplified Select API with options prop
- Fix AxiosInstance TypeScript import syntax

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1,31 +1,20 @@
|
||||
"""
|
||||
Tool_OCR - Database Models
|
||||
Tool_OCR - Database Models (V2)
|
||||
|
||||
New schema with external API authentication and user task isolation.
|
||||
External API authentication with user task isolation.
|
||||
All tables use 'tool_ocr_' prefix for namespace separation.
|
||||
"""
|
||||
|
||||
# New models for external authentication system
|
||||
from app.models.user_v2 import User
|
||||
from app.models.user import User
|
||||
from app.models.task import Task, TaskFile, TaskStatus
|
||||
from app.models.session import Session
|
||||
|
||||
# Legacy models (will be deprecated after migration)
|
||||
from app.models.ocr import OCRBatch, OCRFile, OCRResult
|
||||
from app.models.export import ExportRule
|
||||
from app.models.translation import TranslationConfig
|
||||
from app.models.audit_log import AuditLog
|
||||
|
||||
__all__ = [
|
||||
# New authentication and task models
|
||||
"User",
|
||||
"Task",
|
||||
"TaskFile",
|
||||
"TaskStatus",
|
||||
"Session",
|
||||
# Legacy models (deprecated)
|
||||
"OCRBatch",
|
||||
"OCRFile",
|
||||
"OCRResult",
|
||||
"ExportRule",
|
||||
"TranslationConfig",
|
||||
"AuditLog",
|
||||
]
|
||||
|
||||
@@ -67,7 +67,7 @@ class AuditLog(Base):
|
||||
comment="1 for success, 0 for failure"
|
||||
)
|
||||
error_message = Column(Text, nullable=True, comment="Error details if failed")
|
||||
metadata = Column(Text, nullable=True, comment="Additional JSON metadata")
|
||||
extra_data = Column(Text, nullable=True, comment="Additional JSON metadata")
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False, index=True)
|
||||
|
||||
# Relationships
|
||||
@@ -90,6 +90,6 @@ class AuditLog(Base):
|
||||
"resource_id": self.resource_id,
|
||||
"success": bool(self.success),
|
||||
"error_message": self.error_message,
|
||||
"metadata": self.metadata,
|
||||
"extra_data": self.extra_data,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None
|
||||
}
|
||||
|
||||
@@ -1,55 +0,0 @@
|
||||
"""
|
||||
Tool_OCR - Export Rule Model
|
||||
User-defined export rules and formatting configurations
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, Integer, String, DateTime, Text, ForeignKey, JSON
|
||||
from sqlalchemy.orm import relationship
|
||||
from datetime import datetime
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class ExportRule(Base):
    """Export rule configuration for customized output formatting.

    Each row belongs to one user (``user_id``) and keeps the rule itself as a
    JSON document in ``config_json``; an optional CSS template for PDF export
    is stored alongside it.
    """

    __tablename__ = "paddle_ocr_export_rules"

    id = Column(Integer, primary_key=True, index=True)
    # Rows are removed by the database when the owning user row is deleted.
    user_id = Column(Integer, ForeignKey("paddle_ocr_users.id", ondelete="CASCADE"), nullable=False, index=True)
    rule_name = Column(String(100), nullable=False)
    description = Column(Text, nullable=True)

    # Rule configuration stored as JSON
    # {
    #   "filters": {
    #     "confidence_threshold": 0.8,
    #     "filename_pattern": "invoice_*",
    #     "language": "ch"
    #   },
    #   "formatting": {
    #     "add_line_numbers": true,
    #     "sort_by_position": true,
    #     "group_by_filename": false
    #   },
    #   "export_options": {
    #     "include_metadata": true,
    #     "include_confidence": true,
    #     "include_bounding_boxes": false
    #   }
    # }
    config_json = Column(JSON, nullable=False)

    # CSS template for PDF export (optional)
    # Can reference predefined templates: "default", "academic", "business", "report"
    # Or store custom CSS
    css_template = Column(Text, nullable=True)

    # Timestamps come from datetime.utcnow, i.e. naive datetimes in UTC.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)

    # Relationships
    user = relationship("User", back_populates="export_rules")

    def __repr__(self):
        """Return a short debug representation with id, rule name and owner id."""
        return f"<ExportRule(id={self.id}, name='{self.rule_name}', user_id={self.user_id})>"
|
||||
@@ -1,122 +0,0 @@
|
||||
"""
|
||||
Tool_OCR - OCR Models
|
||||
Database models for OCR batches, files, and results
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, Integer, String, DateTime, Float, Text, ForeignKey, Enum, JSON
|
||||
from sqlalchemy.orm import relationship
|
||||
from datetime import datetime
|
||||
import enum
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class BatchStatus(str, enum.Enum):
    """Batch processing status.

    Members are also ``str`` instances, so they compare equal to their plain
    string values.
    """

    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    PARTIAL = "partial"  # Some files failed
    FAILED = "failed"
|
||||
|
||||
|
||||
class FileStatus(str, enum.Enum):
    """Individual file processing status.

    Members are also ``str`` instances, so they compare equal to their plain
    string values.
    """

    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
|
||||
|
||||
|
||||
class OCRBatch(Base):
    """OCR batch processing tracking.

    One row per batch submitted by a user. The row carries the overall status
    plus counters for total, completed and failed files.
    """

    __tablename__ = "paddle_ocr_batches"

    id = Column(Integer, primary_key=True, index=True)
    # Rows are removed by the database when the owning user row is deleted.
    user_id = Column(Integer, ForeignKey("paddle_ocr_users.id", ondelete="CASCADE"), nullable=False, index=True)
    batch_name = Column(String(255), nullable=True)
    status = Column(Enum(BatchStatus), default=BatchStatus.PENDING, nullable=False, index=True)
    total_files = Column(Integer, default=0, nullable=False)
    completed_files = Column(Integer, default=0, nullable=False)
    failed_files = Column(Integer, default=0, nullable=False)
    # Timestamps come from datetime.utcnow, i.e. naive datetimes in UTC.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False, index=True)
    started_at = Column(DateTime, nullable=True)
    completed_at = Column(DateTime, nullable=True)

    # Relationships
    user = relationship("User", back_populates="ocr_batches")
    # Files are owned by the batch: removing the batch (or detaching a file
    # from it) deletes the file through the ORM cascade.
    files = relationship("OCRFile", back_populates="batch", cascade="all, delete-orphan")

    @property
    def progress_percentage(self) -> float:
        """Calculate progress percentage.

        Returns:
            ``0.0`` when the batch has no files; otherwise the share of
            ``completed_files`` in ``total_files`` as a percentage. Failed
            files are not counted towards progress.
        """
        if self.total_files == 0:
            # Guard against division by zero for an empty batch.
            return 0.0
        return (self.completed_files / self.total_files) * 100

    def __repr__(self):
        """Return a short debug representation with id, status and progress."""
        return f"<OCRBatch(id={self.id}, status='{self.status}', progress={self.progress_percentage:.1f}%)>"
|
||||
|
||||
|
||||
class OCRFile(Base):
    """Individual file in an OCR batch.

    Tracks where the uploaded file is stored, its processing status, and
    timing and retry information for that file.
    """

    __tablename__ = "paddle_ocr_files"

    id = Column(Integer, primary_key=True, index=True)
    # Rows are removed by the database when the parent batch row is deleted.
    batch_id = Column(Integer, ForeignKey("paddle_ocr_batches.id", ondelete="CASCADE"), nullable=False, index=True)
    filename = Column(String(255), nullable=False)
    original_filename = Column(String(255), nullable=False)
    file_path = Column(String(512), nullable=False)
    file_size = Column(Integer, nullable=False)  # Size in bytes
    file_format = Column(String(20), nullable=False)  # png, jpg, pdf, etc.
    status = Column(Enum(FileStatus), default=FileStatus.PENDING, nullable=False, index=True)
    error_message = Column(Text, nullable=True)
    retry_count = Column(Integer, default=0, nullable=False)  # Number of retry attempts
    # Timestamps come from datetime.utcnow, i.e. naive datetimes in UTC.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    started_at = Column(DateTime, nullable=True)
    completed_at = Column(DateTime, nullable=True)
    processing_time = Column(Float, nullable=True)  # Processing time in seconds

    # Relationships
    batch = relationship("OCRBatch", back_populates="files")
    # One-to-one (uselist=False): a file has at most one result, which is
    # deleted together with the file through the ORM cascade.
    result = relationship("OCRResult", back_populates="file", uselist=False, cascade="all, delete-orphan")

    def __repr__(self):
        """Return a short debug representation with id, filename and status."""
        return f"<OCRFile(id={self.id}, filename='{self.filename}', status='{self.status}')>"
|
||||
|
||||
|
||||
class OCRResult(Base):
    """OCR processing result with structure and images.

    Holds the output file locations, summary OCR metrics, and JSON-encoded
    layout and image metadata for exactly one processed file.
    """

    __tablename__ = "paddle_ocr_results"

    id = Column(Integer, primary_key=True, index=True)
    # unique=True enforces at most one result row per file; the row is removed
    # by the database when the file row is deleted.
    file_id = Column(Integer, ForeignKey("paddle_ocr_files.id", ondelete="CASCADE"), unique=True, nullable=False, index=True)

    # Output file paths
    markdown_path = Column(String(512), nullable=True)  # Path to Markdown file
    json_path = Column(String(512), nullable=True)  # Path to JSON file
    images_dir = Column(String(512), nullable=True)  # Directory containing extracted images

    # OCR metadata
    detected_language = Column(String(20), nullable=True)  # ch, en, japan, korean
    total_text_regions = Column(Integer, default=0, nullable=False)
    average_confidence = Column(Float, nullable=True)

    # Layout structure data (stored as JSON)
    # Contains: layout elements (title, paragraph, table, image, formula), reading order, bounding boxes
    layout_data = Column(JSON, nullable=True)

    # Extracted images metadata (stored as JSON)
    # Contains: list of {image_path, bbox, element_type}
    images_metadata = Column(JSON, nullable=True)

    # Timestamp comes from datetime.utcnow, i.e. a naive datetime in UTC.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    # Relationships
    file = relationship("OCRFile", back_populates="result")

    def __repr__(self):
        """Return a short debug representation with id, file id and language."""
        return f"<OCRResult(id={self.id}, file_id={self.file_id}, language='{self.detected_language}')>"
|
||||
@@ -1,43 +0,0 @@
|
||||
"""
|
||||
Tool_OCR - Translation Config Model (RESERVED)
|
||||
Reserved for future translation feature implementation
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, JSON
|
||||
from sqlalchemy.orm import relationship
|
||||
from datetime import datetime
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class TranslationConfig(Base):
    """
    Translation configuration (RESERVED for future implementation)

    This table is created but not actively used until translation feature is implemented.
    """

    __tablename__ = "paddle_ocr_translation_configs"

    id = Column(Integer, primary_key=True, index=True)
    # Rows are removed by the database when the owning user row is deleted.
    user_id = Column(Integer, ForeignKey("paddle_ocr_users.id", ondelete="CASCADE"), nullable=False, index=True)

    source_lang = Column(String(20), nullable=False)  # ch, en, japan, korean, etc.
    target_lang = Column(String(20), nullable=False)  # en, ch, japan, korean, etc.

    # Translation engine type: "offline" (argostranslate), "ernie", "google", "deepl"
    engine_type = Column(String(50), nullable=False, default="offline")

    # Engine-specific configuration stored as JSON
    # For offline (argostranslate): {"model_path": "/path/to/model"}
    # For API-based: {"api_key": "xxx", "endpoint": "https://..."}
    engine_config = Column(JSON, nullable=True)

    # Timestamps come from datetime.utcnow, i.e. naive datetimes in UTC.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)

    # Relationships
    user = relationship("User", back_populates="translation_configs")

    def __repr__(self):
        """Return a short debug representation with id, language pair and engine."""
        return f"<TranslationConfig(id={self.id}, {self.source_lang}->{self.target_lang}, engine='{self.engine_type}')>"
|
||||
@@ -1,6 +1,6 @@
|
||||
"""
|
||||
Tool_OCR - User Model
|
||||
User authentication and management
|
||||
Tool_OCR - User Model v2.0
|
||||
External API authentication with simplified schema
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, Integer, String, DateTime, Boolean
|
||||
@@ -11,24 +11,39 @@ from app.core.database import Base
|
||||
|
||||
|
||||
class User(Base):
|
||||
"""User model for JWT authentication"""
|
||||
"""
|
||||
User model for external API authentication
|
||||
|
||||
__tablename__ = "paddle_ocr_users"
|
||||
Uses email as primary identifier from Azure AD.
|
||||
No password storage - authentication via external API only.
|
||||
"""
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
username = Column(String(50), unique=True, nullable=False, index=True)
|
||||
email = Column(String(100), unique=True, nullable=False, index=True)
|
||||
password_hash = Column(String(255), nullable=False)
|
||||
full_name = Column(String(100), nullable=True)
|
||||
is_active = Column(Boolean, default=True, nullable=False)
|
||||
is_admin = Column(Boolean, default=False, nullable=False)
|
||||
__tablename__ = "tool_ocr_users"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True, autoincrement=True)
|
||||
email = Column(String(255), unique=True, nullable=False, index=True,
|
||||
comment="Primary identifier from Azure AD")
|
||||
display_name = Column(String(255), nullable=True,
|
||||
comment="Display name from API response")
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
|
||||
last_login = Column(DateTime, nullable=True)
|
||||
is_active = Column(Boolean, default=True, nullable=False, index=True)
|
||||
|
||||
# Relationships
|
||||
ocr_batches = relationship("OCRBatch", back_populates="user", cascade="all, delete-orphan")
|
||||
export_rules = relationship("ExportRule", back_populates="user", cascade="all, delete-orphan")
|
||||
translation_configs = relationship("TranslationConfig", back_populates="user", cascade="all, delete-orphan")
|
||||
tasks = relationship("Task", back_populates="user", cascade="all, delete-orphan")
|
||||
sessions = relationship("Session", back_populates="user", cascade="all, delete-orphan")
|
||||
audit_logs = relationship("AuditLog", back_populates="user")
|
||||
|
||||
def __repr__(self):
|
||||
return f"<User(id={self.id}, username='{self.username}', email='{self.email}')>"
|
||||
return f"<User(id={self.id}, email='{self.email}', display_name='{self.display_name}')>"
|
||||
|
||||
def to_dict(self):
|
||||
"""Convert user to dictionary"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"email": self.email,
|
||||
"display_name": self.display_name,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||
"last_login": self.last_login.isoformat() if self.last_login else None,
|
||||
"is_active": self.is_active
|
||||
}
|
||||
|
||||
@@ -1,49 +0,0 @@
|
||||
"""
|
||||
Tool_OCR - User Model v2.0
|
||||
External API authentication with simplified schema
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, Integer, String, DateTime, Boolean
|
||||
from sqlalchemy.orm import relationship
|
||||
from datetime import datetime
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class User(Base):
    """
    User model for external API authentication

    Uses email as primary identifier from Azure AD.
    No password storage - authentication via external API only.
    """

    __tablename__ = "tool_ocr_users"

    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    email = Column(String(255), unique=True, nullable=False, index=True,
                   comment="Primary identifier from Azure AD")
    display_name = Column(String(255), nullable=True,
                          comment="Display name from API response")
    # Timestamp comes from datetime.utcnow, i.e. a naive datetime in UTC.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    last_login = Column(DateTime, nullable=True)
    is_active = Column(Boolean, default=True, nullable=False, index=True)

    # Relationships
    # Tasks and sessions are owned by the user and are deleted with it through
    # the ORM cascade; audit logs are linked without a delete cascade.
    tasks = relationship("Task", back_populates="user", cascade="all, delete-orphan")
    sessions = relationship("Session", back_populates="user", cascade="all, delete-orphan")
    audit_logs = relationship("AuditLog", back_populates="user")

    def __repr__(self):
        """Return a short debug representation with id, email and display name."""
        return f"<User(id={self.id}, email='{self.email}', display_name='{self.display_name}')>"

    def to_dict(self):
        """Convert user to dictionary.

        Returns:
            A dict with ``id``, ``email``, ``display_name``, ``created_at``,
            ``last_login`` and ``is_active``. The two timestamps are ISO 8601
            strings, or ``None`` when the underlying attribute is unset.
        """
        return {
            "id": self.id,
            "email": self.email,
            "display_name": self.display_name,
            "created_at": self.created_at.isoformat() if self.created_at else None,
            "last_login": self.last_login.isoformat() if self.last_login else None,
            "is_active": self.is_active
        }
|
||||
Reference in New Issue
Block a user