Files
OCR/backend/app/models/task.py
egg 73112db055 feat: add storage cleanup mechanism with soft delete and auto scheduler
- Add soft delete (deleted_at column) to preserve task records for statistics
- Implement cleanup service to delete old files while keeping DB records
- Add automatic cleanup scheduler (configurable interval, default 24h)
- Add admin endpoints: storage stats, cleanup trigger, scheduler status
- Update task service with admin views (include deleted/files_deleted)
- Add frontend storage management UI in admin dashboard
- Add i18n translations for storage management

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 12:41:01 +08:00

130 lines
5.2 KiB
Python

"""
Tool_OCR - Task Model
OCR task management with user isolation
"""
from sqlalchemy import Column, Integer, String, DateTime, Boolean, Text, ForeignKey, Enum as SQLEnum
from sqlalchemy.orm import relationship
from datetime import datetime
import enum
from app.core.database import Base
class TaskStatus(str, enum.Enum):
    """States an OCR task can be in.

    The enum mixes in ``str``, so every member compares equal to its
    lowercase string value (e.g. ``TaskStatus.PENDING == "pending"``).
    """

    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
class Task(Base):
    """
    OCR Task model with user association.

    Each row references its owning user through ``user_id``, carries the
    processing status, and stores the paths of the JSON / Markdown / PDF
    result files. ``deleted_at`` is a soft-delete marker (NULL means the
    task is not deleted) and ``file_deleted`` tracks whether the task's
    files were auto-deleted.
    """

    __tablename__ = "tool_ocr_tasks"

    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    user_id = Column(Integer, ForeignKey("tool_ocr_users.id", ondelete="CASCADE"),
                     nullable=False, index=True,
                     comment="Foreign key to users table")
    # Public identifier of the task; distinct from the integer primary key.
    task_id = Column(String(255), unique=True, nullable=False, index=True,
                     comment="Unique task identifier (UUID)")
    filename = Column(String(255), nullable=True, index=True)
    file_type = Column(String(100), nullable=True)
    status = Column(SQLEnum(TaskStatus), default=TaskStatus.PENDING, nullable=False,
                    index=True)

    # Locations of the generated result artifacts (all optional).
    result_json_path = Column(String(500), nullable=True,
                              comment="Path to JSON result file")
    result_markdown_path = Column(String(500), nullable=True,
                                  comment="Path to Markdown result file")
    result_pdf_path = Column(String(500), nullable=True,
                             comment="Path to searchable PDF file")
    error_message = Column(Text, nullable=True,
                           comment="Error details if task failed")
    processing_time_ms = Column(Integer, nullable=True,
                                comment="Processing time in milliseconds")

    # created_at / updated_at default to datetime.utcnow, i.e. naive UTC values.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False, index=True)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow,
                        nullable=False)
    completed_at = Column(DateTime, nullable=True)

    file_deleted = Column(Boolean, default=False, nullable=False,
                          comment="Track if files were auto-deleted")
    deleted_at = Column(DateTime, nullable=True, index=True,
                        comment="Soft delete timestamp - NULL means not deleted")

    # Relationships
    user = relationship("User", back_populates="tasks")
    # Child file rows are deleted together with the task (delete-orphan cascade).
    files = relationship("TaskFile", back_populates="task", cascade="all, delete-orphan")

    def _status_value(self):
        """Return the status as a plain string, or None when it is unset.

        ``status`` is normally a ``TaskStatus`` member, but it can still be
        ``None`` on an instance whose column default has not been applied
        yet, or a plain string if one was assigned directly. Both cases
        are handled so that ``__repr__`` and ``to_dict`` never raise.
        """
        status = self.status
        if not status:
            return None
        # Enum members expose ``.value``; a bare string is returned unchanged.
        return getattr(status, "value", status)

    def __repr__(self):
        # Uses the None-safe helper: a repr must not raise just because the
        # status has not been populated yet (e.g. while debugging/logging).
        return f"<Task(id={self.id}, task_id='{self.task_id}', status='{self._status_value()}')>"

    def to_dict(self):
        """Convert task to dictionary.

        Returns:
            dict: column values keyed by column name. ``status`` is the
            enum's string value and timestamps are ISO-8601 strings; any
            of those that are unset are returned as ``None``.
        """
        return {
            "id": self.id,
            "task_id": self.task_id,
            "filename": self.filename,
            "file_type": self.file_type,
            "status": self._status_value(),
            "result_json_path": self.result_json_path,
            "result_markdown_path": self.result_markdown_path,
            "result_pdf_path": self.result_pdf_path,
            "error_message": self.error_message,
            "processing_time_ms": self.processing_time_ms,
            "created_at": self.created_at.isoformat() if self.created_at else None,
            "updated_at": self.updated_at.isoformat() if self.updated_at else None,
            "completed_at": self.completed_at.isoformat() if self.completed_at else None,
            "file_deleted": self.file_deleted,
            "deleted_at": self.deleted_at.isoformat() if self.deleted_at else None
        }
class TaskFile(Base):
    """
    File record attached to a task.

    Holds the original name, stored path, size, MIME type and SHA256 hash
    of one file belonging to a ``Task``.
    """

    __tablename__ = "tool_ocr_task_files"

    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    # Integer foreign key to tool_ocr_tasks.id (the task's primary key).
    task_id = Column(
        Integer,
        ForeignKey("tool_ocr_tasks.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
        comment="Foreign key to tasks table",
    )
    original_name = Column(String(255), nullable=True)
    stored_path = Column(
        String(500),
        nullable=True,
        comment="Actual file path on server",
    )
    file_size = Column(
        Integer,
        nullable=True,
        comment="File size in bytes",
    )
    mime_type = Column(String(100), nullable=True)
    file_hash = Column(
        String(64),
        nullable=True,
        index=True,
        comment="SHA256 hash for deduplication",
    )
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    # Relationships
    task = relationship("Task", back_populates="files")

    def __repr__(self):
        return (
            f"<TaskFile(id={self.id}, task_id={self.task_id}, "
            f"original_name='{self.original_name}')>"
        )

    def to_dict(self):
        """Return the file record as a plain dictionary.

        Column values are copied as-is, except ``created_at`` which is
        rendered as an ISO-8601 string (or ``None`` when unset).
        """
        plain_fields = (
            "id",
            "task_id",
            "original_name",
            "stored_path",
            "file_size",
            "mime_type",
            "file_hash",
        )
        payload = {name: getattr(self, name) for name in plain_fields}
        payload["created_at"] = self.created_at.isoformat() if self.created_at else None
        return payload