""" Tool_OCR - Task Model OCR task management with user isolation """ from sqlalchemy import Column, Integer, String, DateTime, Boolean, Text, ForeignKey, Enum as SQLEnum from sqlalchemy.orm import relationship from datetime import datetime import enum from app.core.database import Base class TaskStatus(str, enum.Enum): """Task status enumeration""" PENDING = "pending" PROCESSING = "processing" COMPLETED = "completed" FAILED = "failed" class Task(Base): """ OCR Task model with user association Each task belongs to a specific user and stores processing status and result file paths. """ __tablename__ = "tool_ocr_tasks" id = Column(Integer, primary_key=True, index=True, autoincrement=True) user_id = Column(Integer, ForeignKey("tool_ocr_users.id", ondelete="CASCADE"), nullable=False, index=True, comment="Foreign key to users table") task_id = Column(String(255), unique=True, nullable=False, index=True, comment="Unique task identifier (UUID)") filename = Column(String(255), nullable=True, index=True) file_type = Column(String(100), nullable=True) status = Column(SQLEnum(TaskStatus), default=TaskStatus.PENDING, nullable=False, index=True) result_json_path = Column(String(500), nullable=True, comment="Path to JSON result file") result_markdown_path = Column(String(500), nullable=True, comment="Path to Markdown result file") result_pdf_path = Column(String(500), nullable=True, comment="Path to searchable PDF file") error_message = Column(Text, nullable=True, comment="Error details if task failed") processing_time_ms = Column(Integer, nullable=True, comment="Processing time in milliseconds") created_at = Column(DateTime, default=datetime.utcnow, nullable=False, index=True) updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False) completed_at = Column(DateTime, nullable=True) file_deleted = Column(Boolean, default=False, nullable=False, comment="Track if files were auto-deleted") deleted_at = Column(DateTime, nullable=True, index=True, comment="Soft delete timestamp - NULL means not deleted") # Relationships user = relationship("User", back_populates="tasks") files = relationship("TaskFile", back_populates="task", cascade="all, delete-orphan") def __repr__(self): return f"" def to_dict(self): """Convert task to dictionary""" return { "id": self.id, "task_id": self.task_id, "filename": self.filename, "file_type": self.file_type, "status": self.status.value if self.status else None, "result_json_path": self.result_json_path, "result_markdown_path": self.result_markdown_path, "result_pdf_path": self.result_pdf_path, "error_message": self.error_message, "processing_time_ms": self.processing_time_ms, "created_at": self.created_at.isoformat() if self.created_at else None, "updated_at": self.updated_at.isoformat() if self.updated_at else None, "completed_at": self.completed_at.isoformat() if self.completed_at else None, "file_deleted": self.file_deleted, "deleted_at": self.deleted_at.isoformat() if self.deleted_at else None } class TaskFile(Base): """ Task file model Stores information about files associated with a task. """ __tablename__ = "tool_ocr_task_files" id = Column(Integer, primary_key=True, index=True, autoincrement=True) task_id = Column(Integer, ForeignKey("tool_ocr_tasks.id", ondelete="CASCADE"), nullable=False, index=True, comment="Foreign key to tasks table") original_name = Column(String(255), nullable=True) stored_path = Column(String(500), nullable=True, comment="Actual file path on server") file_size = Column(Integer, nullable=True, comment="File size in bytes") mime_type = Column(String(100), nullable=True) file_hash = Column(String(64), nullable=True, index=True, comment="SHA256 hash for deduplication") created_at = Column(DateTime, default=datetime.utcnow, nullable=False) # Relationships task = relationship("Task", back_populates="files") def __repr__(self): return f"" def to_dict(self): """Convert task file to dictionary""" return { "id": self.id, "task_id": self.task_id, "original_name": self.original_name, "stored_path": self.stored_path, "file_size": self.file_size, "mime_type": self.mime_type, "file_hash": self.file_hash, "created_at": self.created_at.isoformat() if self.created_at else None }