# OCR/backend/app/models/task.py

"""
Tool_OCR - Task Model
OCR task management with user isolation
"""

from sqlalchemy import Column, Integer, String, DateTime, Boolean, Text, ForeignKey, Enum as SQLEnum
from sqlalchemy.orm import relationship
from datetime import datetime
import enum

from app.core.database import Base


class TaskStatus(str, enum.Enum):
    """
    Lifecycle states of an OCR task

    Members are also ``str`` instances, so each one compares equal to
    its lowercase value.
    """

    # Declaration order is the iteration order of the enum.
    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"


class Task(Base):
    """
    OCR Task model with user association

    Each task belongs to a specific user and stores
    processing status and result file paths.

    Timestamps are produced by ``datetime.utcnow`` and are therefore
    naive (no tzinfo attached).
    """

    __tablename__ = "tool_ocr_tasks"

    # Surrogate integer key; ``task_id`` below is the separate unique
    # string identifier.
    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    user_id = Column(Integer, ForeignKey("tool_ocr_users.id", ondelete="CASCADE"),
                    nullable=False, index=True,
                    comment="Foreign key to users table")
    task_id = Column(String(255), unique=True, nullable=False, index=True,
                    comment="Unique task identifier (UUID)")
    filename = Column(String(255), nullable=True, index=True)
    file_type = Column(String(100), nullable=True)
    status = Column(SQLEnum(TaskStatus), default=TaskStatus.PENDING, nullable=False,
                   index=True)
    result_json_path = Column(String(500), nullable=True,
                             comment="Path to JSON result file")
    result_markdown_path = Column(String(500), nullable=True,
                                 comment="Path to Markdown result file")
    result_pdf_path = Column(String(500), nullable=True,
                           comment="Path to searchable PDF file")
    error_message = Column(Text, nullable=True,
                          comment="Error details if task failed")
    processing_time_ms = Column(Integer, nullable=True,
                               comment="Processing time in milliseconds")
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False, index=True)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow,
                       nullable=False)
    completed_at = Column(DateTime, nullable=True)
    file_deleted = Column(Boolean, default=False, nullable=False,
                         comment="Track if files were auto-deleted")

    # Relationships
    user = relationship("User", back_populates="tasks")
    # Removing a task (or detaching a file from it) also deletes the
    # corresponding TaskFile rows.
    files = relationship("TaskFile", back_populates="task", cascade="all, delete-orphan")

    def __repr__(self):
        """Return a short debug representation of the task.

        Safe to call on an instance whose ``status`` is still ``None``.
        """
        # Column defaults are applied at INSERT time, so a freshly
        # constructed, unflushed Task can have status None; guard the
        # ``.value`` access the same way ``to_dict`` does.
        status = self.status.value if self.status is not None else None
        return f"<Task(id={self.id}, task_id='{self.task_id}', status='{status}')>"

    def to_dict(self):
        """Convert task to dictionary

        Returns:
            dict: Plain-Python view of the task. ``status`` is the enum
            value string and the timestamps are ISO-8601 strings; each of
            those is ``None`` when the underlying attribute is unset.
            ``user_id`` is not part of the result.
        """
        return {
            "id": self.id,
            "task_id": self.task_id,
            "filename": self.filename,
            "file_type": self.file_type,
            "status": self.status.value if self.status else None,
            "result_json_path": self.result_json_path,
            "result_markdown_path": self.result_markdown_path,
            "result_pdf_path": self.result_pdf_path,
            "error_message": self.error_message,
            "processing_time_ms": self.processing_time_ms,
            "created_at": self.created_at.isoformat() if self.created_at else None,
            "updated_at": self.updated_at.isoformat() if self.updated_at else None,
            "completed_at": self.completed_at.isoformat() if self.completed_at else None,
            "file_deleted": self.file_deleted
        }


class TaskFile(Base):
    """
    File record attached to an OCR task

    One row per file linked to a task: original name, stored path,
    size, MIME type and content hash.
    """

    __tablename__ = "tool_ocr_task_files"

    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    task_id = Column(
        Integer,
        ForeignKey("tool_ocr_tasks.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
        comment="Foreign key to tasks table",
    )
    original_name = Column(String(255), nullable=True)
    stored_path = Column(
        String(500),
        nullable=True,
        comment="Actual file path on server",
    )
    file_size = Column(
        Integer,
        nullable=True,
        comment="File size in bytes",
    )
    mime_type = Column(String(100), nullable=True)
    file_hash = Column(
        String(64),
        nullable=True,
        index=True,
        comment="SHA256 hash for deduplication",
    )
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    # Relationships
    task = relationship("Task", back_populates="files")

    def __repr__(self):
        """Return a short debug representation of the file record."""
        return f"<TaskFile(id={self.id}, task_id={self.task_id}, original_name='{self.original_name}')>"

    def to_dict(self):
        """Convert task file to dictionary

        Returns:
            dict: Column values keyed by column name; ``created_at`` is an
            ISO-8601 string, or ``None`` when it is unset.
        """
        # Fields copied through unchanged, in output order.
        plain_fields = (
            "id",
            "task_id",
            "original_name",
            "stored_path",
            "file_size",
            "mime_type",
            "file_hash",
        )
        payload = {field: getattr(self, field) for field in plain_fields}

        # The timestamp is the only field that needs conversion.
        if self.created_at:
            payload["created_at"] = self.created_at.isoformat()
        else:
            payload["created_at"] = None
        return payload