Files
OCR/backend/app/models/task.py
egg ee49751c38 fix: add UTC timezone indicator to all datetime serialization
Database stores times in UTC but serialized without timezone info,
causing frontend to misinterpret as local time. Now all datetime
fields include 'Z' suffix to indicate UTC, enabling proper timezone
conversion in the browser.

- Add UTCDatetimeBaseModel base class for Pydantic schemas
- Update model to_dict() methods to append 'Z' suffix
- Affects: tasks, users, sessions, audit logs, translations

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 15:48:17 +08:00

137 lines
5.5 KiB
Python

"""
Tool_OCR - Task Model
OCR task management with user isolation
"""
from sqlalchemy import Column, Integer, String, DateTime, Boolean, Text, ForeignKey, Enum as SQLEnum
from sqlalchemy.orm import relationship
from datetime import datetime
import enum
from app.core.database import Base
class TaskStatus(str, enum.Enum):
    """Lifecycle states an OCR task can be in.

    The enum also subclasses ``str``, so every member compares equal to
    its lowercase string value (``TaskStatus.PENDING == "pending"``).
    """

    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
class Task(Base):
    """
    OCR Task model with user association.

    Each task belongs to a specific user and stores
    processing status and result file paths.
    """

    __tablename__ = "tool_ocr_tasks"

    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    user_id = Column(Integer, ForeignKey("tool_ocr_users.id", ondelete="CASCADE"),
                     nullable=False, index=True,
                     comment="Foreign key to users table")
    task_id = Column(String(255), unique=True, nullable=False, index=True,
                     comment="Unique task identifier (UUID)")
    filename = Column(String(255), nullable=True, index=True)
    file_type = Column(String(100), nullable=True)
    status = Column(SQLEnum(TaskStatus), default=TaskStatus.PENDING, nullable=False,
                    index=True)
    result_json_path = Column(String(500), nullable=True,
                              comment="Path to JSON result file")
    result_markdown_path = Column(String(500), nullable=True,
                                  comment="Path to Markdown result file")
    result_pdf_path = Column(String(500), nullable=True,
                             comment="Path to searchable PDF file")
    error_message = Column(Text, nullable=True,
                           comment="Error details if task failed")
    processing_time_ms = Column(Integer, nullable=True,
                                comment="Processing time in milliseconds")
    # Timestamps default to datetime.utcnow, i.e. naive datetimes in UTC.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False, index=True)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow,
                        nullable=False)
    completed_at = Column(DateTime, nullable=True)
    file_deleted = Column(Boolean, default=False, nullable=False,
                          comment="Track if files were auto-deleted")
    deleted_at = Column(DateTime, nullable=True, index=True,
                        comment="Soft delete timestamp - NULL means not deleted")

    # Relationships
    user = relationship("User", back_populates="tasks")
    files = relationship("TaskFile", back_populates="task", cascade="all, delete-orphan")

    def __repr__(self):
        """Return a short debug representation; never raises on a missing status."""
        # Column defaults are applied on INSERT, so ``status`` can still be
        # None on a freshly constructed instance. Fall back to the raw
        # attribute instead of dereferencing ``.value`` unconditionally.
        status = getattr(self.status, "value", self.status)
        return f"<Task(id={self.id}, task_id='{self.task_id}', status='{status}')>"

    @staticmethod
    def _utc_iso(value):
        """Serialize a datetime as ISO-8601 with a 'Z' suffix; None passes through.

        Naive datetimes are taken to be UTC already. Timezone-aware datetimes
        are shifted to UTC and stripped of tzinfo first, so the result never
        carries both a numeric offset and 'Z' (e.g. '...+00:00Z').
        """
        if value is None:
            return None
        offset = value.utcoffset()
        if offset is not None:
            value = (value - offset).replace(tzinfo=None)
        return value.isoformat() + 'Z'

    def to_dict(self):
        """Convert task to dictionary.

        All datetime fields are serialized with 'Z' suffix to indicate UTC timezone.
        This ensures proper timezone conversion in the frontend.
        Unset datetime fields and an unset status are returned as None.
        """
        return {
            "id": self.id,
            "task_id": self.task_id,
            "filename": self.filename,
            "file_type": self.file_type,
            "status": self.status.value if self.status else None,
            "result_json_path": self.result_json_path,
            "result_markdown_path": self.result_markdown_path,
            "result_pdf_path": self.result_pdf_path,
            "error_message": self.error_message,
            "processing_time_ms": self.processing_time_ms,
            "created_at": self._utc_iso(self.created_at),
            "updated_at": self._utc_iso(self.updated_at),
            "completed_at": self._utc_iso(self.completed_at),
            "file_deleted": self.file_deleted,
            "deleted_at": self._utc_iso(self.deleted_at),
        }
class TaskFile(Base):
    """
    Task file model.

    Stores information about files associated with a task.
    """

    __tablename__ = "tool_ocr_task_files"

    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    task_id = Column(Integer, ForeignKey("tool_ocr_tasks.id", ondelete="CASCADE"),
                     nullable=False, index=True,
                     comment="Foreign key to tasks table")
    original_name = Column(String(255), nullable=True)
    stored_path = Column(String(500), nullable=True,
                         comment="Actual file path on server")
    file_size = Column(Integer, nullable=True,
                       comment="File size in bytes")
    mime_type = Column(String(100), nullable=True)
    file_hash = Column(String(64), nullable=True, index=True,
                       comment="SHA256 hash for deduplication")
    # Defaults to datetime.utcnow, i.e. a naive datetime in UTC.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    # Relationships
    task = relationship("Task", back_populates="files")

    def __repr__(self):
        """Return a short debug representation of the file row."""
        return f"<TaskFile(id={self.id}, task_id={self.task_id}, original_name='{self.original_name}')>"

    @staticmethod
    def _utc_iso(value):
        """Serialize a datetime as ISO-8601 with a 'Z' suffix; None passes through.

        Naive datetimes are taken to be UTC already. Timezone-aware datetimes
        are shifted to UTC and stripped of tzinfo first, so the result never
        carries both a numeric offset and 'Z' (e.g. '...+00:00Z').
        """
        if value is None:
            return None
        offset = value.utcoffset()
        if offset is not None:
            value = (value - offset).replace(tzinfo=None)
        return value.isoformat() + 'Z'

    def to_dict(self):
        """Convert task file to dictionary.

        All datetime fields are serialized with 'Z' suffix to indicate UTC timezone.
        An unset ``created_at`` is returned as None.
        """
        return {
            "id": self.id,
            "task_id": self.task_id,
            "original_name": self.original_name,
            "stored_path": self.stored_path,
            "file_size": self.file_size,
            "mime_type": self.mime_type,
            "file_hash": self.file_hash,
            "created_at": self._utc_iso(self.created_at),
        }