first
This commit is contained in:
17
backend/app/models/__init__.py
Normal file
17
backend/app/models/__init__.py
Normal file
@@ -0,0 +1,17 @@
|
||||
"""
|
||||
Tool_OCR - Database Models
|
||||
"""
|
||||
|
||||
from app.models.user import User
|
||||
from app.models.ocr import OCRBatch, OCRFile, OCRResult
|
||||
from app.models.export import ExportRule
|
||||
from app.models.translation import TranslationConfig
|
||||
|
||||
# Names re-exported by the models package (one entry per imported model).
__all__ = [
    "User", "OCRBatch", "OCRFile", "OCRResult",
    "ExportRule", "TranslationConfig",
]
|
||||
55
backend/app/models/export.py
Normal file
55
backend/app/models/export.py
Normal file
@@ -0,0 +1,55 @@
|
||||
"""
|
||||
Tool_OCR - Export Rule Model
|
||||
User-defined export rules and formatting configurations
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, Integer, String, DateTime, Text, ForeignKey, JSON
|
||||
from sqlalchemy.orm import relationship
|
||||
from datetime import datetime
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class ExportRule(Base):
    """User-owned export rule: filters, formatting and output options."""

    __tablename__ = "paddle_ocr_export_rules"

    id = Column(Integer, primary_key=True, index=True)
    # Owning user; the row is removed with the user (ON DELETE CASCADE).
    user_id = Column(
        Integer,
        ForeignKey("paddle_ocr_users.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    rule_name = Column(String(100), nullable=False)
    description = Column(Text, nullable=True)

    # The rule itself is kept as a JSON document. Example layout:
    #
    #   {
    #     "filters": {
    #       "confidence_threshold": 0.8,
    #       "filename_pattern": "invoice_*",
    #       "language": "ch"
    #     },
    #     "formatting": {
    #       "add_line_numbers": true,
    #       "sort_by_position": true,
    #       "group_by_filename": false
    #     },
    #     "export_options": {
    #       "include_metadata": true,
    #       "include_confidence": true,
    #       "include_bounding_boxes": false
    #     }
    #   }
    config_json = Column(JSON, nullable=False)

    # Optional CSS used for PDF export: either the name of a predefined
    # template ("default", "academic", "business", "report") or custom CSS.
    css_template = Column(Text, nullable=True)

    # Timestamps are naive UTC values produced by datetime.utcnow.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
        nullable=False,
    )

    # Relationships
    user = relationship("User", back_populates="export_rules")

    def __repr__(self):
        """Return a short debug representation (id, rule name, owner id)."""
        template = "<ExportRule(id={}, name='{}', user_id={})>"
        return template.format(self.id, self.rule_name, self.user_id)
|
||||
122
backend/app/models/ocr.py
Normal file
122
backend/app/models/ocr.py
Normal file
@@ -0,0 +1,122 @@
|
||||
"""
|
||||
Tool_OCR - OCR Models
|
||||
Database models for OCR batches, files, and results
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, Integer, String, DateTime, Float, Text, ForeignKey, Enum, JSON
|
||||
from sqlalchemy.orm import relationship
|
||||
from datetime import datetime
|
||||
import enum
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class BatchStatus(str, enum.Enum):
    """Lifecycle states of an OCR batch (members are also plain strings)."""

    # Batch exists but work has not begun.
    PENDING = "pending"
    # Files of the batch are currently being processed.
    PROCESSING = "processing"
    # Batch finished.
    COMPLETED = "completed"
    # Batch finished, but some files failed.
    PARTIAL = "partial"
    # Batch failed.
    FAILED = "failed"
|
||||
|
||||
|
||||
class FileStatus(str, enum.Enum):
    """Lifecycle states of a single file (members are also plain strings)."""

    # File is registered but not yet picked up.
    PENDING = "pending"
    # File is currently being processed.
    PROCESSING = "processing"
    # Processing finished.
    COMPLETED = "completed"
    # Processing failed.
    FAILED = "failed"
|
||||
|
||||
|
||||
class OCRBatch(Base):
    """Tracks one OCR batch: its owner, status, file counters and timestamps."""

    __tablename__ = "paddle_ocr_batches"

    id = Column(Integer, primary_key=True, index=True)
    # Owning user; the row is removed with the user (ON DELETE CASCADE).
    user_id = Column(
        Integer,
        ForeignKey("paddle_ocr_users.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    batch_name = Column(String(255), nullable=True)
    status = Column(
        Enum(BatchStatus),
        default=BatchStatus.PENDING,
        nullable=False,
        index=True,
    )

    # File counters. The defaults are applied when the row is inserted, so
    # these attributes are None on an instance that has not been flushed.
    total_files = Column(Integer, default=0, nullable=False)
    completed_files = Column(Integer, default=0, nullable=False)
    failed_files = Column(Integer, default=0, nullable=False)

    # Timestamps are naive UTC values produced by datetime.utcnow.
    created_at = Column(
        DateTime, default=datetime.utcnow, nullable=False, index=True
    )
    started_at = Column(DateTime, nullable=True)
    completed_at = Column(DateTime, nullable=True)

    # Relationships
    user = relationship("User", back_populates="ocr_batches")
    files = relationship(
        "OCRFile", back_populates="batch", cascade="all, delete-orphan"
    )

    @property
    def progress_percentage(self) -> float:
        """Return completed files as a percentage of total files.

        Returns 0.0 when the batch has no files. Counters that are still
        None (instance not yet flushed, so column defaults have not been
        applied) are treated as 0 instead of raising TypeError.
        """
        total = self.total_files or 0
        if not total:
            return 0.0
        completed = self.completed_files or 0
        return (completed / total) * 100

    def __repr__(self):
        """Return a short debug representation (id, status, progress)."""
        return f"<OCRBatch(id={self.id}, status='{self.status}', progress={self.progress_percentage:.1f}%)>"
|
||||
|
||||
|
||||
class OCRFile(Base):
    """One uploaded file belonging to an OCR batch."""

    __tablename__ = "paddle_ocr_files"

    id = Column(Integer, primary_key=True, index=True)
    # Parent batch; the row is removed with the batch (ON DELETE CASCADE).
    batch_id = Column(
        Integer,
        ForeignKey("paddle_ocr_batches.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    # File identity and storage details.
    filename = Column(String(255), nullable=False)
    original_filename = Column(String(255), nullable=False)
    file_path = Column(String(512), nullable=False)
    file_size = Column(Integer, nullable=False)  # bytes
    file_format = Column(String(20), nullable=False)  # e.g. png, jpg, pdf

    # Processing state.
    status = Column(
        Enum(FileStatus),
        default=FileStatus.PENDING,
        nullable=False,
        index=True,
    )
    error_message = Column(Text, nullable=True)
    retry_count = Column(Integer, default=0, nullable=False)  # retry attempts

    # Timing information.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    started_at = Column(DateTime, nullable=True)
    completed_at = Column(DateTime, nullable=True)
    processing_time = Column(Float, nullable=True)  # seconds

    # Relationships
    batch = relationship("OCRBatch", back_populates="files")
    # At most one result per file (uselist=False).
    result = relationship(
        "OCRResult",
        back_populates="file",
        uselist=False,
        cascade="all, delete-orphan",
    )

    def __repr__(self):
        """Return a short debug representation (id, filename, status)."""
        template = "<OCRFile(id={}, filename='{}', status='{}')>"
        return template.format(self.id, self.filename, self.status)
|
||||
|
||||
|
||||
class OCRResult(Base):
    """Stored outcome of OCR for one file: output paths, metadata, layout."""

    __tablename__ = "paddle_ocr_results"

    id = Column(Integer, primary_key=True, index=True)
    # Unique per file, so each file has at most one result row.
    file_id = Column(
        Integer,
        ForeignKey("paddle_ocr_files.id", ondelete="CASCADE"),
        unique=True,
        nullable=False,
        index=True,
    )

    # Locations of generated artifacts.
    markdown_path = Column(String(512), nullable=True)  # Markdown output file
    json_path = Column(String(512), nullable=True)  # JSON output file
    images_dir = Column(String(512), nullable=True)  # extracted images folder

    # Metadata about the recognition run.
    detected_language = Column(String(20), nullable=True)  # ch, en, japan, korean
    total_text_regions = Column(Integer, default=0, nullable=False)
    average_confidence = Column(Float, nullable=True)

    # JSON layout structure: layout elements (title, paragraph, table, image,
    # formula), reading order and bounding boxes.
    layout_data = Column(JSON, nullable=True)

    # JSON metadata for extracted images: a list of
    # {image_path, bbox, element_type} entries.
    images_metadata = Column(JSON, nullable=True)

    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    # Relationships
    file = relationship("OCRFile", back_populates="result")

    def __repr__(self):
        """Return a short debug representation (id, file id, language)."""
        template = "<OCRResult(id={}, file_id={}, language='{}')>"
        return template.format(self.id, self.file_id, self.detected_language)
|
||||
43
backend/app/models/translation.py
Normal file
43
backend/app/models/translation.py
Normal file
@@ -0,0 +1,43 @@
|
||||
"""
|
||||
Tool_OCR - Translation Config Model (RESERVED)
|
||||
Reserved for future translation feature implementation
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, JSON
|
||||
from sqlalchemy.orm import relationship
|
||||
from datetime import datetime
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class TranslationConfig(Base):
    """
    Per-user translation settings (RESERVED for a future feature).

    The table is created, but it is not actively used until the translation
    feature is implemented.
    """

    __tablename__ = "paddle_ocr_translation_configs"

    id = Column(Integer, primary_key=True, index=True)
    # Owning user; the row is removed with the user (ON DELETE CASCADE).
    user_id = Column(
        Integer,
        ForeignKey("paddle_ocr_users.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    # Language pair, using codes such as ch, en, japan, korean.
    source_lang = Column(String(20), nullable=False)
    target_lang = Column(String(20), nullable=False)

    # Engine selector: "offline" (argostranslate), "ernie", "google", "deepl".
    engine_type = Column(String(50), nullable=False, default="offline")

    # Engine-specific settings as JSON, for example:
    #   offline (argostranslate): {"model_path": "/path/to/model"}
    #   API-based engines:        {"api_key": "xxx", "endpoint": "https://..."}
    # NOTE(review): API keys would sit in this column as plain JSON; confirm
    # that is acceptable before the feature goes live.
    engine_config = Column(JSON, nullable=True)

    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
        nullable=False,
    )

    # Relationships
    user = relationship("User", back_populates="translation_configs")

    def __repr__(self):
        """Return a short debug representation (id, language pair, engine)."""
        template = "<TranslationConfig(id={}, {}->{}, engine='{}')>"
        return template.format(
            self.id, self.source_lang, self.target_lang, self.engine_type
        )
|
||||
34
backend/app/models/user.py
Normal file
34
backend/app/models/user.py
Normal file
@@ -0,0 +1,34 @@
|
||||
"""
|
||||
Tool_OCR - User Model
|
||||
User authentication and management
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, Integer, String, DateTime, Boolean
|
||||
from sqlalchemy.orm import relationship
|
||||
from datetime import datetime
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class User(Base):
    """Application user account: login identity, flags and owned records."""

    __tablename__ = "paddle_ocr_users"

    id = Column(Integer, primary_key=True, index=True)

    # Identity; username and email are each unique and indexed.
    username = Column(String(50), unique=True, nullable=False, index=True)
    email = Column(String(100), unique=True, nullable=False, index=True)
    password_hash = Column(String(255), nullable=False)
    full_name = Column(String(100), nullable=True)

    # Account flags.
    is_active = Column(Boolean, default=True, nullable=False)
    is_admin = Column(Boolean, default=False, nullable=False)

    # NOTE(review): datetime.utcnow produces naive UTC timestamps and is
    # deprecated as of Python 3.12.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
        nullable=False,
    )

    # Relationships; children are deleted together with the user.
    ocr_batches = relationship(
        "OCRBatch", back_populates="user", cascade="all, delete-orphan"
    )
    export_rules = relationship(
        "ExportRule", back_populates="user", cascade="all, delete-orphan"
    )
    translation_configs = relationship(
        "TranslationConfig", back_populates="user", cascade="all, delete-orphan"
    )

    def __repr__(self):
        """Return a short debug representation (id, username, email)."""
        template = "<User(id={}, username='{}', email='{}')>"
        return template.format(self.id, self.username, self.email)
|
||||
Reference in New Issue
Block a user