This commit is contained in:
beabigegg
2025-11-12 22:53:17 +08:00
commit da700721fa
130 changed files with 23393 additions and 0 deletions

1
backend/alembic/README Normal file
View File

@@ -0,0 +1 @@
Generic single-database configuration.

91
backend/alembic/env.py Normal file
View File

@@ -0,0 +1,91 @@
from logging.config import fileConfig
import sys
from pathlib import Path
from sqlalchemy import engine_from_config
from sqlalchemy import pool
from alembic import context
# Add parent directory to Python path to import app modules
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
# Import application settings and models
from app.core.config import settings
from app.core.database import Base
# Import all models to ensure they're registered with Base.metadata
from app.models import User, OCRBatch, OCRFile, OCRResult, ExportRule, TranslationConfig
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config

# Set sqlalchemy.url from the application settings.
# Alembic keeps its options in a ConfigParser with interpolation enabled, so
# a literal "%" (for example in a URL-encoded password such as "p%40ss") has
# to be written as "%%". Reading the option back yields the single "%" again,
# so both the offline and the online runner receive the original URL.
config.set_main_option(
    "sqlalchemy.url",
    settings.database_url.replace("%", "%%"),
)

# Interpret the config file for Python logging.
# This sets up the loggers; skipped when Alembic was started without an
# .ini file (config_file_name is None).
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Metadata of the application models, used for 'autogenerate' support.
target_metadata = Base.metadata

# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    The context is configured from the ``sqlalchemy.url`` option alone; no
    Engine is created here, so no DBAPI has to be available (an Engine
    would be acceptable as well).

    Calls to context.execute() emit the given string to the script output.
    """
    context.configure(
        url=config.get_main_option("sqlalchemy.url"),
        target_metadata=target_metadata,
        # Render bound parameters inline as literals in the emitted SQL.
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )

    with context.begin_transaction():
        context.run_migrations()
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    An Engine is built from the ``sqlalchemy.``-prefixed options of the
    active ini section, and a connection from it is associated with the
    migration context.
    """
    ini_section = config.get_section(config.config_ini_section, {})
    engine = engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with engine.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()
# Pick the runner that matches the mode reported by the Alembic context.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()

View File

@@ -0,0 +1,28 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
## NOTE(review): lines starting with '##' are Mako comments and should not
## appear in rendered revision files - confirm this file is rendered by Mako.
## Each ${repr(...)} placeholder below is replaced by the Python literal of
## the corresponding value when a revision script is generated.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
## Function body: the supplied upgrade operations, or "pass" when there are none.
def upgrade() -> None:
"""Upgrade schema."""
${upgrades if upgrades else "pass"}
## Function body: the supplied downgrade operations, or "pass" when there are none.
def downgrade() -> None:
"""Downgrade schema."""
${downgrades if downgrades else "pass"}

View File

@@ -0,0 +1,31 @@
"""add_retry_count_to_files
Revision ID: 271dc036ea80
Revises: a7802b126240
Create Date: 2025-11-12 01:48:34.258048
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
# `revision` is this script's own ID; `down_revision` is the ID of the
# parent revision this script is applied on top of.
revision: str = '271dc036ea80'
down_revision: Union[str, None] = 'a7802b126240'
# No branch labels and no dependencies on other revisions are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add the ``retry_count`` column to the ``paddle_ocr_files`` table.

    The column is an integer declared NOT NULL with a server-side default
    of '0'.
    """
    retry_count = sa.Column(
        'retry_count',
        sa.Integer(),
        nullable=False,
        server_default='0',
    )
    op.add_column('paddle_ocr_files', retry_count)
def downgrade() -> None:
    """Drop the ``retry_count`` column from ``paddle_ocr_files`` again."""
    table, column = 'paddle_ocr_files', 'retry_count'
    op.drop_column(table, column)

View File

@@ -0,0 +1,154 @@
"""Initial migration with paddle_ocr prefix
Revision ID: a7802b126240
Revises:
Create Date: 2025-11-12 00:46:58.519941
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
# revision identifiers, used by Alembic.
# `down_revision` is None: this script declares no parent revision, which
# matches the empty "Revises:" field in the module docstring.
revision: str = 'a7802b126240'
down_revision: Union[str, None] = None
# No branch labels and no dependencies on other revisions are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the six ``paddle_ocr_*`` tables together with their indexes.

    Tables are created parent-first so every foreign key target already
    exists: users, then batches / export_rules / translation_configs
    (all referencing users), then files (referencing batches), then
    results (referencing files).
    """

    def add_indexes(table: str, *specs) -> None:
        # Each spec is (column, unique); the index is named ix_<table>_<column>
        # and created in the order given.
        for column, is_unique in specs:
            op.create_index(
                op.f(f'ix_{table}_{column}'),
                table,
                [column],
                unique=is_unique,
            )

    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        'paddle_ocr_users',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('username', sa.String(length=50), nullable=False),
        sa.Column('email', sa.String(length=100), nullable=False),
        sa.Column('password_hash', sa.String(length=255), nullable=False),
        sa.Column('full_name', sa.String(length=100), nullable=True),
        sa.Column('is_active', sa.Boolean(), nullable=False),
        sa.Column('is_admin', sa.Boolean(), nullable=False),
        sa.Column('created_at', sa.DateTime(), nullable=False),
        sa.Column('updated_at', sa.DateTime(), nullable=False),
        sa.PrimaryKeyConstraint('id'),
    )
    add_indexes(
        'paddle_ocr_users',
        ('email', True),
        ('id', False),
        ('username', True),
    )

    op.create_table(
        'paddle_ocr_batches',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('user_id', sa.Integer(), nullable=False),
        sa.Column('batch_name', sa.String(length=255), nullable=True),
        sa.Column(
            'status',
            sa.Enum('PENDING', 'PROCESSING', 'COMPLETED', 'PARTIAL', 'FAILED', name='batchstatus'),
            nullable=False,
        ),
        sa.Column('total_files', sa.Integer(), nullable=False),
        sa.Column('completed_files', sa.Integer(), nullable=False),
        sa.Column('failed_files', sa.Integer(), nullable=False),
        sa.Column('created_at', sa.DateTime(), nullable=False),
        sa.Column('started_at', sa.DateTime(), nullable=True),
        sa.Column('completed_at', sa.DateTime(), nullable=True),
        sa.ForeignKeyConstraint(['user_id'], ['paddle_ocr_users.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id'),
    )
    add_indexes(
        'paddle_ocr_batches',
        ('created_at', False),
        ('id', False),
        ('status', False),
        ('user_id', False),
    )

    op.create_table(
        'paddle_ocr_export_rules',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('user_id', sa.Integer(), nullable=False),
        sa.Column('rule_name', sa.String(length=100), nullable=False),
        sa.Column('description', sa.Text(), nullable=True),
        sa.Column('config_json', sa.JSON(), nullable=False),
        sa.Column('css_template', sa.Text(), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=False),
        sa.Column('updated_at', sa.DateTime(), nullable=False),
        sa.ForeignKeyConstraint(['user_id'], ['paddle_ocr_users.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id'),
    )
    add_indexes(
        'paddle_ocr_export_rules',
        ('id', False),
        ('user_id', False),
    )

    op.create_table(
        'paddle_ocr_translation_configs',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('user_id', sa.Integer(), nullable=False),
        sa.Column('source_lang', sa.String(length=20), nullable=False),
        sa.Column('target_lang', sa.String(length=20), nullable=False),
        sa.Column('engine_type', sa.String(length=50), nullable=False),
        sa.Column('engine_config', sa.JSON(), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=False),
        sa.Column('updated_at', sa.DateTime(), nullable=False),
        sa.ForeignKeyConstraint(['user_id'], ['paddle_ocr_users.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id'),
    )
    add_indexes(
        'paddle_ocr_translation_configs',
        ('id', False),
        ('user_id', False),
    )

    op.create_table(
        'paddle_ocr_files',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('batch_id', sa.Integer(), nullable=False),
        sa.Column('filename', sa.String(length=255), nullable=False),
        sa.Column('original_filename', sa.String(length=255), nullable=False),
        sa.Column('file_path', sa.String(length=512), nullable=False),
        sa.Column('file_size', sa.Integer(), nullable=False),
        sa.Column('file_format', sa.String(length=20), nullable=False),
        sa.Column(
            'status',
            sa.Enum('PENDING', 'PROCESSING', 'COMPLETED', 'FAILED', name='filestatus'),
            nullable=False,
        ),
        sa.Column('error_message', sa.Text(), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=False),
        sa.Column('started_at', sa.DateTime(), nullable=True),
        sa.Column('completed_at', sa.DateTime(), nullable=True),
        sa.Column('processing_time', sa.Float(), nullable=True),
        sa.ForeignKeyConstraint(['batch_id'], ['paddle_ocr_batches.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id'),
    )
    add_indexes(
        'paddle_ocr_files',
        ('batch_id', False),
        ('id', False),
        ('status', False),
    )

    op.create_table(
        'paddle_ocr_results',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('file_id', sa.Integer(), nullable=False),
        sa.Column('markdown_path', sa.String(length=512), nullable=True),
        sa.Column('json_path', sa.String(length=512), nullable=True),
        sa.Column('images_dir', sa.String(length=512), nullable=True),
        sa.Column('detected_language', sa.String(length=20), nullable=True),
        sa.Column('total_text_regions', sa.Integer(), nullable=False),
        sa.Column('average_confidence', sa.Float(), nullable=True),
        sa.Column('layout_data', sa.JSON(), nullable=True),
        sa.Column('images_metadata', sa.JSON(), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=False),
        sa.ForeignKeyConstraint(['file_id'], ['paddle_ocr_files.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id'),
    )
    # The unique index on file_id allows at most one result row per file.
    add_indexes(
        'paddle_ocr_results',
        ('file_id', True),
        ('id', False),
    )
    # NOTE: Removed all drop_table/drop_index commands to preserve existing tables in shared database
    # ### end Alembic commands ###
def downgrade() -> None:
    """Downgrade schema - removes all paddle_ocr_ tables.

    Tables are dropped child-first (the reverse of the creation order in
    upgrade()) so that no table is removed while another one still holds a
    foreign key to it.

    No separate ``drop_index`` calls are issued: dropping a table removes
    its indexes with it. Dropping the indexes on the foreign-key columns
    (``file_id``, ``batch_id``, ``user_id``) first is rejected by MySQL
    with error 1553 ("Cannot drop index ...: needed in a foreign key
    constraint"), which would abort the downgrade before any table is
    removed.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # Drop paddle_ocr tables in reverse order
    tables_child_first = (
        'paddle_ocr_results',              # references paddle_ocr_files
        'paddle_ocr_files',                # references paddle_ocr_batches
        'paddle_ocr_translation_configs',  # references paddle_ocr_users
        'paddle_ocr_export_rules',         # references paddle_ocr_users
        'paddle_ocr_batches',              # references paddle_ocr_users
        'paddle_ocr_users',
    )
    for table_name in tables_child_first:
        op.drop_table(table_name)
    # NOTE: We do NOT recreate other tables that existed before this migration
    # ### end Alembic commands ###