feat: add database table prefix and complete schema definition

Added `tool_ocr_` prefix to all database tables for clear separation
from other systems in the same database.

Changes:
- All tables now use `tool_ocr_` prefix
- Added tool_ocr_sessions table for token management
- Created complete SQL schema file with:
  - Full table definitions with comments
  - Indexes for performance
  - Views for common queries
  - Stored procedures for maintenance
  - Audit log table (optional)

New files:
- database_schema.sql: Ready-to-use SQL script for deployment

Configuration:
- Added DATABASE_TABLE_PREFIX environment variable
- Updated all references to use prefixed table names

Benefits:
- Clear namespace separation in shared databases
- Easier identification of Tool_OCR tables
- Prevent conflicts with other applications

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
egg
2025-11-14 15:40:24 +08:00
parent 88f9fef2d4
commit 470fa96428
3 changed files with 223 additions and 18 deletions

View File

@@ -0,0 +1,183 @@
-- Tool_OCR Database Schema with External API Authentication
-- Version: 2.0.0
-- Date: 2025-11-14
-- Description: Complete database redesign with user task isolation and history
-- ============================================
-- Drop existing tables (if needed)
-- ============================================
-- Uncomment these lines to drop existing tables
-- DROP TABLE IF EXISTS tool_ocr_sessions;
-- DROP TABLE IF EXISTS tool_ocr_task_files;
-- DROP TABLE IF EXISTS tool_ocr_tasks;
-- DROP TABLE IF EXISTS tool_ocr_users;
-- ============================================
-- 1. Users Table
-- ============================================
CREATE TABLE IF NOT EXISTS tool_ocr_users (
id INT PRIMARY KEY AUTO_INCREMENT,
email VARCHAR(255) UNIQUE NOT NULL COMMENT 'Primary identifier from Azure AD',
display_name VARCHAR(255) COMMENT 'Display name from API response',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
last_login TIMESTAMP NULL,
is_active BOOLEAN DEFAULT TRUE,
INDEX idx_email (email),
INDEX idx_active (is_active)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
COMMENT='User accounts authenticated via external API';
-- ============================================
-- 2. OCR Tasks Table
-- ============================================
CREATE TABLE IF NOT EXISTS tool_ocr_tasks (
id INT PRIMARY KEY AUTO_INCREMENT,
user_id INT NOT NULL COMMENT 'Foreign key to users table',
task_id VARCHAR(255) UNIQUE NOT NULL COMMENT 'Unique task identifier (UUID)',
filename VARCHAR(255),
file_type VARCHAR(50),
status ENUM('pending', 'processing', 'completed', 'failed') DEFAULT 'pending',
result_json_path VARCHAR(500) COMMENT 'Path to JSON result file',
result_markdown_path VARCHAR(500) COMMENT 'Path to Markdown result file',
result_pdf_path VARCHAR(500) COMMENT 'Path to searchable PDF file',
error_message TEXT COMMENT 'Error details if task failed',
processing_time_ms INT COMMENT 'Processing time in milliseconds',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
completed_at TIMESTAMP NULL,
file_deleted BOOLEAN DEFAULT FALSE COMMENT 'Track if files were auto-deleted',
FOREIGN KEY (user_id) REFERENCES tool_ocr_users(id) ON DELETE CASCADE,
INDEX idx_user_status (user_id, status),
INDEX idx_created (created_at),
INDEX idx_task_id (task_id),
INDEX idx_filename (filename)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
COMMENT='OCR processing tasks with user association';
-- ============================================
-- 3. Task Files Table
-- ============================================
CREATE TABLE IF NOT EXISTS tool_ocr_task_files (
id INT PRIMARY KEY AUTO_INCREMENT,
task_id INT NOT NULL COMMENT 'Foreign key to tasks table',
original_name VARCHAR(255),
stored_path VARCHAR(500) COMMENT 'Actual file path on server',
file_size BIGINT COMMENT 'File size in bytes',
mime_type VARCHAR(100),
file_hash VARCHAR(64) COMMENT 'SHA256 hash for deduplication',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (task_id) REFERENCES tool_ocr_tasks(id) ON DELETE CASCADE,
INDEX idx_task (task_id),
INDEX idx_hash (file_hash)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
COMMENT='Files associated with OCR tasks';
-- ============================================
-- 4. Sessions Table (Token Storage)
-- ============================================
CREATE TABLE IF NOT EXISTS tool_ocr_sessions (
id INT PRIMARY KEY AUTO_INCREMENT,
user_id INT NOT NULL COMMENT 'Foreign key to users table',
session_id VARCHAR(255) UNIQUE NOT NULL COMMENT 'Unique session identifier',
access_token TEXT COMMENT 'Azure AD access token (encrypted)',
id_token TEXT COMMENT 'Azure AD ID token (encrypted)',
refresh_token TEXT COMMENT 'Refresh token if available',
expires_at TIMESTAMP NOT NULL COMMENT 'Token expiration time',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
last_accessed TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
is_active BOOLEAN DEFAULT TRUE,
ip_address VARCHAR(45) COMMENT 'Client IP address',
user_agent TEXT COMMENT 'Client user agent',
FOREIGN KEY (user_id) REFERENCES tool_ocr_users(id) ON DELETE CASCADE,
INDEX idx_user (user_id),
INDEX idx_session (session_id),
INDEX idx_expires (expires_at),
INDEX idx_active (is_active)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
COMMENT='User session and token management';
-- ============================================
-- 5. Audit Log Table (Optional)
-- ============================================
CREATE TABLE IF NOT EXISTS tool_ocr_audit_logs (
id BIGINT PRIMARY KEY AUTO_INCREMENT,
user_id INT COMMENT 'User who performed the action',
action VARCHAR(100) NOT NULL COMMENT 'Action performed',
entity_type VARCHAR(50) COMMENT 'Type of entity affected',
entity_id INT COMMENT 'ID of entity affected',
details JSON COMMENT 'Additional details in JSON format',
ip_address VARCHAR(45),
user_agent TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
INDEX idx_user (user_id),
INDEX idx_action (action),
INDEX idx_created (created_at)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
COMMENT='Audit trail for all system actions';
-- ============================================
-- Views for Common Queries
-- ============================================
-- User task statistics view
CREATE OR REPLACE VIEW tool_ocr_user_stats AS
SELECT
u.id as user_id,
u.email,
u.display_name,
COUNT(DISTINCT t.id) as total_tasks,
SUM(CASE WHEN t.status = 'completed' THEN 1 ELSE 0 END) as completed_tasks,
SUM(CASE WHEN t.status = 'failed' THEN 1 ELSE 0 END) as failed_tasks,
SUM(CASE WHEN t.status = 'processing' THEN 1 ELSE 0 END) as processing_tasks,
SUM(CASE WHEN t.status = 'pending' THEN 1 ELSE 0 END) as pending_tasks,
AVG(t.processing_time_ms) as avg_processing_time_ms,
MAX(t.created_at) as last_task_created
FROM tool_ocr_users u
LEFT JOIN tool_ocr_tasks t ON u.id = t.user_id
GROUP BY u.id, u.email, u.display_name;
-- Recent tasks view
CREATE OR REPLACE VIEW tool_ocr_recent_tasks AS
SELECT
t.*,
u.email as user_email,
u.display_name as user_name
FROM tool_ocr_tasks t
INNER JOIN tool_ocr_users u ON t.user_id = u.id
ORDER BY t.created_at DESC
LIMIT 100;
-- ============================================
-- Stored Procedures (Optional)
-- ============================================
DELIMITER $$
-- Procedure to clean up expired sessions
CREATE PROCEDURE IF NOT EXISTS cleanup_expired_sessions()
BEGIN
DELETE FROM tool_ocr_sessions
WHERE expires_at < NOW() OR is_active = FALSE;
END$$
-- Procedure to clean up old tasks
CREATE PROCEDURE IF NOT EXISTS cleanup_old_tasks(IN days_to_keep INT)
BEGIN
UPDATE tool_ocr_tasks
SET file_deleted = TRUE
WHERE created_at < DATE_SUB(NOW(), INTERVAL days_to_keep DAY)
AND status IN ('completed', 'failed');
END$$
DELIMITER ;
-- ============================================
-- Initial Data (Optional)
-- ============================================
-- Add any initial data here if needed
-- ============================================
-- Grants (Adjust as needed)
-- ============================================
-- GRANT ALL PRIVILEGES ON tool_ocr_* TO 'tool_ocr_user'@'localhost';
-- FLUSH PRIVILEGES;