Added `tool_ocr_` prefix to all database tables for clear separation from other systems in the same database. Changes: - All tables now use `tool_ocr_` prefix - Added tool_ocr_sessions table for token management - Created complete SQL schema file with: - Full table definitions with comments - Indexes for performance - Views for common queries - Stored procedures for maintenance - Audit log table (optional) New files: - database_schema.sql: Ready-to-use SQL script for deployment Configuration: - Added DATABASE_TABLE_PREFIX environment variable - Updated all references to use prefixed table names Benefits: - Clear namespace separation in shared databases - Easier identification of Tool_OCR tables - Prevent conflicts with other applications 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
183 lines
7.4 KiB
SQL
183 lines
7.4 KiB
SQL
-- Tool_OCR Database Schema with External API Authentication
|
|
-- Version: 2.0.0
|
|
-- Date: 2025-11-14
|
|
-- Description: Complete database redesign with user task isolation and history
|
|
|
|
-- ============================================
|
|
-- Drop existing tables (if needed)
|
|
-- ============================================
|
|
-- Uncomment these lines to drop existing tables
|
|
-- DROP TABLE IF EXISTS tool_ocr_sessions;
|
|
-- DROP TABLE IF EXISTS tool_ocr_task_files;
|
|
-- DROP TABLE IF EXISTS tool_ocr_tasks;
|
|
-- DROP TABLE IF EXISTS tool_ocr_users;
|
|
|
|
-- ============================================
|
|
-- 1. Users Table
|
|
-- ============================================
|
|
CREATE TABLE IF NOT EXISTS tool_ocr_users (
|
|
id INT PRIMARY KEY AUTO_INCREMENT,
|
|
email VARCHAR(255) UNIQUE NOT NULL COMMENT 'Primary identifier from Azure AD',
|
|
display_name VARCHAR(255) COMMENT 'Display name from API response',
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
last_login TIMESTAMP NULL,
|
|
is_active BOOLEAN DEFAULT TRUE,
|
|
INDEX idx_email (email),
|
|
INDEX idx_active (is_active)
|
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
|
|
COMMENT='User accounts authenticated via external API';
|
|
|
|
-- ============================================
|
|
-- 2. OCR Tasks Table
|
|
-- ============================================
|
|
CREATE TABLE IF NOT EXISTS tool_ocr_tasks (
|
|
id INT PRIMARY KEY AUTO_INCREMENT,
|
|
user_id INT NOT NULL COMMENT 'Foreign key to users table',
|
|
task_id VARCHAR(255) UNIQUE NOT NULL COMMENT 'Unique task identifier (UUID)',
|
|
filename VARCHAR(255),
|
|
file_type VARCHAR(50),
|
|
status ENUM('pending', 'processing', 'completed', 'failed') DEFAULT 'pending',
|
|
result_json_path VARCHAR(500) COMMENT 'Path to JSON result file',
|
|
result_markdown_path VARCHAR(500) COMMENT 'Path to Markdown result file',
|
|
result_pdf_path VARCHAR(500) COMMENT 'Path to searchable PDF file',
|
|
error_message TEXT COMMENT 'Error details if task failed',
|
|
processing_time_ms INT COMMENT 'Processing time in milliseconds',
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
|
completed_at TIMESTAMP NULL,
|
|
file_deleted BOOLEAN DEFAULT FALSE COMMENT 'Track if files were auto-deleted',
|
|
FOREIGN KEY (user_id) REFERENCES tool_ocr_users(id) ON DELETE CASCADE,
|
|
INDEX idx_user_status (user_id, status),
|
|
INDEX idx_created (created_at),
|
|
INDEX idx_task_id (task_id),
|
|
INDEX idx_filename (filename)
|
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
|
|
COMMENT='OCR processing tasks with user association';
|
|
|
|
-- ============================================
|
|
-- 3. Task Files Table
|
|
-- ============================================
|
|
CREATE TABLE IF NOT EXISTS tool_ocr_task_files (
|
|
id INT PRIMARY KEY AUTO_INCREMENT,
|
|
task_id INT NOT NULL COMMENT 'Foreign key to tasks table',
|
|
original_name VARCHAR(255),
|
|
stored_path VARCHAR(500) COMMENT 'Actual file path on server',
|
|
file_size BIGINT COMMENT 'File size in bytes',
|
|
mime_type VARCHAR(100),
|
|
file_hash VARCHAR(64) COMMENT 'SHA256 hash for deduplication',
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
FOREIGN KEY (task_id) REFERENCES tool_ocr_tasks(id) ON DELETE CASCADE,
|
|
INDEX idx_task (task_id),
|
|
INDEX idx_hash (file_hash)
|
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
|
|
COMMENT='Files associated with OCR tasks';
|
|
|
|
-- ============================================
|
|
-- 4. Sessions Table (Token Storage)
|
|
-- ============================================
|
|
CREATE TABLE IF NOT EXISTS tool_ocr_sessions (
|
|
id INT PRIMARY KEY AUTO_INCREMENT,
|
|
user_id INT NOT NULL COMMENT 'Foreign key to users table',
|
|
session_id VARCHAR(255) UNIQUE NOT NULL COMMENT 'Unique session identifier',
|
|
access_token TEXT COMMENT 'Azure AD access token (encrypted)',
|
|
id_token TEXT COMMENT 'Azure AD ID token (encrypted)',
|
|
refresh_token TEXT COMMENT 'Refresh token if available',
|
|
expires_at TIMESTAMP NOT NULL COMMENT 'Token expiration time',
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
last_accessed TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
|
is_active BOOLEAN DEFAULT TRUE,
|
|
ip_address VARCHAR(45) COMMENT 'Client IP address',
|
|
user_agent TEXT COMMENT 'Client user agent',
|
|
FOREIGN KEY (user_id) REFERENCES tool_ocr_users(id) ON DELETE CASCADE,
|
|
INDEX idx_user (user_id),
|
|
INDEX idx_session (session_id),
|
|
INDEX idx_expires (expires_at),
|
|
INDEX idx_active (is_active)
|
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
|
|
COMMENT='User session and token management';
|
|
|
|
-- ============================================
|
|
-- 5. Audit Log Table (Optional)
|
|
-- ============================================
|
|
CREATE TABLE IF NOT EXISTS tool_ocr_audit_logs (
|
|
id BIGINT PRIMARY KEY AUTO_INCREMENT,
|
|
user_id INT COMMENT 'User who performed the action',
|
|
action VARCHAR(100) NOT NULL COMMENT 'Action performed',
|
|
entity_type VARCHAR(50) COMMENT 'Type of entity affected',
|
|
entity_id INT COMMENT 'ID of entity affected',
|
|
details JSON COMMENT 'Additional details in JSON format',
|
|
ip_address VARCHAR(45),
|
|
user_agent TEXT,
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
INDEX idx_user (user_id),
|
|
INDEX idx_action (action),
|
|
INDEX idx_created (created_at)
|
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
|
|
COMMENT='Audit trail for all system actions';
|
|
|
|
-- ============================================
|
|
-- Views for Common Queries
|
|
-- ============================================
|
|
|
|
-- User task statistics view
|
|
CREATE OR REPLACE VIEW tool_ocr_user_stats AS
|
|
SELECT
|
|
u.id as user_id,
|
|
u.email,
|
|
u.display_name,
|
|
COUNT(DISTINCT t.id) as total_tasks,
|
|
SUM(CASE WHEN t.status = 'completed' THEN 1 ELSE 0 END) as completed_tasks,
|
|
SUM(CASE WHEN t.status = 'failed' THEN 1 ELSE 0 END) as failed_tasks,
|
|
SUM(CASE WHEN t.status = 'processing' THEN 1 ELSE 0 END) as processing_tasks,
|
|
SUM(CASE WHEN t.status = 'pending' THEN 1 ELSE 0 END) as pending_tasks,
|
|
AVG(t.processing_time_ms) as avg_processing_time_ms,
|
|
MAX(t.created_at) as last_task_created
|
|
FROM tool_ocr_users u
|
|
LEFT JOIN tool_ocr_tasks t ON u.id = t.user_id
|
|
GROUP BY u.id, u.email, u.display_name;
|
|
|
|
-- Recent tasks view
|
|
CREATE OR REPLACE VIEW tool_ocr_recent_tasks AS
|
|
SELECT
|
|
t.*,
|
|
u.email as user_email,
|
|
u.display_name as user_name
|
|
FROM tool_ocr_tasks t
|
|
INNER JOIN tool_ocr_users u ON t.user_id = u.id
|
|
ORDER BY t.created_at DESC
|
|
LIMIT 100;
|
|
|
|
-- ============================================
|
|
-- Stored Procedures (Optional)
|
|
-- ============================================
|
|
|
|
DELIMITER $$
|
|
|
|
-- Procedure to clean up expired sessions
|
|
CREATE PROCEDURE IF NOT EXISTS cleanup_expired_sessions()
|
|
BEGIN
|
|
DELETE FROM tool_ocr_sessions
|
|
WHERE expires_at < NOW() OR is_active = FALSE;
|
|
END$$
|
|
|
|
-- Procedure to clean up old tasks
|
|
CREATE PROCEDURE IF NOT EXISTS cleanup_old_tasks(IN days_to_keep INT)
|
|
BEGIN
|
|
UPDATE tool_ocr_tasks
|
|
SET file_deleted = TRUE
|
|
WHERE created_at < DATE_SUB(NOW(), INTERVAL days_to_keep DAY)
|
|
AND status IN ('completed', 'failed');
|
|
END$$
|
|
|
|
DELIMITER ;
|
|
|
|
-- ============================================
|
|
-- Initial Data (Optional)
|
|
-- ============================================
|
|
-- Add any initial data here if needed
|
|
|
|
-- ============================================
|
|
-- Grants (Adjust as needed)
|
|
-- ============================================
|
|
-- GRANT ALL PRIVILEGES ON tool_ocr_* TO 'tool_ocr_user'@'localhost';
|
|
-- FLUSH PRIVILEGES; |