feat: add database table prefix and complete schema definition
Added `tool_ocr_` prefix to all database tables for clear separation from other systems in the same database. Changes: - All tables now use `tool_ocr_` prefix - Added tool_ocr_sessions table for token management - Created complete SQL schema file with: - Full table definitions with comments - Indexes for performance - Views for common queries - Stored procedures for maintenance - Audit log table (optional) New files: - database_schema.sql: Ready-to-use SQL script for deployment Configuration: - Added DATABASE_TABLE_PREFIX environment variable - Updated all references to use prefixed table names Benefits: - Clear namespace separation in shared databases - Easier identification of Tool_OCR tables - Prevent conflicts with other applications 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,183 @@
|
|||||||
|
-- Tool_OCR Database Schema with External API Authentication
|
||||||
|
-- Version: 2.0.0
|
||||||
|
-- Date: 2025-11-14
|
||||||
|
-- Description: Complete database redesign with user task isolation and history
|
||||||
|
|
||||||
|
-- ============================================
|
||||||
|
-- Drop existing tables (if needed)
|
||||||
|
-- ============================================
|
||||||
|
-- Uncomment these lines to drop existing tables
|
||||||
|
-- DROP TABLE IF EXISTS tool_ocr_sessions;
|
||||||
|
-- DROP TABLE IF EXISTS tool_ocr_task_files;
|
||||||
|
-- DROP TABLE IF EXISTS tool_ocr_tasks;
|
||||||
|
-- DROP TABLE IF EXISTS tool_ocr_users;
|
||||||
|
|
||||||
|
-- ============================================
|
||||||
|
-- 1. Users Table
|
||||||
|
-- ============================================
|
||||||
|
CREATE TABLE IF NOT EXISTS tool_ocr_users (
|
||||||
|
id INT PRIMARY KEY AUTO_INCREMENT,
|
||||||
|
email VARCHAR(255) UNIQUE NOT NULL COMMENT 'Primary identifier from Azure AD',
|
||||||
|
display_name VARCHAR(255) COMMENT 'Display name from API response',
|
||||||
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
last_login TIMESTAMP NULL,
|
||||||
|
is_active BOOLEAN DEFAULT TRUE,
|
||||||
|
INDEX idx_email (email),
|
||||||
|
INDEX idx_active (is_active)
|
||||||
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
|
||||||
|
COMMENT='User accounts authenticated via external API';
|
||||||
|
|
||||||
|
-- ============================================
|
||||||
|
-- 2. OCR Tasks Table
|
||||||
|
-- ============================================
|
||||||
|
CREATE TABLE IF NOT EXISTS tool_ocr_tasks (
|
||||||
|
id INT PRIMARY KEY AUTO_INCREMENT,
|
||||||
|
user_id INT NOT NULL COMMENT 'Foreign key to users table',
|
||||||
|
task_id VARCHAR(255) UNIQUE NOT NULL COMMENT 'Unique task identifier (UUID)',
|
||||||
|
filename VARCHAR(255),
|
||||||
|
file_type VARCHAR(50),
|
||||||
|
status ENUM('pending', 'processing', 'completed', 'failed') DEFAULT 'pending',
|
||||||
|
result_json_path VARCHAR(500) COMMENT 'Path to JSON result file',
|
||||||
|
result_markdown_path VARCHAR(500) COMMENT 'Path to Markdown result file',
|
||||||
|
result_pdf_path VARCHAR(500) COMMENT 'Path to searchable PDF file',
|
||||||
|
error_message TEXT COMMENT 'Error details if task failed',
|
||||||
|
processing_time_ms INT COMMENT 'Processing time in milliseconds',
|
||||||
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||||
|
completed_at TIMESTAMP NULL,
|
||||||
|
file_deleted BOOLEAN DEFAULT FALSE COMMENT 'Track if files were auto-deleted',
|
||||||
|
FOREIGN KEY (user_id) REFERENCES tool_ocr_users(id) ON DELETE CASCADE,
|
||||||
|
INDEX idx_user_status (user_id, status),
|
||||||
|
INDEX idx_created (created_at),
|
||||||
|
INDEX idx_task_id (task_id),
|
||||||
|
INDEX idx_filename (filename)
|
||||||
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
|
||||||
|
COMMENT='OCR processing tasks with user association';
|
||||||
|
|
||||||
|
-- ============================================
|
||||||
|
-- 3. Task Files Table
|
||||||
|
-- ============================================
|
||||||
|
CREATE TABLE IF NOT EXISTS tool_ocr_task_files (
|
||||||
|
id INT PRIMARY KEY AUTO_INCREMENT,
|
||||||
|
task_id INT NOT NULL COMMENT 'Foreign key to tasks table',
|
||||||
|
original_name VARCHAR(255),
|
||||||
|
stored_path VARCHAR(500) COMMENT 'Actual file path on server',
|
||||||
|
file_size BIGINT COMMENT 'File size in bytes',
|
||||||
|
mime_type VARCHAR(100),
|
||||||
|
file_hash VARCHAR(64) COMMENT 'SHA256 hash for deduplication',
|
||||||
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
FOREIGN KEY (task_id) REFERENCES tool_ocr_tasks(id) ON DELETE CASCADE,
|
||||||
|
INDEX idx_task (task_id),
|
||||||
|
INDEX idx_hash (file_hash)
|
||||||
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
|
||||||
|
COMMENT='Files associated with OCR tasks';
|
||||||
|
|
||||||
|
-- ============================================
|
||||||
|
-- 4. Sessions Table (Token Storage)
|
||||||
|
-- ============================================
|
||||||
|
CREATE TABLE IF NOT EXISTS tool_ocr_sessions (
|
||||||
|
id INT PRIMARY KEY AUTO_INCREMENT,
|
||||||
|
user_id INT NOT NULL COMMENT 'Foreign key to users table',
|
||||||
|
session_id VARCHAR(255) UNIQUE NOT NULL COMMENT 'Unique session identifier',
|
||||||
|
access_token TEXT COMMENT 'Azure AD access token (encrypted)',
|
||||||
|
id_token TEXT COMMENT 'Azure AD ID token (encrypted)',
|
||||||
|
refresh_token TEXT COMMENT 'Refresh token if available',
|
||||||
|
expires_at TIMESTAMP NOT NULL COMMENT 'Token expiration time',
|
||||||
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
last_accessed TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||||
|
is_active BOOLEAN DEFAULT TRUE,
|
||||||
|
ip_address VARCHAR(45) COMMENT 'Client IP address',
|
||||||
|
user_agent TEXT COMMENT 'Client user agent',
|
||||||
|
FOREIGN KEY (user_id) REFERENCES tool_ocr_users(id) ON DELETE CASCADE,
|
||||||
|
INDEX idx_user (user_id),
|
||||||
|
INDEX idx_session (session_id),
|
||||||
|
INDEX idx_expires (expires_at),
|
||||||
|
INDEX idx_active (is_active)
|
||||||
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
|
||||||
|
COMMENT='User session and token management';
|
||||||
|
|
||||||
|
-- ============================================
|
||||||
|
-- 5. Audit Log Table (Optional)
|
||||||
|
-- ============================================
|
||||||
|
CREATE TABLE IF NOT EXISTS tool_ocr_audit_logs (
|
||||||
|
id BIGINT PRIMARY KEY AUTO_INCREMENT,
|
||||||
|
user_id INT COMMENT 'User who performed the action',
|
||||||
|
action VARCHAR(100) NOT NULL COMMENT 'Action performed',
|
||||||
|
entity_type VARCHAR(50) COMMENT 'Type of entity affected',
|
||||||
|
entity_id INT COMMENT 'ID of entity affected',
|
||||||
|
details JSON COMMENT 'Additional details in JSON format',
|
||||||
|
ip_address VARCHAR(45),
|
||||||
|
user_agent TEXT,
|
||||||
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
INDEX idx_user (user_id),
|
||||||
|
INDEX idx_action (action),
|
||||||
|
INDEX idx_created (created_at)
|
||||||
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
|
||||||
|
COMMENT='Audit trail for all system actions';
|
||||||
|
|
||||||
|
-- ============================================
|
||||||
|
-- Views for Common Queries
|
||||||
|
-- ============================================
|
||||||
|
|
||||||
|
-- User task statistics view
|
||||||
|
CREATE OR REPLACE VIEW tool_ocr_user_stats AS
|
||||||
|
SELECT
|
||||||
|
u.id as user_id,
|
||||||
|
u.email,
|
||||||
|
u.display_name,
|
||||||
|
COUNT(DISTINCT t.id) as total_tasks,
|
||||||
|
SUM(CASE WHEN t.status = 'completed' THEN 1 ELSE 0 END) as completed_tasks,
|
||||||
|
SUM(CASE WHEN t.status = 'failed' THEN 1 ELSE 0 END) as failed_tasks,
|
||||||
|
SUM(CASE WHEN t.status = 'processing' THEN 1 ELSE 0 END) as processing_tasks,
|
||||||
|
SUM(CASE WHEN t.status = 'pending' THEN 1 ELSE 0 END) as pending_tasks,
|
||||||
|
AVG(t.processing_time_ms) as avg_processing_time_ms,
|
||||||
|
MAX(t.created_at) as last_task_created
|
||||||
|
FROM tool_ocr_users u
|
||||||
|
LEFT JOIN tool_ocr_tasks t ON u.id = t.user_id
|
||||||
|
GROUP BY u.id, u.email, u.display_name;
|
||||||
|
|
||||||
|
-- Recent tasks view
|
||||||
|
CREATE OR REPLACE VIEW tool_ocr_recent_tasks AS
|
||||||
|
SELECT
|
||||||
|
t.*,
|
||||||
|
u.email as user_email,
|
||||||
|
u.display_name as user_name
|
||||||
|
FROM tool_ocr_tasks t
|
||||||
|
INNER JOIN tool_ocr_users u ON t.user_id = u.id
|
||||||
|
ORDER BY t.created_at DESC
|
||||||
|
LIMIT 100;
|
||||||
|
|
||||||
|
-- ============================================
|
||||||
|
-- Stored Procedures (Optional)
|
||||||
|
-- ============================================
|
||||||
|
|
||||||
|
DELIMITER $$
|
||||||
|
|
||||||
|
-- Procedure to clean up expired sessions
|
||||||
|
CREATE PROCEDURE IF NOT EXISTS cleanup_expired_sessions()
|
||||||
|
BEGIN
|
||||||
|
DELETE FROM tool_ocr_sessions
|
||||||
|
WHERE expires_at < NOW() OR is_active = FALSE;
|
||||||
|
END$$
|
||||||
|
|
||||||
|
-- Procedure to clean up old tasks
|
||||||
|
CREATE PROCEDURE IF NOT EXISTS cleanup_old_tasks(IN days_to_keep INT)
|
||||||
|
BEGIN
|
||||||
|
UPDATE tool_ocr_tasks
|
||||||
|
SET file_deleted = TRUE
|
||||||
|
WHERE created_at < DATE_SUB(NOW(), INTERVAL days_to_keep DAY)
|
||||||
|
AND status IN ('completed', 'failed');
|
||||||
|
END$$
|
||||||
|
|
||||||
|
DELIMITER ;
|
||||||
|
|
||||||
|
-- ============================================
|
||||||
|
-- Initial Data (Optional)
|
||||||
|
-- ============================================
|
||||||
|
-- Add any initial data here if needed
|
||||||
|
|
||||||
|
-- ============================================
|
||||||
|
-- Grants (Adjust as needed)
|
||||||
|
-- ============================================
|
||||||
|
-- GRANT ALL PRIVILEGES ON tool_ocr_* TO 'tool_ocr_user'@'localhost';
|
||||||
|
-- FLUSH PRIVILEGES;
|
||||||
@@ -75,9 +75,11 @@ By migrating to the external API authentication service at https://pj-auth-api.v
|
|||||||
|
|
||||||
**Complete Redesign (No backward compatibility needed)**:
|
**Complete Redesign (No backward compatibility needed)**:
|
||||||
|
|
||||||
1. **users table (redesigned)**:
|
**Table Prefix**: `tool_ocr_` (for clear separation from other systems in the same database)
|
||||||
|
|
||||||
|
1. **tool_ocr_users table (redesigned)**:
|
||||||
```sql
|
```sql
|
||||||
CREATE TABLE users (
|
CREATE TABLE tool_ocr_users (
|
||||||
id INT PRIMARY KEY AUTO_INCREMENT,
|
id INT PRIMARY KEY AUTO_INCREMENT,
|
||||||
email VARCHAR(255) UNIQUE NOT NULL, -- Primary identifier from Azure AD
|
email VARCHAR(255) UNIQUE NOT NULL, -- Primary identifier from Azure AD
|
||||||
display_name VARCHAR(255), -- Display name from API response
|
display_name VARCHAR(255), -- Display name from API response
|
||||||
@@ -88,9 +90,9 @@ By migrating to the external API authentication service at https://pj-auth-api.v
|
|||||||
```
|
```
|
||||||
Note: No Azure AD ID storage needed - email is sufficient as unique identifier
|
Note: No Azure AD ID storage needed - email is sufficient as unique identifier
|
||||||
|
|
||||||
2. **ocr_tasks table (new - for task history)**:
|
2. **tool_ocr_tasks table (new - for task history)**:
|
||||||
```sql
|
```sql
|
||||||
CREATE TABLE ocr_tasks (
|
CREATE TABLE tool_ocr_tasks (
|
||||||
id INT PRIMARY KEY AUTO_INCREMENT,
|
id INT PRIMARY KEY AUTO_INCREMENT,
|
||||||
user_id INT NOT NULL, -- Foreign key to users table
|
user_id INT NOT NULL, -- Foreign key to users table
|
||||||
task_id VARCHAR(255) UNIQUE, -- Unique task identifier
|
task_id VARCHAR(255) UNIQUE, -- Unique task identifier
|
||||||
@@ -104,22 +106,37 @@ By migrating to the external API authentication service at https://pj-auth-api.v
|
|||||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||||
completed_at TIMESTAMP NULL,
|
completed_at TIMESTAMP NULL,
|
||||||
file_deleted BOOLEAN DEFAULT FALSE, -- Track if files were auto-deleted
|
file_deleted BOOLEAN DEFAULT FALSE, -- Track if files were auto-deleted
|
||||||
FOREIGN KEY (user_id) REFERENCES users(id),
|
FOREIGN KEY (user_id) REFERENCES tool_ocr_users(id),
|
||||||
INDEX idx_user_status (user_id, status),
|
INDEX idx_user_status (user_id, status),
|
||||||
INDEX idx_created (created_at)
|
INDEX idx_created (created_at)
|
||||||
);
|
);
|
||||||
```
|
```
|
||||||
|
|
||||||
3. **task_files table (for multiple files per task)**:
|
3. **tool_ocr_task_files table (for multiple files per task)**:
|
||||||
```sql
|
```sql
|
||||||
CREATE TABLE task_files (
|
CREATE TABLE tool_ocr_task_files (
|
||||||
id INT PRIMARY KEY AUTO_INCREMENT,
|
id INT PRIMARY KEY AUTO_INCREMENT,
|
||||||
task_id INT NOT NULL,
|
task_id INT NOT NULL,
|
||||||
original_name VARCHAR(255),
|
original_name VARCHAR(255),
|
||||||
stored_path VARCHAR(500),
|
stored_path VARCHAR(500),
|
||||||
file_size BIGINT,
|
file_size BIGINT,
|
||||||
mime_type VARCHAR(100),
|
mime_type VARCHAR(100),
|
||||||
FOREIGN KEY (task_id) REFERENCES ocr_tasks(id) ON DELETE CASCADE
|
FOREIGN KEY (task_id) REFERENCES tool_ocr_tasks(id) ON DELETE CASCADE
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
4. **tool_ocr_sessions table (for token management)**:
|
||||||
|
```sql
|
||||||
|
CREATE TABLE tool_ocr_sessions (
|
||||||
|
id INT PRIMARY KEY AUTO_INCREMENT,
|
||||||
|
user_id INT NOT NULL,
|
||||||
|
access_token TEXT,
|
||||||
|
id_token TEXT,
|
||||||
|
expires_at TIMESTAMP,
|
||||||
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
FOREIGN KEY (user_id) REFERENCES tool_ocr_users(id) ON DELETE CASCADE,
|
||||||
|
INDEX idx_user (user_id),
|
||||||
|
INDEX idx_expires (expires_at)
|
||||||
);
|
);
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -227,6 +244,7 @@ By migrating to the external API authentication service at https://pj-auth-api.v
|
|||||||
- `TASK_RETENTION_DAYS` = 30 (auto-delete old tasks)
|
- `TASK_RETENTION_DAYS` = 30 (auto-delete old tasks)
|
||||||
- `MAX_TASKS_PER_USER` = 1000 (limit per user)
|
- `MAX_TASKS_PER_USER` = 1000 (limit per user)
|
||||||
- `ENABLE_TASK_HISTORY` = true (enable history feature)
|
- `ENABLE_TASK_HISTORY` = true (enable history feature)
|
||||||
|
- `DATABASE_TABLE_PREFIX` = "tool_ocr_" (table naming prefix)
|
||||||
|
|
||||||
### Security Considerations
|
### Security Considerations
|
||||||
- HTTPS required for all authentication requests
|
- HTTPS required for all authentication requests
|
||||||
|
|||||||
@@ -5,21 +5,23 @@
|
|||||||
- Export current schema and data
|
- Export current schema and data
|
||||||
- Document any important data to preserve
|
- Document any important data to preserve
|
||||||
- [ ] 1.2 Drop old tables
|
- [ ] 1.2 Drop old tables
|
||||||
- Remove existing users table
|
- Remove existing tables with old naming convention
|
||||||
- Remove any related tables
|
|
||||||
- Clear database for fresh start
|
- Clear database for fresh start
|
||||||
- [ ] 1.3 Create new database schema
|
- [ ] 1.3 Create new database schema with `tool_ocr_` prefix
|
||||||
- Create new `users` table (email as primary identifier)
|
- Create new `tool_ocr_users` table (email as primary identifier)
|
||||||
- Create `ocr_tasks` table with user association
|
- Create `tool_ocr_tasks` table with user association
|
||||||
- Create `task_files` table for file tracking
|
- Create `tool_ocr_task_files` table for file tracking
|
||||||
|
- Create `tool_ocr_sessions` table for token storage
|
||||||
- Add proper indexes for performance
|
- Add proper indexes for performance
|
||||||
- [ ] 1.4 Create SQLAlchemy models
|
- [ ] 1.4 Create SQLAlchemy models
|
||||||
- User model (simplified)
|
- User model (mapped to `tool_ocr_users`)
|
||||||
- Task model with user relationship
|
- Task model (mapped to `tool_ocr_tasks`)
|
||||||
- TaskFile model with cascade delete
|
- TaskFile model (mapped to `tool_ocr_task_files`)
|
||||||
|
- Session model (mapped to `tool_ocr_sessions`)
|
||||||
|
- Configure table prefix in base model
|
||||||
- [ ] 1.5 Generate Alembic migration
|
- [ ] 1.5 Generate Alembic migration
|
||||||
- Create initial migration for new schema
|
- Create initial migration for new schema
|
||||||
- Test migration script
|
- Test migration script with proper table prefixes
|
||||||
|
|
||||||
## 2. Configuration Management
|
## 2. Configuration Management
|
||||||
- [ ] 2.1 Update environment configuration
|
- [ ] 2.1 Update environment configuration
|
||||||
@@ -30,9 +32,11 @@
|
|||||||
- Add `TASK_RETENTION_DAYS` for auto-cleanup
|
- Add `TASK_RETENTION_DAYS` for auto-cleanup
|
||||||
- Add `MAX_TASKS_PER_USER` for limits
|
- Add `MAX_TASKS_PER_USER` for limits
|
||||||
- Add `ENABLE_TASK_HISTORY` feature flag
|
- Add `ENABLE_TASK_HISTORY` feature flag
|
||||||
|
- Add `DATABASE_TABLE_PREFIX` = "tool_ocr_"
|
||||||
- [ ] 2.2 Update Settings class
|
- [ ] 2.2 Update Settings class
|
||||||
- Add external auth settings to `backend/app/core/config.py`
|
- Add external auth settings to `backend/app/core/config.py`
|
||||||
- Add task management settings
|
- Add task management settings
|
||||||
|
- Add database table prefix configuration
|
||||||
- Add validation for new configuration values
|
- Add validation for new configuration values
|
||||||
- Remove old authentication settings
|
- Remove old authentication settings
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user