feat: add GPU optimization and fix TableData consistency

GPU Optimization (Section 3.1):
- Add comprehensive memory management for RTX 4060 8GB
- Enable all recognition features (chart, formula, table, seal, text)
- Implement model cache with auto-unload for idle models
- Add memory monitoring and warning system

Bug Fix (Section 3.3):
- Fix TableData field inconsistency: 'columns' -> 'cols'
- Remove invalid 'html' and 'extracted_text' parameters
- Add proper TableCell conversion in _convert_table_data

Documentation:
- Add Future Improvements section for batch processing enhancement

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-19 09:17:27 +08:00
parent ecdce961ca
commit 8b9a364452
4 changed files with 205 additions and 24 deletions
--- a/backend/app/services/ocr_to_unified_converter.py
+++ b/backend/app/services/ocr_to_unified_converter.py
@@ -405,11 +405,28 @@ class OCRToUnifiedConverter:
            )

            # Create table data
+            # Note: TableData uses 'cols' not 'columns', and doesn't have 'html' field
+            # HTML content is stored in metadata instead
+            raw_cells = table_dict.get('cells', [])
+            table_cells = []
+
+            # Convert raw cells to TableCell objects if needed
+            for cell_data in raw_cells:
+                if isinstance(cell_data, dict):
+                    from app.models.unified_document import TableCell
+                    table_cells.append(TableCell(
+                        row=cell_data.get('row', 0),
+                        col=cell_data.get('col', 0),
+                        row_span=cell_data.get('row_span', 1),
+                        col_span=cell_data.get('col_span', 1),
+                        content=cell_data.get('content', '')
+                    ))
+
            table_data = TableData(
                rows=table_dict.get('rows', 0),
-                columns=table_dict.get('columns', 0),
-                cells=table_dict.get('cells', []),
-                html=table_dict.get('html', '')
+                cols=table_dict.get('columns', table_dict.get('cols', 0)),
+                cells=table_cells,
+                caption=table_dict.get('caption')
            )

            element = DocumentElement(
@@ -435,7 +452,7 @@ class OCRToUnifiedConverter:

            # Try to parse HTML to get rows and columns
            rows = 0
-            columns = 0
+            cols = 0
            cells = []

            if html:
@@ -446,14 +463,15 @@ class OCRToUnifiedConverter:
                    first_row_end = html.find('</tr>')
                    if first_row_end > 0:
                        first_row = html[:first_row_end]
-                        columns = first_row.count('<td') + first_row.count('<th')
+                        cols = first_row.count('<td') + first_row.count('<th')

+            # Note: TableData uses 'cols' not 'columns'
+            # HTML content can be stored as caption or in element metadata
            return TableData(
                rows=rows,
-                columns=columns,
+                cols=cols,
                cells=cells,
-                html=html,
-                extracted_text=extracted_text
+                caption=extracted_text if extracted_text else None
            )
        except:
            return None