feat: create extract-table-cell-boxes proposal and archive old proposal

- Archive unify-image-scaling proposal to archive/2025-11-28
- Create new extract-table-cell-boxes proposal for supplementing PPStructureV3
  with direct SLANeXt model calls to extract table cell bounding boxes
- Add debug logging to pp_structure_enhanced.py to investigate table cell boxes
- Discovered that the PPStructureV3 high-level API filters out cell bbox data,
  but paddlex.create_model() can invoke the underlying models directly

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
egg
2025-11-28 12:15:06 +08:00
parent dda9621e17
commit 801ee9c4b6
7 changed files with 393 additions and 4 deletions

View File

@@ -355,14 +355,54 @@ class PPStructureEnhanced:
# Special handling for tables
if mapped_type == ElementType.TABLE:
# Use HTML content from content-based detection or extract from 'res'
html_content = html_table_content # From content-based detection
if not html_content and 'res' in item and isinstance(item['res'], dict):
html_content = item['res'].get('html', '')
# 1. 提取 HTML (原有邏輯)
html_content = html_table_content
res_data = {}
# 獲取 res 字典 (包含 html 和 boxes)
if 'res' in item and isinstance(item['res'], dict):
res_data = item['res']
logger.info(f"[TABLE] Found 'res' dict with keys: {list(res_data.keys())}")
if not html_content:
html_content = res_data.get('html', '')
else:
logger.info(f"[TABLE] No 'res' key in item. Available keys: {list(item.keys())}")
if html_content:
element['html'] = html_content
element['extracted_text'] = self._extract_text_from_html(html_content)
# 2. 【新增】提取 Cell 座標 (boxes)
# SLANet 回傳的格式通常是 [[x1, y1, x2, y2], ...]
if 'boxes' in res_data:
cell_boxes = res_data['boxes']
logger.info(f"[TABLE] Found {len(cell_boxes)} cell boxes in res_data")
# 獲取表格自身的偏移量 (用於將 Cell 的相對座標轉為絕對座標)
table_x, table_y = 0, 0
if len(bbox) >= 2: # bbox is [x1, y1, x2, y2]
table_x, table_y = bbox[0], bbox[1]
processed_cells = []
for cell_box in cell_boxes:
# 確保格式正確
if isinstance(cell_box, (list, tuple)) and len(cell_box) >= 4:
# 轉換為絕對座標: Cell x + 表格 x
abs_cell_box = [
cell_box[0] + table_x,
cell_box[1] + table_y,
cell_box[2] + table_x,
cell_box[3] + table_y
]
processed_cells.append(abs_cell_box)
# 將處理後的 Cell 座標存入 element
element['cell_boxes'] = processed_cells
element['raw_cell_boxes'] = cell_boxes
logger.info(f"[TABLE] Processed {len(processed_cells)} cell boxes with table offset ({table_x}, {table_y})")
else:
logger.info(f"[TABLE] No 'boxes' key in res_data. Available: {list(res_data.keys()) if res_data else 'empty'}")
# Special handling for images/figures
elif mapped_type in [ElementType.IMAGE, ElementType.FIGURE]:
# Save image if path provided