feat: create extract-table-cell-boxes proposal and archive old proposal
- Archive unify-image-scaling proposal to archive/2025-11-28
- Create new extract-table-cell-boxes proposal for supplementing PPStructureV3 with direct SLANeXt model calls to extract table cell bounding boxes
- Add debug logging to pp_structure_enhanced.py for table cell boxes investigation
- Discovered that the PPStructureV3 high-level API filters out cell bbox data, but paddlex.create_model() can directly invoke the underlying models

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -355,14 +355,54 @@ class PPStructureEnhanced:
|
||||
|
||||
# Special handling for tables
|
||||
if mapped_type == ElementType.TABLE:
|
||||
# Use HTML content from content-based detection or extract from 'res'
|
||||
html_content = html_table_content # From content-based detection
|
||||
if not html_content and 'res' in item and isinstance(item['res'], dict):
|
||||
html_content = item['res'].get('html', '')
|
||||
# 1. Extract HTML (original logic)
|
||||
html_content = html_table_content
|
||||
res_data = {}
|
||||
|
||||
# Get the 'res' dict (contains html and boxes)
|
||||
if 'res' in item and isinstance(item['res'], dict):
|
||||
res_data = item['res']
|
||||
logger.info(f"[TABLE] Found 'res' dict with keys: {list(res_data.keys())}")
|
||||
if not html_content:
|
||||
html_content = res_data.get('html', '')
|
||||
else:
|
||||
logger.info(f"[TABLE] No 'res' key in item. Available keys: {list(item.keys())}")
|
||||
|
||||
if html_content:
|
||||
element['html'] = html_content
|
||||
element['extracted_text'] = self._extract_text_from_html(html_content)
|
||||
|
||||
# 2. [New] Extract cell coordinates (boxes)
|
||||
# SLANet usually returns boxes in the format [[x1, y1, x2, y2], ...]
|
||||
if 'boxes' in res_data:
|
||||
cell_boxes = res_data['boxes']
|
||||
logger.info(f"[TABLE] Found {len(cell_boxes)} cell boxes in res_data")
|
||||
|
||||
# Get the table's own offset (used to convert cell-relative coordinates to absolute coordinates)
|
||||
table_x, table_y = 0, 0
|
||||
if len(bbox) >= 2: # bbox is [x1, y1, x2, y2]
|
||||
table_x, table_y = bbox[0], bbox[1]
|
||||
|
||||
processed_cells = []
|
||||
for cell_box in cell_boxes:
|
||||
# Make sure the box has the expected format
|
||||
if isinstance(cell_box, (list, tuple)) and len(cell_box) >= 4:
|
||||
# Convert to absolute coordinates: cell x + table x
|
||||
abs_cell_box = [
|
||||
cell_box[0] + table_x,
|
||||
cell_box[1] + table_y,
|
||||
cell_box[2] + table_x,
|
||||
cell_box[3] + table_y
|
||||
]
|
||||
processed_cells.append(abs_cell_box)
|
||||
|
||||
# Store the processed cell coordinates on the element
|
||||
element['cell_boxes'] = processed_cells
|
||||
element['raw_cell_boxes'] = cell_boxes
|
||||
logger.info(f"[TABLE] Processed {len(processed_cells)} cell boxes with table offset ({table_x}, {table_y})")
|
||||
else:
|
||||
logger.info(f"[TABLE] No 'boxes' key in res_data. Available: {list(res_data.keys()) if res_data else 'empty'}")
|
||||
|
||||
# Special handling for images/figures
|
||||
elif mapped_type in [ElementType.IMAGE, ElementType.FIGURE]:
|
||||
# Save image if path provided
|
||||
|
||||
Reference in New Issue
Block a user