"""Debug PyMuPDF table.cells structure""" import sys from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent)) import fitz pdf_path = Path(__file__).parent.parent.parent / "demo_docs" / "edit3.pdf" doc = fitz.open(str(pdf_path)) page = doc[0] tables = page.find_tables() for idx, table in enumerate(tables.tables): data = table.extract() num_rows = len(data) num_cols = max(len(row) for row in data) if data else 0 print(f"Table {idx}:") print(f" table.extract() dimensions: {num_rows} rows x {num_cols} cols") print(f" Expected positions: {num_rows * num_cols}") cell_rects = getattr(table, 'cells', None) if cell_rects: print(f" table.cells length: {len(cell_rects)}") none_count = sum(1 for c in cell_rects if c is None) actual_count = sum(1 for c in cell_rects if c is not None) print(f" None cells: {none_count}") print(f" Actual cells: {actual_count}") # Check if cell_rects matches grid size if len(cell_rects) != num_rows * num_cols: print(f" WARNING: cell_rects length ({len(cell_rects)}) != grid size ({num_rows * num_cols})") # Show first few cells print(f" First 5 cells: {cell_rects[:5]}") else: print(f" table.cells: NOT AVAILABLE") # Check row_count and col_count print(f" table.row_count: {getattr(table, 'row_count', 'N/A')}") print(f" table.col_count: {getattr(table, 'col_count', 'N/A')}") doc.close()