feat: add detailed logging to PDF generation process
Problem:
A user reported the following issues with generated PDFs:
- Text appears cramped/overlapping
- Incorrect spacing
- Tables in wrong positions
- Images in wrong positions
Solution:
Add comprehensive logging at every stage of PDF generation to help diagnose
coordinate transformation and scaling issues.
Changes:
- backend/app/services/pdf_generator_service.py:
1. draw_text_region():
- Log OCR original coordinates (L, T, R, B)
- Log scaled coordinates after applying scale factors
- Log final PDF position, font size, and bbox dimensions
- Use separate variables for raw vs. scaled coordinates instead of reassigning the OCR variables in place (fix bug)
2. draw_table_region():
- Log table OCR original coordinates
- Log scaled coordinates
- Log final PDF position and table dimensions
- Log row/column count
3. draw_image_region():
- Log image OCR original coordinates
- Log scaled coordinates
- Log final PDF position and image dimensions
- Log success message after drawing
4. generate_layout_pdf():
- Log page processing progress
- Log count of text/table/image elements per page
- Add visual separators for better readability
Log Format:
- [文字] prefix for text regions
- [表格] prefix for tables
- [圖片] prefix for images
- L=Left, T=Top, R=Right, B=Bottom for coordinates
- Coordinates are logged both before and after scaling, together with the scale factors
This will help identify:
- Coordinate transformation errors
- Scale factor calculation issues
- Y-axis flip problems
- Element positioning bugs
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -368,15 +368,19 @@ class PDFGeneratorService:
|
|||||||
ocr_x_right = bbox[2][0] # Right X
|
ocr_x_right = bbox[2][0] # Right X
|
||||||
ocr_y_bottom = bbox[2][1] # Bottom Y in OCR coordinates
|
ocr_y_bottom = bbox[2][1] # Bottom Y in OCR coordinates
|
||||||
|
|
||||||
|
logger.debug(f"[文字] '{text[:20]}...' OCR原始座標: L={ocr_x_left:.0f}, T={ocr_y_top:.0f}, R={ocr_x_right:.0f}, B={ocr_y_bottom:.0f}")
|
||||||
|
|
||||||
# Apply scale factors to convert from OCR space to PDF space
|
# Apply scale factors to convert from OCR space to PDF space
|
||||||
ocr_x_left = ocr_x_left * scale_w
|
scaled_x_left = ocr_x_left * scale_w
|
||||||
ocr_y_top = ocr_y_top * scale_h
|
scaled_y_top = ocr_y_top * scale_h
|
||||||
ocr_x_right = ocr_x_right * scale_w
|
scaled_x_right = ocr_x_right * scale_w
|
||||||
ocr_y_bottom = ocr_y_bottom * scale_h
|
scaled_y_bottom = ocr_y_bottom * scale_h
|
||||||
|
|
||||||
|
logger.debug(f"[文字] '{text[:20]}...' 縮放後(scale={scale_w:.3f},{scale_h:.3f}): L={scaled_x_left:.1f}, T={scaled_y_top:.1f}, R={scaled_x_right:.1f}, B={scaled_y_bottom:.1f}")
|
||||||
|
|
||||||
# Calculate bbox dimensions (after scaling)
|
# Calculate bbox dimensions (after scaling)
|
||||||
bbox_width = abs(ocr_x_right - ocr_x_left)
|
bbox_width = abs(scaled_x_right - scaled_x_left)
|
||||||
bbox_height = abs(ocr_y_bottom - ocr_y_top)
|
bbox_height = abs(scaled_y_bottom - scaled_y_top)
|
||||||
|
|
||||||
# Calculate font size using heuristics
|
# Calculate font size using heuristics
|
||||||
# Font size is typically 70-90% of bbox height
|
# Font size is typically 70-90% of bbox height
|
||||||
@@ -386,8 +390,10 @@ class PDFGeneratorService:
|
|||||||
|
|
||||||
# Transform coordinates: OCR (top-left origin) → PDF (bottom-left origin)
|
# Transform coordinates: OCR (top-left origin) → PDF (bottom-left origin)
|
||||||
# CRITICAL: Y-axis flip!
|
# CRITICAL: Y-axis flip!
|
||||||
pdf_x = ocr_x_left
|
pdf_x = scaled_x_left
|
||||||
pdf_y = page_height - ocr_y_bottom # Flip Y-axis using bottom coordinate
|
pdf_y = page_height - scaled_y_bottom # Flip Y-axis using bottom coordinate
|
||||||
|
|
||||||
|
logger.info(f"[文字] '{text[:30]}' → PDF位置: ({pdf_x:.1f}, {pdf_y:.1f}), 字體:{font_size:.1f}pt, 寬x高:{bbox_width:.0f}x{bbox_height:.0f}")
|
||||||
|
|
||||||
# Set font
|
# Set font
|
||||||
font_name = self.font_name if self.font_registered else 'Helvetica'
|
font_name = self.font_name if self.font_registered else 'Helvetica'
|
||||||
@@ -475,11 +481,19 @@ class PDFGeneratorService:
|
|||||||
logger.warning("No bbox found for table")
|
logger.warning("No bbox found for table")
|
||||||
return
|
return
|
||||||
|
|
||||||
# Extract bbox coordinates and apply scaling
|
# Extract bbox coordinates
|
||||||
ocr_x_left = table_bbox[0][0] * scale_w
|
ocr_x_left_raw = table_bbox[0][0]
|
||||||
ocr_y_top = table_bbox[0][1] * scale_h
|
ocr_y_top_raw = table_bbox[0][1]
|
||||||
ocr_x_right = table_bbox[2][0] * scale_w
|
ocr_x_right_raw = table_bbox[2][0]
|
||||||
ocr_y_bottom = table_bbox[2][1] * scale_h
|
ocr_y_bottom_raw = table_bbox[2][1]
|
||||||
|
|
||||||
|
logger.debug(f"[表格] OCR原始座標: L={ocr_x_left_raw:.0f}, T={ocr_y_top_raw:.0f}, R={ocr_x_right_raw:.0f}, B={ocr_y_bottom_raw:.0f}")
|
||||||
|
|
||||||
|
# Apply scaling
|
||||||
|
ocr_x_left = ocr_x_left_raw * scale_w
|
||||||
|
ocr_y_top = ocr_y_top_raw * scale_h
|
||||||
|
ocr_x_right = ocr_x_right_raw * scale_w
|
||||||
|
ocr_y_bottom = ocr_y_bottom_raw * scale_h
|
||||||
|
|
||||||
table_width = abs(ocr_x_right - ocr_x_left)
|
table_width = abs(ocr_x_right - ocr_x_left)
|
||||||
table_height = abs(ocr_y_bottom - ocr_y_top)
|
table_height = abs(ocr_y_bottom - ocr_y_top)
|
||||||
@@ -488,6 +502,8 @@ class PDFGeneratorService:
|
|||||||
pdf_x = ocr_x_left
|
pdf_x = ocr_x_left
|
||||||
pdf_y = page_height - ocr_y_bottom
|
pdf_y = page_height - ocr_y_bottom
|
||||||
|
|
||||||
|
logger.info(f"[表格] {len(rows)}行x{max_cols}列 → PDF位置: ({pdf_x:.1f}, {pdf_y:.1f}), 寬x高: {table_width:.0f}x{table_height:.0f}")
|
||||||
|
|
||||||
# Build table data for ReportLab
|
# Build table data for ReportLab
|
||||||
# Convert parsed structure to simple 2D array
|
# Convert parsed structure to simple 2D array
|
||||||
max_cols = max(len(row['cells']) for row in rows)
|
max_cols = max(len(row['cells']) for row in rows)
|
||||||
@@ -589,10 +605,18 @@ class PDFGeneratorService:
|
|||||||
|
|
||||||
# bbox from OCR: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
|
# bbox from OCR: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
|
||||||
# OCR coordinates: origin (0,0) at top-left, Y increases downward
|
# OCR coordinates: origin (0,0) at top-left, Y increases downward
|
||||||
ocr_x_left = bbox[0][0] * scale_w
|
ocr_x_left_raw = bbox[0][0]
|
||||||
ocr_y_top = bbox[0][1] * scale_h
|
ocr_y_top_raw = bbox[0][1]
|
||||||
ocr_x_right = bbox[2][0] * scale_w
|
ocr_x_right_raw = bbox[2][0]
|
||||||
ocr_y_bottom = bbox[2][1] * scale_h
|
ocr_y_bottom_raw = bbox[2][1]
|
||||||
|
|
||||||
|
logger.debug(f"[圖片] '{image_path_str}' OCR原始座標: L={ocr_x_left_raw:.0f}, T={ocr_y_top_raw:.0f}, R={ocr_x_right_raw:.0f}, B={ocr_y_bottom_raw:.0f}")
|
||||||
|
|
||||||
|
# Apply scaling
|
||||||
|
ocr_x_left = ocr_x_left_raw * scale_w
|
||||||
|
ocr_y_top = ocr_y_top_raw * scale_h
|
||||||
|
ocr_x_right = ocr_x_right_raw * scale_w
|
||||||
|
ocr_y_bottom = ocr_y_bottom_raw * scale_h
|
||||||
|
|
||||||
# Calculate bbox dimensions (after scaling)
|
# Calculate bbox dimensions (after scaling)
|
||||||
bbox_width = abs(ocr_x_right - ocr_x_left)
|
bbox_width = abs(ocr_x_right - ocr_x_left)
|
||||||
@@ -604,6 +628,8 @@ class PDFGeneratorService:
|
|||||||
pdf_x_left = ocr_x_left
|
pdf_x_left = ocr_x_left
|
||||||
pdf_y_bottom = page_height - ocr_y_bottom # Flip Y-axis
|
pdf_y_bottom = page_height - ocr_y_bottom # Flip Y-axis
|
||||||
|
|
||||||
|
logger.info(f"[圖片] '{image_path_str}' → PDF位置: ({pdf_x_left:.1f}, {pdf_y_bottom:.1f}), 寬x高: {bbox_width:.0f}x{bbox_height:.0f}")
|
||||||
|
|
||||||
# Draw image using ReportLab
|
# Draw image using ReportLab
|
||||||
# drawImage expects: (path, x, y, width, height)
|
# drawImage expects: (path, x, y, width, height)
|
||||||
# where (x, y) is the bottom-left corner of the image
|
# where (x, y) is the bottom-left corner of the image
|
||||||
@@ -617,7 +643,7 @@ class PDFGeneratorService:
|
|||||||
mask='auto' # Handle transparency
|
mask='auto' # Handle transparency
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info(f"Drew image: {image_path_str} at ({pdf_x_left:.0f}, {pdf_y_bottom:.0f}) size {bbox_width:.0f}x{bbox_height:.0f}")
|
logger.info(f"[圖片] ✓ 成功繪製 '{image_path_str}'")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Failed to draw image region: {e}")
|
logger.warning(f"Failed to draw image region: {e}")
|
||||||
@@ -716,26 +742,32 @@ class PDFGeneratorService:
|
|||||||
|
|
||||||
# Process each page
|
# Process each page
|
||||||
total_pages = ocr_data.get('total_pages', 1)
|
total_pages = ocr_data.get('total_pages', 1)
|
||||||
|
logger.info(f"=" * 70)
|
||||||
|
logger.info(f"開始處理 {total_pages} 頁 PDF")
|
||||||
|
logger.info(f"=" * 70)
|
||||||
|
|
||||||
for page_num in range(1, total_pages + 1):
|
for page_num in range(1, total_pages + 1):
|
||||||
|
logger.info(f"\n>>> 處理第 {page_num}/{total_pages} 頁")
|
||||||
if page_num > 1:
|
if page_num > 1:
|
||||||
pdf_canvas.showPage() # Start new page
|
pdf_canvas.showPage() # Start new page
|
||||||
|
|
||||||
# Draw text regions for this page (excluding table text)
|
# Draw text regions for this page (excluding table text)
|
||||||
page_regions = pages_data.get(page_num, [])
|
page_regions = pages_data.get(page_num, [])
|
||||||
for region in page_regions:
|
logger.info(f"第 {page_num} 頁: 繪製 {len(page_regions)} 個文字區域")
|
||||||
|
for i, region in enumerate(page_regions, 1):
|
||||||
|
logger.debug(f" 文字 {i}/{len(page_regions)}")
|
||||||
self.draw_text_region(pdf_canvas, region, target_height, scale_w, scale_h)
|
self.draw_text_region(pdf_canvas, region, target_height, scale_w, scale_h)
|
||||||
|
|
||||||
# Draw tables for this page
|
# Draw tables for this page
|
||||||
for table_elem in table_elements:
|
page_tables = [t for t in table_elements if t.get('page', 0) == page_num - 1]
|
||||||
if table_elem.get('page', 0) == page_num - 1: # page is 0-indexed
|
logger.info(f"第 {page_num} 頁: 繪製 {len(page_tables)} 個表格")
|
||||||
|
for table_elem in page_tables:
|
||||||
self.draw_table_region(pdf_canvas, table_elem, images_metadata, target_height, scale_w, scale_h)
|
self.draw_table_region(pdf_canvas, table_elem, images_metadata, target_height, scale_w, scale_h)
|
||||||
|
|
||||||
# Draw non-table images for this page (figure, chart, seal, etc.)
|
# Draw non-table images for this page (figure, chart, seal, etc.)
|
||||||
for img_meta in images_metadata:
|
page_images = [img for img in images_metadata if img.get('page') == page_num - 1 and 'table' not in img.get('image_path', '').lower()]
|
||||||
if img_meta.get('page') == page_num - 1: # page is 0-indexed
|
logger.info(f"第 {page_num} 頁: 繪製 {len(page_images)} 個圖片")
|
||||||
img_path = img_meta.get('image_path', '')
|
for img_meta in page_images:
|
||||||
# Skip table images (they're now rendered as tables)
|
|
||||||
if 'table' not in img_path.lower():
|
|
||||||
self.draw_image_region(
|
self.draw_image_region(
|
||||||
pdf_canvas,
|
pdf_canvas,
|
||||||
img_meta,
|
img_meta,
|
||||||
@@ -745,6 +777,8 @@ class PDFGeneratorService:
|
|||||||
scale_h
|
scale_h
|
||||||
)
|
)
|
||||||
|
|
||||||
|
logger.info(f"<<< 第 {page_num} 頁完成")
|
||||||
|
|
||||||
# Save PDF
|
# Save PDF
|
||||||
pdf_canvas.save()
|
pdf_canvas.save()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user