diff --git a/backend/app/services/pdf_generator_service.py b/backend/app/services/pdf_generator_service.py index 0a066a7..9825984 100644 --- a/backend/app/services/pdf_generator_service.py +++ b/backend/app/services/pdf_generator_service.py @@ -368,15 +368,19 @@ class PDFGeneratorService: ocr_x_right = bbox[2][0] # Right X ocr_y_bottom = bbox[2][1] # Bottom Y in OCR coordinates + logger.debug(f"[文字] '{text[:20]}...' OCR原始座標: L={ocr_x_left:.0f}, T={ocr_y_top:.0f}, R={ocr_x_right:.0f}, B={ocr_y_bottom:.0f}") + # Apply scale factors to convert from OCR space to PDF space - ocr_x_left = ocr_x_left * scale_w - ocr_y_top = ocr_y_top * scale_h - ocr_x_right = ocr_x_right * scale_w - ocr_y_bottom = ocr_y_bottom * scale_h + scaled_x_left = ocr_x_left * scale_w + scaled_y_top = ocr_y_top * scale_h + scaled_x_right = ocr_x_right * scale_w + scaled_y_bottom = ocr_y_bottom * scale_h + + logger.debug(f"[文字] '{text[:20]}...' 縮放後(scale={scale_w:.3f},{scale_h:.3f}): L={scaled_x_left:.1f}, T={scaled_y_top:.1f}, R={scaled_x_right:.1f}, B={scaled_y_bottom:.1f}") # Calculate bbox dimensions (after scaling) - bbox_width = abs(ocr_x_right - ocr_x_left) - bbox_height = abs(ocr_y_bottom - ocr_y_top) + bbox_width = abs(scaled_x_right - scaled_x_left) + bbox_height = abs(scaled_y_bottom - scaled_y_top) # Calculate font size using heuristics # Font size is typically 70-90% of bbox height @@ -386,8 +390,10 @@ class PDFGeneratorService: # Transform coordinates: OCR (top-left origin) → PDF (bottom-left origin) # CRITICAL: Y-axis flip! 
- pdf_x = ocr_x_left - pdf_y = page_height - ocr_y_bottom # Flip Y-axis using bottom coordinate + pdf_x = scaled_x_left + pdf_y = page_height - scaled_y_bottom # Flip Y-axis using bottom coordinate + + logger.info(f"[文字] '{text[:30]}' → PDF位置: ({pdf_x:.1f}, {pdf_y:.1f}), 字體:{font_size:.1f}pt, 寬x高:{bbox_width:.0f}x{bbox_height:.0f}") # Set font font_name = self.font_name if self.font_registered else 'Helvetica' @@ -475,11 +481,19 @@ class PDFGeneratorService: logger.warning("No bbox found for table") return - # Extract bbox coordinates and apply scaling - ocr_x_left = table_bbox[0][0] * scale_w - ocr_y_top = table_bbox[0][1] * scale_h - ocr_x_right = table_bbox[2][0] * scale_w - ocr_y_bottom = table_bbox[2][1] * scale_h + # Extract bbox coordinates + ocr_x_left_raw = table_bbox[0][0] + ocr_y_top_raw = table_bbox[0][1] + ocr_x_right_raw = table_bbox[2][0] + ocr_y_bottom_raw = table_bbox[2][1] + + logger.debug(f"[表格] OCR原始座標: L={ocr_x_left_raw:.0f}, T={ocr_y_top_raw:.0f}, R={ocr_x_right_raw:.0f}, B={ocr_y_bottom_raw:.0f}") + + # Apply scaling + ocr_x_left = ocr_x_left_raw * scale_w + ocr_y_top = ocr_y_top_raw * scale_h + ocr_x_right = ocr_x_right_raw * scale_w + ocr_y_bottom = ocr_y_bottom_raw * scale_h table_width = abs(ocr_x_right - ocr_x_left) table_height = abs(ocr_y_bottom - ocr_y_top) @@ -488,6 +502,8 @@ class PDFGeneratorService: pdf_x = ocr_x_left pdf_y = page_height - ocr_y_bottom # Build table data for ReportLab # Convert parsed structure to simple 2D array max_cols = max(len(row['cells']) for row in rows) + + logger.info(f"[表格] {len(rows)}行x{max_cols}列 → PDF位置: ({pdf_x:.1f}, {pdf_y:.1f}), 寬x高: {table_width:.0f}x{table_height:.0f}") @@ -589,10 +605,18 @@ class PDFGeneratorService: # bbox from OCR: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]] # OCR coordinates: origin (0,0) at top-left, Y increases downward - ocr_x_left = bbox[0][0] * scale_w - ocr_y_top = bbox[0][1] * scale_h - ocr_x_right = bbox[2][0] * scale_w - ocr_y_bottom = bbox[2][1] * scale_h + ocr_x_left_raw 
= bbox[0][0] + ocr_y_top_raw = bbox[0][1] + ocr_x_right_raw = bbox[2][0] + ocr_y_bottom_raw = bbox[2][1] + + logger.debug(f"[圖片] '{image_path_str}' OCR原始座標: L={ocr_x_left_raw:.0f}, T={ocr_y_top_raw:.0f}, R={ocr_x_right_raw:.0f}, B={ocr_y_bottom_raw:.0f}") + + # Apply scaling + ocr_x_left = ocr_x_left_raw * scale_w + ocr_y_top = ocr_y_top_raw * scale_h + ocr_x_right = ocr_x_right_raw * scale_w + ocr_y_bottom = ocr_y_bottom_raw * scale_h # Calculate bbox dimensions (after scaling) bbox_width = abs(ocr_x_right - ocr_x_left) @@ -604,6 +628,8 @@ class PDFGeneratorService: pdf_x_left = ocr_x_left pdf_y_bottom = page_height - ocr_y_bottom # Flip Y-axis + logger.info(f"[圖片] '{image_path_str}' → PDF位置: ({pdf_x_left:.1f}, {pdf_y_bottom:.1f}), 寬x高: {bbox_width:.0f}x{bbox_height:.0f}") + # Draw image using ReportLab # drawImage expects: (path, x, y, width, height) # where (x, y) is the bottom-left corner of the image @@ -617,7 +643,7 @@ class PDFGeneratorService: mask='auto' # Handle transparency ) - logger.info(f"Drew image: {image_path_str} at ({pdf_x_left:.0f}, {pdf_y_bottom:.0f}) size {bbox_width:.0f}x{bbox_height:.0f}") + logger.info(f"[圖片] ✓ 成功繪製 '{image_path_str}'") except Exception as e: logger.warning(f"Failed to draw image region: {e}") @@ -716,34 +742,42 @@ class PDFGeneratorService: # Process each page total_pages = ocr_data.get('total_pages', 1) + logger.info("=" * 70) + logger.info(f"開始處理 {total_pages} 頁 PDF") + logger.info("=" * 70) + for page_num in range(1, total_pages + 1): + logger.info(f"\n>>> 處理第 {page_num}/{total_pages} 頁") if page_num > 1: pdf_canvas.showPage() # Start new page # Draw text regions for this page (excluding table text) page_regions = pages_data.get(page_num, []) - for region in page_regions: + logger.info(f"第 {page_num} 頁: 繪製 {len(page_regions)} 個文字區域") + for i, region in enumerate(page_regions, 1): + logger.debug(f" 文字 {i}/{len(page_regions)}") self.draw_text_region(pdf_canvas, region, target_height, scale_w, scale_h) # Draw tables for 
this page - for table_elem in table_elements: - if table_elem.get('page', 0) == page_num - 1: # page is 0-indexed - self.draw_table_region(pdf_canvas, table_elem, images_metadata, target_height, scale_w, scale_h) + page_tables = [t for t in table_elements if t.get('page', 0) == page_num - 1] + logger.info(f"第 {page_num} 頁: 繪製 {len(page_tables)} 個表格") + for table_elem in page_tables: + self.draw_table_region(pdf_canvas, table_elem, images_metadata, target_height, scale_w, scale_h) # Draw non-table images for this page (figure, chart, seal, etc.) - for img_meta in images_metadata: - if img_meta.get('page') == page_num - 1: # page is 0-indexed - img_path = img_meta.get('image_path', '') - # Skip table images (they're now rendered as tables) - if 'table' not in img_path.lower(): - self.draw_image_region( - pdf_canvas, - img_meta, - target_height, - json_path.parent, - scale_w, - scale_h - ) + page_images = [img for img in images_metadata if img.get('page') == page_num - 1 and 'table' not in img.get('image_path', '').lower()] + logger.info(f"第 {page_num} 頁: 繪製 {len(page_images)} 個圖片") + for img_meta in page_images: + self.draw_image_region( + pdf_canvas, + img_meta, + target_height, + json_path.parent, + scale_w, + scale_h + ) + + logger.info(f"<<< 第 {page_num} 頁完成") # Save PDF pdf_canvas.save()