diff --git a/backend/app/services/pdf_generator_service.py b/backend/app/services/pdf_generator_service.py index b380ce3..7e21dd9 100644 --- a/backend/app/services/pdf_generator_service.py +++ b/backend/app/services/pdf_generator_service.py @@ -153,14 +153,27 @@ class PDFGeneratorService: max_x = 0 max_y = 0 - # 我們需要檢查所有可能的區域,以找到最大的座標 - text_regions = ocr_data.get('text_regions', []) - layout_elements = ocr_data.get('layout_data', {}).get('elements', []) if ocr_data.get('layout_data') else [] - all_regions = text_regions + layout_elements + # *** 關鍵修復:檢查所有可能包含 bbox 的字段 *** + # 不同版本的 OCR 輸出可能使用不同的字段名 + all_regions = [] + + # 1. text_regions - 包含所有文字區域(最常見) + if 'text_regions' in ocr_data: + all_regions.extend(ocr_data['text_regions']) + + # 2. layout - 可能包含布局信息 + if 'layout' in ocr_data and isinstance(ocr_data['layout'], list): + all_regions.extend(ocr_data['layout']) + + # 3. layout_data.elements - PP-StructureV3 格式 + if 'layout_data' in ocr_data and isinstance(ocr_data['layout_data'], dict): + elements = ocr_data['layout_data'].get('elements', []) + if elements: + all_regions.extend(elements) if not all_regions: # 如果 JSON 為空,回退到原始檔案尺寸 - logger.warning("JSON 中沒有找到 text_regions 或 layout elements,回退到原始檔案尺寸。") + logger.warning("JSON 中沒有找到任何包含 bbox 的區域,回退到原始檔案尺寸。") if source_file_path: dims = self.get_original_page_size(source_file_path) if dims: @@ -176,11 +189,12 @@ class PDFGeneratorService: region_count += 1 + # *** 關鍵修復:正確處理多邊形 [[x, y], ...] 格式 *** if isinstance(bbox[0], (int, float)): # 處理簡單的 [x1, y1, x2, y2] 格式 max_x = max(max_x, bbox[2]) max_y = max(max_y, bbox[3]) - else: + elif isinstance(bbox[0], (list, tuple)): # 處理多邊形 [[x, y], ...] 格式 x_coords = [p[0] for p in bbox if isinstance(p, (list, tuple)) and len(p) >= 2] y_coords = [p[1] for p in bbox if isinstance(p, (list, tuple)) and len(p) >= 2] diff --git a/backend/test_empty_layout.py b/backend/test_empty_layout.py new file mode 100644 index 0000000..cf205fb --- /dev/null +++ b/backend/test_empty_layout.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python +""" +測試 calculate_page_dimensions 是否正確處理 layout=[] 但 text_regions 有數據的情況 +這模擬了用戶報告的 ELER-8-100HFV Data Sheet 的場景 +""" + +import json +from pathlib import Path +from app.services.pdf_generator_service import pdf_generator_service +import logging + +# Set up logging +logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s') + +def test_empty_layout_with_text_regions(): + """ + 測試場景: + - layout: [] (空列表) + - text_regions: 包含高解析度 bbox 數據 + - 應該從 text_regions 推斷出正確的 OCR 尺寸 + """ + + test_dir = Path("test_output_empty_layout") + test_dir.mkdir(exist_ok=True) + + print("\n" + "="*70) + print("測試場景:layout=[] 但 text_regions 包含數據") + print("="*70) + + # 模擬用戶的 JSON 結構 + mock_ocr_data = { + "status": "success", + "file_name": "ELER-8-100HFV_Data_Sheet.pdf", + "language": "ch", + "layout": [], # *** 關鍵:這是空的 *** + "text_regions": [ + { + "text": "義典科技", + "bbox": [[461, 270], [819, 252], [822, 408], [464, 426]], # 高解析度座標 + "confidence": 0.95 + }, + { + "text": "ELER-8-100HFV", + "bbox": [[1150, 580], [1850, 580], [1850, 680], [1150, 680]], + "confidence": 0.93 + }, + { + "text": "表格中的文字", + "bbox": [[1259, 936], [1317, 936], [1317, 960], [1259, 960]], # X=1259 超出 A4 寬度 + "confidence": 0.92 + }, + { + "text": "底部文字", + "bbox": [[400, 2800], [1200, 2800], [1200, 2880], [400, 2880]], # Y=2880 + "confidence": 0.91 + } + ], + "total_text_regions": 4, + "average_confidence": 0.928, + "layout_data": None, + "images_metadata": [], + "markdown_content": "義典科技\nELER-8-100HFV\n表格中的文字\n底部文字", + "processing_time": 3.2, + "timestamp": "2025-11-17T00:00:00" + } + + # Save mock JSON + json_path = test_dir / "empty_layout_test.json" + with open(json_path, "w", encoding="utf-8") as f: + json.dump(mock_ocr_data, f, ensure_ascii=False, indent=2) + + print(f"\n✓ 創建測試 JSON: {json_path}") + print(f" - layout: [] (空列表)") + print(f" - text_regions: 4 個區域") + print(f" - OCR 座標範圍: X=[400..1850], Y=[252..2880]") + print(f" - 預期 OCR 尺寸: ~1850 x ~2880") + + # Create A4 source PDF + from reportlab.pdfgen import canvas + from reportlab.lib.pagesizes import A4 + + source_pdf = test_dir / "source_a4.pdf" + c = canvas.Canvas(str(source_pdf), pagesize=A4) + c.drawString(100, 800, "Original A4 Document") + c.save() + + print(f"✓ 創建 A4 源文件: {source_pdf}") + print(f" - A4 尺寸: 595 x 842 點") + + # Test PDF generation + pdf_path = test_dir / "output.pdf" + + print(f"\n開始生成 PDF...") + print("-" * 70) + + success = pdf_generator_service.generate_layout_pdf( + json_path=json_path, + output_path=pdf_path, + source_file_path=source_pdf + ) + + print("-" * 70) + + if success: + print(f"\n✓ PDF 生成成功: {pdf_path}") + print(f"\n預期結果:") + print(f" - OCR 尺寸(從 text_regions 推斷): ~1850 x ~2880") + print(f" - 目標 PDF 尺寸: 595 x 842") + print(f" - 預期縮放因子: X={595/1850:.3f}, Y={842/2880:.3f}") + print(f"\n如果實際縮放因子是 1.0,說明 Bug 仍存在!") + return True + else: + print(f"\n✗ PDF 生成失敗") + return False + +if __name__ == "__main__": + import sys + sys.path.insert(0, str(Path(__file__).parent)) + + success = test_empty_layout_with_text_regions() + + print("\n" + "="*70) + if success: + print("✓ 測試完成") + print("="*70) + sys.exit(0) + else: + print("✗ 測試失敗") + print("="*70) + sys.exit(1)