#!/usr/bin/env python
"""
Exercise script for the ``calculate_page_dimensions`` case where ``layout=[]``
but ``text_regions`` contains data.

It reproduces the scenario of the user-reported ELER-8-100HFV Data Sheet by
driving ``pdf_generator_service.generate_layout_pdf`` with a mock OCR result.
"""

import json
import logging
from pathlib import Path

from app.services.pdf_generator_service import pdf_generator_service

# Show INFO-level log records emitted while the PDF is generated.
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')


def _build_mock_ocr_payload():
    """Return a mock OCR result with an empty ``layout`` and four text regions.

    The bounding boxes use high-resolution coordinates (X up to 1850, Y up to
    2880), i.e. well beyond the 595 x 842 point A4 page used as the source.
    """
    regions = [
        {
            "text": "義典科技",
            # High-resolution coordinates.
            "bbox": [[461, 270], [819, 252], [822, 408], [464, 426]],
            "confidence": 0.95
        },
        {
            "text": "ELER-8-100HFV",
            "bbox": [[1150, 580], [1850, 580], [1850, 680], [1150, 680]],
            "confidence": 0.93
        },
        {
            "text": "表格中的文字",
            # X=1259 already exceeds the A4 width.
            "bbox": [[1259, 936], [1317, 936], [1317, 960], [1259, 960]],
            "confidence": 0.92
        },
        {
            "text": "底部文字",
            # Y=2880 is the largest Y coordinate in the data set.
            "bbox": [[400, 2800], [1200, 2800], [1200, 2880], [400, 2880]],
            "confidence": 0.91
        }
    ]
    # Key order is kept as-is so the serialized JSON is stable.
    return {
        "status": "success",
        "file_name": "ELER-8-100HFV_Data_Sheet.pdf",
        "language": "ch",
        "layout": [],  # *** Key point: this is deliberately empty ***
        "text_regions": regions,
        "total_text_regions": 4,
        "average_confidence": 0.928,
        "layout_data": None,
        "images_metadata": [],
        "markdown_content": "義典科技\nELER-8-100HFV\n表格中的文字\n底部文字",
        "processing_time": 3.2,
        "timestamp": "2025-11-17T00:00:00"
    }


def test_empty_layout_with_text_regions():
    """
    Run the "empty layout, populated text_regions" scenario end to end.

    Scenario:
    - layout: [] (empty list)
    - text_regions: contains high-resolution bbox data
    - the correct OCR dimensions should be inferred from text_regions

    Writes the mock JSON, an A4 source PDF and the generated PDF into
    ``test_output_empty_layout`` (relative to the current directory).

    Returns:
        True when ``generate_layout_pdf`` reports success, False otherwise.

    NOTE(review): the scale factor itself is only printed for manual
    inspection; nothing here checks it programmatically.
    """
    out_dir = Path("test_output_empty_layout")
    out_dir.mkdir(exist_ok=True)

    banner = "=" * 70
    print("\n" + banner)
    print("測試場景:layout=[] 但 text_regions 包含數據")
    print(banner)

    # Persist the mock OCR result that mimics the user's JSON structure.
    json_path = out_dir / "empty_layout_test.json"
    json_path.write_text(
        json.dumps(_build_mock_ocr_payload(), ensure_ascii=False, indent=2),
        encoding="utf-8",
    )

    print(f"\n✓ 創建測試 JSON: {json_path}")
    print(" - layout: [] (空列表)")
    print(" - text_regions: 4 個區域")
    print(" - OCR 座標範圍: X=[400..1850], Y=[252..2880]")
    print(" - 預期 OCR 尺寸: ~1850 x ~2880")

    # Build a one-page A4 PDF to act as the original source document.
    from reportlab.pdfgen import canvas
    from reportlab.lib.pagesizes import A4

    source_pdf = out_dir / "source_a4.pdf"
    page = canvas.Canvas(str(source_pdf), pagesize=A4)
    page.drawString(100, 800, "Original A4 Document")
    page.save()

    print(f"✓ 創建 A4 源文件: {source_pdf}")
    print(" - A4 尺寸: 595 x 842 點")

    # Generate the layout PDF; the dividers frame the service's log output.
    pdf_path = out_dir / "output.pdf"
    divider = "-" * 70

    print("\n開始生成 PDF...")
    print(divider)
    generated = pdf_generator_service.generate_layout_pdf(
        json_path=json_path,
        output_path=pdf_path,
        source_file_path=source_pdf,
    )
    print(divider)

    if not generated:
        print("\n✗ PDF 生成失敗")
        return False

    print(f"\n✓ PDF 生成成功: {pdf_path}")
    print("\n預期結果:")
    print(" - OCR 尺寸(從 text_regions 推斷): ~1850 x ~2880")
    print(" - 目標 PDF 尺寸: 595 x 842")
    print(f" - 預期縮放因子: X={595/1850:.3f}, Y={842/2880:.3f}")
    print("\n如果實際縮放因子是 1.0,說明 Bug 仍存在!")
    return True


if __name__ == "__main__":
    import sys

    # NOTE(review): this runs after the module-level `app.services` import
    # above has already been resolved, so it cannot influence that import.
    sys.path.insert(0, str(Path(__file__).parent))

    passed = test_empty_layout_with_text_regions()

    print("\n" + "=" * 70)
    print("✓ 測試完成" if passed else "✗ 測試失敗")
    print("=" * 70)
    sys.exit(0 if passed else 1)