feat: update PDF generator to support UnifiedDocument directly
- Add generate_from_unified_document() method for direct UnifiedDocument processing
- Create convert_unified_document_to_ocr_data() for format conversion
- Extract _generate_pdf_from_data() as reusable core logic
- Support both OCR and DIRECT processing tracks in PDF generation
- Handle coordinate transformations (BoundingBox to polygon format)
- Update OCR service to use appropriate PDF generation method

Completes Section 4 (Unified Processing Pipeline) of dual-track proposal.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
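The coordinate handling summarized above comes down to two small conventions that recur throughout the diff: a `BoundingBox` with `x0/y0/x1/y1` corners is expanded into the 4-point polygon the legacy OCR format expects, and 1-based `page_number` values are shifted to 0-based indices for `images_metadata` and `layout_data`. A minimal sketch (the `BBox` dataclass is an illustrative stand-in, not the project's `BoundingBox` model):

```python
from dataclasses import dataclass

@dataclass
class BBox:
    x0: float  # left
    y0: float  # top
    x1: float  # right
    y1: float  # bottom

def bbox_to_polygon(b: BBox) -> list[list[float]]:
    # Same corner order as the diff: top-left, top-right, bottom-right, bottom-left
    return [[b.x0, b.y0], [b.x1, b.y0], [b.x1, b.y1], [b.x0, b.y1]]

page_number = 3                          # UnifiedDocument pages are 1-based
images_metadata_page = page_number - 1   # images_metadata / layout_data use 0-based pages

print(bbox_to_polygon(BBox(10, 20, 110, 60)))
# [[10, 20], [110, 20], [110, 60], [10, 60]]
```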
@@ -1223,6 +1223,16 @@ class OCRService:
 
         logger.info(f"Generating layout-preserving PDF: {pdf_filename}")
 
-        success = pdf_generator_service.generate_layout_pdf(
-            json_path=json_path,
-            output_path=pdf_path,
+        # Use appropriate method based on result type
+        if isinstance(result, UnifiedDocument):
+            # Use direct UnifiedDocument generation for better accuracy
+            success = pdf_generator_service.generate_from_unified_document(
+                unified_doc=result,
+                output_path=pdf_path,
+                source_file_path=source_file_path
+            )
+        else:
+            # Legacy path: use JSON file
+            success = pdf_generator_service.generate_layout_pdf(
+                json_path=json_path,
+                output_path=pdf_path,
@@ -24,6 +24,17 @@ from html.parser import HTMLParser
 
 from app.core.config import settings
 
+# Import UnifiedDocument for dual-track support
+try:
+    from app.models.unified_document import (
+        UnifiedDocument, DocumentElement, ElementType,
+        BoundingBox, TableData, ProcessingTrack
+    )
+    UNIFIED_DOCUMENT_AVAILABLE = True
+except ImportError:
+    UNIFIED_DOCUMENT_AVAILABLE = False
+    UnifiedDocument = None
+
 logger = logging.getLogger(__name__)
 
 
@@ -138,6 +149,310 @@ class PDFGeneratorService:
             logger.error(f"Failed to load JSON {json_path}: {e}")
             return None
 
+    def convert_unified_document_to_ocr_data(self, unified_doc: 'UnifiedDocument') -> Dict:
+        """
+        Convert UnifiedDocument to OCR data format for PDF generation.
+
+        This method transforms the UnifiedDocument structure into the legacy
+        OCR data format that the PDF generator expects, supporting both
+        OCR and DIRECT processing tracks.
+
+        Args:
+            unified_doc: UnifiedDocument object from either processing track
+
+        Returns:
+            Dictionary in OCR data format with text_regions, images_metadata, layout_data
+        """
+        text_regions = []
+        images_metadata = []
+        layout_elements = []
+
+        for page in unified_doc.pages:
+            page_num = page.page_number  # 1-based
+
+            for element in page.elements:
+                # Convert BoundingBox to polygon format [[x,y], [x,y], [x,y], [x,y]]
+                bbox_polygon = [
+                    [element.bbox.x0, element.bbox.y0],  # top-left
+                    [element.bbox.x1, element.bbox.y0],  # top-right
+                    [element.bbox.x1, element.bbox.y1],  # bottom-right
+                    [element.bbox.x0, element.bbox.y1],  # bottom-left
+                ]
+
+                # Handle text elements
+                if element.is_text or element.type in [
+                    ElementType.TEXT, ElementType.TITLE, ElementType.HEADER,
+                    ElementType.FOOTER, ElementType.PARAGRAPH, ElementType.CAPTION,
+                    ElementType.LIST_ITEM, ElementType.FOOTNOTE, ElementType.REFERENCE
+                ]:
+                    text_content = element.get_text()
+                    if text_content:
+                        text_regions.append({
+                            'text': text_content,
+                            'bbox': bbox_polygon,
+                            'confidence': element.confidence or 1.0,
+                            'page': page_num
+                        })
+
+                # Handle table elements
+                elif element.type == ElementType.TABLE:
+                    # Convert TableData to HTML for layout_data
+                    if isinstance(element.content, TableData):
+                        html_content = element.content.to_html()
+                    elif isinstance(element.content, dict):
+                        html_content = element.content.get('html', str(element.content))
+                    else:
+                        html_content = str(element.content)
+
+                    layout_elements.append({
+                        'type': 'table',
+                        'content': html_content,
+                        'bbox': [element.bbox.x0, element.bbox.y0,
+                                 element.bbox.x1, element.bbox.y1],
+                        'page': page_num - 1  # layout uses 0-based
+                    })
+
+                    # Also add to images_metadata for overlap filtering
+                    # Tables are often rendered as images
+                    table_id = element.element_id or f"table_{page_num}_{len(images_metadata)}"
+                    images_metadata.append({
+                        'image_path': f"table_{table_id}.png",
+                        'bbox': bbox_polygon,
+                        'page': page_num - 1,  # 0-based for images_metadata
+                        'type': 'table'
+                    })
+
+                # Handle image/visual elements
+                elif element.is_visual or element.type in [
+                    ElementType.IMAGE, ElementType.FIGURE, ElementType.CHART,
+                    ElementType.DIAGRAM, ElementType.LOGO
+                ]:
+                    # Get image path from content or metadata
+                    if isinstance(element.content, dict):
+                        image_path = element.content.get('path', '')
+                    else:
+                        image_path = element.metadata.get('path', f"image_{element.element_id}.png")
+
+                    images_metadata.append({
+                        'image_path': image_path,
+                        'bbox': bbox_polygon,
+                        'page': page_num - 1,  # 0-based
+                        'type': element.type.value
+                    })
+
+        # Build OCR data structure
+        ocr_data = {
+            'text_regions': text_regions,
+            'images_metadata': images_metadata,
+            'layout_data': {
+                'elements': layout_elements,
+                'total_elements': len(layout_elements)
+            },
+            'total_pages': unified_doc.page_count,
+            'ocr_dimensions': {
+                'width': unified_doc.pages[0].dimensions.width if unified_doc.pages else 0,
+                'height': unified_doc.pages[0].dimensions.height if unified_doc.pages else 0
+            },
+            # Metadata for tracking
+            '_from_unified_document': True,
+            '_processing_track': unified_doc.metadata.processing_track.value
+        }
+
+        logger.info(f"Converted UnifiedDocument to OCR data: "
+                    f"{len(text_regions)} text regions, "
+                    f"{len(images_metadata)} images, "
+                    f"{len(layout_elements)} layout elements, "
+                    f"track={unified_doc.metadata.processing_track.value}")
+
+        return ocr_data
+
+    def generate_from_unified_document(
+        self,
+        unified_doc: 'UnifiedDocument',
+        output_path: Path,
+        source_file_path: Optional[Path] = None
+    ) -> bool:
+        """
+        Generate layout-preserving PDF directly from UnifiedDocument.
+
+        This method supports both OCR and DIRECT processing tracks,
+        preserving layout and coordinate information from either source.
+
+        Args:
+            unified_doc: UnifiedDocument object
+            output_path: Path to save generated PDF
+            source_file_path: Optional path to original source file
+
+        Returns:
+            True if successful, False otherwise
+        """
+        if not UNIFIED_DOCUMENT_AVAILABLE:
+            logger.error("UnifiedDocument support not available")
+            return False
+
+        try:
+            # Convert UnifiedDocument to OCR data format
+            ocr_data = self.convert_unified_document_to_ocr_data(unified_doc)
+
+            # Use internal generation with pre-loaded data
+            return self._generate_pdf_from_data(
+                ocr_data=ocr_data,
+                output_path=output_path,
+                source_file_path=source_file_path
+            )
+
+        except Exception as e:
+            logger.error(f"Failed to generate PDF from UnifiedDocument: {e}")
+            import traceback
+            traceback.print_exc()
+            return False
+
+    def _generate_pdf_from_data(
+        self,
+        ocr_data: Dict,
+        output_path: Path,
+        source_file_path: Optional[Path] = None,
+        json_parent_dir: Optional[Path] = None
+    ) -> bool:
+        """
+        Internal method to generate PDF from OCR data dictionary.
+
+        This is the core generation logic extracted for reuse by both
+        JSON-based and UnifiedDocument-based generation paths.
+
+        Args:
+            ocr_data: OCR data dictionary
+            output_path: Path to save generated PDF
+            source_file_path: Optional path to original source file
+            json_parent_dir: Directory containing images (for JSON-based generation)
+
+        Returns:
+            True if successful, False otherwise
+        """
+        try:
+            # Check if PDF already exists (caching)
+            if output_path.exists():
+                logger.info(f"PDF already exists: {output_path.name}")
+                return True
+
+            # Get text regions
+            text_regions = ocr_data.get('text_regions', [])
+            if not text_regions:
+                logger.warning("No text regions found in data")
+                # Don't fail - might have only tables/images
+
+            # Get images metadata
+            images_metadata = ocr_data.get('images_metadata', [])
+
+            # Get layout data
+            layout_data = ocr_data.get('layout_data', {})
+
+            # Step 1: Get OCR processing dimensions
+            ocr_width, ocr_height = self.calculate_page_dimensions(ocr_data, source_file_path=None)
+            logger.info(f"OCR 處理時使用的座標系尺寸: {ocr_width:.1f} x {ocr_height:.1f}")
+
+            # Step 2: Get target PDF dimensions
+            if source_file_path:
+                target_dims = self.get_original_page_size(source_file_path)
+                if target_dims:
+                    target_width, target_height = target_dims
+                    logger.info(f"目標 PDF 尺寸(來自原始文件): {target_width:.1f} x {target_height:.1f}")
+                else:
+                    target_width, target_height = ocr_width, ocr_height
+                    logger.warning(f"無法獲取原始文件尺寸,使用 OCR 尺寸作為目標")
+            else:
+                target_width, target_height = ocr_width, ocr_height
+                logger.info(f"無原始文件,使用 OCR 尺寸作為目標: {target_width:.1f} x {target_height:.1f}")
+
+            # Step 3: Calculate scale factors
+            scale_w = target_width / ocr_width if ocr_width > 0 else 1.0
+            scale_h = target_height / ocr_height if ocr_height > 0 else 1.0
+            logger.info(f"縮放因子: X={scale_w:.3f}, Y={scale_h:.3f}")
+
+            # Create PDF canvas
+            pdf_canvas = canvas.Canvas(str(output_path), pagesize=(target_width, target_height))
+
+            # Filter text regions to avoid overlap with tables/images
+            regions_to_avoid = images_metadata
+            table_count = len([img for img in images_metadata if 'table' in img.get('image_path', '').lower()])
+
+            logger.info(f"過濾文字區域: {len(regions_to_avoid)} 個區域需要避免")
+
+            filtered_text_regions = self._filter_text_in_regions(text_regions, regions_to_avoid)
+
+            # Group regions by page
+            pages_data = {}
+            for region in filtered_text_regions:
+                page_num = region.get('page', 1)
+                if page_num not in pages_data:
+                    pages_data[page_num] = []
+                pages_data[page_num].append(region)
+
+            # Get table elements from layout_data
+            table_elements = []
+            if layout_data and layout_data.get('elements'):
+                table_elements = [e for e in layout_data['elements'] if e.get('type') == 'table']
+
+            # Process each page
+            total_pages = ocr_data.get('total_pages', 1)
+            logger.info(f"開始處理 {total_pages} 頁 PDF")
+
+            # Determine image directory
+            if json_parent_dir is None:
+                json_parent_dir = output_path.parent
+
+            for page_num in range(1, total_pages + 1):
+                logger.info(f">>> 處理第 {page_num}/{total_pages} 頁")
+                if page_num > 1:
+                    pdf_canvas.showPage()
+
+                # Get regions for this page
+                page_text_regions = pages_data.get(page_num, [])
+                page_table_regions = [t for t in table_elements if t.get('page') == page_num - 1]
+                page_image_regions = [
+                    img for img in images_metadata
+                    if img.get('page') == page_num - 1
+                    and 'table' not in img.get('image_path', '').lower()
+                ]
+
+                # Draw in layers: images → tables → text
+
+                # 1. Draw images (bottom layer)
+                for img_meta in page_image_regions:
+                    self.draw_image_region(
+                        pdf_canvas, img_meta, target_height,
+                        json_parent_dir, scale_w, scale_h
+                    )
+
+                # 2. Draw tables (middle layer)
+                for table_elem in page_table_regions:
+                    self.draw_table_region(
+                        pdf_canvas, table_elem, images_metadata,
+                        target_height, scale_w, scale_h
+                    )
+
+                # 3. Draw text (top layer)
+                for region in page_text_regions:
+                    self.draw_text_region(
+                        pdf_canvas, region, target_height,
+                        scale_w, scale_h
+                    )
+
+                logger.info(f"<<< 第 {page_num} 頁完成")
+
+            # Save PDF
+            pdf_canvas.save()
+
+            file_size = output_path.stat().st_size
+            logger.info(f"Generated PDF: {output_path.name} ({file_size} bytes)")
+            return True
+
+        except Exception as e:
+            logger.error(f"Failed to generate PDF: {e}")
+            import traceback
+            traceback.print_exc()
+            return False
+
     def calculate_page_dimensions(self, ocr_data: Dict, source_file_path: Optional[Path] = None) -> Tuple[float, float]:
         """
         從 OCR JSON 數據中推斷 OCR 處理時的實際頁面尺寸。
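The three steps in `_generate_pdf_from_data()` above amount to a linear mapping from the OCR coordinate space onto the target PDF page, followed by ReportLab's bottom-left-origin convention when drawing. A worked sketch with illustrative numbers (an A4 page rasterized at 300 DPI; the `draw_*` helpers are outside this diff, so the y-flip shown here is an assumption about how they use `target_height`):

```python
# Illustrative dimensions, not taken from the diff:
ocr_width, ocr_height = 2480.0, 3508.0       # A4 rasterized at 300 DPI (pixels)
target_width, target_height = 595.3, 841.9   # A4 in PDF points (72 per inch)

# Step 3 of _generate_pdf_from_data(): per-axis scale factors
scale_w = target_width / ocr_width if ocr_width > 0 else 1.0    # ~0.240
scale_h = target_height / ocr_height if ocr_height > 0 else 1.0 # ~0.240

def to_pdf_point(x: float, y: float) -> tuple[float, float]:
    """Map an OCR coordinate (top-left origin, y grows downward) to a
    ReportLab coordinate (bottom-left origin, y grows upward) on the
    target page, using the scale factors computed above."""
    return x * scale_w, target_height - y * scale_h

print(to_pdf_point(1240, 1754))  # roughly the page centre: (~297.6, ~420.9)
```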
@@ -717,141 +1032,19 @@ class PDFGeneratorService:
             True if successful, False otherwise
         """
         try:
-            # Check if PDF already exists (caching)
-            if output_path.exists():
-                logger.info(f"PDF already exists: {output_path.name}")
-                return True
-
             # Load JSON data
             ocr_data = self.load_ocr_json(json_path)
             if not ocr_data:
                 return False
 
-            # Get text regions
-            text_regions = ocr_data.get('text_regions', [])
-            if not text_regions:
-                logger.warning("No text regions found in JSON")
-                return False
-
-            # Get images metadata
-            images_metadata = ocr_data.get('images_metadata', [])
-
-            # Get layout data
-            layout_data = ocr_data.get('layout_data', {})
-
-            # Step 1: Get OCR processing dimensions (the large image OCR actually used)
-            # This comes from analyzing all bbox coordinates in the OCR data
-            ocr_width, ocr_height = self.calculate_page_dimensions(ocr_data, source_file_path=None)
-            logger.info(f"OCR 處理時使用的座標系尺寸: {ocr_width:.1f} x {ocr_height:.1f}")
-
-            # Step 2: Get target PDF dimensions (usually the original file size)
-            # This is what we want the final PDF size to be
-            if source_file_path:
-                target_dims = self.get_original_page_size(source_file_path)
-                if target_dims:
-                    target_width, target_height = target_dims
-                    logger.info(f"目標 PDF 尺寸(來自原始文件): {target_width:.1f} x {target_height:.1f}")
-                else:
-                    # If we can't get original size, use OCR dimensions as target
-                    target_width, target_height = ocr_width, ocr_height
-                    logger.warning(f"無法獲取原始文件尺寸,使用 OCR 尺寸作為目標: {target_width:.1f} x {target_height:.1f}")
-            else:
-                # No source file, use OCR dimensions as target (1:1 mapping)
-                target_width, target_height = ocr_width, ocr_height
-                logger.info(f"無原始文件,使用 OCR 尺寸作為目標: {target_width:.1f} x {target_height:.1f}")
-
-            # Step 3: Calculate scale factors to convert OCR coordinates to PDF coordinates
-            scale_w = target_width / ocr_width
-            scale_h = target_height / ocr_height
-            logger.info(f"縮放因子: X={scale_w:.3f}, Y={scale_h:.3f} (OCR座標 → PDF座標)")
-
-            # Create PDF canvas with target dimensions
-            pdf_canvas = canvas.Canvas(str(output_path), pagesize=(target_width, target_height))
-
-            # *** 關鍵修復:收集所有需要避免的區域(表格 + 圖片)***
-            # 注意:OCR JSON 中沒有 'tables' 和 'image_regions' 頂層欄位
-            # 重要發現:
-            # - layout_data.elements 中的表格元素沒有 bbox(都是空列表)
-            # - images_metadata 包含所有表格和圖片,並且有正確的 bbox
-            # - 因此,只需使用 images_metadata 來過濾文字即可
-
-            # 使用 images_metadata 作為要避免的區域(包含表格圖片和其他圖片)
-            regions_to_avoid = images_metadata
-
-            table_count = len([img for img in images_metadata if 'table' in img.get('image_path', '').lower()])
-            other_count = len(images_metadata) - table_count
-
-            logger.info(f"使用 images_metadata 過濾文字區域:")
-            logger.info(f"  - 表格圖片: {table_count}")
-            logger.info(f"  - 其他圖片: {other_count}")
-            logger.info(f"  - 總計需要避免的區域: {len(regions_to_avoid)}")
-
-            # 使用新的過濾函式過濾文字區域
-            filtered_text_regions = self._filter_text_in_regions(text_regions, regions_to_avoid)
-
-            # Group regions by page
-            pages_data = {}
-            for region in filtered_text_regions:
-                page_num = region.get('page', 1)
-                if page_num not in pages_data:
-                    pages_data[page_num] = []
-                pages_data[page_num].append(region)
-
-            # Get table elements from layout_data
-            table_elements = []
-            if layout_data and layout_data.get('elements'):
-                table_elements = [e for e in layout_data['elements'] if e.get('type') == 'table']
-
-            # Process each page
-            total_pages = ocr_data.get('total_pages', 1)
-            logger.info(f"=" * 70)
-            logger.info(f"開始處理 {total_pages} 頁 PDF")
-            logger.info(f"=" * 70)
-
-            for page_num in range(1, total_pages + 1):
-                logger.info(f"\n>>> 處理第 {page_num}/{total_pages} 頁")
-                if page_num > 1:
-                    pdf_canvas.showPage()  # Start new page
-
-                # Get filtered regions for this page
-                page_text_regions = pages_data.get(page_num, [])
-                page_table_regions = [t for t in table_elements if t.get('page') == page_num - 1]
-                page_image_regions = [img for img in images_metadata if img.get('page') == page_num - 1 and 'table' not in img.get('image_path', '').lower()]
-
-                # 繪製順序:圖片(底層) → 表格(中間層) → 文字(最上層)
-
-                # 1. Draw images first (bottom layer)
-                logger.info(f"第 {page_num} 頁: 繪製 {len(page_image_regions)} 個圖片")
-                for img_meta in page_image_regions:
-                    self.draw_image_region(
-                        pdf_canvas,
-                        img_meta,
-                        target_height,
-                        json_path.parent,
-                        scale_w,
-                        scale_h
-                    )
-
-                # 2. Draw tables (middle layer)
-                logger.info(f"第 {page_num} 頁: 繪製 {len(page_table_regions)} 個表格")
-                for table_elem in page_table_regions:
-                    self.draw_table_region(pdf_canvas, table_elem, images_metadata, target_height, scale_w, scale_h)
-
-                # 3. Draw text regions last (top layer) - excluding table text
-                logger.info(f"第 {page_num} 頁: 繪製 {len(page_text_regions)} 個文字區域")
-                for i, region in enumerate(page_text_regions, 1):
-                    logger.debug(f"  文字 {i}/{len(page_text_regions)}")
-                    self.draw_text_region(pdf_canvas, region, target_height, scale_w, scale_h)
-
-                logger.info(f"<<< 第 {page_num} 頁完成")
-
-            # Save PDF
-            pdf_canvas.save()
-
-            file_size = output_path.stat().st_size
-            logger.info(f"Generated layout-preserving PDF: {output_path.name} ({file_size} bytes)")
-            return True
+            # Use internal generation with pre-loaded data
+            return self._generate_pdf_from_data(
+                ocr_data=ocr_data,
+                output_path=output_path,
+                source_file_path=source_file_path,
+                json_parent_dir=json_path.parent
+            )
 
         except Exception as e:
             logger.error(f"Failed to generate PDF: {e}")
             import traceback
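After this refactor, both generation paths share the same layering strategy: table and image regions from `images_metadata` are treated as regions to avoid, and OCR text falling inside them is filtered out before drawing so rendered table/figure images are not overprinted with text. `_filter_text_in_regions()` itself is not part of this diff; a rough, purely illustrative sketch of that kind of overlap filter, using the polygon bbox format from the converter above:

```python
def _poly_to_rect(poly):
    # Collapse a 4-point polygon [[x,y], ...] into (x0, y0, x1, y1)
    xs = [p[0] for p in poly]
    ys = [p[1] for p in poly]
    return min(xs), min(ys), max(xs), max(ys)

def _mostly_inside(text_bbox, avoid_bbox, threshold=0.5):
    # True if at least `threshold` of the text box area lies inside the avoid box
    tx0, ty0, tx1, ty1 = _poly_to_rect(text_bbox)
    ax0, ay0, ax1, ay1 = _poly_to_rect(avoid_bbox)
    ix = max(0.0, min(tx1, ax1) - max(tx0, ax0))
    iy = max(0.0, min(ty1, ay1) - max(ty0, ay0))
    area = (tx1 - tx0) * (ty1 - ty0)
    return area > 0 and (ix * iy) / area >= threshold

def filter_text_in_regions(text_regions, regions_to_avoid):
    # Drop text regions that mostly overlap a table/image region on the same page;
    # text 'page' is 1-based, images_metadata 'page' is 0-based (an assumption
    # mirroring the conventions in convert_unified_document_to_ocr_data()).
    return [
        t for t in text_regions
        if not any(
            t.get('page', 1) - 1 == r.get('page', 0)
            and _mostly_inside(t['bbox'], r['bbox'])
            for r in regions_to_avoid
        )
    ]
```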
@@ -63,10 +63,10 @@
 - [x] 4.2.1 Define standardized JSON schema
 - [x] 4.2.2 Include processing metadata
 - [x] 4.2.3 Support both track outputs
-- [ ] 4.3 Update PDF generator for UnifiedDocument
-- [ ] 4.3.1 Adapt PDF generation to use UnifiedDocument
-- [ ] 4.3.2 Preserve layout from both tracks
-- [ ] 4.3.3 Handle coordinate transformations
+- [x] 4.3 Update PDF generator for UnifiedDocument
+- [x] 4.3.1 Adapt PDF generation to use UnifiedDocument
+- [x] 4.3.2 Preserve layout from both tracks
+- [x] 4.3.3 Handle coordinate transformations
 
 ## 5. Translation System Foundation
 - [ ] 5.1 Create TranslationEngine interface