feat: unify Direct Track PDF rendering and simplify export options

Backend changes:
- Apply background image + invisible text layer to all Direct Track PDFs
- Add CHART to regions_to_avoid for text extraction
- Improve visual fidelity for native PDFs and Office documents

Frontend changes:
- Remove JSON, UnifiedDocument, Markdown download buttons
- Simplify to 2-column layout with only Layout PDF and Reflow PDF
- Remove translation JSON download and Layout PDF option
- Keep only Reflow PDF for translated document downloads
- Clean up unused imports (FileJson, Database, FileOutput)

Archives two OpenSpec proposals:
- unify-direct-track-pdf-rendering
- simplify-frontend-export-options

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 07:50:43 +08:00
parent 53bfa88773
commit 24253ac15e
15 changed files with 891 additions and 195 deletions
--- a/backend/app/services/direct_extraction_engine.py
+++ b/backend/app/services/direct_extraction_engine.py
@@ -2920,6 +2920,7 @@ class DirectExtractionEngine:
        1. Are mostly solid black or white
        2. Are within page boundaries
        3. Actually overlap with text content (IoU check)
+        4. Are rendered AFTER the text they overlap (z-order check)

        Args:
            page: PyMuPDF page object
@@ -2939,6 +2940,22 @@ class DirectExtractionEngine:
            if not image_list:
                return covering_images

+            # Get rendering order (z-order) using get_bboxlog()
+            # Items rendered later (higher index) appear on top
+            bboxlog = page.get_bboxlog()
+
+            # Build a map of bbox -> sequence number for images and text
+            # This helps determine if an image is rendered before or after text
+            image_seqnos = {}  # bbox tuple -> seqno
+            text_seqnos = {}   # bbox tuple -> seqno
+
+            for seqno, (action_type, bbox) in enumerate(bboxlog):
+                bbox_tuple = tuple(fitz.Rect(bbox))
+                if "image" in action_type:
+                    image_seqnos[bbox_tuple] = seqno
+                elif "text" in action_type:
+                    text_seqnos[bbox_tuple] = seqno
+
            # Get all text words for coverage check
            words = page.get_text("words")  # (x0, y0, x1, y1, word, block_no, line_no, word_no)

@@ -3005,8 +3022,23 @@ class DirectExtractionEngine:
                            # Clip image rect to page boundaries
                            clipped_rect = img_rect & page_rect

+                            # Get image's rendering sequence number
+                            img_bbox_tuple = tuple(clipped_rect)
+                            img_seqno = image_seqnos.get(img_bbox_tuple, -1)
+
+                            # If there is no exact bbox match, fall back to the first logged image bbox that intersects this one
+                            if img_seqno == -1:
+                                for bbox_tuple, seqno in image_seqnos.items():
+                                    if fitz.Rect(bbox_tuple).intersects(clipped_rect):
+                                        # Use the matching seqno
+                                        img_seqno = seqno
+                                        break
+
                            # Check if image actually covers any text (IoU check)
+                            # AND is rendered AFTER the text (z-order check)
                            covered_text_count = 0
+                            is_background_image = False
+
                            for word_info in words:
                                word_rect = fitz.Rect(word_info[:4])
                                word_area = word_rect.width * word_rect.height
@@ -3017,13 +3049,35 @@ class DirectExtractionEngine:
                                if not intersection.is_empty:
                                    intersection_area = intersection.width * intersection.height
                                    coverage_ratio = intersection_area / word_area
+
                                    # Count as covered if >= 50% of word is under the image
                                    if coverage_ratio >= 0.5:
-                                        covered_text_count += 1
+                                        # Z-order check: Find the text's rendering sequence
+                                        text_seqno = -1
+                                        for bbox_tuple, seqno in text_seqnos.items():
+                                            text_bbox = fitz.Rect(bbox_tuple)
+                                            if text_bbox.intersects(word_rect):
+                                                text_seqno = seqno
+                                                break
+
+                                        # Only count as covered if image is rendered AFTER text
+                                        # If image is rendered BEFORE text, it's a background
+                                        if img_seqno > text_seqno and text_seqno >= 0:
+                                            covered_text_count += 1
+                                        elif img_seqno < text_seqno and img_seqno >= 0:
+                                            # Image is rendered before text = background
+                                            is_background_image = True
+
+                            # Skip this image if it's detected as a background image
+                            if is_background_image and covered_text_count == 0:
+                                logger.debug(f"Page {page_num}: Skipping background image xref={xref} "
+                                           f"(rendered before text, seqno={img_seqno})")
+                                continue

                            # Report if image covers text OR is pure solid black/white
                            # Pure solid fills are likely redaction/placeholder boxes
-                            if covered_text_count > 0 or is_pure_solid:
+                            # But skip if it's a background image (rendered before text)
+                            if covered_text_count > 0 or (is_pure_solid and not is_background_image):
                                covering_images.append({
                                    'xref': xref,  # Include xref for filtering
                                    'bbox': tuple(clipped_rect),
@@ -3031,7 +3085,9 @@ class DirectExtractionEngine:
                                    'avg_color': (avg_r, avg_g, avg_b),
                                    'size': (width, height),
                                    'covered_text_count': covered_text_count,
-                                    'is_pure_solid': is_pure_solid
+                                    'is_pure_solid': is_pure_solid,
+                                    'is_background': is_background_image,
+                                    'render_seqno': img_seqno
                                })

                except Exception as e: