chore: backup before code cleanup
Backup commit before executing remove-unused-code proposal. This includes all pending changes and new features. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
664
backend/app/services/text_region_renderer.py
Normal file
664
backend/app/services/text_region_renderer.py
Normal file
@@ -0,0 +1,664 @@
|
||||
"""
Simple Text Region Renderer

Renders raw OCR text regions directly to PDF at their detected positions,
with rotation correction based on bbox quadrilateral geometry.

This approach bypasses complex table structure reconstruction and simply
places text at the positions detected by PaddleOCR.
"""
|
||||
|
||||
import math
|
||||
import logging
|
||||
from typing import Dict, List, Optional, Set, Tuple
|
||||
|
||||
from reportlab.pdfgen import canvas
|
||||
from reportlab.lib.colors import black
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TextRegionRenderer:
    """
    Render raw OCR text regions to PDF with position and rotation correction.

    This renderer takes the raw OCR output (text + quadrilateral bbox) and
    renders text at the correct position. Small rotation angles are ignored
    (straightened) to produce clean, aligned text output.

    Bboxes are lists of [x, y] points in image coordinates (origin top-left,
    Y increasing downward), ordered
    [top-left, top-right, bottom-right, bottom-left].
    """

    # Minimum font size to prevent illegible text
    MIN_FONT_SIZE = 6.0

    # Maximum font size to prevent oversized text
    MAX_FONT_SIZE = 72.0

    # Font size estimation factor (font height relative to bbox height)
    FONT_SIZE_FACTOR = 0.75

    # Rotation angle threshold - angles smaller than this are straightened to 0
    # This compensates for slight scan skew and produces cleaner output
    ROTATION_STRAIGHTEN_THRESHOLD = 10.0  # degrees

    # IoA (Intersection over Area) threshold for text-image overlap detection
    # If text bbox overlaps with image by more than this ratio, skip the text
    IOA_OVERLAP_THRESHOLD = 0.3  # 30% overlap

    def __init__(
        self,
        font_name: str = 'NotoSansSC',
        debug: bool = False,
        straighten_threshold: Optional[float] = None,
        ioa_threshold: Optional[float] = None
    ):
        """
        Initialize the text region renderer.

        Args:
            font_name: Name of the registered font to use
            debug: Enable debug logging
            straighten_threshold: Override rotation straightening threshold
                (degrees). ``None`` selects ROTATION_STRAIGHTEN_THRESHOLD;
                an explicit ``0`` is honoured and disables straightening.
            ioa_threshold: Override IoA overlap threshold for text-image
                avoidance. ``None`` selects IOA_OVERLAP_THRESHOLD.
        """
        self.font_name = font_name
        self.debug = debug
        # Compare against None rather than using ``or``: 0.0 is a legitimate
        # override and must not be replaced by the class default.
        self.straighten_threshold = (
            self.ROTATION_STRAIGHTEN_THRESHOLD
            if straighten_threshold is None
            else straighten_threshold
        )
        self.ioa_threshold = (
            self.IOA_OVERLAP_THRESHOLD
            if ioa_threshold is None
            else ioa_threshold
        )

    @staticmethod
    def _region_text_and_bbox(region: Dict) -> Tuple[str, List[List[float]]]:
        """
        Extract the stripped text and the bbox from a raw OCR region dict.

        Missing or ``None`` values (and non-string text) are normalized to
        an empty string / empty list so callers can skip the region instead
        of raising.
        """
        raw_text = region.get('text')
        text = raw_text.strip() if isinstance(raw_text, str) else ''
        bbox = region.get('bbox')
        if bbox is None:
            bbox = []
        return text, bbox

    def calculate_rotation(self, bbox: List[List[float]]) -> float:
        """
        Calculate text rotation angle from bbox quadrilateral.

        The bbox is a quadrilateral with 4 corner points in order:
        [top-left, top-right, bottom-right, bottom-left]

        Returns angle in degrees (counter-clockwise from horizontal).
        Positive angle means text is tilted upward to the right.

        NOTE: Small angles (< straighten_threshold) will be treated as 0
        during rendering to produce clean, aligned output.

        Args:
            bbox: List of 4 [x, y] coordinate pairs

        Returns:
            Rotation angle in degrees (0.0 if fewer than 2 points are given)
        """
        if len(bbox) < 2:
            return 0.0

        # Top-left to top-right vector (top edge)
        dx = bbox[1][0] - bbox[0][0]
        dy = bbox[1][1] - bbox[0][1]

        # In image coordinates Y increases downward, so dy is negated to
        # obtain the conventional counter-clockwise angle.
        angle_deg = math.degrees(math.atan2(-dy, dx))

        if self.debug:
            logger.debug(f"Rotation calculation: dx={dx:.1f}, dy={dy:.1f}, angle={angle_deg:.2f}°")

        return angle_deg

    def estimate_font_size(
        self,
        bbox: List[List[float]],
        text: str,
        scale_factor: float = 1.0
    ) -> float:
        """
        Estimate appropriate font size from bbox dimensions.

        Uses the bbox height (average length of the left and right edges),
        scaled by ``scale_factor`` and FONT_SIZE_FACTOR, then clamped to
        [MIN_FONT_SIZE, MAX_FONT_SIZE].

        Args:
            bbox: List of 4 [x, y] coordinate pairs
            text: The text content (currently unused; kept for the interface)
            scale_factor: Coordinate scaling factor

        Returns:
            Estimated font size in points (12.0 if the bbox has < 4 points)
        """
        if len(bbox) < 4:
            return 12.0  # Default font size

        # Calculate bbox height (average of left and right edges)
        left_height = math.dist(bbox[0], bbox[3])
        right_height = math.dist(bbox[1], bbox[2])
        avg_height = (left_height + right_height) / 2

        # Apply scale factor and font size ratio
        font_size = avg_height * scale_factor * self.FONT_SIZE_FACTOR

        # Clamp to reasonable range
        font_size = max(self.MIN_FONT_SIZE, min(self.MAX_FONT_SIZE, font_size))

        if self.debug:
            logger.debug(f"Font size estimation: bbox_h={avg_height:.1f}, "
                         f"scale={scale_factor:.3f}, font={font_size:.1f}pt")

        return font_size

    def get_bbox_center(self, bbox: List[List[float]]) -> Tuple[float, float]:
        """
        Calculate the center point of a bbox quadrilateral.

        The center is the mean of the vertex coordinates.

        Args:
            bbox: List of 4 [x, y] coordinate pairs

        Returns:
            Tuple of (center_x, center_y); (0.0, 0.0) if fewer than 4 points
        """
        if len(bbox) < 4:
            return (0.0, 0.0)

        # Divide by the actual number of points so the mean stays correct
        # even if a polygon with more than four vertices is passed.
        count = len(bbox)
        center_x = sum(p[0] for p in bbox) / count
        center_y = sum(p[1] for p in bbox) / count
        return (center_x, center_y)

    def get_bbox_as_rect(self, bbox: List[List[float]]) -> Tuple[float, float, float, float]:
        """
        Convert quadrilateral bbox to axis-aligned rectangle (x0, y0, x1, y1).

        Args:
            bbox: List of 4 [x, y] coordinate pairs

        Returns:
            Tuple of (x0, y0, x1, y1) - min/max coordinates;
            all zeros if fewer than 4 points
        """
        if len(bbox) < 4:
            return (0.0, 0.0, 0.0, 0.0)

        x_coords = [p[0] for p in bbox]
        y_coords = [p[1] for p in bbox]
        return (min(x_coords), min(y_coords), max(x_coords), max(y_coords))

    def get_bbox_left_baseline(
        self,
        bbox: List[List[float]]
    ) -> Tuple[float, float]:
        """
        Get the left baseline point for text rendering.

        For left-aligned text, we use the bottom-left corner as the
        baseline starting point (text baseline is at the bottom).

        Args:
            bbox: List of 4 [x, y] coordinate pairs

        Returns:
            Tuple of (x, y) for the left baseline point;
            (0.0, 0.0) if fewer than 4 points
        """
        if len(bbox) < 4:
            return (0.0, 0.0)

        # bbox[3] is bottom-left in the standard ordering
        bottom_left = bbox[3]
        return (bottom_left[0], bottom_left[1])

    def calculate_ioa(
        self,
        text_rect: Tuple[float, float, float, float],
        image_rect: Tuple[float, float, float, float]
    ) -> float:
        """
        Calculate Intersection over Area (IoA) of text bbox with image bbox.

        IoA = intersection_area / text_area

        This measures how much of the text region overlaps with the image.

        Args:
            text_rect: Text bbox as (x0, y0, x1, y1)
            image_rect: Image bbox as (x0, y0, x1, y1)

        Returns:
            IoA ratio (0.0 to 1.0); 0.0 for a degenerate text rectangle
            or when the rectangles do not intersect
        """
        tx0, ty0, tx1, ty1 = text_rect
        ix0, iy0, ix1, iy1 = image_rect

        # Calculate text area; a degenerate rectangle cannot overlap anything
        text_area = (tx1 - tx0) * (ty1 - ty0)
        if text_area <= 0:
            return 0.0

        # Calculate intersection
        inter_x0 = max(tx0, ix0)
        inter_y0 = max(ty0, iy0)
        inter_x1 = min(tx1, ix1)
        inter_y1 = min(ty1, iy1)

        if inter_x0 >= inter_x1 or inter_y0 >= inter_y1:
            return 0.0  # No intersection

        inter_area = (inter_x1 - inter_x0) * (inter_y1 - inter_y0)
        return inter_area / text_area

    def is_overlapping_exclusion_zones(
        self,
        bbox: List[List[float]],
        exclusion_zones: List[Tuple[float, float, float, float]]
    ) -> bool:
        """
        Check if text bbox overlaps significantly with any exclusion zone.

        "Significantly" means the IoA with a zone is at least
        ``self.ioa_threshold``.

        Args:
            bbox: Text bbox as quadrilateral
            exclusion_zones: List of (x0, y0, x1, y1) rectangles to avoid

        Returns:
            True if text should be skipped due to overlap
        """
        if not exclusion_zones:
            return False

        text_rect = self.get_bbox_as_rect(bbox)

        for zone in exclusion_zones:
            ioa = self.calculate_ioa(text_rect, zone)
            if ioa >= self.ioa_threshold:
                if self.debug:
                    logger.debug(f"Text overlaps exclusion zone: IoA={ioa:.2f} >= {self.ioa_threshold}")
                return True

        return False

    def is_inside_zone(
        self,
        bbox: List[List[float]],
        zone: Tuple[float, float, float, float],
        threshold: float = 0.5
    ) -> bool:
        """
        Check if text bbox is inside a zone (for collecting chart texts).

        Args:
            bbox: Text bbox as quadrilateral
            zone: Zone as (x0, y0, x1, y1) rectangle
            threshold: Minimum IoA to consider "inside"

        Returns:
            True if text is inside the zone
        """
        text_rect = self.get_bbox_as_rect(bbox)
        return self.calculate_ioa(text_rect, zone) >= threshold

    def is_axis_label(
        self,
        bbox: List[List[float]],
        zone: Tuple[float, float, float, float],
        margin: float = 50.0
    ) -> bool:
        """
        Check if text bbox is an axis label for a chart/image zone.

        Axis labels are typically:
        - Vertical text to the LEFT of the chart (Y-axis label)
        - Horizontal text BELOW the chart (X-axis label)

        Args:
            bbox: Text bbox as quadrilateral
            zone: Chart/image zone as (x0, y0, x1, y1) rectangle
            margin: Maximum distance from zone edge to be considered axis label

        Returns:
            True if text appears to be an axis label for this zone
        """
        if len(bbox) < 4:
            return False

        tx0, ty0, tx1, ty1 = self.get_bbox_as_rect(bbox)
        zx0, zy0, zx1, zy1 = zone

        # Calculate text dimensions
        text_width = tx1 - tx0
        text_height = ty1 - ty0

        # Y-axis label: vertical text to the LEFT of the zone
        # - Text's right edge lies within `margin` of the zone's left edge
        # - Text's Y range overlaps with zone's Y range
        # - Text is more than twice as tall as it is wide
        is_left_of_zone = zx0 - margin <= tx1 <= zx0 + margin
        y_overlaps = not (ty1 < zy0 or ty0 > zy1)
        is_vertical_text = text_height > text_width * 2

        if is_left_of_zone and y_overlaps and is_vertical_text:
            if self.debug:
                logger.debug("Detected Y-axis label: text is left of zone, vertical")
            return True

        # X-axis label: horizontal text BELOW the zone
        # - Text's top edge lies within `margin` of the zone's bottom edge
        # - Text's X range overlaps with zone's X range
        # - Text is wider than tall (normal horizontal text)
        is_below_zone = zy1 - margin <= ty0 <= zy1 + margin
        x_overlaps = not (tx1 < zx0 or tx0 > zx1)
        is_horizontal_text = text_width > text_height

        if is_below_zone and x_overlaps and is_horizontal_text:
            if self.debug:
                logger.debug("Detected X-axis label: text is below zone, horizontal")
            return True

        return False

    def is_near_zone(
        self,
        bbox: List[List[float]],
        zone: Tuple[float, float, float, float],
        margin: float = 100.0
    ) -> bool:
        """
        Check if text bbox is near (within margin) of a zone.

        Args:
            bbox: Text bbox as quadrilateral
            zone: Zone as (x0, y0, x1, y1) rectangle
            margin: Maximum distance from zone to be considered "near"

        Returns:
            True if text is near the zone
        """
        if len(bbox) < 4:
            return False

        tx0, ty0, tx1, ty1 = self.get_bbox_as_rect(bbox)
        zx0, zy0, zx1, zy1 = zone

        # Expand zone by margin on every side
        ex0, ey0 = zx0 - margin, zy0 - margin
        ex1, ey1 = zx1 + margin, zy1 + margin

        # Text is "near" when its rectangle touches the expanded zone
        return not (tx1 < ex0 or tx0 > ex1 or ty1 < ey0 or ty0 > ey1)

    def collect_zone_texts(
        self,
        regions: List[Dict],
        zones: List[Tuple[float, float, float, float]],
        threshold: float = 0.5,
        include_axis_labels: bool = True
    ) -> Set[str]:
        """
        Collect text content from regions inside zones or identified as axis labels.

        This set is used during rendering for position-aware deduplication:
        - Text that matches this set AND is near a zone will be skipped
        - Text that matches but is far from zones will still be rendered

        Regions with missing/empty text or fewer than 4 bbox points are
        ignored.

        Args:
            regions: List of raw OCR region dicts
            zones: List of (x0, y0, x1, y1) rectangles (e.g., chart bboxes)
            threshold: Minimum IoA to consider text as "inside" zone
            include_axis_labels: Also collect axis labels adjacent to zones

        Returns:
            Set of text strings found inside zones or as axis labels
        """
        zone_texts: Set[str] = set()

        for region in regions:
            text, bbox = self._region_text_and_bbox(region)

            if not text or len(bbox) < 4:
                continue

            for zone in zones:
                # Check if inside zone
                if self.is_inside_zone(bbox, zone, threshold):
                    zone_texts.add(text)
                    if self.debug:
                        logger.debug(f"Collected zone text (inside): '{text}'")
                    break

                # Check if it's an axis label
                if include_axis_labels and self.is_axis_label(bbox, zone):
                    zone_texts.add(text)
                    if self.debug:
                        logger.debug(f"Collected zone text (axis label): '{text}'")
                    break

        return zone_texts

    def _set_font_with_fallback(self, pdf_canvas: "canvas.Canvas", font_size: float) -> bool:
        """
        Select ``self.font_name`` on the canvas, falling back to standard fonts.

        A ``KeyError`` from ``setFont`` is treated as "font not registered".

        Returns:
            True if a font was set, False if no candidate was available
        """
        for candidate in (self.font_name, 'Helvetica', 'Times-Roman', 'Courier'):
            try:
                pdf_canvas.setFont(candidate, font_size)
            except KeyError:
                continue
            if self.debug and candidate != self.font_name:
                logger.debug(f"Using fallback font: {candidate}")
            return True
        return False

    def render_text_region(
        self,
        pdf_canvas: "canvas.Canvas",
        region: Dict,
        page_height: float,
        scale_x: float = 1.0,
        scale_y: float = 1.0,
        exclusion_zones: Optional[List[Tuple[float, float, float, float]]] = None,
        zone_texts: Optional[Set[str]] = None
    ) -> Tuple[bool, str]:
        """
        Render a single OCR text region to the PDF canvas.

        Handles coordinate transformation from image coordinates (origin top-left)
        to PDF coordinates (origin bottom-left).

        Small rotation angles are straightened to produce clean output.
        Text overlapping with exclusion zones (images) is skipped.

        Deduplication logic (position-aware):
        - If text matches zone_texts AND is NEAR the zone (or is axis label),
          skip it to avoid duplicate chart labels
        - Text far from zones is rendered even if it matches zone content

        The canvas graphics state is always restored after drawing, even
        when drawing fails, so a failed region cannot leak its font or
        rotation into subsequent regions.

        Args:
            pdf_canvas: ReportLab canvas to draw on
            region: Raw OCR region dict with 'text' and 'bbox'
            page_height: Height of the PDF page (for Y-flip)
            scale_x: X coordinate scaling factor
            scale_y: Y coordinate scaling factor
            exclusion_zones: List of (x0, y0, x1, y1) rectangles to avoid
            zone_texts: Set of zone-internal texts (dedupe only if near zone)

        Returns:
            Tuple of (success: bool, skip_reason: str)
            - success=True, skip_reason='' if rendered successfully
            - success=False, skip_reason='overlap'/'dedupe'/'error'/'' if skipped
              ('' means the region had no usable text or bbox)
        """
        text, bbox = self._region_text_and_bbox(region)

        if not text or len(bbox) < 4:
            return (False, '')

        # Check if text overlaps with exclusion zones (images/charts)
        if exclusion_zones and self.is_overlapping_exclusion_zones(bbox, exclusion_zones):
            if self.debug:
                logger.debug(f"Skipping text '{text[:20]}...' due to exclusion zone overlap")
            return (False, 'overlap')

        # Check if text should be deduplicated based on position
        # Only skip if text matches zone content AND is near a zone (or is axis label)
        if zone_texts and text in zone_texts and exclusion_zones:
            for zone in exclusion_zones:
                # Check if it's an axis label for this zone
                if self.is_axis_label(bbox, zone):
                    if self.debug:
                        logger.debug(f"Skipping text '{text[:20]}...' - axis label for zone")
                    return (False, 'dedupe')
                # Check if it's near this zone (for zone-internal text deduplication)
                if self.is_near_zone(bbox, zone, margin=100.0):
                    if self.debug:
                        logger.debug(f"Skipping text '{text[:20]}...' - matches zone text and is near zone")
                    return (False, 'dedupe')

        try:
            # Calculate text properties
            rotation = self.calculate_rotation(bbox)
            font_size = self.estimate_font_size(bbox, text, scale_y)

            # Straighten small rotations for cleaner output
            # Only apply rotation for significant angles (e.g., 90° rotated text)
            if abs(rotation) < self.straighten_threshold:
                rotation = 0.0

            # Get left baseline point in image coordinates
            img_x, img_y = self.get_bbox_left_baseline(bbox)

            # Apply scaling, then convert to PDF coordinates (flip Y axis)
            pdf_x = img_x * scale_x
            pdf_y = page_height - img_y * scale_y

            pdf_canvas.saveState()
            try:
                if not self._set_font_with_fallback(pdf_canvas, font_size):
                    logger.warning("No available font found, skipping region")
                    return (False, 'error')

                pdf_canvas.setFillColor(black)

                # Apply rotation if needed (only for significant angles like 90°)
                if abs(rotation) > 0.5:
                    pdf_canvas.translate(pdf_x, pdf_y)
                    pdf_canvas.rotate(rotation)
                    pdf_canvas.drawString(0, 0, text)
                else:
                    pdf_canvas.drawString(pdf_x, pdf_y, text)
            finally:
                # Always balance saveState(), including on errors and on the
                # early "no font" return above.
                pdf_canvas.restoreState()

            if self.debug:
                logger.debug(f"Rendered text '{text[:20]}...' at ({pdf_x:.1f}, {pdf_y:.1f}), "
                             f"rot={rotation:.1f}°, size={font_size:.1f}pt")

            return (True, '')

        except Exception as e:
            # Best-effort rendering: one bad region must not abort the page.
            logger.warning(f"Failed to render text region: {e}")
            return (False, 'error')

    def render_all_regions(
        self,
        pdf_canvas: "canvas.Canvas",
        regions: List[Dict],
        page_height: float,
        scale_x: float = 1.0,
        scale_y: float = 1.0,
        page_filter: Optional[int] = None,
        exclusion_zones: Optional[List[Tuple[float, float, float, float]]] = None,
        zone_texts: Optional[Set[str]] = None
    ) -> int:
        """
        Render all OCR text regions to the PDF canvas.

        Args:
            pdf_canvas: ReportLab canvas to draw on
            regions: List of raw OCR region dicts
            page_height: Height of the PDF page
            scale_x: X coordinate scaling factor
            scale_y: Y coordinate scaling factor
            page_filter: If set, only render regions whose 'page' value
                (default 0 when absent) equals this page index
            exclusion_zones: List of (x0, y0, x1, y1) rectangles to avoid
            zone_texts: Set of zone-internal texts (for position-aware deduplication)

        Returns:
            Number of regions successfully rendered
        """
        rendered_count = 0
        considered = 0  # regions that passed the page filter
        skipped = {'overlap': 0, 'dedupe': 0, 'error': 0}

        for region in regions:
            # Filter by page if specified
            if page_filter is not None and region.get('page', 0) != page_filter:
                continue
            considered += 1

            success, skip_reason = self.render_text_region(
                pdf_canvas, region, page_height, scale_x, scale_y,
                exclusion_zones, zone_texts
            )

            if success:
                rendered_count += 1
            elif skip_reason in skipped:
                skipped[skip_reason] += 1

        # Log results with skip counts; the denominator is the number of
        # regions actually considered for this page in both message forms.
        skip_parts = [f"{count} {reason}" for reason, count in skipped.items() if count > 0]

        if skip_parts:
            logger.info(f"Rendered {rendered_count}/{considered} text regions "
                        f"(skipped: {', '.join(skip_parts)})")
        else:
            logger.info(f"Rendered {rendered_count}/{considered} text regions")

        return rendered_count
|
||||
|
||||
|
||||
def load_raw_ocr_regions(result_dir: str, task_id: str, page_num: int) -> List[Dict]:
    """
    Load raw OCR regions from the result directory.

    Reads ``{task_id}_edit_page_{page_num}_raw_ocr_regions.json`` from
    ``result_dir``. Loading is best-effort: a missing, unreadable, or
    malformed file is logged and yields an empty list instead of raising.

    Args:
        result_dir: Path to the result directory
        task_id: Task ID
        page_num: Page number (1-indexed)

    Returns:
        List of raw OCR region dictionaries; empty if the file is missing,
        cannot be parsed, or does not contain a JSON list
    """
    from pathlib import Path
    import json

    # Construct filename pattern
    filename = f"{task_id}_edit_page_{page_num}_raw_ocr_regions.json"
    file_path = Path(result_dir) / filename

    # Open directly instead of checking exists() first, so the file cannot
    # disappear between the check and the read.
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            regions = json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        logger.warning(f"Raw OCR regions file not found: {file_path}")
        return []
    except Exception as e:
        # Deliberately broad: callers treat this loader as best-effort.
        logger.error(f"Failed to load raw OCR regions: {e}")
        return []

    # Honour the List[Dict] contract: callers iterate the result and call
    # .get() on each element, which would misbehave on a dict or scalar.
    if not isinstance(regions, list):
        logger.error(
            f"Failed to load raw OCR regions: expected a JSON list in {filename}, "
            f"got {type(regions).__name__}"
        )
        return []

    logger.info(f"Loaded {len(regions)} raw OCR regions from {filename}")
    return regions
|
||||
Reference in New Issue
Block a user