Backup commit before executing remove-unused-code proposal. This includes all pending changes and new features. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
665 lines
23 KiB
Python
665 lines
23 KiB
Python
"""
|
|
Simple Text Region Renderer
|
|
|
|
Renders raw OCR text regions directly to PDF at their detected positions,
|
|
with rotation correction based on bbox quadrilateral geometry.
|
|
|
|
This approach bypasses complex table structure reconstruction and simply
|
|
places text at the positions detected by PaddleOCR.
|
|
"""
|
|
|
|
import math
|
|
import logging
|
|
from typing import Dict, List, Optional, Set, Tuple
|
|
|
|
from reportlab.pdfgen import canvas
|
|
from reportlab.lib.colors import black
|
|
|
|
# Module-level logger used by TextRegionRenderer and load_raw_ocr_regions.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class TextRegionRenderer:
    """
    Render raw OCR text regions to PDF with position and rotation correction.

    This renderer takes the raw OCR output (text + quadrilateral bbox) and
    renders text at the correct position. Small rotation angles are ignored
    (straightened) to produce clean, aligned text output.

    Bboxes are quadrilaterals given as ``[[x, y], ...]`` in the order
    [top-left, top-right, bottom-right, bottom-left], in image coordinates
    (origin top-left, Y increasing downward). Zones are axis-aligned
    ``(x0, y0, x1, y1)`` rectangles.
    """

    # Minimum font size to prevent illegible text
    MIN_FONT_SIZE = 6.0

    # Maximum font size to prevent oversized text
    MAX_FONT_SIZE = 72.0

    # Font size estimation factor (font height relative to bbox height)
    FONT_SIZE_FACTOR = 0.75

    # Rotation angle threshold - angles smaller than this are straightened to 0
    # This compensates for slight scan skew and produces cleaner output
    ROTATION_STRAIGHTEN_THRESHOLD = 10.0  # degrees

    # IoA (Intersection over Area) threshold for text-image overlap detection
    # If text bbox overlaps with image by more than this ratio, skip the text
    IOA_OVERLAP_THRESHOLD = 0.3  # 30% overlap

    # Fonts tried, in order, when the configured font is not registered
    _FALLBACK_FONTS = ('Helvetica', 'Times-Roman', 'Courier')

    def __init__(
        self,
        font_name: str = 'NotoSansSC',
        debug: bool = False,
        straighten_threshold: Optional[float] = None,
        ioa_threshold: Optional[float] = None
    ):
        """
        Initialize the text region renderer.

        Args:
            font_name: Name of the registered font to use
            debug: Enable debug logging
            straighten_threshold: Override rotation straightening threshold
                (degrees). ``None`` selects ROTATION_STRAIGHTEN_THRESHOLD;
                an explicit ``0.0`` is honoured (no straightening).
            ioa_threshold: Override IoA overlap threshold for text-image
                avoidance. ``None`` selects IOA_OVERLAP_THRESHOLD; an
                explicit ``0.0`` is honoured (skip on any real overlap).
        """
        self.font_name = font_name
        self.debug = debug
        # Compare against None rather than using `or`, so that a caller can
        # deliberately pass 0.0 without it being replaced by the default.
        self.straighten_threshold = (
            self.ROTATION_STRAIGHTEN_THRESHOLD
            if straighten_threshold is None
            else straighten_threshold
        )
        self.ioa_threshold = (
            self.IOA_OVERLAP_THRESHOLD if ioa_threshold is None else ioa_threshold
        )

    def calculate_rotation(self, bbox: List[List[float]]) -> float:
        """
        Calculate text rotation angle from bbox quadrilateral.

        The angle is derived from the top edge (first point to second point).
        It is returned in degrees, counter-clockwise from horizontal; a
        positive angle means the text is tilted upward to the right.

        NOTE: Small angles (< straighten_threshold) are treated as 0 by
        render_text_region, not by this method.

        Args:
            bbox: List of [x, y] coordinate pairs (at least 2 are needed)

        Returns:
            Rotation angle in degrees, or 0.0 if fewer than 2 points
        """
        if len(bbox) < 2:
            return 0.0

        # Top-left to top-right vector (top edge)
        dx = bbox[1][0] - bbox[0][0]
        dy = bbox[1][1] - bbox[0][1]

        # In image coordinates Y increases downward, so dy is negated to get
        # the conventional (counter-clockwise positive) angle.
        angle_deg = math.degrees(math.atan2(-dy, dx))

        if self.debug:
            logger.debug(f"Rotation calculation: dx={dx:.1f}, dy={dy:.1f}, angle={angle_deg:.2f}°")

        return angle_deg

    def estimate_font_size(
        self,
        bbox: List[List[float]],
        text: str,
        scale_factor: float = 1.0
    ) -> float:
        """
        Estimate appropriate font size from bbox dimensions.

        Uses the bbox height (mean length of the left and right edges),
        multiplied by scale_factor and FONT_SIZE_FACTOR, then clamped to
        [MIN_FONT_SIZE, MAX_FONT_SIZE].

        Args:
            bbox: List of 4 [x, y] coordinate pairs
            text: The text content (accepted for interface compatibility;
                not used by the current estimate)
            scale_factor: Coordinate scaling factor

        Returns:
            Estimated font size in points (12.0 if bbox has fewer than 4 points)
        """
        if len(bbox) < 4:
            return 12.0  # Default font size

        # Edge lengths rather than y-differences, so rotated boxes still
        # yield the true text height.
        left_height = math.dist(bbox[0], bbox[3])
        right_height = math.dist(bbox[1], bbox[2])
        avg_height = (left_height + right_height) / 2

        font_size = avg_height * scale_factor * self.FONT_SIZE_FACTOR
        font_size = max(self.MIN_FONT_SIZE, min(self.MAX_FONT_SIZE, font_size))

        if self.debug:
            logger.debug(f"Font size estimation: bbox_h={avg_height:.1f}, "
                         f"scale={scale_factor:.3f}, font={font_size:.1f}pt")

        return font_size

    def get_bbox_center(self, bbox: List[List[float]]) -> Tuple[float, float]:
        """
        Calculate the center point (mean of the vertices) of a bbox.

        Args:
            bbox: List of [x, y] coordinate pairs (at least 4)

        Returns:
            Tuple of (center_x, center_y); (0.0, 0.0) if fewer than 4 points
        """
        point_count = len(bbox)
        if point_count < 4:
            return (0.0, 0.0)

        # Divide by the actual number of points so polygons with more than
        # four vertices are averaged correctly.
        center_x = sum(p[0] for p in bbox) / point_count
        center_y = sum(p[1] for p in bbox) / point_count
        return (center_x, center_y)

    def get_bbox_as_rect(self, bbox: List[List[float]]) -> Tuple[float, float, float, float]:
        """
        Convert quadrilateral bbox to axis-aligned rectangle (x0, y0, x1, y1).

        Args:
            bbox: List of [x, y] coordinate pairs (at least 4)

        Returns:
            Tuple of (x0, y0, x1, y1) - min/max coordinates over all points;
            (0.0, 0.0, 0.0, 0.0) if fewer than 4 points
        """
        if len(bbox) < 4:
            return (0.0, 0.0, 0.0, 0.0)

        x_coords = [p[0] for p in bbox]
        y_coords = [p[1] for p in bbox]
        return (min(x_coords), min(y_coords), max(x_coords), max(y_coords))

    def get_bbox_left_baseline(
        self,
        bbox: List[List[float]]
    ) -> Tuple[float, float]:
        """
        Get the left baseline point for text rendering.

        For left-aligned text, the bottom-left corner (fourth point in the
        standard ordering) is used as the baseline starting point.

        Args:
            bbox: List of 4 [x, y] coordinate pairs

        Returns:
            Tuple of (x, y) for the left baseline point;
            (0.0, 0.0) if fewer than 4 points
        """
        if len(bbox) < 4:
            return (0.0, 0.0)

        bottom_left = bbox[3]
        return (bottom_left[0], bottom_left[1])

    def calculate_ioa(
        self,
        text_rect: Tuple[float, float, float, float],
        image_rect: Tuple[float, float, float, float]
    ) -> float:
        """
        Calculate Intersection over Area (IoA) of text bbox with image bbox.

        IoA = intersection_area / text_area

        This measures how much of the text region overlaps with the image.

        Args:
            text_rect: Text bbox as (x0, y0, x1, y1)
            image_rect: Image bbox as (x0, y0, x1, y1)

        Returns:
            IoA ratio; 0.0 when the text area is not positive or the
            rectangles do not intersect
        """
        tx0, ty0, tx1, ty1 = text_rect
        ix0, iy0, ix1, iy1 = image_rect

        text_area = (tx1 - tx0) * (ty1 - ty0)
        if text_area <= 0:
            return 0.0

        inter_x0 = max(tx0, ix0)
        inter_y0 = max(ty0, iy0)
        inter_x1 = min(tx1, ix1)
        inter_y1 = min(ty1, iy1)

        if inter_x0 >= inter_x1 or inter_y0 >= inter_y1:
            return 0.0  # No intersection

        inter_area = (inter_x1 - inter_x0) * (inter_y1 - inter_y0)
        return inter_area / text_area

    def is_overlapping_exclusion_zones(
        self,
        bbox: List[List[float]],
        exclusion_zones: List[Tuple[float, float, float, float]]
    ) -> bool:
        """
        Check if text bbox overlaps significantly with any exclusion zone.

        A zone counts when the text actually intersects it and the IoA
        reaches ``self.ioa_threshold``.

        Args:
            bbox: Text bbox as quadrilateral
            exclusion_zones: List of (x0, y0, x1, y1) rectangles to avoid

        Returns:
            True if text should be skipped due to overlap
        """
        if not exclusion_zones:
            return False

        text_rect = self.get_bbox_as_rect(bbox)

        for zone in exclusion_zones:
            ioa = self.calculate_ioa(text_rect, zone)
            # `ioa > 0` keeps a zero threshold from matching disjoint text;
            # for any positive threshold this is the same test as before.
            if ioa > 0.0 and ioa >= self.ioa_threshold:
                if self.debug:
                    logger.debug(f"Text overlaps exclusion zone: IoA={ioa:.2f} >= {self.ioa_threshold}")
                return True

        return False

    def is_inside_zone(
        self,
        bbox: List[List[float]],
        zone: Tuple[float, float, float, float],
        threshold: float = 0.5
    ) -> bool:
        """
        Check if text bbox is inside a zone (for collecting chart texts).

        Args:
            bbox: Text bbox as quadrilateral
            zone: Zone as (x0, y0, x1, y1) rectangle
            threshold: Minimum IoA to consider "inside"

        Returns:
            True if the IoA of the text with the zone is >= threshold
        """
        text_rect = self.get_bbox_as_rect(bbox)
        return self.calculate_ioa(text_rect, zone) >= threshold

    def is_axis_label(
        self,
        bbox: List[List[float]],
        zone: Tuple[float, float, float, float],
        margin: float = 50.0
    ) -> bool:
        """
        Check if text bbox is an axis label for a chart/image zone.

        Two patterns are recognised:
        - Y-axis label: the text's right edge lies within ``margin`` of the
          zone's left edge, the Y ranges overlap, and the text is more than
          twice as tall as it is wide.
        - X-axis label: the text's top edge lies within ``margin`` of the
          zone's bottom edge, the X ranges overlap, and the text is wider
          than it is tall.

        Args:
            bbox: Text bbox as quadrilateral
            zone: Chart/image zone as (x0, y0, x1, y1) rectangle
            margin: Maximum distance from zone edge to be considered axis label

        Returns:
            True if text appears to be an axis label for this zone
        """
        if len(bbox) < 4:
            return False

        tx0, ty0, tx1, ty1 = self.get_bbox_as_rect(bbox)
        zx0, zy0, zx1, zy1 = zone

        text_width = tx1 - tx0
        text_height = ty1 - ty0

        # Y-axis label: vertical text hugging the zone's left edge
        is_left_of_zone = tx1 <= zx0 + margin and tx1 >= zx0 - margin
        y_overlaps = not (ty1 < zy0 or ty0 > zy1)
        is_vertical_text = text_height > text_width * 2

        if is_left_of_zone and y_overlaps and is_vertical_text:
            if self.debug:
                logger.debug("Detected Y-axis label: text is left of zone, vertical")
            return True

        # X-axis label: horizontal text hugging the zone's bottom edge
        is_below_zone = ty0 >= zy1 - margin and ty0 <= zy1 + margin
        x_overlaps = not (tx1 < zx0 or tx0 > zx1)
        is_horizontal_text = text_width > text_height

        if is_below_zone and x_overlaps and is_horizontal_text:
            if self.debug:
                logger.debug("Detected X-axis label: text is below zone, horizontal")
            return True

        return False

    def is_near_zone(
        self,
        bbox: List[List[float]],
        zone: Tuple[float, float, float, float],
        margin: float = 100.0
    ) -> bool:
        """
        Check if text bbox is near (within margin) of a zone.

        The zone is grown by ``margin`` on every side and the text's
        axis-aligned rectangle is tested for overlap with it.

        Args:
            bbox: Text bbox as quadrilateral
            zone: Zone as (x0, y0, x1, y1) rectangle
            margin: Maximum distance from zone to be considered "near"

        Returns:
            True if text is near the zone; False if bbox has fewer than 4 points
        """
        if len(bbox) < 4:
            return False

        tx0, ty0, tx1, ty1 = self.get_bbox_as_rect(bbox)
        zx0, zy0, zx1, zy1 = zone

        # Expand zone by margin
        ex0, ey0 = zx0 - margin, zy0 - margin
        ex1, ey1 = zx1 + margin, zy1 + margin

        return not (tx1 < ex0 or tx0 > ex1 or ty1 < ey0 or ty0 > ey1)

    def collect_zone_texts(
        self,
        regions: List[Dict],
        zones: List[Tuple[float, float, float, float]],
        threshold: float = 0.5,
        include_axis_labels: bool = True
    ) -> Set[str]:
        """
        Collect text content from regions inside zones or identified as axis labels.

        This set is used during rendering for position-aware deduplication:
        - Text that matches this set AND is near a zone will be skipped
        - Text that matches but is far from zones will still be rendered

        Regions with missing/empty text or fewer than 4 bbox points are
        ignored.

        Args:
            regions: List of raw OCR region dicts
            zones: List of (x0, y0, x1, y1) rectangles (e.g., chart bboxes)
            threshold: Minimum IoA to consider text as "inside" zone
            include_axis_labels: Also collect axis labels adjacent to zones

        Returns:
            Set of (stripped) text strings found inside zones or as axis labels
        """
        zone_texts: Set[str] = set()

        for region in regions:
            # `or` fallbacks tolerate explicit None values in the region dict
            text = (region.get('text') or '').strip()
            bbox = region.get('bbox') or []

            if not text or len(bbox) < 4:
                continue

            for zone in zones:
                if self.is_inside_zone(bbox, zone, threshold):
                    zone_texts.add(text)
                    if self.debug:
                        logger.debug(f"Collected zone text (inside): '{text}'")
                    break

                if include_axis_labels and self.is_axis_label(bbox, zone):
                    zone_texts.add(text)
                    if self.debug:
                        logger.debug(f"Collected zone text (axis label): '{text}'")
                    break

        return zone_texts

    def _set_font_with_fallback(self, pdf_canvas: "canvas.Canvas", font_size: float) -> bool:
        """Set the configured font, falling back to _FALLBACK_FONTS on KeyError.

        Returns True once a font has been set, False if every candidate
        raised KeyError.
        """
        try:
            pdf_canvas.setFont(self.font_name, font_size)
            return True
        except KeyError:
            # Configured font not registered; try the fallbacks below
            pass

        for fallback in self._FALLBACK_FONTS:
            try:
                pdf_canvas.setFont(fallback, font_size)
            except KeyError:
                continue
            if self.debug:
                logger.debug(f"Using fallback font: {fallback}")
            return True

        return False

    def render_text_region(
        self,
        pdf_canvas: "canvas.Canvas",
        region: Dict,
        page_height: float,
        scale_x: float = 1.0,
        scale_y: float = 1.0,
        exclusion_zones: Optional[List[Tuple[float, float, float, float]]] = None,
        zone_texts: Optional[Set[str]] = None
    ) -> Tuple[bool, str]:
        """
        Render a single OCR text region to the PDF canvas.

        Handles coordinate transformation from image coordinates (origin top-left)
        to PDF coordinates (origin bottom-left).

        Small rotation angles are straightened to produce clean output.
        Text overlapping with exclusion zones (images) is skipped.

        Deduplication logic (position-aware):
        - If text matches zone_texts AND is NEAR the zone (or is axis label),
          skip it to avoid duplicate chart labels
        - Text far from zones is rendered even if it matches zone content

        The canvas graphics state is saved before drawing and always
        restored, even when drawing raises.

        Args:
            pdf_canvas: ReportLab canvas to draw on
            region: Raw OCR region dict with 'text' and 'bbox'
            page_height: Height of the PDF page (for Y-flip)
            scale_x: X coordinate scaling factor
            scale_y: Y coordinate scaling factor (also used for font sizing)
            exclusion_zones: List of (x0, y0, x1, y1) rectangles to avoid
            zone_texts: Set of zone-internal texts (dedupe only if near zone)

        Returns:
            Tuple of (success: bool, skip_reason: str)
            - success=True, skip_reason='' if rendered successfully
            - success=False, skip_reason='overlap'/'dedupe'/'error'/'' if skipped
              ('' means the region had no text or fewer than 4 bbox points)
        """
        # `or` fallbacks tolerate explicit None values in the region dict
        text = (region.get('text') or '').strip()
        bbox = region.get('bbox') or []

        if not text or len(bbox) < 4:
            return (False, '')

        # Check if text overlaps with exclusion zones (images/charts)
        if exclusion_zones and self.is_overlapping_exclusion_zones(bbox, exclusion_zones):
            if self.debug:
                logger.debug(f"Skipping text '{text[:20]}...' due to exclusion zone overlap")
            return (False, 'overlap')

        # Only skip if text matches zone content AND is near a zone (or is axis label)
        if zone_texts and text in zone_texts and exclusion_zones:
            for zone in exclusion_zones:
                if self.is_axis_label(bbox, zone):
                    if self.debug:
                        logger.debug(f"Skipping text '{text[:20]}...' - axis label for zone")
                    return (False, 'dedupe')
                if self.is_near_zone(bbox, zone, margin=100.0):
                    if self.debug:
                        logger.debug(f"Skipping text '{text[:20]}...' - matches zone text and is near zone")
                    return (False, 'dedupe')

        try:
            rotation = self.calculate_rotation(bbox)
            font_size = self.estimate_font_size(bbox, text, scale_y)

            # Straighten small rotations; only significant angles
            # (e.g., 90° rotated text) are kept.
            if abs(rotation) < self.straighten_threshold:
                rotation = 0.0

            # Baseline point in image coordinates, scaled, then Y-flipped
            # into PDF coordinates.
            img_x, img_y = self.get_bbox_left_baseline(bbox)
            pdf_x = img_x * scale_x
            pdf_y = page_height - img_y * scale_y

            pdf_canvas.saveState()
            try:
                if not self._set_font_with_fallback(pdf_canvas, font_size):
                    logger.warning("No available font found, skipping region")
                    return (False, 'error')

                pdf_canvas.setFillColor(black)

                if abs(rotation) > 0.5:
                    # Rotate about the baseline point
                    pdf_canvas.translate(pdf_x, pdf_y)
                    pdf_canvas.rotate(rotation)
                    pdf_canvas.drawString(0, 0, text)
                else:
                    pdf_canvas.drawString(pdf_x, pdf_y, text)
            finally:
                # Always undo saveState so a failed draw cannot leak font,
                # colour or transform state into later regions.
                pdf_canvas.restoreState()

            if self.debug:
                logger.debug(f"Rendered text '{text[:20]}...' at ({pdf_x:.1f}, {pdf_y:.1f}), "
                             f"rot={rotation:.1f}°, size={font_size:.1f}pt")

            return (True, '')

        except Exception as e:
            # Best-effort rendering: one bad region must not abort the page
            logger.warning(f"Failed to render text region: {e}")
            return (False, 'error')

    def render_all_regions(
        self,
        pdf_canvas: "canvas.Canvas",
        regions: List[Dict],
        page_height: float,
        scale_x: float = 1.0,
        scale_y: float = 1.0,
        page_filter: Optional[int] = None,
        exclusion_zones: Optional[List[Tuple[float, float, float, float]]] = None,
        zone_texts: Optional[Set[str]] = None
    ) -> int:
        """
        Render all OCR text regions to the PDF canvas.

        A summary line is logged at INFO level. When any region was skipped
        for overlap or dedupe, the summary denominator is rendered + those
        skips; otherwise it is ``len(regions)``.

        Args:
            pdf_canvas: ReportLab canvas to draw on
            regions: List of raw OCR region dicts
            page_height: Height of the PDF page
            scale_x: X coordinate scaling factor
            scale_y: Y coordinate scaling factor
            page_filter: If set, only render regions whose 'page' value
                (default 0 when absent) equals this
            exclusion_zones: List of (x0, y0, x1, y1) rectangles to avoid
            zone_texts: Set of zone-internal texts (for position-aware deduplication)

        Returns:
            Number of regions successfully rendered
        """
        rendered_count = 0
        skipped_overlap = 0
        skipped_dedupe = 0

        for region in regions:
            # Filter by page if specified
            if page_filter is not None and region.get('page', 0) != page_filter:
                continue

            success, skip_reason = self.render_text_region(
                pdf_canvas, region, page_height, scale_x, scale_y,
                exclusion_zones, zone_texts
            )

            if success:
                rendered_count += 1
            elif skip_reason == 'overlap':
                skipped_overlap += 1
            elif skip_reason == 'dedupe':
                skipped_dedupe += 1

        # Log results with skip counts
        total_processed = rendered_count + skipped_overlap + skipped_dedupe
        skip_parts = []
        if skipped_overlap > 0:
            skip_parts.append(f"{skipped_overlap} overlap")
        if skipped_dedupe > 0:
            skip_parts.append(f"{skipped_dedupe} dedupe")

        if skip_parts:
            logger.info(f"Rendered {rendered_count}/{total_processed} text regions "
                        f"(skipped: {', '.join(skip_parts)})")
        else:
            logger.info(f"Rendered {rendered_count}/{len(regions)} text regions")

        return rendered_count
|
|
|
|
|
|
def load_raw_ocr_regions(result_dir: str, task_id: str, page_num: int) -> List[Dict]:
    """
    Load raw OCR regions from the result directory.

    The file read is
    ``<result_dir>/<task_id>_edit_page_<page_num>_raw_ocr_regions.json``.
    Loading is best-effort: a missing file, an unreadable/invalid JSON file,
    or a JSON document whose top level is not a list is logged and yields
    an empty list instead of raising.

    Args:
        result_dir: Path to the result directory
        task_id: Task ID
        page_num: Page number (1-indexed)

    Returns:
        List of raw OCR region dictionaries (empty on any failure)
    """
    from pathlib import Path
    import json

    # Construct filename pattern
    filename = f"{task_id}_edit_page_{page_num}_raw_ocr_regions.json"
    file_path = Path(result_dir) / filename

    if not file_path.exists():
        logger.warning(f"Raw OCR regions file not found: {file_path}")
        return []

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            regions = json.load(f)
    except Exception as e:
        # Deliberately broad: callers treat this loader as best-effort
        logger.error(f"Failed to load raw OCR regions: {e}")
        return []

    # Enforce the declared return type; a dict or scalar payload would
    # otherwise be handed to callers that iterate over region dicts.
    if not isinstance(regions, list):
        logger.error(
            f"Failed to load raw OCR regions: expected a JSON list in {filename}, "
            f"got {type(regions).__name__}"
        )
        return []

    logger.info(f"Loaded {len(regions)} raw OCR regions from {filename}")
    return regions
|