"""
PP-StructureV3 Debug Service

Provides debugging tools for visualizing and saving PP-StructureV3 results:
- Save raw results as JSON for inspection
- Generate visualization images showing detected bboxes
- Compare raw OCR regions with PP-StructureV3 elements
"""

import json
import logging
from datetime import datetime
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union

from PIL import Image, ImageDraw, ImageFont

from app.utils.bbox_utils import normalize_bbox

logger = logging.getLogger(__name__)

# Color palette for different element types (RGB)
ELEMENT_COLORS: Dict[str, Tuple[int, int, int]] = {
    'text': (0, 128, 0),            # Green
    'title': (0, 0, 255),           # Blue
    'table': (255, 0, 0),           # Red
    'figure': (255, 165, 0),        # Orange
    'image': (255, 165, 0),         # Orange
    'header': (128, 0, 128),        # Purple
    'footer': (128, 0, 128),        # Purple
    'equation': (0, 255, 255),      # Cyan
    'chart': (255, 192, 203),       # Pink
    'list': (139, 69, 19),          # Brown
    'reference': (128, 128, 128),   # Gray
    'default': (255, 0, 255),       # Magenta for unknown types
}

# Color for raw OCR regions
RAW_OCR_COLOR = (255, 215, 0)  # Gold

# TrueType fonts tried in order when drawing labels; the first one that loads
# wins. NOTE(review): the second entry is a developer-machine path kept only
# for backward compatibility - pass ``font_paths`` to PPStructureDebug to
# override it in other environments.
DEFAULT_FONT_PATHS: Tuple[str, ...] = (
    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    "/home/egg/project/Tool_OCR/backend/fonts/NotoSansSC-Regular.ttf",
)

# Fonts may be either TrueType fonts or the built-in fallback font.
FontType = Union[ImageFont.ImageFont, ImageFont.FreeTypeFont]


class PPStructureDebug:
    """Debug service for PP-StructureV3 analysis results."""

    def __init__(self, output_dir: Path, font_paths: Optional[Sequence[str]] = None):
        """
        Initialize debug service.

        Args:
            output_dir: Directory to save debug outputs (created if missing)
            font_paths: Optional TrueType font files to try, in order, for
                visualization labels. Defaults to DEFAULT_FONT_PATHS.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        candidates = DEFAULT_FONT_PATHS if font_paths is None else font_paths
        self.font_paths: Tuple[str, ...] = tuple(str(p) for p in candidates)

    def save_raw_results(
        self,
        pp_structure_results: Dict[str, Any],
        raw_ocr_regions: List[Dict[str, Any]],
        filename_prefix: str = "debug"
    ) -> Dict[str, Path]:
        """
        Save raw PP-StructureV3 results and OCR regions as JSON files.

        Each file is written independently: a failure on one file is logged
        and does not prevent the others from being saved.

        Args:
            pp_structure_results: Raw PP-StructureV3 analysis results
            raw_ocr_regions: Raw OCR text regions
            filename_prefix: Prefix for output files

        Returns:
            Dictionary with paths to saved files (keys: 'pp_structure',
            'raw_ocr', 'summary'; only successfully written files appear)
        """
        saved_files: Dict[str, Path] = {}
        self._save_pp_structure_json(saved_files, pp_structure_results, filename_prefix)
        self._save_json(
            saved_files,
            'raw_ocr',
            self.output_dir / f"{filename_prefix}_raw_ocr_regions.json",
            lambda: raw_ocr_regions,
            "raw OCR regions",
            "raw OCR regions",
        )
        self._save_summary_json(
            saved_files, pp_structure_results, raw_ocr_regions, filename_prefix
        )
        return saved_files

    def save_debug_results(
        self,
        pp_structure_results: Dict[str, Any],
        raw_ocr_regions: List[Dict[str, Any]],
        filename_prefix: str = "debug"
    ) -> Dict[str, Path]:
        """
        Save debug-only files (PP-Structure raw results and summary).

        Does NOT save raw_ocr_regions.json (that's handled separately).

        Args:
            pp_structure_results: Raw PP-StructureV3 analysis results
            raw_ocr_regions: Raw OCR text regions (for summary generation only)
            filename_prefix: Prefix for output files

        Returns:
            Dictionary with paths to saved files (keys: 'pp_structure',
            'summary'; only successfully written files appear)
        """
        saved_files: Dict[str, Path] = {}
        self._save_pp_structure_json(saved_files, pp_structure_results, filename_prefix)
        self._save_summary_json(
            saved_files, pp_structure_results, raw_ocr_regions, filename_prefix
        )
        return saved_files

    def _save_pp_structure_json(
        self,
        saved_files: Dict[str, Path],
        pp_structure_results: Dict[str, Any],
        filename_prefix: str
    ) -> None:
        """Write the raw PP-StructureV3 results file and record its path."""
        self._save_json(
            saved_files,
            'pp_structure',
            self.output_dir / f"{filename_prefix}_pp_structure_raw.json",
            lambda: pp_structure_results,
            "PP-StructureV3 raw results",
            "PP-StructureV3 results",
        )

    def _save_summary_json(
        self,
        saved_files: Dict[str, Path],
        pp_structure_results: Dict[str, Any],
        raw_ocr_regions: List[Dict[str, Any]],
        filename_prefix: str
    ) -> None:
        """Write the PP-Structure vs. raw OCR summary file and record its path."""
        self._save_json(
            saved_files,
            'summary',
            self.output_dir / f"{filename_prefix}_debug_summary.json",
            lambda: self._generate_summary(pp_structure_results, raw_ocr_regions),
            "debug summary",
            "debug summary",
        )

    def _save_json(
        self,
        saved_files: Dict[str, Path],
        key: str,
        path: Path,
        build_payload: Callable[[], Any],
        saved_label: str,
        failed_label: str
    ) -> None:
        """
        Build a payload, write it as JSON, and record the path on success.

        This is best-effort by design: any failure (building the payload,
        converting it, or writing the file) is logged with its traceback and
        swallowed so that the remaining debug files can still be produced.

        Args:
            saved_files: Mapping updated in place with ``key -> path`` on success
            key: Key under which the path is recorded
            path: Destination JSON file
            build_payload: Zero-argument callable producing the data to dump;
                called inside the guarded section so its errors are logged too
            saved_label: Description used in the success log message
            failed_label: Description used in the failure log message
        """
        try:
            # Every payload goes through _make_serializable (including the
            # summary) so non-JSON-native values cannot make json.dump fail.
            payload = self._make_serializable(build_payload())
            with open(path, 'w', encoding='utf-8') as f:
                json.dump(payload, f, ensure_ascii=False, indent=2)
        except Exception as exc:
            logger.exception("Failed to save %s: %s", failed_label, exc)
            return
        saved_files[key] = path
        logger.info("Saved %s to %s", saved_label, path)

    def generate_visualization(
        self,
        image_path: Path,
        pp_structure_elements: List[Dict[str, Any]],
        raw_ocr_regions: Optional[List[Dict[str, Any]]] = None,
        filename_prefix: str = "debug",
        show_labels: bool = True,
        show_raw_ocr: bool = True
    ) -> Optional[Path]:
        """
        Generate visualization image showing detected elements.

        Args:
            image_path: Path to original image
            pp_structure_elements: PP-StructureV3 detected elements
            raw_ocr_regions: Optional raw OCR regions to overlay
            filename_prefix: Prefix for output file
            show_labels: Whether to show element type labels
            show_raw_ocr: Whether to show raw OCR regions

        Returns:
            Path to generated visualization image, or None if anything failed
            (the error is logged with its traceback).
        """
        elements = pp_structure_elements or []
        try:
            # convert() always returns a new, fully loaded image, so the
            # source file handle can be released immediately.
            with Image.open(image_path) as source:
                viz_img = source.convert('RGB')
            draw = ImageDraw.Draw(viz_img)
            font, small_font = self._load_fonts()

            # Draw raw OCR regions first (so PP-Structure boxes are on top)
            if show_raw_ocr and raw_ocr_regions:
                self._draw_raw_ocr_regions(draw, raw_ocr_regions, small_font, show_labels)

            self._draw_elements(draw, elements, font, show_labels)

            # Add legend
            self._draw_legend(draw, viz_img.width, font)

            # Add image info
            info_text = f"PP-Structure: {len(elements)} elements"
            if raw_ocr_regions:
                info_text += f" | Raw OCR: {len(raw_ocr_regions)} regions"
            info_text += f" | Size: {viz_img.width}x{viz_img.height}"
            draw.text((10, viz_img.height - 25), info_text, fill=(0, 0, 0), font=font)

            # Save visualization
            viz_path = self.output_dir / f"{filename_prefix}_pp_structure_viz.png"
            viz_img.save(viz_path, 'PNG')
            logger.info("Saved visualization to %s", viz_path)
            return viz_path
        except Exception as exc:
            # Debug output is best-effort: log the full traceback, never raise.
            logger.exception("Failed to generate visualization: %s", exc)
            return None

    def _load_fonts(self) -> Tuple[FontType, FontType]:
        """Return (label font, small font), falling back to PIL's default font."""
        for font_path in self.font_paths:
            try:
                return ImageFont.truetype(font_path, 14), ImageFont.truetype(font_path, 10)
            except (IOError, OSError):
                continue
        fallback = ImageFont.load_default()
        return fallback, fallback

    def _draw_raw_ocr_regions(
        self,
        draw: ImageDraw.ImageDraw,
        raw_ocr_regions: List[Dict[str, Any]],
        small_font: FontType,
        show_labels: bool
    ) -> None:
        """Outline each raw OCR region with a thin box and optional confidence label."""
        for region in raw_ocr_regions:
            bbox = self._normalize_bbox(region.get('bbox', []))
            if not bbox:
                continue
            x0, y0, x1, y1 = bbox
            draw.rectangle([x0, y0, x1, y1], outline=RAW_OCR_COLOR, width=1)
            if show_labels:
                # A missing or None confidence is shown as 0.00 rather than
                # breaking the format spec and aborting the whole image.
                confidence = region.get('confidence') or 0
                label = f"OCR:{confidence:.2f}"
                # Clamp so labels of boxes at the top edge stay on the canvas.
                draw.text((x0, max(0, y0 - 12)), label, fill=RAW_OCR_COLOR, font=small_font)

    def _draw_elements(
        self,
        draw: ImageDraw.ImageDraw,
        elements: List[Dict[str, Any]],
        font: FontType,
        show_labels: bool
    ) -> None:
        """Outline each PP-StructureV3 element with a thick, type-colored box."""
        for idx, elem in enumerate(elements):
            elem_type = elem.get('type', 'default')
            # Enum-like types carry their name in ``.value``.
            if hasattr(elem_type, 'value'):
                elem_type = elem_type.value
            elem_type = str(elem_type).lower()
            color = ELEMENT_COLORS.get(elem_type, ELEMENT_COLORS['default'])

            bbox = self._normalize_bbox(elem.get('bbox', []))
            if not bbox:
                continue
            x0, y0, x1, y1 = bbox
            draw.rectangle([x0, y0, x1, y1], outline=color, width=3)

            if show_labels:
                label = f"{idx}:{elem_type}"
                # Clamp so labels of boxes at the top edge stay on the canvas.
                label_pos = (x0, max(0, y0 - 18))
                # White background keeps the label readable over page content.
                text_bbox = draw.textbbox(label_pos, label, font=font)
                draw.rectangle(text_bbox, fill=(255, 255, 255, 200))
                draw.text(label_pos, label, fill=color, font=font)

    def _draw_legend(self, draw: ImageDraw.ImageDraw, img_width: int, font: FontType):
        """Draw a legend showing element type colors in the top-right corner."""
        legend_x = img_width - 150
        legend_y = 10

        # Background height: one 18px row per ELEMENT_COLORS entry ('default'
        # is skipped but the raw OCR row takes its place) plus the heading.
        draw.rectangle(
            [legend_x - 5, legend_y - 5, img_width - 5, legend_y + len(ELEMENT_COLORS) * 18 + 25],
            fill=(255, 255, 255, 230),
            outline=(0, 0, 0)
        )

        draw.text((legend_x, legend_y), "Legend:", fill=(0, 0, 0), font=font)
        legend_y += 20

        for elem_type, color in ELEMENT_COLORS.items():
            if elem_type == 'default':
                continue
            draw.rectangle([legend_x, legend_y + 2, legend_x + 12, legend_y + 14], fill=color)
            draw.text((legend_x + 18, legend_y), elem_type, fill=(0, 0, 0), font=font)
            legend_y += 18

        # Add raw OCR legend entry
        draw.rectangle([legend_x, legend_y + 2, legend_x + 12, legend_y + 14], fill=RAW_OCR_COLOR)
        draw.text((legend_x + 18, legend_y), "raw_ocr", fill=(0, 0, 0), font=font)

    def _normalize_bbox(self, bbox: Any) -> Optional[Tuple[float, float, float, float]]:
        """Normalize bbox to (x0, y0, x1, y1) format. Uses shared bbox utility."""
        return normalize_bbox(bbox)

    def _total_bbox_area(self, items: List[Dict[str, Any]]) -> float:
        """Sum the areas of every item whose 'bbox' can be normalized."""
        total = 0
        for item in items:
            bbox = self._normalize_bbox(item.get('bbox'))
            if bbox:
                total += (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
        return total

    def _generate_summary(
        self,
        pp_structure_results: Dict[str, Any],
        raw_ocr_regions: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Generate summary comparing PP-Structure and raw OCR.

        Missing inputs (None results, None/absent 'elements', None regions)
        are treated as empty, and a None confidence counts as 0.

        Args:
            pp_structure_results: Raw PP-StructureV3 results; 'elements' and
                'has_parsing_res_list' are read from it
            raw_ocr_regions: Raw OCR regions; 'bbox' and 'confidence' are read

        Returns:
            Dictionary with 'timestamp', 'pp_structure', 'raw_ocr' and
            'comparison' sections.
        """
        pp_results = pp_structure_results or {}
        pp_elements = pp_results.get('elements') or []
        ocr_regions = raw_ocr_regions or []

        # Count element types
        type_counts: Dict[str, int] = {}
        for elem in pp_elements:
            elem_type = elem.get('type', 'unknown')
            if hasattr(elem_type, 'value'):
                elem_type = elem_type.value
            type_key = str(elem_type)
            type_counts[type_key] = type_counts.get(type_key, 0) + 1

        # Calculate bounding box coverage
        pp_bbox_area = self._total_bbox_area(pp_elements)
        ocr_bbox_area = self._total_bbox_area(ocr_regions)

        element_count = len(pp_elements)
        region_count = len(ocr_regions)
        confidence_total = sum((r.get('confidence') or 0) for r in ocr_regions)

        return {
            'timestamp': datetime.now().isoformat(),
            'pp_structure': {
                'total_elements': element_count,
                'element_types': type_counts,
                'total_bbox_area': pp_bbox_area,
                'has_parsing_res_list': pp_results.get('has_parsing_res_list', False)
            },
            'raw_ocr': {
                'total_regions': region_count,
                'total_bbox_area': ocr_bbox_area,
                'avg_confidence': confidence_total / region_count if region_count else 0
            },
            'comparison': {
                'element_count_ratio': element_count / region_count if region_count else 0,
                'area_ratio': pp_bbox_area / ocr_bbox_area if ocr_bbox_area > 0 else 0,
                'potential_gap': region_count - element_count if region_count else 0
            }
        }

    def _make_serializable(self, obj: Any) -> Any:
        """
        Convert object to JSON-serializable format.

        Containers are converted recursively (tuples become lists, dict keys
        become strings). Objects exposing ``.value`` (enum-like), ``.tolist()``
        (array-like) or ``__dict__`` are unwrapped and converted recursively;
        anything else falls back to ``str(obj)``.
        """
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        if isinstance(obj, (list, tuple)):
            return [self._make_serializable(item) for item in obj]
        if isinstance(obj, dict):
            return {str(k): self._make_serializable(v) for k, v in obj.items()}
        if hasattr(obj, 'value'):
            # The wrapped value may itself need conversion (e.g. a tuple).
            return self._make_serializable(obj.value)
        if hasattr(obj, 'tolist'):
            # Checked before __dict__ so array-like objects are dumped as
            # data rather than as their internal attributes.
            return self._make_serializable(obj.tolist())
        if hasattr(obj, '__dict__'):
            return self._make_serializable(obj.__dict__)
        return str(obj)