OCR/backend/app/services/pp_structure_debug.py

"""
PP-StructureV3 Debug Service

Provides debugging tools for visualizing and saving PP-StructureV3 results:
- Save raw results as JSON for inspection
- Generate visualization images showing detected bboxes
- Compare raw OCR regions with PP-StructureV3 elements
"""

import json
import logging
from pathlib import Path
from typing import Dict, List, Any, Optional, Tuple
from datetime import datetime

from PIL import Image, ImageDraw, ImageFont

from app.utils.bbox_utils import normalize_bbox

logger = logging.getLogger(__name__)

# Colors shared by more than one element type (RGB).
_ORANGE: Tuple[int, int, int] = (255, 165, 0)
_PURPLE: Tuple[int, int, int] = (128, 0, 128)

# Outline color for each PP-StructureV3 element type (RGB).
# The 'default' entry is the fallback for any type that is not listed here.
ELEMENT_COLORS: Dict[str, Tuple[int, int, int]] = {
    'text': (0, 128, 0),            # green
    'title': (0, 0, 255),           # blue
    'table': (255, 0, 0),           # red
    'figure': _ORANGE,
    'image': _ORANGE,
    'header': _PURPLE,
    'footer': _PURPLE,
    'equation': (0, 255, 255),      # cyan
    'chart': (255, 192, 203),       # pink
    'list': (139, 69, 19),          # brown
    'reference': (128, 128, 128),   # gray
    'default': (255, 0, 255),       # magenta
}

# Outline color for raw OCR regions (RGB, gold).
RAW_OCR_COLOR = (255, 215, 0)


class PPStructureDebug:
    """Debug service for PP-StructureV3 analysis results.

    Writes JSON dumps and an annotated PNG into ``output_dir`` so that
    PP-StructureV3 elements can be compared with raw OCR regions.
    ``save_raw_results`` and ``generate_visualization`` are best-effort:
    they log failures instead of raising.
    """

    # TrueType fonts tried in order; Pillow's built-in font is the last resort.
    _FONT_CANDIDATES: Tuple[str, ...] = (
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        "/home/egg/project/Tool_OCR/backend/fonts/NotoSansSC-Regular.ttf",
    )

    def __init__(self, output_dir: Path):
        """
        Initialize debug service.

        Args:
            output_dir: Directory to save debug outputs. It is created
                (including parents) if it does not exist yet.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def save_raw_results(
        self,
        pp_structure_results: Dict[str, Any],
        raw_ocr_regions: List[Dict[str, Any]],
        filename_prefix: str = "debug"
    ) -> Dict[str, Path]:
        """
        Save raw PP-StructureV3 results and OCR regions as JSON files.

        Three files are attempted independently; a failure in one is logged
        and does not prevent the others from being written.

        Args:
            pp_structure_results: Raw PP-StructureV3 analysis results
            raw_ocr_regions: Raw OCR text regions
            filename_prefix: Prefix for output files

        Returns:
            Dictionary with paths to the files that were written, keyed by
            'pp_structure', 'raw_ocr' and 'summary'.
        """
        saved_files: Dict[str, Path] = {}

        # (result key, file suffix, success label, failure label, payload builder)
        # Payloads are built lazily so that a conversion error is handled by
        # the same try block as the write itself.
        outputs = (
            (
                'pp_structure', 'pp_structure_raw',
                "PP-StructureV3 raw results", "PP-StructureV3 results",
                lambda: self._make_serializable(pp_structure_results),
            ),
            (
                'raw_ocr', 'raw_ocr_regions',
                "raw OCR regions", "raw OCR regions",
                lambda: self._make_serializable(raw_ocr_regions),
            ),
            (
                'summary', 'debug_summary',
                "debug summary", "debug summary",
                # The summary can carry numpy scalars (areas, confidences),
                # so it is sanitized like the other payloads.
                lambda: self._make_serializable(
                    self._generate_summary(pp_structure_results, raw_ocr_regions)
                ),
            ),
        )

        for key, suffix, saved_label, failed_label, build_payload in outputs:
            path = self.output_dir / f"{filename_prefix}_{suffix}.json"
            try:
                # Serialize completely before touching the disk so that a
                # failure cannot leave a truncated JSON file behind.
                text = json.dumps(build_payload(), ensure_ascii=False, indent=2)
                path.write_text(text, encoding='utf-8')
            except Exception as e:  # best-effort debug output: log and go on
                logger.error(f"Failed to save {failed_label}: {e}")
            else:
                saved_files[key] = path
                logger.info(f"Saved {saved_label} to {path}")

        return saved_files

    def generate_visualization(
        self,
        image_path: Path,
        pp_structure_elements: List[Dict[str, Any]],
        raw_ocr_regions: Optional[List[Dict[str, Any]]] = None,
        filename_prefix: str = "debug",
        show_labels: bool = True,
        show_raw_ocr: bool = True
    ) -> Optional[Path]:
        """
        Generate visualization image showing detected elements.

        Args:
            image_path: Path to original image
            pp_structure_elements: PP-StructureV3 detected elements
            raw_ocr_regions: Optional raw OCR regions to overlay
            filename_prefix: Prefix for output file
            show_labels: Whether to show element type labels
            show_raw_ocr: Whether to show raw OCR regions

        Returns:
            Path to generated visualization image, or None if anything
            failed (the error is logged with its traceback).
        """
        try:
            # convert() returns a new, fully loaded image (a copy when the
            # source is already RGB), so the source file can be closed here.
            with Image.open(image_path) as source:
                viz_img = source.convert('RGB')
            width, height = viz_img.size

            draw = ImageDraw.Draw(viz_img)
            font, small_font = self._load_fonts()
            elements = pp_structure_elements or []

            # Draw raw OCR regions first (so PP-Structure boxes are on top)
            if show_raw_ocr and raw_ocr_regions:
                for region in raw_ocr_regions:
                    bbox = self._normalize_bbox(region.get('bbox', []))
                    if not bbox:
                        continue
                    x0, y0, x1, y1 = bbox
                    # Thin outline distinguishes OCR regions from elements
                    draw.rectangle([x0, y0, x1, y1], outline=RAW_OCR_COLOR, width=1)

                    if show_labels:
                        # A missing or None confidence is shown as 0.00
                        confidence = region.get('confidence') or 0
                        label = f"OCR:{confidence:.2f}"
                        draw.text((x0, y0 - 12), label, fill=RAW_OCR_COLOR, font=small_font)

            # Draw PP-StructureV3 elements
            for idx, elem in enumerate(elements):
                elem_type = elem.get('type', 'default')
                if hasattr(elem_type, 'value'):
                    # Enum-like type: use its underlying value as the name
                    elem_type = elem_type.value
                elem_type = str(elem_type).lower()

                color = ELEMENT_COLORS.get(elem_type, ELEMENT_COLORS['default'])
                bbox = self._normalize_bbox(elem.get('bbox', []))
                if not bbox:
                    continue

                x0, y0, x1, y1 = bbox
                # Draw thicker rectangle for PP-Structure elements
                draw.rectangle([x0, y0, x1, y1], outline=color, width=3)

                if show_labels:
                    label = f"{idx}:{elem_type}"
                    # White background keeps the label readable on the page
                    text_bbox = draw.textbbox((x0, y0 - 18), label, font=font)
                    draw.rectangle(text_bbox, fill=(255, 255, 255, 200))
                    draw.text((x0, y0 - 18), label, fill=color, font=font)

            # Add legend
            self._draw_legend(draw, width, font)

            # Add image info
            info_text = f"PP-Structure: {len(elements)} elements"
            if raw_ocr_regions:
                info_text += f" | Raw OCR: {len(raw_ocr_regions)} regions"
            info_text += f" | Size: {width}x{height}"
            draw.text((10, height - 25), info_text, fill=(0, 0, 0), font=font)

            # Save visualization
            viz_path = self.output_dir / f"{filename_prefix}_pp_structure_viz.png"
            viz_img.save(viz_path, 'PNG')
            logger.info(f"Saved visualization to {viz_path}")

            return viz_path

        except Exception as e:
            # logger.exception records the traceback through the logging
            # configuration instead of printing it to stderr.
            logger.exception("Failed to generate visualization: %s", e)
            return None

    def _load_fonts(self) -> Tuple[Any, Any]:
        """Return (label font at 14pt, small font at 10pt) from the first usable candidate."""
        for font_path in self._FONT_CANDIDATES:
            try:
                return ImageFont.truetype(font_path, 14), ImageFont.truetype(font_path, 10)
            except (IOError, OSError):
                continue
        # No TrueType font available: use Pillow's built-in font for both.
        fallback = ImageFont.load_default()
        return fallback, fallback

    def _draw_legend(self, draw: "ImageDraw.ImageDraw", img_width: int, font: Any) -> None:
        """
        Draw a legend showing element type colors in the top-right corner.

        Args:
            draw: Drawing context of the visualization image
            img_width: Width of the image, used to right-align the legend
            font: Font used for the legend text
        """
        legend_x = img_width - 150
        legend_y = 10

        # Background box: one 18px row per ELEMENT_COLORS entry. The skipped
        # 'default' row is taken by the raw OCR entry, plus room for the title.
        draw.rectangle(
            [legend_x - 5, legend_y - 5, img_width - 5, legend_y + len(ELEMENT_COLORS) * 18 + 25],
            fill=(255, 255, 255, 230),
            outline=(0, 0, 0)
        )

        draw.text((legend_x, legend_y), "Legend:", fill=(0, 0, 0), font=font)
        legend_y += 20

        for elem_type, color in ELEMENT_COLORS.items():
            if elem_type == 'default':
                continue
            draw.rectangle([legend_x, legend_y + 2, legend_x + 12, legend_y + 14], fill=color)
            draw.text((legend_x + 18, legend_y), elem_type, fill=(0, 0, 0), font=font)
            legend_y += 18

        # Add raw OCR legend entry
        draw.rectangle([legend_x, legend_y + 2, legend_x + 12, legend_y + 14], fill=RAW_OCR_COLOR)
        draw.text((legend_x + 18, legend_y), "raw_ocr", fill=(0, 0, 0), font=font)

    def _normalize_bbox(self, bbox: Any) -> Optional[Tuple[float, float, float, float]]:
        """
        Normalize bbox to (x0, y0, x1, y1) format. Uses shared bbox utility.

        A missing bbox (None) yields None without calling the utility, so
        callers can pass ``item.get('bbox')`` directly.
        """
        if bbox is None:
            return None
        return normalize_bbox(bbox)

    def _generate_summary(
        self,
        pp_structure_results: Dict[str, Any],
        raw_ocr_regions: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Generate summary comparing PP-Structure and raw OCR.

        Args:
            pp_structure_results: PP-StructureV3 results; the 'elements' and
                'has_parsing_res_list' keys are read
            raw_ocr_regions: Raw OCR text regions ('bbox' and 'confidence'
                keys are read)

        Returns:
            Dictionary with 'timestamp', 'pp_structure', 'raw_ocr' and
            'comparison' sections. Ratios are 0 when there is nothing to
            divide by.
        """
        results = pp_structure_results or {}
        regions = raw_ocr_regions or []
        pp_elements = results.get('elements') or []

        # Count element types
        type_counts: Dict[str, int] = {}
        for elem in pp_elements:
            elem_type = elem.get('type', 'unknown')
            if hasattr(elem_type, 'value'):
                elem_type = elem_type.value
            type_name = str(elem_type)
            type_counts[type_name] = type_counts.get(type_name, 0) + 1

        # Summed box areas; overlapping boxes are counted more than once.
        pp_bbox_area = self._total_bbox_area(pp_elements)
        ocr_bbox_area = self._total_bbox_area(regions)

        region_count = len(regions)
        # A missing or None confidence counts as 0 towards the average.
        confidence_sum = sum((r.get('confidence') or 0) for r in regions)

        return {
            'timestamp': datetime.now().isoformat(),
            'pp_structure': {
                'total_elements': len(pp_elements),
                'element_types': type_counts,
                'total_bbox_area': pp_bbox_area,
                'has_parsing_res_list': results.get('has_parsing_res_list', False)
            },
            'raw_ocr': {
                'total_regions': region_count,
                'total_bbox_area': ocr_bbox_area,
                'avg_confidence': confidence_sum / region_count if region_count else 0
            },
            'comparison': {
                'element_count_ratio': len(pp_elements) / region_count if region_count else 0,
                'area_ratio': pp_bbox_area / ocr_bbox_area if ocr_bbox_area > 0 else 0,
                'potential_gap': region_count - len(pp_elements) if region_count else 0
            }
        }

    def _total_bbox_area(self, items: List[Dict[str, Any]]) -> Any:
        """Sum width * height over every item whose 'bbox' can be normalized."""
        total = 0
        for item in items:
            bbox = self._normalize_bbox(item.get('bbox'))
            if bbox:
                total += (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
        return total

    def _make_serializable(self, obj: Any) -> Any:
        """
        Convert object to JSON-serializable format.

        Containers are converted recursively (dict keys become strings;
        tuples and sets become lists). Other objects are reduced, in this
        order, through ``tolist()``, a non-callable ``value`` attribute,
        their ``__dict__``, and finally ``str()``.
        """
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        if isinstance(obj, dict):
            return {str(k): self._make_serializable(v) for k, v in obj.items()}
        if isinstance(obj, (list, tuple, set, frozenset)):
            return [self._make_serializable(item) for item in obj]
        # Array-likes come before the attribute-based fallbacks: an object
        # that offers tolist() should be dumped as data, not as attributes.
        if hasattr(obj, 'tolist'):
            return self._make_serializable(obj.tolist())
        if hasattr(obj, 'value'):
            value = obj.value
            # A callable `value` is a method, not enum-style payload.
            if not callable(value):
                return self._make_serializable(value)
        if hasattr(obj, '__dict__'):
            return self._make_serializable(obj.__dict__)
        return str(obj)