"""
PP-StructureV3 Debug Service

Provides debugging tools for visualizing and saving PP-StructureV3 results:
- Save raw results as JSON for inspection
- Generate visualization images showing detected bboxes
- Compare raw OCR regions with PP-StructureV3 elements
"""

import json
import logging
from pathlib import Path
from typing import Dict, List, Any, Optional, Tuple
from datetime import datetime

from PIL import Image, ImageDraw, ImageFont

logger = logging.getLogger(__name__)

# RGB values shared by element types that are deliberately drawn in the same color.
_ORANGE: Tuple[int, int, int] = (255, 165, 0)
_PURPLE: Tuple[int, int, int] = (128, 0, 128)

# Outline color (RGB) per element type. Insertion order is also the order in
# which the entries are listed in the visualization legend; the "default"
# entry is the fallback for any type not listed here.
ELEMENT_COLORS: Dict[str, Tuple[int, int, int]] = {
    "text": (0, 128, 0),             # Green
    "title": (0, 0, 255),            # Blue
    "table": (255, 0, 0),            # Red
    "figure": _ORANGE,               # Orange
    "image": _ORANGE,                # Orange
    "header": _PURPLE,               # Purple
    "footer": _PURPLE,               # Purple
    "equation": (0, 255, 255),       # Cyan
    "chart": (255, 192, 203),        # Pink
    "list": (139, 69, 19),           # Brown
    "reference": (128, 128, 128),    # Gray
    "default": (255, 0, 255),        # Magenta for unknown types
}

# Outline color (RGB) for raw OCR regions.
RAW_OCR_COLOR = (255, 215, 0)  # Gold


class PPStructureDebug:
    """Debug service for PP-StructureV3 analysis results.

    Writes JSON dumps of the raw results and an annotated PNG of the detected
    bounding boxes into ``output_dir``. Every public method is best-effort:
    failures are logged and reported through the return value instead of
    being raised.
    """

    # TrueType fonts tried in order when labelling the visualization; if none
    # can be loaded, Pillow's built-in default font is used instead.
    _FONT_CANDIDATES: Tuple[str, ...] = (
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        "/home/egg/project/Tool_OCR/backend/fonts/NotoSansSC-Regular.ttf",
    )

    def __init__(self, output_dir: Path):
        """
        Initialize debug service.

        Args:
            output_dir: Directory to save debug outputs; created (including
                parents) if it does not exist yet.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def save_raw_results(
        self,
        pp_structure_results: Dict[str, Any],
        raw_ocr_regions: List[Dict[str, Any]],
        filename_prefix: str = "debug"
    ) -> Dict[str, Path]:
        """
        Save raw PP-StructureV3 results and OCR regions as JSON files.

        Three files are attempted independently, so one failure does not
        prevent the others from being written:
        ``<prefix>_pp_structure_raw.json``, ``<prefix>_raw_ocr_regions.json``
        and ``<prefix>_debug_summary.json``.

        Args:
            pp_structure_results: Raw PP-StructureV3 analysis results
            raw_ocr_regions: Raw OCR text regions
            filename_prefix: Prefix for output files

        Returns:
            Dictionary with paths to saved files, keyed by ``'pp_structure'``,
            ``'raw_ocr'`` and ``'summary'``. Files that could not be written
            are omitted (the error is logged).
        """
        saved_files: Dict[str, Path] = {}

        # (result key, filename suffix, payload factory, success msg, failure msg).
        # Payloads are produced lazily so that an error while building one
        # (e.g. the summary) is handled by the same per-file try/except.
        outputs = (
            ('pp_structure', 'pp_structure_raw',
             lambda: pp_structure_results,
             "Saved PP-StructureV3 raw results to",
             "Failed to save PP-StructureV3 results"),
            ('raw_ocr', 'raw_ocr_regions',
             lambda: raw_ocr_regions,
             "Saved raw OCR regions to",
             "Failed to save raw OCR regions"),
            ('summary', 'debug_summary',
             lambda: self._generate_summary(pp_structure_results, raw_ocr_regions),
             "Saved debug summary to",
             "Failed to save debug summary"),
        )

        for key, suffix, build_payload, saved_msg, failed_msg in outputs:
            path = self.output_dir / f"{filename_prefix}_{suffix}.json"
            try:
                self._write_json(path, build_payload())
            except Exception as e:  # best-effort: log and move on to the next file
                logger.error("%s: %s", failed_msg, e)
                continue
            saved_files[key] = path
            logger.info("%s %s", saved_msg, path)

        return saved_files

    def _write_json(self, path: Path, payload: Any) -> None:
        """Serialize ``payload`` to ``path`` as indented UTF-8 JSON.

        The payload is passed through ``_make_serializable`` and encoded to a
        string *before* the file is opened, so a serialization error cannot
        leave a truncated file behind.
        """
        text = json.dumps(self._make_serializable(payload), ensure_ascii=False, indent=2)
        with open(path, 'w', encoding='utf-8') as f:
            f.write(text)

    def generate_visualization(
        self,
        image_path: Path,
        pp_structure_elements: List[Dict[str, Any]],
        raw_ocr_regions: Optional[List[Dict[str, Any]]] = None,
        filename_prefix: str = "debug",
        show_labels: bool = True,
        show_raw_ocr: bool = True
    ) -> Optional[Path]:
        """
        Generate visualization image showing detected elements.

        Raw OCR regions are drawn first with a thin outline, then the
        PP-StructureV3 elements with a thicker outline on top, followed by a
        legend and a one-line info text. The result is saved as
        ``<prefix>_pp_structure_viz.png`` in the output directory.

        Args:
            image_path: Path to original image
            pp_structure_elements: PP-StructureV3 detected elements
            raw_ocr_regions: Optional raw OCR regions to overlay
            filename_prefix: Prefix for output file
            show_labels: Whether to show element type labels
            show_raw_ocr: Whether to show raw OCR regions

        Returns:
            Path to generated visualization image, or None if anything failed
            (the exception is logged with its traceback).
        """
        try:
            elements = pp_structure_elements or []

            # convert() returns an independent copy, so the source file handle
            # can be released as soon as the pixels are loaded.
            with Image.open(image_path) as source:
                viz_img = source.convert('RGB')
            width, height = viz_img.size
            draw = ImageDraw.Draw(viz_img)

            font, small_font = self._load_fonts()

            # Draw raw OCR regions first (so PP-Structure boxes are on top)
            if show_raw_ocr and raw_ocr_regions:
                for region in raw_ocr_regions:
                    bbox = self._normalize_bbox(region.get('bbox', []))
                    if bbox is None:
                        continue
                    x0, y0, x1, y1 = bbox
                    draw.rectangle([x0, y0, x1, y1], outline=RAW_OCR_COLOR, width=1)

                    if show_labels:
                        confidence = self._as_float(region.get('confidence', 0))
                        # Clamp so labels of boxes at the top edge stay visible.
                        draw.text(
                            (x0, max(0, y0 - 12)),
                            f"OCR:{confidence:.2f}",
                            fill=RAW_OCR_COLOR,
                            font=small_font,
                        )

            # Draw PP-StructureV3 elements
            for idx, elem in enumerate(elements):
                elem_type = elem.get('type', 'default')
                if hasattr(elem_type, 'value'):  # enum-like type objects
                    elem_type = elem_type.value
                elem_type = str(elem_type).lower()

                color = ELEMENT_COLORS.get(elem_type, ELEMENT_COLORS['default'])
                bbox = self._normalize_bbox(elem.get('bbox', []))
                if bbox is None:
                    continue

                x0, y0, x1, y1 = bbox
                # Thicker outline distinguishes PP-Structure elements from raw OCR.
                draw.rectangle([x0, y0, x1, y1], outline=color, width=3)

                if show_labels:
                    self._draw_label(
                        draw, (x0, max(0, y0 - 18)), f"{idx}:{elem_type}", color, font
                    )

            # Add legend
            self._draw_legend(draw, width, font)

            # Add image info
            info_text = f"PP-Structure: {len(elements)} elements"
            if raw_ocr_regions:
                info_text += f" | Raw OCR: {len(raw_ocr_regions)} regions"
            info_text += f" | Size: {width}x{height}"
            draw.text((10, height - 25), info_text, fill=(0, 0, 0), font=font)

            # Save visualization
            viz_path = self.output_dir / f"{filename_prefix}_pp_structure_viz.png"
            viz_img.save(viz_path, 'PNG')
            logger.info(f"Saved visualization to {viz_path}")

            return viz_path

        except Exception as e:  # best-effort debug aid: never propagate
            logger.exception("Failed to generate visualization: %s", e)
            return None

    def _load_fonts(self) -> Tuple[Any, Any]:
        """Return ``(label_font, small_font)`` at sizes 14 and 10.

        Tries each path in ``_FONT_CANDIDATES`` in order and falls back to
        Pillow's default font (used for both sizes) if none can be loaded.
        """
        for font_path in self._FONT_CANDIDATES:
            try:
                return (
                    ImageFont.truetype(font_path, 14),
                    ImageFont.truetype(font_path, 10),
                )
            except (IOError, OSError):
                continue
        fallback = ImageFont.load_default()
        return fallback, fallback

    def _draw_label(
        self,
        draw: "ImageDraw.ImageDraw",
        position: Tuple[float, float],
        label: str,
        color: Tuple[int, int, int],
        font: Any,
    ) -> None:
        """Draw ``label`` at ``position`` on a white background box."""
        try:
            # The image is RGB, so the background is opaque white.
            draw.rectangle(draw.textbbox(position, label, font=font), fill=(255, 255, 255))
        except (ValueError, AttributeError):
            # textbbox is unavailable or rejects non-TrueType fonts on some
            # Pillow versions; the background is cosmetic, so skip it.
            pass
        draw.text(position, label, fill=color, font=font)

    def _draw_legend(
        self,
        draw: "ImageDraw.ImageDraw",
        img_width: int,
        font: "ImageFont.ImageFont",
    ) -> None:
        """Draw a legend showing element type colors.

        The legend sits in the top-right corner and lists every entry of
        ``ELEMENT_COLORS`` except ``'default'``, followed by a ``raw_ocr`` row.

        Args:
            draw: Drawing context of the visualization image
            img_width: Width of the image in pixels
            font: Font used for the legend text
        """
        row_height = 18
        header_height = 20
        top = 10
        # Keep the legend on-canvas for images narrower than the legend itself.
        left = max(5, img_width - 150)

        entries = [
            (name, color) for name, color in ELEMENT_COLORS.items() if name != 'default'
        ]
        entries.append(("raw_ocr", RAW_OCR_COLOR))

        # Background box sized from the rows that are actually drawn.
        bottom = top + header_height + len(entries) * row_height + 5
        draw.rectangle(
            [left - 5, top - 5, img_width - 5, bottom],
            fill=(255, 255, 255),
            outline=(0, 0, 0)
        )

        draw.text((left, top), "Legend:", fill=(0, 0, 0), font=font)

        row_y = top + header_height
        for name, color in entries:
            draw.rectangle([left, row_y + 2, left + 12, row_y + 14], fill=color)
            draw.text((left + 18, row_y), name, fill=(0, 0, 0), font=font)
            row_y += row_height

    @staticmethod
    def _as_float(value: Any, default: float = 0.0) -> float:
        """Return ``value`` as a float, or ``default`` if it cannot be converted."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _ordered_box(x0: Any, y0: Any, x1: Any, y1: Any) -> Tuple[float, float, float, float]:
        """Coerce two corners to floats and order them as (min, min, max, max)."""
        x0, y0, x1, y1 = float(x0), float(y0), float(x1), float(y1)
        return (min(x0, x1), min(y0, y1), max(x0, x1), max(y0, y1))

    def _normalize_bbox(self, bbox: Any) -> Optional[Tuple[float, float, float, float]]:
        """Normalize bbox to (x0, y0, x1, y1) format.

        Accepted inputs:
            - nested points ``[[x, y], [x, y], ...]`` -> enclosing rectangle
            - flat ``[x0, y0, x1, y1]``
            - flat polygon ``[x1, y1, x2, y2, ...]`` with at least 8 values
            - dict with ``x0/y0/x1/y1`` or ``x_min/y_min/x_max/y_max`` keys
              (missing keys default to 0)
            - any of the sequence forms as an object exposing ``tolist()``

        Returns:
            A tuple of floats with ``x0 <= x1`` and ``y0 <= y1``, or None when
            the input is empty, has an unsupported shape, or contains values
            that cannot be converted (the latter is logged as a warning).
        """
        if bbox is None:
            return None

        try:
            # Array-like input: convert first, since truth-testing such
            # objects directly may raise.
            if hasattr(bbox, 'tolist'):
                bbox = bbox.tolist()

            if isinstance(bbox, dict):
                if not bbox:
                    return None
                return self._ordered_box(
                    bbox.get('x0', bbox.get('x_min', 0)),
                    bbox.get('y0', bbox.get('y_min', 0)),
                    bbox.get('x1', bbox.get('x_max', 0)),
                    bbox.get('y1', bbox.get('y_max', 0)),
                )

            if not isinstance(bbox, (list, tuple)) or not bbox:
                return None

            # Elements may themselves be array-like (points or scalars).
            items = [v.tolist() if hasattr(v, 'tolist') else v for v in bbox]

            # Nested list format [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
            if isinstance(items[0], (list, tuple)):
                points = [
                    pt for pt in items
                    if isinstance(pt, (list, tuple)) and len(pt) >= 2
                ]
                if not points:
                    return None
                xs = [float(pt[0]) for pt in points]
                ys = [float(pt[1]) for pt in points]
                return (min(xs), min(ys), max(xs), max(ys))

            # Flat list [x0, y0, x1, y1]; corners are reordered if inverted so
            # that areas are non-negative and the box can always be drawn.
            if len(items) == 4:
                return self._ordered_box(*items)

            # Flat polygon [x1, y1, x2, y2, ...]
            if len(items) >= 8:
                xs = [float(v) for v in items[0::2]]
                ys = [float(v) for v in items[1::2]]
                return (min(xs), min(ys), max(xs), max(ys))

        except (TypeError, ValueError, IndexError) as e:
            logger.warning(f"Failed to normalize bbox {bbox}: {e}")

        return None

    def _total_bbox_area(self, items: List[Dict[str, Any]]) -> float:
        """Sum the areas of all items whose ``'bbox'`` can be normalized.

        Overlapping boxes are counted multiple times.
        """
        total = 0
        for item in items:
            bbox = self._normalize_bbox(item.get('bbox'))
            if bbox is not None:
                total += (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
        return total

    def _generate_summary(
        self,
        pp_structure_results: Dict[str, Any],
        raw_ocr_regions: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Generate summary comparing PP-Structure and raw OCR.

        Args:
            pp_structure_results: Results dict; its ``'elements'`` list and
                ``'has_parsing_res_list'`` flag are read. None or a missing
                ``'elements'`` entry is treated as no elements.
            raw_ocr_regions: OCR regions; ``'bbox'`` and ``'confidence'`` are
                read from each. None is treated as an empty list, and a
                missing or non-numeric confidence counts as 0.

        Returns:
            Dict with ``'timestamp'``, ``'pp_structure'``, ``'raw_ocr'`` and
            ``'comparison'`` sections. All ratios are 0 when their
            denominator would be zero.
        """
        results = pp_structure_results or {}
        elements = results.get('elements') or []
        regions = raw_ocr_regions or []

        # Count element types (enum-like types are reduced to their value)
        type_counts: Dict[str, int] = {}
        for elem in elements:
            elem_type = elem.get('type', 'unknown')
            if hasattr(elem_type, 'value'):
                elem_type = elem_type.value
            key = str(elem_type)
            type_counts[key] = type_counts.get(key, 0) + 1

        pp_bbox_area = self._total_bbox_area(elements)
        ocr_bbox_area = self._total_bbox_area(regions)

        element_count = len(elements)
        region_count = len(regions)
        confidence_sum = sum(self._as_float(r.get('confidence', 0)) for r in regions)

        return {
            'timestamp': datetime.now().isoformat(),
            'pp_structure': {
                'total_elements': element_count,
                'element_types': type_counts,
                'total_bbox_area': pp_bbox_area,
                'has_parsing_res_list': results.get('has_parsing_res_list', False)
            },
            'raw_ocr': {
                'total_regions': region_count,
                'total_bbox_area': ocr_bbox_area,
                'avg_confidence': confidence_sum / region_count if region_count else 0
            },
            'comparison': {
                'element_count_ratio': element_count / region_count if region_count else 0,
                'area_ratio': pp_bbox_area / ocr_bbox_area if ocr_bbox_area > 0 else 0,
                'potential_gap': region_count - element_count if region_count else 0
            }
        }

    def _make_serializable(self, obj: Any) -> Any:
        """Convert object to JSON-serializable format.

        Conversion rules, applied recursively and in this order:
            - None, str, int, float, bool: returned unchanged
            - dict: keys are stringified, values converted
            - list, tuple, set, frozenset: converted to a list
            - objects with ``tolist()`` (e.g. numpy arrays): the list form
            - objects with a ``value`` attribute (e.g. enums): that value
            - objects with ``__dict__``: their attribute dict
            - anything else: ``str(obj)``
        """
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        if isinstance(obj, dict):
            return {str(k): self._make_serializable(v) for k, v in obj.items()}
        if isinstance(obj, (list, tuple, set, frozenset)):
            return [self._make_serializable(item) for item in obj]
        # Checked before __dict__ so array-like objects yield their data
        # rather than their internals.
        if hasattr(obj, 'tolist'):
            return self._make_serializable(obj.tolist())
        if hasattr(obj, 'value'):
            return self._make_serializable(obj.value)
        if hasattr(obj, '__dict__'):
            return self._make_serializable(obj.__dict__)
        return str(obj)