"""
Phase 1 Bug Fixes Verification Tests

Tests for:
1.1 Direct Track table cell merging
1.2 OCR Track image path preservation
1.3 Cell boxes coordinate validation
1.4 Tiny decoration image filtering
1.5 Covering image removal
"""

import sys
import os
from pathlib import Path

# Add backend to path
sys.path.insert(0, str(Path(__file__).parent.parent))

import fitz
from app.services.direct_extraction_engine import DirectExtractionEngine
from app.services.ocr_to_unified_converter import validate_cell_boxes
from app.models.unified_document import TableCell


def test_1_1_table_cell_merging():
    """Test 1.1.5: Verify edit3.pdf returns correct merged cells.

    Walks every table that ``page.find_tables()`` reports in
    ``demo_docs/edit3.pdf`` and counts the entries of ``table.cells`` that
    are not ``None``. ``None`` entries are reported as merged positions.

    Returns:
        True when the total cell count is between 1 and 203 (merging
        detected), False when it is exactly 204 (every grid position
        treated as a 1x1 cell), and None when the demo PDF is missing or
        the count is inconclusive.
    """
    print("\n" + "="*60)
    print("TEST 1.1: Direct Track Table Cell Merging")
    print("="*60)

    pdf_path = Path(__file__).parent.parent.parent / "demo_docs" / "edit3.pdf"
    if not pdf_path.exists():
        print(f"SKIP: {pdf_path} not found")
        # A missing fixture is a skip, not a failure: returning None keeps
        # the summary from reporting a spurious FAIL for this test.
        return None

    total_cells = 0
    merged_cells = 0  # number of tables containing at least one None cell

    doc = fitz.open(str(pdf_path))
    try:
        for page_num, page in enumerate(doc):
            tables = page.find_tables()
            for table_idx, table in enumerate(tables.tables):
                data = table.extract()
                cell_rects = getattr(table, 'cells', None)
                if not cell_rects:
                    continue

                num_rows = len(data)
                num_cols = max(len(row) for row in data) if data else 0

                # Count actual cells (non-None)
                actual_cells = sum(1 for c in cell_rects if c is not None)
                none_cells = sum(1 for c in cell_rects if c is None)

                print(f"  Page {page_num}, Table {table_idx}:")
                print(f"    Grid size: {num_rows} x {num_cols} = {num_rows * num_cols} positions")
                print(f"    Actual cells: {actual_cells}")
                print(f"    Merged positions (None): {none_cells}")

                total_cells += actual_cells
                if none_cells > 0:
                    merged_cells += 1
    finally:
        # Release the document even if table detection raises.
        doc.close()

    print(f"\n  Total actual cells across all tables: {total_cells}")
    print(f"  Tables with merging: {merged_cells}")

    # According to PLAN.md, edit3.pdf should have 83 cells (not 204)
    # The presence of None values indicates merging is detected
    if 0 < total_cells < 204:
        print("  RESULT: PASS - Cell merging detected correctly")
        return True
    if total_cells == 204:
        print("  RESULT: FAIL - All cells treated as 1x1 (no merging detected)")
        return False

    print(f"  RESULT: INCONCLUSIVE - {total_cells} cells found")
    return None


def test_1_3_cell_boxes_validation():
    """Test 1.3: Verify cell_boxes coordinate validation.

    Feeds ``validate_cell_boxes`` one set of in-bounds boxes and one set of
    out-of-bounds boxes, then checks the ``valid``, ``invalid_count`` and
    ``clamped_boxes`` entries of the returned mapping.

    Returns:
        True when every assertion holds.

    Raises:
        AssertionError: if any expectation about the validation result fails.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("TEST 1.3: Cell Boxes Coordinate Validation")
    print(banner)

    page_width, page_height = 300, 200

    # Test case 1: every box lies inside the 300 x 200 page
    in_bounds = [
        [10, 10, 100, 50],
        [100, 10, 200, 50],
        [10, 50, 200, 100],
    ]
    result = validate_cell_boxes(
        in_bounds, [0, 0, page_width, page_height], page_width, page_height
    )
    print(f"  Valid boxes: valid={result['valid']}, invalid_count={result['invalid_count']}")
    assert result['valid'], "Valid boxes should pass validation"

    # Test case 2: each box violates exactly one bound
    out_of_bounds = [
        [-10, 10, 100, 50],    # x0 < 0
        [10, 10, 400, 50],     # x1 > page_width
        [10, 10, 100, 300],    # y1 > page_height
    ]
    result = validate_cell_boxes(
        out_of_bounds, [0, 0, page_width, page_height], page_width, page_height
    )
    print(f"  Invalid boxes: valid={result['valid']}, invalid_count={result['invalid_count']}")
    assert not result['valid'], "Invalid boxes should fail validation"
    assert result['invalid_count'] == 3, "Should detect 3 invalid boxes"

    # Test case 3: the out-of-bounds run must also hand back clamped boxes
    clamped_boxes = result['clamped_boxes']
    assert len(clamped_boxes) == 3, "Should return clamped boxes"
    first_clamped = clamped_boxes[0]
    assert first_clamped[0] >= 0, "Clamped x0 should be >= 0"

    print("  RESULT: PASS - Coordinate validation works correctly")
    return True


def test_1_4_tiny_image_filtering():
    """Test 1.4: Verify tiny decoration image filtering.

    Scans every image referenced by ``demo_docs/edit3.pdf`` and classifies
    it as tiny or normal by the area of its first placement rectangle.
    Only the first rectangle returned by ``page.get_image_rects`` is
    inspected; images with no rectangle are ignored.

    Returns:
        True when at least one tiny image is found, None when the demo PDF
        is missing or contains no tiny images.
    """
    print("\n" + "="*60)
    print("TEST 1.4: Tiny Decoration Image Filtering")
    print("="*60)

    pdf_path = Path(__file__).parent.parent.parent / "demo_docs" / "edit3.pdf"
    if not pdf_path.exists():
        print(f"SKIP: {pdf_path} not found")
        return None

    tiny_count = 0
    normal_count = 0
    min_area = 200  # Same threshold as in DirectExtractionEngine

    doc = fitz.open(str(pdf_path))
    try:
        for page_num, page in enumerate(doc):
            for img in page.get_images():
                xref = img[0]
                rects = page.get_image_rects(xref)
                if not rects:
                    continue

                rect = rects[0]
                area = (rect.x1 - rect.x0) * (rect.y1 - rect.y0)
                if area < min_area:
                    tiny_count += 1
                    print(f"  Page {page_num}: Tiny image xref={xref}, area={area:.1f} px²")
                else:
                    normal_count += 1
    finally:
        # Release the document even if image inspection raises.
        doc.close()

    print(f"\n  Tiny images (< {min_area} px²): {tiny_count}")
    print(f"  Normal images: {normal_count}")

    if tiny_count > 0:
        print("  RESULT: PASS - Tiny images detected, will be filtered")
        return True

    print("  RESULT: INFO - No tiny images found in test file")
    return None


def test_1_5_covering_image_detection():
    """Test 1.5: Verify covering image detection.

    Runs ``DirectExtractionEngine._preprocess_page`` on every page of
    ``demo_docs/edit3.pdf`` with whiteout detection enabled and totals the
    entries found under the ``covering_images`` key of each result. Up to
    three detections per page are printed.

    Returns:
        True when at least one covering image is detected, None when the
        demo PDF is missing or no covering images are found.
    """
    print("\n" + "="*60)
    print("TEST 1.5: Covering Image Detection")
    print("="*60)

    pdf_path = Path(__file__).parent.parent.parent / "demo_docs" / "edit3.pdf"
    if not pdf_path.exists():
        print(f"SKIP: {pdf_path} not found")
        return None

    engine = DirectExtractionEngine(
        enable_whiteout_detection=True,
        whiteout_iou_threshold=0.8
    )

    total_covering = 0

    doc = fitz.open(str(pdf_path))
    try:
        for page_num, page in enumerate(doc):
            result = engine._preprocess_page(page, page_num, doc)
            covering_images = result.get('covering_images', [])
            if not covering_images:
                continue

            print(f"  Page {page_num}: {len(covering_images)} covering images detected")
            for img in covering_images[:3]:  # Show first 3
                print(f"    - xref={img.get('xref')}, type={img.get('color_type')}, "
                      f"bbox={[round(x, 1) for x in img.get('bbox', [])]}")
            total_covering += len(covering_images)
    finally:
        # Release the document even if preprocessing raises.
        doc.close()

    print(f"\n  Total covering images detected: {total_covering}")

    if total_covering > 0:
        print("  RESULT: PASS - Covering images detected, will be filtered")
        return True

    print("  RESULT: INFO - No covering images found in test file")
    return None


def test_direct_extraction_full():
    """Full integration test for Direct Track extraction.

    Runs ``DirectExtractionEngine.extract`` on ``demo_docs/edit3.pdf`` and
    prints how many table elements, image elements and tables with at
    least one spanning cell the result contains.

    Returns:
        True when extraction and counting finish without raising, False
        when any exception is raised (its traceback is printed), and None
        when the demo PDF is missing.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("INTEGRATION TEST: Direct Track Full Extraction")
    print(banner)

    pdf_path = Path(__file__).parent.parent.parent / "demo_docs" / "edit3.pdf"
    if not pdf_path.exists():
        print(f"SKIP: {pdf_path} not found")
        return None

    engine = DirectExtractionEngine(
        enable_table_detection=True,
        enable_image_extraction=True,
        min_image_area=200.0,
        enable_whiteout_detection=True,
    )

    try:
        # The engine is given the Path object itself, not its string form.
        result = engine.extract(pdf_path)

        table_count = image_count = merged_table_count = 0
        for page in result.pages:
            for elem in page.elements:
                kind = elem.type.value
                if kind == 'table':
                    table_count += 1
                    content = elem.content
                    # A table counts as merged once any cell spans more
                    # than one row or column.
                    if content and hasattr(content, 'cells') and any(
                        cell.row_span > 1 or cell.col_span > 1
                        for cell in content.cells
                    ):
                        merged_table_count += 1
                elif kind == 'image':
                    image_count += 1

        print(f"  Document ID: {result.document_id}")
        print(f"  Pages: {len(result.pages)}")
        print(f"  Tables: {table_count} (with merging: {merged_table_count})")
        print(f"  Images: {image_count}")

        print("  RESULT: PASS - Extraction completed successfully")
        return True

    except Exception as e:
        # Test boundary: report any failure rather than crash the run.
        print(f"  RESULT: FAIL - {e}")
        import traceback
        traceback.print_exc()
        return False


if __name__ == "__main__":
    # Script entry point: run every verification test, print a summary and
    # exit non-zero when any test failed.
    import traceback

    print("="*60)
    print("Phase 1 Bug Fixes Verification Tests")
    print("="*60)

    # Label -> test callable; dict insertion order fixes the run order and
    # the order of the summary lines.
    tests = {
        '1.1_table_merging': test_1_1_table_cell_merging,
        '1.3_coord_validation': test_1_3_cell_boxes_validation,
        '1.4_tiny_filtering': test_1_4_tiny_image_filtering,
        '1.5_covering_detection': test_1_5_covering_image_detection,
        'integration': test_direct_extraction_full,
    }

    results = {}

    # Run tests
    for test_name, func in tests.items():
        try:
            results[test_name] = func()
        except Exception:
            # Some tests signal failure with assert instead of returning
            # False; record that as a failure so the remaining tests still
            # run and the summary is still printed.
            print(f"  RESULT: FAIL - {test_name} raised an exception")
            traceback.print_exc()
            results[test_name] = False

    # Summary
    print("\n" + "="*60)
    print("TEST SUMMARY")
    print("="*60)

    for test_name, result in results.items():
        status = "PASS" if result is True else "FAIL" if result is False else "SKIP/INFO"
        print(f"  {test_name}: {status}")

    passed = sum(1 for r in results.values() if r is True)
    failed = sum(1 for r in results.values() if r is False)
    skipped = sum(1 for r in results.values() if r is None)

    print(f"\n  Total: {passed} passed, {failed} failed, {skipped} skipped/info")

    # Let callers (shell scripts, CI) detect failures from the exit status.
    sys.exit(1 if failed else 0)