diff --git a/backend/alembic/versions/e51c9a16ee16_increase_file_type_column_length.py b/backend/alembic/versions/e51c9a16ee16_increase_file_type_column_length.py
new file mode 100644
index 0000000..298e5e7
--- /dev/null
+++ b/backend/alembic/versions/e51c9a16ee16_increase_file_type_column_length.py
@@ -0,0 +1,40 @@
+"""increase_file_type_column_length
+
+Revision ID: e51c9a16ee16
+Revises: 4d37f412d37a
+Create Date: 2025-11-30 15:03:28.950186
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# Revision identifiers used by Alembic; down_revision matches the "Revises" entry in the module docstring.
+revision: str = 'e51c9a16ee16'
+down_revision: Union[str, None] = '4d37f412d37a'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Widen the nullable tool_ocr_tasks.file_type column from String(50) to String(100)."""
+    op.alter_column(
+        'tool_ocr_tasks',
+        'file_type',
+        existing_type=sa.String(50),
+        type_=sa.String(100),
+        existing_nullable=True
+    )
+
+
+def downgrade() -> None:
+    """Narrow the nullable tool_ocr_tasks.file_type column from String(100) back to String(50)."""
+    op.alter_column(
+        'tool_ocr_tasks',
+        'file_type',
+        existing_type=sa.String(100),
+        type_=sa.String(50),  # NOTE(review): rows holding more than 50 chars may make this fail or lose data, depending on the database - confirm before downgrading
+        existing_nullable=True
+    )
diff --git a/backend/app/models/task.py b/backend/app/models/task.py
index 735ef02..cf78380 100644
--- a/backend/app/models/task.py
+++ b/backend/app/models/task.py
@@ -36,7 +36,7 @@ class Task(Base):
     task_id = Column(String(255), unique=True, nullable=False, index=True,
                     comment="Unique task identifier (UUID)")
     filename = Column(String(255), nullable=True, index=True)
-    file_type = Column(String(50), nullable=True)
+    file_type = Column(String(100), nullable=True)
     status = Column(SQLEnum(TaskStatus), default=TaskStatus.PENDING, nullable=False,
                    index=True)
     result_json_path = Column(String(500), nullable=True,
diff --git a/backend/app/services/direct_extraction_engine.py b/backend/app/services/direct_extraction_engine.py
index 0aad35c..6ec39f0 100644
--- a/backend/app/services/direct_extraction_engine.py
+++ b/backend/app/services/direct_extraction_engine.py
@@ -1317,8 +1317,6 @@ class DirectExtractionEngine:
             doc.close()
 
             if images_added > 0:
-                current_images = unified_doc.metadata.total_images or 0
-                unified_doc.metadata.total_images = current_images + images_added
                 logger.info(f"Added {images_added} inline image regions to document")
 
         except Exception as e:
diff --git a/backend/app/services/document_type_detector.py b/backend/app/services/document_type_detector.py
index d13c710..f12266d 100644
--- a/backend/app/services/document_type_detector.py
+++ b/backend/app/services/document_type_detector.py
@@ -291,9 +291,14 @@ class DocumentTypeDetector:
 
         Strategy:
         1. Convert Office file to PDF using LibreOffice
-        2. Analyze the converted PDF for text extractability
-        3. Route to direct track if PDF has extractable text
+        2. LibreOffice always produces text-based PDFs (not scanned images)
+        3. Always use Direct track for successful conversions
         4. This significantly improves processing time (from >300s to ~2-5s)
+
+        Note: LibreOffice conversion preserves text as extractable text layer,
+        even for documents with complex backgrounds (PPT slides, etc.).
+        The "mixed content" detection in PDF analysis is misleading for Office docs
+        because it counts background images, not scanned text.
         """
         document_type = self.OFFICE_MIMES.get(mime_type, DocumentType.UNKNOWN)
         file_size = file_path.stat().st_size
@@ -318,32 +323,24 @@ class DocumentTypeDetector:
                 pdf_path = converter.convert_to_pdf(file_path, temp_path)
                 logger.info(f"Office document converted to PDF: {pdf_path.name}")
 
-                # Analyze the converted PDF for text extractability
+                # Analyze the converted PDF for metadata (but always use Direct track)
                 pdf_recommendation = self._analyze_pdf(pdf_path)
 
                 # Merge metadata
                 merged_metadata = {**base_metadata, **pdf_recommendation.metadata}
                 merged_metadata["converted_pdf_analyzed"] = True
 
-                # Determine final recommendation based on PDF analysis
-                if pdf_recommendation.track == "direct":
-                    # Converted PDF has extractable text - use direct track
-                    return ProcessingTrackRecommendation(
-                        track="direct",
-                        confidence=pdf_recommendation.confidence * 0.95,  # Slightly lower confidence for converted files
-                        reason=f"Office document converted to text-based PDF ({pdf_recommendation.metadata.get('text_coverage', 0):.0%} text coverage)",
-                        document_type=document_type,  # Keep original Office type
-                        metadata=merged_metadata
-                    )
-                else:
-                    # Converted PDF is image-based or mixed - use OCR track
-                    return ProcessingTrackRecommendation(
-                        track="ocr",
-                        confidence=pdf_recommendation.confidence,
-                        reason=f"Office document converted to image-based PDF, requires OCR",
-                        document_type=document_type,  # Keep original Office type
-                        metadata=merged_metadata
-                    )
+                # LibreOffice always produces text-based PDFs - use Direct track
+                # Even "mixed content" PDFs from Office docs have extractable text
+                # The images are backgrounds/decorations, not scanned content
+                text_coverage = pdf_recommendation.metadata.get('text_coverage', 0)
+                return ProcessingTrackRecommendation(
+                    track="direct",
+                    confidence=0.95,
+                    reason=f"Office document converted to PDF (text coverage: {text_coverage:.0%}, using Direct track)",
+                    document_type=document_type,
+                    metadata=merged_metadata
+                )
 
         except OfficeConverterError as e:
             logger.error(f"Office conversion failed: {e}")
diff --git a/backend/app/services/pdf_generator_service.py b/backend/app/services/pdf_generator_service.py
index 9e39643..fb4c076 100644
--- a/backend/app/services/pdf_generator_service.py
+++ b/backend/app/services/pdf_generator_service.py
@@ -854,6 +854,9 @@ class PDFGeneratorService:
                 # FIX: Collect exclusion regions (tables, images) to prevent duplicate rendering
                 regions_to_avoid = []
 
+                # Calculate page area for background detection
+                page_area = current_page_width * current_page_height
+
                 for element in page.elements:
                     if element.type == ElementType.TABLE:
                         table_elements.append(element)
@@ -867,6 +870,29 @@ class PDFGeneratorService:
                         # Charts often have large bounding boxes that include text labels
                         # which should be rendered as selectable text on top
                         if element.type in [ElementType.IMAGE, ElementType.FIGURE, ElementType.LOGO, ElementType.STAMP]:
+                            # Check if this is Direct track (text from PDF text layer, not OCR)
+                            is_direct = (self.current_processing_track == ProcessingTrack.DIRECT or
+                                        self.current_processing_track == ProcessingTrack.HYBRID)
+
+                            if is_direct:
+                                # Direct track: text is from PDF text layer, not OCR'd from images
+                                # Don't exclude any images - text should be rendered on top
+                                # This is critical for Office documents with background images
+                                logger.debug(f"Direct track: not excluding {element.element_id} from text regions")
+                                continue
+
+                            # OCR track: Skip full-page background images from exclusion regions
+                            # Smaller images that might contain OCR'd text should still be excluded
+                            if element.bbox:
+                                elem_area = (element.bbox.x1 - element.bbox.x0) * (element.bbox.y1 - element.bbox.y0)
+                                coverage_ratio = elem_area / page_area if page_area > 0 else 0
+
+                                # If image covers >70% of page, it's likely a background - don't exclude text
+                                if coverage_ratio > 0.7:
+                                    logger.debug(f"OCR track: skipping background image {element.element_id} from exclusion "
+                                               f"(covers {coverage_ratio*100:.1f}% of page)")
+                                    continue
+
                             regions_to_avoid.append(element)
                     elif element.type == ElementType.LIST_ITEM:
                         list_elements.append(element)