refactor: remove unused code and migrate legacy API
Backend cleanup:
- Remove ocr_service_original.py (legacy OCR service, replaced by ocr_service.py)
- Remove preprocessor.py (unused, functionality absorbed by layout_preprocessing_service.py)
- Remove pdf_font_manager.py (unused, never referenced by any service)

Frontend cleanup:
- Remove MarkdownPreview.tsx (unused component)
- Remove ResultsTable.tsx (unused, replaced by TaskHistoryPage)
- Remove services/api.ts (legacy API client, migrated to apiV2)
- Remove types/api.ts (legacy types, migrated to apiV2.ts)

API migration:
- Add export rules CRUD methods to apiClientV2
- Update SettingsPage.tsx to use apiClientV2
- Update Layout.tsx to use only apiClientV2 for logout

This reduces ~1,500 lines of redundant code and unifies the API client.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
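Note: the export-rules CRUD methods added to apiClientV2 are referenced by the SettingsPage.tsx hunks below, but their implementation is not part of this diff. A minimal sketch of what they plausibly look like, inferred from those call sites — the `/export/rules` endpoint paths and the axios setup are assumptions, not confirmed by this commit:

```typescript
import axios, { type AxiosInstance } from 'axios'
import type { ExportRule } from '@/types/apiV2'

// Hypothetical sketch of the export-rules CRUD methods on apiClientV2.
// Method names and signatures follow the SettingsPage.tsx call sites below;
// the '/export/rules' paths and baseURL are assumed, not shown in this diff.
class ApiClientV2 {
  private client: AxiosInstance = axios.create({ baseURL: '/api/v2' })

  async getExportRules(): Promise<ExportRule[]> {
    const response = await this.client.get<ExportRule[]>('/export/rules')
    return response.data
  }

  async createExportRule(rule: Partial<ExportRule>): Promise<ExportRule> {
    const response = await this.client.post<ExportRule>('/export/rules', rule)
    return response.data
  }

  async updateExportRule(ruleId: number, rule: Partial<ExportRule>): Promise<ExportRule> {
    const response = await this.client.put<ExportRule>(`/export/rules/${ruleId}`, rule)
    return response.data
  }

  async deleteExportRule(ruleId: number): Promise<void> {
    await this.client.delete(`/export/rules/${ruleId}`)
  }
}
```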
ocr_service_original.py
@@ -1,835 +0,0 @@
"""
Tool_OCR - Core OCR Service
PaddleOCR-VL integration for text and structure extraction
"""

import json
import logging
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from datetime import datetime
import uuid

from paddleocr import PaddleOCR, PPStructureV3
from PIL import Image
from pdf2image import convert_from_path
import paddle

from app.core.config import settings
from app.services.office_converter import OfficeConverter, OfficeConverterError

logger = logging.getLogger(__name__)


class OCRService:
    """
    Core OCR service using PaddleOCR-VL
    Handles text recognition and document structure analysis
    """

    def __init__(self):
        """Initialize PaddleOCR and PPStructure engines with GPU detection"""
        self.ocr_languages = settings.ocr_languages_list
        self.confidence_threshold = settings.ocr_confidence_threshold

        # Initialize PaddleOCR engine (will be lazy-loaded per language)
        self.ocr_engines = {}

        # Initialize PP-Structure for layout analysis
        self.structure_engine = None

        # Initialize Office document converter
        self.office_converter = OfficeConverter()

        # GPU Detection and Configuration
        self.gpu_available = False
        self.use_gpu = False
        self.gpu_info = {}

        self._detect_and_configure_gpu()

        logger.info("OCR Service initialized")

    def _detect_and_configure_gpu(self):
        """Detect GPU availability and configure usage"""
        try:
            # Check if forced CPU mode
            if settings.force_cpu_mode:
                logger.info("GPU mode forced to CPU by configuration")
                self.use_gpu = False
                self.gpu_info = {
                    'available': False,
                    'reason': 'CPU mode forced by configuration',
                }
                return

            # Check if PaddlePaddle is compiled with CUDA
            if paddle.is_compiled_with_cuda():
                # Check if GPU devices are available
                gpu_count = paddle.device.cuda.device_count()

                if gpu_count > 0:
                    self.gpu_available = True
                    self.use_gpu = True

                    # Get GPU device information
                    device_id = settings.gpu_device_id if settings.gpu_device_id < gpu_count else 0
                    gpu_props = paddle.device.cuda.get_device_properties(device_id)

                    self.gpu_info = {
                        'available': True,
                        'device_count': gpu_count,
                        'device_id': device_id,
                        'device_name': gpu_props.name,
                        'total_memory': gpu_props.total_memory,
                        'compute_capability': f"{gpu_props.major}.{gpu_props.minor}",
                    }

                    # Set GPU memory fraction
                    try:
                        paddle.device.set_device(f'gpu:{device_id}')
                        logger.info(f"GPU {device_id} selected: {gpu_props.name}")
                        logger.info(f"GPU memory: {gpu_props.total_memory / (1024**3):.2f} GB")
                        logger.info(f"Compute capability: {gpu_props.major}.{gpu_props.minor}")
                        logger.info(f"GPU memory fraction set to: {settings.gpu_memory_fraction}")
                    except Exception as e:
                        logger.warning(f"Failed to configure GPU device: {e}")
                        self.use_gpu = False
                        self.gpu_info['available'] = False
                        self.gpu_info['reason'] = f'GPU configuration failed: {str(e)}'
                else:
                    logger.warning("CUDA is available but no GPU devices found")
                    self.gpu_info = {
                        'available': False,
                        'reason': 'CUDA compiled but no GPU devices detected',
                    }
            else:
                logger.info("PaddlePaddle not compiled with CUDA support")
                self.gpu_info = {
                    'available': False,
                    'reason': 'PaddlePaddle not compiled with CUDA',
                }

        except Exception as e:
            logger.error(f"GPU detection failed: {e}")
            self.use_gpu = False
            self.gpu_info = {
                'available': False,
                'reason': f'GPU detection error: {str(e)}',
            }

        # Log final GPU status
        if self.use_gpu:
            logger.info(f"✓ GPU acceleration ENABLED - Using {self.gpu_info.get('device_name', 'Unknown GPU')}")
        else:
            reason = self.gpu_info.get('reason', 'Unknown')
            logger.info(f"ℹ GPU acceleration DISABLED - {reason} - Using CPU mode")

    def get_gpu_status(self) -> Dict:
        """
        Get current GPU status and information

        Returns:
            Dictionary with GPU status information
        """
        status = {
            'gpu_enabled': self.use_gpu,
            'gpu_available': self.gpu_available,
            **self.gpu_info,
        }

        # Add current GPU memory usage if GPU is being used
        if self.use_gpu and self.gpu_available:
            try:
                device_id = self.gpu_info.get('device_id', 0)
                # Get memory info (returns allocated, total in bytes)
                memory_allocated = paddle.device.cuda.memory_allocated(device_id)
                memory_reserved = paddle.device.cuda.memory_reserved(device_id)
                total_memory = self.gpu_info.get('total_memory', 0)

                status['memory_allocated_mb'] = memory_allocated / (1024**2)
                status['memory_reserved_mb'] = memory_reserved / (1024**2)
                status['memory_total_mb'] = total_memory / (1024**2)
                status['memory_utilization'] = (memory_allocated / total_memory * 100) if total_memory > 0 else 0
            except Exception as e:
                logger.warning(f"Failed to get GPU memory info: {e}")

        return status

    def get_ocr_engine(self, lang: str = 'ch') -> PaddleOCR:
        """
        Get or create OCR engine for specified language with GPU support

        Args:
            lang: Language code (ch, en, japan, korean, etc.)

        Returns:
            PaddleOCR engine instance
        """
        if lang not in self.ocr_engines:
            logger.info(f"Initializing PaddleOCR engine for language: {lang} (GPU: {self.use_gpu})")

            try:
                # PaddleOCR 3.x: Device is set globally via paddle.set_device()
                # No need to pass device/use_gpu/gpu_mem parameters
                self.ocr_engines[lang] = PaddleOCR(
                    lang=lang,
                    use_textline_orientation=True,  # Replaces deprecated use_angle_cls
                )
                logger.info(f"PaddleOCR engine ready for {lang} (PaddlePaddle {paddle.__version__}, {'GPU' if self.use_gpu else 'CPU'} mode)")

            except Exception as e:
                # If GPU initialization fails, fall back to CPU
                if self.use_gpu:
                    logger.warning(f"GPU initialization failed, falling back to CPU: {e}")
                    self.use_gpu = False
                    # Switch to CPU device globally
                    paddle.set_device('cpu')
                    self.ocr_engines[lang] = PaddleOCR(
                        lang=lang,
                        use_textline_orientation=True,
                    )
                    logger.info(f"PaddleOCR engine ready for {lang} (CPU mode - fallback)")
                else:
                    raise

        return self.ocr_engines[lang]

    def get_structure_engine(self) -> PPStructureV3:
        """
        Get or create PP-Structure engine for layout analysis with GPU support

        Returns:
            PPStructure engine instance
        """
        if self.structure_engine is None:
            logger.info(f"Initializing PP-StructureV3 engine (GPU: {self.use_gpu})")

            try:
                # PaddleOCR 3.x: Device is set globally via paddle.set_device()
                # No need to pass device/use_gpu/gpu_mem parameters
                self.structure_engine = PPStructureV3(
                    use_doc_orientation_classify=False,
                    use_doc_unwarping=False,
                    use_textline_orientation=False,
                    use_table_recognition=True,
                    use_formula_recognition=True,
                    use_chart_recognition=True,  # Enable chart recognition (requires PaddlePaddle >= 3.2.0 for fused_rms_norm_ext)
                    layout_threshold=0.5,
                )
                logger.info(f"PP-StructureV3 engine ready (PaddlePaddle {paddle.__version__}, {'GPU' if self.use_gpu else 'CPU'} mode)")

            except Exception as e:
                # If GPU initialization fails, fall back to CPU
                if self.use_gpu:
                    logger.warning(f"GPU initialization failed for PP-Structure, falling back to CPU: {e}")
                    self.use_gpu = False
                    # Switch to CPU device globally
                    paddle.set_device('cpu')
                    self.structure_engine = PPStructureV3(
                        use_doc_orientation_classify=False,
                        use_doc_unwarping=False,
                        use_textline_orientation=False,
                        use_table_recognition=True,
                        use_formula_recognition=True,
                        use_chart_recognition=True,  # Enable chart recognition (CPU fallback mode)
                        layout_threshold=0.5,
                    )
                    logger.info("PP-StructureV3 engine ready (CPU mode - fallback)")
                else:
                    raise

        return self.structure_engine

    def convert_pdf_to_images(self, pdf_path: Path, output_dir: Path) -> List[Path]:
        """
        Convert PDF to images (one per page)

        Args:
            pdf_path: Path to PDF file
            output_dir: Directory to save converted images

        Returns:
            List of paths to converted images
        """
        try:
            output_dir.mkdir(parents=True, exist_ok=True)

            logger.info(f"Converting PDF {pdf_path.name} to images")

            # Convert PDF to images (300 DPI for good quality)
            images = convert_from_path(
                str(pdf_path),
                dpi=300,
                fmt='png'
            )

            image_paths = []
            for i, image in enumerate(images):
                # Save each page as PNG
                image_path = output_dir / f"{pdf_path.stem}_page_{i+1}.png"
                image.save(str(image_path), 'PNG')
                image_paths.append(image_path)
                logger.info(f"Saved page {i+1} to {image_path.name}")

            logger.info(f"Converted {len(image_paths)} pages from PDF")
            return image_paths

        except Exception as e:
            logger.error(f"PDF conversion error: {str(e)}")
            raise

    def process_image(
        self,
        image_path: Path,
        lang: str = 'ch',
        detect_layout: bool = True,
        confidence_threshold: Optional[float] = None,
        output_dir: Optional[Path] = None,
        current_page: int = 0
    ) -> Dict:
        """
        Process single image with OCR and layout analysis

        Args:
            image_path: Path to image file
            lang: Language for OCR
            detect_layout: Whether to perform layout analysis
            confidence_threshold: Minimum confidence threshold (uses default if None)
            output_dir: Optional output directory for saving extracted images
            current_page: Current page number (0-based) for multi-page documents

        Returns:
            Dictionary with OCR results and metadata
        """
        start_time = datetime.now()
        threshold = confidence_threshold if confidence_threshold is not None else self.confidence_threshold

        try:
            # Check if file is Office document
            if self.office_converter.is_office_document(image_path):
                logger.info(f"Detected Office document: {image_path.name}, converting to PDF")
                try:
                    # Convert Office document to PDF
                    pdf_path = self.office_converter.convert_to_pdf(image_path)
                    logger.info(f"Office document converted to PDF: {pdf_path.name}")

                    # Process the PDF (will be handled by PDF processing logic below)
                    image_path = pdf_path
                except OfficeConverterError as e:
                    logger.error(f"Office conversion failed: {str(e)}")
                    raise

            # Check if file is PDF
            is_pdf = image_path.suffix.lower() == '.pdf'

            if is_pdf:
                # Convert PDF to images
                logger.info(f"Detected PDF file: {image_path.name}, converting to images")
                pdf_images_dir = image_path.parent / f"{image_path.stem}_pages"
                image_paths = self.convert_pdf_to_images(image_path, pdf_images_dir)

                # Process all pages
                all_text_regions = []
                total_confidence_sum = 0.0
                total_valid_regions = 0
                all_layout_data = []
                all_images_metadata = []
                all_ocr_dimensions = []

                for page_num, page_image_path in enumerate(image_paths, 1):
                    logger.info(f"Processing PDF page {page_num}/{len(image_paths)}")

                    # Process each page with correct page number (0-based for layout data)
                    page_result = self.process_image(
                        page_image_path,
                        lang=lang,
                        detect_layout=detect_layout,
                        confidence_threshold=confidence_threshold,
                        output_dir=output_dir,
                        current_page=page_num - 1  # Convert to 0-based page number for layout data
                    )

                    # Accumulate results
                    if page_result['status'] == 'success':
                        # Add page number to each text region
                        for region in page_result['text_regions']:
                            region['page'] = page_num
                            all_text_regions.append(region)

                        total_confidence_sum += page_result['average_confidence'] * page_result['total_text_regions']
                        total_valid_regions += page_result['total_text_regions']

                        # Accumulate layout data (page numbers already set correctly in analyze_layout)
                        if page_result.get('layout_data'):
                            layout_data = page_result['layout_data']
                            all_layout_data.append(layout_data)

                        # Accumulate images metadata (page numbers already set correctly in analyze_layout)
                        if page_result.get('images_metadata'):
                            all_images_metadata.extend(page_result['images_metadata'])

                        # Store OCR dimensions for each page
                        if page_result.get('ocr_dimensions'):
                            all_ocr_dimensions.append({
                                'page': page_num,
                                'width': page_result['ocr_dimensions']['width'],
                                'height': page_result['ocr_dimensions']['height']
                            })

                # Calculate overall average confidence
                avg_confidence = total_confidence_sum / total_valid_regions if total_valid_regions > 0 else 0.0

                # Combine layout data from all pages
                combined_layout = None
                if all_layout_data:
                    combined_elements = []
                    for layout in all_layout_data:
                        if layout.get('elements'):
                            combined_elements.extend(layout['elements'])
                    if combined_elements:
                        combined_layout = {
                            'elements': combined_elements,
                            'total_elements': len(combined_elements),
                            'reading_order': list(range(len(combined_elements))),
                        }

                # Generate combined markdown
                markdown_content = self.generate_markdown(all_text_regions, combined_layout)

                # Calculate processing time
                processing_time = (datetime.now() - start_time).total_seconds()

                logger.info(
                    f"PDF processing completed: {image_path.name} - "
                    f"{len(image_paths)} pages, "
                    f"{len(all_text_regions)} regions, "
                    f"{avg_confidence:.2f} avg confidence, "
                    f"{processing_time:.2f}s"
                )

                return {
                    'status': 'success',
                    'file_name': image_path.name,
                    'language': lang,
                    'text_regions': all_text_regions,
                    'total_text_regions': len(all_text_regions),
                    'average_confidence': avg_confidence,
                    'layout_data': combined_layout,
                    'images_metadata': all_images_metadata,
                    'markdown_content': markdown_content,
                    'processing_time': processing_time,
                    'timestamp': datetime.utcnow().isoformat(),
                    'total_pages': len(image_paths),
                    'ocr_dimensions': all_ocr_dimensions if all_ocr_dimensions else None,
                }

            # Get OCR engine (for non-PDF images)
            ocr_engine = self.get_ocr_engine(lang)

            # Get the actual image dimensions that OCR will use
            from PIL import Image
            with Image.open(image_path) as img:
                ocr_width, ocr_height = img.size
            logger.info(f"OCR processing image dimensions: {ocr_width}x{ocr_height}")

            # Perform OCR
            logger.info(f"Processing image: {image_path.name}")
            # Note: In PaddleOCR 3.x, use_angle_cls is set during initialization, not in ocr() call
            ocr_results = ocr_engine.ocr(str(image_path))

            # Parse OCR results (PaddleOCR 3.x format)
            text_regions = []
            total_confidence = 0.0
            valid_regions = 0

            if ocr_results and isinstance(ocr_results, (list, tuple)) and len(ocr_results) > 0:
                # PaddleOCR 3.x returns a list of dictionaries (one per page)
                for page_result in ocr_results:
                    if isinstance(page_result, dict):
                        # New format: {'rec_texts': [...], 'rec_scores': [...], 'rec_polys': [...]}
                        texts = page_result.get('rec_texts', [])
                        scores = page_result.get('rec_scores', [])
                        polys = page_result.get('rec_polys', [])

                        # Process each recognized text
                        for idx, text in enumerate(texts):
                            # Get corresponding score and bbox
                            confidence = scores[idx] if idx < len(scores) else 1.0
                            bbox = polys[idx] if idx < len(polys) else []

                            # Convert numpy array bbox to list for JSON serialization
                            if hasattr(bbox, 'tolist'):
                                bbox = bbox.tolist()

                            # Filter by confidence threshold
                            if confidence >= threshold:
                                text_regions.append({
                                    'text': text,
                                    'bbox': bbox,
                                    'confidence': float(confidence),
                                })
                                total_confidence += confidence
                                valid_regions += 1

            avg_confidence = total_confidence / valid_regions if valid_regions > 0 else 0.0

            logger.info(f"Parsed {len(text_regions)} text regions with avg confidence {avg_confidence:.3f}")

            # Layout analysis (if requested)
            layout_data = None
            images_metadata = []

            if detect_layout:
                # Pass current_page to analyze_layout for correct page numbering
                layout_data, images_metadata = self.analyze_layout(image_path, output_dir=output_dir, current_page=current_page)

            # Generate Markdown
            markdown_content = self.generate_markdown(text_regions, layout_data)

            # Calculate processing time
            processing_time = (datetime.now() - start_time).total_seconds()

            result = {
                'status': 'success',
                'file_name': image_path.name,
                'language': lang,
                'text_regions': text_regions,
                'total_text_regions': len(text_regions),
                'average_confidence': avg_confidence,
                'layout_data': layout_data,
                'images_metadata': images_metadata,
                'markdown_content': markdown_content,
                'processing_time': processing_time,
                'timestamp': datetime.utcnow().isoformat(),
                'ocr_dimensions': {
                    'width': ocr_width,
                    'height': ocr_height
                }
            }

            logger.info(
                f"OCR completed: {image_path.name} - "
                f"{len(text_regions)} regions, "
                f"{avg_confidence:.2f} avg confidence, "
                f"{processing_time:.2f}s"
            )

            return result

        except Exception as e:
            import traceback
            error_trace = traceback.format_exc()
            logger.error(f"OCR processing error for {image_path.name}: {str(e)}\n{error_trace}")
            return {
                'status': 'error',
                'file_name': image_path.name,
                'error_message': str(e),
                'processing_time': (datetime.now() - start_time).total_seconds(),
            }

    def _extract_table_text(self, html_content: str) -> str:
        """
        Extract text from HTML table content for translation purposes

        Args:
            html_content: HTML content containing table

        Returns:
            Extracted text from table cells
        """
        try:
            from html.parser import HTMLParser

            class TableTextExtractor(HTMLParser):
                def __init__(self):
                    super().__init__()
                    self.text_parts = []
                    self.in_table = False

                def handle_starttag(self, tag, attrs):
                    if tag == 'table':
                        self.in_table = True

                def handle_endtag(self, tag):
                    if tag == 'table':
                        self.in_table = False
                    elif tag in ('td', 'th') and self.in_table:
                        self.text_parts.append(' | ')  # Cell separator
                    elif tag == 'tr' and self.in_table:
                        self.text_parts.append('\n')  # Row separator

                def handle_data(self, data):
                    if self.in_table:
                        stripped = data.strip()
                        if stripped:
                            self.text_parts.append(stripped)

            parser = TableTextExtractor()
            parser.feed(html_content)

            # Clean up the extracted text
            extracted = ''.join(parser.text_parts)
            # Remove multiple separators
            import re
            extracted = re.sub(r'\s*\|\s*\|+\s*', ' | ', extracted)
            extracted = re.sub(r'\n+', '\n', extracted)
            extracted = extracted.strip()

            return extracted

        except Exception as e:
            logger.warning(f"Failed to extract table text: {e}")
            # Fallback: just remove HTML tags
            import re
            text = re.sub(r'<[^>]+>', ' ', html_content)
            text = re.sub(r'\s+', ' ', text)
            return text.strip()

    def analyze_layout(self, image_path: Path, output_dir: Optional[Path] = None, current_page: int = 0) -> Tuple[Optional[Dict], List[Dict]]:
        """
        Analyze document layout using PP-StructureV3

        Args:
            image_path: Path to image file
            output_dir: Optional output directory for saving extracted images (defaults to image_path.parent)
            current_page: Current page number (0-based) for multi-page documents

        Returns:
            Tuple of (layout_data, images_metadata)
        """
        try:
            structure_engine = self.get_structure_engine()

            # Perform structure analysis using predict() method (PaddleOCR 3.x API)
            logger.info(f"Running layout analysis on {image_path.name}")
            results = structure_engine.predict(str(image_path))

            layout_elements = []
            images_metadata = []

            # Process each page result (for images, usually just one page)
            for page_idx, page_result in enumerate(results):
                # Get markdown dictionary from result object
                if hasattr(page_result, 'markdown'):
                    markdown_dict = page_result.markdown
                    logger.info(f"Page {page_idx} markdown keys: {markdown_dict.keys() if isinstance(markdown_dict, dict) else type(markdown_dict)}")

                    # Extract layout information from markdown structure
                    if isinstance(markdown_dict, dict):
                        # Get markdown texts (HTML format with tables and structure)
                        markdown_texts = markdown_dict.get('markdown_texts', '')
                        markdown_images = markdown_dict.get('markdown_images', {})

                        # Create a layout element for the structured content
                        if markdown_texts:
                            # Parse HTML content to identify tables and text
                            import re

                            # Check if content contains tables
                            has_table = '<table' in markdown_texts.lower()

                            element = {
                                'element_id': len(layout_elements),
                                'type': 'table' if has_table else 'text',
                                'content': markdown_texts,
                                'page': current_page,  # Use current_page parameter instead of page_idx
                                'bbox': [],  # PP-StructureV3 doesn't provide individual bbox in this format
                            }

                            # Extract text from table for translation purposes
                            if has_table:
                                table_text = self._extract_table_text(markdown_texts)
                                element['extracted_text'] = table_text
                                logger.info(f"Extracted {len(table_text)} characters from table")

                            layout_elements.append(element)

                        # Add image metadata and SAVE images to disk
                        for img_idx, (img_path, img_obj) in enumerate(markdown_images.items()):
                            # Save image to disk
                            try:
                                # Determine base directory for saving images
                                base_dir = output_dir if output_dir else image_path.parent

                                # Create full path for image file
                                full_img_path = base_dir / img_path

                                # Create imgs/ subdirectory if it doesn't exist
                                full_img_path.parent.mkdir(parents=True, exist_ok=True)

                                # Save image object to disk
                                if hasattr(img_obj, 'save'):
                                    # img_obj is PIL Image
                                    img_obj.save(str(full_img_path))
                                    logger.info(f"Saved extracted image to {full_img_path}")
                                else:
                                    logger.warning(f"Image object for {img_path} does not have save() method, skipping")

                            except Exception as e:
                                logger.warning(f"Failed to save image {img_path}: {str(e)}")
                                # Continue processing even if image save fails

                            # Extract bbox from filename (format: img_in_table_box_x1_y1_x2_y2.jpg)
                            bbox = []
                            try:
                                import re
                                match = re.search(r'box_(\d+)_(\d+)_(\d+)_(\d+)', img_path)
                                if match:
                                    x1, y1, x2, y2 = map(int, match.groups())
                                    # Convert to 4-point bbox format: [[x1,y1], [x2,y1], [x2,y2], [x1,y2]]
                                    bbox = [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]
                                    logger.info(f"Extracted bbox from filename: {bbox}")
                            except Exception as e:
                                logger.warning(f"Failed to extract bbox from {img_path}: {e}")

                            images_metadata.append({
                                'element_id': len(layout_elements) + img_idx,
                                'image_path': img_path,
                                'type': 'image',
                                'page': current_page,  # Use current_page parameter instead of page_idx
                                'bbox': bbox,
                            })

            if layout_elements:
                layout_data = {
                    'elements': layout_elements,
                    'total_elements': len(layout_elements),
                    'reading_order': list(range(len(layout_elements))),
                }
                logger.info(f"Detected {len(layout_elements)} layout elements")
                return layout_data, images_metadata
            else:
                logger.warning("No layout elements detected")
                return None, []

        except Exception as e:
            import traceback
            error_trace = traceback.format_exc()
            logger.error(f"Layout analysis error: {str(e)}\n{error_trace}")
            return None, []

    def generate_markdown(
        self,
        text_regions: List[Dict],
        layout_data: Optional[Dict] = None
    ) -> str:
        """
        Generate Markdown from OCR results

        Args:
            text_regions: List of text regions with bbox and text
            layout_data: Optional layout structure information

        Returns:
            Markdown formatted string
        """
        markdown_lines = []

        if layout_data and layout_data.get('elements'):
            # Generate structured Markdown based on layout
            for element in layout_data['elements']:
                element_type = element.get('type', 'text')
                content = element.get('content', '')

                if element_type == 'title':
                    markdown_lines.append(f"# {content}\n")
                elif element_type == 'table':
                    # Table in HTML format
                    markdown_lines.append(content)
                    markdown_lines.append("")
                elif element_type == 'figure':
                    element_id = element.get('element_id')
                    markdown_lines.append(f"\n")
                else:
                    markdown_lines.append(f"{content}\n")

        else:
            # Simple Markdown from text regions only
            # Sort by vertical position (top to bottom)
            def get_y_coord(region):
                """Safely extract Y coordinate from bbox"""
                bbox = region.get('bbox', [])
                if isinstance(bbox, (list, tuple)) and len(bbox) > 0:
                    if isinstance(bbox[0], (list, tuple)) and len(bbox[0]) > 1:
                        return bbox[0][1]  # [[x1,y1], [x2,y2], ...] format
                    elif len(bbox) > 1:
                        return bbox[1]  # [x1, y1, x2, y2, ...] format
                return 0  # Default to 0 if can't extract

            sorted_regions = sorted(text_regions, key=get_y_coord)

            for region in sorted_regions:
                text = region['text']
                markdown_lines.append(text)

        return "\n".join(markdown_lines)

    def save_results(
        self,
        result: Dict,
        output_dir: Path,
        file_id: str,
        source_file_path: Optional[Path] = None
    ) -> Tuple[Optional[Path], Optional[Path], Optional[Path]]:
        """
        Save OCR results to JSON, Markdown, and layout-preserving PDF files

        Args:
            result: OCR result dictionary
            output_dir: Output directory
            file_id: Unique file identifier
            source_file_path: Optional path to original source file for PDF generation

        Returns:
            Tuple of (json_path, markdown_path, pdf_path)
        """
        try:
            output_dir.mkdir(parents=True, exist_ok=True)

            # Save JSON
            json_path = output_dir / f"{file_id}_result.json"
            with open(json_path, 'w', encoding='utf-8') as f:
                json.dump(result, f, ensure_ascii=False, indent=2)

            # Save Markdown
            markdown_path = output_dir / f"{file_id}_output.md"
            markdown_content = result.get('markdown_content', '')
            with open(markdown_path, 'w', encoding='utf-8') as f:
                f.write(markdown_content)

            logger.info(f"Results saved: {json_path.name}, {markdown_path.name}")

            # Generate layout-preserving PDF
            pdf_path = None
            try:
                from app.services.pdf_generator_service import pdf_generator_service

                pdf_filename = f"{file_id}_layout.pdf"
                pdf_path = output_dir / pdf_filename

                logger.info(f"Generating layout-preserving PDF: {pdf_filename}")

                success = pdf_generator_service.generate_layout_pdf(
                    json_path=json_path,
                    output_path=pdf_path,
                    source_file_path=source_file_path
                )

                if success:
                    logger.info(f"✓ PDF generated successfully: {pdf_path.name}")
                else:
                    logger.warning(f"✗ PDF generation failed for {file_id}")
                    pdf_path = None

            except Exception as e:
                logger.error(f"Error generating PDF for {file_id}: {str(e)}")
                import traceback
                traceback.print_exc()
                pdf_path = None

            return json_path, markdown_path, pdf_path

        except Exception as e:
            logger.error(f"Error saving results: {str(e)}")
            return None, None, None
pdf_font_manager.py
@@ -1,312 +0,0 @@
"""
PDF Font Manager - Handles font loading, registration, and fallback.

This module provides unified font management for PDF generation,
including CJK font support and font fallback mechanisms.
"""

import logging
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple

from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont

logger = logging.getLogger(__name__)


# ============================================================================
# Configuration
# ============================================================================

@dataclass
class FontConfig:
    """Configuration for font management."""
    # Primary fonts
    chinese_font_name: str = "NotoSansSC"
    chinese_font_path: Optional[Path] = None

    # Fallback fonts (built-in)
    fallback_font_name: str = "Helvetica"
    fallback_cjk_font_name: str = "HeiseiMin-W3"  # Built-in ReportLab CJK

    # Font sizes
    default_font_size: int = 10
    min_font_size: int = 6
    max_font_size: int = 14

    # Font registration options
    auto_register: bool = True
    enable_cjk_fallback: bool = True


# ============================================================================
# Font Manager
# ============================================================================

class FontManager:
    """
    Manages font registration and selection for PDF generation.

    Features:
    - Lazy font registration
    - CJK (Chinese/Japanese/Korean) font support
    - Automatic fallback to built-in fonts
    - Font caching to avoid duplicate registration
    """

    _instance = None
    _registered_fonts: Dict[str, Path] = {}

    def __new__(cls, *args, **kwargs):
        """Singleton pattern to avoid duplicate font registration."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self, config: Optional[FontConfig] = None):
        """
        Initialize FontManager.

        Args:
            config: FontConfig instance (uses defaults if None)
        """
        if self._initialized:
            return

        self.config = config or FontConfig()
        self._primary_font_registered = False
        self._cjk_fallback_available = False

        # Auto-register fonts if enabled
        if self.config.auto_register:
            self._register_fonts()

        self._initialized = True

    @property
    def primary_font_name(self) -> str:
        """Get the primary font name to use."""
        if self._primary_font_registered:
            return self.config.chinese_font_name
        return self.config.fallback_font_name

    @property
    def is_cjk_enabled(self) -> bool:
        """Check if CJK fonts are available."""
        return self._primary_font_registered or self._cjk_fallback_available

    @classmethod
    def reset(cls):
        """Reset singleton instance (for testing)."""
        cls._instance = None
        cls._registered_fonts = {}

    def get_font_for_text(self, text: str) -> str:
        """
        Get appropriate font name for given text.

        Args:
            text: Text to render

        Returns:
            Font name suitable for the text content
        """
        if self._contains_cjk(text):
            if self._primary_font_registered:
                return self.config.chinese_font_name
            elif self._cjk_fallback_available:
                return self.config.fallback_cjk_font_name
        return self.primary_font_name

    def get_font_size(
        self,
        text: str,
        available_width: float,
        available_height: float,
        pdf_canvas=None
    ) -> int:
        """
        Calculate optimal font size for text to fit within bounds.

        Args:
            text: Text to render
            available_width: Maximum width available
            available_height: Maximum height available
            pdf_canvas: Optional canvas for precise measurement

        Returns:
            Font size that fits within bounds
        """
        font_name = self.get_font_for_text(text)

        for size in range(self.config.max_font_size, self.config.min_font_size - 1, -1):
            if pdf_canvas:
                # Precise measurement with canvas
                text_width = pdf_canvas.stringWidth(text, font_name, size)
            else:
                # Approximate measurement
                text_width = len(text) * size * 0.6  # Rough estimate

            text_height = size * 1.2  # Line height

            if text_width <= available_width and text_height <= available_height:
                return size

        return self.config.min_font_size

    def register_font(
        self,
        font_name: str,
        font_path: Path,
        force: bool = False
    ) -> bool:
        """
        Register a custom font.

        Args:
            font_name: Name to register font under
            font_path: Path to TTF font file
            force: Force re-registration if already registered

        Returns:
            True if registration successful
        """
        if font_name in self._registered_fonts and not force:
            logger.debug(f"Font {font_name} already registered")
            return True

        try:
            if not font_path.exists():
                logger.error(f"Font file not found: {font_path}")
                return False

            pdfmetrics.registerFont(TTFont(font_name, str(font_path)))
            self._registered_fonts[font_name] = font_path
            logger.info(f"Font registered: {font_name} from {font_path}")
            return True

        except Exception as e:
            logger.error(f"Failed to register font {font_name}: {e}")
            return False

    def get_registered_fonts(self) -> List[str]:
        """Get list of registered custom font names."""
        return list(self._registered_fonts.keys())

    # =========================================================================
    # Private Methods
    # =========================================================================

    def _register_fonts(self):
        """Register configured fonts."""
        # Register primary Chinese font
        if self.config.chinese_font_path:
            self._register_chinese_font()

        # Setup CJK fallback
        if self.config.enable_cjk_fallback:
            self._setup_cjk_fallback()

    def _register_chinese_font(self):
        """Register the primary Chinese font."""
        font_path = self.config.chinese_font_path

        if font_path is None:
            # Try to load from settings
            try:
                from app.core.config import settings
                font_path = Path(settings.chinese_font_path)
            except Exception as e:
                logger.debug(f"Could not load font path from settings: {e}")
                return

        # Resolve relative path
        if not font_path.is_absolute():
            # Try project root
            project_root = Path(__file__).resolve().parent.parent.parent.parent
            font_path = project_root / font_path

        if not font_path.exists():
            logger.warning(f"Chinese font not found at {font_path}")
            return

        try:
            pdfmetrics.registerFont(TTFont(self.config.chinese_font_name, str(font_path)))
            self._registered_fonts[self.config.chinese_font_name] = font_path
            self._primary_font_registered = True
            logger.info(f"Chinese font registered: {self.config.chinese_font_name}")
        except Exception as e:
            logger.error(f"Failed to register Chinese font: {e}")

    def _setup_cjk_fallback(self):
        """Setup CJK fallback using built-in fonts."""
        try:
            # ReportLab includes CID fonts for CJK
            from reportlab.pdfbase.cidfonts import UnicodeCIDFont

            # Register CJK fonts if not already registered
            try:
                pdfmetrics.registerFont(UnicodeCIDFont('HeiseiMin-W3'))
                self._cjk_fallback_available = True
                logger.debug("CJK fallback font available: HeiseiMin-W3")
            except Exception:
                pass  # Font may already be registered

        except ImportError:
            logger.debug("CID fonts not available for CJK fallback")

    def _contains_cjk(self, text: str) -> bool:
        """
        Check if text contains CJK characters.

        Args:
            text: Text to check

        Returns:
            True if text contains Chinese, Japanese, or Korean characters
        """
        if not text:
            return False

        for char in text:
            code = ord(char)
            # CJK Unified Ideographs and related ranges
            if any([
                0x4E00 <= code <= 0x9FFF,    # CJK Unified Ideographs
                0x3400 <= code <= 0x4DBF,    # CJK Extension A
                0x20000 <= code <= 0x2A6DF,  # CJK Extension B
                0x3000 <= code <= 0x303F,    # CJK Punctuation
                0x3040 <= code <= 0x309F,    # Hiragana
                0x30A0 <= code <= 0x30FF,    # Katakana
                0xAC00 <= code <= 0xD7AF,    # Korean Hangul
            ]):
                return True
        return False


# ============================================================================
# Convenience Functions
# ============================================================================

_default_manager: Optional[FontManager] = None


def get_font_manager() -> FontManager:
    """Get the default FontManager instance."""
    global _default_manager
    if _default_manager is None:
        _default_manager = FontManager()
    return _default_manager


def register_font(font_name: str, font_path: Path) -> bool:
    """Register a font using the default manager."""
    return get_font_manager().register_font(font_name, font_path)


def get_font_for_text(text: str) -> str:
    """Get appropriate font for text using the default manager."""
    return get_font_manager().get_font_for_text(text)
preprocessor.py
@@ -1,230 +0,0 @@
"""
Tool_OCR - Document Preprocessor Service
Handles file validation, format detection, and preprocessing
"""

import magic
from pathlib import Path
from typing import Tuple, Optional
import logging
from PIL import Image
import cv2
import numpy as np

from app.core.config import settings

logger = logging.getLogger(__name__)


class DocumentPreprocessor:
    """
    Document preprocessing service for format standardization
    Validates and prepares documents for OCR processing
    """

    SUPPORTED_IMAGE_FORMATS = ['png', 'jpg', 'jpeg', 'bmp', 'tiff', 'tif']
    SUPPORTED_PDF_FORMAT = ['pdf']
    ALL_SUPPORTED_FORMATS = SUPPORTED_IMAGE_FORMATS + SUPPORTED_PDF_FORMAT

    def __init__(self):
        self.allowed_extensions = settings.allowed_extensions_list
        self.max_file_size = settings.max_upload_size
        logger.info(f"DocumentPreprocessor initialized with allowed_extensions: {self.allowed_extensions}")

    def validate_file(self, file_path: Path) -> Tuple[bool, Optional[str], Optional[str]]:
        """
        Validate file format, size, and integrity

        Args:
            file_path: Path to the file to validate

        Returns:
            Tuple of (is_valid, file_format, error_message)
        """
        try:
            # Check file exists
            if not file_path.exists():
                return False, None, f"File not found: {file_path}"

            # Check file size
            file_size = file_path.stat().st_size
            if file_size > self.max_file_size:
                max_mb = self.max_file_size / (1024 * 1024)
                actual_mb = file_size / (1024 * 1024)
                return False, None, f"File too large: {actual_mb:.2f}MB (max {max_mb:.2f}MB)"

            # Detect file format using magic numbers
            mime = magic.Magic(mime=True)
            mime_type = mime.from_file(str(file_path))

            # Map MIME type to format
            file_format = self._mime_to_format(mime_type)
            if not file_format:
                return False, None, f"Unsupported file type: {mime_type}"

            # Check if format is in allowed extensions
            if file_format not in self.allowed_extensions:
                return False, None, f"File format '{file_format}' not allowed"

            # Validate file integrity
            is_valid, error = self._validate_integrity(file_path, file_format)
            if not is_valid:
                return False, file_format, f"File corrupted: {error}"

            logger.info(f"File validated successfully: {file_path.name} ({file_format})")
            return True, file_format, None

        except Exception as e:
            logger.error(f"File validation error: {str(e)}")
            return False, None, f"Validation error: {str(e)}"

    def _mime_to_format(self, mime_type: str) -> Optional[str]:
        """Convert MIME type to file format"""
        mime_map = {
            'image/png': 'png',
            'image/jpeg': 'jpg',
            'image/jpg': 'jpg',
            'image/bmp': 'bmp',
            'image/tiff': 'tiff',
            'image/x-tiff': 'tiff',
            'application/pdf': 'pdf',
            'application/msword': 'doc',
            'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
            'application/vnd.ms-powerpoint': 'ppt',
            'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
        }
        return mime_map.get(mime_type)

    def _validate_integrity(self, file_path: Path, file_format: str) -> Tuple[bool, Optional[str]]:
        """
        Validate file integrity by attempting to open it

        Args:
            file_path: Path to file
            file_format: Detected file format

        Returns:
            Tuple of (is_valid, error_message)
        """
        try:
            if file_format in self.SUPPORTED_IMAGE_FORMATS:
                # Try to open image
                with Image.open(file_path) as img:
                    img.verify()  # Verify image integrity
                # Reopen for actual check (verify() closes the file)
                with Image.open(file_path) as img:
                    _ = img.size  # Force load to detect corruption
                return True, None

            elif file_format == 'pdf':
                # Basic PDF validation - check file starts with PDF signature
                with open(file_path, 'rb') as f:
                    header = f.read(5)
                    if header != b'%PDF-':
                        return False, "Invalid PDF header"
                return True, None

            elif file_format in ['doc', 'docx', 'ppt', 'pptx']:
                # Office documents - basic validation (check file size and can be opened)
                # Modern Office formats (docx, pptx) are ZIP-based
                if file_format in ['docx', 'pptx']:
                    import zipfile
                    try:
                        with zipfile.ZipFile(file_path, 'r') as zf:
                            # Check if it has the required Office structure
                            if file_format == 'docx' and 'word/document.xml' not in zf.namelist():
                                return False, "Invalid DOCX structure"
                            elif file_format == 'pptx' and 'ppt/presentation.xml' not in zf.namelist():
                                return False, "Invalid PPTX structure"
                    except zipfile.BadZipFile:
                        return False, "Invalid Office file (corrupt ZIP)"
                # Old formats (doc, ppt) - just check file exists and has content
                return True, None

            else:
                return False, f"Unknown format: {file_format}"

        except Exception as e:
            return False, str(e)

    def preprocess_image(
        self,
        image_path: Path,
        enhance: bool = True,
        output_path: Optional[Path] = None
    ) -> Tuple[bool, Optional[Path], Optional[str]]:
        """
        Preprocess image to improve OCR accuracy

        Args:
            image_path: Path to input image
            enhance: Whether to apply enhancement
            output_path: Optional output path (defaults to temp directory)

        Returns:
            Tuple of (success, processed_image_path, error_message)
        """
        try:
            # Read image
            img = cv2.imread(str(image_path))
            if img is None:
                return False, None, "Failed to read image"

            if not enhance:
                # No preprocessing, return original
                return True, image_path, None

            # Convert to grayscale
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

            # Apply adaptive thresholding to handle varying lighting
            processed = cv2.adaptiveThreshold(
                gray,
                255,
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY,
                11,
                2
            )

            # Denoise
            processed = cv2.fastNlMeansDenoising(processed, None, 10, 7, 21)

            # Determine output path
            if output_path is None:
                output_path = Path(settings.processed_dir) / f"processed_{image_path.name}"

            # Save processed image
            cv2.imwrite(str(output_path), processed)

            logger.info(f"Image preprocessed: {image_path.name} -> {output_path.name}")
            return True, output_path, None

        except Exception as e:
            logger.error(f"Image preprocessing error: {str(e)}")
            return False, None, f"Preprocessing error: {str(e)}"

    def get_file_info(self, file_path: Path) -> dict:
        """
        Get comprehensive file information

        Args:
            file_path: Path to file

        Returns:
            Dictionary with file information
        """
        stat = file_path.stat()
        mime = magic.Magic(mime=True)
        mime_type = mime.from_file(str(file_path))

        return {
            'name': file_path.name,
            'path': str(file_path),
            'size': stat.st_size,
            'size_mb': stat.st_size / (1024 * 1024),
            'mime_type': mime_type,
            'format': self._mime_to_format(mime_type),
            'created_at': stat.st_ctime,
            'modified_at': stat.st_mtime,
        }
@@ -1,7 +1,6 @@
|
||||
import { Outlet, NavLink, useNavigate } from 'react-router-dom'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { useAuthStore } from '@/store/authStore'
|
||||
import { apiClient } from '@/services/api'
|
||||
import { apiClientV2 } from '@/services/apiV2'
|
||||
import {
|
||||
Upload,
|
||||
@@ -29,12 +28,7 @@ export default function Layout() {
|
||||
|
||||
const handleLogout = async () => {
|
||||
try {
|
||||
// Use V2 API if authenticated with V2
|
||||
if (apiClientV2.isAuthenticated()) {
|
||||
await apiClientV2.logout()
|
||||
} else {
|
||||
apiClient.logout()
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Logout error:', error)
|
||||
} finally {
|
||||
|
||||
@@ -1,26 +0,0 @@
|
||||
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
|
||||
|
||||
interface MarkdownPreviewProps {
|
||||
title?: string
|
||||
content: string
|
||||
className?: string
|
||||
}
|
||||
|
||||
export default function MarkdownPreview({ title, content, className }: MarkdownPreviewProps) {
|
||||
return (
|
||||
<Card className={className}>
|
||||
{title && (
|
||||
<CardHeader>
|
||||
<CardTitle>{title}</CardTitle>
|
||||
</CardHeader>
|
||||
)}
|
||||
<CardContent>
|
||||
<div className="prose prose-sm max-w-none dark:prose-invert">
|
||||
<pre className="whitespace-pre-wrap break-words bg-muted p-4 rounded-md overflow-auto max-h-[600px]">
|
||||
{content}
|
||||
</pre>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
)
|
||||
}
|
||||
@@ -1,90 +0,0 @@
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from '@/components/ui/table'
|
||||
import { Badge } from '@/components/ui/badge'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import type { FileResult } from '@/types/apiV2'
|
||||
|
||||
interface ResultsTableProps {
|
||||
files: FileResult[]
|
||||
onViewResult?: (fileId: number) => void
|
||||
onDownloadPDF?: (fileId: number) => void
|
||||
}
|
||||
|
||||
export default function ResultsTable({ files, onViewResult, onDownloadPDF }: ResultsTableProps) {
|
||||
const { t } = useTranslation()
|
||||
|
||||
const getStatusBadge = (status: FileResult['status']) => {
|
||||
switch (status) {
|
||||
case 'completed':
|
||||
return <Badge variant="success">{t('processing.completed')}</Badge>
|
||||
case 'processing':
|
||||
return <Badge variant="default">{t('processing.processing')}</Badge>
|
||||
case 'failed':
|
||||
return <Badge variant="destructive">{t('processing.failed')}</Badge>
|
||||
default:
|
||||
return <Badge variant="secondary">{t('processing.pending')}</Badge>
|
||||
}
|
||||
}
|
||||
|
||||
const formatTime = (seconds?: number) => {
|
||||
if (!seconds) return 'N/A'
|
||||
return `${seconds.toFixed(2)}s`
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="rounded-md border">
|
||||
<Table>
|
||||
<TableHeader>
|
||||
<TableRow>
|
||||
<TableHead>{t('results.filename')}</TableHead>
|
||||
<TableHead>{t('results.status')}</TableHead>
|
||||
<TableHead>{t('results.processingTime')}</TableHead>
|
||||
<TableHead className="text-right">{t('results.actions')}</TableHead>
|
||||
</TableRow>
|
||||
</TableHeader>
|
||||
<TableBody>
|
||||
{files.length === 0 ? (
|
||||
<TableRow>
|
||||
<TableCell colSpan={4} className="text-center text-muted-foreground">
|
||||
{t('results.noResults')}
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
) : (
|
||||
files.map((file) => (
|
||||
<TableRow key={file.id}>
|
||||
<TableCell className="font-medium">{file.filename}</TableCell>
|
||||
<TableCell>{getStatusBadge(file.status)}</TableCell>
|
||||
<TableCell>{formatTime(file.processing_time)}</TableCell>
|
||||
<TableCell className="text-right">
|
||||
<div className="flex justify-end gap-2">
|
||||
{file.status === 'completed' && (
|
||||
<>
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={() => onViewResult?.(file.id)}
|
||||
>
|
||||
{t('results.viewMarkdown')}
|
||||
</Button>
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={() => onDownloadPDF?.(file.id)}
|
||||
>
|
||||
{t('results.downloadPDF')}
|
||||
</Button>
|
||||
</>
|
||||
)}
|
||||
{file.status === 'failed' && file.error && (
|
||||
<span className="text-sm text-destructive">{file.error}</span>
|
||||
)}
|
||||
</div>
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
))
|
||||
)}
|
||||
</TableBody>
|
||||
</Table>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -4,7 +4,7 @@ import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'
|
||||
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { useToast } from '@/components/ui/toast'
|
||||
import { apiClient } from '@/services/api'
|
||||
import { apiClientV2 } from '@/services/apiV2'
|
||||
import type { ExportRule } from '@/types/apiV2'
|
||||
|
||||
export default function SettingsPage() {
|
||||
@@ -25,12 +25,12 @@ export default function SettingsPage() {
|
||||
// Fetch export rules
|
||||
const { data: exportRules, isLoading } = useQuery({
|
||||
queryKey: ['exportRules'],
|
||||
queryFn: () => apiClient.getExportRules(),
|
||||
queryFn: () => apiClientV2.getExportRules(),
|
||||
})
|
||||
|
||||
// Create rule mutation
|
||||
const createRuleMutation = useMutation({
|
||||
mutationFn: (rule: any) => apiClient.createExportRule(rule),
|
||||
mutationFn: (rule: any) => apiClientV2.createExportRule(rule),
|
||||
onSuccess: () => {
|
||||
queryClient.invalidateQueries({ queryKey: ['exportRules'] })
|
||||
setIsCreating(false)
|
||||
@@ -53,7 +53,7 @@ export default function SettingsPage() {
|
||||
// Update rule mutation
|
||||
const updateRuleMutation = useMutation({
|
||||
mutationFn: ({ ruleId, rule }: { ruleId: number; rule: any }) =>
|
||||
apiClient.updateExportRule(ruleId, rule),
|
||||
apiClientV2.updateExportRule(ruleId, rule),
|
||||
onSuccess: () => {
|
||||
queryClient.invalidateQueries({ queryKey: ['exportRules'] })
|
||||
setEditingRule(null)
|
||||
@@ -75,7 +75,7 @@ export default function SettingsPage() {
|
||||
|
||||
// Delete rule mutation
|
||||
const deleteRuleMutation = useMutation({
|
||||
mutationFn: (ruleId: number) => apiClient.deleteExportRule(ruleId),
|
||||
mutationFn: (ruleId: number) => apiClientV2.deleteExportRule(ruleId),
|
||||
onSuccess: () => {
|
||||
queryClient.invalidateQueries({ queryKey: ['exportRules'] })
|
||||
toast({
|
||||
|
||||
services/api.ts (removed)
@@ -1,271 +0,0 @@
import axios, { AxiosError } from 'axios'
import type { AxiosInstance } from 'axios'
import type {
  LoginRequest,
  LoginResponse,
  UploadResponse,
  ProcessRequest,
  ProcessResponse,
  BatchStatus,
  OCRResult,
  ExportRequest,
  ExportRule,
  CSSTemplate,
  TranslateRequest,
  TranslateResponse,
  ApiError,
} from '@/types/api'

/**
 * API Client Configuration
 * - In Docker: VITE_API_BASE_URL is empty string, use relative path
 * - In development: Use VITE_API_BASE_URL from .env or default to localhost:8000
 */
const envApiBaseUrl = import.meta.env.VITE_API_BASE_URL
const API_BASE_URL = envApiBaseUrl !== undefined ? envApiBaseUrl : 'http://localhost:8000'
const API_VERSION = 'v2'

class ApiClient {
  private client: AxiosInstance
  private token: string | null = null

  constructor() {
    this.client = axios.create({
      baseURL: `${API_BASE_URL}/api/${API_VERSION}`,
      timeout: 30000,
      headers: {
        'Content-Type': 'application/json',
      },
    })

    // Request interceptor to add auth token
    this.client.interceptors.request.use(
      (config) => {
        if (this.token) {
          config.headers.Authorization = `Bearer ${this.token}`
        }
        return config
      },
      (error) => Promise.reject(error)
    )

    // Response interceptor for error handling
    this.client.interceptors.response.use(
      (response) => response,
      (error: AxiosError<ApiError>) => {
        if (error.response?.status === 401) {
          // Token expired or invalid
          this.clearToken()
          window.location.href = '/login'
        }
        return Promise.reject(error)
      }
    )

    // Load token from localStorage
    this.loadToken()
  }

  /**
   * Set authentication token
   */
  setToken(token: string) {
    this.token = token
    localStorage.setItem('auth_token', token)
  }

  /**
   * Clear authentication token
   */
  clearToken() {
    this.token = null
    localStorage.removeItem('auth_token')
  }

  /**
   * Load token from localStorage
   */
  private loadToken() {
    const token = localStorage.getItem('auth_token')
    if (token) {
      this.token = token
    }
  }

  /**
   * Check if user is authenticated
   */
  isAuthenticated(): boolean {
    return this.token !== null
  }

  // ==================== Authentication ====================

  /**
   * Login
   */
  async login(data: LoginRequest): Promise<LoginResponse> {
    const response = await this.client.post<LoginResponse>('/auth/login', {
      username: data.username,
      password: data.password,
    })

    this.setToken(response.data.access_token)
    return response.data
  }

  /**
   * Logout
   */
  logout() {
    this.clearToken()
  }

  // ==================== File Upload ====================

  /**
   * Upload files
   */
  async uploadFiles(files: File[]): Promise<UploadResponse> {
    const formData = new FormData()
    files.forEach((file) => {
      formData.append('files', file)
    })

    const response = await this.client.post<UploadResponse>('/upload', formData, {
      headers: {
        'Content-Type': 'multipart/form-data',
      },
    })

    return response.data
  }

  // ==================== OCR Processing ====================

  /**
   * Process OCR
   */
  async processOCR(data: ProcessRequest): Promise<ProcessResponse> {
    const response = await this.client.post<ProcessResponse>('/ocr/process', data)
    return response.data
  }

  /**
   * Get OCR result by file ID
   * Note: Backend uses file-level tracking, not task-level
   */
  async getOCRResult(fileId: number): Promise<OCRResult> {
    const response = await this.client.get<OCRResult>(`/ocr/result/${fileId}`)
    return response.data
  }

  /**
   * Get batch status
   */
  async getBatchStatus(batchId: number): Promise<BatchStatus> {
    const response = await this.client.get<BatchStatus>(`/batch/${batchId}/status`)
    return response.data
  }

  // ==================== Export ====================

  /**
   * Export results
   */
  async exportResults(data: ExportRequest): Promise<Blob> {
    const response = await this.client.post('/export', data, {
      responseType: 'blob',
    })
    return response.data
  }

  /**
   * Generate and download PDF
   */
  async exportPDF(fileId: number, cssTemplate?: string): Promise<Blob> {
    const params = cssTemplate ? { css_template: cssTemplate } : {}
    const response = await this.client.get(`/export/pdf/${fileId}`, {
      params,
      responseType: 'blob',
    })
    return response.data
  }

  /**
   * Get export rules
   */
  async getExportRules(): Promise<ExportRule[]> {
    const response = await this.client.get<ExportRule[]>('/export/rules')
    return response.data
  }

  /**
   * Create export rule
   */
  async createExportRule(rule: Omit<ExportRule, 'id' | 'created_at'>): Promise<ExportRule> {
    const response = await this.client.post<ExportRule>('/export/rules', rule)
    return response.data
  }

  /**
   * Update export rule
   */
  async updateExportRule(ruleId: number, rule: Partial<ExportRule>): Promise<ExportRule> {
    const response = await this.client.put<ExportRule>(`/export/rules/${ruleId}`, rule)
    return response.data
  }

  /**
   * Delete export rule
   */
  async deleteExportRule(ruleId: number): Promise<void> {
    await this.client.delete(`/export/rules/${ruleId}`)
  }

  /**
   * Get CSS templates
   */
  async getCSSTemplates(): Promise<CSSTemplate[]> {
    const response = await this.client.get<CSSTemplate[]>('/export/css-templates')
    return response.data
  }

  // ==================== Translation (FUTURE FEATURE - STUB) ====================

  /**
   * Translate document (STUB - Not yet implemented)
   * This is a placeholder for future translation functionality
   * @throws Will throw error with status 501 (Not Implemented)
   */
  async translateDocument(data: TranslateRequest): Promise<TranslateResponse> {
    // This endpoint is expected to return 501 Not Implemented until Phase 5
    const response = await this.client.post<TranslateResponse>('/translate/document', data)
    return response.data
  }

  /**
   * Get translation configs (NOT IMPLEMENTED)
   * This endpoint does not exist on backend - configs will be part of Phase 5
   * @deprecated Backend endpoint does not exist - will return 404
   */
  // async getTranslationConfigs(): Promise<TranslationConfig[]> {
  //   const response = await this.client.get<TranslationConfig[]>('/translate/configs')
  //   return response.data
  // }

  /**
   * Create translation config (NOT IMPLEMENTED)
   * This endpoint does not exist on backend - configs will be part of Phase 5
   * @deprecated Backend endpoint does not exist - will return 404
   */
  // async createTranslationConfig(
  //   config: Omit<TranslationConfig, 'id' | 'created_at'>
  // ): Promise<TranslationConfig> {
  //   const response = await this.client.post<TranslationConfig>('/translate/configs', config)
  //   return response.data
  // }
}

// Export singleton instance
export const apiClient = new ApiClient()
services/apiV2.ts
@@ -38,6 +38,7 @@ import type {
   TranslationStatusResponse,
   TranslationListResponse,
   TranslationResult,
+  ExportRule,
 } from '@/types/apiV2'

 /**
@@ -713,6 +714,39 @@ class ApiClientV2 {
     link.click()
     window.URL.revokeObjectURL(link.href)
   }
+
+  // ==================== Export Rules APIs ====================
+
+  /**
+   * Get export rules
+   */
+  async getExportRules(): Promise<ExportRule[]> {
+    const response = await this.client.get<ExportRule[]>('/export/rules')
+    return response.data
+  }
+
+  /**
+   * Create export rule
+   */
+  async createExportRule(rule: Omit<ExportRule, 'id' | 'created_at'>): Promise<ExportRule> {
+    const response = await this.client.post<ExportRule>('/export/rules', rule)
+    return response.data
+  }
+
+  /**
+   * Update export rule
+   */
+  async updateExportRule(ruleId: number, rule: Partial<ExportRule>): Promise<ExportRule> {
+    const response = await this.client.put<ExportRule>(`/export/rules/${ruleId}`, rule)
+    return response.data
+  }
+
+  /**
+   * Delete export rule
+   */
+  async deleteExportRule(ruleId: number): Promise<void> {
+    await this.client.delete(`/export/rules/${ruleId}`)
+  }
 }

 // Export singleton instance
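A short usage sketch of the new CRUD surface; the method signatures come from the hunk above, while the rule values and variable names are illustrative:

// Illustrative walkthrough of the export-rules CRUD methods (values hypothetical)
import { apiClientV2 } from '@/services/apiV2'
import type { ExportRule } from '@/types/apiV2'

const draft: Omit<ExportRule, 'id' | 'created_at'> = {
  rule_name: 'High-confidence text only',
  config_json: { confidence_threshold: 0.9 },
}

const created = await apiClientV2.createExportRule(draft)
await apiClientV2.updateExportRule(created.id, { rule_name: 'High-confidence text' })
const rules = await apiClientV2.getExportRules() // ExportRule[]
await apiClientV2.deleteExportRule(created.id)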
types/api.ts (removed)
@@ -1,182 +0,0 @@
/**
 * API Type Definitions
 * Based on backend OpenAPI specification
 */

// Authentication
export interface LoginRequest {
  username: string
  password: string
}

export interface LoginResponse {
  access_token: string
  token_type: string
  expires_in: number // Token expiration time in seconds
}

export interface User {
  id: number
  username: string
  email?: string
  displayName?: string | null
}

// File Upload (V2 API)
export interface UploadResponse {
  task_id: string
  filename: string
  file_size: number
  file_type: string
  status: 'pending' | 'processing' | 'completed' | 'failed'
}

export interface FileInfo {
  id: number
  filename: string
  file_size: number
  file_format: string // Changed from 'format' to match backend
  status: 'pending' | 'processing' | 'completed' | 'failed'
}

// OCR Processing
export interface ProcessRequest {
  batch_id: number
  lang?: string
  detect_layout?: boolean // Changed from confidence_threshold to match backend
}

export interface ProcessResponse {
  message: string // Added to match backend
  batch_id: number
  total_files: number // Added to match backend
  status: string
  // Removed task_id - backend uses batch-level tracking instead
}

export interface TaskStatus {
  task_id: string
  status: 'pending' | 'processing' | 'completed' | 'failed'
  progress_percentage: number
  current_file?: string
  files_processed: number
  total_files: number
  error?: string
}

export interface BatchStatus {
  batch: {
    id: number
    status: 'pending' | 'processing' | 'completed' | 'failed'
    progress_percentage: number
    created_at: string
    completed_at?: string
  }
  files: FileResult[]
}

export interface FileResult {
  id: number
  filename: string
  status: 'pending' | 'processing' | 'completed' | 'failed'
  processing_time?: number
  error?: string
}

// OCR Results
export interface OCRResult {
  file_id: number
  filename: string
  status: string
  markdown_content: string
  json_data: OCRJsonData
  confidence: number
  processing_time: number
}

export interface OCRJsonData {
  total_text_regions: number
  average_confidence: number
  text_blocks: TextBlock[]
  layout_info?: LayoutInfo
}

export interface TextBlock {
  text: string
  confidence: number
  bbox: [number, number, number, number]
  position: number
}

export interface LayoutInfo {
  tables_detected: number
  images_detected: number
  structure: string
}

// Export
export interface ExportRequest {
  batch_id: number
  format: 'txt' | 'json' | 'excel' | 'markdown' | 'pdf'
  rule_id?: number
  options?: ExportOptions
}

export interface ExportOptions {
  confidence_threshold?: number
  include_metadata?: boolean
  filename_pattern?: string
  css_template?: string
}

export interface ExportRule {
  id: number
  rule_name: string
  config_json: Record<string, any>
  css_template?: string
  created_at: string
}

export interface CSSTemplate {
  name: string
  description: string
  // filename is not returned by backend - use name as identifier
}

// Translation (FUTURE FEATURE)
export interface TranslateRequest {
  file_id: number
  source_lang: string
  target_lang: string
  engine_type?: 'argos' | 'ernie' | 'google'
}

export interface TranslateResponse {
  task_id: string
  file_id: number
  status: 'pending' | 'processing' | 'completed' | 'failed'
  translated_content?: string
}

export interface TranslationConfig {
  id: number
  source_lang: string
  target_lang: string
  engine_type: 'argos' | 'ernie' | 'google'
  engine_config: Record<string, any>
  created_at: string
}

// API Response
export interface ApiResponse<T = any> {
  success: boolean
  data?: T
  error?: string
  message?: string
}

// Error Response
export interface ApiError {
  detail: string
  status_code: number
}
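Per the commit message, these legacy types were migrated to types/apiV2.ts rather than dropped. As a closing illustration, a hedged sketch of narrowing an Axios failure to the documented ApiError shape; the helper name is invented, and the '@/types/apiV2' import assumes ApiError was carried over in the migration:

// Hypothetical helper: recover the documented ApiError shape from an unknown failure
import type { AxiosError } from 'axios'
import type { ApiError } from '@/types/apiV2'

function toApiError(err: unknown): ApiError {
  const axiosErr = err as AxiosError<ApiError>
  const data = axiosErr.response?.data
  if (data && typeof data.detail === 'string') {
    return data
  }
  // Fallback when the backend did not return a structured error body
  return { detail: 'Unknown error', status_code: axiosErr.response?.status ?? 0 }
}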