diff --git a/backend/app/services/ocr_service_original.py b/backend/app/services/ocr_service_original.py
deleted file mode 100644
index 0e24d7f..0000000
--- a/backend/app/services/ocr_service_original.py
+++ /dev/null
@@ -1,835 +0,0 @@
-"""
-Tool_OCR - Core OCR Service
-PaddleOCR-VL integration for text and structure extraction
-"""
-
-import json
-import logging
-from pathlib import Path
-from typing import Dict, List, Optional, Tuple
-from datetime import datetime
-import uuid
-
-from paddleocr import PaddleOCR, PPStructureV3
-from PIL import Image
-from pdf2image import convert_from_path
-import paddle
-
-from app.core.config import settings
-from app.services.office_converter import OfficeConverter, OfficeConverterError
-
-logger = logging.getLogger(__name__)
-
-
-class OCRService:
- """
- Core OCR service using PaddleOCR-VL
- Handles text recognition and document structure analysis
- """
-
- def __init__(self):
- """Initialize PaddleOCR and PPStructure engines with GPU detection"""
- self.ocr_languages = settings.ocr_languages_list
- self.confidence_threshold = settings.ocr_confidence_threshold
-
- # Initialize PaddleOCR engine (will be lazy-loaded per language)
- self.ocr_engines = {}
-
- # Initialize PP-Structure for layout analysis
- self.structure_engine = None
-
- # Initialize Office document converter
- self.office_converter = OfficeConverter()
-
- # GPU Detection and Configuration
- self.gpu_available = False
- self.use_gpu = False
- self.gpu_info = {}
-
- self._detect_and_configure_gpu()
-
- logger.info("OCR Service initialized")
-
- def _detect_and_configure_gpu(self):
- """Detect GPU availability and configure usage"""
- try:
- # Check if forced CPU mode
- if settings.force_cpu_mode:
- logger.info("GPU mode forced to CPU by configuration")
- self.use_gpu = False
- self.gpu_info = {
- 'available': False,
- 'reason': 'CPU mode forced by configuration',
- }
- return
-
- # Check if PaddlePaddle is compiled with CUDA
- if paddle.is_compiled_with_cuda():
- # Check if GPU devices are available
- gpu_count = paddle.device.cuda.device_count()
-
- if gpu_count > 0:
- self.gpu_available = True
- self.use_gpu = True
-
- # Get GPU device information
- device_id = settings.gpu_device_id if settings.gpu_device_id < gpu_count else 0
- gpu_props = paddle.device.cuda.get_device_properties(device_id)
-
- self.gpu_info = {
- 'available': True,
- 'device_count': gpu_count,
- 'device_id': device_id,
- 'device_name': gpu_props.name,
- 'total_memory': gpu_props.total_memory,
- 'compute_capability': f"{gpu_props.major}.{gpu_props.minor}",
- }
-
- # Set GPU memory fraction
- try:
- paddle.device.set_device(f'gpu:{device_id}')
- logger.info(f"GPU {device_id} selected: {gpu_props.name}")
- logger.info(f"GPU memory: {gpu_props.total_memory / (1024**3):.2f} GB")
- logger.info(f"Compute capability: {gpu_props.major}.{gpu_props.minor}")
- logger.info(f"GPU memory fraction set to: {settings.gpu_memory_fraction}")
- except Exception as e:
- logger.warning(f"Failed to configure GPU device: {e}")
- self.use_gpu = False
- self.gpu_info['available'] = False
- self.gpu_info['reason'] = f'GPU configuration failed: {str(e)}'
- else:
- logger.warning("CUDA is available but no GPU devices found")
- self.gpu_info = {
- 'available': False,
- 'reason': 'CUDA compiled but no GPU devices detected',
- }
- else:
- logger.info("PaddlePaddle not compiled with CUDA support")
- self.gpu_info = {
- 'available': False,
- 'reason': 'PaddlePaddle not compiled with CUDA',
- }
-
- except Exception as e:
- logger.error(f"GPU detection failed: {e}")
- self.use_gpu = False
- self.gpu_info = {
- 'available': False,
- 'reason': f'GPU detection error: {str(e)}',
- }
-
- # Log final GPU status
- if self.use_gpu:
- logger.info(f"✓ GPU acceleration ENABLED - Using {self.gpu_info.get('device_name', 'Unknown GPU')}")
- else:
- reason = self.gpu_info.get('reason', 'Unknown')
- logger.info(f"ℹ GPU acceleration DISABLED - {reason} - Using CPU mode")
-
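The detection flow above reduces to a short probe. A minimal standalone sketch, assuming PaddlePaddle >= 2.3 (where the `paddle.device.cuda` namespace exists) and a single-GPU default:

```python
import paddle

def pick_device(prefer_gpu: bool = True) -> str:
    """Return the device string actually selected, mirroring _detect_and_configure_gpu."""
    if prefer_gpu and paddle.is_compiled_with_cuda():
        if paddle.device.cuda.device_count() > 0:
            # PaddleOCR 3.x reads the globally selected device, so one call suffices.
            paddle.device.set_device("gpu:0")
            return "gpu:0"
    paddle.device.set_device("cpu")
    return "cpu"

if __name__ == "__main__":
    print(pick_device())  # "gpu:0" on a CUDA build with a visible GPU, else "cpu"
```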
- def get_gpu_status(self) -> Dict:
- """
- Get current GPU status and information
-
- Returns:
- Dictionary with GPU status information
- """
- status = {
- 'gpu_enabled': self.use_gpu,
- 'gpu_available': self.gpu_available,
- **self.gpu_info,
- }
-
- # Add current GPU memory usage if GPU is being used
- if self.use_gpu and self.gpu_available:
- try:
- device_id = self.gpu_info.get('device_id', 0)
- # Memory counters (allocated and reserved, both in bytes)
- memory_allocated = paddle.device.cuda.memory_allocated(device_id)
- memory_reserved = paddle.device.cuda.memory_reserved(device_id)
- total_memory = self.gpu_info.get('total_memory', 0)
-
- status['memory_allocated_mb'] = memory_allocated / (1024**2)
- status['memory_reserved_mb'] = memory_reserved / (1024**2)
- status['memory_total_mb'] = total_memory / (1024**2)
- status['memory_utilization'] = (memory_allocated / total_memory * 100) if total_memory > 0 else 0
- except Exception as e:
- logger.warning(f"Failed to get GPU memory info: {e}")
-
- return status
-
- def get_ocr_engine(self, lang: str = 'ch') -> PaddleOCR:
- """
- Get or create OCR engine for specified language with GPU support
-
- Args:
- lang: Language code (ch, en, japan, korean, etc.)
-
- Returns:
- PaddleOCR engine instance
- """
- if lang not in self.ocr_engines:
- logger.info(f"Initializing PaddleOCR engine for language: {lang} (GPU: {self.use_gpu})")
-
- try:
- # PaddleOCR 3.x: Device is set globally via paddle.set_device()
- # No need to pass device/use_gpu/gpu_mem parameters
- self.ocr_engines[lang] = PaddleOCR(
- lang=lang,
- use_textline_orientation=True, # Replaces deprecated use_angle_cls
- )
- logger.info(f"PaddleOCR engine ready for {lang} (PaddlePaddle {paddle.__version__}, {'GPU' if self.use_gpu else 'CPU'} mode)")
-
- except Exception as e:
- # If GPU initialization fails, fall back to CPU
- if self.use_gpu:
- logger.warning(f"GPU initialization failed, falling back to CPU: {e}")
- self.use_gpu = False
- # Switch to CPU device globally
- paddle.set_device('cpu')
- self.ocr_engines[lang] = PaddleOCR(
- lang=lang,
- use_textline_orientation=True,
- )
- logger.info(f"PaddleOCR engine ready for {lang} (CPU mode - fallback)")
- else:
- raise
-
- return self.ocr_engines[lang]
-
- def get_structure_engine(self) -> PPStructureV3:
- """
- Get or create PP-Structure engine for layout analysis with GPU support
-
- Returns:
- PPStructure engine instance
- """
- if self.structure_engine is None:
- logger.info(f"Initializing PP-StructureV3 engine (GPU: {self.use_gpu})")
-
- try:
- # PaddleOCR 3.x: Device is set globally via paddle.set_device()
- # No need to pass device/use_gpu/gpu_mem parameters
- self.structure_engine = PPStructureV3(
- use_doc_orientation_classify=False,
- use_doc_unwarping=False,
- use_textline_orientation=False,
- use_table_recognition=True,
- use_formula_recognition=True,
- use_chart_recognition=True, # Enable chart recognition (requires PaddlePaddle >= 3.2.0 for fused_rms_norm_ext)
- layout_threshold=0.5,
- )
- logger.info(f"PP-StructureV3 engine ready (PaddlePaddle {paddle.__version__}, {'GPU' if self.use_gpu else 'CPU'} mode)")
-
- except Exception as e:
- # If GPU initialization fails, fall back to CPU
- if self.use_gpu:
- logger.warning(f"GPU initialization failed for PP-Structure, falling back to CPU: {e}")
- self.use_gpu = False
- # Switch to CPU device globally
- paddle.set_device('cpu')
- self.structure_engine = PPStructureV3(
- use_doc_orientation_classify=False,
- use_doc_unwarping=False,
- use_textline_orientation=False,
- use_table_recognition=True,
- use_formula_recognition=True,
- use_chart_recognition=True, # Enable chart recognition (CPU fallback mode)
- layout_threshold=0.5,
- )
- logger.info("PP-StructureV3 engine ready (CPU mode - fallback)")
- else:
- raise
-
- return self.structure_engine
-
- def convert_pdf_to_images(self, pdf_path: Path, output_dir: Path) -> List[Path]:
- """
- Convert PDF to images (one per page)
-
- Args:
- pdf_path: Path to PDF file
- output_dir: Directory to save converted images
-
- Returns:
- List of paths to converted images
- """
- try:
- output_dir.mkdir(parents=True, exist_ok=True)
-
- logger.info(f"Converting PDF {pdf_path.name} to images")
-
- # Convert PDF to images (300 DPI for good quality)
- images = convert_from_path(
- str(pdf_path),
- dpi=300,
- fmt='png'
- )
-
- image_paths = []
- for i, image in enumerate(images):
- # Save each page as PNG
- image_path = output_dir / f"{pdf_path.stem}_page_{i+1}.png"
- image.save(str(image_path), 'PNG')
- image_paths.append(image_path)
- logger.info(f"Saved page {i+1} to {image_path.name}")
-
- logger.info(f"Converted {len(image_paths)} pages from PDF")
- return image_paths
-
- except Exception as e:
- logger.error(f"PDF conversion error: {str(e)}")
- raise
-
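Stripped of logging, the conversion step is just pdf2image plus a save loop; a sketch assuming poppler is installed (pdf2image shells out to it):

```python
from pathlib import Path
from pdf2image import convert_from_path

def pdf_to_pngs(pdf_path: Path, out_dir: Path, dpi: int = 300) -> list[Path]:
    """Render each PDF page to a PNG at the given DPI (300 keeps small glyphs legible)."""
    out_dir.mkdir(parents=True, exist_ok=True)
    paths: list[Path] = []
    for i, page in enumerate(convert_from_path(str(pdf_path), dpi=dpi, fmt="png"), start=1):
        p = out_dir / f"{pdf_path.stem}_page_{i}.png"
        page.save(str(p), "PNG")
        paths.append(p)
    return paths
```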
- def process_image(
- self,
- image_path: Path,
- lang: str = 'ch',
- detect_layout: bool = True,
- confidence_threshold: Optional[float] = None,
- output_dir: Optional[Path] = None,
- current_page: int = 0
- ) -> Dict:
- """
- Process single image with OCR and layout analysis
-
- Args:
- image_path: Path to image file
- lang: Language for OCR
- detect_layout: Whether to perform layout analysis
- confidence_threshold: Minimum confidence threshold (uses default if None)
- output_dir: Optional output directory for saving extracted images
- current_page: Current page number (0-based) for multi-page documents
-
- Returns:
- Dictionary with OCR results and metadata
- """
- start_time = datetime.now()
- threshold = confidence_threshold if confidence_threshold is not None else self.confidence_threshold
-
- try:
- # Check if file is Office document
- if self.office_converter.is_office_document(image_path):
- logger.info(f"Detected Office document: {image_path.name}, converting to PDF")
- try:
- # Convert Office document to PDF
- pdf_path = self.office_converter.convert_to_pdf(image_path)
- logger.info(f"Office document converted to PDF: {pdf_path.name}")
-
- # Process the PDF (will be handled by PDF processing logic below)
- image_path = pdf_path
- except OfficeConverterError as e:
- logger.error(f"Office conversion failed: {str(e)}")
- raise
-
- # Check if file is PDF
- is_pdf = image_path.suffix.lower() == '.pdf'
-
- if is_pdf:
- # Convert PDF to images
- logger.info(f"Detected PDF file: {image_path.name}, converting to images")
- pdf_images_dir = image_path.parent / f"{image_path.stem}_pages"
- image_paths = self.convert_pdf_to_images(image_path, pdf_images_dir)
-
- # Process all pages
- all_text_regions = []
- total_confidence_sum = 0.0
- total_valid_regions = 0
- all_layout_data = []
- all_images_metadata = []
- all_ocr_dimensions = []
-
- for page_num, page_image_path in enumerate(image_paths, 1):
- logger.info(f"Processing PDF page {page_num}/{len(image_paths)}")
-
- # Process each page with correct page number (0-based for layout data)
- page_result = self.process_image(
- page_image_path,
- lang=lang,
- detect_layout=detect_layout,
- confidence_threshold=confidence_threshold,
- output_dir=output_dir,
- current_page=page_num - 1 # Convert to 0-based page number for layout data
- )
-
- # Accumulate results
- if page_result['status'] == 'success':
- # Add page number to each text region
- for region in page_result['text_regions']:
- region['page'] = page_num
- all_text_regions.append(region)
-
- total_confidence_sum += page_result['average_confidence'] * page_result['total_text_regions']
- total_valid_regions += page_result['total_text_regions']
-
- # Accumulate layout data (page numbers already set correctly in analyze_layout)
- if page_result.get('layout_data'):
- layout_data = page_result['layout_data']
- all_layout_data.append(layout_data)
-
- # Accumulate images metadata (page numbers already set correctly in analyze_layout)
- if page_result.get('images_metadata'):
- all_images_metadata.extend(page_result['images_metadata'])
-
- # Store OCR dimensions for each page
- if page_result.get('ocr_dimensions'):
- all_ocr_dimensions.append({
- 'page': page_num,
- 'width': page_result['ocr_dimensions']['width'],
- 'height': page_result['ocr_dimensions']['height']
- })
-
- # Calculate overall average confidence
- avg_confidence = total_confidence_sum / total_valid_regions if total_valid_regions > 0 else 0.0
-
- # Combine layout data from all pages
- combined_layout = None
- if all_layout_data:
- combined_elements = []
- for layout in all_layout_data:
- if layout.get('elements'):
- combined_elements.extend(layout['elements'])
- if combined_elements:
- combined_layout = {
- 'elements': combined_elements,
- 'total_elements': len(combined_elements),
- 'reading_order': list(range(len(combined_elements))),
- }
-
- # Generate combined markdown
- markdown_content = self.generate_markdown(all_text_regions, combined_layout)
-
- # Calculate processing time
- processing_time = (datetime.now() - start_time).total_seconds()
-
- logger.info(
- f"PDF processing completed: {image_path.name} - "
- f"{len(image_paths)} pages, "
- f"{len(all_text_regions)} regions, "
- f"{avg_confidence:.2f} avg confidence, "
- f"{processing_time:.2f}s"
- )
-
- return {
- 'status': 'success',
- 'file_name': image_path.name,
- 'language': lang,
- 'text_regions': all_text_regions,
- 'total_text_regions': len(all_text_regions),
- 'average_confidence': avg_confidence,
- 'layout_data': combined_layout,
- 'images_metadata': all_images_metadata,
- 'markdown_content': markdown_content,
- 'processing_time': processing_time,
- 'timestamp': datetime.utcnow().isoformat(),
- 'total_pages': len(image_paths),
- 'ocr_dimensions': all_ocr_dimensions if all_ocr_dimensions else None,
- }
-
- # Get OCR engine (for non-PDF images)
- ocr_engine = self.get_ocr_engine(lang)
-
- # Get the actual image dimensions that OCR will use
- from PIL import Image
- with Image.open(image_path) as img:
- ocr_width, ocr_height = img.size
- logger.info(f"OCR processing image dimensions: {ocr_width}x{ocr_height}")
-
- # Perform OCR
- logger.info(f"Processing image: {image_path.name}")
- # Note: In PaddleOCR 3.x, use_angle_cls is set during initialization, not in ocr() call
- ocr_results = ocr_engine.ocr(str(image_path))
-
- # Parse OCR results (PaddleOCR 3.x format)
- text_regions = []
- total_confidence = 0.0
- valid_regions = 0
-
- if ocr_results and isinstance(ocr_results, (list, tuple)) and len(ocr_results) > 0:
- # PaddleOCR 3.x returns a list of dictionaries (one per page)
- for page_result in ocr_results:
- if isinstance(page_result, dict):
- # New format: {'rec_texts': [...], 'rec_scores': [...], 'rec_polys': [...]}
- texts = page_result.get('rec_texts', [])
- scores = page_result.get('rec_scores', [])
- polys = page_result.get('rec_polys', [])
-
- # Process each recognized text
- for idx, text in enumerate(texts):
- # Get corresponding score and bbox
- confidence = scores[idx] if idx < len(scores) else 1.0
- bbox = polys[idx] if idx < len(polys) else []
-
- # Convert numpy array bbox to list for JSON serialization
- if hasattr(bbox, 'tolist'):
- bbox = bbox.tolist()
-
- # Filter by confidence threshold
- if confidence >= threshold:
- text_regions.append({
- 'text': text,
- 'bbox': bbox,
- 'confidence': float(confidence),
- })
- total_confidence += confidence
- valid_regions += 1
-
- avg_confidence = total_confidence / valid_regions if valid_regions > 0 else 0.0
-
- logger.info(f"Parsed {len(text_regions)} text regions with avg confidence {avg_confidence:.3f}")
-
- # Layout analysis (if requested)
- layout_data = None
- images_metadata = []
-
- if detect_layout:
- # Pass current_page to analyze_layout for correct page numbering
- layout_data, images_metadata = self.analyze_layout(image_path, output_dir=output_dir, current_page=current_page)
-
- # Generate Markdown
- markdown_content = self.generate_markdown(text_regions, layout_data)
-
- # Calculate processing time
- processing_time = (datetime.now() - start_time).total_seconds()
-
- result = {
- 'status': 'success',
- 'file_name': image_path.name,
- 'language': lang,
- 'text_regions': text_regions,
- 'total_text_regions': len(text_regions),
- 'average_confidence': avg_confidence,
- 'layout_data': layout_data,
- 'images_metadata': images_metadata,
- 'markdown_content': markdown_content,
- 'processing_time': processing_time,
- 'timestamp': datetime.utcnow().isoformat(),
- 'ocr_dimensions': {
- 'width': ocr_width,
- 'height': ocr_height
- }
- }
-
- logger.info(
- f"OCR completed: {image_path.name} - "
- f"{len(text_regions)} regions, "
- f"{avg_confidence:.2f} avg confidence, "
- f"{processing_time:.2f}s"
- )
-
- return result
-
- except Exception as e:
- import traceback
- error_trace = traceback.format_exc()
- logger.error(f"OCR processing error for {image_path.name}: {str(e)}\n{error_trace}")
- return {
- 'status': 'error',
- 'file_name': image_path.name,
- 'error_message': str(e),
- 'processing_time': (datetime.now() - start_time).total_seconds(),
- }
-
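The PaddleOCR 3.x result parsing above can be exercised in isolation. A sketch of the same confidence filtering, where `page` is one entry of the list returned by `ocr()` (keys per the 3.x dictionary format used above):

```python
def filter_regions(page: dict, threshold: float = 0.5) -> list[dict]:
    """Keep recognized lines whose confidence clears the threshold."""
    texts = page.get("rec_texts", [])
    scores = page.get("rec_scores", [])
    polys = page.get("rec_polys", [])
    regions = []
    for i, text in enumerate(texts):
        conf = scores[i] if i < len(scores) else 1.0  # default to 1.0 when score is missing
        bbox = polys[i] if i < len(polys) else []
        if hasattr(bbox, "tolist"):                   # numpy array -> JSON-safe list
            bbox = bbox.tolist()
        if conf >= threshold:
            regions.append({"text": text, "bbox": bbox, "confidence": float(conf)})
    return regions

print(filter_regions({"rec_texts": ["hi", "??"], "rec_scores": [0.9, 0.2], "rec_polys": [[], []]}))
# -> only the 0.9-confidence region survives
```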
- def _extract_table_text(self, html_content: str) -> str:
- """
- Extract text from HTML table content for translation purposes
-
- Args:
- html_content: HTML content containing table
-
- Returns:
- Extracted text from table cells
- """
- try:
- from html.parser import HTMLParser
-
- class TableTextExtractor(HTMLParser):
- def __init__(self):
- super().__init__()
- self.text_parts = []
- self.in_table = False
-
- def handle_starttag(self, tag, attrs):
- if tag == 'table':
- self.in_table = True
-
- def handle_endtag(self, tag):
- if tag == 'table':
- self.in_table = False
- elif tag in ('td', 'th') and self.in_table:
- self.text_parts.append(' | ') # Cell separator
- elif tag == 'tr' and self.in_table:
- self.text_parts.append('\n') # Row separator
-
- def handle_data(self, data):
- if self.in_table:
- stripped = data.strip()
- if stripped:
- self.text_parts.append(stripped)
-
- parser = TableTextExtractor()
- parser.feed(html_content)
-
- # Clean up the extracted text
- extracted = ''.join(parser.text_parts)
- # Remove multiple separators
- import re
- extracted = re.sub(r'\s*\|\s*\|+\s*', ' | ', extracted)
- extracted = re.sub(r'\n+', '\n', extracted)
- extracted = extracted.strip()
-
- return extracted
-
- except Exception as e:
- logger.warning(f"Failed to extract table text: {e}")
- # Fallback: just remove HTML tags
- import re
- text = re.sub(r'<[^>]+>', ' ', html_content)
- text = re.sub(r'\s+', ' ', text)
- return text.strip()
-
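A toy run of the stdlib HTMLParser approach, showing why cells need an explicit separator (character data alone carries no cell boundaries):

```python
from html.parser import HTMLParser

class CellGrabber(HTMLParser):
    """Collect <td>/<th> text, closing a cell on its end tag."""
    def __init__(self):
        super().__init__()
        self.cells, self._buf = [], []
    def handle_data(self, data):
        self._buf.append(data)
    def handle_endtag(self, tag):
        if tag in ("td", "th"):
            self.cells.append("".join(self._buf).strip())
            self._buf = []

g = CellGrabber()
g.feed("<table><tr><td>Ink</td><td>2</td></tr></table>")
print(" | ".join(g.cells))  # Ink | 2
```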
- def analyze_layout(self, image_path: Path, output_dir: Optional[Path] = None, current_page: int = 0) -> Tuple[Optional[Dict], List[Dict]]:
- """
- Analyze document layout using PP-StructureV3
-
- Args:
- image_path: Path to image file
- output_dir: Optional output directory for saving extracted images (defaults to image_path.parent)
- current_page: Current page number (0-based) for multi-page documents
-
- Returns:
- Tuple of (layout_data, images_metadata)
- """
- try:
- structure_engine = self.get_structure_engine()
-
- # Perform structure analysis using predict() method (PaddleOCR 3.x API)
- logger.info(f"Running layout analysis on {image_path.name}")
- results = structure_engine.predict(str(image_path))
-
- layout_elements = []
- images_metadata = []
-
- # Process each page result (for images, usually just one page)
- for page_idx, page_result in enumerate(results):
- # Get markdown dictionary from result object
- if hasattr(page_result, 'markdown'):
- markdown_dict = page_result.markdown
- logger.info(f"Page {page_idx} markdown keys: {markdown_dict.keys() if isinstance(markdown_dict, dict) else type(markdown_dict)}")
-
- # Extract layout information from markdown structure
- if isinstance(markdown_dict, dict):
- # Get markdown texts (HTML format with tables and structure)
- markdown_texts = markdown_dict.get('markdown_texts', '')
- markdown_images = markdown_dict.get('markdown_images', {})
-
- # Create a layout element for the structured content
- if markdown_texts:
- # Parse HTML content to identify tables and text
- import re
-
- # Check if content contains tables
- has_table = '<table' in markdown_texts
-
- def generate_markdown(self, text_regions: List[Dict], layout_data: Optional[Dict] = None) -> str:
- """
- Generate Markdown from OCR results
-
- Args:
- text_regions: List of text regions with bbox and text
- layout_data: Optional layout structure information
-
- Returns:
- Markdown formatted string
- """
- markdown_lines = []
-
- if layout_data and layout_data.get('elements'):
- # Generate structured Markdown based on layout
- for element in layout_data['elements']:
- element_type = element.get('type', 'text')
- content = element.get('content', '')
-
- if element_type == 'title':
- markdown_lines.append(f"# {content}\n")
- elif element_type == 'table':
- # Table in HTML format
- markdown_lines.append(content)
- markdown_lines.append("")
- elif element_type == 'figure':
- element_id = element.get('element_id')
- markdown_lines.append(f"\n")
- else:
- markdown_lines.append(f"{content}\n")
-
- else:
- # Simple Markdown from text regions only
- # Sort by vertical position (top to bottom)
- def get_y_coord(region):
- """Safely extract Y coordinate from bbox"""
- bbox = region.get('bbox', [])
- if isinstance(bbox, (list, tuple)) and len(bbox) > 0:
- if isinstance(bbox[0], (list, tuple)) and len(bbox[0]) > 1:
- return bbox[0][1] # [[x1,y1], [x2,y2], ...] format
- elif len(bbox) > 1:
- return bbox[1] # [x1, y1, x2, y2, ...] format
- return 0 # Default to 0 if can't extract
-
- sorted_regions = sorted(text_regions, key=get_y_coord)
-
- for region in sorted_regions:
- text = region['text']
- markdown_lines.append(text)
-
- return "\n".join(markdown_lines)
-
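The fallback branch's ordering can be seen on two fake regions; both bbox shapes handled above are covered:

```python
def top_y(region: dict) -> float:
    """Top edge of a region, for either quad or flat bbox format."""
    bbox = region.get("bbox", [])
    if bbox and isinstance(bbox[0], (list, tuple)):
        return bbox[0][1]                       # [[x1, y1], [x2, y2], ...]
    return bbox[1] if len(bbox) > 1 else 0      # [x1, y1, x2, y2, ...]

regions = [
    {"text": "footer", "bbox": [[10, 300], [200, 300], [200, 320], [10, 320]]},
    {"text": "title",  "bbox": [10, 20, 200, 50]},
]
print([r["text"] for r in sorted(regions, key=top_y)])  # ['title', 'footer']
```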
- def save_results(
- self,
- result: Dict,
- output_dir: Path,
- file_id: str,
- source_file_path: Optional[Path] = None
- ) -> Tuple[Optional[Path], Optional[Path], Optional[Path]]:
- """
- Save OCR results to JSON, Markdown, and layout-preserving PDF files
-
- Args:
- result: OCR result dictionary
- output_dir: Output directory
- file_id: Unique file identifier
- source_file_path: Optional path to original source file for PDF generation
-
- Returns:
- Tuple of (json_path, markdown_path, pdf_path)
- """
- try:
- output_dir.mkdir(parents=True, exist_ok=True)
-
- # Save JSON
- json_path = output_dir / f"{file_id}_result.json"
- with open(json_path, 'w', encoding='utf-8') as f:
- json.dump(result, f, ensure_ascii=False, indent=2)
-
- # Save Markdown
- markdown_path = output_dir / f"{file_id}_output.md"
- markdown_content = result.get('markdown_content', '')
- with open(markdown_path, 'w', encoding='utf-8') as f:
- f.write(markdown_content)
-
- logger.info(f"Results saved: {json_path.name}, {markdown_path.name}")
-
- # Generate layout-preserving PDF
- pdf_path = None
- try:
- from app.services.pdf_generator_service import pdf_generator_service
-
- pdf_filename = f"{file_id}_layout.pdf"
- pdf_path = output_dir / pdf_filename
-
- logger.info(f"Generating layout-preserving PDF: {pdf_filename}")
-
- success = pdf_generator_service.generate_layout_pdf(
- json_path=json_path,
- output_path=pdf_path,
- source_file_path=source_file_path
- )
-
- if success:
- logger.info(f"✓ PDF generated successfully: {pdf_path.name}")
- else:
- logger.warning(f"✗ PDF generation failed for {file_id}")
- pdf_path = None
-
- except Exception as e:
- logger.error(f"Error generating PDF for {file_id}: {str(e)}")
- import traceback
- traceback.print_exc()
- pdf_path = None
-
- return json_path, markdown_path, pdf_path
-
- except Exception as e:
- logger.error(f"Error saving results: {str(e)}")
- return None, None, None
diff --git a/backend/app/services/pdf_font_manager.py b/backend/app/services/pdf_font_manager.py
deleted file mode 100644
index d56f265..0000000
--- a/backend/app/services/pdf_font_manager.py
+++ /dev/null
@@ -1,312 +0,0 @@
-"""
-PDF Font Manager - Handles font loading, registration, and fallback.
-
-This module provides unified font management for PDF generation,
-including CJK font support and font fallback mechanisms.
-"""
-
-import logging
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Dict, List, Optional, Tuple
-
-from reportlab.pdfbase import pdfmetrics
-from reportlab.pdfbase.ttfonts import TTFont
-
-logger = logging.getLogger(__name__)
-
-
-# ============================================================================
-# Configuration
-# ============================================================================
-
-@dataclass
-class FontConfig:
- """Configuration for font management."""
- # Primary fonts
- chinese_font_name: str = "NotoSansSC"
- chinese_font_path: Optional[Path] = None
-
- # Fallback fonts (built-in)
- fallback_font_name: str = "Helvetica"
- fallback_cjk_font_name: str = "HeiseiMin-W3" # Built-in ReportLab CJK
-
- # Font sizes
- default_font_size: int = 10
- min_font_size: int = 6
- max_font_size: int = 14
-
- # Font registration options
- auto_register: bool = True
- enable_cjk_fallback: bool = True
-
-
-# ============================================================================
-# Font Manager
-# ============================================================================
-
-class FontManager:
- """
- Manages font registration and selection for PDF generation.
-
- Features:
- - Lazy font registration
- - CJK (Chinese/Japanese/Korean) font support
- - Automatic fallback to built-in fonts
- - Font caching to avoid duplicate registration
- """
-
- _instance = None
- _registered_fonts: Dict[str, Path] = {}
-
- def __new__(cls, *args, **kwargs):
- """Singleton pattern to avoid duplicate font registration."""
- if cls._instance is None:
- cls._instance = super().__new__(cls)
- cls._instance._initialized = False
- return cls._instance
-
- def __init__(self, config: Optional[FontConfig] = None):
- """
- Initialize FontManager.
-
- Args:
- config: FontConfig instance (uses defaults if None)
- """
- if self._initialized:
- return
-
- self.config = config or FontConfig()
- self._primary_font_registered = False
- self._cjk_fallback_available = False
-
- # Auto-register fonts if enabled
- if self.config.auto_register:
- self._register_fonts()
-
- self._initialized = True
-
- @property
- def primary_font_name(self) -> str:
- """Get the primary font name to use."""
- if self._primary_font_registered:
- return self.config.chinese_font_name
- return self.config.fallback_font_name
-
- @property
- def is_cjk_enabled(self) -> bool:
- """Check if CJK fonts are available."""
- return self._primary_font_registered or self._cjk_fallback_available
-
- @classmethod
- def reset(cls):
- """Reset singleton instance (for testing)."""
- cls._instance = None
- cls._registered_fonts = {}
-
- def get_font_for_text(self, text: str) -> str:
- """
- Get appropriate font name for given text.
-
- Args:
- text: Text to render
-
- Returns:
- Font name suitable for the text content
- """
- if self._contains_cjk(text):
- if self._primary_font_registered:
- return self.config.chinese_font_name
- elif self._cjk_fallback_available:
- return self.config.fallback_cjk_font_name
- return self.primary_font_name
-
- def get_font_size(
- self,
- text: str,
- available_width: float,
- available_height: float,
- pdf_canvas=None
- ) -> int:
- """
- Calculate optimal font size for text to fit within bounds.
-
- Args:
- text: Text to render
- available_width: Maximum width available
- available_height: Maximum height available
- pdf_canvas: Optional canvas for precise measurement
-
- Returns:
- Font size that fits within bounds
- """
- font_name = self.get_font_for_text(text)
-
- for size in range(self.config.max_font_size, self.config.min_font_size - 1, -1):
- if pdf_canvas:
- # Precise measurement with canvas
- text_width = pdf_canvas.stringWidth(text, font_name, size)
- else:
- # Approximate measurement
- text_width = len(text) * size * 0.6 # Rough estimate
-
- text_height = size * 1.2 # Line height
-
- if text_width <= available_width and text_height <= available_height:
- return size
-
- return self.config.min_font_size
-
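The shrink-to-fit search above, restated against ReportLab's module-level metrics (no canvas needed); `Helvetica` is a built-in font, so this runs as-is:

```python
from reportlab.pdfbase.pdfmetrics import stringWidth

def fit_size(text: str, max_w: float, max_h: float,
             font: str = "Helvetica", lo: int = 6, hi: int = 14) -> int:
    """Largest size in [lo, hi] whose measured width and 1.2x line height both fit."""
    for size in range(hi, lo - 1, -1):
        if stringWidth(text, font, size) <= max_w and size * 1.2 <= max_h:
            return size
    return lo

print(fit_size("Hello, PDF", max_w=80.0, max_h=14.0))
```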
- def register_font(
- self,
- font_name: str,
- font_path: Path,
- force: bool = False
- ) -> bool:
- """
- Register a custom font.
-
- Args:
- font_name: Name to register font under
- font_path: Path to TTF font file
- force: Force re-registration if already registered
-
- Returns:
- True if registration successful
- """
- if font_name in self._registered_fonts and not force:
- logger.debug(f"Font {font_name} already registered")
- return True
-
- try:
- if not font_path.exists():
- logger.error(f"Font file not found: {font_path}")
- return False
-
- pdfmetrics.registerFont(TTFont(font_name, str(font_path)))
- self._registered_fonts[font_name] = font_path
- logger.info(f"Font registered: {font_name} from {font_path}")
- return True
-
- except Exception as e:
- logger.error(f"Failed to register font {font_name}: {e}")
- return False
-
- def get_registered_fonts(self) -> List[str]:
- """Get list of registered custom font names."""
- return list(self._registered_fonts.keys())
-
- # =========================================================================
- # Private Methods
- # =========================================================================
-
- def _register_fonts(self):
- """Register configured fonts."""
- # Register primary Chinese font
- if self.config.chinese_font_path:
- self._register_chinese_font()
-
- # Setup CJK fallback
- if self.config.enable_cjk_fallback:
- self._setup_cjk_fallback()
-
- def _register_chinese_font(self):
- """Register the primary Chinese font."""
- font_path = self.config.chinese_font_path
-
- if font_path is None:
- # Try to load from settings
- try:
- from app.core.config import settings
- font_path = Path(settings.chinese_font_path)
- except Exception as e:
- logger.debug(f"Could not load font path from settings: {e}")
- return
-
- # Resolve relative path
- if not font_path.is_absolute():
- # Try project root
- project_root = Path(__file__).resolve().parent.parent.parent.parent
- font_path = project_root / font_path
-
- if not font_path.exists():
- logger.warning(f"Chinese font not found at {font_path}")
- return
-
- try:
- pdfmetrics.registerFont(TTFont(self.config.chinese_font_name, str(font_path)))
- self._registered_fonts[self.config.chinese_font_name] = font_path
- self._primary_font_registered = True
- logger.info(f"Chinese font registered: {self.config.chinese_font_name}")
- except Exception as e:
- logger.error(f"Failed to register Chinese font: {e}")
-
- def _setup_cjk_fallback(self):
- """Setup CJK fallback using built-in fonts."""
- try:
- # ReportLab includes CID fonts for CJK
- from reportlab.pdfbase.cidfonts import UnicodeCIDFont
-
- # Register CJK fonts if not already registered
- try:
- pdfmetrics.registerFont(UnicodeCIDFont('HeiseiMin-W3'))
- self._cjk_fallback_available = True
- logger.debug("CJK fallback font available: HeiseiMin-W3")
- except Exception:
- pass # Font may already be registered
-
- except ImportError:
- logger.debug("CID fonts not available for CJK fallback")
-
- def _contains_cjk(self, text: str) -> bool:
- """
- Check if text contains CJK characters.
-
- Args:
- text: Text to check
-
- Returns:
- True if text contains Chinese, Japanese, or Korean characters
- """
- if not text:
- return False
-
- for char in text:
- code = ord(char)
- # CJK Unified Ideographs and related ranges
- if any([
- 0x4E00 <= code <= 0x9FFF, # CJK Unified Ideographs
- 0x3400 <= code <= 0x4DBF, # CJK Extension A
- 0x20000 <= code <= 0x2A6DF, # CJK Extension B
- 0x3000 <= code <= 0x303F, # CJK Punctuation
- 0x3040 <= code <= 0x309F, # Hiragana
- 0x30A0 <= code <= 0x30FF, # Katakana
- 0xAC00 <= code <= 0xD7AF, # Korean Hangul
- ]):
- return True
- return False
-
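The same range test in miniature; the ranges mirror the list above:

```python
def contains_cjk(text: str) -> bool:
    return any(
        0x4E00 <= ord(c) <= 0x9FFF      # CJK Unified Ideographs
        or 0x3040 <= ord(c) <= 0x30FF   # Hiragana + Katakana
        or 0xAC00 <= ord(c) <= 0xD7AF   # Hangul syllables
        for c in text
    )

assert contains_cjk("你好") and contains_cjk("カタカナ") and not contains_cjk("hello")
```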
-
-# ============================================================================
-# Convenience Functions
-# ============================================================================
-
-_default_manager: Optional[FontManager] = None
-
-
-def get_font_manager() -> FontManager:
- """Get the default FontManager instance."""
- global _default_manager
- if _default_manager is None:
- _default_manager = FontManager()
- return _default_manager
-
-
-def register_font(font_name: str, font_path: Path) -> bool:
- """Register a font using the default manager."""
- return get_font_manager().register_font(font_name, font_path)
-
-
-def get_font_for_text(text: str) -> str:
- """Get appropriate font for text using the default manager."""
- return get_font_manager().get_font_for_text(text)
diff --git a/backend/app/services/preprocessor.py b/backend/app/services/preprocessor.py
deleted file mode 100644
index 4953f13..0000000
--- a/backend/app/services/preprocessor.py
+++ /dev/null
@@ -1,230 +0,0 @@
-"""
-Tool_OCR - Document Preprocessor Service
-Handles file validation, format detection, and preprocessing
-"""
-
-import magic
-from pathlib import Path
-from typing import Tuple, Optional
-import logging
-from PIL import Image
-import cv2
-import numpy as np
-
-from app.core.config import settings
-
-logger = logging.getLogger(__name__)
-
-
-class DocumentPreprocessor:
- """
- Document preprocessing service for format standardization
- Validates and prepares documents for OCR processing
- """
-
- SUPPORTED_IMAGE_FORMATS = ['png', 'jpg', 'jpeg', 'bmp', 'tiff', 'tif']
- SUPPORTED_PDF_FORMAT = ['pdf']
- ALL_SUPPORTED_FORMATS = SUPPORTED_IMAGE_FORMATS + SUPPORTED_PDF_FORMAT
-
- def __init__(self):
- self.allowed_extensions = settings.allowed_extensions_list
- self.max_file_size = settings.max_upload_size
- logger.info(f"DocumentPreprocessor initialized with allowed_extensions: {self.allowed_extensions}")
-
- def validate_file(self, file_path: Path) -> Tuple[bool, Optional[str], Optional[str]]:
- """
- Validate file format, size, and integrity
-
- Args:
- file_path: Path to the file to validate
-
- Returns:
- Tuple of (is_valid, file_format, error_message)
- """
- try:
- # Check file exists
- if not file_path.exists():
- return False, None, f"File not found: {file_path}"
-
- # Check file size
- file_size = file_path.stat().st_size
- if file_size > self.max_file_size:
- max_mb = self.max_file_size / (1024 * 1024)
- actual_mb = file_size / (1024 * 1024)
- return False, None, f"File too large: {actual_mb:.2f}MB (max {max_mb:.2f}MB)"
-
- # Detect file format using magic numbers
- mime = magic.Magic(mime=True)
- mime_type = mime.from_file(str(file_path))
-
- # Map MIME type to format
- file_format = self._mime_to_format(mime_type)
- if not file_format:
- return False, None, f"Unsupported file type: {mime_type}"
-
- # Check if format is in allowed extensions
- if file_format not in self.allowed_extensions:
- return False, None, f"File format '{file_format}' not allowed"
-
- # Validate file integrity
- is_valid, error = self._validate_integrity(file_path, file_format)
- if not is_valid:
- return False, file_format, f"File corrupted: {error}"
-
- logger.info(f"File validated successfully: {file_path.name} ({file_format})")
- return True, file_format, None
-
- except Exception as e:
- logger.error(f"File validation error: {str(e)}")
- return False, None, f"Validation error: {str(e)}"
-
- def _mime_to_format(self, mime_type: str) -> Optional[str]:
- """Convert MIME type to file format"""
- mime_map = {
- 'image/png': 'png',
- 'image/jpeg': 'jpg',
- 'image/jpg': 'jpg',
- 'image/bmp': 'bmp',
- 'image/tiff': 'tiff',
- 'image/x-tiff': 'tiff',
- 'application/pdf': 'pdf',
- 'application/msword': 'doc',
- 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
- 'application/vnd.ms-powerpoint': 'ppt',
- 'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
- }
- return mime_map.get(mime_type)
-
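Magic-number sniffing is what makes the filename extension irrelevant here. A sketch assuming the python-magic package (libmagic bindings), with a trimmed map:

```python
import magic

MIME_TO_FORMAT = {
    "image/png": "png",
    "image/jpeg": "jpg",
    "application/pdf": "pdf",
}

def sniff_format(path: str) -> str | None:
    """Detect format from file content, not the filename extension."""
    mime = magic.Magic(mime=True).from_file(path)
    return MIME_TO_FORMAT.get(mime)
```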
- def _validate_integrity(self, file_path: Path, file_format: str) -> Tuple[bool, Optional[str]]:
- """
- Validate file integrity by attempting to open it
-
- Args:
- file_path: Path to file
- file_format: Detected file format
-
- Returns:
- Tuple of (is_valid, error_message)
- """
- try:
- if file_format in self.SUPPORTED_IMAGE_FORMATS:
- # Try to open image
- with Image.open(file_path) as img:
- img.verify() # Verify image integrity
- # Reopen for actual check (verify() closes the file)
- with Image.open(file_path) as img:
- _ = img.size # Force load to detect corruption
- return True, None
-
- elif file_format == 'pdf':
- # Basic PDF validation - check file starts with PDF signature
- with open(file_path, 'rb') as f:
- header = f.read(5)
- if header != b'%PDF-':
- return False, "Invalid PDF header"
- return True, None
-
- elif file_format in ['doc', 'docx', 'ppt', 'pptx']:
- # Office documents - basic structural validation
- # Modern Office formats (docx, pptx) are ZIP-based
- if file_format in ['docx', 'pptx']:
- import zipfile
- try:
- with zipfile.ZipFile(file_path, 'r') as zf:
- # Check if it has the required Office structure
- if file_format == 'docx' and 'word/document.xml' not in zf.namelist():
- return False, "Invalid DOCX structure"
- elif file_format == 'pptx' and 'ppt/presentation.xml' not in zf.namelist():
- return False, "Invalid PPTX structure"
- except zipfile.BadZipFile:
- return False, "Invalid Office file (corrupt ZIP)"
- # Legacy formats (doc, ppt) are accepted here without deeper checks
- return True, None
-
- else:
- return False, f"Unknown format: {file_format}"
-
- except Exception as e:
- return False, str(e)
-
- def preprocess_image(
- self,
- image_path: Path,
- enhance: bool = True,
- output_path: Optional[Path] = None
- ) -> Tuple[bool, Optional[Path], Optional[str]]:
- """
- Preprocess image to improve OCR accuracy
-
- Args:
- image_path: Path to input image
- enhance: Whether to apply enhancement
- output_path: Optional output path (defaults to temp directory)
-
- Returns:
- Tuple of (success, processed_image_path, error_message)
- """
- try:
- # Read image
- img = cv2.imread(str(image_path))
- if img is None:
- return False, None, "Failed to read image"
-
- if not enhance:
- # No preprocessing, return original
- return True, image_path, None
-
- # Convert to grayscale
- gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-
- # Apply adaptive thresholding to handle varying lighting
- processed = cv2.adaptiveThreshold(
- gray,
- 255,
- cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
- cv2.THRESH_BINARY,
- 11,
- 2
- )
-
- # Denoise
- processed = cv2.fastNlMeansDenoising(processed, None, 10, 7, 21)
-
- # Determine output path
- if output_path is None:
- output_path = Path(settings.processed_dir) / f"processed_{image_path.name}"
-
- # Save processed image
- cv2.imwrite(str(output_path), processed)
-
- logger.info(f"Image preprocessed: {image_path.name} -> {output_path.name}")
- return True, output_path, None
-
- except Exception as e:
- logger.error(f"Image preprocessing error: {str(e)}")
- return False, None, f"Preprocessing error: {str(e)}"
-
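The enhancement pipeline as one standalone function, using the same OpenCV calls and parameters as above:

```python
import cv2

def binarize_for_ocr(in_path: str, out_path: str) -> None:
    """Grayscale -> adaptive threshold -> non-local-means denoise, then save."""
    img = cv2.imread(in_path)
    if img is None:
        raise ValueError(f"unreadable image: {in_path}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )
    clean = cv2.fastNlMeansDenoising(binary, None, 10, 7, 21)
    cv2.imwrite(out_path, clean)
```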
- def get_file_info(self, file_path: Path) -> dict:
- """
- Get comprehensive file information
-
- Args:
- file_path: Path to file
-
- Returns:
- Dictionary with file information
- """
- stat = file_path.stat()
- mime = magic.Magic(mime=True)
- mime_type = mime.from_file(str(file_path))
-
- return {
- 'name': file_path.name,
- 'path': str(file_path),
- 'size': stat.st_size,
- 'size_mb': stat.st_size / (1024 * 1024),
- 'mime_type': mime_type,
- 'format': self._mime_to_format(mime_type),
- 'created_at': stat.st_ctime,
- 'modified_at': stat.st_mtime,
- }
diff --git a/frontend/src/components/Layout.tsx b/frontend/src/components/Layout.tsx
index 0db8f5a..799a5f7 100644
--- a/frontend/src/components/Layout.tsx
+++ b/frontend/src/components/Layout.tsx
@@ -1,7 +1,6 @@
import { Outlet, NavLink, useNavigate } from 'react-router-dom'
import { useTranslation } from 'react-i18next'
import { useAuthStore } from '@/store/authStore'
-import { apiClient } from '@/services/api'
import { apiClientV2 } from '@/services/apiV2'
import {
Upload,
@@ -29,12 +28,7 @@ export default function Layout() {
const handleLogout = async () => {
try {
- // Use V2 API if authenticated with V2
- if (apiClientV2.isAuthenticated()) {
- await apiClientV2.logout()
- } else {
- apiClient.logout()
- }
+ await apiClientV2.logout()
} catch (error) {
console.error('Logout error:', error)
} finally {
diff --git a/frontend/src/components/MarkdownPreview.tsx b/frontend/src/components/MarkdownPreview.tsx
deleted file mode 100644
index 3ad5a09..0000000
--- a/frontend/src/components/MarkdownPreview.tsx
+++ /dev/null
@@ -1,26 +0,0 @@
-import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
-
-interface MarkdownPreviewProps {
- title?: string
- content: string
- className?: string
-}
-
-export default function MarkdownPreview({ title, content, className }: MarkdownPreviewProps) {
- return (
- <Card className={className}>
- {title && (
- <CardHeader>
- <CardTitle>{title}</CardTitle>
- </CardHeader>
- )}
- <CardContent>
- <pre className="whitespace-pre-wrap">{content}</pre>
- </CardContent>
- </Card>
- )
-}
diff --git a/frontend/src/components/ResultsTable.tsx b/frontend/src/components/ResultsTable.tsx
deleted file mode 100644
index 1fac66c..0000000
--- a/frontend/src/components/ResultsTable.tsx
+++ /dev/null
@@ -1,90 +0,0 @@
-import { useTranslation } from 'react-i18next'
-import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from '@/components/ui/table'
-import { Badge } from '@/components/ui/badge'
-import { Button } from '@/components/ui/button'
-import type { FileResult } from '@/types/apiV2'
-
-interface ResultsTableProps {
- files: FileResult[]
- onViewResult?: (fileId: number) => void
- onDownloadPDF?: (fileId: number) => void
-}
-
-export default function ResultsTable({ files, onViewResult, onDownloadPDF }: ResultsTableProps) {
- const { t } = useTranslation()
-
- const getStatusBadge = (status: FileResult['status']) => {
- switch (status) {
- case 'completed':
- return <Badge>{t('processing.completed')}</Badge>
- case 'processing':
- return <Badge>{t('processing.processing')}</Badge>
- case 'failed':
- return <Badge>{t('processing.failed')}</Badge>
- default:
- return <Badge>{t('processing.pending')}</Badge>
- }
- }
-
- const formatTime = (seconds?: number) => {
- if (!seconds) return 'N/A'
- return `${seconds.toFixed(2)}s`
- }
-
- return (
- <Table>
- <TableHeader>
- <TableRow>
- <TableHead>{t('results.filename')}</TableHead>
- <TableHead>{t('results.status')}</TableHead>
- <TableHead>{t('results.processingTime')}</TableHead>
- <TableHead>{t('results.actions')}</TableHead>
- </TableRow>
- </TableHeader>
- <TableBody>
- {files.length === 0 ? (
- <TableRow>
- <TableCell colSpan={4}>{t('results.noResults')}</TableCell>
- </TableRow>
- ) : (
- files.map((file) => (
- <TableRow key={file.id}>
- <TableCell>{file.filename}</TableCell>
- <TableCell>{getStatusBadge(file.status)}</TableCell>
- <TableCell>{formatTime(file.processing_time)}</TableCell>
- <TableCell>
- {file.status === 'completed' && (
- <>
- <Button onClick={() => onViewResult?.(file.id)}>{t('results.view')}</Button>
- <Button onClick={() => onDownloadPDF?.(file.id)}>{t('results.downloadPDF')}</Button>
- </>
- )}
- {file.status === 'failed' && file.error && (
- <span>{file.error}</span>
- )}
- </TableCell>
- </TableRow>
- ))
- )}
- </TableBody>
- </Table>
- )
-}
diff --git a/frontend/src/pages/SettingsPage.tsx b/frontend/src/pages/SettingsPage.tsx
index ff3a9e9..4b686bb 100644
--- a/frontend/src/pages/SettingsPage.tsx
+++ b/frontend/src/pages/SettingsPage.tsx
@@ -4,7 +4,7 @@ import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Button } from '@/components/ui/button'
import { useToast } from '@/components/ui/toast'
-import { apiClient } from '@/services/api'
+import { apiClientV2 } from '@/services/apiV2'
import type { ExportRule } from '@/types/apiV2'
export default function SettingsPage() {
@@ -25,12 +25,12 @@ export default function SettingsPage() {
// Fetch export rules
const { data: exportRules, isLoading } = useQuery({
queryKey: ['exportRules'],
- queryFn: () => apiClient.getExportRules(),
+ queryFn: () => apiClientV2.getExportRules(),
})
// Create rule mutation
const createRuleMutation = useMutation({
- mutationFn: (rule: any) => apiClient.createExportRule(rule),
+ mutationFn: (rule: any) => apiClientV2.createExportRule(rule),
onSuccess: () => {
queryClient.invalidateQueries({ queryKey: ['exportRules'] })
setIsCreating(false)
@@ -53,7 +53,7 @@ export default function SettingsPage() {
// Update rule mutation
const updateRuleMutation = useMutation({
mutationFn: ({ ruleId, rule }: { ruleId: number; rule: any }) =>
- apiClient.updateExportRule(ruleId, rule),
+ apiClientV2.updateExportRule(ruleId, rule),
onSuccess: () => {
queryClient.invalidateQueries({ queryKey: ['exportRules'] })
setEditingRule(null)
@@ -75,7 +75,7 @@ export default function SettingsPage() {
// Delete rule mutation
const deleteRuleMutation = useMutation({
- mutationFn: (ruleId: number) => apiClient.deleteExportRule(ruleId),
+ mutationFn: (ruleId: number) => apiClientV2.deleteExportRule(ruleId),
onSuccess: () => {
queryClient.invalidateQueries({ queryKey: ['exportRules'] })
toast({
diff --git a/frontend/src/services/api.ts b/frontend/src/services/api.ts
deleted file mode 100644
index 5cc97a9..0000000
--- a/frontend/src/services/api.ts
+++ /dev/null
@@ -1,271 +0,0 @@
-import axios, { AxiosError } from 'axios'
-import type { AxiosInstance } from 'axios'
-import type {
- LoginRequest,
- LoginResponse,
- UploadResponse,
- ProcessRequest,
- ProcessResponse,
- BatchStatus,
- OCRResult,
- ExportRequest,
- ExportRule,
- CSSTemplate,
- TranslateRequest,
- TranslateResponse,
- ApiError,
-} from '@/types/api'
-
-/**
- * API Client Configuration
- * - In Docker: VITE_API_BASE_URL is empty string, use relative path
- * - In development: Use VITE_API_BASE_URL from .env or default to localhost:8000
- */
-const envApiBaseUrl = import.meta.env.VITE_API_BASE_URL
-const API_BASE_URL = envApiBaseUrl !== undefined ? envApiBaseUrl : 'http://localhost:8000'
-const API_VERSION = 'v2'
-
-class ApiClient {
- private client: AxiosInstance
- private token: string | null = null
-
- constructor() {
- this.client = axios.create({
- baseURL: `${API_BASE_URL}/api/${API_VERSION}`,
- timeout: 30000,
- headers: {
- 'Content-Type': 'application/json',
- },
- })
-
- // Request interceptor to add auth token
- this.client.interceptors.request.use(
- (config) => {
- if (this.token) {
- config.headers.Authorization = `Bearer ${this.token}`
- }
- return config
- },
- (error) => Promise.reject(error)
- )
-
- // Response interceptor for error handling
- this.client.interceptors.response.use(
- (response) => response,
- (error: AxiosError) => {
- if (error.response?.status === 401) {
- // Token expired or invalid
- this.clearToken()
- window.location.href = '/login'
- }
- return Promise.reject(error)
- }
- )
-
- // Load token from localStorage
- this.loadToken()
- }
-
- /**
- * Set authentication token
- */
- setToken(token: string) {
- this.token = token
- localStorage.setItem('auth_token', token)
- }
-
- /**
- * Clear authentication token
- */
- clearToken() {
- this.token = null
- localStorage.removeItem('auth_token')
- }
-
- /**
- * Load token from localStorage
- */
- private loadToken() {
- const token = localStorage.getItem('auth_token')
- if (token) {
- this.token = token
- }
- }
-
- /**
- * Check if user is authenticated
- */
- isAuthenticated(): boolean {
- return this.token !== null
- }
-
- // ==================== Authentication ====================
-
- /**
- * Login
- */
- async login(data: LoginRequest): Promise<LoginResponse> {
- const response = await this.client.post('/auth/login', {
- username: data.username,
- password: data.password,
- })
-
- this.setToken(response.data.access_token)
- return response.data
- }
-
- /**
- * Logout
- */
- logout() {
- this.clearToken()
- }
-
- // ==================== File Upload ====================
-
- /**
- * Upload files
- */
- async uploadFiles(files: File[]): Promise<UploadResponse> {
- const formData = new FormData()
- files.forEach((file) => {
- formData.append('files', file)
- })
-
- const response = await this.client.post('/upload', formData, {
- headers: {
- 'Content-Type': 'multipart/form-data',
- },
- })
-
- return response.data
- }
-
- // ==================== OCR Processing ====================
-
- /**
- * Process OCR
- */
- async processOCR(data: ProcessRequest): Promise<ProcessResponse> {
- const response = await this.client.post('/ocr/process', data)
- return response.data
- }
-
- /**
- * Get OCR result by file ID
- * Note: Backend uses file-level tracking, not task-level
- */
- async getOCRResult(fileId: number): Promise<OCRResult> {
- const response = await this.client.get(`/ocr/result/${fileId}`)
- return response.data
- }
-
- /**
- * Get batch status
- */
- async getBatchStatus(batchId: number): Promise<BatchStatus> {
- const response = await this.client.get(`/batch/${batchId}/status`)
- return response.data
- }
-
- // ==================== Export ====================
-
- /**
- * Export results
- */
- async exportResults(data: ExportRequest): Promise<Blob> {
- const response = await this.client.post('/export', data, {
- responseType: 'blob',
- })
- return response.data
- }
-
- /**
- * Generate and download PDF
- */
- async exportPDF(fileId: number, cssTemplate?: string): Promise<Blob> {
- const params = cssTemplate ? { css_template: cssTemplate } : {}
- const response = await this.client.get(`/export/pdf/${fileId}`, {
- params,
- responseType: 'blob',
- })
- return response.data
- }
-
- /**
- * Get export rules
- */
- async getExportRules(): Promise<ExportRule[]> {
- const response = await this.client.get('/export/rules')
- return response.data
- }
-
- /**
- * Create export rule
- */
- async createExportRule(rule: Omit<ExportRule, 'id' | 'created_at'>): Promise<ExportRule> {
- const response = await this.client.post('/export/rules', rule)
- return response.data
- }
-
- /**
- * Update export rule
- */
- async updateExportRule(ruleId: number, rule: Partial<ExportRule>): Promise<ExportRule> {
- const response = await this.client.put(`/export/rules/${ruleId}`, rule)
- return response.data
- }
-
- /**
- * Delete export rule
- */
- async deleteExportRule(ruleId: number): Promise<void> {
- await this.client.delete(`/export/rules/${ruleId}`)
- }
-
- /**
- * Get CSS templates
- */
- async getCSSTemplates(): Promise<CSSTemplate[]> {
- const response = await this.client.get('/export/css-templates')
- return response.data
- }
-
- // ==================== Translation (FUTURE FEATURE - STUB) ====================
-
- /**
- * Translate document (STUB - Not yet implemented)
- * This is a placeholder for future translation functionality
- * @throws Will throw error with status 501 (Not Implemented)
- */
- async translateDocument(data: TranslateRequest): Promise<TranslateResponse> {
- // This endpoint is expected to return 501 Not Implemented until Phase 5
- const response = await this.client.post('/translate/document', data)
- return response.data
- }
-
- /**
- * Get translation configs (NOT IMPLEMENTED)
- * This endpoint does not exist on backend - configs will be part of Phase 5
- * @deprecated Backend endpoint does not exist - will return 404
- */
- // async getTranslationConfigs(): Promise<TranslationConfig[]> {
- // const response = await this.client.get('/translate/configs')
- // return response.data
- // }
-
- /**
- * Create translation config (NOT IMPLEMENTED)
- * This endpoint does not exist on backend - configs will be part of Phase 5
- * @deprecated Backend endpoint does not exist - will return 404
- */
- // async createTranslationConfig(
- // config: Omit<TranslationConfig, 'id' | 'created_at'>
- // ): Promise<TranslationConfig> {
- // const response = await this.client.post('/translate/configs', config)
- // return response.data
- // }
-}
-
-// Export singleton instance
-export const apiClient = new ApiClient()
diff --git a/frontend/src/services/apiV2.ts b/frontend/src/services/apiV2.ts
index 3c36969..5717101 100644
--- a/frontend/src/services/apiV2.ts
+++ b/frontend/src/services/apiV2.ts
@@ -38,6 +38,7 @@ import type {
TranslationStatusResponse,
TranslationListResponse,
TranslationResult,
+ ExportRule,
} from '@/types/apiV2'
/**
@@ -713,6 +714,39 @@ class ApiClientV2 {
link.click()
window.URL.revokeObjectURL(link.href)
}
+
+ // ==================== Export Rules APIs ====================
+
+ /**
+ * Get export rules
+ */
+ async getExportRules(): Promise<ExportRule[]> {
+ const response = await this.client.get('/export/rules')
+ return response.data
+ }
+
+ /**
+ * Create export rule
+ */
+ async createExportRule(rule: Omit<ExportRule, 'id' | 'created_at'>): Promise<ExportRule> {
+ const response = await this.client.post('/export/rules', rule)
+ return response.data
+ }
+
+ /**
+ * Update export rule
+ */
+ async updateExportRule(ruleId: number, rule: Partial<ExportRule>): Promise<ExportRule> {
+ const response = await this.client.put(`/export/rules/${ruleId}`, rule)
+ return response.data
+ }
+
+ /**
+ * Delete export rule
+ */
+ async deleteExportRule(ruleId: number): Promise<void> {
+ await this.client.delete(`/export/rules/${ruleId}`)
+ }
}
// Export singleton instance
diff --git a/frontend/src/types/api.ts b/frontend/src/types/api.ts
deleted file mode 100644
index eca1a36..0000000
--- a/frontend/src/types/api.ts
+++ /dev/null
@@ -1,182 +0,0 @@
-/**
- * API Type Definitions
- * Based on backend OpenAPI specification
- */
-
-// Authentication
-export interface LoginRequest {
- username: string
- password: string
-}
-
-export interface LoginResponse {
- access_token: string
- token_type: string
- expires_in: number // Token expiration time in seconds
-}
-
-export interface User {
- id: number
- username: string
- email?: string
- displayName?: string | null
-}
-
-// File Upload (V2 API)
-export interface UploadResponse {
- task_id: string
- filename: string
- file_size: number
- file_type: string
- status: 'pending' | 'processing' | 'completed' | 'failed'
-}
-
-export interface FileInfo {
- id: number
- filename: string
- file_size: number
- file_format: string // Changed from 'format' to match backend
- status: 'pending' | 'processing' | 'completed' | 'failed'
-}
-
-// OCR Processing
-export interface ProcessRequest {
- batch_id: number
- lang?: string
- detect_layout?: boolean // Changed from confidence_threshold to match backend
-}
-
-export interface ProcessResponse {
- message: string // Added to match backend
- batch_id: number
- total_files: number // Added to match backend
- status: string
- // Removed task_id - backend uses batch-level tracking instead
-}
-
-export interface TaskStatus {
- task_id: string
- status: 'pending' | 'processing' | 'completed' | 'failed'
- progress_percentage: number
- current_file?: string
- files_processed: number
- total_files: number
- error?: string
-}
-
-export interface BatchStatus {
- batch: {
- id: number
- status: 'pending' | 'processing' | 'completed' | 'failed'
- progress_percentage: number
- created_at: string
- completed_at?: string
- }
- files: FileResult[]
-}
-
-export interface FileResult {
- id: number
- filename: string
- status: 'pending' | 'processing' | 'completed' | 'failed'
- processing_time?: number
- error?: string
-}
-
-// OCR Results
-export interface OCRResult {
- file_id: number
- filename: string
- status: string
- markdown_content: string
- json_data: OCRJsonData
- confidence: number
- processing_time: number
-}
-
-export interface OCRJsonData {
- total_text_regions: number
- average_confidence: number
- text_blocks: TextBlock[]
- layout_info?: LayoutInfo
-}
-
-export interface TextBlock {
- text: string
- confidence: number
- bbox: [number, number, number, number]
- position: number
-}
-
-export interface LayoutInfo {
- tables_detected: number
- images_detected: number
- structure: string
-}
-
-// Export
-export interface ExportRequest {
- batch_id: number
- format: 'txt' | 'json' | 'excel' | 'markdown' | 'pdf'
- rule_id?: number
- options?: ExportOptions
-}
-
-export interface ExportOptions {
- confidence_threshold?: number
- include_metadata?: boolean
- filename_pattern?: string
- css_template?: string
-}
-
-export interface ExportRule {
- id: number
- rule_name: string
- config_json: Record<string, any>
- css_template?: string
- created_at: string
-}
-
-export interface CSSTemplate {
- name: string
- description: string
- // filename is not returned by backend - use name as identifier
-}
-
-// Translation (FUTURE FEATURE)
-export interface TranslateRequest {
- file_id: number
- source_lang: string
- target_lang: string
- engine_type?: 'argos' | 'ernie' | 'google'
-}
-
-export interface TranslateResponse {
- task_id: string
- file_id: number
- status: 'pending' | 'processing' | 'completed' | 'failed'
- translated_content?: string
-}
-
-export interface TranslationConfig {
- id: number
- source_lang: string
- target_lang: string
- engine_type: 'argos' | 'ernie' | 'google'
- engine_config: Record<string, any>
- created_at: string
-}
-
-// API Response
-export interface ApiResponse<T> {
- success: boolean
- data?: T
- error?: string
- message?: string
-}
-
-// Error Response
-export interface ApiError {
- detail: string
- status_code: number
-}