refactor: remove unused code and migrate legacy API

Backend cleanup:
- Remove ocr_service_original.py (legacy OCR service, replaced by ocr_service.py)
- Remove preprocessor.py (unused, functionality absorbed by layout_preprocessing_service.py)
- Remove pdf_font_manager.py (unused, never referenced by any service)

Frontend cleanup:
- Remove MarkdownPreview.tsx (unused component)
- Remove ResultsTable.tsx (unused, replaced by TaskHistoryPage)
- Remove services/api.ts (legacy API client, migrated to apiV2)
- Remove types/api.ts (legacy types, migrated to apiV2.ts)

API migration:
- Add export rules CRUD methods to apiClientV2
- Update SettingsPage.tsx to use apiClientV2
- Update Layout.tsx to use only apiClientV2 for logout

This removes ~1,500 lines of redundant code and unifies the API client.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
egg
2025-12-11 12:03:09 +08:00
parent 940a406dce
commit 5d962ca97c
10 changed files with 40 additions and 1958 deletions

View File

@@ -1,835 +0,0 @@
"""
Tool_OCR - Core OCR Service
PaddleOCR-VL integration for text and structure extraction
"""
import json
import logging
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from datetime import datetime
import uuid
from paddleocr import PaddleOCR, PPStructureV3
from PIL import Image
from pdf2image import convert_from_path
import paddle
from app.core.config import settings
from app.services.office_converter import OfficeConverter, OfficeConverterError
logger = logging.getLogger(__name__)
class OCRService:
"""
Core OCR service using PaddleOCR-VL
Handles text recognition and document structure analysis
"""
def __init__(self):
    """
    Set up service state and probe for GPU support

    No OCR model is loaded here: the per-language OCR engine cache and the
    layout engine start out empty and are created on first request.
    """
    # GPU state defaults; _detect_and_configure_gpu() below overwrites them
    self.gpu_info = {}
    self.use_gpu = False
    self.gpu_available = False
    # Engine caches, filled on demand
    self.structure_engine = None
    self.ocr_engines = {}
    # Values read from application settings
    self.ocr_languages = settings.ocr_languages_list
    self.confidence_threshold = settings.ocr_confidence_threshold
    # Converter used for Office documents
    self.office_converter = OfficeConverter()
    self._detect_and_configure_gpu()
    logger.info("OCR Service initialized")
def _detect_and_configure_gpu(self):
    """
    Detect GPU availability and configure usage

    Updates ``self.gpu_available``, ``self.use_gpu`` and ``self.gpu_info``.
    Any exception raised during detection is logged and leaves the service
    in CPU mode; nothing is propagated to the caller.
    """
    try:
        if settings.force_cpu_mode:
            logger.info("GPU mode forced to CPU by configuration")
            self.use_gpu = False
            self.gpu_info = {
                'available': False,
                'reason': 'CPU mode forced by configuration',
            }
            # Early exit: this path skips the summary log at the end
            return

        if not paddle.is_compiled_with_cuda():
            logger.info("PaddlePaddle not compiled with CUDA support")
            self.gpu_info = {
                'available': False,
                'reason': 'PaddlePaddle not compiled with CUDA',
            }
        else:
            gpu_count = paddle.device.cuda.device_count()
            if gpu_count <= 0:
                logger.warning("CUDA is available but no GPU devices found")
                self.gpu_info = {
                    'available': False,
                    'reason': 'CUDA compiled but no GPU devices detected',
                }
            else:
                self.gpu_available = True
                self.use_gpu = True

                # Fall back to device 0 for any id outside [0, gpu_count),
                # including negative ids, and say so in the log
                device_id = settings.gpu_device_id
                if not 0 <= device_id < gpu_count:
                    logger.warning(
                        f"Configured GPU device id {device_id} is out of range "
                        f"(0-{gpu_count - 1}), using device 0"
                    )
                    device_id = 0

                gpu_props = paddle.device.cuda.get_device_properties(device_id)
                self.gpu_info = {
                    'available': True,
                    'device_count': gpu_count,
                    'device_id': device_id,
                    'device_name': gpu_props.name,
                    'total_memory': gpu_props.total_memory,
                    'compute_capability': f"{gpu_props.major}.{gpu_props.minor}",
                }

                try:
                    # Select the device globally for PaddlePaddle
                    paddle.device.set_device(f'gpu:{device_id}')
                    logger.info(f"GPU {device_id} selected: {gpu_props.name}")
                    logger.info(f"GPU memory: {gpu_props.total_memory / (1024**3):.2f} GB")
                    logger.info(f"Compute capability: {gpu_props.major}.{gpu_props.minor}")
                    # Only reported: nothing in this method applies the fraction
                    logger.info(
                        f"GPU memory fraction configured: {settings.gpu_memory_fraction} "
                        f"(not applied by this method)"
                    )
                except Exception as e:
                    logger.warning(f"Failed to configure GPU device: {e}")
                    self.use_gpu = False
                    self.gpu_info['available'] = False
                    self.gpu_info['reason'] = f'GPU configuration failed: {str(e)}'
    except Exception as e:
        logger.error(f"GPU detection failed: {e}")
        self.use_gpu = False
        self.gpu_info = {
            'available': False,
            'reason': f'GPU detection error: {str(e)}',
        }

    # Log final GPU status
    if self.use_gpu:
        logger.info(f"✓ GPU acceleration ENABLED - Using {self.gpu_info.get('device_name', 'Unknown GPU')}")
    else:
        reason = self.gpu_info.get('reason', 'Unknown')
        logger.info(f" GPU acceleration DISABLED - {reason} - Using CPU mode")
def get_gpu_status(self) -> Dict:
    """
    Report the current GPU state

    Returns:
        Dictionary holding 'gpu_enabled', 'gpu_available' and every entry of
        ``self.gpu_info``; when the GPU is in use, memory figures in MB and a
        utilization percentage are added if they can be read
    """
    report = {'gpu_enabled': self.use_gpu, 'gpu_available': self.gpu_available}
    report.update(self.gpu_info)

    # Memory figures only make sense when a GPU is actually being used
    if not (self.use_gpu and self.gpu_available):
        return report

    bytes_per_mb = 1024**2
    try:
        gpu_id = self.gpu_info.get('device_id', 0)
        allocated = paddle.device.cuda.memory_allocated(gpu_id)
        reserved = paddle.device.cuda.memory_reserved(gpu_id)
        total = self.gpu_info.get('total_memory', 0)
        # Keys are written only after all three values were obtained
        report['memory_allocated_mb'] = allocated / bytes_per_mb
        report['memory_reserved_mb'] = reserved / bytes_per_mb
        report['memory_total_mb'] = total / bytes_per_mb
        report['memory_utilization'] = (allocated / total * 100) if total > 0 else 0
    except Exception as e:
        logger.warning(f"Failed to get GPU memory info: {e}")
    return report
def get_ocr_engine(self, lang: str = 'ch') -> PaddleOCR:
    """
    Return the cached OCR engine for a language, creating it on first use

    If engine creation fails while GPU mode is on, the service switches to
    CPU globally and retries once; in CPU mode the error is re-raised.

    Args:
        lang: Language code (ch, en, japan, korean, etc.)
    Returns:
        PaddleOCR engine instance
    """
    if lang in self.ocr_engines:
        return self.ocr_engines[lang]

    def build_engine():
        # PaddleOCR 3.x: device is set globally via paddle.set_device(),
        # so no device/use_gpu/gpu_mem arguments are passed
        return PaddleOCR(
            lang=lang,
            use_textline_orientation=True,  # Replaces deprecated use_angle_cls
        )

    logger.info(f"Initializing PaddleOCR engine for language: {lang} (GPU: {self.use_gpu})")
    try:
        self.ocr_engines[lang] = build_engine()
        logger.info(f"PaddleOCR engine ready for {lang} (PaddlePaddle {paddle.__version__}, {'GPU' if self.use_gpu else 'CPU'} mode)")
    except Exception as e:
        if not self.use_gpu:
            raise
        # GPU initialization failed: switch to CPU globally and retry
        logger.warning(f"GPU initialization failed, falling back to CPU: {e}")
        self.use_gpu = False
        paddle.set_device('cpu')
        self.ocr_engines[lang] = build_engine()
        logger.info(f"PaddleOCR engine ready for {lang} (CPU mode - fallback)")
    return self.ocr_engines[lang]
def get_structure_engine(self) -> PPStructureV3:
    """
    Return the shared PP-StructureV3 layout engine, creating it on first use

    If creation fails while GPU mode is on, the service switches to CPU
    globally and retries once; in CPU mode the error is re-raised.

    Returns:
        PPStructure engine instance
    """
    if self.structure_engine is not None:
        return self.structure_engine

    # PaddleOCR 3.x: device is set globally via paddle.set_device(),
    # so no device/use_gpu/gpu_mem arguments are passed
    engine_options = dict(
        use_doc_orientation_classify=False,
        use_doc_unwarping=False,
        use_textline_orientation=False,
        use_table_recognition=True,
        use_formula_recognition=True,
        # Original note: chart recognition requires PaddlePaddle >= 3.2.0
        # for fused_rms_norm_ext
        use_chart_recognition=True,
        layout_threshold=0.5,
    )

    logger.info(f"Initializing PP-StructureV3 engine (GPU: {self.use_gpu})")
    try:
        self.structure_engine = PPStructureV3(**engine_options)
        logger.info(f"PP-StructureV3 engine ready (PaddlePaddle {paddle.__version__}, {'GPU' if self.use_gpu else 'CPU'} mode)")
    except Exception as e:
        if not self.use_gpu:
            raise
        # GPU initialization failed: switch to CPU globally and retry
        logger.warning(f"GPU initialization failed for PP-Structure, falling back to CPU: {e}")
        self.use_gpu = False
        paddle.set_device('cpu')
        self.structure_engine = PPStructureV3(**engine_options)
        logger.info("PP-StructureV3 engine ready (CPU mode - fallback)")
    return self.structure_engine
def convert_pdf_to_images(self, pdf_path: Path, output_dir: Path, dpi: int = 300) -> List[Path]:
    """
    Convert PDF to images (one PNG per page)

    Args:
        pdf_path: Path to PDF file
        output_dir: Directory to save converted images (created if missing)
        dpi: Rendering resolution; defaults to 300, the value that was
            previously hard-coded
    Returns:
        List of paths to converted images, in page order
    Raises:
        Exception: any conversion or save error is logged and re-raised
    """
    try:
        output_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Converting PDF {pdf_path.name} to images")
        images = convert_from_path(
            str(pdf_path),
            dpi=dpi,
            fmt='png'
        )
        image_paths = []
        for page_number, image in enumerate(images, 1):
            # File names use 1-based page numbers
            image_path = output_dir / f"{pdf_path.stem}_page_{page_number}.png"
            image.save(str(image_path), 'PNG')
            image_paths.append(image_path)
            logger.info(f"Saved page {page_number} to {image_path.name}")
        logger.info(f"Converted {len(image_paths)} pages from PDF")
        return image_paths
    except Exception as e:
        logger.error(f"PDF conversion error: {str(e)}")
        raise
def process_image(
    self,
    image_path: Path,
    lang: str = 'ch',
    detect_layout: bool = True,
    confidence_threshold: Optional[float] = None,
    output_dir: Optional[Path] = None,
    current_page: int = 0
) -> Dict:
    """
    Process a single file with OCR and optional layout analysis

    Office documents are first converted to PDF. PDFs are rendered to one
    image per page and each page is processed by a recursive call. Any
    exception is caught and reported through the returned dictionary.

    Args:
        image_path: Path to image, PDF or Office file
        lang: Language for OCR
        detect_layout: Whether to perform layout analysis
        confidence_threshold: Minimum confidence threshold (uses default if None)
        output_dir: Optional output directory for saving extracted images
        current_page: Current page number (0-based) for multi-page documents
    Returns:
        Dictionary with 'status' set to 'success' plus OCR results and
        metadata, or 'status' set to 'error' plus 'error_message'
    """
    start_time = datetime.now()
    threshold = confidence_threshold if confidence_threshold is not None else self.confidence_threshold
    try:
        # Office documents go through PDF
        if self.office_converter.is_office_document(image_path):
            logger.info(f"Detected Office document: {image_path.name}, converting to PDF")
            try:
                pdf_path = self.office_converter.convert_to_pdf(image_path)
                logger.info(f"Office document converted to PDF: {pdf_path.name}")
                image_path = pdf_path
            except OfficeConverterError as e:
                logger.error(f"Office conversion failed: {str(e)}")
                raise

        if image_path.suffix.lower() == '.pdf':
            return self._process_pdf_pages(
                image_path, lang, detect_layout, confidence_threshold, output_dir, start_time
            )

        # Single image from here on
        ocr_engine = self.get_ocr_engine(lang)
        # Record the pixel size of the image handed to OCR
        with Image.open(image_path) as img:
            ocr_width, ocr_height = img.size
        logger.info(f"OCR processing image dimensions: {ocr_width}x{ocr_height}")
        logger.info(f"Processing image: {image_path.name}")
        # Note: in PaddleOCR 3.x, use_angle_cls is set during initialization, not in ocr()
        ocr_results = ocr_engine.ocr(str(image_path))
        text_regions, avg_confidence = self._parse_ocr_results(ocr_results, threshold)
        logger.info(f"Parsed {len(text_regions)} text regions with avg confidence {avg_confidence:.3f}")

        layout_data = None
        images_metadata = []
        if detect_layout:
            # current_page keeps page numbering correct for multi-page input
            layout_data, images_metadata = self.analyze_layout(image_path, output_dir=output_dir, current_page=current_page)

        markdown_content = self.generate_markdown(text_regions, layout_data)
        processing_time = (datetime.now() - start_time).total_seconds()
        result = {
            'status': 'success',
            'file_name': image_path.name,
            'language': lang,
            'text_regions': text_regions,
            'total_text_regions': len(text_regions),
            'average_confidence': avg_confidence,
            'layout_data': layout_data,
            'images_metadata': images_metadata,
            'markdown_content': markdown_content,
            'processing_time': processing_time,
            'timestamp': datetime.utcnow().isoformat(),
            'ocr_dimensions': {
                'width': ocr_width,
                'height': ocr_height
            }
        }
        logger.info(
            f"OCR completed: {image_path.name} - "
            f"{len(text_regions)} regions, "
            f"{avg_confidence:.2f} avg confidence, "
            f"{processing_time:.2f}s"
        )
        return result
    except Exception as e:
        # logger.exception appends the active traceback to the message
        logger.exception(f"OCR processing error for {image_path.name}: {str(e)}")
        return {
            'status': 'error',
            'file_name': image_path.name,
            'error_message': str(e),
            'processing_time': (datetime.now() - start_time).total_seconds(),
        }


def _parse_ocr_results(self, ocr_results, threshold: float) -> Tuple[List[Dict], float]:
    """Turn raw PaddleOCR 3.x output into filtered text regions and their mean confidence."""
    text_regions = []
    total_confidence = 0.0
    if isinstance(ocr_results, (list, tuple)):
        # One dictionary per page:
        # {'rec_texts': [...], 'rec_scores': [...], 'rec_polys': [...]}
        for page_result in ocr_results:
            if not isinstance(page_result, dict):
                continue
            texts = page_result.get('rec_texts', [])
            scores = page_result.get('rec_scores', [])
            polys = page_result.get('rec_polys', [])
            for idx, text in enumerate(texts):
                # A missing score counts as full confidence, a missing box as empty
                confidence = scores[idx] if idx < len(scores) else 1.0
                bbox = polys[idx] if idx < len(polys) else []
                # Convert array-like boxes to lists for JSON serialization
                if hasattr(bbox, 'tolist'):
                    bbox = bbox.tolist()
                if confidence >= threshold:
                    text_regions.append({
                        'text': text,
                        'bbox': bbox,
                        'confidence': float(confidence),
                    })
                    total_confidence += confidence
    avg_confidence = total_confidence / len(text_regions) if text_regions else 0.0
    return text_regions, avg_confidence


def _process_pdf_pages(
    self,
    pdf_path: Path,
    lang: str,
    detect_layout: bool,
    confidence_threshold: Optional[float],
    output_dir: Optional[Path],
    start_time: datetime
) -> Dict:
    """Render a PDF to page images, process every page and merge the page results."""
    logger.info(f"Detected PDF file: {pdf_path.name}, converting to images")
    pdf_images_dir = pdf_path.parent / f"{pdf_path.stem}_pages"
    image_paths = self.convert_pdf_to_images(pdf_path, pdf_images_dir)

    all_text_regions = []
    total_confidence_sum = 0.0
    total_valid_regions = 0
    all_layout_data = []
    all_images_metadata = []
    all_ocr_dimensions = []
    for page_num, page_image_path in enumerate(image_paths, 1):
        logger.info(f"Processing PDF page {page_num}/{len(image_paths)}")
        page_result = self.process_image(
            page_image_path,
            lang=lang,
            detect_layout=detect_layout,
            confidence_threshold=confidence_threshold,
            output_dir=output_dir,
            current_page=page_num - 1  # layout data uses 0-based page numbers
        )
        if page_result['status'] != 'success':
            # A failed page is still left out of the merged result,
            # but the omission is now visible in the log
            logger.warning(
                f"PDF page {page_num} of {pdf_path.name} failed and was skipped: "
                f"{page_result.get('error_message', 'unknown error')}"
            )
            continue

        # Text regions carry 1-based page numbers
        for region in page_result['text_regions']:
            region['page'] = page_num
            all_text_regions.append(region)
        total_confidence_sum += page_result['average_confidence'] * page_result['total_text_regions']
        total_valid_regions += page_result['total_text_regions']
        if page_result.get('layout_data'):
            all_layout_data.append(page_result['layout_data'])
        if page_result.get('images_metadata'):
            all_images_metadata.extend(page_result['images_metadata'])
        if page_result.get('ocr_dimensions'):
            all_ocr_dimensions.append({
                'page': page_num,
                'width': page_result['ocr_dimensions']['width'],
                'height': page_result['ocr_dimensions']['height']
            })

    # Region-weighted average over all successful pages
    avg_confidence = total_confidence_sum / total_valid_regions if total_valid_regions > 0 else 0.0

    combined_elements = []
    for layout in all_layout_data:
        if layout.get('elements'):
            combined_elements.extend(layout['elements'])
    combined_layout = None
    if combined_elements:
        combined_layout = {
            'elements': combined_elements,
            'total_elements': len(combined_elements),
            'reading_order': list(range(len(combined_elements))),
        }

    markdown_content = self.generate_markdown(all_text_regions, combined_layout)
    processing_time = (datetime.now() - start_time).total_seconds()
    logger.info(
        f"PDF processing completed: {pdf_path.name} - "
        f"{len(image_paths)} pages, "
        f"{len(all_text_regions)} regions, "
        f"{avg_confidence:.2f} avg confidence, "
        f"{processing_time:.2f}s"
    )
    return {
        'status': 'success',
        'file_name': pdf_path.name,
        'language': lang,
        'text_regions': all_text_regions,
        'total_text_regions': len(all_text_regions),
        'average_confidence': avg_confidence,
        'layout_data': combined_layout,
        'images_metadata': all_images_metadata,
        'markdown_content': markdown_content,
        'processing_time': processing_time,
        'timestamp': datetime.utcnow().isoformat(),
        'total_pages': len(image_paths),
        'ocr_dimensions': all_ocr_dimensions if all_ocr_dimensions else None,
    }
def _extract_table_text(self, html_content: str) -> str:
"""
Extract text from HTML table content for translation purposes
Args:
html_content: HTML content containing table
Returns:
Extracted text from table cells
"""
try:
from html.parser import HTMLParser
class TableTextExtractor(HTMLParser):
def __init__(self):
super().__init__()
self.text_parts = []
self.in_table = False
def handle_starttag(self, tag, attrs):
if tag == 'table':
self.in_table = True
def handle_endtag(self, tag):
if tag == 'table':
self.in_table = False
elif tag in ('td', 'th') and self.in_table:
self.text_parts.append(' | ') # Cell separator
elif tag == 'tr' and self.in_table:
self.text_parts.append('\n') # Row separator
def handle_data(self, data):
if self.in_table:
stripped = data.strip()
if stripped:
self.text_parts.append(stripped)
parser = TableTextExtractor()
parser.feed(html_content)
# Clean up the extracted text
extracted = ''.join(parser.text_parts)
# Remove multiple separators
import re
extracted = re.sub(r'\s*\|\s*\|+\s*', ' | ', extracted)
extracted = re.sub(r'\n+', '\n', extracted)
extracted = extracted.strip()
return extracted
except Exception as e:
logger.warning(f"Failed to extract table text: {e}")
# Fallback: just remove HTML tags
import re
text = re.sub(r'<[^>]+>', ' ', html_content)
text = re.sub(r'\s+', ' ', text)
return text.strip()
def analyze_layout(self, image_path: Path, output_dir: Optional[Path] = None, current_page: int = 0) -> Tuple[Optional[Dict], List[Dict]]:
    """
    Analyze document layout using PP-StructureV3

    Each result with a markdown dictionary contributes at most one layout
    element (its whole markdown text) plus one metadata entry per extracted
    image; extracted images are also written to disk.

    Args:
        image_path: Path to image file
        output_dir: Optional output directory for saving extracted images (defaults to image_path.parent)
        current_page: Current page number (0-based) for multi-page documents
    Returns:
        Tuple of (layout_data, images_metadata). layout_data is None when no
        text element was found; metadata of images found in that case is
        still returned. On any exception the result is (None, []).
    """
    import re

    try:
        structure_engine = self.get_structure_engine()
        # predict() is the PaddleOCR 3.x API
        logger.info(f"Running layout analysis on {image_path.name}")
        results = structure_engine.predict(str(image_path))
        layout_elements = []
        images_metadata = []
        # Extracted images are stored relative to this directory
        base_dir = output_dir if output_dir else image_path.parent

        for page_idx, page_result in enumerate(results):
            if not hasattr(page_result, 'markdown'):
                continue
            markdown_dict = page_result.markdown
            logger.info(f"Page {page_idx} markdown keys: {markdown_dict.keys() if isinstance(markdown_dict, dict) else type(markdown_dict)}")
            if not isinstance(markdown_dict, dict):
                continue

            markdown_texts = markdown_dict.get('markdown_texts', '')
            markdown_images = markdown_dict.get('markdown_images', {})

            if markdown_texts:
                # The whole text is a table element if it contains any <table>
                has_table = '<table' in markdown_texts.lower()
                element = {
                    'element_id': len(layout_elements),
                    'type': 'table' if has_table else 'text',
                    'content': markdown_texts,
                    'page': current_page,  # caller-supplied page, not page_idx
                    'bbox': [],  # no per-element bbox is available in this format
                }
                if has_table:
                    # Plain-text version of the table for translation purposes
                    table_text = self._extract_table_text(markdown_texts)
                    element['extracted_text'] = table_text
                    logger.info(f"Extracted {len(table_text)} characters from table")
                layout_elements.append(element)

            for img_idx, (img_path, img_obj) in enumerate(markdown_images.items()):
                # Saving is best-effort: a failure is logged and processing continues
                try:
                    full_img_path = base_dir / img_path
                    full_img_path.parent.mkdir(parents=True, exist_ok=True)
                    if hasattr(img_obj, 'save'):
                        img_obj.save(str(full_img_path))
                        logger.info(f"Saved extracted image to {full_img_path}")
                    else:
                        logger.warning(f"Image object for {img_path} does not have save() method, skipping")
                except Exception as e:
                    logger.warning(f"Failed to save image {img_path}: {str(e)}")

                # Bbox is encoded in the filename (e.g. img_in_table_box_x1_y1_x2_y2.jpg)
                bbox = []
                try:
                    match = re.search(r'box_(\d+)_(\d+)_(\d+)_(\d+)', img_path)
                    if match:
                        x1, y1, x2, y2 = map(int, match.groups())
                        # 4-point format: [[x1,y1], [x2,y1], [x2,y2], [x1,y2]]
                        bbox = [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]
                        logger.info(f"Extracted bbox from filename: {bbox}")
                except Exception as e:
                    logger.warning(f"Failed to extract bbox from {img_path}: {e}")

                images_metadata.append({
                    'element_id': len(layout_elements) + img_idx,
                    'image_path': img_path,
                    'type': 'image',
                    'page': current_page,  # caller-supplied page, not page_idx
                    'bbox': bbox,
                })

        if layout_elements:
            layout_data = {
                'elements': layout_elements,
                'total_elements': len(layout_elements),
                'reading_order': list(range(len(layout_elements))),
            }
            logger.info(f"Detected {len(layout_elements)} layout elements")
            return layout_data, images_metadata

        logger.warning("No layout elements detected")
        # Keep metadata for images that were already written to disk
        return None, images_metadata
    except Exception as e:
        # logger.exception appends the active traceback to the message
        logger.exception(f"Layout analysis error: {str(e)}")
        return None, []
def generate_markdown(
    self,
    text_regions: List[Dict],
    layout_data: Optional[Dict] = None
) -> str:
    """
    Build Markdown text from OCR output

    When layout elements are present they alone determine the output;
    otherwise the text regions are emitted top to bottom.

    Args:
        text_regions: List of text regions with bbox and text
        layout_data: Optional layout structure information
    Returns:
        Markdown formatted string
    """
    elements = layout_data.get('elements') if layout_data else None
    if elements:
        rendered = []
        for item in elements:
            kind = item.get('type', 'text')
            body = item.get('content', '')
            if kind == 'table':
                # Tables are emitted as-is (HTML) followed by an empty line
                rendered.extend([body, ""])
            elif kind == 'title':
                rendered.append(f"# {body}\n")
            elif kind == 'figure':
                figure_id = item.get('element_id')
                rendered.append(f"![Figure {figure_id}](./images/img_{figure_id}.jpg)\n")
            else:
                rendered.append(f"{body}\n")
        return "\n".join(rendered)

    def top_edge(region):
        """Y coordinate used for ordering; 0 when the bbox cannot be read."""
        box = region.get('bbox', [])
        if not isinstance(box, (list, tuple)) or len(box) == 0:
            return 0
        first = box[0]
        if isinstance(first, (list, tuple)) and len(first) > 1:
            return first[1]  # [[x1,y1], [x2,y2], ...] format
        if len(box) > 1:
            return box[1]  # [x1, y1, x2, y2, ...] format
        return 0

    ordered = sorted(text_regions, key=top_edge)
    return "\n".join(region['text'] for region in ordered)
def save_results(
    self,
    result: Dict,
    output_dir: Path,
    file_id: str,
    source_file_path: Optional[Path] = None
) -> Tuple[Optional[Path], Optional[Path], Optional[Path]]:
    """
    Save OCR results to JSON, Markdown, and layout-preserving PDF files

    PDF generation is best-effort: if it fails, the JSON and Markdown paths
    are still returned and pdf_path is None. If writing JSON or Markdown
    fails, the error is logged and all three values are None.

    Args:
        result: OCR result dictionary
        output_dir: Output directory (created if missing)
        file_id: Unique file identifier
        source_file_path: Optional path to original source file for PDF generation
    Returns:
        Tuple of (json_path, markdown_path, pdf_path)
    """
    try:
        output_dir.mkdir(parents=True, exist_ok=True)

        json_path = output_dir / f"{file_id}_result.json"
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(result, f, ensure_ascii=False, indent=2)

        markdown_path = output_dir / f"{file_id}_output.md"
        markdown_content = result.get('markdown_content', '')
        with open(markdown_path, 'w', encoding='utf-8') as f:
            f.write(markdown_content)
        logger.info(f"Results saved: {json_path.name}, {markdown_path.name}")

        pdf_path = None
        try:
            # Imported here rather than at module level
            from app.services.pdf_generator_service import pdf_generator_service
            pdf_filename = f"{file_id}_layout.pdf"
            pdf_path = output_dir / pdf_filename
            logger.info(f"Generating layout-preserving PDF: {pdf_filename}")
            success = pdf_generator_service.generate_layout_pdf(
                json_path=json_path,
                output_path=pdf_path,
                source_file_path=source_file_path
            )
            if success:
                logger.info(f"✓ PDF generated successfully: {pdf_path.name}")
            else:
                logger.warning(f"✗ PDF generation failed for {file_id}")
                pdf_path = None
        except Exception as e:
            # Send the traceback through the logger instead of printing to stderr
            logger.exception(f"Error generating PDF for {file_id}: {str(e)}")
            pdf_path = None
        return json_path, markdown_path, pdf_path
    except Exception as e:
        logger.exception(f"Error saving results: {str(e)}")
        return None, None, None

View File

@@ -1,312 +0,0 @@
"""
PDF Font Manager - Handles font loading, registration, and fallback.
This module provides unified font management for PDF generation,
including CJK font support and font fallback mechanisms.
"""
import logging
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
logger = logging.getLogger(__name__)
# ============================================================================
# Configuration
# ============================================================================
@dataclass
class FontConfig:
    """Settings that control which fonts are registered and which sizes are used."""

    # --- Primary (custom) font ---
    # Name the Chinese font is registered under
    chinese_font_name: str = "NotoSansSC"
    # Location of the font file; None means no explicit path is configured
    chinese_font_path: Optional[Path] = None

    # --- Built-in fallbacks ---
    fallback_font_name: str = "Helvetica"
    # Built-in ReportLab CJK font
    fallback_cjk_font_name: str = "HeiseiMin-W3"

    # --- Font sizes ---
    default_font_size: int = 10
    min_font_size: int = 6
    max_font_size: int = 14

    # --- Registration switches ---
    # Register fonts when the manager is constructed
    auto_register: bool = True
    # Also set up the built-in CJK fallback font
    enable_cjk_fallback: bool = True
# ============================================================================
# Font Manager
# ============================================================================
class FontManager:
    """
    Manages font registration and selection for PDF generation.

    Features:
    - Singleton: every construction returns the same instance, and only the
      first construction applies its config
    - CJK (Chinese/Japanese/Korean) font support
    - Automatic fallback to built-in fonts
    - Font caching to avoid duplicate registration
    """

    _instance = None
    # Font name -> font file path for every custom font registered so far
    _registered_fonts: Dict[str, Path] = {}
    # Inclusive code-point ranges that _contains_cjk() treats as CJK
    _CJK_RANGES = (
        (0x4E00, 0x9FFF),    # CJK Unified Ideographs
        (0x3400, 0x4DBF),    # CJK Extension A
        (0x20000, 0x2A6DF),  # CJK Extension B
        (0x3000, 0x303F),    # CJK Punctuation
        (0x3040, 0x309F),    # Hiragana
        (0x30A0, 0x30FF),    # Katakana
        (0xAC00, 0xD7AF),    # Korean Hangul
    )

    def __new__(cls, *args, **kwargs):
        """Singleton pattern to avoid duplicate font registration."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self, config: Optional["FontConfig"] = None):
        """
        Initialize FontManager.

        Args:
            config: FontConfig instance (uses defaults if None). Ignored when
                the singleton has already been initialized.
        """
        if self._initialized:
            return
        self.config = config or FontConfig()
        self._primary_font_registered = False
        self._cjk_fallback_available = False
        if self.config.auto_register:
            self._register_fonts()
        self._initialized = True

    @property
    def primary_font_name(self) -> str:
        """Get the primary font name to use."""
        if self._primary_font_registered:
            return self.config.chinese_font_name
        return self.config.fallback_font_name

    @property
    def is_cjk_enabled(self) -> bool:
        """Check if CJK fonts are available."""
        return self._primary_font_registered or self._cjk_fallback_available

    @classmethod
    def reset(cls):
        """Reset singleton instance (for testing)."""
        cls._instance = None
        cls._registered_fonts = {}

    def get_font_for_text(self, text: str) -> str:
        """
        Get appropriate font name for given text.

        Args:
            text: Text to render
        Returns:
            Font name suitable for the text content
        """
        if self._contains_cjk(text):
            if self._primary_font_registered:
                return self.config.chinese_font_name
            elif self._cjk_fallback_available:
                return self.config.fallback_cjk_font_name
        return self.primary_font_name

    def get_font_size(
        self,
        text: str,
        available_width: float,
        available_height: float,
        pdf_canvas=None
    ) -> int:
        """
        Calculate optimal font size for text to fit within bounds.

        Sizes are tried from max_font_size down to min_font_size.

        Args:
            text: Text to render
            available_width: Maximum width available
            available_height: Maximum height available
            pdf_canvas: Optional canvas for precise measurement
        Returns:
            Largest size that fits, or min_font_size if none does
        """
        font_name = self.get_font_for_text(text)
        for size in range(self.config.max_font_size, self.config.min_font_size - 1, -1):
            if pdf_canvas:
                # Precise measurement with canvas
                text_width = pdf_canvas.stringWidth(text, font_name, size)
            else:
                # Rough estimate: 0.6 x font size per character
                text_width = len(text) * size * 0.6
            text_height = size * 1.2  # Line height
            if text_width <= available_width and text_height <= available_height:
                return size
        return self.config.min_font_size

    def register_font(
        self,
        font_name: str,
        font_path: Path,
        force: bool = False
    ) -> bool:
        """
        Register a custom font.

        Args:
            font_name: Name to register font under
            font_path: Path to TTF font file
            force: Force re-registration if already registered
        Returns:
            True if the font is registered (now or previously), False on failure
        """
        if font_name in self._registered_fonts and not force:
            logger.debug(f"Font {font_name} already registered")
            return True
        try:
            if not font_path.exists():
                logger.error(f"Font file not found: {font_path}")
                return False
            pdfmetrics.registerFont(TTFont(font_name, str(font_path)))
            self._registered_fonts[font_name] = font_path
            logger.info(f"Font registered: {font_name} from {font_path}")
            return True
        except Exception as e:
            logger.error(f"Failed to register font {font_name}: {e}")
            return False

    def get_registered_fonts(self) -> List[str]:
        """Get list of registered custom font names."""
        return list(self._registered_fonts.keys())

    # =========================================================================
    # Private Methods
    # =========================================================================
    def _register_fonts(self):
        """Register configured fonts."""
        # Always attempt the primary font: _register_chinese_font() falls back
        # to the application settings when the config carries no path.
        # Skipping the call when the path is None made that fallback unreachable.
        self._register_chinese_font()
        if self.config.enable_cjk_fallback:
            self._setup_cjk_fallback()

    def _register_chinese_font(self):
        """Register the primary Chinese font from the config path or, failing that, from settings."""
        font_path = self.config.chinese_font_path
        if font_path is None:
            # No explicit path: try the application settings
            try:
                from app.core.config import settings
                font_path = Path(settings.chinese_font_path)
            except Exception as e:
                logger.debug(f"Could not load font path from settings: {e}")
                return
        if not font_path.is_absolute():
            # Relative paths are resolved four directory levels above this file
            project_root = Path(__file__).resolve().parent.parent.parent.parent
            font_path = project_root / font_path
        if not font_path.exists():
            logger.warning(f"Chinese font not found at {font_path}")
            return
        try:
            pdfmetrics.registerFont(TTFont(self.config.chinese_font_name, str(font_path)))
            self._registered_fonts[self.config.chinese_font_name] = font_path
            self._primary_font_registered = True
            logger.info(f"Chinese font registered: {self.config.chinese_font_name}")
        except Exception as e:
            logger.error(f"Failed to register Chinese font: {e}")

    def _setup_cjk_fallback(self):
        """Register the configured built-in CID font as the CJK fallback."""
        try:
            from reportlab.pdfbase.cidfonts import UnicodeCIDFont
        except ImportError:
            logger.debug("CID fonts not available for CJK fallback")
            return
        # Register the same name get_font_for_text() hands out, so a
        # non-default fallback_cjk_font_name cannot point at an unregistered font
        cjk_name = self.config.fallback_cjk_font_name
        try:
            pdfmetrics.registerFont(UnicodeCIDFont(cjk_name))
        except Exception as e:
            # Best-effort: the fallback stays disabled, but the reason is logged
            logger.debug(f"Could not register CJK fallback font {cjk_name}: {e}")
            return
        self._cjk_fallback_available = True
        logger.debug(f"CJK fallback font available: {cjk_name}")

    def _contains_cjk(self, text: str) -> bool:
        """
        Check if text contains CJK characters.

        Args:
            text: Text to check
        Returns:
            True if any character falls inside one of _CJK_RANGES
        """
        if not text:
            return False
        return any(
            low <= ord(char) <= high
            for char in text
            for low, high in self._CJK_RANGES
        )
# ============================================================================
# Convenience Functions
# ============================================================================
_default_manager: Optional[FontManager] = None


def get_font_manager() -> FontManager:
    """Return the shared FontManager, constructing it on the first call."""
    global _default_manager
    manager = _default_manager
    if manager is None:
        # First use: build the instance and cache it at module level.
        manager = _default_manager = FontManager()
    return manager
def register_font(font_name: str, font_path: Path) -> bool:
    """Forward a font registration request to the default manager and return its result."""
    manager = get_font_manager()
    return manager.register_font(font_name, font_path)
def get_font_for_text(text: str) -> str:
    """Ask the default manager which font name should be used for ``text``."""
    manager = get_font_manager()
    return manager.get_font_for_text(text)

View File

@@ -1,230 +0,0 @@
"""
Tool_OCR - Document Preprocessor Service
Handles file validation, format detection, and preprocessing
"""
import magic
from pathlib import Path
from typing import Tuple, Optional
import logging
from PIL import Image
import cv2
import numpy as np
from app.core.config import settings
logger = logging.getLogger(__name__)
class DocumentPreprocessor:
    """
    Document preprocessing service for format standardization.

    Validates files (size limit, content-sniffed type, structural integrity)
    and optionally binarizes and denoises images before OCR processing.
    """

    SUPPORTED_IMAGE_FORMATS = ['png', 'jpg', 'jpeg', 'bmp', 'tiff', 'tif']
    SUPPORTED_PDF_FORMAT = ['pdf']
    ALL_SUPPORTED_FORMATS = SUPPORTED_IMAGE_FORMATS + SUPPORTED_PDF_FORMAT

    # MIME type (as detected from file content) -> format key used by this class.
    _MIME_TO_FORMAT = {
        'image/png': 'png',
        'image/jpeg': 'jpg',
        'image/jpg': 'jpg',
        'image/bmp': 'bmp',
        'image/tiff': 'tiff',
        'image/x-tiff': 'tiff',
        'application/pdf': 'pdf',
        'application/msword': 'doc',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
        'application/vnd.ms-powerpoint': 'ppt',
        'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
    }

    def __init__(self):
        """Load the allowed extensions and the upload size limit from settings."""
        self.allowed_extensions = settings.allowed_extensions_list
        self.max_file_size = settings.max_upload_size
        logger.info(f"DocumentPreprocessor initialized with allowed_extensions: {self.allowed_extensions}")

    def validate_file(self, file_path: Path) -> Tuple[bool, Optional[str], Optional[str]]:
        """
        Validate file format, size, and integrity.

        Args:
            file_path: Path to the file to validate

        Returns:
            Tuple of (is_valid, file_format, error_message). ``file_format``
            is set on success and when only the integrity check failed.
        """
        try:
            # Check file exists
            if not file_path.exists():
                return False, None, f"File not found: {file_path}"

            # Check file size
            file_size = file_path.stat().st_size
            if file_size > self.max_file_size:
                max_mb = self.max_file_size / (1024 * 1024)
                actual_mb = file_size / (1024 * 1024)
                return False, None, f"File too large: {actual_mb:.2f}MB (max {max_mb:.2f}MB)"

            # Detect the type from file content rather than trusting the extension
            mime_type = self._detect_mime_type(file_path)
            file_format = self._mime_to_format(mime_type)
            if not file_format:
                return False, None, f"Unsupported file type: {mime_type}"

            # Check if format is in allowed extensions
            if file_format not in self.allowed_extensions:
                return False, None, f"File format '{file_format}' not allowed"

            # Validate file integrity
            is_valid, error = self._validate_integrity(file_path, file_format)
            if not is_valid:
                return False, file_format, f"File corrupted: {error}"

            logger.info(f"File validated successfully: {file_path.name} ({file_format})")
            return True, file_format, None
        except Exception as e:
            # Boundary handler: callers expect a result tuple, never an exception.
            logger.exception(f"File validation error: {e}")
            return False, None, f"Validation error: {e}"

    @staticmethod
    def _detect_mime_type(file_path: Path) -> str:
        """Return the MIME type libmagic detects from the file's content."""
        return magic.Magic(mime=True).from_file(str(file_path))

    def _mime_to_format(self, mime_type: str) -> Optional[str]:
        """Convert MIME type to file format; returns None for unmapped types."""
        return self._MIME_TO_FORMAT.get(mime_type)

    def _validate_integrity(self, file_path: Path, file_format: str) -> Tuple[bool, Optional[str]]:
        """
        Validate file integrity by attempting to open it.

        Args:
            file_path: Path to file
            file_format: Detected file format

        Returns:
            Tuple of (is_valid, error_message)
        """
        try:
            if file_format in self.SUPPORTED_IMAGE_FORMATS:
                with Image.open(file_path) as img:
                    img.verify()  # Structural check without decoding pixel data
                # verify() leaves the image unusable, so reopen and decode the
                # pixel data; reading .size alone would not touch it, letting
                # truncated files through.
                with Image.open(file_path) as img:
                    img.load()
                return True, None

            if file_format == 'pdf':
                # Basic PDF validation - check file starts with PDF signature
                with open(file_path, 'rb') as f:
                    header = f.read(5)
                if header != b'%PDF-':
                    return False, "Invalid PDF header"
                return True, None

            if file_format in ('doc', 'docx', 'ppt', 'pptx'):
                if file_format in ('docx', 'pptx'):
                    # Modern Office formats are ZIP containers with a known main part
                    import zipfile
                    required_part = (
                        'word/document.xml' if file_format == 'docx'
                        else 'ppt/presentation.xml'
                    )
                    try:
                        with zipfile.ZipFile(file_path, 'r') as zf:
                            if required_part not in zf.namelist():
                                return False, f"Invalid {file_format.upper()} structure"
                    except zipfile.BadZipFile:
                        return False, "Invalid Office file (corrupt ZIP)"
                elif file_path.stat().st_size == 0:
                    # Legacy formats (doc, ppt): only require that the file has content
                    return False, "Empty file"
                return True, None

            return False, f"Unknown format: {file_format}"
        except Exception as e:
            return False, str(e)

    def preprocess_image(
        self,
        image_path: Path,
        enhance: bool = True,
        output_path: Optional[Path] = None
    ) -> Tuple[bool, Optional[Path], Optional[str]]:
        """
        Preprocess image to improve OCR accuracy.

        Args:
            image_path: Path to input image
            enhance: Whether to apply enhancement; when False the original
                path is returned after confirming the image is readable
            output_path: Optional output path (defaults to
                ``settings.processed_dir / "processed_<name>"``)

        Returns:
            Tuple of (success, processed_image_path, error_message)
        """
        try:
            # Read image
            img = cv2.imread(str(image_path))
            if img is None:
                return False, None, "Failed to read image"

            if not enhance:
                # No preprocessing, return original
                return True, image_path, None

            # Convert to grayscale
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

            # Apply adaptive thresholding to handle varying lighting
            processed = cv2.adaptiveThreshold(
                gray,
                255,
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY,
                11,
                2
            )

            # Denoise
            processed = cv2.fastNlMeansDenoising(processed, None, 10, 7, 21)

            # Determine output path
            if output_path is None:
                output_path = Path(settings.processed_dir) / f"processed_{image_path.name}"

            # imwrite fails if the target directory is missing, so create it first
            output_path.parent.mkdir(parents=True, exist_ok=True)

            # imwrite signals failure through its return value, not an exception
            if not cv2.imwrite(str(output_path), processed):
                return False, None, f"Failed to write processed image: {output_path}"

            logger.info(f"Image preprocessed: {image_path.name} -> {output_path.name}")
            return True, output_path, None
        except Exception as e:
            logger.exception(f"Image preprocessing error: {e}")
            return False, None, f"Preprocessing error: {e}"

    def get_file_info(self, file_path: Path) -> dict:
        """
        Get comprehensive file information.

        Args:
            file_path: Path to file

        Returns:
            Dictionary with name, path, size (bytes and MB), detected MIME
            type, mapped format, and the ctime/mtime values from ``stat()``.
        """
        stat = file_path.stat()
        mime_type = self._detect_mime_type(file_path)
        return {
            'name': file_path.name,
            'path': str(file_path),
            'size': stat.st_size,
            'size_mb': stat.st_size / (1024 * 1024),
            'mime_type': mime_type,
            'format': self._mime_to_format(mime_type),
            'created_at': stat.st_ctime,
            'modified_at': stat.st_mtime,
        }

View File

@@ -1,7 +1,6 @@
import { Outlet, NavLink, useNavigate } from 'react-router-dom'
import { useTranslation } from 'react-i18next'
import { useAuthStore } from '@/store/authStore'
import { apiClient } from '@/services/api'
import { apiClientV2 } from '@/services/apiV2'
import {
Upload,
@@ -29,12 +28,7 @@ export default function Layout() {
const handleLogout = async () => {
try {
// Use V2 API if authenticated with V2
if (apiClientV2.isAuthenticated()) {
await apiClientV2.logout()
} else {
apiClient.logout()
}
await apiClientV2.logout()
} catch (error) {
console.error('Logout error:', error)
} finally {

View File

@@ -1,26 +0,0 @@
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
/** Props accepted by the MarkdownPreview component. */
interface MarkdownPreviewProps {
// Optional heading; the card header is rendered only when this is truthy.
title?: string
// Text shown verbatim inside the preformatted block.
content: string
// Extra class names forwarded to the outer Card.
className?: string
}
/**
 * Card that shows raw text content in a scrollable, wrapped preformatted block,
 * with an optional title header.
 */
export default function MarkdownPreview(props: MarkdownPreviewProps) {
  const { title, content, className } = props

  // Header is only produced when a title was supplied.
  const header = title && (
    <CardHeader>
      <CardTitle>{title}</CardTitle>
    </CardHeader>
  )

  const body = (
    <pre className="whitespace-pre-wrap break-words bg-muted p-4 rounded-md overflow-auto max-h-[600px]">
      {content}
    </pre>
  )

  return (
    <Card className={className}>
      {header}
      <CardContent>
        <div className="prose prose-sm max-w-none dark:prose-invert">{body}</div>
      </CardContent>
    </Card>
  )
}

View File

@@ -1,90 +0,0 @@
import { useTranslation } from 'react-i18next'
import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from '@/components/ui/table'
import { Badge } from '@/components/ui/badge'
import { Button } from '@/components/ui/button'
import type { FileResult } from '@/types/apiV2'
/** Props accepted by the ResultsTable component. */
interface ResultsTableProps {
// Rows to display; an empty array renders the "no results" placeholder row.
files: FileResult[]
// Called with the file id when the "view markdown" button of a completed file is clicked.
onViewResult?: (fileId: number) => void
// Called with the file id when the "download PDF" button of a completed file is clicked.
onDownloadPDF?: (fileId: number) => void
}
/**
 * Table listing processed files with their status, processing time, and
 * per-file actions.
 *
 * Completed files expose "view" and "download PDF" buttons that invoke the
 * optional callbacks with the file id; failed files show their error text.
 */
export default function ResultsTable({ files, onViewResult, onDownloadPDF }: ResultsTableProps) {
  const { t } = useTranslation()

  // Map a file status to its translated badge; unknown statuses render as pending.
  const getStatusBadge = (status: FileResult['status']) => {
    switch (status) {
      case 'completed':
        return <Badge variant="success">{t('processing.completed')}</Badge>
      case 'processing':
        return <Badge variant="default">{t('processing.processing')}</Badge>
      case 'failed':
        return <Badge variant="destructive">{t('processing.failed')}</Badge>
      default:
        return <Badge variant="secondary">{t('processing.pending')}</Badge>
    }
  }

  // Format a duration in seconds. Only a missing or NaN value yields 'N/A';
  // a genuine 0 is shown as "0.00s" rather than being mistaken for "no data".
  const formatTime = (seconds?: number) => {
    if (seconds == null || Number.isNaN(seconds)) return 'N/A'
    return `${seconds.toFixed(2)}s`
  }

  return (
    <div className="rounded-md border">
      <Table>
        <TableHeader>
          <TableRow>
            <TableHead>{t('results.filename')}</TableHead>
            <TableHead>{t('results.status')}</TableHead>
            <TableHead>{t('results.processingTime')}</TableHead>
            <TableHead className="text-right">{t('results.actions')}</TableHead>
          </TableRow>
        </TableHeader>
        <TableBody>
          {files.length === 0 ? (
            <TableRow>
              <TableCell colSpan={4} className="text-center text-muted-foreground">
                {t('results.noResults')}
              </TableCell>
            </TableRow>
          ) : (
            files.map((file) => (
              <TableRow key={file.id}>
                <TableCell className="font-medium">{file.filename}</TableCell>
                <TableCell>{getStatusBadge(file.status)}</TableCell>
                <TableCell>{formatTime(file.processing_time)}</TableCell>
                <TableCell className="text-right">
                  <div className="flex justify-end gap-2">
                    {file.status === 'completed' && (
                      <>
                        <Button
                          variant="outline"
                          size="sm"
                          onClick={() => onViewResult?.(file.id)}
                        >
                          {t('results.viewMarkdown')}
                        </Button>
                        <Button
                          variant="outline"
                          size="sm"
                          onClick={() => onDownloadPDF?.(file.id)}
                        >
                          {t('results.downloadPDF')}
                        </Button>
                      </>
                    )}
                    {file.status === 'failed' && file.error && (
                      <span className="text-sm text-destructive">{file.error}</span>
                    )}
                  </div>
                </TableCell>
              </TableRow>
            ))
          )}
        </TableBody>
      </Table>
    </div>
  )
}

View File

@@ -4,7 +4,7 @@ import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Button } from '@/components/ui/button'
import { useToast } from '@/components/ui/toast'
import { apiClient } from '@/services/api'
import { apiClientV2 } from '@/services/apiV2'
import type { ExportRule } from '@/types/apiV2'
export default function SettingsPage() {
@@ -25,12 +25,12 @@ export default function SettingsPage() {
// Fetch export rules
const { data: exportRules, isLoading } = useQuery({
queryKey: ['exportRules'],
queryFn: () => apiClient.getExportRules(),
queryFn: () => apiClientV2.getExportRules(),
})
// Create rule mutation
const createRuleMutation = useMutation({
mutationFn: (rule: any) => apiClient.createExportRule(rule),
mutationFn: (rule: any) => apiClientV2.createExportRule(rule),
onSuccess: () => {
queryClient.invalidateQueries({ queryKey: ['exportRules'] })
setIsCreating(false)
@@ -53,7 +53,7 @@ export default function SettingsPage() {
// Update rule mutation
const updateRuleMutation = useMutation({
mutationFn: ({ ruleId, rule }: { ruleId: number; rule: any }) =>
apiClient.updateExportRule(ruleId, rule),
apiClientV2.updateExportRule(ruleId, rule),
onSuccess: () => {
queryClient.invalidateQueries({ queryKey: ['exportRules'] })
setEditingRule(null)
@@ -75,7 +75,7 @@ export default function SettingsPage() {
// Delete rule mutation
const deleteRuleMutation = useMutation({
mutationFn: (ruleId: number) => apiClient.deleteExportRule(ruleId),
mutationFn: (ruleId: number) => apiClientV2.deleteExportRule(ruleId),
onSuccess: () => {
queryClient.invalidateQueries({ queryKey: ['exportRules'] })
toast({

View File

@@ -1,271 +0,0 @@
import axios, { AxiosError } from 'axios'
import type { AxiosInstance } from 'axios'
import type {
LoginRequest,
LoginResponse,
UploadResponse,
ProcessRequest,
ProcessResponse,
BatchStatus,
OCRResult,
ExportRequest,
ExportRule,
CSSTemplate,
TranslateRequest,
TranslateResponse,
ApiError,
} from '@/types/api'
/**
 * API Client Configuration
 * - In Docker: VITE_API_BASE_URL is an empty string; because only `undefined`
 *   triggers the fallback, the empty value is kept and request URLs become
 *   relative paths.
 * - In development: use VITE_API_BASE_URL from .env, or default to
 *   http://localhost:8000 when the variable is not defined at all.
 * API_VERSION is the path segment appended after `/api/` when the client's
 * base URL is built.
 */
const envApiBaseUrl = import.meta.env.VITE_API_BASE_URL
const API_BASE_URL = envApiBaseUrl !== undefined ? envApiBaseUrl : 'http://localhost:8000'
const API_VERSION = 'v2'
/**
 * Axios-based API client.
 *
 * Holds a bearer token (persisted in localStorage under `auth_token`),
 * attaches it to every request, and on any 401 response clears the token and
 * redirects the browser to `/login`.
 */
class ApiClient {
  private client: AxiosInstance
  private token: string | null = null

  constructor() {
    this.client = axios.create({
      baseURL: `${API_BASE_URL}/api/${API_VERSION}`,
      timeout: 30000,
      headers: { 'Content-Type': 'application/json' },
    })

    // Outgoing requests: add the Authorization header when a token is held.
    this.client.interceptors.request.use(
      (config) => {
        if (this.token) {
          config.headers.Authorization = `Bearer ${this.token}`
        }
        return config
      },
      (error) => Promise.reject(error)
    )

    // Incoming responses: a 401 means the token is expired or invalid.
    this.client.interceptors.response.use(
      (response) => response,
      (error: AxiosError<ApiError>) => {
        const status = error.response?.status
        if (status === 401) {
          this.clearToken()
          window.location.href = '/login'
        }
        return Promise.reject(error)
      }
    )

    // Restore a token saved by a previous session, if any.
    this.loadToken()
  }

  /** Store the token in memory and persist it to localStorage. */
  setToken(token: string) {
    this.token = token
    localStorage.setItem('auth_token', token)
  }

  /** Forget the token in memory and remove it from localStorage. */
  clearToken() {
    this.token = null
    localStorage.removeItem('auth_token')
  }

  /** Read a previously persisted token from localStorage. */
  private loadToken() {
    const stored = localStorage.getItem('auth_token')
    if (stored) {
      this.token = stored
    }
  }

  /** True while a token is held in memory. */
  isAuthenticated(): boolean {
    return this.token !== null
  }

  // ==================== Authentication ====================

  /** Post the credentials to `/auth/login` and keep the returned access token. */
  async login(data: LoginRequest): Promise<LoginResponse> {
    const { username, password } = data
    const { data: result } = await this.client.post<LoginResponse>('/auth/login', {
      username,
      password,
    })
    this.setToken(result.access_token)
    return result
  }

  /** Local-only logout: drops the stored token without calling the server. */
  logout() {
    this.clearToken()
  }

  // ==================== File Upload ====================

  /** Upload files as multipart form data, one `files` field per file. */
  async uploadFiles(files: File[]): Promise<UploadResponse> {
    const payload = new FormData()
    files.forEach((item) => {
      payload.append('files', item)
    })

    const { data: result } = await this.client.post<UploadResponse>('/upload', payload, {
      headers: { 'Content-Type': 'multipart/form-data' },
    })
    return result
  }

  // ==================== OCR Processing ====================

  /** Submit an OCR processing request. */
  async processOCR(data: ProcessRequest): Promise<ProcessResponse> {
    const { data: result } = await this.client.post<ProcessResponse>('/ocr/process', data)
    return result
  }

  /**
   * Fetch the OCR result for one file.
   * Note: Backend uses file-level tracking, not task-level
   */
  async getOCRResult(fileId: number): Promise<OCRResult> {
    const { data: result } = await this.client.get<OCRResult>(`/ocr/result/${fileId}`)
    return result
  }

  /** Fetch the status of a batch. */
  async getBatchStatus(batchId: number): Promise<BatchStatus> {
    const { data: result } = await this.client.get<BatchStatus>(`/batch/${batchId}/status`)
    return result
  }

  // ==================== Export ====================

  /** Request an export and return the response body as a Blob. */
  async exportResults(data: ExportRequest): Promise<Blob> {
    const { data: blob } = await this.client.post('/export', data, {
      responseType: 'blob',
    })
    return blob
  }

  /** Download the PDF for a file; `css_template` is sent only when provided. */
  async exportPDF(fileId: number, cssTemplate?: string): Promise<Blob> {
    const params = cssTemplate ? { css_template: cssTemplate } : {}
    const { data: blob } = await this.client.get(`/export/pdf/${fileId}`, {
      params,
      responseType: 'blob',
    })
    return blob
  }

  /** List export rules. */
  async getExportRules(): Promise<ExportRule[]> {
    const { data: rules } = await this.client.get<ExportRule[]>('/export/rules')
    return rules
  }

  /** Create an export rule and return the stored rule. */
  async createExportRule(rule: Omit<ExportRule, 'id' | 'created_at'>): Promise<ExportRule> {
    const { data: created } = await this.client.post<ExportRule>('/export/rules', rule)
    return created
  }

  /** Update an export rule and return the stored rule. */
  async updateExportRule(ruleId: number, rule: Partial<ExportRule>): Promise<ExportRule> {
    const { data: updated } = await this.client.put<ExportRule>(`/export/rules/${ruleId}`, rule)
    return updated
  }

  /** Delete an export rule. */
  async deleteExportRule(ruleId: number): Promise<void> {
    await this.client.delete(`/export/rules/${ruleId}`)
  }

  /** List the available CSS templates. */
  async getCSSTemplates(): Promise<CSSTemplate[]> {
    const { data: templates } = await this.client.get<CSSTemplate[]>('/export/css-templates')
    return templates
  }

  // ==================== Translation (FUTURE FEATURE - STUB) ====================

  /**
   * Translate document (STUB - Not yet implemented)
   * This is a placeholder for future translation functionality
   * @throws Will throw error with status 501 (Not Implemented)
   */
  async translateDocument(data: TranslateRequest): Promise<TranslateResponse> {
    // This endpoint is expected to return 501 Not Implemented until Phase 5
    const { data: result } = await this.client.post<TranslateResponse>('/translate/document', data)
    return result
  }

  // NOTE: translation config endpoints (`/translate/configs`, GET and POST)
  // are intentionally absent: the backend does not expose them yet and they
  // are planned for Phase 5.
}

// Export singleton instance
export const apiClient = new ApiClient()

View File

@@ -38,6 +38,7 @@ import type {
TranslationStatusResponse,
TranslationListResponse,
TranslationResult,
ExportRule,
} from '@/types/apiV2'
/**
@@ -713,6 +714,39 @@ class ApiClientV2 {
link.click()
window.URL.revokeObjectURL(link.href)
}
// ==================== Export Rules APIs ====================
/**
 * Fetch all export rules from `/export/rules`.
 */
async getExportRules(): Promise<ExportRule[]> {
  const { data: rules } = await this.client.get<ExportRule[]>('/export/rules')
  return rules
}
/**
 * Create a new export rule and return the rule as stored by the server.
 */
async createExportRule(rule: Omit<ExportRule, 'id' | 'created_at'>): Promise<ExportRule> {
  const { data: created } = await this.client.post<ExportRule>('/export/rules', rule)
  return created
}
/**
 * Update the export rule identified by `ruleId` and return the stored result.
 */
async updateExportRule(ruleId: number, rule: Partial<ExportRule>): Promise<ExportRule> {
  const { data: updated } = await this.client.put<ExportRule>(`/export/rules/${ruleId}`, rule)
  return updated
}
/**
 * Delete the export rule identified by `ruleId`; resolves with no value.
 */
async deleteExportRule(ruleId: number): Promise<void> {
  const target = `/export/rules/${ruleId}`
  await this.client.delete(target)
}
}
// Export singleton instance

View File

@@ -1,182 +0,0 @@
/**
* API Type Definitions
* Based on backend OpenAPI specification
*/
// Authentication
/** Credentials submitted when logging in. */
export interface LoginRequest {
username: string
password: string
}
/** Token payload returned by a successful login. */
export interface LoginResponse {
access_token: string
// NOTE(review): presumably "bearer" — confirm against the backend.
token_type: string
expires_in: number // Token expiration time in seconds
}
/** User account record; email and display name are optional. */
export interface User {
id: number
username: string
email?: string
// May be omitted or explicitly null.
displayName?: string | null
}
// File Upload (V2 API)
/** Response describing an uploaded file and the task associated with it. */
export interface UploadResponse {
task_id: string
filename: string
// NOTE(review): unit is presumably bytes — confirm against the backend.
file_size: number
file_type: string
status: 'pending' | 'processing' | 'completed' | 'failed'
}
/** Metadata and processing status of a single file. */
export interface FileInfo {
id: number
filename: string
file_size: number
file_format: string // Changed from 'format' to match backend
status: 'pending' | 'processing' | 'completed' | 'failed'
}
// OCR Processing
/** Parameters for starting OCR processing of a batch. */
export interface ProcessRequest {
batch_id: number
// Optional language selector; accepted values are defined by the backend.
lang?: string
detect_layout?: boolean // Changed from confidence_threshold to match backend
}
/** Acknowledgement returned after an OCR processing request. */
export interface ProcessResponse {
message: string // Added to match backend
batch_id: number
total_files: number // Added to match backend
status: string
// Removed task_id - backend uses batch-level tracking instead
}
/** Progress snapshot of a processing task. */
export interface TaskStatus {
task_id: string
status: 'pending' | 'processing' | 'completed' | 'failed'
// NOTE(review): presumably in the 0-100 range — confirm against the backend.
progress_percentage: number
current_file?: string
files_processed: number
total_files: number
// Error description, when one is reported.
error?: string
}
/** Batch-level status together with the per-file results of that batch. */
export interface BatchStatus {
batch: {
id: number
status: 'pending' | 'processing' | 'completed' | 'failed'
progress_percentage: number
// Timestamps are transported as strings; the exact format is not specified here.
created_at: string
completed_at?: string
}
files: FileResult[]
}
/** Processing outcome for a single file. */
export interface FileResult {
id: number
filename: string
status: 'pending' | 'processing' | 'completed' | 'failed'
// NOTE(review): unit is presumably seconds — confirm against the backend.
processing_time?: number
error?: string
}
// OCR Results
/** OCR output for one file: markdown text plus structured JSON data. */
export interface OCRResult {
file_id: number
filename: string
status: string
markdown_content: string
json_data: OCRJsonData
// NOTE(review): scale of the confidence value is not specified here — confirm.
confidence: number
processing_time: number
}
/** Structured OCR data: summary counters, text blocks, and optional layout info. */
export interface OCRJsonData {
total_text_regions: number
average_confidence: number
text_blocks: TextBlock[]
layout_info?: LayoutInfo
}
/** One recognized text region. */
export interface TextBlock {
text: string
confidence: number
// Four numbers describing the bounding box.
// NOTE(review): coordinate order (e.g. x1, y1, x2, y2) is not specified here — confirm.
bbox: [number, number, number, number]
position: number
}
/** Summary of detected layout elements. */
export interface LayoutInfo {
tables_detected: number
images_detected: number
structure: string
}
// Export
/** Parameters for exporting the results of a batch in a chosen format. */
export interface ExportRequest {
batch_id: number
format: 'txt' | 'json' | 'excel' | 'markdown' | 'pdf'
// Optional id of an export rule to use.
rule_id?: number
options?: ExportOptions
}
/** Optional settings for an export request; every field may be omitted. */
export interface ExportOptions {
confidence_threshold?: number
include_metadata?: boolean
filename_pattern?: string
css_template?: string
}
/** Saved export rule. */
export interface ExportRule {
id: number
rule_name: string
// Free-form rule configuration; its schema is not defined at this level.
config_json: Record<string, any>
css_template?: string
created_at: string
}
/** CSS template descriptor. */
export interface CSSTemplate {
name: string
description: string
// filename is not returned by backend - use name as identifier
}
// Translation (FUTURE FEATURE)
/** Parameters for translating a file from one language to another. */
export interface TranslateRequest {
file_id: number
source_lang: string
target_lang: string
// Optional translation engine selector.
engine_type?: 'argos' | 'ernie' | 'google'
}
/** Status of a translation task, with the translated text when present. */
export interface TranslateResponse {
task_id: string
file_id: number
status: 'pending' | 'processing' | 'completed' | 'failed'
translated_content?: string
}
/** Saved translation configuration for a language pair and engine. */
export interface TranslationConfig {
id: number
source_lang: string
target_lang: string
engine_type: 'argos' | 'ernie' | 'google'
// Free-form engine settings; their schema is not defined at this level.
engine_config: Record<string, any>
created_at: string
}
// API Response
/**
 * Generic response envelope: a success flag plus optional data, error, and
 * message fields. `T` is the type of `data` and defaults to `any`.
 */
export interface ApiResponse<T = any> {
success: boolean
data?: T
error?: string
message?: string
}
// Error Response
/** Error body shape: a detail message and a numeric status code. */
export interface ApiError {
detail: string
status_code: number
}