feat: refactor dual-track architecture (Phase 1-5)
## Backend Changes

- **Service Layer Refactoring**:
  - Add ProcessingOrchestrator for unified document processing
  - Add PDFTableRenderer for table rendering extraction
  - Add PDFFontManager for font management with CJK support
  - Add MemoryPolicyEngine (73% code reduction from MemoryGuard)
- **Bug Fixes**:
  - Fix Direct Track table row span calculation
  - Fix OCR Track image path handling
  - Add cell_boxes coordinate validation
  - Filter out small decorative images
  - Add covering image detection

## Frontend Changes

- **State Management**:
  - Add TaskStore for centralized task state management
  - Add localStorage persistence for recent tasks
  - Add processing state tracking
- **Type Consolidation**:
  - Merge shared types from api.ts to apiV2.ts
  - Update imports in authStore, uploadStore, ResultsTable, SettingsPage
- **Page Integration**:
  - Integrate TaskStore in ProcessingPage and TaskDetailPage
  - Update useTaskValidation hook with cache sync

## Testing

- Direct Track: edit.pdf (3 pages, 1.281s), edit3.pdf (2 pages, 0.203s)
- Cell boxes validation: 43 valid, 0 invalid
- Table merging: 12 merged cells verified

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
312
backend/app/services/pdf_font_manager.py
Normal file
312
backend/app/services/pdf_font_manager.py
Normal file
@@ -0,0 +1,312 @@
|
||||
"""
|
||||
PDF Font Manager - Handles font loading, registration, and fallback.
|
||||
|
||||
This module provides unified font management for PDF generation,
|
||||
including CJK font support and font fallback mechanisms.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
from reportlab.pdfbase import pdfmetrics
|
||||
from reportlab.pdfbase.ttfonts import TTFont
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Configuration
|
||||
# ============================================================================
|
||||
|
||||
@dataclass
class FontConfig:
    """Configuration for font management.

    All fields have defaults, so ``FontConfig()`` yields a usable
    configuration. ``chinese_font_path`` may be given as a ``Path`` or as a
    plain string; strings are converted to ``Path`` after construction and an
    empty string is treated as "not configured" (``None``).
    """

    # Primary fonts
    # Name under which the primary (TTF) Chinese font is registered.
    chinese_font_name: str = "NotoSansSC"
    # Location of the TTF file for the primary font; None means "not set".
    chinese_font_path: Optional[Path] = None

    # Fallback fonts (built-in)
    fallback_font_name: str = "Helvetica"
    fallback_cjk_font_name: str = "HeiseiMin-W3"  # Built-in ReportLab CJK

    # Font sizes
    default_font_size: int = 10
    min_font_size: int = 6
    max_font_size: int = 14

    # Font registration options
    auto_register: bool = True
    enable_cjk_fallback: bool = True

    def __post_init__(self) -> None:
        """Normalize ``chinese_font_path`` so consumers can rely on ``Path``."""
        raw_path = self.chinese_font_path
        if isinstance(raw_path, str):
            # Consumers call Path methods (.is_absolute(), .exists()) on this
            # value, so a raw string would fail there. An empty string keeps
            # meaning "no font configured".
            self.chinese_font_path = Path(raw_path) if raw_path else None
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Font Manager
|
||||
# ============================================================================
|
||||
|
||||
class FontManager:
    """
    Manages font registration and selection for PDF generation.

    Features:
    - Font registration at construction time (when ``config.auto_register``)
    - CJK (Chinese/Japanese/Korean) font support
    - Automatic fallback to built-in fonts
    - Font caching to avoid duplicate registration

    The class is a singleton: every ``FontManager(...)`` call returns the same
    object and only the configuration passed to the first call is used.
    """

    _instance: Optional["FontManager"] = None
    # Class-level on purpose: shared by the singleton and replaced by reset().
    _registered_fonts: Dict[str, Path] = {}

    # Inclusive code-point ranges that are treated as CJK text.
    _CJK_RANGES = (
        (0x4E00, 0x9FFF),    # CJK Unified Ideographs
        (0x3400, 0x4DBF),    # CJK Extension A
        (0x20000, 0x2A6DF),  # CJK Extension B
        (0x3000, 0x303F),    # CJK Symbols and Punctuation
        (0x3040, 0x309F),    # Hiragana
        (0x30A0, 0x30FF),    # Katakana
        (0xAC00, 0xD7AF),    # Korean Hangul syllables
        (0xF900, 0xFAFF),    # CJK Compatibility Ideographs
        (0xFF00, 0xFFEF),    # Halfwidth and Fullwidth Forms
    )

    # Rough width of a non-CJK glyph, as a fraction of the font size.
    _NARROW_CHAR_EM = 0.6
    # Line height as a multiple of the font size.
    _LINE_HEIGHT_FACTOR = 1.2

    def __new__(cls, *args, **kwargs):
        """Singleton pattern to avoid duplicate font registration."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            # Lets __init__ tell the first construction from later ones.
            cls._instance._initialized = False
        return cls._instance

    def __init__(self, config: Optional["FontConfig"] = None):
        """
        Initialize FontManager.

        Runs only once per singleton instance; later calls return early and
        their ``config`` argument is ignored.

        Args:
            config: FontConfig instance (uses defaults if None)
        """
        if self._initialized:
            if config is not None and config is not self.config:
                logger.debug("FontManager already initialized; ignoring new config")
            return

        self.config = config or FontConfig()
        self._primary_font_registered = False
        self._cjk_fallback_available = False

        # Auto-register fonts if enabled
        if self.config.auto_register:
            self._register_fonts()

        self._initialized = True

    @property
    def primary_font_name(self) -> str:
        """Get the primary font name to use."""
        if self._primary_font_registered:
            return self.config.chinese_font_name
        return self.config.fallback_font_name

    @property
    def is_cjk_enabled(self) -> bool:
        """Check if CJK fonts are available."""
        return self._primary_font_registered or self._cjk_fallback_available

    @classmethod
    def reset(cls):
        """Reset singleton instance (for testing)."""
        cls._instance = None
        cls._registered_fonts = {}

    def get_font_for_text(self, text: str) -> str:
        """
        Get appropriate font name for given text.

        Args:
            text: Text to render

        Returns:
            Font name suitable for the text content
        """
        if self._contains_cjk(text):
            if self._primary_font_registered:
                return self.config.chinese_font_name
            if self._cjk_fallback_available:
                return self.config.fallback_cjk_font_name
        return self.primary_font_name

    def get_font_size(
        self,
        text: str,
        available_width: float,
        available_height: float,
        pdf_canvas=None
    ) -> int:
        """
        Calculate optimal font size for text to fit within bounds.

        Sizes are tried from ``config.max_font_size`` down to
        ``config.min_font_size``; the first one that fits is returned.

        Args:
            text: Text to render
            available_width: Maximum width available
            available_height: Maximum height available
            pdf_canvas: Optional canvas for precise measurement

        Returns:
            Font size that fits within bounds, or ``config.min_font_size``
            when no size in the configured range fits
        """
        font_name = self.get_font_for_text(text)

        # Glyph counts for the approximate path. CJK glyphs are full-width
        # (about 1 em); everything else is estimated at _NARROW_CHAR_EM.
        wide_count = sum(1 for char in text if self._is_cjk_char(char))
        narrow_count = len(text) - wide_count

        for size in range(self.config.max_font_size, self.config.min_font_size - 1, -1):
            if pdf_canvas is not None:
                # Precise measurement with canvas
                text_width = pdf_canvas.stringWidth(text, font_name, size)
            else:
                # Approximate measurement
                text_width = narrow_count * size * self._NARROW_CHAR_EM + wide_count * size

            text_height = size * self._LINE_HEIGHT_FACTOR  # Line height

            if text_width <= available_width and text_height <= available_height:
                return size

        return self.config.min_font_size

    def register_font(
        self,
        font_name: str,
        font_path: Path,
        force: bool = False
    ) -> bool:
        """
        Register a custom font.

        Args:
            font_name: Name to register font under
            font_path: Path to TTF font file (a plain string is accepted too)
            force: Force re-registration if already registered

        Returns:
            True if registration successful (or the font was already
            registered and ``force`` is False), False otherwise
        """
        if font_name in self._registered_fonts and not force:
            logger.debug(f"Font {font_name} already registered")
            return True

        try:
            font_path = Path(font_path)
            if not font_path.exists():
                logger.error(f"Font file not found: {font_path}")
                return False

            pdfmetrics.registerFont(TTFont(font_name, str(font_path)))
            self._registered_fonts[font_name] = font_path
            if font_name == self.config.chinese_font_name:
                # Keep primary_font_name / get_font_for_text in sync when the
                # configured primary font is registered through this method.
                self._primary_font_registered = True
            logger.info(f"Font registered: {font_name} from {font_path}")
            return True

        except Exception as e:
            # Best effort: a bad font file must not break PDF generation.
            logger.error(f"Failed to register font {font_name}: {e}")
            return False

    def get_registered_fonts(self) -> List[str]:
        """Get list of registered custom font names."""
        return list(self._registered_fonts.keys())

    # =========================================================================
    # Private Methods
    # =========================================================================

    def _register_fonts(self):
        """Register configured fonts."""
        # Register primary Chinese font. Called unconditionally so that the
        # application-settings fallback inside _register_chinese_font is
        # reachable when config.chinese_font_path is not set.
        self._register_chinese_font()

        # Setup CJK fallback
        if self.config.enable_cjk_fallback:
            self._setup_cjk_fallback()

    def _register_chinese_font(self):
        """
        Register the primary Chinese font.

        The font path is taken from ``config.chinese_font_path``; when that
        is not set, ``app.core.config.settings.chinese_font_path`` is tried.
        Failures are logged and leave the primary font unregistered.
        """
        font_path = self.config.chinese_font_path

        if not font_path:
            # Try to load from settings
            try:
                from app.core.config import settings
                font_path = settings.chinese_font_path
            except Exception as e:
                logger.debug(f"Could not load font path from settings: {e}")
                return
            if not font_path:
                return

        font_path = Path(font_path)

        # Resolve relative path
        if not font_path.is_absolute():
            # Try project root (fourth ancestor directory of this file)
            project_root = Path(__file__).resolve().parents[3]
            font_path = project_root / font_path

        if not font_path.exists():
            logger.warning(f"Chinese font not found at {font_path}")
            return

        try:
            pdfmetrics.registerFont(TTFont(self.config.chinese_font_name, str(font_path)))
            self._registered_fonts[self.config.chinese_font_name] = font_path
            self._primary_font_registered = True
            logger.info(f"Chinese font registered: {self.config.chinese_font_name}")
        except Exception as e:
            logger.error(f"Failed to register Chinese font: {e}")

    def _setup_cjk_fallback(self):
        """
        Setup CJK fallback using built-in fonts.

        Registers ``config.fallback_cjk_font_name`` as a ReportLab CID font
        and records whether it is usable in ``_cjk_fallback_available``.
        """
        fallback_name = self.config.fallback_cjk_font_name

        try:
            # ReportLab includes CID fonts for CJK
            from reportlab.pdfbase.cidfonts import UnicodeCIDFont
        except ImportError:
            logger.debug("CID fonts not available for CJK fallback")
            return

        # Already registered (e.g. before reset() or by other code): usable.
        if fallback_name in pdfmetrics.getRegisteredFontNames():
            self._cjk_fallback_available = True
            return

        try:
            pdfmetrics.registerFont(UnicodeCIDFont(fallback_name))
        except Exception as e:
            logger.warning(f"Could not register CJK fallback font {fallback_name}: {e}")
            return

        self._cjk_fallback_available = True
        logger.debug(f"CJK fallback font available: {fallback_name}")

    @staticmethod
    def _is_cjk_char(char: str) -> bool:
        """Return True if the single character falls in a known CJK range."""
        code = ord(char)
        return any(low <= code <= high for low, high in FontManager._CJK_RANGES)

    def _contains_cjk(self, text: str) -> bool:
        """
        Check if text contains CJK characters.

        Args:
            text: Text to check

        Returns:
            True if text contains Chinese, Japanese, or Korean characters
        """
        if not text:
            return False
        return any(self._is_cjk_char(char) for char in text)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Convenience Functions
|
||||
# ============================================================================
|
||||
|
||||
# Last manager handed out by get_font_manager(); kept for introspection.
_default_manager: Optional[FontManager] = None


def get_font_manager() -> FontManager:
    """Get the default FontManager instance.

    FontManager is itself a singleton, so the instance is always obtained
    through its constructor. This keeps the module-level reference in step
    with ``FontManager.reset()``: after a reset the next call returns the
    fresh instance instead of the discarded one.
    """
    global _default_manager
    manager = FontManager()
    if manager is not _default_manager:
        _default_manager = manager
    return manager
|
||||
|
||||
|
||||
def register_font(font_name: str, font_path: Path, force: bool = False) -> bool:
    """Register a font using the default manager.

    Args:
        font_name: Name to register font under
        font_path: Path to TTF font file
        force: Force re-registration if already registered

    Returns:
        True if registration successful
    """
    return get_font_manager().register_font(font_name, font_path, force=force)
|
||||
|
||||
|
||||
def get_font_for_text(text: str) -> str:
    """Return the font name the default manager selects for ``text``.

    Args:
        text: Text to render

    Returns:
        Font name chosen by the default FontManager
    """
    manager = get_font_manager()
    return manager.get_font_for_text(text)
|
||||
Reference in New Issue
Block a user