feat: implement layout-preserving PDF generation with table reconstruction

Major Features:
- Add PDF generation service with Chinese font support
- Parse HTML tables from PP-StructureV3 and rebuild with ReportLab
- Extract table text for translation purposes
- Auto-filter text regions inside tables to avoid overlaps

Backend Changes:
1. pdf_generator_service.py (NEW)
   - HTMLTableParser: Parse HTML tables to extract structure
   - PDFGeneratorService: Generate layout-preserving PDFs
   - Coordinate transformation: OCR (top-left) → PDF (bottom-left)
   - Font size heuristics: 75% of bbox height with width checking
   - Table reconstruction: Parse HTML → ReportLab Table
   - Image embedding: Extract bbox from filenames

2. ocr_service.py
   - Add _extract_table_text() for translation support
   - Add output_dir parameter to save images to result directory
   - Extract bbox from image filenames (img_in_table_box_x1_y1_x2_y2.jpg)

3. tasks.py
   - Update process_task_ocr to use save_results() with PDF generation
   - Fix download_pdf endpoint to use database-stored PDF paths
   - Support on-demand PDF generation from JSON

4. config.py
   - Add chinese_font_path configuration
   - Add pdf_enable_bbox_debug flag

Frontend Changes:
1. PDFViewer.tsx (NEW)
   - React PDF viewer with zoom and pagination
   - Memoized file config to prevent unnecessary reloads

2. TaskDetailPage.tsx & ResultsPage.tsx
   - Integrate PDF preview and download

3. main.tsx
   - Configure PDF.js worker via CDN

4. vite.config.ts
   - Add host: '0.0.0.0' for network access
   - Use VITE_API_URL environment variable for backend proxy

Dependencies:
- reportlab: PDF generation library
- Noto Sans SC font: Chinese character support

🤖 Generated with Claude Code
https://claude.com/claude-code

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
egg
2025-11-17 20:21:56 +08:00
parent 012da1abc4
commit fa1abcd8e6
16 changed files with 1427 additions and 57 deletions

View File

@@ -96,6 +96,11 @@ class Settings(BaseSettings):
pdf_margin_left: int = Field(default=20)
pdf_margin_right: int = Field(default=20)
# ===== Layout-Preserving PDF Configuration =====
chinese_font_path: str = Field(default="./backend/fonts/NotoSansSC-Regular.ttf")
pdf_font_size_base: int = Field(default=12)
pdf_enable_bbox_debug: bool = Field(default=False) # Draw bounding boxes for debugging
# ===== Translation Configuration (Reserved) =====
enable_translation: bool = Field(default=False)
translation_engine: str = Field(default="offline")

View File

@@ -66,34 +66,33 @@ def process_task_ocr(task_id: str, task_db_id: int, file_path: str, filename: st
# Initialize OCR service
ocr_service = OCRService()
# Create result directory before OCR processing (needed for saving extracted images)
result_dir = Path(settings.result_dir) / task_id
result_dir.mkdir(parents=True, exist_ok=True)
# Process the file with OCR
ocr_result = ocr_service.process_image(
image_path=Path(file_path),
lang='ch',
detect_layout=True
detect_layout=True,
output_dir=result_dir
)
# Calculate processing time
processing_time_ms = int((datetime.now() - start_time).total_seconds() * 1000)
# Create result directory
result_dir = Path(settings.result_dir) / task_id
result_dir.mkdir(parents=True, exist_ok=True)
# Save JSON result
json_path = result_dir / f"{Path(filename).stem}_result.json"
with open(json_path, 'w', encoding='utf-8') as f:
json.dump(ocr_result, f, ensure_ascii=False, indent=2)
# Save Markdown result
markdown_path = result_dir / f"{Path(filename).stem}_result.md"
markdown_content = ocr_result.get('markdown_content', '')
with open(markdown_path, 'w', encoding='utf-8') as f:
f.write(markdown_content)
# Save results using OCR service (includes JSON, Markdown, and PDF generation)
json_path, markdown_path, pdf_path = ocr_service.save_results(
result=ocr_result,
output_dir=result_dir,
file_id=Path(filename).stem,
source_file_path=Path(file_path)
)
# Update task with results (direct database update)
task.result_json_path = str(json_path)
task.result_markdown_path = str(markdown_path)
task.result_json_path = str(json_path) if json_path else None
task.result_markdown_path = str(markdown_path) if markdown_path else None
task.result_pdf_path = str(pdf_path) if pdf_path else None
task.processing_time_ms = processing_time_ms
task.status = TaskStatus.COMPLETED
task.completed_at = datetime.utcnow()
@@ -468,10 +467,16 @@ async def download_pdf(
current_user: User = Depends(get_current_user)
):
"""
Download task result as searchable PDF file
Download task result as layout-preserving PDF file
- **task_id**: Task UUID
Returns a PDF that preserves the original document layout using OCR results.
The PDF is generated from OCR JSON data and cached for subsequent requests.
"""
from pathlib import Path
from app.services.pdf_generator_service import pdf_generator_service
# Get task
task = task_service.get_task_by_id(
db=db,
@@ -485,12 +490,69 @@ async def download_pdf(
detail="Task not found"
)
# Check if task is completed
if task.status.value != "completed":
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Task is not completed yet. Please wait for OCR processing to finish."
)
# Check if PDF path is stored in database
if task.result_pdf_path and Path(task.result_pdf_path).exists():
pdf_path = Path(task.result_pdf_path)
logger.info(f"Using pre-generated PDF from database: {pdf_path.name}")
else:
# Fallback: Try to generate PDF on-demand
result_dir = Path(settings.result_dir) / task_id
# Use stored JSON path or construct it
if task.result_json_path and Path(task.result_json_path).exists():
json_path = Path(task.result_json_path)
else:
# Try to find JSON file in result directory
json_files = list(result_dir.glob("*_result.json"))
if not json_files:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="OCR result JSON not found"
)
json_path = json_files[0]
# Construct PDF path based on JSON filename
pdf_filename = json_path.stem.replace("_result", "_layout") + ".pdf"
pdf_path = result_dir / pdf_filename
# Generate PDF if it doesn't exist
if not pdf_path.exists():
logger.info(f"Generating layout-preserving PDF for task {task_id}")
# Get source file path if available
source_file = None
task_file = db.query(TaskFile).filter(TaskFile.task_id == task.id).first()
if task_file and task_file.stored_path and Path(task_file.stored_path).exists():
source_file = Path(task_file.stored_path)
# Generate PDF
success = pdf_generator_service.generate_layout_pdf(
json_path=json_path,
output_path=pdf_path,
source_file_path=source_file
)
if not success:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to generate PDF. Please check server logs."
)
logger.info(f"PDF generated successfully: {pdf_path.name}")
# Validate file access
is_valid, error_msg = file_access_service.validate_file_access(
db=db,
user_id=current_user.id,
task_id=task_id,
file_path=task.result_pdf_path
file_path=str(pdf_path)
)
if not is_valid:
@@ -502,7 +564,7 @@ async def download_pdf(
# Return file
filename = f"{task.filename or task_id}_result.pdf"
return FileResponse(
path=task.result_pdf_path,
path=str(pdf_path),
filename=filename,
media_type="application/pdf"
)

View File

@@ -284,7 +284,8 @@ class OCRService:
image_path: Path,
lang: str = 'ch',
detect_layout: bool = True,
confidence_threshold: Optional[float] = None
confidence_threshold: Optional[float] = None,
output_dir: Optional[Path] = None
) -> Dict:
"""
Process single image with OCR and layout analysis
@@ -340,7 +341,8 @@ class OCRService:
page_image_path,
lang=lang,
detect_layout=detect_layout,
confidence_threshold=confidence_threshold
confidence_threshold=confidence_threshold,
output_dir=output_dir
)
# Accumulate results
@@ -458,7 +460,7 @@ class OCRService:
images_metadata = []
if detect_layout:
layout_data, images_metadata = self.analyze_layout(image_path)
layout_data, images_metadata = self.analyze_layout(image_path, output_dir=output_dir)
# Generate Markdown
markdown_content = self.generate_markdown(text_regions, layout_data)
@@ -500,12 +502,71 @@ class OCRService:
'processing_time': (datetime.now() - start_time).total_seconds(),
}
def analyze_layout(self, image_path: Path) -> Tuple[Optional[Dict], List[Dict]]:
def _extract_table_text(self, html_content: str) -> str:
"""
Extract text from HTML table content for translation purposes
Args:
html_content: HTML content containing table
Returns:
Extracted text from table cells
"""
try:
from html.parser import HTMLParser
class TableTextExtractor(HTMLParser):
def __init__(self):
super().__init__()
self.text_parts = []
self.in_table = False
def handle_starttag(self, tag, attrs):
if tag == 'table':
self.in_table = True
def handle_endtag(self, tag):
if tag == 'table':
self.in_table = False
elif tag in ('td', 'th') and self.in_table:
self.text_parts.append(' | ') # Cell separator
elif tag == 'tr' and self.in_table:
self.text_parts.append('\n') # Row separator
def handle_data(self, data):
if self.in_table:
stripped = data.strip()
if stripped:
self.text_parts.append(stripped)
parser = TableTextExtractor()
parser.feed(html_content)
# Clean up the extracted text
extracted = ''.join(parser.text_parts)
# Remove multiple separators
import re
extracted = re.sub(r'\s*\|\s*\|+\s*', ' | ', extracted)
extracted = re.sub(r'\n+', '\n', extracted)
extracted = extracted.strip()
return extracted
except Exception as e:
logger.warning(f"Failed to extract table text: {e}")
# Fallback: just remove HTML tags
import re
text = re.sub(r'<[^>]+>', ' ', html_content)
text = re.sub(r'\s+', ' ', text)
return text.strip()
def analyze_layout(self, image_path: Path, output_dir: Optional[Path] = None) -> Tuple[Optional[Dict], List[Dict]]:
"""
Analyze document layout using PP-StructureV3
Args:
image_path: Path to image file
output_dir: Optional output directory for saving extracted images (defaults to image_path.parent)
Returns:
Tuple of (layout_data, images_metadata)
@@ -548,16 +609,59 @@ class OCRService:
'page': page_idx,
'bbox': [], # PP-StructureV3 doesn't provide individual bbox in this format
}
# Extract text from table for translation purposes
if has_table:
table_text = self._extract_table_text(markdown_texts)
element['extracted_text'] = table_text
logger.info(f"Extracted {len(table_text)} characters from table")
layout_elements.append(element)
# Add image metadata
# Add image metadata and SAVE images to disk
for img_idx, (img_path, img_obj) in enumerate(markdown_images.items()):
# Save image to disk
try:
# Determine base directory for saving images
base_dir = output_dir if output_dir else image_path.parent
# Create full path for image file
full_img_path = base_dir / img_path
# Create imgs/ subdirectory if it doesn't exist
full_img_path.parent.mkdir(parents=True, exist_ok=True)
# Save image object to disk
if hasattr(img_obj, 'save'):
# img_obj is PIL Image
img_obj.save(str(full_img_path))
logger.info(f"Saved extracted image to {full_img_path}")
else:
logger.warning(f"Image object for {img_path} does not have save() method, skipping")
except Exception as e:
logger.warning(f"Failed to save image {img_path}: {str(e)}")
# Continue processing even if image save fails
# Extract bbox from filename (format: img_in_table_box_x1_y1_x2_y2.jpg)
bbox = []
try:
import re
match = re.search(r'box_(\d+)_(\d+)_(\d+)_(\d+)', img_path)
if match:
x1, y1, x2, y2 = map(int, match.groups())
# Convert to 4-point bbox format: [[x1,y1], [x2,y1], [x2,y2], [x1,y2]]
bbox = [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]
logger.info(f"Extracted bbox from filename: {bbox}")
except Exception as e:
logger.warning(f"Failed to extract bbox from {img_path}: {e}")
images_metadata.append({
'element_id': len(layout_elements) + img_idx,
'image_path': img_path,
'type': 'image',
'page': page_idx,
'bbox': [],
'bbox': bbox,
})
if layout_elements:
@@ -638,18 +742,20 @@ class OCRService:
self,
result: Dict,
output_dir: Path,
file_id: str
) -> Tuple[Optional[Path], Optional[Path]]:
file_id: str,
source_file_path: Optional[Path] = None
) -> Tuple[Optional[Path], Optional[Path], Optional[Path]]:
"""
Save OCR results to JSON and Markdown files
Save OCR results to JSON, Markdown, and layout-preserving PDF files
Args:
result: OCR result dictionary
output_dir: Output directory
file_id: Unique file identifier
source_file_path: Optional path to original source file for PDF generation
Returns:
Tuple of (json_path, markdown_path)
Tuple of (json_path, markdown_path, pdf_path)
"""
try:
output_dir.mkdir(parents=True, exist_ok=True)
@@ -666,8 +772,37 @@ class OCRService:
f.write(markdown_content)
logger.info(f"Results saved: {json_path.name}, {markdown_path.name}")
return json_path, markdown_path
# Generate layout-preserving PDF
pdf_path = None
try:
from app.services.pdf_generator_service import pdf_generator_service
pdf_filename = f"{file_id}_layout.pdf"
pdf_path = output_dir / pdf_filename
logger.info(f"Generating layout-preserving PDF: {pdf_filename}")
success = pdf_generator_service.generate_layout_pdf(
json_path=json_path,
output_path=pdf_path,
source_file_path=source_file_path
)
if success:
logger.info(f"✓ PDF generated successfully: {pdf_path.name}")
else:
logger.warning(f"✗ PDF generation failed for {file_id}")
pdf_path = None
except Exception as e:
logger.error(f"Error generating PDF for {file_id}: {str(e)}")
import traceback
traceback.print_exc()
pdf_path = None
return json_path, markdown_path, pdf_path
except Exception as e:
logger.error(f"Error saving results: {str(e)}")
return None, None
return None, None, None

View File

@@ -0,0 +1,626 @@
"""
Layout-Preserving PDF Generation Service
Generates PDF files that preserve the original document layout using OCR JSON data
"""
import json
import logging
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from datetime import datetime
from reportlab.lib.pagesizes import A4, letter
from reportlab.lib.units import mm
from reportlab.pdfgen import canvas
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.platypus import Table, TableStyle
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT
from reportlab.platypus import Paragraph
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from PIL import Image
from html.parser import HTMLParser
from app.core.config import settings
logger = logging.getLogger(__name__)
class HTMLTableParser(HTMLParser):
    """
    Parse HTML table markup to extract structure and data.

    After ``feed()``, ``tables`` holds one dict per <table> that contained at
    least one row, shaped as::

        {'rows': [{'cells': [{'text': str, 'is_header': bool,
                              'colspan': int, 'rowspan': int}, ...]}, ...]}

    Cell text is the concatenation of the stripped text fragments, each
    followed by a single space (callers are expected to strip the result).
    """

    def __init__(self):
        super().__init__()
        self.tables = []
        self.current_table = None
        self.current_row = None
        self.current_cell = None
        self.in_table = False

    @staticmethod
    def _parse_span(value):
        """Return a colspan/rowspan attribute as an int >= 1.

        Missing, valueless (``<td rowspan>``), non-numeric or non-positive
        values fall back to 1 instead of raising and aborting the parse.
        """
        try:
            span = int(value)
        except (TypeError, ValueError):
            return 1
        return max(span, 1)

    def handle_starttag(self, tag, attrs):
        attrs_dict = dict(attrs)
        if tag == 'table':
            self.in_table = True
            self.current_table = {'rows': []}
        elif tag == 'tr' and self.in_table:
            self.current_row = {'cells': []}
        elif tag in ('td', 'th') and self.in_table and self.current_row is not None:
            self.current_cell = {
                'text': '',
                'is_header': tag == 'th',
                'colspan': self._parse_span(attrs_dict.get('colspan', 1)),
                'rowspan': self._parse_span(attrs_dict.get('rowspan', 1)),
            }

    def handle_endtag(self, tag):
        if tag == 'table' and self.in_table:
            # Tables without any row are dropped.
            if self.current_table and self.current_table['rows']:
                self.tables.append(self.current_table)
            self.current_table = None
            self.in_table = False
        elif tag == 'tr' and self.current_row is not None:
            if self.current_table is not None:
                self.current_table['rows'].append(self.current_row)
            self.current_row = None
        elif tag in ('td', 'th') and self.current_cell is not None:
            if self.current_row is not None:
                self.current_row['cells'].append(self.current_cell)
            self.current_cell = None

    def handle_data(self, data):
        # Only text that appears inside an open cell is recorded.
        if self.current_cell is not None:
            self.current_cell['text'] += data.strip() + ' '
class PDFGeneratorService:
    """Build layout-preserving PDF files from OCR JSON results."""

    def __init__(self):
        """Set the font defaults, then attempt to register the Chinese font."""
        # Start from the "no font available" state; the registration step
        # below updates font_path / font_registered when it succeeds.
        self.font_registered = False
        self.font_path = None
        self.font_name = 'NotoSansSC'
        self._register_chinese_font()
def _register_chinese_font(self):
    """
    Register the configured Chinese TrueType font with ReportLab.

    On success ``font_path`` and ``font_registered`` are updated; when the
    font file is missing or registration raises, the error is logged and the
    service stays in the unregistered state.
    """
    try:
        configured = Path(settings.chinese_font_path)

        if configured.is_absolute():
            resolved = configured
        else:
            # Relative paths are resolved against the directory four levels
            # above this module (the setting starts with ./backend/).
            root_dir = Path(__file__).resolve().parent.parent.parent.parent
            resolved = root_dir / configured

        if resolved.exists():
            pdfmetrics.registerFont(TTFont(self.font_name, str(resolved)))
            self.font_path = resolved
            self.font_registered = True
            logger.info(f"Chinese font registered: {self.font_name} from {resolved}")
        else:
            logger.error(f"Chinese font not found at {resolved}")
    except Exception as e:
        logger.error(f"Failed to register Chinese font: {e}")
        self.font_registered = False
def load_ocr_json(self, json_path: Path) -> Optional[Dict]:
    """
    Read an OCR result file and decode it as JSON.

    Args:
        json_path: Location of the UTF-8 encoded JSON file

    Returns:
        The decoded JSON data, or None when reading or decoding fails
        (the failure is logged)
    """
    try:
        with open(json_path, 'r', encoding='utf-8') as handle:
            parsed = json.loads(handle.read())
        logger.info(f"Loaded OCR JSON: {json_path.name}")
        return parsed
    except Exception as e:
        logger.error(f"Failed to load JSON {json_path}: {e}")
        return None
def calculate_page_dimensions(self, text_regions: List[Dict], source_file_path: Optional[Path] = None) -> Tuple[float, float]:
    """
    Work out the page size for the generated PDF.

    The size of the source file wins when it can be determined; otherwise the
    largest x / y coordinate found in the text region bounding boxes is used,
    falling back to A4 when nothing usable is available.

    Args:
        text_regions: Text regions carrying 'bbox' point lists
        source_file_path: Optional original file to read the dimensions from

    Returns:
        Tuple of (width, height) in points
    """
    # Preferred source: the original file itself.
    if source_file_path:
        source_dims = self.get_original_page_size(source_file_path)
        if source_dims:
            return source_dims

    if not text_regions:
        return A4  # Default to A4 size

    widest = 0
    tallest = 0
    for entry in text_regions:
        corners = entry.get('bbox', [])
        # Expected shape: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
        if corners and len(corners) >= 4:
            for corner in corners:
                if isinstance(corner, (list, tuple)) and len(corner) >= 2:
                    widest = max(widest, corner[0])
                    tallest = max(tallest, corner[1])

    # Pixel coordinates are used as points 1:1, without padding; each axis
    # falls back to A4 independently when no positive coordinate was found.
    width = widest if widest > 0 else A4[0]
    height = tallest if tallest > 0 else A4[1]
    logger.info(f"Calculated page dimensions from OCR: {width:.1f} x {height:.1f} points")
    return (width, height)
def get_original_page_size(self, file_path: Path) -> Optional[Tuple[float, float]]:
    """
    Extract page dimensions from original source file

    Only raster images are supported; for any other file type (including
    PDF) None is returned so that the caller falls back to dimensions
    calculated from the OCR coordinates.

    Args:
        file_path: Path to original file (image or PDF)

    Returns:
        Tuple of (width, height) in points or None when the file is missing,
        is not a supported image, or cannot be read
    """
    try:
        if not file_path.exists():
            return None

        # For images, get dimensions from PIL
        if file_path.suffix.lower() in ('.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff'):
            # The context manager closes the underlying file handle; the
            # image is only opened to read its header dimensions.
            with Image.open(file_path) as img:
                # Use pixel dimensions directly as points (1:1 mapping)
                # so they line up with the OCR pixel coordinates.
                width_pt = float(img.width)
                height_pt = float(img.height)
            logger.info(f"Extracted dimensions from image: {width_pt:.1f} x {height_pt:.1f} points (1:1 pixel mapping)")
            return (width_pt, height_pt)

        # For PDFs, would need PyPDF2 or similar
        # For now, return None to use calculated dimensions
    except Exception as e:
        logger.warning(f"Failed to get page size from {file_path}: {e}")
    return None
def draw_text_region(
    self,
    pdf_canvas: canvas.Canvas,
    region: Dict,
    page_height: float
):
    """
    Render one OCR text region at its original position.

    The font size starts at 75% of the bounding-box height (clamped to
    4-72pt) and is reduced when the rendered text would be wider than the
    box. Failures are logged and do not propagate.

    Args:
        pdf_canvas: ReportLab canvas object
        region: Text region dict with 'text' and 'bbox'
        page_height: Height of page (for coordinate transformation)
    """
    text = region.get('text', '')
    bbox = region.get('bbox', [])

    # Nothing to draw without text and a four-point bounding box.
    if not (text and bbox and len(bbox) >= 4):
        return

    try:
        # bbox points: top-left, top-right, bottom-right, bottom-left,
        # in OCR coordinates (origin top-left, Y grows downward).
        left = bbox[0][0]
        top = bbox[0][1]
        right = bbox[2][0]
        bottom = bbox[2][1]

        box_w = abs(right - left)
        box_h = abs(bottom - top)

        # Heuristic start size, clamped between 4pt and 72pt.
        size = max(min(box_h * 0.75, 72), 4)

        # PDF origin is bottom-left, so the Y axis is flipped using the
        # bottom edge of the box.
        x = left
        y = page_height - bottom

        face = self.font_name if self.font_registered else 'Helvetica'
        pdf_canvas.setFont(face, size)

        # Shrink the font when the text would overflow the box width.
        rendered_w = pdf_canvas.stringWidth(text, face, size)
        if rendered_w > box_w:
            shrink = box_w / rendered_w
            size = max(size * shrink * 0.95, 3)  # 5% margin, minimum 3pt
            pdf_canvas.setFont(face, size)

        pdf_canvas.drawString(x, y, text)

        # Optional debugging aid: outline the bounding box.
        if settings.pdf_enable_bbox_debug:
            pdf_canvas.setStrokeColorRGB(1, 0, 0, 0.3)  # Red, semi-transparent
            pdf_canvas.setLineWidth(0.5)
            pdf_points = [(p[0], page_height - p[1]) for p in bbox]
            quad = pdf_points[:4]
            for (x1, y1), (x2, y2) in zip(quad, quad[1:] + quad[:1]):
                pdf_canvas.line(x1, y1, x2, y2)

    except Exception as e:
        logger.warning(f"Failed to draw text region '{text[:20]}...': {e}")
def draw_table_region(
    self,
    pdf_canvas: canvas.Canvas,
    table_element: Dict,
    images_metadata: List[Dict],
    page_height: float
):
    """
    Draw a table region by parsing HTML and rebuilding with ReportLab Table

    The table position comes from the bounding box of a table image in
    ``images_metadata``. An image on the same page as ``table_element`` is
    preferred; the first table image is only used as a fallback when page
    information is missing on either side. Only the first table found in the
    HTML content is drawn. Failures are logged and do not propagate.

    Args:
        pdf_canvas: ReportLab canvas object
        table_element: Table element dict with HTML content
        images_metadata: List of image metadata to find table bbox
        page_height: Height of page
    """
    try:
        html_content = table_element.get('content', '')
        if not html_content:
            return

        # Parse HTML to extract table structure
        parser = HTMLTableParser()
        parser.feed(html_content)

        if not parser.tables:
            logger.warning("No tables found in HTML content")
            return

        # Get the first table (PP-StructureV3 usually provides one table per element)
        rows = parser.tables[0]['rows']
        if not rows:
            return

        # Find the corresponding table image to get the bbox. Matching on
        # the page keeps a table on page N from being drawn at the position
        # of a table that belongs to a different page.
        element_page = table_element.get('page')
        table_bbox = None
        fallback_bbox = None
        for img_meta in images_metadata:
            if 'table' not in img_meta.get('image_path', '').lower():
                continue
            bbox = img_meta.get('bbox', [])
            if not bbox or len(bbox) < 4:
                continue
            img_page = img_meta.get('page')
            if element_page is None or img_page is None:
                # No page information to compare: remember the first such
                # candidate (this is the historical behaviour).
                if fallback_bbox is None:
                    fallback_bbox = bbox
            elif img_page == element_page:
                table_bbox = bbox
                break
        if table_bbox is None:
            table_bbox = fallback_bbox

        if not table_bbox:
            logger.warning("No bbox found for table")
            return

        # Extract bbox coordinates (OCR space: origin top-left)
        ocr_x_left = table_bbox[0][0]
        ocr_y_top = table_bbox[0][1]
        ocr_x_right = table_bbox[2][0]
        ocr_y_bottom = table_bbox[2][1]

        table_width = abs(ocr_x_right - ocr_x_left)
        table_height = abs(ocr_y_bottom - ocr_y_top)

        # Transform coordinates: PDF origin is bottom-left
        pdf_x = ocr_x_left
        pdf_y = page_height - ocr_y_bottom

        # Build table data for ReportLab as a rectangular 2D array
        max_cols = max(len(row['cells']) for row in rows)
        if max_cols == 0:
            # Rows without any cell would lead to a division by zero below.
            logger.warning("Table has no cells")
            return

        reportlab_data = []
        for row in rows:
            row_data = [cell['text'].strip() for cell in row['cells']]
            # Pad short rows so every row has max_cols entries
            row_data.extend([''] * (max_cols - len(row_data)))
            reportlab_data.append(row_data)

        # Calculate column widths (equal distribution)
        col_widths = [table_width / max_cols] * max_cols

        # Use smaller font size to fit in bbox (clamped to 6-10pt)
        font_size = min(table_height / len(rows) * 0.5, 10)
        font_size = max(font_size, 6)

        table = Table(reportlab_data, colWidths=col_widths)

        # Apply table style
        style = TableStyle([
            ('FONT', (0, 0), (-1, -1), self.font_name if self.font_registered else 'Helvetica', font_size),
            ('GRID', (0, 0), (-1, -1), 0.5, colors.black),
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('LEFTPADDING', (0, 0), (-1, -1), 2),
            ('RIGHTPADDING', (0, 0), (-1, -1), 2),
            ('TOPPADDING', (0, 0), (-1, -1), 2),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 2),
        ])

        # Add header style if first row has headers
        if rows and rows[0]['cells'] and rows[0]['cells'][0].get('is_header'):
            style.add('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey)
            style.add('FONT', (0, 0), (-1, 0), self.font_name if self.font_registered else 'Helvetica-Bold', font_size)

        table.setStyle(style)

        # Calculate table size
        table.wrapOn(pdf_canvas, table_width, table_height)

        # Draw table at position
        table.drawOn(pdf_canvas, pdf_x, pdf_y)

        logger.info(f"Drew table at ({pdf_x:.0f}, {pdf_y:.0f}) size {table_width:.0f}x{table_height:.0f} with {len(rows)} rows")

    except Exception as e:
        # exc_info routes the traceback through the logger instead of stderr
        logger.warning(f"Failed to draw table region: {e}", exc_info=True)
def draw_image_region(
    self,
    pdf_canvas: canvas.Canvas,
    region: Dict,
    page_height: float,
    result_dir: Path
):
    """
    Embed an extracted image at the position given by its bounding box.

    The image file is looked up relative to ``result_dir``. Regions without
    an image path, an existing file, or a four-point bbox are skipped (the
    latter two with a warning). Failures are logged and do not propagate.

    Args:
        pdf_canvas: ReportLab canvas object
        region: Image metadata dict with image_path and bbox
        page_height: Height of page (for coordinate transformation)
        result_dir: Directory containing result files
    """
    try:
        relative_name = region.get('image_path', '')
        if not relative_name:
            return

        image_file = result_dir / relative_name
        if not image_file.exists():
            logger.warning(f"Image not found: {image_file}")
            return

        corners = region.get('bbox', [])
        if not corners or len(corners) < 4:
            # Without a position the image cannot be placed.
            logger.warning(f"No bbox for image {relative_name}")
            return

        # Corner 0 is read as top-left and corner 2 as bottom-right, in
        # OCR coordinates (origin top-left, Y grows downward).
        top_left, bottom_right = corners[0], corners[2]
        left, top = top_left[0], top_left[1]
        right, bottom = bottom_right[0], bottom_right[1]

        box_w = abs(right - left)
        box_h = abs(bottom - top)

        # drawImage anchors at the bottom-left corner in PDF space, so the
        # Y axis is flipped using the bottom edge of the box.
        x = left
        y = page_height - bottom

        pdf_canvas.drawImage(
            str(image_file),
            x,
            y,
            width=box_w,
            height=box_h,
            preserveAspectRatio=True,
            mask='auto'  # Handle transparency
        )

        logger.info(f"Drew image: {relative_name} at ({x:.0f}, {y:.0f}) size {box_w:.0f}x{box_h:.0f}")

    except Exception as e:
        logger.warning(f"Failed to draw image region: {e}")
def generate_layout_pdf(
    self,
    json_path: Path,
    output_path: Path,
    source_file_path: Optional[Path] = None
) -> bool:
    """
    Generate layout-preserving PDF from OCR JSON data

    An existing ``output_path`` is treated as a cache hit and left untouched.
    Text regions whose centre lies inside a table image bounding box on the
    same page are skipped, because that content is rendered by the rebuilt
    table instead. Image files are looked up relative to the JSON file's
    directory.

    Args:
        json_path: Path to OCR JSON file
        output_path: Path to save generated PDF
        source_file_path: Optional path to original source file for dimension extraction

    Returns:
        True if successful, False otherwise
    """
    try:
        # Check if PDF already exists (caching)
        if output_path.exists():
            logger.info(f"PDF already exists: {output_path.name}")
            return True

        # Load JSON data
        ocr_data = self.load_ocr_json(json_path)
        if not ocr_data:
            return False

        text_regions = ocr_data.get('text_regions', [])
        if not text_regions:
            logger.warning("No text regions found in JSON")
            return False

        images_metadata = ocr_data.get('images_metadata', [])
        layout_data = ocr_data.get('layout_data', {})

        # Determine page dimensions (one size is used for every page)
        page_width, page_height = self.calculate_page_dimensions(text_regions, source_file_path)

        # Create PDF canvas
        pdf_canvas = canvas.Canvas(str(output_path), pagesize=(page_width, page_height))

        # Collect table bboxes together with their 0-indexed page so that a
        # table only masks text on its own page. Entries without page
        # information (page is None) apply to every page.
        table_boxes = []
        for img_meta in images_metadata:
            if 'table' in img_meta.get('image_path', '').lower():
                bbox = img_meta.get('bbox', [])
                if bbox and len(bbox) >= 4:
                    table_boxes.append((img_meta.get('page'), bbox))

        def point_in_bbox(x, y, bbox):
            x1, y1 = bbox[0]
            x2, y2 = bbox[2]
            return min(x1, x2) <= x <= max(x1, x2) and min(y1, y2) <= y <= max(y1, y2)

        def inside_table(region_page, x, y):
            # region_page is 1-indexed, table pages are 0-indexed
            return any(
                (table_page is None or table_page == region_page - 1)
                and point_in_bbox(x, y, bbox)
                for table_page, bbox in table_boxes
            )

        # Group the remaining text regions by (1-indexed) page
        pages_data = {}
        kept = 0
        for region in text_regions:
            bbox = region.get('bbox', [])
            if not bbox or len(bbox) < 4:
                continue

            page_num = region.get('page', 1)
            center_x = (bbox[0][0] + bbox[2][0]) / 2
            center_y = (bbox[0][1] + bbox[2][1]) / 2

            if inside_table(page_num, center_x, center_y):
                logger.debug(f"Excluded text '{region.get('text', '')[:20]}...' (inside table)")
                continue

            pages_data.setdefault(page_num, []).append(region)
            kept += 1

        logger.info(f"Filtered {len(text_regions) - kept} text regions inside tables")

        # Get table elements from layout_data
        table_elements = []
        if layout_data and layout_data.get('elements'):
            table_elements = [e for e in layout_data['elements'] if e.get('type') == 'table']

        # Process each page
        total_pages = ocr_data.get('total_pages', 1)
        for page_num in range(1, total_pages + 1):
            if page_num > 1:
                pdf_canvas.showPage()  # Start new page

            # Draw text regions for this page (excluding table text)
            for region in pages_data.get(page_num, []):
                self.draw_text_region(pdf_canvas, region, page_height)

            # Draw tables for this page
            for table_elem in table_elements:
                if table_elem.get('page', 0) == page_num - 1:  # page is 0-indexed
                    self.draw_table_region(pdf_canvas, table_elem, images_metadata, page_height)

            # Draw non-table images for this page (figure, chart, seal, etc.)
            for img_meta in images_metadata:
                if img_meta.get('page') == page_num - 1:  # page is 0-indexed
                    img_path = img_meta.get('image_path', '')
                    # Skip table images (they're now rendered as tables)
                    if 'table' not in img_path.lower():
                        self.draw_image_region(
                            pdf_canvas,
                            img_meta,
                            page_height,
                            json_path.parent
                        )

        # Save PDF
        pdf_canvas.save()

        file_size = output_path.stat().st_size
        logger.info(f"Generated layout-preserving PDF: {output_path.name} ({file_size} bytes)")
        return True

    except Exception as e:
        # exc_info routes the traceback through the logger instead of stderr
        logger.error(f"Failed to generate PDF: {e}", exc_info=True)
        return False
# Singleton instance: created once when this module is imported and shared by
# every importer. NOTE: the constructor attempts Chinese font registration,
# so that attempt (and its log output) happens at import time.
pdf_generator_service = PDFGeneratorService()

31
backend/download_fonts.sh Executable file
View File

@@ -0,0 +1,31 @@
#!/bin/bash
# Download Noto Sans SC TrueType font for layout-preserving PDF generation
#
# The font is stored as fonts/NotoSansSC-Regular.ttf next to this script.
# The download goes to a temporary file first and is only moved into place
# once it completed and is non-empty, so a failed download never leaves a
# broken font file that a later run would mistake for a valid one.

set -euo pipefail

# Resolve the target directory from the script location so the result does
# not depend on the directory the script is started from.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
FONT_DIR="$SCRIPT_DIR/fonts"
FONT_URL="https://github.com/notofonts/noto-cjk/raw/main/Sans/Variable/TTF/Subset/NotoSansSC-VF.ttf"
FONT_FILE="NotoSansSC-Regular.ttf"

echo "🔤 Downloading Chinese font for PDF generation..."

# Create font directory
mkdir -p "$FONT_DIR"

# Download font if not exists (an empty leftover file counts as missing)
if [ -s "$FONT_DIR/$FONT_FILE" ]; then
    echo "✓ Font already exists: $FONT_DIR/$FONT_FILE"
else
    echo "Downloading from GitHub..."
    TMP_FILE="$(mktemp "$FONT_DIR/$FONT_FILE.XXXXXX")"
    # Remove the temporary file on any exit path (no-op after the mv below)
    trap 'rm -f "$TMP_FILE"' EXIT

    if wget "$FONT_URL" -O "$TMP_FILE" && [ -s "$TMP_FILE" ]; then
        # mktemp creates the file owner-only; make the font world-readable
        chmod 644 "$TMP_FILE"
        mv "$TMP_FILE" "$FONT_DIR/$FONT_FILE"
        SIZE=$(du -h "$FONT_DIR/$FONT_FILE" | cut -f1)
        echo "✓ Font downloaded successfully: $SIZE"
    else
        echo "✗ Font download failed"
        exit 1
    fi
fi

echo "✅ Font setup complete!"

Binary file not shown.

View File

@@ -0,0 +1,62 @@
"""
Test script to verify ReportLab and Chinese font rendering
"""
from reportlab.pdfgen import canvas
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from pathlib import Path
import sys
def test_chinese_rendering(font_path=None, output_path=None):
    """Test if Chinese characters can be rendered in PDF.

    Args:
        font_path: Path to the TrueType font to register. When omitted, the
            following locations are tried in order: the ``CHINESE_FONT_PATH``
            environment variable, ``fonts/NotoSansSC-Regular.ttf`` next to
            this script, ``backend/fonts/NotoSansSC-Regular.ttf`` under the
            current working directory, and finally the original hard-coded
            development path.
        output_path: Where to write the test PDF. Defaults to
            ``test_chinese.pdf`` inside the system temporary directory.

    Returns:
        True if the PDF was written and is non-empty. False if no font file
        was found, the PDF is empty, or any step raised an exception.
    """
    import os
    import tempfile

    font_file = "NotoSansSC-Regular.ttf"

    # Font path
    if font_path is not None:
        candidates = [str(font_path)]
    else:
        defaults = [
            os.environ.get("CHINESE_FONT_PATH"),
            Path(__file__).resolve().parent / "fonts" / font_file,
            Path("backend") / "fonts" / font_file,
            # Original development location, kept as a last resort.
            "/home/egg/project/Tool_OCR/backend/fonts/NotoSansSC-Regular.ttf",
        ]
        candidates = [str(c) for c in defaults if c]

    # Check if font file exists
    font_path = next((c for c in candidates if Path(c).exists()), None)
    if font_path is None:
        print(f"❌ Font file not found: {', '.join(candidates)}")
        return False
    print(f"✓ Font file found: {font_path}")

    try:
        # Register Chinese font
        pdfmetrics.registerFont(TTFont('NotoSansSC', font_path))
        print("✓ Font registered successfully")

        # Create test PDF
        if output_path is not None:
            test_pdf = str(output_path)
        else:
            test_pdf = str(Path(tempfile.gettempdir()) / "test_chinese.pdf")
        c = canvas.Canvas(test_pdf)

        # Set Chinese font
        c.setFont('NotoSansSC', 14)

        # Draw test text
        c.drawString(100, 750, "測試中文字符渲染 - Test Chinese Character Rendering")
        c.drawString(100, 730, "HTD-S1 技術數據表")
        c.drawString(100, 710, "這是一個 PDF 生成測試")
        c.save()
        print(f"✓ Test PDF created: {test_pdf}")

        # Check file size
        file_size = Path(test_pdf).stat().st_size
        print(f"✓ PDF file size: {file_size} bytes")
        if file_size > 0:
            print("\n✅ Chinese font rendering test PASSED")
            return True
        else:
            print("\n❌ PDF file is empty")
            return False
    except Exception as e:
        # Diagnostic script: report any failure and signal it via the return
        # value instead of propagating.
        print(f"❌ Error during testing: {e}")
        import traceback
        traceback.print_exc()
        return False
if __name__ == "__main__":
    # Exit status mirrors the test outcome: 0 when rendering passed, 1 otherwise.
    exit_code = 0 if test_chinese_rendering() else 1
    sys.exit(exit_code)

4
frontend/.env.example Normal file
View File

@@ -0,0 +1,4 @@
# Backend API URL
# For WSL2, use the WSL2 IP address (get it with: hostname -I); the address below is only an example
# For native Linux/Mac, use http://localhost:8000
VITE_API_URL=http://172.20.20.106:8000

View File

@@ -19,6 +19,7 @@
"react-dropzone": "^14.3.8",
"react-i18next": "^16.3.0",
"react-markdown": "^9.0.1",
"react-pdf": "^10.2.0",
"react-router-dom": "^7.9.5",
"tailwind-merge": "^3.4.0",
"zustand": "^5.0.8"
@@ -1048,6 +1049,191 @@
"@jridgewell/sourcemap-codec": "^1.4.14"
}
},
"node_modules/@napi-rs/canvas": {
"version": "0.1.82",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas/-/canvas-0.1.82.tgz",
"integrity": "sha512-FGjyUBoF0sl1EenSiE4UV2WYu76q6F9GSYedq5EiOCOyGYoQ/Owulcv6rd7v/tWOpljDDtefXXIaOCJrVKem4w==",
"license": "MIT",
"optional": true,
"workspaces": [
"e2e/*"
],
"engines": {
"node": ">= 10"
},
"optionalDependencies": {
"@napi-rs/canvas-android-arm64": "0.1.82",
"@napi-rs/canvas-darwin-arm64": "0.1.82",
"@napi-rs/canvas-darwin-x64": "0.1.82",
"@napi-rs/canvas-linux-arm-gnueabihf": "0.1.82",
"@napi-rs/canvas-linux-arm64-gnu": "0.1.82",
"@napi-rs/canvas-linux-arm64-musl": "0.1.82",
"@napi-rs/canvas-linux-riscv64-gnu": "0.1.82",
"@napi-rs/canvas-linux-x64-gnu": "0.1.82",
"@napi-rs/canvas-linux-x64-musl": "0.1.82",
"@napi-rs/canvas-win32-x64-msvc": "0.1.82"
}
},
"node_modules/@napi-rs/canvas-android-arm64": {
"version": "0.1.82",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-android-arm64/-/canvas-android-arm64-0.1.82.tgz",
"integrity": "sha512-bvZhN0iI54ouaQOrgJV96H2q7J3ZoufnHf4E1fUaERwW29Rz4rgicohnAg4venwBJZYjGl5Yl3CGmlAl1LZowQ==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-darwin-arm64": {
"version": "0.1.82",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-arm64/-/canvas-darwin-arm64-0.1.82.tgz",
"integrity": "sha512-InuBHKCyuFqhNwNr4gpqazo5Xp6ltKflqOLiROn4hqAS8u21xAHyYCJRgHwd+a5NKmutFTaRWeUIT/vxWbU/iw==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-darwin-x64": {
"version": "0.1.82",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-x64/-/canvas-darwin-x64-0.1.82.tgz",
"integrity": "sha512-aQGV5Ynn96onSXcuvYb2y7TRXD/t4CL2EGmnGqvLyeJX1JLSNisKQlWN/1bPDDXymZYSdUqbXehj5qzBlOx+RQ==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-linux-arm-gnueabihf": {
"version": "0.1.82",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm-gnueabihf/-/canvas-linux-arm-gnueabihf-0.1.82.tgz",
"integrity": "sha512-YIUpmHWeHGGRhWitT1KJkgj/JPXPfc9ox8oUoyaGPxolLGPp5AxJkq8wIg8CdFGtutget968dtwmx71m8o3h5g==",
"cpu": [
"arm"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-linux-arm64-gnu": {
"version": "0.1.82",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-gnu/-/canvas-linux-arm64-gnu-0.1.82.tgz",
"integrity": "sha512-AwLzwLBgmvk7kWeUgItOUor/QyG31xqtD26w1tLpf4yE0hiXTGp23yc669aawjB6FzgIkjh1NKaNS52B7/qEBQ==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-linux-arm64-musl": {
"version": "0.1.82",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-musl/-/canvas-linux-arm64-musl-0.1.82.tgz",
"integrity": "sha512-moZWuqepAwWBffdF4JDadt8TgBD02iMhG6I1FHZf8xO20AsIp9rB+p0B8Zma2h2vAF/YMjeFCDmW5un6+zZz9g==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-linux-riscv64-gnu": {
"version": "0.1.82",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-riscv64-gnu/-/canvas-linux-riscv64-gnu-0.1.82.tgz",
"integrity": "sha512-w9++2df2kG9eC9LWYIHIlMLuhIrKGQYfUxs97CwgxYjITeFakIRazI9LYWgVzEc98QZ9x9GQvlicFsrROV59MQ==",
"cpu": [
"riscv64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-linux-x64-gnu": {
"version": "0.1.82",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-gnu/-/canvas-linux-x64-gnu-0.1.82.tgz",
"integrity": "sha512-lZulOPwrRi6hEg/17CaqdwWEUfOlIJuhXxincx1aVzsVOCmyHf+xFq4i6liJl1P+x2v6Iz2Z/H5zHvXJCC7Bwg==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-linux-x64-musl": {
"version": "0.1.82",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-musl/-/canvas-linux-x64-musl-0.1.82.tgz",
"integrity": "sha512-Be9Wf5RTv1w6GXlTph55K3PH3vsAh1Ax4T1FQY1UYM0QfD0yrwGdnJ8/fhqw7dEgMjd59zIbjJQC8C3msbGn5g==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-win32-x64-msvc": {
"version": "0.1.82",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-win32-x64-msvc/-/canvas-win32-x64-msvc-0.1.82.tgz",
"integrity": "sha512-LN/i8VrvxTDmEEK1c10z2cdOTkWT76LlTGtyZe5Kr1sqoSomKeExAjbilnu1+oee5lZUgS5yfZ2LNlVhCeARuw==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@nodelib/fs.scandir": {
"version": "2.1.5",
"resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
@@ -4007,6 +4193,24 @@
"@jridgewell/sourcemap-codec": "^1.5.5"
}
},
"node_modules/make-cancellable-promise": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/make-cancellable-promise/-/make-cancellable-promise-2.0.0.tgz",
"integrity": "sha512-3SEQqTpV9oqVsIWqAcmDuaNeo7yBO3tqPtqGRcKkEo0lrzD3wqbKG9mkxO65KoOgXqj+zH2phJ2LiAsdzlogSw==",
"license": "MIT",
"funding": {
"url": "https://github.com/wojtekmaj/make-cancellable-promise?sponsor=1"
}
},
"node_modules/make-event-props": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/make-event-props/-/make-event-props-2.0.0.tgz",
"integrity": "sha512-G/hncXrl4Qt7mauJEXSg3AcdYzmpkIITTNl5I+rH9sog5Yw0kK6vseJjCaPfOXqOqQuPUP89Rkhfz5kPS8ijtw==",
"license": "MIT",
"funding": {
"url": "https://github.com/wojtekmaj/make-event-props?sponsor=1"
}
},
"node_modules/math-intrinsics": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
@@ -4169,6 +4373,23 @@
"url": "https://opencollective.com/unified"
}
},
"node_modules/merge-refs": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/merge-refs/-/merge-refs-2.0.0.tgz",
"integrity": "sha512-3+B21mYK2IqUWnd2EivABLT7ueDhb0b8/dGK8LoFQPrU61YITeCMn14F7y7qZafWNZhUEKb24cJdiT5Wxs3prg==",
"license": "MIT",
"funding": {
"url": "https://github.com/wojtekmaj/merge-refs?sponsor=1"
},
"peerDependencies": {
"@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/merge2": {
"version": "1.4.1",
"resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz",
@@ -5060,6 +5281,47 @@
"react": ">=18"
}
},
"node_modules/react-pdf": {
"version": "10.2.0",
"resolved": "https://registry.npmjs.org/react-pdf/-/react-pdf-10.2.0.tgz",
"integrity": "sha512-zk0DIL31oCh8cuQycM0SJKfwh4Onz0/Nwi6wTOjgtEjWGUY6eM+/vuzvOP3j70qtEULn7m1JtaeGzud1w5fY2Q==",
"license": "MIT",
"dependencies": {
"clsx": "^2.0.0",
"dequal": "^2.0.3",
"make-cancellable-promise": "^2.0.0",
"make-event-props": "^2.0.0",
"merge-refs": "^2.0.0",
"pdfjs-dist": "5.4.296",
"tiny-invariant": "^1.0.0",
"warning": "^4.0.0"
},
"funding": {
"url": "https://github.com/wojtekmaj/react-pdf?sponsor=1"
},
"peerDependencies": {
"@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
"react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
"react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/react-pdf/node_modules/pdfjs-dist": {
"version": "5.4.296",
"resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-5.4.296.tgz",
"integrity": "sha512-DlOzet0HO7OEnmUmB6wWGJrrdvbyJKftI1bhMitK7O2N8W2gc757yyYBbINy9IDafXAV9wmKr9t7xsTaNKRG5Q==",
"license": "Apache-2.0",
"engines": {
"node": ">=20.16.0 || >=22.3.0"
},
"optionalDependencies": {
"@napi-rs/canvas": "^0.1.80"
}
},
"node_modules/react-refresh": {
"version": "0.18.0",
"resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.18.0.tgz",
@@ -5382,6 +5644,12 @@
"url": "https://opencollective.com/webpack"
}
},
"node_modules/tiny-invariant": {
"version": "1.3.3",
"resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz",
"integrity": "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==",
"license": "MIT"
},
"node_modules/tinyglobby": {
"version": "0.2.15",
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz",
@@ -5824,6 +6092,15 @@
"node": ">=0.10.0"
}
},
"node_modules/warning": {
"version": "4.0.3",
"resolved": "https://registry.npmjs.org/warning/-/warning-4.0.3.tgz",
"integrity": "sha512-rpJyN222KWIvHJ/F53XSZv0Zl/accqHR8et1kpaMTD/fLCRxtV8iX8czMzY7sVZupTI3zcUTg8eycS2kNF9l6w==",
"license": "MIT",
"dependencies": {
"loose-envify": "^1.0.0"
}
},
"node_modules/which": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",

View File

@@ -21,6 +21,7 @@
"react-dropzone": "^14.3.8",
"react-i18next": "^16.3.0",
"react-markdown": "^9.0.1",
"react-pdf": "^10.2.0",
"react-router-dom": "^7.9.5",
"tailwind-merge": "^3.4.0",
"zustand": "^5.0.8"

View File

@@ -0,0 +1,156 @@
import { useState, useMemo } from 'react'
import { Document, Page } from 'react-pdf'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Button } from '@/components/ui/button'
import { ChevronLeft, ChevronRight, ZoomIn, ZoomOut } from 'lucide-react'
import 'react-pdf/dist/Page/AnnotationLayer.css'
import 'react-pdf/dist/Page/TextLayer.css'
/** Props accepted by the PDFViewer component. */
interface PDFViewerProps {
  /** URL the PDF document is loaded from. */
  pdfUrl: string
  /** Optional heading; the card header is rendered only when this is set. */
  title?: string
  /** Optional class name applied to the outer Card. */
  className?: string
  /** Optional HTTP headers passed to react-pdf together with the URL. */
  httpHeaders?: Record<string, string>
}
/**
 * Card-wrapped PDF preview with page navigation and zoom controls.
 *
 * The document is loaded by react-pdf from `pdfUrl`; when `httpHeaders` is
 * provided it is sent along with the request (e.g. an Authorization header).
 * Zoom is clamped to 50%–300% in 20% steps, and page navigation is clamped to
 * the range 1..numPages.
 */
export default function PDFViewer({ title, pdfUrl, className, httpHeaders }: PDFViewerProps) {
  const [numPages, setNumPages] = useState<number>(0)
  const [pageNumber, setPageNumber] = useState<number>(1)
  const [scale, setScale] = useState<number>(1.0)
  const [loading, setLoading] = useState<boolean>(true)
  const [error, setError] = useState<string | null>(null)

  // Callers usually build `httpHeaders` inline, so the object identity changes
  // on every parent render. Keying the memo on the serialized content instead
  // keeps `fileConfig` stable until the headers actually change, which is what
  // prevents react-pdf from reloading the document on each render.
  const headersKey = httpHeaders ? JSON.stringify(httpHeaders) : ''

  // Memoize the file prop to prevent unnecessary reloads
  const fileConfig = useMemo(() => {
    return headersKey
      ? { url: pdfUrl, httpHeaders: JSON.parse(headersKey) as Record<string, string> }
      : pdfUrl
  }, [pdfUrl, headersKey])

  // Reset per-document state when the source changes. Without this a previous
  // load error would keep <Document> unmounted forever, and the page number
  // could point past the end of the new document.
  const [activeFile, setActiveFile] = useState(fileConfig)
  if (activeFile !== fileConfig) {
    setActiveFile(fileConfig)
    setNumPages(0)
    setPageNumber(1)
    setLoading(true)
    setError(null)
  }

  const onDocumentLoadSuccess = ({ numPages }: { numPages: number }) => {
    setNumPages(numPages)
    setLoading(false)
    setError(null)
  }

  const onDocumentLoadError = (error: Error) => {
    console.error('Error loading PDF:', error)
    setError('Failed to load PDF. Please try again later.')
    setLoading(false)
  }

  const goToPreviousPage = () => {
    setPageNumber((prev) => Math.max(prev - 1, 1))
  }

  const goToNextPage = () => {
    setPageNumber((prev) => Math.min(prev + 1, numPages))
  }

  const zoomIn = () => {
    setScale((prev) => Math.min(prev + 0.2, 3.0))
  }

  const zoomOut = () => {
    setScale((prev) => Math.max(prev - 0.2, 0.5))
  }

  return (
    <Card className={className}>
      {title && (
        <CardHeader>
          <CardTitle>{title}</CardTitle>
        </CardHeader>
      )}
      <CardContent>
        {/* Controls */}
        <div className="flex items-center justify-between mb-4 gap-4 flex-wrap">
          {/* Page Navigation */}
          <div className="flex items-center gap-2">
            <Button
              variant="outline"
              size="sm"
              onClick={goToPreviousPage}
              disabled={pageNumber <= 1 || loading}
            >
              <ChevronLeft className="h-4 w-4" />
            </Button>
            <span className="text-sm whitespace-nowrap">
              Page {pageNumber} of {numPages || '...'}
            </span>
            <Button
              variant="outline"
              size="sm"
              onClick={goToNextPage}
              disabled={pageNumber >= numPages || loading}
            >
              <ChevronRight className="h-4 w-4" />
            </Button>
          </div>

          {/* Zoom Controls */}
          <div className="flex items-center gap-2">
            <Button
              variant="outline"
              size="sm"
              onClick={zoomOut}
              disabled={scale <= 0.5 || loading}
            >
              <ZoomOut className="h-4 w-4" />
            </Button>
            <span className="text-sm whitespace-nowrap w-16 text-center">
              {Math.round(scale * 100)}%
            </span>
            <Button
              variant="outline"
              size="sm"
              onClick={zoomIn}
              disabled={scale >= 3.0 || loading}
            >
              <ZoomIn className="h-4 w-4" />
            </Button>
          </div>
        </div>

        {/* PDF Document */}
        <div className="border rounded-md bg-muted/10 overflow-auto max-h-[800px]">
          <div className="flex justify-center p-4">
            {error && (
              <div className="flex items-center justify-center min-h-[400px]">
                <div className="text-center">
                  <p className="text-destructive font-semibold mb-2">Error</p>
                  <p className="text-sm text-muted-foreground">{error}</p>
                </div>
              </div>
            )}

            {/* The loading spinner is supplied through Document's `loading`
                prop only; rendering a second one here would show two
                spinners side by side while the document loads. */}
            {!error && (
              <Document
                file={fileConfig}
                onLoadSuccess={onDocumentLoadSuccess}
                onLoadError={onDocumentLoadError}
                loading={
                  <div className="flex items-center justify-center min-h-[400px]">
                    <div className="animate-spin rounded-full h-12 w-12 border-b-2 border-primary"></div>
                  </div>
                }
              >
                <Page
                  pageNumber={pageNumber}
                  scale={scale}
                  renderTextLayer={true}
                  renderAnnotationLayer={true}
                  className="shadow-lg"
                />
              </Document>
            )}
          </div>
        </div>
      </CardContent>
    </Card>
  )
}

View File

@@ -8,6 +8,11 @@ import i18n from './i18n'
import './index.css'
import App from './App.tsx'
// Configure PDF.js worker for react-pdf
import { pdfjs } from 'react-pdf'
// Load the worker script from the unpkg CDN, pinned to the pdfjs-dist version that react-pdf bundles
pdfjs.GlobalWorkerOptions.workerSrc = `//unpkg.com/pdfjs-dist@${pdfjs.version}/build/pdf.worker.min.mjs`
// Create React Query client
const queryClient = new QueryClient({
defaultOptions: {

View File

@@ -3,7 +3,7 @@ import { useTranslation } from 'react-i18next'
import { useQuery } from '@tanstack/react-query'
import { Button } from '@/components/ui/button'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import MarkdownPreview from '@/components/MarkdownPreview'
import PDFViewer from '@/components/PDFViewer'
import { useToast } from '@/components/ui/toast'
import { useUploadStore } from '@/store/uploadStore'
import { apiClientV2 } from '@/services/apiV2'
@@ -157,6 +157,14 @@ export default function ResultsPage() {
const isCompleted = taskDetail.status === 'completed'
// Construct PDF URL for preview
const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || 'http://localhost:8000'
const pdfUrl = taskId ? `${API_BASE_URL}/api/v2/tasks/${taskId}/download/pdf` : ''
// Get auth token for PDF preview
const authToken = localStorage.getItem('auth_token_v2')
const pdfHttpHeaders = authToken ? { Authorization: `Bearer ${authToken}` } : undefined
return (
<div className="space-y-6">
{/* Page Header */}
@@ -242,17 +250,11 @@ export default function ResultsPage() {
{/* Results Preview */}
{isCompleted ? (
<Card>
<CardHeader>
<CardTitle></CardTitle>
</CardHeader>
<CardContent>
<MarkdownPreview
title={`OCR 結果 - ${taskDetail.filename || '未知檔案'}`}
content="請使用上方下載按鈕下載 Markdown 或 JSON 格式查看完整結果"
<PDFViewer
title={`OCR 結果預覽 - ${taskDetail.filename || '未知檔案'}`}
pdfUrl={pdfUrl}
httpHeaders={pdfHttpHeaders}
/>
</CardContent>
</Card>
) : taskDetail.status === 'processing' ? (
<Card>
<CardContent className="p-12 text-center">

View File

@@ -3,7 +3,7 @@ import { useTranslation } from 'react-i18next'
import { useQuery } from '@tanstack/react-query'
import { Button } from '@/components/ui/button'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import MarkdownPreview from '@/components/MarkdownPreview'
import PDFViewer from '@/components/PDFViewer'
import { useToast } from '@/components/ui/toast'
import { apiClientV2 } from '@/services/apiV2'
import {
@@ -149,6 +149,14 @@ export default function TaskDetailPage() {
const isProcessing = taskDetail.status === 'processing'
const isFailed = taskDetail.status === 'failed'
// Construct PDF URL for preview
const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || 'http://localhost:8000'
const pdfUrl = taskId ? `${API_BASE_URL}/api/v2/tasks/${taskId}/download/pdf` : ''
// Get auth token for PDF preview
const authToken = localStorage.getItem('auth_token_v2')
const pdfHttpHeaders = authToken ? { Authorization: `Bearer ${authToken}` } : undefined
return (
<div className="space-y-6">
{/* Page Header */}
@@ -329,17 +337,11 @@ export default function TaskDetailPage() {
{/* Result Preview */}
{isCompleted && (
<Card>
<CardHeader>
<CardTitle></CardTitle>
</CardHeader>
<CardContent>
<MarkdownPreview
title={`OCR 結果 - ${taskDetail.filename || '未知檔案'}`}
content="請使用上方下載按鈕下載 Markdown、JSON 或 PDF 格式查看完整結果"
<PDFViewer
title={`OCR 結果預覽 - ${taskDetail.filename || '未知檔案'}`}
pdfUrl={pdfUrl}
httpHeaders={pdfHttpHeaders}
/>
</CardContent>
</Card>
)}
</div>
)

View File

@@ -6,10 +6,11 @@ import path from 'path'
export default defineConfig({
plugins: [react()],
server: {
host: '0.0.0.0',
port: 5173,
proxy: {
'/api': {
target: 'http://localhost:8000',
target: process.env.VITE_API_URL || 'http://localhost:8000',
changeOrigin: true,
},
},

View File

@@ -21,6 +21,7 @@ opencv-python>=4.8.0
# ===== PDF Generation =====
weasyprint>=60.0
markdown>=3.5.0
reportlab>=4.0.0 # Layout-preserving PDF generation with precise coordinate control
# Note: pandoc needs to be installed via brew (brew install pandoc)
# ===== Data Export =====