diff --git a/backend/app/core/config.py b/backend/app/core/config.py index 1e5397a..56da732 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -96,6 +96,11 @@ class Settings(BaseSettings): pdf_margin_left: int = Field(default=20) pdf_margin_right: int = Field(default=20) + # ===== Layout-Preserving PDF Configuration ===== + chinese_font_path: str = Field(default="./backend/fonts/NotoSansSC-Regular.ttf") + pdf_font_size_base: int = Field(default=12) + pdf_enable_bbox_debug: bool = Field(default=False) # Draw bounding boxes for debugging + # ===== Translation Configuration (Reserved) ===== enable_translation: bool = Field(default=False) translation_engine: str = Field(default="offline") diff --git a/backend/app/routers/tasks.py b/backend/app/routers/tasks.py index 1b5b5cf..bc4b142 100644 --- a/backend/app/routers/tasks.py +++ b/backend/app/routers/tasks.py @@ -66,34 +66,33 @@ def process_task_ocr(task_id: str, task_db_id: int, file_path: str, filename: st # Initialize OCR service ocr_service = OCRService() + # Create result directory before OCR processing (needed for saving extracted images) + result_dir = Path(settings.result_dir) / task_id + result_dir.mkdir(parents=True, exist_ok=True) + # Process the file with OCR ocr_result = ocr_service.process_image( image_path=Path(file_path), lang='ch', - detect_layout=True + detect_layout=True, + output_dir=result_dir ) # Calculate processing time processing_time_ms = int((datetime.now() - start_time).total_seconds() * 1000) - # Create result directory - result_dir = Path(settings.result_dir) / task_id - result_dir.mkdir(parents=True, exist_ok=True) - - # Save JSON result - json_path = result_dir / f"{Path(filename).stem}_result.json" - with open(json_path, 'w', encoding='utf-8') as f: - json.dump(ocr_result, f, ensure_ascii=False, indent=2) - - # Save Markdown result - markdown_path = result_dir / f"{Path(filename).stem}_result.md" - markdown_content = ocr_result.get('markdown_content', '') - with 
open(markdown_path, 'w', encoding='utf-8') as f: - f.write(markdown_content) + # Save results using OCR service (includes JSON, Markdown, and PDF generation) + json_path, markdown_path, pdf_path = ocr_service.save_results( + result=ocr_result, + output_dir=result_dir, + file_id=Path(filename).stem, + source_file_path=Path(file_path) + ) # Update task with results (direct database update) - task.result_json_path = str(json_path) - task.result_markdown_path = str(markdown_path) + task.result_json_path = str(json_path) if json_path else None + task.result_markdown_path = str(markdown_path) if markdown_path else None + task.result_pdf_path = str(pdf_path) if pdf_path else None task.processing_time_ms = processing_time_ms task.status = TaskStatus.COMPLETED task.completed_at = datetime.utcnow() @@ -468,10 +467,16 @@ async def download_pdf( current_user: User = Depends(get_current_user) ): """ - Download task result as searchable PDF file + Download task result as layout-preserving PDF file - **task_id**: Task UUID + + Returns a PDF that preserves the original document layout using OCR results. + The PDF is generated from OCR JSON data and cached for subsequent requests. """ + from pathlib import Path + from app.services.pdf_generator_service import pdf_generator_service + # Get task task = task_service.get_task_by_id( db=db, @@ -485,12 +490,69 @@ async def download_pdf( detail="Task not found" ) + # Check if task is completed + if task.status.value != "completed": + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Task is not completed yet. Please wait for OCR processing to finish." 
+ ) + + # Check if PDF path is stored in database + if task.result_pdf_path and Path(task.result_pdf_path).exists(): + pdf_path = Path(task.result_pdf_path) + logger.info(f"Using pre-generated PDF from database: {pdf_path.name}") + else: + # Fallback: Try to generate PDF on-demand + result_dir = Path(settings.result_dir) / task_id + + # Use stored JSON path or construct it + if task.result_json_path and Path(task.result_json_path).exists(): + json_path = Path(task.result_json_path) + else: + # Try to find JSON file in result directory + json_files = list(result_dir.glob("*_result.json")) + if not json_files: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="OCR result JSON not found" + ) + json_path = json_files[0] + + # Construct PDF path based on JSON filename + pdf_filename = json_path.stem.replace("_result", "_layout") + ".pdf" + pdf_path = result_dir / pdf_filename + + # Generate PDF if it doesn't exist + if not pdf_path.exists(): + logger.info(f"Generating layout-preserving PDF for task {task_id}") + + # Get source file path if available + source_file = None + task_file = db.query(TaskFile).filter(TaskFile.task_id == task.id).first() + if task_file and task_file.stored_path and Path(task_file.stored_path).exists(): + source_file = Path(task_file.stored_path) + + # Generate PDF + success = pdf_generator_service.generate_layout_pdf( + json_path=json_path, + output_path=pdf_path, + source_file_path=source_file + ) + + if not success: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to generate PDF. Please check server logs." 
+ ) + + logger.info(f"PDF generated successfully: {pdf_path.name}") + # Validate file access is_valid, error_msg = file_access_service.validate_file_access( db=db, user_id=current_user.id, task_id=task_id, - file_path=task.result_pdf_path + file_path=str(pdf_path) ) if not is_valid: @@ -502,7 +564,7 @@ async def download_pdf( # Return file filename = f"{task.filename or task_id}_result.pdf" return FileResponse( - path=task.result_pdf_path, + path=str(pdf_path), filename=filename, media_type="application/pdf" ) diff --git a/backend/app/services/ocr_service.py b/backend/app/services/ocr_service.py index e5ae73e..cd86251 100644 --- a/backend/app/services/ocr_service.py +++ b/backend/app/services/ocr_service.py @@ -284,7 +284,8 @@ class OCRService: image_path: Path, lang: str = 'ch', detect_layout: bool = True, - confidence_threshold: Optional[float] = None + confidence_threshold: Optional[float] = None, + output_dir: Optional[Path] = None ) -> Dict: """ Process single image with OCR and layout analysis @@ -340,7 +341,8 @@ class OCRService: page_image_path, lang=lang, detect_layout=detect_layout, - confidence_threshold=confidence_threshold + confidence_threshold=confidence_threshold, + output_dir=output_dir ) # Accumulate results @@ -458,7 +460,7 @@ class OCRService: images_metadata = [] if detect_layout: - layout_data, images_metadata = self.analyze_layout(image_path) + layout_data, images_metadata = self.analyze_layout(image_path, output_dir=output_dir) # Generate Markdown markdown_content = self.generate_markdown(text_regions, layout_data) @@ -500,12 +502,71 @@ class OCRService: 'processing_time': (datetime.now() - start_time).total_seconds(), } - def analyze_layout(self, image_path: Path) -> Tuple[Optional[Dict], List[Dict]]: + def _extract_table_text(self, html_content: str) -> str: + """ + Extract text from HTML table content for translation purposes + + Args: + html_content: HTML content containing table + + Returns: + Extracted text from table cells + """ + 
try: + from html.parser import HTMLParser + + class TableTextExtractor(HTMLParser): + def __init__(self): + super().__init__() + self.text_parts = [] + self.in_table = False + + def handle_starttag(self, tag, attrs): + if tag == 'table': + self.in_table = True + + def handle_endtag(self, tag): + if tag == 'table': + self.in_table = False + elif tag in ('td', 'th') and self.in_table: + self.text_parts.append(' | ') # Cell separator + elif tag == 'tr' and self.in_table: + self.text_parts.append('\n') # Row separator + + def handle_data(self, data): + if self.in_table: + stripped = data.strip() + if stripped: + self.text_parts.append(stripped) + + parser = TableTextExtractor() + parser.feed(html_content) + + # Clean up the extracted text + extracted = ''.join(parser.text_parts) + # Remove multiple separators + import re + extracted = re.sub(r'\s*\|\s*\|+\s*', ' | ', extracted) + extracted = re.sub(r'\n+', '\n', extracted) + extracted = extracted.strip() + + return extracted + + except Exception as e: + logger.warning(f"Failed to extract table text: {e}") + # Fallback: just remove HTML tags + import re + text = re.sub(r'<[^>]+>', ' ', html_content) + text = re.sub(r'\s+', ' ', text) + return text.strip() + + def analyze_layout(self, image_path: Path, output_dir: Optional[Path] = None) -> Tuple[Optional[Dict], List[Dict]]: """ Analyze document layout using PP-StructureV3 Args: image_path: Path to image file + output_dir: Optional output directory for saving extracted images (defaults to image_path.parent) Returns: Tuple of (layout_data, images_metadata) @@ -548,16 +609,59 @@ class OCRService: 'page': page_idx, 'bbox': [], # PP-StructureV3 doesn't provide individual bbox in this format } + + # Extract text from table for translation purposes + if has_table: + table_text = self._extract_table_text(markdown_texts) + element['extracted_text'] = table_text + logger.info(f"Extracted {len(table_text)} characters from table") + layout_elements.append(element) - # Add image 
metadata + # Add image metadata and SAVE images to disk for img_idx, (img_path, img_obj) in enumerate(markdown_images.items()): + # Save image to disk + try: + # Determine base directory for saving images + base_dir = output_dir if output_dir else image_path.parent + + # Create full path for image file + full_img_path = base_dir / img_path + + # Create imgs/ subdirectory if it doesn't exist + full_img_path.parent.mkdir(parents=True, exist_ok=True) + + # Save image object to disk + if hasattr(img_obj, 'save'): + # img_obj is PIL Image + img_obj.save(str(full_img_path)) + logger.info(f"Saved extracted image to {full_img_path}") + else: + logger.warning(f"Image object for {img_path} does not have save() method, skipping") + + except Exception as e: + logger.warning(f"Failed to save image {img_path}: {str(e)}") + # Continue processing even if image save fails + + # Extract bbox from filename (format: img_in_table_box_x1_y1_x2_y2.jpg) + bbox = [] + try: + import re + match = re.search(r'box_(\d+)_(\d+)_(\d+)_(\d+)', img_path) + if match: + x1, y1, x2, y2 = map(int, match.groups()) + # Convert to 4-point bbox format: [[x1,y1], [x2,y1], [x2,y2], [x1,y2]] + bbox = [[x1, y1], [x2, y1], [x2, y2], [x1, y2]] + logger.info(f"Extracted bbox from filename: {bbox}") + except Exception as e: + logger.warning(f"Failed to extract bbox from {img_path}: {e}") + images_metadata.append({ 'element_id': len(layout_elements) + img_idx, 'image_path': img_path, 'type': 'image', 'page': page_idx, - 'bbox': [], + 'bbox': bbox, }) if layout_elements: @@ -638,18 +742,20 @@ class OCRService: self, result: Dict, output_dir: Path, - file_id: str - ) -> Tuple[Optional[Path], Optional[Path]]: + file_id: str, + source_file_path: Optional[Path] = None + ) -> Tuple[Optional[Path], Optional[Path], Optional[Path]]: """ - Save OCR results to JSON and Markdown files + Save OCR results to JSON, Markdown, and layout-preserving PDF files Args: result: OCR result dictionary output_dir: Output directory file_id: 
Unique file identifier + source_file_path: Optional path to original source file for PDF generation Returns: - Tuple of (json_path, markdown_path) + Tuple of (json_path, markdown_path, pdf_path) """ try: output_dir.mkdir(parents=True, exist_ok=True) @@ -666,8 +772,37 @@ class OCRService: f.write(markdown_content) logger.info(f"Results saved: {json_path.name}, {markdown_path.name}") - return json_path, markdown_path + + # Generate layout-preserving PDF + pdf_path = None + try: + from app.services.pdf_generator_service import pdf_generator_service + + pdf_filename = f"{file_id}_layout.pdf" + pdf_path = output_dir / pdf_filename + + logger.info(f"Generating layout-preserving PDF: {pdf_filename}") + + success = pdf_generator_service.generate_layout_pdf( + json_path=json_path, + output_path=pdf_path, + source_file_path=source_file_path + ) + + if success: + logger.info(f"✓ PDF generated successfully: {pdf_path.name}") + else: + logger.warning(f"✗ PDF generation failed for {file_id}") + pdf_path = None + + except Exception as e: + logger.error(f"Error generating PDF for {file_id}: {str(e)}") + import traceback + traceback.print_exc() + pdf_path = None + + return json_path, markdown_path, pdf_path except Exception as e: logger.error(f"Error saving results: {str(e)}") - return None, None + return None, None, None diff --git a/backend/app/services/pdf_generator_service.py b/backend/app/services/pdf_generator_service.py new file mode 100644 index 0000000..35f1f9f --- /dev/null +++ b/backend/app/services/pdf_generator_service.py @@ -0,0 +1,626 @@ +""" +Layout-Preserving PDF Generation Service +Generates PDF files that preserve the original document layout using OCR JSON data +""" + +import json +import logging +from pathlib import Path +from typing import Dict, List, Optional, Tuple +from datetime import datetime + +from reportlab.lib.pagesizes import A4, letter +from reportlab.lib.units import mm +from reportlab.pdfgen import canvas +from reportlab.pdfbase import pdfmetrics 
+from reportlab.pdfbase.ttfonts import TTFont +from reportlab.platypus import Table, TableStyle +from reportlab.lib import colors +from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT +from reportlab.platypus import Paragraph +from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle +from PIL import Image +from html.parser import HTMLParser + +from app.core.config import settings + +logger = logging.getLogger(__name__) + + +class HTMLTableParser(HTMLParser): + """Parse HTML table to extract structure and data""" + + def __init__(self): + super().__init__() + self.tables = [] + self.current_table = None + self.current_row = None + self.current_cell = None + self.in_table = False + + def handle_starttag(self, tag, attrs): + attrs_dict = dict(attrs) + + if tag == 'table': + self.in_table = True + self.current_table = {'rows': []} + + elif tag == 'tr' and self.in_table: + self.current_row = {'cells': []} + + elif tag in ('td', 'th') and self.in_table and self.current_row is not None: + colspan = int(attrs_dict.get('colspan', 1)) + rowspan = int(attrs_dict.get('rowspan', 1)) + self.current_cell = { + 'text': '', + 'is_header': tag == 'th', + 'colspan': colspan, + 'rowspan': rowspan + } + + def handle_endtag(self, tag): + if tag == 'table' and self.in_table: + if self.current_table and self.current_table['rows']: + self.tables.append(self.current_table) + self.current_table = None + self.in_table = False + + elif tag == 'tr' and self.current_row is not None: + if self.current_table is not None: + self.current_table['rows'].append(self.current_row) + self.current_row = None + + elif tag in ('td', 'th') and self.current_cell is not None: + if self.current_row is not None: + self.current_row['cells'].append(self.current_cell) + self.current_cell = None + + def handle_data(self, data): + if self.current_cell is not None: + self.current_cell['text'] += data.strip() + ' ' + + +class PDFGeneratorService: + """Service for generating layout-preserving PDFs 
from OCR JSON data""" + + def __init__(self): + """Initialize PDF generator with font configuration""" + self.font_name = 'NotoSansSC' + self.font_path = None + self.font_registered = False + + self._register_chinese_font() + + def _register_chinese_font(self): + """Register Chinese font for PDF generation""" + try: + # Get font path from settings + font_path = Path(settings.chinese_font_path) + + # Try relative path from project root + if not font_path.is_absolute(): + # Adjust path - settings.chinese_font_path starts with ./backend/ + project_root = Path(__file__).resolve().parent.parent.parent.parent + font_path = project_root / font_path + + if not font_path.exists(): + logger.error(f"Chinese font not found at {font_path}") + return + + # Register font + pdfmetrics.registerFont(TTFont(self.font_name, str(font_path))) + self.font_path = font_path + self.font_registered = True + logger.info(f"Chinese font registered: {self.font_name} from {font_path}") + + except Exception as e: + logger.error(f"Failed to register Chinese font: {e}") + self.font_registered = False + + def load_ocr_json(self, json_path: Path) -> Optional[Dict]: + """ + Load and parse OCR JSON result file + + Args: + json_path: Path to JSON file + + Returns: + Parsed JSON data or None if failed + """ + try: + with open(json_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + logger.info(f"Loaded OCR JSON: {json_path.name}") + return data + + except Exception as e: + logger.error(f"Failed to load JSON {json_path}: {e}") + return None + + def calculate_page_dimensions(self, text_regions: List[Dict], source_file_path: Optional[Path] = None) -> Tuple[float, float]: + """ + Calculate page dimensions from source file or text region bounding boxes + + Args: + text_regions: List of text regions with bbox coordinates + source_file_path: Optional path to source file for accurate dimensions + + Returns: + Tuple of (width, height) in points + """ + # First try to get dimensions from source file + if 
source_file_path: + dims = self.get_original_page_size(source_file_path) + if dims: + return dims + + if not text_regions: + return A4 # Default to A4 size + + max_x = 0 + max_y = 0 + + for region in text_regions: + bbox = region.get('bbox', []) + if not bbox or len(bbox) < 4: + continue + + # bbox format: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]] + for point in bbox: + if isinstance(point, (list, tuple)) and len(point) >= 2: + x, y = point[0], point[1] + max_x = max(max_x, x) + max_y = max(max_y, y) + + # OCR coordinates are in pixels, use them directly as points (1:1 mapping) + # Do NOT add padding - this causes layout issues + width = max_x if max_x > 0 else A4[0] + height = max_y if max_y > 0 else A4[1] + + logger.info(f"Calculated page dimensions from OCR: {width:.1f} x {height:.1f} points") + return (width, height) + + def get_original_page_size(self, file_path: Path) -> Optional[Tuple[float, float]]: + """ + Extract page dimensions from original source file + + Args: + file_path: Path to original file (image or PDF) + + Returns: + Tuple of (width, height) in points or None + """ + try: + if not file_path.exists(): + return None + + # For images, get dimensions from PIL + if file_path.suffix.lower() in ['.png', '.jpg', '.jpeg', '.bmp', '.tiff']: + img = Image.open(file_path) + # Use pixel dimensions directly as points (1:1 mapping) + # This matches how PaddleOCR reports coordinates + width_pt = float(img.width) + height_pt = float(img.height) + logger.info(f"Extracted dimensions from image: {width_pt:.1f} x {height_pt:.1f} points (1:1 pixel mapping)") + return (width_pt, height_pt) + + # For PDFs, would need PyPDF2 or similar + # For now, return None to use calculated dimensions + + except Exception as e: + logger.warning(f"Failed to get page size from {file_path}: {e}") + + return None + + def draw_text_region( + self, + pdf_canvas: canvas.Canvas, + region: Dict, + page_height: float + ): + """ + Draw a text region at precise coordinates + + Args: + pdf_canvas: 
ReportLab canvas object + region: Text region dict with text, bbox, confidence + page_height: Height of page (for coordinate transformation) + """ + text = region.get('text', '') + bbox = region.get('bbox', []) + confidence = region.get('confidence', 1.0) + + if not text or not bbox or len(bbox) < 4: + return + + try: + # bbox from OCR: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]] + # Points: top-left, top-right, bottom-right, bottom-left + # OCR coordinates: origin (0,0) at top-left, Y increases downward + ocr_x_left = bbox[0][0] # Left X + ocr_y_top = bbox[0][1] # Top Y in OCR coordinates + ocr_x_right = bbox[2][0] # Right X + ocr_y_bottom = bbox[2][1] # Bottom Y in OCR coordinates + + # Calculate bbox dimensions + bbox_width = abs(ocr_x_right - ocr_x_left) + bbox_height = abs(ocr_y_bottom - ocr_y_top) + + # Calculate font size using heuristics + # Font size is typically 70-90% of bbox height + # Testing shows 0.75 works well for most cases + font_size = bbox_height * 0.75 + font_size = max(min(font_size, 72), 4) # Clamp between 4pt and 72pt + + # Transform coordinates: OCR (top-left origin) → PDF (bottom-left origin) + # CRITICAL: Y-axis flip! 
+ pdf_x = ocr_x_left + pdf_y = page_height - ocr_y_bottom # Flip Y-axis using bottom coordinate + + # Set font + font_name = self.font_name if self.font_registered else 'Helvetica' + pdf_canvas.setFont(font_name, font_size) + + # Calculate text width to prevent overflow + text_width = pdf_canvas.stringWidth(text, font_name, font_size) + + # If text is too wide for bbox, scale down font + if text_width > bbox_width: + scale_factor = bbox_width / text_width + font_size = font_size * scale_factor * 0.95 # 95% to add small margin + font_size = max(font_size, 3) # Minimum 3pt + pdf_canvas.setFont(font_name, font_size) + + # Draw text at calculated position + pdf_canvas.drawString(pdf_x, pdf_y, text) + + # Debug: Draw bounding box (optional) + if settings.pdf_enable_bbox_debug: + pdf_canvas.setStrokeColorRGB(1, 0, 0, 0.3) # Red, semi-transparent + pdf_canvas.setLineWidth(0.5) + # Transform all bbox points to PDF coordinates + pdf_points = [(p[0], page_height - p[1]) for p in bbox] + # Draw bbox rectangle + for i in range(4): + x1, y1 = pdf_points[i] + x2, y2 = pdf_points[(i + 1) % 4] + pdf_canvas.line(x1, y1, x2, y2) + + except Exception as e: + logger.warning(f"Failed to draw text region '{text[:20]}...': {e}") + + def draw_table_region( + self, + pdf_canvas: canvas.Canvas, + table_element: Dict, + images_metadata: List[Dict], + page_height: float + ): + """ + Draw a table region by parsing HTML and rebuilding with ReportLab Table + + Args: + pdf_canvas: ReportLab canvas object + table_element: Table element dict with HTML content + images_metadata: List of image metadata to find table bbox + page_height: Height of page + """ + try: + html_content = table_element.get('content', '') + if not html_content: + return + + # Parse HTML to extract table structure + parser = HTMLTableParser() + parser.feed(html_content) + + if not parser.tables: + logger.warning("No tables found in HTML content") + return + + # Get the first table (PP-StructureV3 usually provides one table per 
element) + table_data = parser.tables[0] + rows = table_data['rows'] + + if not rows: + return + + # Find corresponding table image to get bbox + table_bbox = None + for img_meta in images_metadata: + img_path = img_meta.get('image_path', '') + if 'table' in img_path.lower(): + bbox = img_meta.get('bbox', []) + if bbox and len(bbox) >= 4: + table_bbox = bbox + break + + if not table_bbox: + logger.warning("No bbox found for table") + return + + # Extract bbox coordinates + ocr_x_left = table_bbox[0][0] + ocr_y_top = table_bbox[0][1] + ocr_x_right = table_bbox[2][0] + ocr_y_bottom = table_bbox[2][1] + + table_width = abs(ocr_x_right - ocr_x_left) + table_height = abs(ocr_y_bottom - ocr_y_top) + + # Transform coordinates + pdf_x = ocr_x_left + pdf_y = page_height - ocr_y_bottom + + # Build table data for ReportLab + # Convert parsed structure to simple 2D array + max_cols = max(len(row['cells']) for row in rows) + reportlab_data = [] + + for row in rows: + row_data = [] + for cell in row['cells']: + text = cell['text'].strip() + row_data.append(text) + # Pad row if needed + while len(row_data) < max_cols: + row_data.append('') + reportlab_data.append(row_data) + + # Calculate column widths (equal distribution) + col_widths = [table_width / max_cols] * max_cols + + # Create ReportLab Table + # Use smaller font size to fit in bbox + font_size = min(table_height / len(rows) * 0.5, 10) + font_size = max(font_size, 6) + + # Create table with font + table = Table(reportlab_data, colWidths=col_widths) + + # Apply table style + style = TableStyle([ + ('FONT', (0, 0), (-1, -1), self.font_name if self.font_registered else 'Helvetica', font_size), + ('GRID', (0, 0), (-1, -1), 0.5, colors.black), + ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'), + ('ALIGN', (0, 0), (-1, -1), 'CENTER'), + ('LEFTPADDING', (0, 0), (-1, -1), 2), + ('RIGHTPADDING', (0, 0), (-1, -1), 2), + ('TOPPADDING', (0, 0), (-1, -1), 2), + ('BOTTOMPADDING', (0, 0), (-1, -1), 2), + ]) + + # Add header style if first row 
has headers + if rows and rows[0]['cells'] and rows[0]['cells'][0].get('is_header'): + style.add('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey) + style.add('FONT', (0, 0), (-1, 0), self.font_name if self.font_registered else 'Helvetica-Bold', font_size) + + table.setStyle(style) + + # Calculate table size + table.wrapOn(pdf_canvas, table_width, table_height) + + # Draw table at position + table.drawOn(pdf_canvas, pdf_x, pdf_y) + + logger.info(f"Drew table at ({pdf_x:.0f}, {pdf_y:.0f}) size {table_width:.0f}x{table_height:.0f} with {len(rows)} rows") + + except Exception as e: + logger.warning(f"Failed to draw table region: {e}") + import traceback + traceback.print_exc() + + def draw_image_region( + self, + pdf_canvas: canvas.Canvas, + region: Dict, + page_height: float, + result_dir: Path + ): + """ + Draw an image region by embedding the extracted image + + Handles images extracted by PP-StructureV3 (tables, figures, charts, etc.) + + Args: + pdf_canvas: ReportLab canvas object + region: Image metadata dict with image_path and bbox + page_height: Height of page (for coordinate transformation) + result_dir: Directory containing result files + """ + try: + image_path_str = region.get('image_path', '') + if not image_path_str: + return + + # Construct full path to image + image_path = result_dir / image_path_str + + if not image_path.exists(): + logger.warning(f"Image not found: {image_path}") + return + + # Get bbox for positioning + bbox = region.get('bbox', []) + if not bbox or len(bbox) < 4: + # If no bbox, skip for now + logger.warning(f"No bbox for image {image_path_str}") + return + + # bbox from OCR: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]] + # OCR coordinates: origin (0,0) at top-left, Y increases downward + ocr_x_left = bbox[0][0] + ocr_y_top = bbox[0][1] + ocr_x_right = bbox[2][0] + ocr_y_bottom = bbox[2][1] + + # Calculate bbox dimensions + bbox_width = abs(ocr_x_right - ocr_x_left) + bbox_height = abs(ocr_y_bottom - ocr_y_top) + + # Transform coordinates: 
OCR (top-left origin) → PDF (bottom-left origin) + # CRITICAL: Y-axis flip! + # For images, we position at bottom-left corner + pdf_x_left = ocr_x_left + pdf_y_bottom = page_height - ocr_y_bottom # Flip Y-axis + + # Draw image using ReportLab + # drawImage expects: (path, x, y, width, height) + # where (x, y) is the bottom-left corner of the image + pdf_canvas.drawImage( + str(image_path), + pdf_x_left, + pdf_y_bottom, + width=bbox_width, + height=bbox_height, + preserveAspectRatio=True, + mask='auto' # Handle transparency + ) + + logger.info(f"Drew image: {image_path_str} at ({pdf_x_left:.0f}, {pdf_y_bottom:.0f}) size {bbox_width:.0f}x{bbox_height:.0f}") + + except Exception as e: + logger.warning(f"Failed to draw image region: {e}") + + def generate_layout_pdf( + self, + json_path: Path, + output_path: Path, + source_file_path: Optional[Path] = None + ) -> bool: + """ + Generate layout-preserving PDF from OCR JSON data + + Args: + json_path: Path to OCR JSON file + output_path: Path to save generated PDF + source_file_path: Optional path to original source file for dimension extraction + + Returns: + True if successful, False otherwise + """ + try: + # Check if PDF already exists (caching) + if output_path.exists(): + logger.info(f"PDF already exists: {output_path.name}") + return True + + # Load JSON data + ocr_data = self.load_ocr_json(json_path) + if not ocr_data: + return False + + # Get text regions + text_regions = ocr_data.get('text_regions', []) + if not text_regions: + logger.warning("No text regions found in JSON") + return False + + # Get images metadata + images_metadata = ocr_data.get('images_metadata', []) + + # Get layout data + layout_data = ocr_data.get('layout_data', {}) + + # Determine page dimensions + page_size = self.calculate_page_dimensions(text_regions, source_file_path) + + page_width, page_height = page_size + + # Create PDF canvas + pdf_canvas = canvas.Canvas(str(output_path), pagesize=(page_width, page_height)) + + # Extract table 
bboxes to exclude text in those regions + table_bboxes = [] + for img_meta in images_metadata: + img_path = img_meta.get('image_path', '') + if 'table' in img_path.lower(): + bbox = img_meta.get('bbox', []) + if bbox and len(bbox) >= 4: + table_bboxes.append(bbox) + + # Helper function to check if a point is inside a bbox + def point_in_bbox(x, y, bbox): + x1, y1 = bbox[0] + x2, y2 = bbox[2] + return min(x1, x2) <= x <= max(x1, x2) and min(y1, y2) <= y <= max(y1, y2) + + # Filter text regions to exclude those inside tables + filtered_text_regions = [] + for region in text_regions: + bbox = region.get('bbox', []) + if not bbox or len(bbox) < 4: + continue + + # Check if text region center is inside any table bbox + center_x = (bbox[0][0] + bbox[2][0]) / 2 + center_y = (bbox[0][1] + bbox[2][1]) / 2 + + is_in_table = any(point_in_bbox(center_x, center_y, table_bbox) for table_bbox in table_bboxes) + + if not is_in_table: + filtered_text_regions.append(region) + else: + logger.debug(f"Excluded text '{region.get('text', '')[:20]}...' 
(inside table)") + + logger.info(f"Filtered {len(text_regions) - len(filtered_text_regions)} text regions inside tables") + + # Group regions by page + pages_data = {} + for region in filtered_text_regions: + page_num = region.get('page', 1) + if page_num not in pages_data: + pages_data[page_num] = [] + pages_data[page_num].append(region) + + # Get table elements from layout_data + table_elements = [] + if layout_data and layout_data.get('elements'): + table_elements = [e for e in layout_data['elements'] if e.get('type') == 'table'] + + # Process each page + total_pages = ocr_data.get('total_pages', 1) + for page_num in range(1, total_pages + 1): + if page_num > 1: + pdf_canvas.showPage() # Start new page + + # Draw text regions for this page (excluding table text) + page_regions = pages_data.get(page_num, []) + for region in page_regions: + self.draw_text_region(pdf_canvas, region, page_height) + + # Draw tables for this page + for table_elem in table_elements: + if table_elem.get('page', 0) == page_num - 1: # page is 0-indexed + self.draw_table_region(pdf_canvas, table_elem, images_metadata, page_height) + + # Draw non-table images for this page (figure, chart, seal, etc.) 
+ for img_meta in images_metadata: + if img_meta.get('page') == page_num - 1: # page is 0-indexed + img_path = img_meta.get('image_path', '') + # Skip table images (they're now rendered as tables) + if 'table' not in img_path.lower(): + self.draw_image_region( + pdf_canvas, + img_meta, + page_height, + json_path.parent + ) + + # Save PDF + pdf_canvas.save() + + file_size = output_path.stat().st_size + logger.info(f"Generated layout-preserving PDF: {output_path.name} ({file_size} bytes)") + return True + + except Exception as e: + logger.error(f"Failed to generate PDF: {e}") + import traceback + traceback.print_exc() + return False + + +# Singleton instance +pdf_generator_service = PDFGeneratorService() diff --git a/backend/download_fonts.sh b/backend/download_fonts.sh new file mode 100755 index 0000000..2ca7e45 --- /dev/null +++ b/backend/download_fonts.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Download Noto Sans SC TrueType font for layout-preserving PDF generation + +set -e + +FONT_DIR="backend/fonts" +FONT_URL="https://github.com/notofonts/noto-cjk/raw/main/Sans/Variable/TTF/Subset/NotoSansSC-VF.ttf" +FONT_FILE="NotoSansSC-Regular.ttf" + +echo "🔤 Downloading Chinese font for PDF generation..." + +# Create font directory +mkdir -p "$FONT_DIR" + +# Download font if not exists +if [ -f "$FONT_DIR/$FONT_FILE" ]; then + echo "✓ Font already exists: $FONT_DIR/$FONT_FILE" +else + echo "Downloading from GitHub..." + wget "$FONT_URL" -O "$FONT_DIR/$FONT_FILE" + + if [ -f "$FONT_DIR/$FONT_FILE" ]; then + SIZE=$(du -h "$FONT_DIR/$FONT_FILE" | cut -f1) + echo "✓ Font downloaded successfully: $SIZE" + else + echo "✗ Font download failed" + exit 1 + fi +fi + +echo "✅ Font setup complete!" 
diff --git a/backend/fonts/NotoSansSC-Regular.ttf b/backend/fonts/NotoSansSC-Regular.ttf
new file mode 100644
index 0000000..5371a54
Binary files /dev/null and b/backend/fonts/NotoSansSC-Regular.ttf differ
diff --git a/backend/test_chinese_font.py b/backend/test_chinese_font.py
new file mode 100644
index 0000000..651c6b6
--- /dev/null
+++ b/backend/test_chinese_font.py
@@ -0,0 +1,73 @@
+"""
+Test script to verify ReportLab and Chinese font rendering
+"""
+from reportlab.pdfgen import canvas
+from reportlab.pdfbase import pdfmetrics
+from reportlab.pdfbase.ttfonts import TTFont
+from pathlib import Path
+import sys
+import tempfile
+import traceback
+
+# Font bundled in backend/fonts/, located relative to this script so the test
+# does not depend on one developer's checkout path or the working directory.
+FONT_PATH = Path(__file__).resolve().parent / "fonts" / "NotoSansSC-Regular.ttf"
+
+
+def test_chinese_rendering():
+    """Test if Chinese characters can be rendered in PDF.
+
+    Registers the bundled font with ReportLab, draws three sample strings into
+    a PDF in the system temp directory and checks that the file is non-empty.
+
+    Returns:
+        bool: True when the PDF was written and is non-empty; False when the
+        font file is missing, the PDF is empty, or any step raised.
+    """
+    # Check if font file exists
+    if not FONT_PATH.exists():
+        print(f"❌ Font file not found: {FONT_PATH}")
+        return False
+
+    print(f"✓ Font file found: {FONT_PATH}")
+
+    try:
+        # Register Chinese font
+        pdfmetrics.registerFont(TTFont('NotoSansSC', str(FONT_PATH)))
+        print("✓ Font registered successfully")
+
+        # Create test PDF in the platform temp dir (was a hard-coded /tmp path)
+        test_pdf = Path(tempfile.gettempdir()) / "test_chinese.pdf"
+        c = canvas.Canvas(str(test_pdf))
+
+        # Set Chinese font
+        c.setFont('NotoSansSC', 14)
+
+        # Draw test text
+        c.drawString(100, 750, "測試中文字符渲染 - Test Chinese Character Rendering")
+        c.drawString(100, 730, "HTD-S1 技術數據表")
+        c.drawString(100, 710, "這是一個 PDF 生成測試")
+
+        c.save()
+        print(f"✓ Test PDF created: {test_pdf}")
+
+        # Check file size
+        file_size = test_pdf.stat().st_size
+        print(f"✓ PDF file size: {file_size} bytes")
+
+        if file_size > 0:
+            print("\n✅ Chinese font rendering test PASSED")
+            return True
+        else:
+            print("\n❌ PDF file is empty")
+            return False
+
+    except Exception as e:
+        print(f"❌ Error during testing: {e}")
+        traceback.print_exc()
+        return False
+
+
+if __name__ == "__main__":
+    success = test_chinese_rendering()
+    sys.exit(0 if success else 1)
diff --git
a/frontend/.env.example b/frontend/.env.example new file mode 100644 index 0000000..f67c662 --- /dev/null +++ b/frontend/.env.example @@ -0,0 +1,4 @@ +# Backend API URL +# For WSL2, use the WSL2 IP address (get it with: hostname -I) +# For native Linux/Mac, use http://localhost:8000 +VITE_API_URL=http://172.20.20.106:8000 diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 7225de1..89113c9 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -19,6 +19,7 @@ "react-dropzone": "^14.3.8", "react-i18next": "^16.3.0", "react-markdown": "^9.0.1", + "react-pdf": "^10.2.0", "react-router-dom": "^7.9.5", "tailwind-merge": "^3.4.0", "zustand": "^5.0.8" @@ -1048,6 +1049,191 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@napi-rs/canvas": { + "version": "0.1.82", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas/-/canvas-0.1.82.tgz", + "integrity": "sha512-FGjyUBoF0sl1EenSiE4UV2WYu76q6F9GSYedq5EiOCOyGYoQ/Owulcv6rd7v/tWOpljDDtefXXIaOCJrVKem4w==", + "license": "MIT", + "optional": true, + "workspaces": [ + "e2e/*" + ], + "engines": { + "node": ">= 10" + }, + "optionalDependencies": { + "@napi-rs/canvas-android-arm64": "0.1.82", + "@napi-rs/canvas-darwin-arm64": "0.1.82", + "@napi-rs/canvas-darwin-x64": "0.1.82", + "@napi-rs/canvas-linux-arm-gnueabihf": "0.1.82", + "@napi-rs/canvas-linux-arm64-gnu": "0.1.82", + "@napi-rs/canvas-linux-arm64-musl": "0.1.82", + "@napi-rs/canvas-linux-riscv64-gnu": "0.1.82", + "@napi-rs/canvas-linux-x64-gnu": "0.1.82", + "@napi-rs/canvas-linux-x64-musl": "0.1.82", + "@napi-rs/canvas-win32-x64-msvc": "0.1.82" + } + }, + "node_modules/@napi-rs/canvas-android-arm64": { + "version": "0.1.82", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-android-arm64/-/canvas-android-arm64-0.1.82.tgz", + "integrity": "sha512-bvZhN0iI54ouaQOrgJV96H2q7J3ZoufnHf4E1fUaERwW29Rz4rgicohnAg4venwBJZYjGl5Yl3CGmlAl1LZowQ==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ 
+ "android" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-darwin-arm64": { + "version": "0.1.82", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-arm64/-/canvas-darwin-arm64-0.1.82.tgz", + "integrity": "sha512-InuBHKCyuFqhNwNr4gpqazo5Xp6ltKflqOLiROn4hqAS8u21xAHyYCJRgHwd+a5NKmutFTaRWeUIT/vxWbU/iw==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-darwin-x64": { + "version": "0.1.82", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-x64/-/canvas-darwin-x64-0.1.82.tgz", + "integrity": "sha512-aQGV5Ynn96onSXcuvYb2y7TRXD/t4CL2EGmnGqvLyeJX1JLSNisKQlWN/1bPDDXymZYSdUqbXehj5qzBlOx+RQ==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-arm-gnueabihf": { + "version": "0.1.82", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm-gnueabihf/-/canvas-linux-arm-gnueabihf-0.1.82.tgz", + "integrity": "sha512-YIUpmHWeHGGRhWitT1KJkgj/JPXPfc9ox8oUoyaGPxolLGPp5AxJkq8wIg8CdFGtutget968dtwmx71m8o3h5g==", + "cpu": [ + "arm" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-arm64-gnu": { + "version": "0.1.82", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-gnu/-/canvas-linux-arm64-gnu-0.1.82.tgz", + "integrity": "sha512-AwLzwLBgmvk7kWeUgItOUor/QyG31xqtD26w1tLpf4yE0hiXTGp23yc669aawjB6FzgIkjh1NKaNS52B7/qEBQ==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-arm64-musl": { + "version": "0.1.82", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-musl/-/canvas-linux-arm64-musl-0.1.82.tgz", + "integrity": 
"sha512-moZWuqepAwWBffdF4JDadt8TgBD02iMhG6I1FHZf8xO20AsIp9rB+p0B8Zma2h2vAF/YMjeFCDmW5un6+zZz9g==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-riscv64-gnu": { + "version": "0.1.82", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-riscv64-gnu/-/canvas-linux-riscv64-gnu-0.1.82.tgz", + "integrity": "sha512-w9++2df2kG9eC9LWYIHIlMLuhIrKGQYfUxs97CwgxYjITeFakIRazI9LYWgVzEc98QZ9x9GQvlicFsrROV59MQ==", + "cpu": [ + "riscv64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-x64-gnu": { + "version": "0.1.82", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-gnu/-/canvas-linux-x64-gnu-0.1.82.tgz", + "integrity": "sha512-lZulOPwrRi6hEg/17CaqdwWEUfOlIJuhXxincx1aVzsVOCmyHf+xFq4i6liJl1P+x2v6Iz2Z/H5zHvXJCC7Bwg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-x64-musl": { + "version": "0.1.82", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-musl/-/canvas-linux-x64-musl-0.1.82.tgz", + "integrity": "sha512-Be9Wf5RTv1w6GXlTph55K3PH3vsAh1Ax4T1FQY1UYM0QfD0yrwGdnJ8/fhqw7dEgMjd59zIbjJQC8C3msbGn5g==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-win32-x64-msvc": { + "version": "0.1.82", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-win32-x64-msvc/-/canvas-win32-x64-msvc-0.1.82.tgz", + "integrity": "sha512-LN/i8VrvxTDmEEK1c10z2cdOTkWT76LlTGtyZe5Kr1sqoSomKeExAjbilnu1+oee5lZUgS5yfZ2LNlVhCeARuw==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, "node_modules/@nodelib/fs.scandir": { 
"version": "2.1.5", "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", @@ -4007,6 +4193,24 @@ "@jridgewell/sourcemap-codec": "^1.5.5" } }, + "node_modules/make-cancellable-promise": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/make-cancellable-promise/-/make-cancellable-promise-2.0.0.tgz", + "integrity": "sha512-3SEQqTpV9oqVsIWqAcmDuaNeo7yBO3tqPtqGRcKkEo0lrzD3wqbKG9mkxO65KoOgXqj+zH2phJ2LiAsdzlogSw==", + "license": "MIT", + "funding": { + "url": "https://github.com/wojtekmaj/make-cancellable-promise?sponsor=1" + } + }, + "node_modules/make-event-props": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/make-event-props/-/make-event-props-2.0.0.tgz", + "integrity": "sha512-G/hncXrl4Qt7mauJEXSg3AcdYzmpkIITTNl5I+rH9sog5Yw0kK6vseJjCaPfOXqOqQuPUP89Rkhfz5kPS8ijtw==", + "license": "MIT", + "funding": { + "url": "https://github.com/wojtekmaj/make-event-props?sponsor=1" + } + }, "node_modules/math-intrinsics": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", @@ -4169,6 +4373,23 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/merge-refs": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/merge-refs/-/merge-refs-2.0.0.tgz", + "integrity": "sha512-3+B21mYK2IqUWnd2EivABLT7ueDhb0b8/dGK8LoFQPrU61YITeCMn14F7y7qZafWNZhUEKb24cJdiT5Wxs3prg==", + "license": "MIT", + "funding": { + "url": "https://github.com/wojtekmaj/merge-refs?sponsor=1" + }, + "peerDependencies": { + "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, "node_modules/merge2": { "version": "1.4.1", "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", @@ -5060,6 +5281,47 @@ "react": ">=18" } }, + "node_modules/react-pdf": { + "version": "10.2.0", + "resolved": "https://registry.npmjs.org/react-pdf/-/react-pdf-10.2.0.tgz", + "integrity": 
"sha512-zk0DIL31oCh8cuQycM0SJKfwh4Onz0/Nwi6wTOjgtEjWGUY6eM+/vuzvOP3j70qtEULn7m1JtaeGzud1w5fY2Q==", + "license": "MIT", + "dependencies": { + "clsx": "^2.0.0", + "dequal": "^2.0.3", + "make-cancellable-promise": "^2.0.0", + "make-event-props": "^2.0.0", + "merge-refs": "^2.0.0", + "pdfjs-dist": "5.4.296", + "tiny-invariant": "^1.0.0", + "warning": "^4.0.0" + }, + "funding": { + "url": "https://github.com/wojtekmaj/react-pdf?sponsor=1" + }, + "peerDependencies": { + "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", + "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/react-pdf/node_modules/pdfjs-dist": { + "version": "5.4.296", + "resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-5.4.296.tgz", + "integrity": "sha512-DlOzet0HO7OEnmUmB6wWGJrrdvbyJKftI1bhMitK7O2N8W2gc757yyYBbINy9IDafXAV9wmKr9t7xsTaNKRG5Q==", + "license": "Apache-2.0", + "engines": { + "node": ">=20.16.0 || >=22.3.0" + }, + "optionalDependencies": { + "@napi-rs/canvas": "^0.1.80" + } + }, "node_modules/react-refresh": { "version": "0.18.0", "resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.18.0.tgz", @@ -5382,6 +5644,12 @@ "url": "https://opencollective.com/webpack" } }, + "node_modules/tiny-invariant": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz", + "integrity": "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==", + "license": "MIT" + }, "node_modules/tinyglobby": { "version": "0.2.15", "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", @@ -5824,6 +6092,15 @@ "node": ">=0.10.0" } }, + "node_modules/warning": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/warning/-/warning-4.0.3.tgz", + "integrity": 
"sha512-rpJyN222KWIvHJ/F53XSZv0Zl/accqHR8et1kpaMTD/fLCRxtV8iX8czMzY7sVZupTI3zcUTg8eycS2kNF9l6w==", + "license": "MIT", + "dependencies": { + "loose-envify": "^1.0.0" + } + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git a/frontend/package.json b/frontend/package.json index c9e89e5..1f85631 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -21,6 +21,7 @@ "react-dropzone": "^14.3.8", "react-i18next": "^16.3.0", "react-markdown": "^9.0.1", + "react-pdf": "^10.2.0", "react-router-dom": "^7.9.5", "tailwind-merge": "^3.4.0", "zustand": "^5.0.8" diff --git a/frontend/src/components/PDFViewer.tsx b/frontend/src/components/PDFViewer.tsx new file mode 100644 index 0000000..db4c8c6 --- /dev/null +++ b/frontend/src/components/PDFViewer.tsx @@ -0,0 +1,156 @@ +import { useState, useMemo } from 'react' +import { Document, Page } from 'react-pdf' +import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' +import { Button } from '@/components/ui/button' +import { ChevronLeft, ChevronRight, ZoomIn, ZoomOut } from 'lucide-react' +import 'react-pdf/dist/Page/AnnotationLayer.css' +import 'react-pdf/dist/Page/TextLayer.css' + +interface PDFViewerProps { + title?: string + pdfUrl: string + className?: string + httpHeaders?: Record +} + +export default function PDFViewer({ title, pdfUrl, className, httpHeaders }: PDFViewerProps) { + const [numPages, setNumPages] = useState(0) + const [pageNumber, setPageNumber] = useState(1) + const [scale, setScale] = useState(1.0) + const [loading, setLoading] = useState(true) + const [error, setError] = useState(null) + + // Memoize the file prop to prevent unnecessary reloads + const fileConfig = useMemo(() => { + return httpHeaders ? 
{ url: pdfUrl, httpHeaders } : pdfUrl + }, [pdfUrl, httpHeaders]) + + const onDocumentLoadSuccess = ({ numPages }: { numPages: number }) => { + setNumPages(numPages) + setLoading(false) + setError(null) + } + + const onDocumentLoadError = (error: Error) => { + console.error('Error loading PDF:', error) + setError('Failed to load PDF. Please try again later.') + setLoading(false) + } + + const goToPreviousPage = () => { + setPageNumber((prev) => Math.max(prev - 1, 1)) + } + + const goToNextPage = () => { + setPageNumber((prev) => Math.min(prev + 1, numPages)) + } + + const zoomIn = () => { + setScale((prev) => Math.min(prev + 0.2, 3.0)) + } + + const zoomOut = () => { + setScale((prev) => Math.max(prev - 0.2, 0.5)) + } + + return ( + + {title && ( + + {title} + + )} + + {/* Controls */} +
+ {/* Page Navigation */} +
+ + + Page {pageNumber} of {numPages || '...'} + + +
+ + {/* Zoom Controls */} +
+ + + {Math.round(scale * 100)}% + + +
+
+ + {/* PDF Document */} +
+
+ {loading && ( +
+
+
+ )} + + {error && ( +
+
+

Error

+

{error}

+
+
+ )} + + {!error && ( + +
+
+ } + > + + + )} +
+ +
+
+ ) +} diff --git a/frontend/src/main.tsx b/frontend/src/main.tsx index 570e6c5..0c5d6d4 100644 --- a/frontend/src/main.tsx +++ b/frontend/src/main.tsx @@ -8,6 +8,11 @@ import i18n from './i18n' import './index.css' import App from './App.tsx' +// Configure PDF.js worker for react-pdf +import { pdfjs } from 'react-pdf' +// Use the worker from react-pdf's bundled pdfjs-dist +pdfjs.GlobalWorkerOptions.workerSrc = `//unpkg.com/pdfjs-dist@${pdfjs.version}/build/pdf.worker.min.mjs` + // Create React Query client const queryClient = new QueryClient({ defaultOptions: { diff --git a/frontend/src/pages/ResultsPage.tsx b/frontend/src/pages/ResultsPage.tsx index 2722543..33cd9b9 100644 --- a/frontend/src/pages/ResultsPage.tsx +++ b/frontend/src/pages/ResultsPage.tsx @@ -3,7 +3,7 @@ import { useTranslation } from 'react-i18next' import { useQuery } from '@tanstack/react-query' import { Button } from '@/components/ui/button' import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' -import MarkdownPreview from '@/components/MarkdownPreview' +import PDFViewer from '@/components/PDFViewer' import { useToast } from '@/components/ui/toast' import { useUploadStore } from '@/store/uploadStore' import { apiClientV2 } from '@/services/apiV2' @@ -157,6 +157,14 @@ export default function ResultsPage() { const isCompleted = taskDetail.status === 'completed' + // Construct PDF URL for preview + const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || 'http://localhost:8000' + const pdfUrl = taskId ? `${API_BASE_URL}/api/v2/tasks/${taskId}/download/pdf` : '' + + // Get auth token for PDF preview + const authToken = localStorage.getItem('auth_token_v2') + const pdfHttpHeaders = authToken ? { Authorization: `Bearer ${authToken}` } : undefined + return (
{/* Page Header */} @@ -242,17 +250,11 @@ export default function ResultsPage() { {/* Results Preview */} {isCompleted ? ( - - - 處理結果預覽 - - - - - + ) : taskDetail.status === 'processing' ? ( diff --git a/frontend/src/pages/TaskDetailPage.tsx b/frontend/src/pages/TaskDetailPage.tsx index db7d6c3..835853b 100644 --- a/frontend/src/pages/TaskDetailPage.tsx +++ b/frontend/src/pages/TaskDetailPage.tsx @@ -3,7 +3,7 @@ import { useTranslation } from 'react-i18next' import { useQuery } from '@tanstack/react-query' import { Button } from '@/components/ui/button' import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' -import MarkdownPreview from '@/components/MarkdownPreview' +import PDFViewer from '@/components/PDFViewer' import { useToast } from '@/components/ui/toast' import { apiClientV2 } from '@/services/apiV2' import { @@ -149,6 +149,14 @@ export default function TaskDetailPage() { const isProcessing = taskDetail.status === 'processing' const isFailed = taskDetail.status === 'failed' + // Construct PDF URL for preview + const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || 'http://localhost:8000' + const pdfUrl = taskId ? `${API_BASE_URL}/api/v2/tasks/${taskId}/download/pdf` : '' + + // Get auth token for PDF preview + const authToken = localStorage.getItem('auth_token_v2') + const pdfHttpHeaders = authToken ? { Authorization: `Bearer ${authToken}` } : undefined + return (
{/* Page Header */} @@ -329,17 +337,11 @@ export default function TaskDetailPage() { {/* Result Preview */} {isCompleted && ( - - - 處理結果預覽 - - - - - + )}
) diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts index c9e87cf..58ff93c 100644 --- a/frontend/vite.config.ts +++ b/frontend/vite.config.ts @@ -6,10 +6,11 @@ import path from 'path' export default defineConfig({ plugins: [react()], server: { + host: '0.0.0.0', port: 5173, proxy: { '/api': { - target: 'http://localhost:8000', + target: process.env.VITE_API_URL || 'http://localhost:8000', changeOrigin: true, }, }, diff --git a/requirements.txt b/requirements.txt index 6dc50ac..42211a7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,6 +21,7 @@ opencv-python>=4.8.0 # ===== PDF Generation ===== weasyprint>=60.0 markdown>=3.5.0 +reportlab>=4.0.0 # Layout-preserving PDF generation with precise coordinate control # Note: pandoc needs to be installed via brew (brew install pandoc) # ===== Data Export =====