feat: implement layout-preserving PDF generation with table reconstruction
Major Features: - Add PDF generation service with Chinese font support - Parse HTML tables from PP-StructureV3 and rebuild with ReportLab - Extract table text for translation purposes - Auto-filter text regions inside tables to avoid overlaps Backend Changes: 1. pdf_generator_service.py (NEW) - HTMLTableParser: Parse HTML tables to extract structure - PDFGeneratorService: Generate layout-preserving PDFs - Coordinate transformation: OCR (top-left) → PDF (bottom-left) - Font size heuristics: 75% of bbox height with width checking - Table reconstruction: Parse HTML → ReportLab Table - Image embedding: Extract bbox from filenames 2. ocr_service.py - Add _extract_table_text() for translation support - Add output_dir parameter to save images to result directory - Extract bbox from image filenames (img_in_table_box_x1_y1_x2_y2.jpg) 3. tasks.py - Update process_task_ocr to use save_results() with PDF generation - Fix download_pdf endpoint to use database-stored PDF paths - Support on-demand PDF generation from JSON 4. config.py - Add chinese_font_path configuration - Add pdf_enable_bbox_debug flag Frontend Changes: 1. PDFViewer.tsx (NEW) - React PDF viewer with zoom and pagination - Memoized file config to prevent unnecessary reloads 2. TaskDetailPage.tsx & ResultsPage.tsx - Integrate PDF preview and download 3. main.tsx - Configure PDF.js worker via CDN 4. vite.config.ts - Add host: '0.0.0.0' for network access - Use VITE_API_URL environment variable for backend proxy Dependencies: - reportlab: PDF generation library - Noto Sans SC font: Chinese character support 🤖 Generated with Claude Code https://claude.com/claude-code Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
156
frontend/src/components/PDFViewer.tsx
Normal file
156
frontend/src/components/PDFViewer.tsx
Normal file
@@ -0,0 +1,156 @@
|
||||
import { useState, useMemo } from 'react'
|
||||
import { Document, Page } from 'react-pdf'
|
||||
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { ChevronLeft, ChevronRight, ZoomIn, ZoomOut } from 'lucide-react'
|
||||
import 'react-pdf/dist/Page/AnnotationLayer.css'
|
||||
import 'react-pdf/dist/Page/TextLayer.css'
|
||||
|
||||
interface PDFViewerProps {
|
||||
title?: string
|
||||
pdfUrl: string
|
||||
className?: string
|
||||
httpHeaders?: Record<string, string>
|
||||
}
|
||||
|
||||
export default function PDFViewer({ title, pdfUrl, className, httpHeaders }: PDFViewerProps) {
|
||||
const [numPages, setNumPages] = useState<number>(0)
|
||||
const [pageNumber, setPageNumber] = useState<number>(1)
|
||||
const [scale, setScale] = useState<number>(1.0)
|
||||
const [loading, setLoading] = useState<boolean>(true)
|
||||
const [error, setError] = useState<string | null>(null)
|
||||
|
||||
// Memoize the file prop to prevent unnecessary reloads
|
||||
const fileConfig = useMemo(() => {
|
||||
return httpHeaders ? { url: pdfUrl, httpHeaders } : pdfUrl
|
||||
}, [pdfUrl, httpHeaders])
|
||||
|
||||
const onDocumentLoadSuccess = ({ numPages }: { numPages: number }) => {
|
||||
setNumPages(numPages)
|
||||
setLoading(false)
|
||||
setError(null)
|
||||
}
|
||||
|
||||
const onDocumentLoadError = (error: Error) => {
|
||||
console.error('Error loading PDF:', error)
|
||||
setError('Failed to load PDF. Please try again later.')
|
||||
setLoading(false)
|
||||
}
|
||||
|
||||
const goToPreviousPage = () => {
|
||||
setPageNumber((prev) => Math.max(prev - 1, 1))
|
||||
}
|
||||
|
||||
const goToNextPage = () => {
|
||||
setPageNumber((prev) => Math.min(prev + 1, numPages))
|
||||
}
|
||||
|
||||
const zoomIn = () => {
|
||||
setScale((prev) => Math.min(prev + 0.2, 3.0))
|
||||
}
|
||||
|
||||
const zoomOut = () => {
|
||||
setScale((prev) => Math.max(prev - 0.2, 0.5))
|
||||
}
|
||||
|
||||
return (
|
||||
<Card className={className}>
|
||||
{title && (
|
||||
<CardHeader>
|
||||
<CardTitle>{title}</CardTitle>
|
||||
</CardHeader>
|
||||
)}
|
||||
<CardContent>
|
||||
{/* Controls */}
|
||||
<div className="flex items-center justify-between mb-4 gap-4 flex-wrap">
|
||||
{/* Page Navigation */}
|
||||
<div className="flex items-center gap-2">
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={goToPreviousPage}
|
||||
disabled={pageNumber <= 1 || loading}
|
||||
>
|
||||
<ChevronLeft className="h-4 w-4" />
|
||||
</Button>
|
||||
<span className="text-sm whitespace-nowrap">
|
||||
Page {pageNumber} of {numPages || '...'}
|
||||
</span>
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={goToNextPage}
|
||||
disabled={pageNumber >= numPages || loading}
|
||||
>
|
||||
<ChevronRight className="h-4 w-4" />
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
{/* Zoom Controls */}
|
||||
<div className="flex items-center gap-2">
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={zoomOut}
|
||||
disabled={scale <= 0.5 || loading}
|
||||
>
|
||||
<ZoomOut className="h-4 w-4" />
|
||||
</Button>
|
||||
<span className="text-sm whitespace-nowrap w-16 text-center">
|
||||
{Math.round(scale * 100)}%
|
||||
</span>
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={zoomIn}
|
||||
disabled={scale >= 3.0 || loading}
|
||||
>
|
||||
<ZoomIn className="h-4 w-4" />
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* PDF Document */}
|
||||
<div className="border rounded-md bg-muted/10 overflow-auto max-h-[800px]">
|
||||
<div className="flex justify-center p-4">
|
||||
{loading && (
|
||||
<div className="flex items-center justify-center min-h-[400px]">
|
||||
<div className="animate-spin rounded-full h-12 w-12 border-b-2 border-primary"></div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{error && (
|
||||
<div className="flex items-center justify-center min-h-[400px]">
|
||||
<div className="text-center">
|
||||
<p className="text-destructive font-semibold mb-2">Error</p>
|
||||
<p className="text-sm text-muted-foreground">{error}</p>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{!error && (
|
||||
<Document
|
||||
file={fileConfig}
|
||||
onLoadSuccess={onDocumentLoadSuccess}
|
||||
onLoadError={onDocumentLoadError}
|
||||
loading={
|
||||
<div className="flex items-center justify-center min-h-[400px]">
|
||||
<div className="animate-spin rounded-full h-12 w-12 border-b-2 border-primary"></div>
|
||||
</div>
|
||||
}
|
||||
>
|
||||
<Page
|
||||
pageNumber={pageNumber}
|
||||
scale={scale}
|
||||
renderTextLayer={true}
|
||||
renderAnnotationLayer={true}
|
||||
className="shadow-lg"
|
||||
/>
|
||||
</Document>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
)
|
||||
}
|
||||
Reference in New Issue
Block a user