feat: implement layout-preserving PDF generation with table reconstruction

Major Features:
- Add PDF generation service with Chinese font support
- Parse HTML tables from PP-StructureV3 and rebuild with ReportLab
- Extract table text for translation purposes
- Auto-filter text regions inside tables to avoid overlaps

Backend Changes:
1. pdf_generator_service.py (NEW)
   - HTMLTableParser: Parse HTML tables to extract structure
   - PDFGeneratorService: Generate layout-preserving PDFs
   - Coordinate transformation: OCR (top-left) → PDF (bottom-left)
   - Font size heuristics: 75% of bbox height with width checking
   - Table reconstruction: Parse HTML → ReportLab Table
   - Image embedding: Extract bbox from filenames

2. ocr_service.py
   - Add _extract_table_text() for translation support
   - Add output_dir parameter to save images to result directory
   - Extract bbox from image filenames (img_in_table_box_x1_y1_x2_y2.jpg)

3. tasks.py
   - Update process_task_ocr to use save_results() with PDF generation
   - Fix download_pdf endpoint to use database-stored PDF paths
   - Support on-demand PDF generation from JSON

4. config.py
   - Add chinese_font_path configuration
   - Add pdf_enable_bbox_debug flag

Frontend Changes:
1. PDFViewer.tsx (NEW)
   - React PDF viewer with zoom and pagination
   - Memoized file config to prevent unnecessary reloads

2. TaskDetailPage.tsx & ResultsPage.tsx
   - Integrate PDF preview and download

3. main.tsx
   - Configure PDF.js worker via CDN

4. vite.config.ts
   - Add host: '0.0.0.0' for network access
   - Use VITE_API_URL environment variable for backend proxy

Dependencies:
- reportlab: PDF generation library
- Noto Sans SC font: Chinese character support

🤖 Generated with Claude Code
https://claude.com/claude-code

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
egg
2025-11-17 20:21:56 +08:00
parent 012da1abc4
commit fa1abcd8e6
16 changed files with 1427 additions and 57 deletions

View File

@@ -0,0 +1,156 @@
import { useState, useMemo } from 'react'
import { Document, Page } from 'react-pdf'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Button } from '@/components/ui/button'
import { ChevronLeft, ChevronRight, ZoomIn, ZoomOut } from 'lucide-react'
import 'react-pdf/dist/Page/AnnotationLayer.css'
import 'react-pdf/dist/Page/TextLayer.css'
/** Props accepted by the PDFViewer component. */
interface PDFViewerProps {
  /** Location of the PDF document to display. */
  pdfUrl: string
  /** Optional heading; the card header is only rendered when this is set. */
  title?: string
  /** Optional class name forwarded to the surrounding card. */
  className?: string
  /** Optional request headers sent along with `pdfUrl` when fetching the document. */
  httpHeaders?: Record<string, string>
}
/**
 * Card-wrapped PDF preview with page navigation and zoom controls.
 *
 * The document is fetched from `pdfUrl`, optionally with `httpHeaders`
 * (e.g. an Authorization header). Viewer state (page, page count, loading
 * and error flags) is reset whenever the URL or the header values change,
 * so a previous failure or a stale page number never leaks into the next
 * document.
 *
 * @param title       Optional heading shown in the card header.
 * @param pdfUrl      URL of the PDF to render.
 * @param className   Optional class name applied to the outer card.
 * @param httpHeaders Optional headers sent with the PDF request.
 */
export default function PDFViewer({ title, pdfUrl, className, httpHeaders }: PDFViewerProps) {
  const [numPages, setNumPages] = useState<number>(0)
  const [pageNumber, setPageNumber] = useState<number>(1)
  const [scale, setScale] = useState<number>(1.0)
  const [loading, setLoading] = useState<boolean>(true)
  const [error, setError] = useState<string | null>(null)

  // Compare headers by value, not by object identity: callers typically build
  // the headers object inline, which yields a new reference on every render.
  const headersKey = httpHeaders ? JSON.stringify(httpHeaders) : ''
  const sourceKey = `${pdfUrl}\n${headersKey}`
  const [activeSourceKey, setActiveSourceKey] = useState<string>(sourceKey)

  // Reset per-document state during render when the source changes. Without
  // this, an earlier error keeps <Document> unmounted forever and the page
  // number may point past the end of the new document.
  if (activeSourceKey !== sourceKey) {
    setActiveSourceKey(sourceKey)
    setNumPages(0)
    setPageNumber(1)
    setLoading(true)
    setError(null)
  }

  // Memoize the file prop so its identity only changes when the URL or the
  // header values change; this prevents unnecessary reloads of the document.
  const fileConfig = useMemo(() => {
    if (!headersKey) {
      return pdfUrl
    }
    // Rebuilt from the serialized key so the memo depends on values only.
    return { url: pdfUrl, httpHeaders: JSON.parse(headersKey) as Record<string, string> }
  }, [pdfUrl, headersKey])

  const onDocumentLoadSuccess = ({ numPages: loadedPages }: { numPages: number }) => {
    setNumPages(loadedPages)
    // Keep the current page inside the bounds of the freshly loaded document.
    setPageNumber((prev) => Math.min(Math.max(prev, 1), Math.max(loadedPages, 1)))
    setLoading(false)
    setError(null)
  }

  const onDocumentLoadError = (loadError: Error) => {
    console.error('Error loading PDF:', loadError)
    setError('Failed to load PDF. Please try again later.')
    setLoading(false)
  }

  const goToPreviousPage = () => {
    setPageNumber((prev) => Math.max(prev - 1, 1))
  }

  const goToNextPage = () => {
    setPageNumber((prev) => Math.min(prev + 1, numPages))
  }

  const zoomIn = () => {
    setScale((prev) => Math.min(prev + 0.2, 3.0))
  }

  const zoomOut = () => {
    setScale((prev) => Math.max(prev - 0.2, 0.5))
  }

  return (
    <Card className={className}>
      {title && (
        <CardHeader>
          <CardTitle>{title}</CardTitle>
        </CardHeader>
      )}
      <CardContent>
        {/* Controls */}
        <div className="flex items-center justify-between mb-4 gap-4 flex-wrap">
          {/* Page Navigation */}
          <div className="flex items-center gap-2">
            <Button
              variant="outline"
              size="sm"
              onClick={goToPreviousPage}
              disabled={pageNumber <= 1 || loading}
            >
              <ChevronLeft className="h-4 w-4" />
            </Button>
            <span className="text-sm whitespace-nowrap">
              Page {pageNumber} of {numPages || '...'}
            </span>
            <Button
              variant="outline"
              size="sm"
              onClick={goToNextPage}
              disabled={pageNumber >= numPages || loading}
            >
              <ChevronRight className="h-4 w-4" />
            </Button>
          </div>
          {/* Zoom Controls */}
          <div className="flex items-center gap-2">
            <Button
              variant="outline"
              size="sm"
              onClick={zoomOut}
              disabled={scale <= 0.5 || loading}
            >
              <ZoomOut className="h-4 w-4" />
            </Button>
            <span className="text-sm whitespace-nowrap w-16 text-center">
              {Math.round(scale * 100)}%
            </span>
            <Button
              variant="outline"
              size="sm"
              onClick={zoomIn}
              disabled={scale >= 3.0 || loading}
            >
              <ZoomIn className="h-4 w-4" />
            </Button>
          </div>
        </div>
        {/* PDF Document */}
        <div className="border rounded-md bg-muted/10 overflow-auto max-h-[800px]">
          <div className="flex justify-center p-4">
            {error && (
              <div className="flex items-center justify-center min-h-[400px]">
                <div className="text-center">
                  <p className="text-destructive font-semibold mb-2">Error</p>
                  <p className="text-sm text-muted-foreground">{error}</p>
                </div>
              </div>
            )}
            {!error && (
              <Document
                file={fileConfig}
                onLoadSuccess={onDocumentLoadSuccess}
                onLoadError={onDocumentLoadError}
                // The Document's own placeholder is the single loading spinner;
                // a second, state-driven spinner would render next to it.
                loading={
                  <div className="flex items-center justify-center min-h-[400px]">
                    <div className="animate-spin rounded-full h-12 w-12 border-b-2 border-primary"></div>
                  </div>
                }
              >
                <Page
                  pageNumber={pageNumber}
                  scale={scale}
                  renderTextLayer={true}
                  renderAnnotationLayer={true}
                  className="shadow-lg"
                />
              </Document>
            )}
          </div>
        </div>
      </CardContent>
    </Card>
  )
}

View File

@@ -8,6 +8,11 @@ import i18n from './i18n'
import './index.css'
import App from './App.tsx'
// Configure the PDF.js worker used by react-pdf
import { pdfjs } from 'react-pdf'
// Load the worker script from the unpkg CDN via a protocol-relative URL. The
// version segment is taken from pdfjs.version so the requested worker build
// follows the pdfjs version exposed by react-pdf.
pdfjs.GlobalWorkerOptions.workerSrc = `//unpkg.com/pdfjs-dist@${pdfjs.version}/build/pdf.worker.min.mjs`
// Create React Query client
const queryClient = new QueryClient({
defaultOptions: {

View File

@@ -3,7 +3,7 @@ import { useTranslation } from 'react-i18next'
import { useQuery } from '@tanstack/react-query'
import { Button } from '@/components/ui/button'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import MarkdownPreview from '@/components/MarkdownPreview'
import PDFViewer from '@/components/PDFViewer'
import { useToast } from '@/components/ui/toast'
import { useUploadStore } from '@/store/uploadStore'
import { apiClientV2 } from '@/services/apiV2'
@@ -157,6 +157,14 @@ export default function ResultsPage() {
const isCompleted = taskDetail.status === 'completed'
// Construct PDF URL for preview
const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || 'http://localhost:8000'
const pdfUrl = taskId ? `${API_BASE_URL}/api/v2/tasks/${taskId}/download/pdf` : ''
// Get auth token for PDF preview
const authToken = localStorage.getItem('auth_token_v2')
const pdfHttpHeaders = authToken ? { Authorization: `Bearer ${authToken}` } : undefined
return (
<div className="space-y-6">
{/* Page Header */}
@@ -242,17 +250,11 @@ export default function ResultsPage() {
{/* Results Preview */}
{isCompleted ? (
<Card>
<CardHeader>
<CardTitle></CardTitle>
</CardHeader>
<CardContent>
<MarkdownPreview
title={`OCR 結果 - ${taskDetail.filename || '未知檔案'}`}
content="請使用上方下載按鈕下載 Markdown 或 JSON 格式查看完整結果"
/>
</CardContent>
</Card>
<PDFViewer
title={`OCR 結果預覽 - ${taskDetail.filename || '未知檔案'}`}
pdfUrl={pdfUrl}
httpHeaders={pdfHttpHeaders}
/>
) : taskDetail.status === 'processing' ? (
<Card>
<CardContent className="p-12 text-center">

View File

@@ -3,7 +3,7 @@ import { useTranslation } from 'react-i18next'
import { useQuery } from '@tanstack/react-query'
import { Button } from '@/components/ui/button'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import MarkdownPreview from '@/components/MarkdownPreview'
import PDFViewer from '@/components/PDFViewer'
import { useToast } from '@/components/ui/toast'
import { apiClientV2 } from '@/services/apiV2'
import {
@@ -149,6 +149,14 @@ export default function TaskDetailPage() {
const isProcessing = taskDetail.status === 'processing'
const isFailed = taskDetail.status === 'failed'
// Construct PDF URL for preview
const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || 'http://localhost:8000'
const pdfUrl = taskId ? `${API_BASE_URL}/api/v2/tasks/${taskId}/download/pdf` : ''
// Get auth token for PDF preview
const authToken = localStorage.getItem('auth_token_v2')
const pdfHttpHeaders = authToken ? { Authorization: `Bearer ${authToken}` } : undefined
return (
<div className="space-y-6">
{/* Page Header */}
@@ -329,17 +337,11 @@ export default function TaskDetailPage() {
{/* Result Preview */}
{isCompleted && (
<Card>
<CardHeader>
<CardTitle></CardTitle>
</CardHeader>
<CardContent>
<MarkdownPreview
title={`OCR 結果 - ${taskDetail.filename || '未知檔案'}`}
content="請使用上方下載按鈕下載 Markdown、JSON 或 PDF 格式查看完整結果"
/>
</CardContent>
</Card>
<PDFViewer
title={`OCR 結果預覽 - ${taskDetail.filename || '未知檔案'}`}
pdfUrl={pdfUrl}
httpHeaders={pdfHttpHeaders}
/>
)}
</div>
)