Files
OCR/frontend/src/pages/ResultsPage.tsx
egg fa1abcd8e6 feat: implement layout-preserving PDF generation with table reconstruction
Major Features:
- Add PDF generation service with Chinese font support
- Parse HTML tables from PP-StructureV3 and rebuild with ReportLab
- Extract table text for translation purposes
- Auto-filter text regions inside tables to avoid overlaps

Backend Changes:
1. pdf_generator_service.py (NEW)
   - HTMLTableParser: Parse HTML tables to extract structure
   - PDFGeneratorService: Generate layout-preserving PDFs
   - Coordinate transformation: OCR (top-left) → PDF (bottom-left)
   - Font size heuristics: 75% of bbox height with width checking
   - Table reconstruction: Parse HTML → ReportLab Table
   - Image embedding: Extract bbox from filenames

2. ocr_service.py
   - Add _extract_table_text() for translation support
   - Add output_dir parameter to save images to result directory
   - Extract bbox from image filenames (img_in_table_box_x1_y1_x2_y2.jpg)

3. tasks.py
   - Update process_task_ocr to use save_results() with PDF generation
   - Fix download_pdf endpoint to use database-stored PDF paths
   - Support on-demand PDF generation from JSON

4. config.py
   - Add chinese_font_path configuration
   - Add pdf_enable_bbox_debug flag

Frontend Changes:
1. PDFViewer.tsx (NEW)
   - React PDF viewer with zoom and pagination
   - Memoized file config to prevent unnecessary reloads

2. TaskDetailPage.tsx & ResultsPage.tsx
   - Integrate PDF preview and download

3. main.tsx
   - Configure PDF.js worker via CDN

4. vite.config.ts
   - Add host: '0.0.0.0' for network access
   - Use VITE_API_URL environment variable for backend proxy

Dependencies:
- reportlab: PDF generation library
- Noto Sans SC font: Chinese character support

🤖 Generated with Claude Code
https://claude.com/claude-code

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-17 20:21:56 +08:00

291 lines
9.9 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { useNavigate } from 'react-router-dom'
import { useTranslation } from 'react-i18next'
import { useQuery } from '@tanstack/react-query'
import { Button } from '@/components/ui/button'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import PDFViewer from '@/components/PDFViewer'
import { useToast } from '@/components/ui/toast'
import { useUploadStore } from '@/store/uploadStore'
import { apiClientV2 } from '@/services/apiV2'
import { FileText, Download, AlertCircle, TrendingUp, Clock, Layers, FileJson, Loader2 } from 'lucide-react'
import { Badge } from '@/components/ui/badge'
/** Type guard: true for any non-null object, so its properties can be probed safely. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null
}

/**
 * Pulls `error.response.data.detail` out of an unknown thrown value.
 *
 * Only a non-empty string is returned. Anything else (missing fields, a
 * non-object payload, a non-string `detail`) yields `undefined` so the caller
 * can fall back to a generic message instead of handing an arbitrary value to
 * the toast renderer.
 */
function extractErrorDetail(error: unknown): string | undefined {
  if (!isRecord(error)) return undefined
  const { response } = error
  if (!isRecord(response)) return undefined
  const { data } = response
  if (!isRecord(data)) return undefined
  const { detail } = data
  return typeof detail === 'string' && detail.length > 0 ? detail : undefined
}

/**
 * Results page for a single OCR task.
 *
 * Reads the current task id from the upload store, polls the task every 2 s
 * until its status is `completed` or `failed`, and renders:
 * - a placeholder card when no task is selected, while loading, or when the
 *   task could not be fetched;
 * - download buttons (JSON / Markdown / PDF), a stats grid and an inline PDF
 *   preview once the task is completed;
 * - a status card for processing, failed, and any other status.
 *
 * NOTE(review): several elements (status badges, stat labels, a few buttons
 * and headings) render with no text in this revision — confirm whether their
 * labels were lost and should be restored.
 */
export default function ResultsPage() {
  const { t } = useTranslation()
  const navigate = useNavigate()
  const { toast } = useToast()
  const { batchId } = useUploadStore()

  // In V2, batchId is actually a task_id (string)
  const taskId = batchId ? String(batchId) : null

  // Get task details
  const { data: taskDetail, isLoading } = useQuery({
    queryKey: ['taskDetail', taskId],
    // `enabled` below keeps this from running while taskId is null.
    queryFn: () => apiClientV2.getTask(taskId!),
    enabled: !!taskId,
    // Poll every 2 s until the task reaches a terminal status.
    refetchInterval: (query) => {
      const status = query.state.data?.status
      return status === 'completed' || status === 'failed' ? false : 2000
    },
  })

  /**
   * Shared download flow: runs `download` for the current task, then shows a
   * success toast with `successDescription`, or an error toast carrying the
   * backend's `detail` message (generic network error text otherwise).
   */
  const runDownload = async (
    download: (id: string) => Promise<unknown>,
    successDescription: string,
  ): Promise<void> => {
    if (!taskId) return
    try {
      await download(taskId)
      toast({
        title: t('export.exportSuccess'),
        description: successDescription,
        variant: 'success',
      })
    } catch (error: unknown) {
      toast({
        title: t('export.exportError'),
        description: extractErrorDetail(error) || t('errors.networkError'),
        variant: 'destructive',
      })
    }
  }

  // The API methods are wrapped in arrows so they keep their `this` binding.
  const handleDownloadPDF = () =>
    runDownload((id) => apiClientV2.downloadPDF(id), 'PDF 已下載')

  const handleDownloadMarkdown = () =>
    runDownload((id) => apiClientV2.downloadMarkdown(id), 'Markdown 已下載')

  const handleDownloadJSON = () =>
    runDownload((id) => apiClientV2.downloadJSON(id), 'JSON 已下載')

  /** Maps a task status to its badge variant; unknown statuses get the secondary style. */
  const getStatusBadge = (status: string) => {
    switch (status) {
      case 'completed':
        return <Badge variant="default" className="bg-green-600"></Badge>
      case 'processing':
        return <Badge variant="default"></Badge>
      case 'failed':
        return <Badge variant="destructive"></Badge>
      default:
        return <Badge variant="secondary"></Badge>
    }
  }

  // Show helpful message when no task is selected
  if (!taskId) {
    return (
      <div className="flex items-center justify-center min-h-[60vh]">
        <Card className="max-w-md text-center">
          <CardHeader>
            <div className="flex justify-center mb-4">
              <div className="w-16 h-16 bg-muted rounded-full flex items-center justify-center">
                <AlertCircle className="w-8 h-8 text-muted-foreground" />
              </div>
            </div>
            <CardTitle className="text-xl">{t('results.title')}</CardTitle>
          </CardHeader>
          <CardContent className="space-y-4">
            <p className="text-muted-foreground">
              {t('results.noBatchMessage', { defaultValue: '尚未選擇任何任務。請先上傳並處理檔案。' })}
            </p>
            <Button onClick={() => navigate('/upload')} size="lg">
              {t('results.goToUpload', { defaultValue: '前往上傳頁面' })}
            </Button>
          </CardContent>
        </Card>
      </div>
    )
  }

  // First fetch still in flight
  if (isLoading) {
    return (
      <div className="flex items-center justify-center min-h-[60vh]">
        <div className="text-center">
          <Loader2 className="w-12 h-12 animate-spin text-primary mx-auto mb-4" />
          <p className="text-muted-foreground">...</p>
        </div>
      </div>
    )
  }

  // Query settled without data: offer a way back to the task list
  if (!taskDetail) {
    return (
      <div className="flex items-center justify-center min-h-[60vh]">
        <Card className="max-w-md text-center">
          <CardHeader>
            <CardTitle></CardTitle>
          </CardHeader>
          <CardContent>
            <Button onClick={() => navigate('/tasks')}>
            </Button>
          </CardContent>
        </Card>
      </div>
    )
  }

  const isCompleted = taskDetail.status === 'completed'

  // Construct PDF URL for preview (taskId is guaranteed non-null past the guard above)
  const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || 'http://localhost:8000'
  const pdfUrl = `${API_BASE_URL}/api/v2/tasks/${taskId}/download/pdf`

  // Get auth token for PDF preview
  // NOTE(review): this builds a new headers object on every render — confirm
  // PDFViewer does not reload the document when the object identity changes.
  const authToken = localStorage.getItem('auth_token_v2')
  const pdfHttpHeaders = authToken ? { Authorization: `Bearer ${authToken}` } : undefined

  return (
    <div className="space-y-6">
      {/* Page Header */}
      <div className="page-header">
        <div className="flex items-center justify-between">
          <div>
            <h1 className="page-title">{t('results.title')}</h1>
            <p className="text-muted-foreground mt-1">
              ID: <span className="font-mono text-primary">{taskId}</span>
              {taskDetail.filename && ` · ${taskDetail.filename}`}
            </p>
          </div>
          <div className="flex gap-3 items-center">
            {getStatusBadge(taskDetail.status)}
            {isCompleted && (
              <>
                <Button onClick={handleDownloadJSON} variant="outline" className="gap-2">
                  <FileJson className="w-4 h-4" />
                  JSON
                </Button>
                <Button onClick={handleDownloadMarkdown} variant="outline" className="gap-2">
                  <FileText className="w-4 h-4" />
                  Markdown
                </Button>
                <Button onClick={handleDownloadPDF} className="gap-2">
                  <Download className="w-4 h-4" />
                  PDF
                </Button>
              </>
            )}
          </div>
        </div>
      </div>

      {/* Stats Grid */}
      {isCompleted && (
        <div className="grid grid-cols-1 md:grid-cols-3 gap-4">
          <Card>
            <CardContent className="p-6">
              <div className="flex items-center gap-3">
                <div className="p-3 bg-primary/10 rounded-lg">
                  <Clock className="w-6 h-6 text-primary" />
                </div>
                <div>
                  <p className="text-sm text-muted-foreground"></p>
                  <p className="text-2xl font-bold">
                    {taskDetail.processing_time_ms ? (taskDetail.processing_time_ms / 1000).toFixed(2) : '0'}s
                  </p>
                </div>
              </div>
            </CardContent>
          </Card>
          <Card>
            <CardContent className="p-6">
              <div className="flex items-center gap-3">
                <div className="p-3 bg-success/10 rounded-lg">
                  <TrendingUp className="w-6 h-6 text-success" />
                </div>
                <div>
                  <p className="text-sm text-muted-foreground"></p>
                  <p className="text-2xl font-bold text-success"></p>
                </div>
              </div>
            </CardContent>
          </Card>
          <Card>
            <CardContent className="p-6">
              <div className="flex items-center gap-3">
                <div className="p-3 bg-accent/10 rounded-lg">
                  <Layers className="w-6 h-6 text-accent" />
                </div>
                <div>
                  <p className="text-sm text-muted-foreground"></p>
                  <p className="text-2xl font-bold">OCR</p>
                </div>
              </div>
            </CardContent>
          </Card>
        </div>
      )}

      {/* Results Preview */}
      {isCompleted ? (
        <PDFViewer
          title={`OCR 結果預覽 - ${taskDetail.filename || '未知檔案'}`}
          pdfUrl={pdfUrl}
          httpHeaders={pdfHttpHeaders}
        />
      ) : taskDetail.status === 'processing' ? (
        <Card>
          <CardContent className="p-12 text-center">
            <Loader2 className="w-16 h-16 animate-spin text-primary mx-auto mb-4" />
            <p className="text-lg font-semibold">...</p>
            <p className="text-muted-foreground mt-2">OCR </p>
          </CardContent>
        </Card>
      ) : taskDetail.status === 'failed' ? (
        <Card>
          <CardContent className="p-12 text-center">
            <AlertCircle className="w-16 h-16 text-destructive mx-auto mb-4" />
            <p className="text-lg font-semibold text-destructive"></p>
            {taskDetail.error_message && (
              <p className="text-muted-foreground mt-2">{taskDetail.error_message}</p>
            )}
          </CardContent>
        </Card>
      ) : (
        <Card>
          <CardContent className="p-12 text-center">
            <Clock className="w-16 h-16 text-muted-foreground mx-auto mb-4" />
            <p className="text-lg font-semibold"></p>
            <p className="text-muted-foreground mt-2"> OCR </p>
            <Button onClick={() => navigate('/processing')} className="mt-4">
            </Button>
          </CardContent>
        </Card>
      )}
    </div>
  )
}