feat: implement layout-preserving PDF generation with table reconstruction
Major Features: - Add PDF generation service with Chinese font support - Parse HTML tables from PP-StructureV3 and rebuild with ReportLab - Extract table text for translation purposes - Auto-filter text regions inside tables to avoid overlaps Backend Changes: 1. pdf_generator_service.py (NEW) - HTMLTableParser: Parse HTML tables to extract structure - PDFGeneratorService: Generate layout-preserving PDFs - Coordinate transformation: OCR (top-left) → PDF (bottom-left) - Font size heuristics: 75% of bbox height with width checking - Table reconstruction: Parse HTML → ReportLab Table - Image embedding: Extract bbox from filenames 2. ocr_service.py - Add _extract_table_text() for translation support - Add output_dir parameter to save images to result directory - Extract bbox from image filenames (img_in_table_box_x1_y1_x2_y2.jpg) 3. tasks.py - Update process_task_ocr to use save_results() with PDF generation - Fix download_pdf endpoint to use database-stored PDF paths - Support on-demand PDF generation from JSON 4. config.py - Add chinese_font_path configuration - Add pdf_enable_bbox_debug flag Frontend Changes: 1. PDFViewer.tsx (NEW) - React PDF viewer with zoom and pagination - Memoized file config to prevent unnecessary reloads 2. TaskDetailPage.tsx & ResultsPage.tsx - Integrate PDF preview and download 3. main.tsx - Configure PDF.js worker via CDN 4. vite.config.ts - Add host: '0.0.0.0' for network access - Use VITE_API_URL environment variable for backend proxy Dependencies: - reportlab: PDF generation library - Noto Sans SC font: Chinese character support 🤖 Generated with Claude Code https://claude.com/claude-code Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
277
frontend/package-lock.json
generated
277
frontend/package-lock.json
generated
@@ -19,6 +19,7 @@
|
||||
"react-dropzone": "^14.3.8",
|
||||
"react-i18next": "^16.3.0",
|
||||
"react-markdown": "^9.0.1",
|
||||
"react-pdf": "^10.2.0",
|
||||
"react-router-dom": "^7.9.5",
|
||||
"tailwind-merge": "^3.4.0",
|
||||
"zustand": "^5.0.8"
|
||||
@@ -1048,6 +1049,191 @@
|
||||
"@jridgewell/sourcemap-codec": "^1.4.14"
|
||||
}
|
||||
},
|
||||
"node_modules/@napi-rs/canvas": {
|
||||
"version": "0.1.82",
|
||||
"resolved": "https://registry.npmjs.org/@napi-rs/canvas/-/canvas-0.1.82.tgz",
|
||||
"integrity": "sha512-FGjyUBoF0sl1EenSiE4UV2WYu76q6F9GSYedq5EiOCOyGYoQ/Owulcv6rd7v/tWOpljDDtefXXIaOCJrVKem4w==",
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"workspaces": [
|
||||
"e2e/*"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@napi-rs/canvas-android-arm64": "0.1.82",
|
||||
"@napi-rs/canvas-darwin-arm64": "0.1.82",
|
||||
"@napi-rs/canvas-darwin-x64": "0.1.82",
|
||||
"@napi-rs/canvas-linux-arm-gnueabihf": "0.1.82",
|
||||
"@napi-rs/canvas-linux-arm64-gnu": "0.1.82",
|
||||
"@napi-rs/canvas-linux-arm64-musl": "0.1.82",
|
||||
"@napi-rs/canvas-linux-riscv64-gnu": "0.1.82",
|
||||
"@napi-rs/canvas-linux-x64-gnu": "0.1.82",
|
||||
"@napi-rs/canvas-linux-x64-musl": "0.1.82",
|
||||
"@napi-rs/canvas-win32-x64-msvc": "0.1.82"
|
||||
}
|
||||
},
|
||||
"node_modules/@napi-rs/canvas-android-arm64": {
|
||||
"version": "0.1.82",
|
||||
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-android-arm64/-/canvas-android-arm64-0.1.82.tgz",
|
||||
"integrity": "sha512-bvZhN0iI54ouaQOrgJV96H2q7J3ZoufnHf4E1fUaERwW29Rz4rgicohnAg4venwBJZYjGl5Yl3CGmlAl1LZowQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@napi-rs/canvas-darwin-arm64": {
|
||||
"version": "0.1.82",
|
||||
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-arm64/-/canvas-darwin-arm64-0.1.82.tgz",
|
||||
"integrity": "sha512-InuBHKCyuFqhNwNr4gpqazo5Xp6ltKflqOLiROn4hqAS8u21xAHyYCJRgHwd+a5NKmutFTaRWeUIT/vxWbU/iw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@napi-rs/canvas-darwin-x64": {
|
||||
"version": "0.1.82",
|
||||
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-x64/-/canvas-darwin-x64-0.1.82.tgz",
|
||||
"integrity": "sha512-aQGV5Ynn96onSXcuvYb2y7TRXD/t4CL2EGmnGqvLyeJX1JLSNisKQlWN/1bPDDXymZYSdUqbXehj5qzBlOx+RQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@napi-rs/canvas-linux-arm-gnueabihf": {
|
||||
"version": "0.1.82",
|
||||
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm-gnueabihf/-/canvas-linux-arm-gnueabihf-0.1.82.tgz",
|
||||
"integrity": "sha512-YIUpmHWeHGGRhWitT1KJkgj/JPXPfc9ox8oUoyaGPxolLGPp5AxJkq8wIg8CdFGtutget968dtwmx71m8o3h5g==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@napi-rs/canvas-linux-arm64-gnu": {
|
||||
"version": "0.1.82",
|
||||
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-gnu/-/canvas-linux-arm64-gnu-0.1.82.tgz",
|
||||
"integrity": "sha512-AwLzwLBgmvk7kWeUgItOUor/QyG31xqtD26w1tLpf4yE0hiXTGp23yc669aawjB6FzgIkjh1NKaNS52B7/qEBQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@napi-rs/canvas-linux-arm64-musl": {
|
||||
"version": "0.1.82",
|
||||
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-musl/-/canvas-linux-arm64-musl-0.1.82.tgz",
|
||||
"integrity": "sha512-moZWuqepAwWBffdF4JDadt8TgBD02iMhG6I1FHZf8xO20AsIp9rB+p0B8Zma2h2vAF/YMjeFCDmW5un6+zZz9g==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@napi-rs/canvas-linux-riscv64-gnu": {
|
||||
"version": "0.1.82",
|
||||
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-riscv64-gnu/-/canvas-linux-riscv64-gnu-0.1.82.tgz",
|
||||
"integrity": "sha512-w9++2df2kG9eC9LWYIHIlMLuhIrKGQYfUxs97CwgxYjITeFakIRazI9LYWgVzEc98QZ9x9GQvlicFsrROV59MQ==",
|
||||
"cpu": [
|
||||
"riscv64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@napi-rs/canvas-linux-x64-gnu": {
|
||||
"version": "0.1.82",
|
||||
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-gnu/-/canvas-linux-x64-gnu-0.1.82.tgz",
|
||||
"integrity": "sha512-lZulOPwrRi6hEg/17CaqdwWEUfOlIJuhXxincx1aVzsVOCmyHf+xFq4i6liJl1P+x2v6Iz2Z/H5zHvXJCC7Bwg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@napi-rs/canvas-linux-x64-musl": {
|
||||
"version": "0.1.82",
|
||||
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-musl/-/canvas-linux-x64-musl-0.1.82.tgz",
|
||||
"integrity": "sha512-Be9Wf5RTv1w6GXlTph55K3PH3vsAh1Ax4T1FQY1UYM0QfD0yrwGdnJ8/fhqw7dEgMjd59zIbjJQC8C3msbGn5g==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@napi-rs/canvas-win32-x64-msvc": {
|
||||
"version": "0.1.82",
|
||||
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-win32-x64-msvc/-/canvas-win32-x64-msvc-0.1.82.tgz",
|
||||
"integrity": "sha512-LN/i8VrvxTDmEEK1c10z2cdOTkWT76LlTGtyZe5Kr1sqoSomKeExAjbilnu1+oee5lZUgS5yfZ2LNlVhCeARuw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@nodelib/fs.scandir": {
|
||||
"version": "2.1.5",
|
||||
"resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
|
||||
@@ -4007,6 +4193,24 @@
|
||||
"@jridgewell/sourcemap-codec": "^1.5.5"
|
||||
}
|
||||
},
|
||||
"node_modules/make-cancellable-promise": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/make-cancellable-promise/-/make-cancellable-promise-2.0.0.tgz",
|
||||
"integrity": "sha512-3SEQqTpV9oqVsIWqAcmDuaNeo7yBO3tqPtqGRcKkEo0lrzD3wqbKG9mkxO65KoOgXqj+zH2phJ2LiAsdzlogSw==",
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"url": "https://github.com/wojtekmaj/make-cancellable-promise?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/make-event-props": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/make-event-props/-/make-event-props-2.0.0.tgz",
|
||||
"integrity": "sha512-G/hncXrl4Qt7mauJEXSg3AcdYzmpkIITTNl5I+rH9sog5Yw0kK6vseJjCaPfOXqOqQuPUP89Rkhfz5kPS8ijtw==",
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"url": "https://github.com/wojtekmaj/make-event-props?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/math-intrinsics": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
|
||||
@@ -4169,6 +4373,23 @@
|
||||
"url": "https://opencollective.com/unified"
|
||||
}
|
||||
},
|
||||
"node_modules/merge-refs": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/merge-refs/-/merge-refs-2.0.0.tgz",
|
||||
"integrity": "sha512-3+B21mYK2IqUWnd2EivABLT7ueDhb0b8/dGK8LoFQPrU61YITeCMn14F7y7qZafWNZhUEKb24cJdiT5Wxs3prg==",
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"url": "https://github.com/wojtekmaj/merge-refs?sponsor=1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@types/react": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/merge2": {
|
||||
"version": "1.4.1",
|
||||
"resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz",
|
||||
@@ -5060,6 +5281,47 @@
|
||||
"react": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/react-pdf": {
|
||||
"version": "10.2.0",
|
||||
"resolved": "https://registry.npmjs.org/react-pdf/-/react-pdf-10.2.0.tgz",
|
||||
"integrity": "sha512-zk0DIL31oCh8cuQycM0SJKfwh4Onz0/Nwi6wTOjgtEjWGUY6eM+/vuzvOP3j70qtEULn7m1JtaeGzud1w5fY2Q==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"clsx": "^2.0.0",
|
||||
"dequal": "^2.0.3",
|
||||
"make-cancellable-promise": "^2.0.0",
|
||||
"make-event-props": "^2.0.0",
|
||||
"merge-refs": "^2.0.0",
|
||||
"pdfjs-dist": "5.4.296",
|
||||
"tiny-invariant": "^1.0.0",
|
||||
"warning": "^4.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/wojtekmaj/react-pdf?sponsor=1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
|
||||
"react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
|
||||
"react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@types/react": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/react-pdf/node_modules/pdfjs-dist": {
|
||||
"version": "5.4.296",
|
||||
"resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-5.4.296.tgz",
|
||||
"integrity": "sha512-DlOzet0HO7OEnmUmB6wWGJrrdvbyJKftI1bhMitK7O2N8W2gc757yyYBbINy9IDafXAV9wmKr9t7xsTaNKRG5Q==",
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">=20.16.0 || >=22.3.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@napi-rs/canvas": "^0.1.80"
|
||||
}
|
||||
},
|
||||
"node_modules/react-refresh": {
|
||||
"version": "0.18.0",
|
||||
"resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.18.0.tgz",
|
||||
@@ -5382,6 +5644,12 @@
|
||||
"url": "https://opencollective.com/webpack"
|
||||
}
|
||||
},
|
||||
"node_modules/tiny-invariant": {
|
||||
"version": "1.3.3",
|
||||
"resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz",
|
||||
"integrity": "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/tinyglobby": {
|
||||
"version": "0.2.15",
|
||||
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz",
|
||||
@@ -5824,6 +6092,15 @@
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/warning": {
|
||||
"version": "4.0.3",
|
||||
"resolved": "https://registry.npmjs.org/warning/-/warning-4.0.3.tgz",
|
||||
"integrity": "sha512-rpJyN222KWIvHJ/F53XSZv0Zl/accqHR8et1kpaMTD/fLCRxtV8iX8czMzY7sVZupTI3zcUTg8eycS2kNF9l6w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"loose-envify": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/which": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
|
||||
|
||||
Reference in New Issue
Block a user