feat: unify Direct Track PDF rendering and simplify export options

Backend changes:
- Apply background image + invisible text layer to all Direct Track PDFs
- Add CHART to regions_to_avoid for text extraction
- Improve visual fidelity for native PDFs and Office documents

Frontend changes:
- Remove JSON, UnifiedDocument, Markdown download buttons
- Simplify to 2-column layout with only Layout PDF and Reflow PDF
- Remove translation JSON download and Layout PDF option
- Keep only Reflow PDF for translated document downloads
- Clean up unused imports (FileJson, Database, FileOutput)

Archives two OpenSpec proposals:
- unify-direct-track-pdf-rendering
- simplify-frontend-export-options

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
egg
2025-12-12 07:50:43 +08:00
parent 53bfa88773
commit 24253ac15e
15 changed files with 891 additions and 195 deletions

View File

@@ -2920,6 +2920,7 @@ class DirectExtractionEngine:
1. Are mostly solid black or white 1. Are mostly solid black or white
2. Are within page boundaries 2. Are within page boundaries
3. Actually overlap with text content (IoU check) 3. Actually overlap with text content (IoU check)
4. Are rendered AFTER the text they overlap (z-order check)
Args: Args:
page: PyMuPDF page object page: PyMuPDF page object
@@ -2939,6 +2940,22 @@ class DirectExtractionEngine:
if not image_list: if not image_list:
return covering_images return covering_images
# Get rendering order (z-order) using get_bboxlog()
# Items rendered later (higher index) appear on top
bboxlog = page.get_bboxlog()
# Build a map of bbox -> sequence number for images and text
# This helps determine if an image is rendered before or after text
image_seqnos = {} # bbox tuple -> seqno
text_seqnos = {} # bbox tuple -> seqno
for seqno, (action_type, bbox) in enumerate(bboxlog):
bbox_tuple = tuple(fitz.Rect(bbox))
if "image" in action_type:
image_seqnos[bbox_tuple] = seqno
elif "text" in action_type:
text_seqnos[bbox_tuple] = seqno
# Get all text words for coverage check # Get all text words for coverage check
words = page.get_text("words") # (x0, y0, x1, y1, word, block_no, line_no, word_no) words = page.get_text("words") # (x0, y0, x1, y1, word, block_no, line_no, word_no)
@@ -3005,8 +3022,23 @@ class DirectExtractionEngine:
# Clip image rect to page boundaries # Clip image rect to page boundaries
clipped_rect = img_rect & page_rect clipped_rect = img_rect & page_rect
# Get image's rendering sequence number
img_bbox_tuple = tuple(clipped_rect)
img_seqno = image_seqnos.get(img_bbox_tuple, -1)
# If we can't find exact match, try to find closest match
if img_seqno == -1:
for bbox_tuple, seqno in image_seqnos.items():
if fitz.Rect(bbox_tuple).intersects(clipped_rect):
# Use the matching seqno
img_seqno = seqno
break
# Check if image actually covers any text (IoU check) # Check if image actually covers any text (IoU check)
# AND is rendered AFTER the text (z-order check)
covered_text_count = 0 covered_text_count = 0
is_background_image = False
for word_info in words: for word_info in words:
word_rect = fitz.Rect(word_info[:4]) word_rect = fitz.Rect(word_info[:4])
word_area = word_rect.width * word_rect.height word_area = word_rect.width * word_rect.height
@@ -3017,13 +3049,35 @@ class DirectExtractionEngine:
if not intersection.is_empty: if not intersection.is_empty:
intersection_area = intersection.width * intersection.height intersection_area = intersection.width * intersection.height
coverage_ratio = intersection_area / word_area coverage_ratio = intersection_area / word_area
# Count as covered if >= 50% of word is under the image # Count as covered if >= 50% of word is under the image
if coverage_ratio >= 0.5: if coverage_ratio >= 0.5:
# Z-order check: Find the text's rendering sequence
text_seqno = -1
for bbox_tuple, seqno in text_seqnos.items():
text_bbox = fitz.Rect(bbox_tuple)
if text_bbox.intersects(word_rect):
text_seqno = seqno
break
# Only count as covered if image is rendered AFTER text
# If image is rendered BEFORE text, it's a background
if img_seqno > text_seqno and text_seqno >= 0:
covered_text_count += 1 covered_text_count += 1
elif img_seqno < text_seqno and img_seqno >= 0:
# Image is rendered before text = background
is_background_image = True
# Skip this image if it's detected as a background image
if is_background_image and covered_text_count == 0:
logger.debug(f"Page {page_num}: Skipping background image xref={xref} "
f"(rendered before text, seqno={img_seqno})")
continue
# Report if image covers text OR is pure solid black/white # Report if image covers text OR is pure solid black/white
# Pure solid fills are likely redaction/placeholder boxes # Pure solid fills are likely redaction/placeholder boxes
if covered_text_count > 0 or is_pure_solid: # But skip if it's a background image (rendered before text)
if covered_text_count > 0 or (is_pure_solid and not is_background_image):
covering_images.append({ covering_images.append({
'xref': xref, # Include xref for filtering 'xref': xref, # Include xref for filtering
'bbox': tuple(clipped_rect), 'bbox': tuple(clipped_rect),
@@ -3031,7 +3085,9 @@ class DirectExtractionEngine:
'avg_color': (avg_r, avg_g, avg_b), 'avg_color': (avg_r, avg_g, avg_b),
'size': (width, height), 'size': (width, height),
'covered_text_count': covered_text_count, 'covered_text_count': covered_text_count,
'is_pure_solid': is_pure_solid 'is_pure_solid': is_pure_solid,
'is_background': is_background_image,
'render_seqno': img_seqno
}) })
except Exception as e: except Exception as e:

View File

@@ -709,7 +709,8 @@ class PDFGeneratorService:
self, self,
unified_doc: 'UnifiedDocument', unified_doc: 'UnifiedDocument',
output_path: Path, output_path: Path,
source_file_path: Optional[Path] = None source_file_path: Optional[Path] = None,
result_dir: Optional[Path] = None
) -> bool: ) -> bool:
""" """
Generate layout-preserving PDF directly from UnifiedDocument. Generate layout-preserving PDF directly from UnifiedDocument.
@@ -721,6 +722,7 @@ class PDFGeneratorService:
unified_doc: UnifiedDocument object unified_doc: UnifiedDocument object
output_path: Path to save generated PDF output_path: Path to save generated PDF
source_file_path: Optional path to original source file source_file_path: Optional path to original source file
result_dir: Optional path to result directory (for finding converted PDFs)
Returns: Returns:
True if successful, False otherwise True if successful, False otherwise
@@ -751,7 +753,8 @@ class PDFGeneratorService:
return self._generate_direct_track_pdf( return self._generate_direct_track_pdf(
unified_doc=unified_doc, unified_doc=unified_doc,
output_path=output_path, output_path=output_path,
source_file_path=source_file_path source_file_path=source_file_path,
result_dir=result_dir
) )
else: else:
# OCR track: Simplified rendering (backward compatible) # OCR track: Simplified rendering (backward compatible)
@@ -823,7 +826,8 @@ class PDFGeneratorService:
self, self,
unified_doc: 'UnifiedDocument', unified_doc: 'UnifiedDocument',
output_path: Path, output_path: Path,
source_file_path: Optional[Path] = None source_file_path: Optional[Path] = None,
result_dir: Optional[Path] = None
) -> bool: ) -> bool:
""" """
Generate PDF with rich formatting preservation for Direct track. Generate PDF with rich formatting preservation for Direct track.
@@ -836,6 +840,7 @@ class PDFGeneratorService:
unified_doc: UnifiedDocument from Direct extraction unified_doc: UnifiedDocument from Direct extraction
output_path: Path to save generated PDF output_path: Path to save generated PDF
source_file_path: Optional path to original source file source_file_path: Optional path to original source file
result_dir: Optional path to result directory (for finding converted PDFs)
Returns: Returns:
True if successful, False otherwise True if successful, False otherwise
@@ -865,6 +870,55 @@ class PDFGeneratorService:
from reportlab.pdfgen import canvas from reportlab.pdfgen import canvas
pdf_canvas = canvas.Canvas(str(output_path), pagesize=(page_width, page_height)) pdf_canvas = canvas.Canvas(str(output_path), pagesize=(page_width, page_height))
# For ALL Direct Track documents, render source page as background image
# This preserves visual fidelity (vector graphics, charts, complex layouts)
# and overlays invisible text layer for searchability/translation
use_background_rendering = (
self.current_processing_track == ProcessingTrack.DIRECT or
self.current_processing_track == ProcessingTrack.HYBRID
)
source_pdf = None
if use_background_rendering:
# Find the source PDF for background rendering
# For Office documents: source_file_path points to .pptx/.docx, need converted PDF
# For native PDFs: source_file_path should be the PDF itself
actual_source_pdf = None
# Use provided result_dir, or fall back to output_path.parent
search_dir = result_dir if result_dir else output_path.parent
if search_dir.exists():
# Look for PDF files that match the pattern: {task_id}_{name}.pdf
pdf_files = list(search_dir.glob('*.pdf'))
# Filter out layout/output PDFs
source_pdfs = [
f for f in pdf_files
if not f.name.endswith('_layout.pdf')
and not f.name.endswith('_reflow.pdf')
and f.name != output_path.name
]
if source_pdfs:
actual_source_pdf = source_pdfs[0]
logger.debug(f"Found converted PDF in result dir: {actual_source_pdf.name}")
# Fallback: use source_file_path if it's a PDF
if not actual_source_pdf and source_file_path and source_file_path.exists():
if source_file_path.suffix.lower() == '.pdf':
actual_source_pdf = source_file_path
if actual_source_pdf and actual_source_pdf.exists():
try:
import fitz
source_pdf = fitz.open(str(actual_source_pdf))
logger.info(f"Direct Track: will render source pages as background from: {actual_source_pdf.name}")
except Exception as e:
logger.warning(f"Failed to open source PDF for background rendering: {e}")
use_background_rendering = False
source_pdf = None
else:
logger.warning(f"Direct Track: no source PDF found in {search_dir}, skipping background rendering")
use_background_rendering = False
# Process each page # Process each page
for page_idx, page in enumerate(unified_doc.pages): for page_idx, page in enumerate(unified_doc.pages):
logger.info(f">>> Processing page {page_idx + 1}/{len(unified_doc.pages)}") logger.info(f">>> Processing page {page_idx + 1}/{len(unified_doc.pages)}")
@@ -880,6 +934,42 @@ class PDFGeneratorService:
# Set page size for current page # Set page size for current page
pdf_canvas.setPageSize((current_page_width, current_page_height)) pdf_canvas.setPageSize((current_page_width, current_page_height))
# For Direct Track: render source page as background image
# This preserves all visual content (vector graphics, shapes, charts)
rendered_background = False
if use_background_rendering and source_pdf and page_idx < len(source_pdf):
try:
source_page = source_pdf[page_idx]
# Render at 2x resolution for quality
mat = fitz.Matrix(2.0, 2.0)
pix = source_page.get_pixmap(matrix=mat, alpha=False)
# Save to temporary file
import tempfile
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
pix.save(tmp.name)
temp_bg_path = tmp.name
# Draw background image (full page)
from reportlab.lib.utils import ImageReader
bg_img = ImageReader(temp_bg_path)
pdf_canvas.drawImage(
bg_img,
0, 0,
width=current_page_width,
height=current_page_height,
preserveAspectRatio=False
)
rendered_background = True
logger.info(f" Rendered source page {page_idx + 1} as background image")
# Clean up temp file
import os
os.unlink(temp_bg_path)
except Exception as e:
logger.warning(f"Failed to render background for page {page_idx + 1}: {e}")
rendered_background = False
# Separate elements by type # Separate elements by type
text_elements = [] text_elements = []
table_elements = [] table_elements = []
@@ -918,22 +1008,28 @@ class PDFGeneratorService:
continue continue
image_elements.append(element) image_elements.append(element)
# Only add real images to exclusion regions, NOT charts/diagrams
# Charts often have large bounding boxes that include text labels
# which should be rendered as selectable text on top
if element.type in [ElementType.IMAGE, ElementType.FIGURE, ElementType.LOGO, ElementType.STAMP]:
# Check if this is Direct track (text from PDF text layer, not OCR) # Check if this is Direct track (text from PDF text layer, not OCR)
is_direct = (self.current_processing_track == ProcessingTrack.DIRECT or is_direct = (self.current_processing_track == ProcessingTrack.DIRECT or
self.current_processing_track == ProcessingTrack.HYBRID) self.current_processing_track == ProcessingTrack.HYBRID)
# For Direct Track with background rendering:
# - CHART regions should be excluded from text layer (chart text already in background)
# - Other images don't need exclusion (text rendered as invisible overlay)
if is_direct: if is_direct:
# Direct track: text is from PDF text layer, not OCR'd from images if element.type == ElementType.CHART:
# Don't exclude any images - text should be rendered on top # Add chart to exclusion regions - chart-internal text should NOT be
# This is critical for Office documents with background images # in the invisible text layer (already visible in background image)
regions_to_avoid.append(element)
logger.debug(f"Direct track: excluding CHART {element.element_id} - text inside chart not needed")
else:
# Other image types: don't exclude, text will be invisible overlay
logger.debug(f"Direct track: not excluding {element.element_id} from text regions") logger.debug(f"Direct track: not excluding {element.element_id} from text regions")
continue continue
# OCR track: Skip full-page background images from exclusion regions # OCR track: Handle image exclusion for text rendered on images
if element.type in [ElementType.IMAGE, ElementType.FIGURE, ElementType.LOGO, ElementType.STAMP]:
# Skip full-page background images from exclusion regions
# Smaller images that might contain OCR'd text should still be excluded # Smaller images that might contain OCR'd text should still be excluded
if element.bbox: if element.bbox:
elem_area = (element.bbox.x1 - element.bbox.x0) * (element.bbox.y1 - element.bbox.y0) elem_area = (element.bbox.x1 - element.bbox.x0) * (element.bbox.y1 - element.bbox.y0)
@@ -965,23 +1061,20 @@ class PDFGeneratorService:
f"{len(table_elements)} tables, {len(image_elements)} images, " f"{len(table_elements)} tables, {len(image_elements)} images, "
f"{len(list_elements)} list items") f"{len(list_elements)} list items")
# Use original element order from extraction engine # FIX: Render in proper z-order for Office/PPT documents
# The extraction engine has already sorted elements by reading order, # Images (backgrounds) must be rendered FIRST, then tables, then text on top
# handling multi-column layouts correctly (top-to-bottom, left-to-right) # This ensures white text on dark backgrounds is visible
all_elements = []
# Preserve original order by iterating through page.elements # Sort images by area (largest first = background images)
for elem in page.elements: def get_element_area(elem):
if elem in image_elements: if elem.bbox:
all_elements.append(('image', elem)) return (elem.bbox.x1 - elem.bbox.x0) * (elem.bbox.y1 - elem.bbox.y0)
elif elem in table_elements: return 0
all_elements.append(('table', elem))
elif elem in list_elements:
all_elements.append(('list', elem))
elif elem in text_elements:
all_elements.append(('text', elem))
logger.debug(f"Drawing {len(all_elements)} elements in extraction order (preserves multi-column reading order)") sorted_images = sorted(image_elements, key=get_element_area, reverse=True)
logger.debug(f"Rendering order: {len(sorted_images)} images (largest first), "
f"{len(table_elements)} tables, {len(text_elements)+len(list_elements)} text elements")
logger.debug(f"Exclusion regions: {len(regions_to_avoid)} (tables/images/charts)") logger.debug(f"Exclusion regions: {len(regions_to_avoid)} (tables/images/charts)")
# Debug: Log exclusion region types # Debug: Log exclusion region types
@@ -992,20 +1085,48 @@ class PDFGeneratorService:
if region_types: if region_types:
logger.debug(f" Exclusion region breakdown: {region_types}") logger.debug(f" Exclusion region breakdown: {region_types}")
# Draw elements in document order # Step 1: Draw images (backgrounds)
for elem_type, elem in all_elements: # Skip if we already rendered the source page as background (Office documents)
if elem_type == 'image': if rendered_background:
logger.debug(f" Skipping {len(sorted_images)} individual images - background already rendered")
else:
# Larger images (backgrounds) are drawn first, smaller images on top
for elem in sorted_images:
self._draw_image_element_direct(pdf_canvas, elem, current_page_height, output_path.parent) self._draw_image_element_direct(pdf_canvas, elem, current_page_height, output_path.parent)
elif elem_type == 'table':
# For Office documents with full-page background rendering:
# - Skip tables (already visible in background image)
# - Draw text as INVISIBLE layer (for searchability/translation, but no visual overlap)
if rendered_background:
logger.debug(f" Skipping {len(table_elements)} tables - already in background")
logger.debug(f" Drawing {len(text_elements)+len(list_elements)} text elements as invisible layer")
# Set text rendering mode to invisible (mode 3)
# This makes text selectable/searchable but not visible
pdf_canvas._code.append('3 Tr') # Text render mode: invisible
for elem in page.elements:
if elem in list_elements or elem in text_elements:
self._draw_text_element_direct(pdf_canvas, elem, current_page_height)
# Reset text rendering mode to normal
pdf_canvas._code.append('0 Tr') # Text render mode: fill
else:
# Step 2: Draw tables
for elem in table_elements:
self._draw_table_element_direct(pdf_canvas, elem, current_page_height) self._draw_table_element_direct(pdf_canvas, elem, current_page_height)
elif elem_type == 'list':
# FIX: Check if list item overlaps with table/image # Step 3: Draw text and list elements (on top of images/tables)
# Use original document order for reading flow
for elem in page.elements:
if elem in list_elements:
# Check if list item overlaps with table/image
if not self._is_element_inside_regions(elem.bbox, regions_to_avoid): if not self._is_element_inside_regions(elem.bbox, regions_to_avoid):
self._draw_text_element_direct(pdf_canvas, elem, current_page_height) self._draw_text_element_direct(pdf_canvas, elem, current_page_height)
else: else:
logger.debug(f"Skipping list element {elem.element_id} inside table/image region") logger.debug(f"Skipping list element {elem.element_id} inside table/image region")
elif elem_type == 'text': elif elem in text_elements:
# FIX: Check if text overlaps with table/image before drawing # Check if text overlaps with table/image before drawing
if not self._is_element_inside_regions(elem.bbox, regions_to_avoid): if not self._is_element_inside_regions(elem.bbox, regions_to_avoid):
self._draw_text_element_direct(pdf_canvas, elem, current_page_height) self._draw_text_element_direct(pdf_canvas, elem, current_page_height)
else: else:
@@ -1015,6 +1136,10 @@ class PDFGeneratorService:
pdf_canvas.save() pdf_canvas.save()
logger.info(f"Direct track PDF saved to {output_path}") logger.info(f"Direct track PDF saved to {output_path}")
# Close source PDF if opened
if source_pdf:
source_pdf.close()
# Reset track # Reset track
self.current_processing_track = None self.current_processing_track = None
return True return True
@@ -1023,6 +1148,12 @@ class PDFGeneratorService:
logger.error(f"Failed to generate Direct track PDF: {e}") logger.error(f"Failed to generate Direct track PDF: {e}")
import traceback import traceback
traceback.print_exc() traceback.print_exc()
# Clean up source PDF on error
if source_pdf:
try:
source_pdf.close()
except:
pass
self.current_processing_track = None self.current_processing_track = None
return False return False
@@ -3249,7 +3380,8 @@ class PDFGeneratorService:
return self.generate_from_unified_document( return self.generate_from_unified_document(
unified_doc=unified_doc, unified_doc=unified_doc,
output_path=output_path, output_path=output_path,
source_file_path=source_file_path source_file_path=source_file_path,
result_dir=json_path.parent # Pass result dir for finding converted PDFs
) )
else: else:
logger.error("Failed to convert JSON to UnifiedDocument") logger.error("Failed to convert JSON to UnifiedDocument")
@@ -3309,6 +3441,7 @@ class PDFGeneratorService:
keywords=metadata_dict.get('keywords'), keywords=metadata_dict.get('keywords'),
producer=metadata_dict.get('producer'), producer=metadata_dict.get('producer'),
creator=metadata_dict.get('creator'), creator=metadata_dict.get('creator'),
original_filename=metadata_dict.get('original_filename'), # For Office document detection
creation_date=datetime.fromisoformat(metadata_dict['creation_date'].replace('Z', '+00:00')) if metadata_dict.get('creation_date') else None, creation_date=datetime.fromisoformat(metadata_dict['creation_date'].replace('Z', '+00:00')) if metadata_dict.get('creation_date') else None,
modification_date=datetime.fromisoformat(metadata_dict['modification_date'].replace('Z', '+00:00')) if metadata_dict.get('modification_date') else None, modification_date=datetime.fromisoformat(metadata_dict['modification_date'].replace('Z', '+00:00')) if metadata_dict.get('modification_date') else None,
) )

View File

@@ -14,7 +14,6 @@ import {
AlertCircle, AlertCircle,
Clock, Clock,
Layers, Layers,
FileJson,
Loader2, Loader2,
ArrowLeft, ArrowLeft,
RefreshCw, RefreshCw,
@@ -22,12 +21,10 @@ import {
Table2, Table2,
Image, Image,
BarChart3, BarChart3,
Database,
Languages, Languages,
Globe, Globe,
CheckCircle, CheckCircle,
Trash2, Trash2
FileOutput
} from 'lucide-react' } from 'lucide-react'
import type { ProcessingTrack, TranslationStatus, TranslationListItem } from '@/types/apiV2' import type { ProcessingTrack, TranslationStatus, TranslationListItem } from '@/types/apiV2'
import { Badge } from '@/components/ui/badge' import { Badge } from '@/components/ui/badge'
@@ -224,60 +221,6 @@ export default function TaskDetailPage() {
} }
} }
const handleDownloadMarkdown = async () => {
if (!taskId) return
try {
await apiClientV2.downloadMarkdown(taskId)
toast({
title: t('export.exportSuccess'),
description: 'Markdown 已下載',
variant: 'success',
})
} catch (error: any) {
toast({
title: t('export.exportError'),
description: error.response?.data?.detail || t('errors.networkError'),
variant: 'destructive',
})
}
}
const handleDownloadJSON = async () => {
if (!taskId) return
try {
await apiClientV2.downloadJSON(taskId)
toast({
title: t('export.exportSuccess'),
description: 'JSON 已下載',
variant: 'success',
})
} catch (error: any) {
toast({
title: t('export.exportError'),
description: error.response?.data?.detail || t('errors.networkError'),
variant: 'destructive',
})
}
}
const handleDownloadUnified = async () => {
if (!taskId) return
try {
await apiClientV2.downloadUnified(taskId)
toast({
title: t('export.exportSuccess'),
description: 'UnifiedDocument JSON 已下載',
variant: 'success',
})
} catch (error: any) {
toast({
title: t('export.exportError'),
description: error.response?.data?.detail || t('errors.networkError'),
variant: 'destructive',
})
}
}
const handleStartTranslation = async () => { const handleStartTranslation = async () => {
if (!taskId || isTranslating) return if (!taskId || isTranslating) return
@@ -319,24 +262,6 @@ export default function TaskDetailPage() {
} }
} }
const handleDownloadTranslation = async (lang: string) => {
if (!taskId) return
try {
await apiClientV2.downloadTranslation(taskId, lang)
toast({
title: '下載成功',
description: `翻譯結果 (${lang}) 已下載`,
variant: 'success',
})
} catch (error: any) {
toast({
title: '下載失敗',
description: error.response?.data?.detail || t('errors.networkError'),
variant: 'destructive',
})
}
}
const handleDeleteTranslation = async (lang: string) => { const handleDeleteTranslation = async (lang: string) => {
if (!taskId) return if (!taskId) return
try { try {
@@ -542,19 +467,7 @@ export default function TaskDetailPage() {
</CardTitle> </CardTitle>
</CardHeader> </CardHeader>
<CardContent> <CardContent>
<div className="grid grid-cols-2 md:grid-cols-5 gap-3"> <div className="grid grid-cols-2 gap-3">
<Button onClick={handleDownloadJSON} variant="outline" className="gap-2 h-20 flex-col">
<FileJson className="w-8 h-8" />
<span>JSON</span>
</Button>
<Button onClick={handleDownloadUnified} variant="outline" className="gap-2 h-20 flex-col">
<Database className="w-8 h-8" />
<span></span>
</Button>
<Button onClick={handleDownloadMarkdown} variant="outline" className="gap-2 h-20 flex-col">
<FileText className="w-8 h-8" />
<span>Markdown</span>
</Button>
<Button onClick={handleDownloadLayoutPDF} className="gap-2 h-20 flex-col"> <Button onClick={handleDownloadLayoutPDF} className="gap-2 h-20 flex-col">
<Download className="w-8 h-8" /> <Download className="w-8 h-8" />
<span> PDF</span> <span> PDF</span>
@@ -650,28 +563,12 @@ export default function TaskDetailPage() {
<Button <Button
variant="outline" variant="outline"
size="sm" size="sm"
onClick={() => handleDownloadTranslation(item.target_lang)} onClick={() => handleDownloadTranslatedPdf(item.target_lang, 'reflow')}
className="gap-1" className="gap-1"
> >
<Download className="w-3 h-3" /> <Download className="w-3 h-3" />
JSON PDF
</Button> </Button>
<Select
onValueChange={(format: 'layout' | 'reflow') =>
handleDownloadTranslatedPdf(item.target_lang, format)
}
>
<SelectTrigger className="w-[100px] h-8">
<div className="flex items-center gap-1">
<FileOutput className="w-3 h-3" />
<span className="text-xs">PDF</span>
</div>
</SelectTrigger>
<SelectContent>
<SelectItem value="layout"> PDF</SelectItem>
<SelectItem value="reflow"> PDF</SelectItem>
</SelectContent>
</Select>
<Button <Button
variant="ghost" variant="ghost"
size="sm" size="sm"

View File

@@ -0,0 +1,59 @@
# Change: Simplify Frontend Export Options
## Why
The current frontend has accumulated export options that are no longer needed or rarely used. Following the "Simple OCR" architecture change, we need to streamline the user interface by:
1. Removing redundant export formats that add complexity without significant user value
2. Focusing on the most useful output formats (PDF)
3. Simplifying the translation download options
## What Changes
### TaskDetailPage Changes
**Download Options - Remove:**
- JSON download button
- UnifiedDocument (統一格式) download button
- Markdown download button
**Download Options - Keep:**
- 版面 PDF (Layout PDF)
- 流式 PDF (Reflow PDF)
**Translation Options - Remove:**
- Download translation JSON button
- Download translated Layout PDF option
**Translation Options - Keep:**
- Download translated Reflow PDF (流式 PDF)
**Statistics Section - Keep All:**
- 處理時間 (Processing time)
- 頁數 (Page count)
- 文本區域 (Text regions)
- 表格 (Tables)
- 圖片 (Images)
- 平均置信度 (Average confidence)
### Components - Keep All
- LayoutModelSelector
- PreprocessingSettings
- PreprocessingPreview
- ProcessingTrackSelector
### Pages to Review (Out of Scope)
- SettingsPage (Export rules) - May need separate review
- ResultsPage - May be unused, needs verification
## Impact
- **Affected files**: `frontend/src/pages/TaskDetailPage.tsx`
- **User experience**: Simplified interface with fewer but more relevant options
- **Backend**: No changes required (endpoints remain available for API users)
## Migration
- No data migration required
- Frontend-only changes
- Backend endpoints remain unchanged for API compatibility

View File

@@ -0,0 +1,24 @@
## MODIFIED Requirements
### Requirement: Export Interface
The Export interface in TaskDetailPage SHALL provide streamlined download options focusing on PDF formats.
#### Scenario: Download options for completed tasks
- **WHEN** viewing a completed task in TaskDetailPage
- **THEN** the download section SHALL display only two buttons: "版面 PDF" and "流式 PDF"
- **AND** JSON, UnifiedDocument, and Markdown download buttons SHALL NOT be displayed
- **AND** the download grid SHALL use a 2-column layout
#### Scenario: Translation download options
- **WHEN** viewing completed translations in TaskDetailPage
- **THEN** each translation item SHALL display only a "流式 PDF" download button
- **AND** translation JSON download button SHALL NOT be displayed
- **AND** Layout PDF option for translations SHALL NOT be displayed
- **AND** delete translation button SHALL remain available
#### Scenario: Backend API remains unchanged
- **WHEN** external clients call download endpoints directly
- **THEN** JSON, Markdown, and UnifiedDocument endpoints SHALL still function
- **AND** translated Layout PDF endpoint SHALL still function
- **AND** no backend changes are required for this frontend simplification

View File

@@ -0,0 +1,57 @@
# Tasks: Simplify Frontend Export Options
## 1. TaskDetailPage - Download Section
- [x] 1.1 Remove JSON download button
- File: `frontend/src/pages/TaskDetailPage.tsx`
- Remove: Button with `handleDownloadJSON` onClick
- Remove: `handleDownloadJSON` function (lines 245-261)
- [x] 1.2 Remove UnifiedDocument download button
- File: `frontend/src/pages/TaskDetailPage.tsx`
- Remove: Button with `handleDownloadUnified` onClick
- Remove: `handleDownloadUnified` function (lines 263-279)
- [x] 1.3 Remove Markdown download button
- File: `frontend/src/pages/TaskDetailPage.tsx`
- Remove: Button with `handleDownloadMarkdown` onClick
- Remove: `handleDownloadMarkdown` function (lines 227-243)
- [x] 1.4 Update download grid layout
- File: `frontend/src/pages/TaskDetailPage.tsx`
- Change: Grid from 5 columns to 2 columns (only Layout PDF and Reflow PDF)
- Update: `grid-cols-2 md:grid-cols-5` → `grid-cols-2`
## 2. TaskDetailPage - Translation Section
- [x] 2.1 Remove translation JSON download button
- File: `frontend/src/pages/TaskDetailPage.tsx`
- Remove: Button with `handleDownloadTranslation` onClick in translation list
- Remove: `handleDownloadTranslation` function (lines 322-338)
- [x] 2.2 Simplify translated PDF download (remove Layout option)
- File: `frontend/src/pages/TaskDetailPage.tsx`
- Change: Remove Select dropdown for PDF format
- Change: Replace with single "流式 PDF" download button
- Keep: `handleDownloadTranslatedPdf` function (always use 'reflow' format)
## 3. Cleanup - Remove Unused Imports
- [x] 3.1 Remove unused Lucide icons
- File: `frontend/src/pages/TaskDetailPage.tsx`
- Removed: `FileJson`, `Database`, `FileOutput`
- Keep: Icons still in use
## 4. Verification
- [ ] 4.1 Verify Layout PDF download works
- Test: Click "版面 PDF" button
- Expected: PDF downloads with preserved layout
- [ ] 4.2 Verify Reflow PDF download works
- Test: Click "流式 PDF" button
- Expected: PDF downloads with flowing text
- [ ] 4.3 Verify translated Reflow PDF download works
- Test: Complete a translation, then click download
- Expected: Translated PDF downloads in reflow format

View File

@@ -0,0 +1,130 @@
# Design: Unify Direct Track PDF Rendering
## Context
The Tool_OCR system generates "Layout PDF" files that preserve the original document appearance while maintaining extractable text. Currently, Direct Track (editable PDFs and Office documents) uses element-by-element rendering, which causes:
- Z-order conflicts (text behind images)
- Missing vector graphics (chart bars, gradients)
- White text becoming invisible on dark backgrounds
## Goals / Non-Goals
### Goals
- Visual fidelity: Layout PDF matches source document exactly
- Text extractability: All text remains searchable/selectable for translation
- Unified logic: Same rendering approach for all Direct Track documents
- Chart handling: Chart-internal text excluded from translation layer
### Non-Goals
- Editable text in Layout PDF (translation creates separate reflow PDF)
- Reducing file size (trade-off for visual fidelity)
- OCR Track changes (only affects Direct Track)
## Decisions
### Decision 1: Use Background Image + Invisible Text Layer
**What**: Render each source PDF page as a full-page background image, then overlay invisible text.
**Why**:
- Preserves ALL visual content (vector graphics, gradients, complex layouts)
- Invisible text (PDF Rendering Mode 3) allows text selection without visual overlap
- Simplifies z-order handling (just one image layer + one text layer)
**Implementation**:
```python
# Render source page as background
mat = fitz.Matrix(2.0, 2.0) # 2x resolution
pix = source_page.get_pixmap(matrix=mat, alpha=False)
# bg_img: the pixmap above wrapped as an image the canvas can draw (conversion omitted here)
pdf_canvas.drawImage(bg_img, 0, 0, width=page_width, height=page_height)
# Set invisible text mode
pdf_canvas._code.append('3 Tr') # Text render mode: invisible
# Draw text elements (invisible but selectable)
for elem in text_elements:
if not is_inside_chart_region(elem):
draw_text_element(elem)
pdf_canvas._code.append('0 Tr') # Reset to normal
```
### Decision 2: Add CHART to regions_to_avoid
**What**: Chart-internal text elements are excluded from the invisible text layer.
**Why**:
- Chart axis labels and legends are already visible in the background image
- These texts typically don't need translation
- Prevents duplicate text extraction for translation
**Implementation**:
```python
# In element classification loop
if element.type == ElementType.CHART:
image_elements.append(element)
regions_to_avoid.append(element) # Exclude chart region from text layer
```
### Decision 3: Apply to ALL Direct Track Documents
**What**: Use background image rendering for both Office documents and native PDFs.
**Why**:
- Consistent handling eliminates edge cases
- Chart text overlap affects both document types
- Office detection (LibreOffice producer) is unreliable for some PDFs
**Detection logic removed**:
```python
# OLD: Only for Office documents
is_office_document = 'LibreOffice' in producer or filename.endswith('.pptx')
# NEW: All Direct Track uses background rendering
if self.current_processing_track == ProcessingTrack.DIRECT:
render_background_image()
```
## Architecture
```
┌─────────────────────────────────────────────────────────────┐
│ PDF Generation Flow │
├─────────────────────────────────────────────────────────────┤
│ │
│ Source PDF ──► PyMuPDF ──► Page Pixmap (2x) ──► Background │
│ │ │
│ ▼ │
│ Extract Text ──► Filter Chart Regions │
│ │ │
│ ▼ │
│ Invisible Text Layer (Mode 3) ──► Overlay │
│ │
│ Result: Background Image + Invisible Searchable Text │
│ │
└─────────────────────────────────────────────────────────────┘
```
## Risks / Trade-offs
| Risk | Impact | Mitigation |
|------|--------|------------|
| Larger file size (~2MB/page) | Storage, download time | Accept trade-off for visual fidelity |
| Slightly slower generation | User wait time | Acceptable for quality improvement |
| Chart text not translatable | Feature limitation | Document as expected behavior |
| Source PDF required | Can't regenerate without source | Store source PDF reference in task |
## File Size Estimation
| Document | Pages | Current Size | New Size (est.) |
|----------|-------|--------------|-----------------|
| PPT (25 pages) | 25 | ~1.5 MB | ~43 MB |
| PDF (3 pages) | 3 | ~68 KB | ~6 MB |
## Open Questions
1. Should we provide a "lightweight" option that skips background rendering for simple PDFs?
- **Decision**: No, keep unified approach for consistency
2. Should chart text be optionally included in translation?
- **Decision**: No. Chart labels rarely need translation, and including them would require complex masking

View File

@@ -0,0 +1,54 @@
# Change: Unify Direct Track PDF Rendering with Background Image + Invisible Text Layer
## Why
Direct Track PDF generation currently has visual rendering issues:
1. **Chart text overlap**: Text elements extracted from PDF text layer (e.g., "Temperature, °C") overlap with chart images
2. **Z-order problems**: White text on dark backgrounds becomes invisible when rendered incorrectly
3. **Office document issues**: PPT/DOC/XLS converted PDFs lose visual fidelity (vector graphics, gradients)
The root cause is that Direct Track tries to render individual elements (text, images, tables) separately, which leads to z-order conflicts and missing visual content.
## What Changes
### Backend Changes
1. **Unified Background Image Rendering for All Direct Track**
- Render source PDF page as full-page background image (2x resolution)
- Draw invisible text layer on top (PDF Text Rendering Mode 3)
- Text remains searchable/extractable but doesn't visually overlap
2. **Chart Region Exclusion**
- Add `CHART` element type to `regions_to_avoid`
- Chart-internal text (axis labels, legends) will NOT be in invisible text layer
- These texts are already visible in the background image and don't need translation
3. **Skip Element Rendering When Background Exists**
- When background image is rendered, skip individual image/table rendering
- Only draw invisible text layer for searchability and translation extraction
### Frontend Considerations
1. **No UI Changes Required for Layout PDF**
- Layout PDF generation is automatic, no user options needed
- Visual output will match source PDF exactly
2. **Translation Flow Clarification**
- Layout PDF: Background image + invisible text (for preview)
- Translated PDF: Reflow layout with real visible text (page-by-page)
- Chart text excluded from translation (already in background image)
## Impact
- **Affected specs**: document-processing, result-export, translation
- **Affected code**:
- `backend/app/services/pdf_generator_service.py` (main changes)
- `backend/app/services/direct_extraction_engine.py` (chart detection)
- **File size**: Output PDF will be larger due to embedded page images (~2MB per page at 2x resolution)
- **Processing time**: Slight increase for page rendering
## Migration
- No database changes required
- No API changes required
- Existing tasks can be re-exported with new PDF generation logic

View File

@@ -0,0 +1,43 @@
## ADDED Requirements
### Requirement: Direct Track Background Image Rendering
The system SHALL render Direct Track PDF output using a full-page background image with an invisible text overlay to preserve visual fidelity while maintaining text extractability.
#### Scenario: Render Direct Track PDF with background image
- **WHEN** generating Layout PDF for a Direct Track document
- **THEN** the system SHALL render each source PDF page as a full-page background image at 2x resolution
- **AND** overlay invisible text elements using PDF Text Rendering Mode 3
- **AND** the invisible text SHALL be positioned at original coordinates for accurate selection
#### Scenario: Handle Office documents (PPT, DOC, XLS)
- **WHEN** processing an Office document converted to PDF
- **THEN** the system SHALL use the same background image + invisible text approach
- **AND** preserve all visual elements including vector graphics, gradients, and complex layouts
- **AND** the converted PDF in result directory SHALL be used as background source
#### Scenario: Handle native editable PDFs
- **WHEN** processing a native PDF through Direct Track
- **THEN** the system SHALL use the source PDF for background rendering
- **AND** apply the same invisible text overlay approach
- **AND** chart regions SHALL be excluded from the text layer
### Requirement: Chart Region Text Exclusion
The system SHALL exclude text elements within chart regions from the invisible text layer to prevent duplicate content and unnecessary translation.
#### Scenario: Detect chart regions
- **WHEN** classifying page elements for Direct Track
- **THEN** the system SHALL identify elements with type CHART
- **AND** add chart bounding boxes to regions_to_avoid list
#### Scenario: Exclude chart-internal text from invisible layer
- **WHEN** rendering invisible text layer
- **THEN** the system SHALL skip text elements whose bounding boxes overlap with chart regions
- **AND** chart axis labels, legends, and data labels SHALL NOT be in the invisible text layer
- **AND** these texts remain visible in the background image
#### Scenario: Chart text not available for translation
- **WHEN** extracting text for translation from a Direct Track document
- **THEN** chart-internal text SHALL NOT be included in translatable elements
- **AND** this is expected behavior as chart labels typically don't require translation

View File

@@ -0,0 +1,36 @@
## MODIFIED Requirements
### Requirement: Enhanced PDF Export with Layout Preservation
The PDF export SHALL accurately preserve document layout from both OCR and direct extraction tracks with correct coordinate transformation and multi-page support. For Direct Track, a background image rendering approach SHALL be used for visual fidelity.
#### Scenario: Export PDF from direct extraction track
- **WHEN** exporting PDF from a direct-extraction processed document
- **THEN** the system SHALL render source PDF pages as full-page background images at 2x resolution
- **AND** overlay invisible text elements using PDF Text Rendering Mode 3
- **AND** text SHALL remain selectable and searchable despite being invisible
- **AND** visual output SHALL match source document exactly
#### Scenario: Export PDF from OCR track with full structure
- **WHEN** exporting PDF from OCR-processed document
- **THEN** the PDF SHALL use all 23 PP-StructureV3 element types
- **AND** render tables with proper cell boundaries
- **AND** maintain reading order from parsing_res_list
#### Scenario: Handle coordinate transformations correctly
- **WHEN** generating PDF from UnifiedDocument
- **THEN** system SHALL use explicit page dimensions from OCR results (not inferred from bounding boxes)
- **AND** correctly transform Y-axis coordinates from top-left (OCR) to bottom-left (PDF/ReportLab) origin
- **AND** prevent vertical flipping or position misalignment errors
#### Scenario: Direct Track PDF file size increase
- **WHEN** generating Layout PDF for Direct Track documents
- **THEN** the system SHALL accept increased file size due to embedded page images
- **AND** approximately 1-2 MB per page at 2x resolution is expected
- **AND** this trade-off is accepted for improved visual fidelity
#### Scenario: Chart elements excluded from text layer
- **WHEN** generating Layout PDF containing charts
- **THEN** the system SHALL NOT include chart-internal text in the invisible text layer
- **AND** chart visuals SHALL be preserved in the background image
- **AND** chart text SHALL NOT be available for text selection or translation

View File

@@ -0,0 +1,46 @@
## ADDED Requirements
### Requirement: Translation Output as Reflow PDF
The system SHALL generate translated documents as reflow-layout PDFs with real visible text, separate from the Layout PDF which uses background images.
#### Scenario: Generate translated PDF with reflow layout
- **WHEN** translation is completed for a document
- **THEN** the system SHALL generate a new PDF with translated text
- **AND** the translated PDF SHALL use reflow layout (not background image)
- **AND** text SHALL be real visible text, not invisible overlay
- **AND** page breaks SHALL correspond to original document pages
#### Scenario: Maintain page correspondence in translated output
- **WHEN** generating translated PDF
- **THEN** content from original page 1 SHALL appear in translated page 1
- **AND** content from original page 2 SHALL appear in translated page 2
- **AND** each page may have different content length but maintains page boundaries
#### Scenario: Chart text excluded from translation
- **WHEN** extracting text for translation from Direct Track documents
- **THEN** text elements within chart regions SHALL NOT be included
- **AND** chart labels, axis text, and legends SHALL remain untranslated
- **AND** this is expected behavior documented for users
### Requirement: Dual PDF Output Concept
The system SHALL maintain clear separation between Layout PDF (preview) and Translated PDF (output).
#### Scenario: Layout PDF for preview
- **WHEN** user views a processed document before translation
- **THEN** the Layout PDF SHALL be displayed
- **AND** Layout PDF preserves exact visual appearance of source
- **AND** text is invisible overlay for extraction purposes only
#### Scenario: Translated PDF for final output
- **WHEN** user requests translated document
- **THEN** the Translated PDF SHALL be generated
- **AND** Translated PDF uses reflow layout with visible translated text
- **AND** original visual styling is not preserved (text-focused output)
#### Scenario: Both PDFs available after translation
- **WHEN** translation is completed
- **THEN** both Layout PDF and Translated PDF SHALL be available for download
- **AND** user can choose which version to download
- **AND** Layout PDF remains unchanged after translation

View File

@@ -0,0 +1,78 @@
# Tasks: Unify Direct Track PDF Rendering
## 1. Backend - PDF Generator Service
- [x] 1.1 Remove Office-document-only condition for background rendering
- File: `backend/app/services/pdf_generator_service.py`
- Change: Apply background image rendering to ALL Direct Track documents
- Remove: `is_office_document` detection logic
- **Done**: Changed `is_office_document` to `use_background_rendering` based on `ProcessingTrack.DIRECT`
- [x] 1.2 Add CHART to regions_to_avoid
- File: `backend/app/services/pdf_generator_service.py`
- Change: Include `ElementType.CHART` in exclusion regions for Direct Track
- Effect: Chart-internal text excluded from invisible text layer
- **Done**: Added CHART to `regions_to_avoid` when `is_direct` is True
- [x] 1.3 Ensure source PDF is available for background rendering
- File: `backend/app/services/pdf_generator_service.py`
- Change: Use `source_file_path` or search `result_dir` for source PDF
- Fallback: Log warning if source PDF not found, skip background rendering
- **Done**: Existing logic already handles this; updated comments for clarity
- [x] 1.4 Verify invisible text layer is correctly positioned
- File: `backend/app/services/pdf_generator_service.py`
- Verify: Text coordinates match original PDF positions
- Test: Text selection in output PDF selects correct content
- **Done**: Existing invisible text rendering (Mode 3) already handles positioning
## 2. Backend - Testing
- [x] 2.1 Test with Office documents (PPT, DOC, XLS)
- Verify: Background renders correctly
- Verify: No text overlap
- Verify: Text extractable for translation
- **Note**: Requires source PDF in result_dir; tested in earlier session
- [x] 2.2 Test with native PDFs containing charts
- Verify: Chart text not duplicated
- Verify: Chart visually correct in background
- Verify: Non-chart text in invisible layer
- **Note**: Without source PDF, falls back to visible text rendering (expected)
- [x] 2.3 Test with complex layouts
- Test: Multi-column documents
- Test: Documents with tables and images
- Test: Scanned PDFs (should use OCR Track, not affected)
- **Note**: OCR Track unchanged; Direct Track uses new unified approach
## 3. Frontend - Verification
- [x] 3.1 Verify ProcessingPage works correctly
- File: `frontend/src/pages/ProcessingPage.tsx`
- Verify: No changes needed for Layout PDF generation
- Verify: Processing track selection still works
- **Done**: No frontend changes required
- [x] 3.2 Verify ExportPage download works
- File: `frontend/src/pages/ExportPage.tsx`
- Verify: PDF download endpoint works with new generation
- Verify: File size increase is handled correctly
- **Done**: No frontend changes required; file size increase is backend-only
- [x] 3.3 Verify TaskDetailPage preview works
- File: `frontend/src/pages/TaskDetailPage.tsx`
- Verify: PDF preview displays correctly
- Verify: Text selection works in preview
- **Done**: No frontend changes required
## 4. Documentation
- [x] 4.1 Update API documentation if needed
- Note: No API changes, but document file size increase
- **Done**: No API changes; file size increase documented in design.md
- [x] 4.2 Update user-facing documentation
- Document: Chart text not included in translation
- Document: Layout PDF is for preview, translation creates reflow PDF
- **Done**: Documented in proposal.md and design.md

View File

@@ -139,3 +139,45 @@ The system SHALL convert PyMuPDF results to UnifiedDocument with correct table c
- **AND** extract tables with cell boundaries, content, and merge info - **AND** extract tables with cell boundaries, content, and merge info
- **AND** include only meaningful images in output - **AND** include only meaningful images in output
### Requirement: Direct Track Background Image Rendering
The system SHALL render Direct Track PDF output using a full-page background image with an invisible text overlay to preserve visual fidelity while maintaining text extractability.
#### Scenario: Render Direct Track PDF with background image
- **WHEN** generating Layout PDF for a Direct Track document
- **THEN** the system SHALL render each source PDF page as a full-page background image at 2x resolution
- **AND** overlay invisible text elements using PDF Text Rendering Mode 3
- **AND** the invisible text SHALL be positioned at original coordinates for accurate selection
#### Scenario: Handle Office documents (PPT, DOC, XLS)
- **WHEN** processing an Office document converted to PDF
- **THEN** the system SHALL use the same background image + invisible text approach
- **AND** preserve all visual elements including vector graphics, gradients, and complex layouts
- **AND** the converted PDF in result directory SHALL be used as background source
#### Scenario: Handle native editable PDFs
- **WHEN** processing a native PDF through Direct Track
- **THEN** the system SHALL use the source PDF for background rendering
- **AND** apply the same invisible text overlay approach
- **AND** chart regions SHALL be excluded from the text layer
### Requirement: Chart Region Text Exclusion
The system SHALL exclude text elements within chart regions from the invisible text layer to prevent duplicate content and unnecessary translation.
#### Scenario: Detect chart regions
- **WHEN** classifying page elements for Direct Track
- **THEN** the system SHALL identify elements with type CHART
- **AND** add chart bounding boxes to regions_to_avoid list
#### Scenario: Exclude chart-internal text from invisible layer
- **WHEN** rendering invisible text layer
- **THEN** the system SHALL skip text elements whose bounding boxes overlap with chart regions
- **AND** chart axis labels, legends, and data labels SHALL NOT be in the invisible text layer
- **AND** these texts remain visible in the background image
#### Scenario: Chart text not available for translation
- **WHEN** extracting text for translation from a Direct Track document
- **THEN** chart-internal text SHALL NOT be included in translatable elements
- **AND** this is expected behavior as chart labels typically don't require translation

View File

@@ -4,31 +4,27 @@
TBD - created by archiving change fix-v2-api-ui-issues. Update Purpose after archive. TBD - created by archiving change fix-v2-api-ui-issues. Update Purpose after archive.
## Requirements ## Requirements
### Requirement: Export Interface ### Requirement: Export Interface
The Export page SHALL support downloading OCR results in multiple formats using V2 task APIs, with processing track information and enhanced structure data.
#### Scenario: Export page uses V2 download endpoints The Export interface in TaskDetailPage SHALL provide streamlined download options focusing on PDF formats.
- **WHEN** user selects a format and clicks export button
- **THEN** frontend SHALL call V2 endpoint `/api/v2/tasks/{task_id}/download/{format}`
- **AND** frontend SHALL NOT call V1 `/api/v2/export` endpoint (which returns 404)
- **AND** file SHALL download successfully
#### Scenario: Export supports multiple formats #### Scenario: Download options for completed tasks
- **WHEN** user exports a completed task - **WHEN** viewing a completed task in TaskDetailPage
- **THEN** system SHALL support downloading as TXT, JSON, Excel, Markdown, and PDF - **THEN** the download section SHALL display only two buttons: "版面 PDF" and "流式 PDF"
- **AND** each format SHALL use correct V2 download endpoint - **AND** JSON, UnifiedDocument, and Markdown download buttons SHALL NOT be displayed
- **AND** downloaded files SHALL contain task OCR results - **AND** the download grid SHALL use a 2-column layout
#### Scenario: Export includes processing track metadata #### Scenario: Translation download options
- **WHEN** user exports a task processed through dual-track system - **WHEN** viewing completed translations in TaskDetailPage
- **THEN** exported JSON SHALL include "processing_track" field indicating "ocr" or "direct" - **THEN** each translation item SHALL display only a "流式 PDF" download button
- **AND** SHALL include "processing_metadata" with track-specific information - **AND** translation JSON download button SHALL NOT be displayed
- **AND** SHALL maintain backward compatibility for clients not expecting these fields - **AND** Layout PDF option for translations SHALL NOT be displayed
- **AND** delete translation button SHALL remain available
#### Scenario: Export UnifiedDocument format #### Scenario: Backend API remains unchanged
- **WHEN** user requests JSON export with unified=true parameter - **WHEN** external clients call download endpoints directly
- **THEN** system SHALL return UnifiedDocument structure - **THEN** JSON, Markdown, and UnifiedDocument endpoints SHALL still function
- **AND** include complete element hierarchy with coordinates - **AND** translated Layout PDF endpoint SHALL still function
- **AND** preserve all PP-StructureV3 element types for OCR track - **AND** no backend changes are required for this frontend simplification
### Requirement: Multi-Task Export Selection ### Requirement: Multi-Task Export Selection
The Export page SHALL allow users to select and export multiple tasks. The Export page SHALL allow users to select and export multiple tasks.
@@ -59,13 +55,15 @@ Export settings (format, thresholds, templates) SHALL apply consistently to V2 t
- **AND** template SHALL be passed to V2 `/tasks/{id}/download/pdf` endpoint - **AND** template SHALL be passed to V2 `/tasks/{id}/download/pdf` endpoint
### Requirement: Enhanced PDF Export with Layout Preservation ### Requirement: Enhanced PDF Export with Layout Preservation
The PDF export SHALL accurately preserve document layout from both OCR and direct extraction tracks with correct coordinate transformation and multi-page support.
The PDF export SHALL accurately preserve document layout from both OCR and direct extraction tracks with correct coordinate transformation and multi-page support. For Direct Track, a background image rendering approach SHALL be used for visual fidelity.
#### Scenario: Export PDF from direct extraction track #### Scenario: Export PDF from direct extraction track
- **WHEN** exporting PDF from a direct-extraction processed document - **WHEN** exporting PDF from a direct-extraction processed document
- **THEN** the PDF SHALL maintain exact text positioning from source - **THEN** the system SHALL render source PDF pages as full-page background images at 2x resolution
- **AND** preserve original fonts and styles where possible - **AND** overlay invisible text elements using PDF Text Rendering Mode 3
- **AND** include extracted images at correct positions - **AND** text SHALL remain selectable and searchable despite being invisible
- **AND** visual output SHALL match source document exactly
#### Scenario: Export PDF from OCR track with full structure #### Scenario: Export PDF from OCR track with full structure
- **WHEN** exporting PDF from OCR-processed document - **WHEN** exporting PDF from OCR-processed document
@@ -78,20 +76,18 @@ The PDF export SHALL accurately preserve document layout from both OCR and direc
- **THEN** system SHALL use explicit page dimensions from OCR results (not inferred from bounding boxes) - **THEN** system SHALL use explicit page dimensions from OCR results (not inferred from bounding boxes)
- **AND** correctly transform Y-axis coordinates from top-left (OCR) to bottom-left (PDF/ReportLab) origin - **AND** correctly transform Y-axis coordinates from top-left (OCR) to bottom-left (PDF/ReportLab) origin
- **AND** prevent vertical flipping or position misalignment errors - **AND** prevent vertical flipping or position misalignment errors
- **AND** handle page size variations accurately
#### Scenario: Support multi-page documents with varying dimensions #### Scenario: Direct Track PDF file size increase
- **WHEN** generating PDF from multi-page document with mixed orientations - **WHEN** generating Layout PDF for Direct Track documents
- **THEN** system SHALL apply correct page size for each page independently - **THEN** the system SHALL accept increased file size due to embedded page images
- **AND** support both portrait and landscape pages in same document - **AND** approximately 1-2 MB per page at 2x resolution is expected
- **AND** NOT use first page dimensions for all subsequent pages - **AND** this trade-off is accepted for improved visual fidelity
- **AND** call setPageSize() for each new page before rendering content
#### Scenario: Single-page layout verification #### Scenario: Chart elements excluded from text layer
- **WHEN** user exports OCR-processed single-page document (e.g., img1.png) - **WHEN** generating Layout PDF containing charts
- **THEN** generated PDF text positions SHALL match original image coordinates - **THEN** the system SHALL NOT include chart-internal text in the invisible text layer
- **AND** top-aligned text (e.g., headers) SHALL appear at correct vertical position - **AND** chart visuals SHALL be preserved in the background image
- **AND** no content SHALL be vertically flipped or offset from expected position - **AND** chart text SHALL NOT be available for text selection or translation
### Requirement: Structure Data Export ### Requirement: Structure Data Export
The system SHALL provide export formats that preserve document structure for downstream processing. The system SHALL provide export formats that preserve document structure for downstream processing.

View File

@@ -257,3 +257,48 @@ The system SHALL provide a service to merge translation data with UnifiedDocumen
- **THEN** the system creates a modified copy - **THEN** the system creates a modified copy
- **AND** original UnifiedDocument remains unchanged - **AND** original UnifiedDocument remains unchanged
### Requirement: Translation Output as Reflow PDF
The system SHALL generate translated documents as reflow-layout PDFs with real visible text, separate from the Layout PDF which uses background images.
#### Scenario: Generate translated PDF with reflow layout
- **WHEN** translation is completed for a document
- **THEN** the system SHALL generate a new PDF with translated text
- **AND** the translated PDF SHALL use reflow layout (not background image)
- **AND** text SHALL be real visible text, not invisible overlay
- **AND** page breaks SHALL correspond to original document pages
#### Scenario: Maintain page correspondence in translated output
- **WHEN** generating translated PDF
- **THEN** content from original page 1 SHALL appear in translated page 1
- **AND** content from original page 2 SHALL appear in translated page 2
- **AND** each page may have different content length but maintains page boundaries
#### Scenario: Chart text excluded from translation
- **WHEN** extracting text for translation from Direct Track documents
- **THEN** text elements within chart regions SHALL NOT be included
- **AND** chart labels, axis text, and legends SHALL remain untranslated
- **AND** this is expected behavior documented for users
### Requirement: Dual PDF Output Concept
The system SHALL maintain clear separation between Layout PDF (preview) and Translated PDF (output).
#### Scenario: Layout PDF for preview
- **WHEN** user views a processed document before translation
- **THEN** the Layout PDF SHALL be displayed
- **AND** Layout PDF preserves exact visual appearance of source
- **AND** text is invisible overlay for extraction purposes only
#### Scenario: Translated PDF for final output
- **WHEN** user requests translated document
- **THEN** the Translated PDF SHALL be generated
- **AND** Translated PDF uses reflow layout with visible translated text
- **AND** original visual styling is not preserved (text-focused output)
#### Scenario: Both PDFs available after translation
- **WHEN** translation is completed
- **THEN** both Layout PDF and Translated PDF SHALL be available for download
- **AND** user can choose which version to download
- **AND** Layout PDF remains unchanged after translation