feat: optimize task file generation and add visualization download

Backend changes:
- Disable PP-Structure debug file generation by default
- Generate raw_ocr_regions.json independently of the debug flag (it is a critical file, not debug output)
- Add endpoint to download the visualization folder as a ZIP
- Add has_visualization field to TaskDetailResponse
- Stop generating Markdown files
- Save translated PDFs to task folder with caching

Frontend changes:
- Replace JSON/MD download buttons with PDF buttons in TaskHistoryPage
- Add visualization download button in TaskDetailPage
- Fix Processing page task switching issue (reset isNotFound)

Archive two OpenSpec proposals:
- optimize-task-files-and-visualization
- simplify-frontend-add-billing

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
egg
2025-12-12 19:11:50 +08:00
parent 65abd51d60
commit efa7e4175c
14 changed files with 534 additions and 97 deletions

View File

@@ -107,6 +107,50 @@ class PPStructureDebug:
return saved_files
def save_debug_results(
    self,
    pp_structure_results: Dict[str, Any],
    raw_ocr_regions: List[Dict[str, Any]],
    filename_prefix: str = "debug"
) -> Dict[str, Path]:
    """
    Write the debug-only artifacts: the raw PP-StructureV3 dump and the
    debug summary.

    raw_ocr_regions.json is not written by this method; the OCR regions
    are only handed to the summary generator.

    The two files are written independently and on a best-effort basis:
    a failure is logged and that file is left out of the result, and the
    other file is still attempted.

    Args:
        pp_structure_results: Raw PP-StructureV3 analysis results
        raw_ocr_regions: Raw OCR text regions (used for the summary only)
        filename_prefix: Prefix for the output file names

    Returns:
        Dictionary with a 'pp_structure' and/or 'summary' entry holding
        the path of each file that was saved successfully
    """
    written: Dict[str, Path] = {}

    # Both targets live in the same output directory and share the prefix.
    pp_json_path = self.output_dir / f"{filename_prefix}_pp_structure_raw.json"
    summary_path = self.output_dir / f"{filename_prefix}_debug_summary.json"

    # Artifact 1: raw PP-StructureV3 results, converted to a
    # JSON-serializable structure before dumping.
    try:
        payload = self._make_serializable(pp_structure_results)
        with pp_json_path.open('w', encoding='utf-8') as out:
            json.dump(payload, out, ensure_ascii=False, indent=2)
        written['pp_structure'] = pp_json_path
        logger.info(f"Saved PP-StructureV3 raw results to {pp_json_path}")
    except Exception as e:
        # Debug output must never abort the caller; log and move on.
        logger.error(f"Failed to save PP-StructureV3 results: {e}")

    # Artifact 2: summary built from the PP-Structure results and the
    # raw OCR regions.
    try:
        report = self._generate_summary(pp_structure_results, raw_ocr_regions)
        with summary_path.open('w', encoding='utf-8') as out:
            json.dump(report, out, ensure_ascii=False, indent=2)
        written['summary'] = summary_path
        logger.info(f"Saved debug summary to {summary_path}")
    except Exception as e:
        logger.error(f"Failed to save debug summary: {e}")

    return written
def generate_visualization(
self,
image_path: Path,