fix: disable chart recognition due to PaddlePaddle 3.0.0 API limitation

The PaddleOCR-VL chart recognition model requires the `fused_rms_norm_ext` API,
which is not available in the PaddlePaddle 3.0.0 stable release.

Changes:
- Set use_chart_recognition=False in PP-StructureV3 initialization
- Remove unsupported show_log parameter from PaddleOCR 3.x API calls
- Document known limitation in openspec proposal
- Add limitation documentation to README
- Update tasks.md with documentation task for known issues

Impact:
- Layout analysis still detects/extracts charts as images ✓
- Tables, formulas, and text recognition work normally ✓
- Deep chart understanding (type detection, data extraction) disabled ✗
- Chart to structured data conversion disabled ✗

Workaround: charts are still saved as image files for manual review.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
egg
2025-11-14 13:16:17 +08:00
parent 80c091b89a
commit b048f2d640
5 changed files with 119 additions and 133 deletions

View File

@@ -170,48 +170,25 @@ class OCRService:
logger.info(f"Initializing PaddleOCR engine for language: {lang} (GPU: {self.use_gpu})")
try:
# Check PaddlePaddle version to use correct API
paddle_version = paddle.__version__
is_paddle_3x = paddle_version.startswith('3.')
if is_paddle_3x:
# PaddlePaddle 3.x uses 'device' parameter
device = f"gpu:{settings.gpu_device_id}" if self.use_gpu else "cpu"
self.ocr_engines[lang] = PaddleOCR(
use_angle_cls=True,
lang=lang,
device=device,
)
else:
# PaddlePaddle 2.x uses 'use_gpu' and 'gpu_mem' parameters
self.ocr_engines[lang] = PaddleOCR(
use_angle_cls=True,
lang=lang,
use_gpu=self.use_gpu,
gpu_mem=int(settings.gpu_memory_fraction * 1000) if self.use_gpu else 500,
)
logger.info(f"PaddleOCR engine ready for {lang} (PaddlePaddle {paddle_version}, {'GPU' if self.use_gpu else 'CPU'} mode)")
# PaddleOCR 3.x: Device is set globally via paddle.set_device()
# No need to pass device/use_gpu/gpu_mem parameters
self.ocr_engines[lang] = PaddleOCR(
lang=lang,
use_textline_orientation=True, # Replaces deprecated use_angle_cls
)
logger.info(f"PaddleOCR engine ready for {lang} (PaddlePaddle {paddle.__version__}, {'GPU' if self.use_gpu else 'CPU'} mode)")
except Exception as e:
# If GPU initialization fails, fall back to CPU
if self.use_gpu:
logger.warning(f"GPU initialization failed, falling back to CPU: {e}")
self.use_gpu = False
paddle_version = paddle.__version__
is_paddle_3x = paddle_version.startswith('3.')
if is_paddle_3x:
self.ocr_engines[lang] = PaddleOCR(
use_angle_cls=True,
lang=lang,
device="cpu",
)
else:
self.ocr_engines[lang] = PaddleOCR(
use_angle_cls=True,
lang=lang,
use_gpu=False,
)
# Switch to CPU device globally
paddle.set_device('cpu')
self.ocr_engines[lang] = PaddleOCR(
lang=lang,
use_textline_orientation=True,
)
logger.info(f"PaddleOCR engine ready for {lang} (CPU mode - fallback)")
else:
raise
@@ -229,63 +206,35 @@ class OCRService:
logger.info(f"Initializing PP-StructureV3 engine (GPU: {self.use_gpu})")
try:
# Check PaddlePaddle version to use correct API
paddle_version = paddle.__version__
is_paddle_3x = paddle_version.startswith('3.')
if is_paddle_3x:
# PaddlePaddle 3.x uses 'device' parameter
device = f"gpu:{settings.gpu_device_id}" if self.use_gpu else "cpu"
self.structure_engine = PPStructureV3(
use_doc_orientation_classify=False,
use_doc_unwarping=False,
use_textline_orientation=False,
use_table_recognition=True,
use_formula_recognition=True,
layout_threshold=0.5,
device=device,
)
else:
# PaddlePaddle 2.x uses 'use_gpu' and 'gpu_mem' parameters
self.structure_engine = PPStructureV3(
use_doc_orientation_classify=False,
use_doc_unwarping=False,
use_textline_orientation=False,
use_table_recognition=True,
use_formula_recognition=True,
layout_threshold=0.5,
use_gpu=self.use_gpu,
gpu_mem=int(settings.gpu_memory_fraction * 1000) if self.use_gpu else 500,
)
logger.info(f"PP-StructureV3 engine ready (PaddlePaddle {paddle_version}, {'GPU' if self.use_gpu else 'CPU'} mode)")
# PaddleOCR 3.x: Device is set globally via paddle.set_device()
# No need to pass device/use_gpu/gpu_mem parameters
self.structure_engine = PPStructureV3(
use_doc_orientation_classify=False,
use_doc_unwarping=False,
use_textline_orientation=False,
use_table_recognition=True,
use_formula_recognition=True,
use_chart_recognition=False, # Disable chart recognition (requires fused_rms_norm_ext not in PaddlePaddle 3.0.0)
layout_threshold=0.5,
)
logger.info(f"PP-StructureV3 engine ready (PaddlePaddle {paddle.__version__}, {'GPU' if self.use_gpu else 'CPU'} mode)")
except Exception as e:
# If GPU initialization fails, fall back to CPU
if self.use_gpu:
logger.warning(f"GPU initialization failed for PP-Structure, falling back to CPU: {e}")
paddle_version = paddle.__version__
is_paddle_3x = paddle_version.startswith('3.')
if is_paddle_3x:
self.structure_engine = PPStructureV3(
use_doc_orientation_classify=False,
use_doc_unwarping=False,
use_textline_orientation=False,
use_table_recognition=True,
use_formula_recognition=True,
layout_threshold=0.5,
device="cpu",
)
else:
self.structure_engine = PPStructureV3(
use_doc_orientation_classify=False,
use_doc_unwarping=False,
use_textline_orientation=False,
use_table_recognition=True,
use_formula_recognition=True,
layout_threshold=0.5,
use_gpu=False,
)
self.use_gpu = False
# Switch to CPU device globally
paddle.set_device('cpu')
self.structure_engine = PPStructureV3(
use_doc_orientation_classify=False,
use_doc_unwarping=False,
use_textline_orientation=False,
use_table_recognition=True,
use_formula_recognition=True,
use_chart_recognition=False, # Disable chart recognition
layout_threshold=0.5,
)
logger.info("PP-StructureV3 engine ready (CPU mode - fallback)")
else:
raise