From 80c091b89a12b8d8a7b57f6688562134d65d296f Mon Sep 17 00:00:00 2001 From: egg Date: Fri, 14 Nov 2025 10:56:29 +0800 Subject: [PATCH] fix: add PaddlePaddle 2.x/3.x API compatibility layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PaddlePaddle 3.0.0b2 crashes with an "Illegal instruction" error on the current CPU. Downgrade to the stable 2.6.2 release, which works but uses a different API. Changes: - Auto-detect PaddlePaddle version at runtime - Use 'device' parameter for 3.x (device="gpu:0" or "cpu") - Use 'use_gpu' + 'gpu_mem' parameters for 2.x - Apply to both get_ocr_engine() and get_structure_engine() - Log PaddlePaddle version in initialization messages Current setup: - paddlepaddle-gpu==2.6.2 (stable, CUDA compiled) - paddleocr==3.3.1 - paddlex==3.3.9 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- backend/app/services/ocr_service.py | 114 ++++++++++++++++++++-------- 1 file changed, 84 insertions(+), 30 deletions(-) diff --git a/backend/app/services/ocr_service.py b/backend/app/services/ocr_service.py index b4b5746..7ebc4b9 100644 --- a/backend/app/services/ocr_service.py +++ b/backend/app/services/ocr_service.py @@ -170,25 +170,48 @@ class OCRService: logger.info(f"Initializing PaddleOCR engine for language: {lang} (GPU: {self.use_gpu})") try: - # PaddleOCR 3.x uses 'device' parameter instead of 'use_gpu' and 'gpu_mem' - device = f"gpu:{settings.gpu_device_id}" if self.use_gpu else "cpu" - self.ocr_engines[lang] = PaddleOCR( - use_angle_cls=True, - lang=lang, - device=device, - ) - logger.info(f"PaddleOCR engine ready for {lang} ({'GPU' if self.use_gpu else 'CPU'} mode)") + # Check PaddlePaddle version to use correct API + paddle_version = paddle.__version__ + is_paddle_3x = paddle_version.startswith('3.') + + if is_paddle_3x: + # PaddlePaddle 3.x uses 'device' parameter + device = f"gpu:{settings.gpu_device_id}" if self.use_gpu else "cpu" + self.ocr_engines[lang] = PaddleOCR( 
use_angle_cls=True, + lang=lang, + device=device, + ) + else: + # PaddlePaddle 2.x uses 'use_gpu' and 'gpu_mem' parameters + self.ocr_engines[lang] = PaddleOCR( + use_angle_cls=True, + lang=lang, + use_gpu=self.use_gpu, + gpu_mem=int(settings.gpu_memory_fraction * 1000) if self.use_gpu else 500, + ) + logger.info(f"PaddleOCR engine ready for {lang} (PaddlePaddle {paddle_version}, {'GPU' if self.use_gpu else 'CPU'} mode)") except Exception as e: # If GPU initialization fails, fall back to CPU if self.use_gpu: logger.warning(f"GPU initialization failed, falling back to CPU: {e}") self.use_gpu = False - self.ocr_engines[lang] = PaddleOCR( - use_angle_cls=True, - lang=lang, - device="cpu", - ) + paddle_version = paddle.__version__ + is_paddle_3x = paddle_version.startswith('3.') + + if is_paddle_3x: + self.ocr_engines[lang] = PaddleOCR( + use_angle_cls=True, + lang=lang, + device="cpu", + ) + else: + self.ocr_engines[lang] = PaddleOCR( + use_angle_cls=True, + lang=lang, + use_gpu=False, + ) logger.info(f"PaddleOCR engine ready for {lang} (CPU mode - fallback)") else: raise @@ -206,23 +229,13 @@ class OCRService: logger.info(f"Initializing PP-StructureV3 engine (GPU: {self.use_gpu})") try: - # PaddleOCR 3.x uses 'device' parameter instead of 'use_gpu' and 'gpu_mem' - device = f"gpu:{settings.gpu_device_id}" if self.use_gpu else "cpu" - self.structure_engine = PPStructureV3( - use_doc_orientation_classify=False, - use_doc_unwarping=False, - use_textline_orientation=False, - use_table_recognition=True, - use_formula_recognition=True, - layout_threshold=0.5, - device=device, - ) - logger.info(f"PP-StructureV3 engine ready ({'GPU' if self.use_gpu else 'CPU'} mode)") + # Check PaddlePaddle version to use correct API + paddle_version = paddle.__version__ + is_paddle_3x = paddle_version.startswith('3.') - except Exception as e: - # If GPU initialization fails, fall back to CPU - if self.use_gpu: - logger.warning(f"GPU initialization failed for PP-Structure, falling back to 
CPU: {e}") + if is_paddle_3x: + # PaddlePaddle 3.x uses 'device' parameter + device = f"gpu:{settings.gpu_device_id}" if self.use_gpu else "cpu" self.structure_engine = PPStructureV3( use_doc_orientation_classify=False, use_doc_unwarping=False, @@ -230,8 +243,49 @@ class OCRService: use_table_recognition=True, use_formula_recognition=True, layout_threshold=0.5, - device="cpu", + device=device, ) + else: + # PaddlePaddle 2.x uses 'use_gpu' and 'gpu_mem' parameters + self.structure_engine = PPStructureV3( + use_doc_orientation_classify=False, + use_doc_unwarping=False, + use_textline_orientation=False, + use_table_recognition=True, + use_formula_recognition=True, + layout_threshold=0.5, + use_gpu=self.use_gpu, + gpu_mem=int(settings.gpu_memory_fraction * 1000) if self.use_gpu else 500, + ) + logger.info(f"PP-StructureV3 engine ready (PaddlePaddle {paddle_version}, {'GPU' if self.use_gpu else 'CPU'} mode)") + + except Exception as e: + # If GPU initialization fails, fall back to CPU + if self.use_gpu: + logger.warning(f"GPU initialization failed for PP-Structure, falling back to CPU: {e}") + paddle_version = paddle.__version__ + is_paddle_3x = paddle_version.startswith('3.') + + if is_paddle_3x: + self.structure_engine = PPStructureV3( + use_doc_orientation_classify=False, + use_doc_unwarping=False, + use_textline_orientation=False, + use_table_recognition=True, + use_formula_recognition=True, + layout_threshold=0.5, + device="cpu", + ) + else: + self.structure_engine = PPStructureV3( + use_doc_orientation_classify=False, + use_doc_unwarping=False, + use_textline_orientation=False, + use_table_recognition=True, + use_formula_recognition=True, + layout_threshold=0.5, + use_gpu=False, + ) logger.info("PP-StructureV3 engine ready (CPU mode - fallback)") else: raise