fix: multi-worker translation status and OCR fallback handling

Translation status (multi-worker support):
- Add filesystem lock files (.translating) to track in-progress translations
- Check lock files in the /status API when job_state is not found in the current worker
- Remove lock files on translation success or failure

OCR fallback fix:
- Fix empty pages when layout analysis fails but OCR succeeds
- Change `'enhanced_results' in ocr_results` to `ocr_results.get('enhanced_results')`
- This ensures fallback to text_regions when enhanced_results is an empty list

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 16:36:36 +08:00
parent 1c37585be2
commit 3ccbdb8394
4 changed files with 36 additions and 3 deletions
--- a/backend/app/routers/translate.py
+++ b/backend/app/routers/translate.py
@@ -343,9 +343,26 @@ async def get_translation_status(
    job_state = translation_service.get_job_state(task_id)
    if not job_state:
-        # No active job - check if any completed translations exist
+        # No active job in this worker - check filesystem for status
        if task.result_json_path:
            result_dir = Path(task.result_json_path).parent
            # Check for in-progress translation (lock file exists)
            lock_files = list(result_dir.glob("*.translating"))
            if lock_files:
                # Translation is in progress (possibly in another worker)
                latest_lock = max(lock_files, key=lambda f: f.stat().st_mtime)
                # Extract language from lock filename: {filename}_translated_{lang}.translating
                lock_stem = latest_lock.stem  # e.g., "scan_translated_en"
                lang = lock_stem.split("_translated_")[-1] if "_translated_" in lock_stem else "unknown"
                return TranslationStatusResponse(
                    task_id=task_id,
                    status=TranslationStatusEnum.TRANSLATING,
                    target_lang=lang,
                    progress=TranslationProgress(percentage=50.0)  # Approximate progress
                )
            # Check for completed translations
            translated_files = list(result_dir.glob("*_translated_*.json"))
            if translated_files:
                # Return completed status for the most recent translation
--- a/backend/app/services/ocr_to_unified_converter.py
+++ b/backend/app/services/ocr_to_unified_converter.py
@@ -439,14 +439,15 @@ class OCRToUnifiedConverter:
        ocr_dimensions = ocr_results.get('ocr_dimensions', {})
        # Check if we have enhanced results from PPStructureEnhanced
-        if 'enhanced_results' in ocr_results:
+        # Note: Must check for non-empty list, not just key existence (key may exist with empty list)
        if ocr_results.get('enhanced_results'):
            pages = self._extract_from_enhanced_results(
                ocr_results['enhanced_results'],
                raw_text_regions=raw_text_regions,
                ocr_dimensions=ocr_dimensions
            )
        # Check for traditional OCR results with text_regions at top level (from process_file_traditional)
-        elif 'text_regions' in ocr_results:
+        elif ocr_results.get('text_regions'):
            pages = self._extract_from_traditional_ocr(ocr_results)
        # Check for traditional layout_data structure
        elif 'layout_data' in ocr_results:
--- a/backend/app/services/translation_service.py
+++ b/backend/app/services/translation_service.py
@@ -708,7 +708,14 @@ class TranslationService:
            f"Starting translation: task_id={task_id}, target={target_lang}"
        )
        # Create lock file to indicate translation in progress (for multi-worker support)
        output_filename = result_json_path.stem.replace('_result', '')
        lock_file_path = result_json_path.parent / f"{output_filename}_translated_{target_lang}.translating"
        try:
            # Create lock file
            lock_file_path.touch()
            # Load source JSON
            with open(result_json_path, 'r', encoding='utf-8') as f:
                result_json = json.load(f)
@@ -786,12 +793,19 @@ class TranslationService:
                f"saved to {output_path}"
            )
            # Remove lock file on success
            if lock_file_path.exists():
                lock_file_path.unlink()
            return True, output_path, None
        except Exception as e:
            logger.error(f"Translation failed: {e}")
            import traceback
            traceback.print_exc()
            # Remove lock file on failure
            if lock_file_path.exists():
                lock_file_path.unlink()
            return False, None, str(e)
    def get_job_state(self, task_id: str) -> Optional[TranslationJobState]:
--- a/start.sh
+++ b/start.sh
@@ -30,6 +30,7 @@ FRONTEND_PORT=${FRONTEND_PORT:-5173}
 # Production settings
 PROD_MODE=false
 # Translation status uses filesystem lock files for multi-worker support
 UVICORN_WORKERS=${UVICORN_WORKERS:-4}
 # Create PID directory