fix: multi-worker translation status and OCR fallback handling

Translation status (multi-worker support):
- Add filesystem lock files (.translating) to track in-progress translations
- Check lock files in /status API when job_state is not found in the current worker
- Remove lock files on translation success or failure

OCR fallback fix:
- Fix empty pages when layout analysis fails but OCR succeeds
- Change `'enhanced_results' in ocr_results` to `ocr_results.get('enhanced_results')`
- This ensures fallback to text_regions when the enhanced_results key exists but holds an empty list

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
egg
2025-12-14 16:36:36 +08:00
parent 1c37585be2
commit 3ccbdb8394
4 changed files with 36 additions and 3 deletions

View File

@@ -439,14 +439,15 @@ class OCRToUnifiedConverter:
ocr_dimensions = ocr_results.get('ocr_dimensions', {})
# Check if we have enhanced results from PPStructureEnhanced
if 'enhanced_results' in ocr_results:
# Note: Must check for non-empty list, not just key existence (key may exist with empty list)
if ocr_results.get('enhanced_results'):
pages = self._extract_from_enhanced_results(
ocr_results['enhanced_results'],
raw_text_regions=raw_text_regions,
ocr_dimensions=ocr_dimensions
)
# Check for traditional OCR results with text_regions at top level (from process_file_traditional)
elif 'text_regions' in ocr_results:
elif ocr_results.get('text_regions'):
pages = self._extract_from_traditional_ocr(ocr_results)
# Check for traditional layout_data structure
elif 'layout_data' in ocr_results:

View File

@@ -708,7 +708,14 @@ class TranslationService:
f"Starting translation: task_id={task_id}, target={target_lang}"
)
# Create lock file to indicate translation in progress (for multi-worker support)
output_filename = result_json_path.stem.replace('_result', '')
lock_file_path = result_json_path.parent / f"{output_filename}_translated_{target_lang}.translating"
try:
# Create lock file
lock_file_path.touch()
# Load source JSON
with open(result_json_path, 'r', encoding='utf-8') as f:
result_json = json.load(f)
@@ -786,12 +793,19 @@ class TranslationService:
f"saved to {output_path}"
)
# Remove lock file on success
if lock_file_path.exists():
lock_file_path.unlink()
return True, output_path, None
except Exception as e:
logger.error(f"Translation failed: {e}")
import traceback
traceback.print_exc()
# Remove lock file on failure
if lock_file_path.exists():
lock_file_path.unlink()
return False, None, str(e)
def get_job_state(self, task_id: str) -> Optional[TranslationJobState]: