fix: multi-worker translation status and OCR fallback handling
Translation status (multi-worker support):
- Add filesystem lock files (.translating) to track in-progress translations
- Check lock files in the /status API when job_state is not found in the current worker
- Remove lock files on translation success or failure
OCR fallback fix:
- Fix empty pages when layout analysis fails but OCR succeeds
- Change the key-existence check ('enhanced_results' in ocr_results) to a non-empty check (ocr_results.get('enhanced_results'))
- This ensures fallback to text_regions when enhanced_results is an empty list
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -343,9 +343,26 @@ async def get_translation_status(
|
|||||||
job_state = translation_service.get_job_state(task_id)
|
job_state = translation_service.get_job_state(task_id)
|
||||||
|
|
||||||
if not job_state:
|
if not job_state:
|
||||||
# No active job - check if any completed translations exist
|
# No active job in this worker - check filesystem for status
|
||||||
if task.result_json_path:
|
if task.result_json_path:
|
||||||
result_dir = Path(task.result_json_path).parent
|
result_dir = Path(task.result_json_path).parent
|
||||||
|
|
||||||
|
# Check for in-progress translation (lock file exists)
|
||||||
|
lock_files = list(result_dir.glob("*.translating"))
|
||||||
|
if lock_files:
|
||||||
|
# Translation is in progress (possibly in another worker)
|
||||||
|
latest_lock = max(lock_files, key=lambda f: f.stat().st_mtime)
|
||||||
|
# Extract language from lock filename: {filename}_translated_{lang}.translating
|
||||||
|
lock_stem = latest_lock.stem # e.g., "scan_translated_en"
|
||||||
|
lang = lock_stem.split("_translated_")[-1] if "_translated_" in lock_stem else "unknown"
|
||||||
|
return TranslationStatusResponse(
|
||||||
|
task_id=task_id,
|
||||||
|
status=TranslationStatusEnum.TRANSLATING,
|
||||||
|
target_lang=lang,
|
||||||
|
progress=TranslationProgress(percentage=50.0) # Approximate progress
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check for completed translations
|
||||||
translated_files = list(result_dir.glob("*_translated_*.json"))
|
translated_files = list(result_dir.glob("*_translated_*.json"))
|
||||||
if translated_files:
|
if translated_files:
|
||||||
# Return completed status for the most recent translation
|
# Return completed status for the most recent translation
|
||||||
|
|||||||
@@ -439,14 +439,15 @@ class OCRToUnifiedConverter:
|
|||||||
ocr_dimensions = ocr_results.get('ocr_dimensions', {})
|
ocr_dimensions = ocr_results.get('ocr_dimensions', {})
|
||||||
|
|
||||||
# Check if we have enhanced results from PPStructureEnhanced
|
# Check if we have enhanced results from PPStructureEnhanced
|
||||||
if 'enhanced_results' in ocr_results:
|
# Note: Must check for non-empty list, not just key existence (key may exist with empty list)
|
||||||
|
if ocr_results.get('enhanced_results'):
|
||||||
pages = self._extract_from_enhanced_results(
|
pages = self._extract_from_enhanced_results(
|
||||||
ocr_results['enhanced_results'],
|
ocr_results['enhanced_results'],
|
||||||
raw_text_regions=raw_text_regions,
|
raw_text_regions=raw_text_regions,
|
||||||
ocr_dimensions=ocr_dimensions
|
ocr_dimensions=ocr_dimensions
|
||||||
)
|
)
|
||||||
# Check for traditional OCR results with text_regions at top level (from process_file_traditional)
|
# Check for traditional OCR results with text_regions at top level (from process_file_traditional)
|
||||||
elif 'text_regions' in ocr_results:
|
elif ocr_results.get('text_regions'):
|
||||||
pages = self._extract_from_traditional_ocr(ocr_results)
|
pages = self._extract_from_traditional_ocr(ocr_results)
|
||||||
# Check for traditional layout_data structure
|
# Check for traditional layout_data structure
|
||||||
elif 'layout_data' in ocr_results:
|
elif 'layout_data' in ocr_results:
|
||||||
|
|||||||
@@ -708,7 +708,14 @@ class TranslationService:
|
|||||||
f"Starting translation: task_id={task_id}, target={target_lang}"
|
f"Starting translation: task_id={task_id}, target={target_lang}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Create lock file to indicate translation in progress (for multi-worker support)
|
||||||
|
output_filename = result_json_path.stem.replace('_result', '')
|
||||||
|
lock_file_path = result_json_path.parent / f"{output_filename}_translated_{target_lang}.translating"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
# Create lock file
|
||||||
|
lock_file_path.touch()
|
||||||
|
|
||||||
# Load source JSON
|
# Load source JSON
|
||||||
with open(result_json_path, 'r', encoding='utf-8') as f:
|
with open(result_json_path, 'r', encoding='utf-8') as f:
|
||||||
result_json = json.load(f)
|
result_json = json.load(f)
|
||||||
@@ -786,12 +793,19 @@ class TranslationService:
|
|||||||
f"saved to {output_path}"
|
f"saved to {output_path}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Remove lock file on success
|
||||||
|
if lock_file_path.exists():
|
||||||
|
lock_file_path.unlink()
|
||||||
|
|
||||||
return True, output_path, None
|
return True, output_path, None
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Translation failed: {e}")
|
logger.error(f"Translation failed: {e}")
|
||||||
import traceback
|
import traceback
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
# Remove lock file on failure
|
||||||
|
if lock_file_path.exists():
|
||||||
|
lock_file_path.unlink()
|
||||||
return False, None, str(e)
|
return False, None, str(e)
|
||||||
|
|
||||||
def get_job_state(self, task_id: str) -> Optional[TranslationJobState]:
|
def get_job_state(self, task_id: str) -> Optional[TranslationJobState]:
|
||||||
|
|||||||
1
start.sh
1
start.sh
@@ -30,6 +30,7 @@ FRONTEND_PORT=${FRONTEND_PORT:-5173}
|
|||||||
|
|
||||||
# Production settings
|
# Production settings
|
||||||
PROD_MODE=false
|
PROD_MODE=false
|
||||||
|
# Translation status uses filesystem lock files for multi-worker support
|
||||||
UVICORN_WORKERS=${UVICORN_WORKERS:-4}
|
UVICORN_WORKERS=${UVICORN_WORKERS:-4}
|
||||||
|
|
||||||
# Create PID directory
|
# Create PID directory
|
||||||
|
|||||||
Reference in New Issue
Block a user