OK
This commit is contained in:
51
tasks.py
51
tasks.py
@@ -165,21 +165,49 @@ def extract_audio_task(self, input_path, output_path):
|
||||
@celery.task(base=ProgressTask, bind=True)
|
||||
def transcribe_audio_task(self, audio_path):
|
||||
from app import app
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
logger.error(f"[TRANSCRIBE DEBUG] Starting transcribe task for: {audio_path}")
|
||||
|
||||
with app.app_context():
|
||||
try:
|
||||
logger.error(f"[TRANSCRIBE DEBUG] Entered app context")
|
||||
self.update_progress(0, 100, "Loading and preparing audio file...")
|
||||
audio = AudioSegment.from_file(audio_path)
|
||||
logger.error(f"[TRANSCRIBE DEBUG] Progress updated to 0%")
|
||||
|
||||
logger.error(f"[TRANSCRIBE DEBUG] About to load audio file: {audio_path}")
|
||||
audio = AudioSegment.from_file(audio_path)
|
||||
logger.error(f"[TRANSCRIBE DEBUG] Audio loaded successfully, duration: {len(audio)}ms")
|
||||
|
||||
# 1. Split audio by silence (skip for very long audio to avoid timeout)
|
||||
logger.error(f"[TRANSCRIBE DEBUG] Starting silence detection")
|
||||
audio_duration_minutes = len(audio) / (1000 * 60) # Convert to minutes
|
||||
logger.error(f"[TRANSCRIBE DEBUG] Audio duration: {audio_duration_minutes:.2f} minutes")
|
||||
|
||||
if audio_duration_minutes > 10: # Skip silence detection for audio longer than 10 minutes
|
||||
logger.error(f"[TRANSCRIBE DEBUG] Audio too long ({audio_duration_minutes:.2f} min), skipping silence detection")
|
||||
self.update_progress(10, 100, f"Audio is {audio_duration_minutes:.1f} minutes long, processing as single chunk...")
|
||||
chunks = [audio] # Use entire audio as single chunk
|
||||
else:
|
||||
self.update_progress(5, 100, "Detecting silence to split audio into chunks...")
|
||||
try:
|
||||
chunks = split_on_silence(
|
||||
audio,
|
||||
min_silence_len=700,
|
||||
silence_thresh=-40,
|
||||
keep_silence=300
|
||||
)
|
||||
logger.error(f"[TRANSCRIBE DEBUG] Silence detection completed, found {len(chunks)} chunks")
|
||||
except Exception as e:
|
||||
logger.error(f"[TRANSCRIBE DEBUG] Error in silence detection: {str(e)}")
|
||||
chunks = [audio]
|
||||
|
||||
# 1. Split audio by silence
|
||||
self.update_progress(5, 100, "Detecting silence to split audio into chunks...")
|
||||
chunks = split_on_silence(
|
||||
audio,
|
||||
min_silence_len=700,
|
||||
silence_thresh=-40,
|
||||
keep_silence=300
|
||||
)
|
||||
if not chunks: # If no silence is detected, treat the whole audio as one chunk
|
||||
logger.error(f"[TRANSCRIBE DEBUG] No chunks detected, using full audio")
|
||||
chunks = [audio]
|
||||
else:
|
||||
logger.error(f"[TRANSCRIBE DEBUG] Using {len(chunks)} chunks")
|
||||
|
||||
# 2. Process chunks and ensure they are within API limits
|
||||
final_segments = []
|
||||
@@ -229,10 +257,13 @@ def transcribe_audio_task(self, audio_path):
|
||||
return {'status': 'Success', 'content': full_content, 'result_path': transcript_filename}
|
||||
|
||||
except Exception as e:
|
||||
import traceback
|
||||
logger.error(f"[TRANSCRIBE DEBUG] Exception occurred: {type(e).__name__}: {str(e)}")
|
||||
logger.error(f"[TRANSCRIBE DEBUG] Full traceback: {traceback.format_exc()}")
|
||||
error_message = f"An error occurred: {str(e)}"
|
||||
self.update_state(
|
||||
state='FAILURE',
|
||||
meta={'exc_type': type(e).__name__, 'exc_message': error_message}
|
||||
meta={'exc_type': type(e).__name__, 'exc_message': error_message, 'traceback': traceback.format_exc()}
|
||||
)
|
||||
return {'status': 'Error', 'error': error_message}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user