feat: Add Dify audio transcription with VAD chunking and SSE progress

- Add audio file upload transcription via Dify STT API
- Implement VAD-based audio segmentation in sidecar (3-min chunks)
- Add SSE endpoint for real-time transcription progress updates
- Fix chunk size enforcement for reliable uploads
- Add retry logic with exponential backoff for API calls
- Support Python 3.13+ with audioop-lts package
- Update frontend with Chinese progress messages and chunk display
- Improve start.sh health check with retry loop

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
egg
2025-12-11 21:00:27 +08:00
parent e790f48967
commit 263eb1c394
10 changed files with 1008 additions and 16 deletions

View File

@@ -96,6 +96,33 @@
color: #666;
font-style: italic;
}
/* Upload progress panel: hidden until the "active" class is added. */
.upload-progress {
display: none;
padding: 10px 15px;
background: #fff3e0;
border-radius: 6px;
margin-bottom: 10px;
}
/* Visible state of the upload progress panel. */
.upload-progress.active {
display: block;
}
/* Grey track that contains the progress fill. */
.upload-progress-bar {
height: 6px;
background: #e0e0e0;
border-radius: 3px;
overflow: hidden;
margin-top: 8px;
}
/* Orange fill; starts at 0% and animates whenever its width changes. */
.upload-progress-fill {
height: 100%;
background: #ff9800;
width: 0%;
transition: width 0.3s ease;
}
/* Status line of the upload progress panel. */
.upload-progress-text {
font-size: 13px;
color: #e65100;
}
.transcript-textarea {
width: 100%;
min-height: 400px;
@@ -143,8 +170,10 @@
<div class="panel">
<div class="panel-header">
<span>Transcript (逐字稿)</span>
<div class="recording-controls" style="padding: 0;">
<div class="recording-controls" style="padding: 0; display: flex; gap: 8px;">
<button class="btn btn-danger" id="record-btn">Start Recording</button>
<button class="btn btn-secondary" id="upload-audio-btn">Upload Audio</button>
<input type="file" id="audio-file-input" accept=".mp3,.wav,.m4a,.webm,.ogg,.flac,.aac" style="display: none;">
</div>
</div>
<div class="panel-body">
@@ -155,6 +184,14 @@
<span class="segment-count" id="segment-count">Segments: 0</span>
</div>
<!-- Upload progress panel: status text plus a progress bar.
     Hidden by default; displayed while it carries the "active" class. -->
<div id="upload-progress" class="upload-progress">
<span class="upload-progress-text" id="upload-progress-text">Uploading...</span>
<div class="upload-progress-bar">
<div class="upload-progress-fill" id="upload-progress-fill"></div>
</div>
</div>
<!-- Single Transcript Textarea -->
<div id="transcript-container">
<textarea
@@ -203,7 +240,8 @@
updateMeeting,
deleteMeeting,
exportMeeting,
summarizeTranscript
summarizeTranscript,
transcribeAudio
} from '../services/api.js';
const meetingId = localStorage.getItem('currentMeetingId');
@@ -234,6 +272,11 @@
// Cached DOM references for the meeting action buttons.
const deleteBtn = document.getElementById('delete-btn');
const addConclusionBtn = document.getElementById('add-conclusion-btn');
const addActionBtn = document.getElementById('add-action-btn');
// Audio upload controls: the visible button, the hidden file input it opens,
// and the elements of the upload progress panel (container, text, bar fill).
const uploadAudioBtn = document.getElementById('upload-audio-btn');
const audioFileInput = document.getElementById('audio-file-input');
const uploadProgressEl = document.getElementById('upload-progress');
const uploadProgressText = document.getElementById('upload-progress-text');
const uploadProgressFill = document.getElementById('upload-progress-fill');
// Load meeting data
async function loadMeeting() {
@@ -460,6 +503,86 @@
processingIndicatorEl.classList.add('hidden');
}
// === Audio File Upload ===
// Clicking "Upload Audio" opens the hidden file picker, but only while no
// recording is in progress; otherwise the user is asked to stop recording.
uploadAudioBtn.addEventListener('click', () => {
  if (!isRecording) {
    audioFileInput.click();
    return;
  }
  alert('Please stop recording before uploading audio.');
});
// Handle of the pending "auto-hide progress panel" timer scheduled by the
// last successful upload, or null when nothing is scheduled.
let uploadProgressHideTimer = null;

/**
 * Handles selection of an audio file for transcription.
 *
 * Steps: validate the file size, confirm before overwriting a non-empty
 * transcript, call transcribeAudio(file, onProgress) while mirroring its
 * progress reports into the progress panel, then write the returned
 * transcript into the transcript textarea.
 *
 * Fields read from each progress report: phase, progress, message, current,
 * total. Fields read from the result: transcript, chunks_processed,
 * chunks_total, chunks_failed, total_duration_seconds.
 */
audioFileInput.addEventListener('change', async (e) => {
  const file = e.target.files[0];
  if (!file) return;

  // Validate file size (500MB max)
  const maxSize = 500 * 1024 * 1024;
  if (file.size > maxSize) {
    alert('File too large. Maximum size is 500MB.');
    audioFileInput.value = '';
    return;
  }

  // Confirm if transcript has content
  const currentTranscript = transcriptTextEl.value.trim();
  if (currentTranscript
    && !confirm('This will replace the existing transcript. Do you want to continue?')) {
    audioFileInput.value = '';
    return;
  }

  // A previous successful upload may still have its auto-hide timer pending.
  // Cancel it so it cannot hide the progress panel of this new upload.
  clearTimeout(uploadProgressHideTimer);
  uploadProgressHideTimer = null;

  // Updates the bar width and the status line together.
  const showProgress = (width, text) => {
    uploadProgressFill.style.width = width;
    uploadProgressText.textContent = text;
  };

  // Start upload
  uploadAudioBtn.disabled = true;
  recordBtn.disabled = true;
  uploadProgressEl.classList.add('active');
  showProgress('0%', 'Uploading audio file...');

  try {
    const result = await transcribeAudio(file, (progress) => {
      const percent = `${progress.progress}%`;
      switch (progress.phase) {
        case 'uploading':
          showProgress(percent, `上傳中: ${progress.progress}%`);
          break;
        case 'processing':
          showProgress(percent, progress.message || '處理中...');
          break;
        case 'transcribing':
          showProgress(
            percent,
            progress.total && progress.current
              ? `轉錄中: ${progress.current}/${progress.total} 片段 (${progress.progress}%)`
              : progress.message || '轉錄中...',
          );
          break;
        case 'complete':
          showProgress('100%', progress.message || '轉錄完成');
          break;
        default:
          // Unknown phases leave the panel unchanged.
          break;
      }
    });

    // Success - update transcript
    transcriptTextEl.value = result.transcript || '';

    const chunksInfo = result.chunks_failed > 0
      ? `${result.chunks_processed}/${result.chunks_total} 片段成功`
      : `${result.chunks_processed} 片段`;
    // Only mention the duration when it is a usable number, so a missing
    // field does not render as "NaN秒".
    const durationSeconds = Number(result.total_duration_seconds);
    const durationInfo = Number.isFinite(durationSeconds)
      ? `, ${Math.round(durationSeconds)}秒`
      : '';
    uploadProgressText.textContent = `轉錄完成!(${chunksInfo}${durationInfo})`;

    // Auto-hide progress after 3 seconds
    uploadProgressHideTimer = setTimeout(() => {
      uploadProgressHideTimer = null;
      uploadProgressEl.classList.remove('active');
    }, 3000);
  } catch (error) {
    alert(`Error transcribing audio: ${error.message}`);
    uploadProgressEl.classList.remove('active');
  } finally {
    // Re-enable the controls and reset the input so the same file can be
    // selected again.
    uploadAudioBtn.disabled = false;
    recordBtn.disabled = false;
    audioFileInput.value = '';
  }
});
// === Streaming Event Handlers (legacy, kept for future use) ===
window.electronAPI.onTranscriptionSegment((segment) => {
console.log('Received segment:', segment);