feat: Add Dify audio transcription with VAD chunking and SSE progress
- Add audio file upload transcription via Dify STT API
- Implement VAD-based audio segmentation in sidecar (3-min chunks)
- Add SSE endpoint for real-time transcription progress updates
- Fix chunk size enforcement for reliable uploads
- Add retry logic with exponential backoff for API calls
- Support Python 3.13+ with audioop-lts package
- Update frontend with Chinese progress messages and chunk display
- Improve start.sh health check with retry loop

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -96,6 +96,33 @@
|
||||
color: #666;
|
||||
font-style: italic;
|
||||
}
|
||||
.upload-progress {
|
||||
display: none;
|
||||
padding: 10px 15px;
|
||||
background: #fff3e0;
|
||||
border-radius: 6px;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
.upload-progress.active {
|
||||
display: block;
|
||||
}
|
||||
.upload-progress-bar {
|
||||
height: 6px;
|
||||
background: #e0e0e0;
|
||||
border-radius: 3px;
|
||||
overflow: hidden;
|
||||
margin-top: 8px;
|
||||
}
|
||||
.upload-progress-fill {
|
||||
height: 100%;
|
||||
background: #ff9800;
|
||||
width: 0%;
|
||||
transition: width 0.3s ease;
|
||||
}
|
||||
.upload-progress-text {
|
||||
font-size: 13px;
|
||||
color: #e65100;
|
||||
}
|
||||
.transcript-textarea {
|
||||
width: 100%;
|
||||
min-height: 400px;
|
||||
@@ -143,8 +170,10 @@
|
||||
<div class="panel">
|
||||
<div class="panel-header">
|
||||
<span>Transcript (逐字稿)</span>
|
||||
<div class="recording-controls" style="padding: 0;">
|
||||
<div class="recording-controls" style="padding: 0; display: flex; gap: 8px;">
|
||||
<button class="btn btn-danger" id="record-btn">Start Recording</button>
|
||||
<button class="btn btn-secondary" id="upload-audio-btn">Upload Audio</button>
|
||||
<input type="file" id="audio-file-input" accept=".mp3,.wav,.m4a,.webm,.ogg,.flac,.aac" style="display: none;">
|
||||
</div>
|
||||
</div>
|
||||
<div class="panel-body">
|
||||
@@ -155,6 +184,14 @@
|
||||
<span class="segment-count" id="segment-count">Segments: 0</span>
|
||||
</div>
|
||||
|
||||
<!-- Upload Progress -->
|
||||
<div id="upload-progress" class="upload-progress">
|
||||
<span class="upload-progress-text" id="upload-progress-text">Uploading...</span>
|
||||
<div class="upload-progress-bar">
|
||||
<div class="upload-progress-fill" id="upload-progress-fill"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Single Transcript Textarea -->
|
||||
<div id="transcript-container">
|
||||
<textarea
|
||||
@@ -203,7 +240,8 @@
|
||||
updateMeeting,
|
||||
deleteMeeting,
|
||||
exportMeeting,
|
||||
summarizeTranscript
|
||||
summarizeTranscript,
|
||||
transcribeAudio
|
||||
} from '../services/api.js';
|
||||
|
||||
const meetingId = localStorage.getItem('currentMeetingId');
|
||||
@@ -234,6 +272,11 @@
|
||||
const deleteBtn = document.getElementById('delete-btn');
|
||||
const addConclusionBtn = document.getElementById('add-conclusion-btn');
|
||||
const addActionBtn = document.getElementById('add-action-btn');
|
||||
const uploadAudioBtn = document.getElementById('upload-audio-btn');
|
||||
const audioFileInput = document.getElementById('audio-file-input');
|
||||
const uploadProgressEl = document.getElementById('upload-progress');
|
||||
const uploadProgressText = document.getElementById('upload-progress-text');
|
||||
const uploadProgressFill = document.getElementById('upload-progress-fill');
|
||||
|
||||
// Load meeting data
|
||||
async function loadMeeting() {
|
||||
@@ -460,6 +503,86 @@
|
||||
processingIndicatorEl.classList.add('hidden');
|
||||
}
|
||||
|
||||
// === Audio File Upload ===
|
||||
// Clicking "Upload Audio" opens the hidden file picker, unless a live
// recording is in progress, in which case the user is asked to stop it first.
uploadAudioBtn.addEventListener('click', () => {
    if (!isRecording) {
        audioFileInput.click();
        return;
    }

    alert('Please stop recording before uploading audio.');
});
|
||||
|
||||
// Handle of the pending "auto-hide progress" timer. Kept outside the handler
// so a new upload can cancel the hide scheduled by the previous one.
let uploadProgressHideTimer = null;

// When a file is chosen: validate it, confirm overwriting an existing
// transcript, run the transcription while mirroring progress into the
// progress bar, then write the resulting transcript into the textarea.
audioFileInput.addEventListener('change', async (e) => {
    const file = e.target.files[0];
    if (!file) return;

    // Validate file size (500MB max)
    const maxSize = 500 * 1024 * 1024;
    if (file.size > maxSize) {
        alert('File too large. Maximum size is 500MB.');
        audioFileInput.value = '';
        return;
    }

    // Confirm if transcript has content
    const currentTranscript = transcriptTextEl.value.trim();
    if (currentTranscript && !confirm('This will replace the existing transcript. Do you want to continue?')) {
        audioFileInput.value = '';
        return;
    }

    // A previous successful upload may still have its hide timer pending;
    // cancel it so it cannot hide the progress of this upload.
    clearTimeout(uploadProgressHideTimer);
    uploadProgressHideTimer = null;

    // Start upload
    uploadAudioBtn.disabled = true;
    recordBtn.disabled = true;
    uploadProgressEl.classList.add('active');
    uploadProgressFill.style.width = '0%';
    uploadProgressText.textContent = 'Uploading audio file...';

    // Mirror one progress update from transcribeAudio into the progress UI.
    const showProgress = (progress) => {
        switch (progress.phase) {
            case 'uploading':
                uploadProgressFill.style.width = `${progress.progress}%`;
                uploadProgressText.textContent = `上傳中: ${progress.progress}%`;
                break;
            case 'processing':
                uploadProgressFill.style.width = `${progress.progress}%`;
                uploadProgressText.textContent = progress.message || '處理中...';
                break;
            case 'transcribing':
                uploadProgressFill.style.width = `${progress.progress}%`;
                // "current" is 0 right after segmentation, in which case the
                // server-provided message is shown instead of "0/N".
                uploadProgressText.textContent = progress.total && progress.current
                    ? `轉錄中: ${progress.current}/${progress.total} 片段 (${progress.progress}%)`
                    : progress.message || '轉錄中...';
                break;
            case 'complete':
                uploadProgressFill.style.width = '100%';
                uploadProgressText.textContent = progress.message || '轉錄完成';
                break;
            default:
                break;
        }
    };

    try {
        const result = await transcribeAudio(file, showProgress);

        // Success - update transcript
        transcriptTextEl.value = result.transcript || '';

        // The result may not carry an explicit failure count; fall back to
        // total minus processed so partial failures are still reported.
        const failedChunks = result.chunks_failed ?? (result.chunks_total - result.chunks_processed);
        const chunksInfo = failedChunks > 0
            ? `${result.chunks_processed}/${result.chunks_total} 片段成功`
            : `${result.chunks_processed} 片段`;

        // Omit the duration instead of printing "NaN秒" when it is missing.
        const seconds = Math.round(result.total_duration_seconds);
        const durationInfo = Number.isFinite(seconds) ? `, ${seconds}秒` : '';
        uploadProgressText.textContent = `轉錄完成!(${chunksInfo}${durationInfo})`;

        // Auto-hide progress after 3 seconds
        uploadProgressHideTimer = setTimeout(() => {
            uploadProgressHideTimer = null;
            uploadProgressEl.classList.remove('active');
        }, 3000);
    } catch (error) {
        alert(`Error transcribing audio: ${error.message}`);
        uploadProgressEl.classList.remove('active');
    } finally {
        uploadAudioBtn.disabled = false;
        recordBtn.disabled = false;
        // Reset the input so choosing the same file again fires "change".
        audioFileInput.value = '';
    }
});
|
||||
|
||||
// === Streaming Event Handlers (legacy, kept for future use) ===
|
||||
window.electronAPI.onTranscriptionSegment((segment) => {
|
||||
console.log('Received segment:', segment);
|
||||
|
||||
@@ -141,6 +141,231 @@ export async function summarizeTranscript(transcript) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Upload an audio file to the streaming transcription endpoint and follow the
 * server-sent-event (SSE) style response body until the final transcript
 * arrives.
 *
 * @param {Blob} file - Audio file, sent as the multipart field "file".
 * @param {?function(Object): void} [onProgress] - Optional callback receiving
 *   `{ phase, progress, message?, total?, current? }`. `phase` is one of
 *   "processing", "transcribing" or "complete"; `progress` is a percentage.
 *   Exceptions thrown by the callback are logged and do not abort the
 *   transcription.
 * @returns {Promise<Object>} Resolves with `{ transcript, chunks_processed,
 *   chunks_total, chunks_failed, total_duration_seconds, language }`.
 * @throws {Error} On HTTP 401 (after clearing the token and navigating to the
 *   login page), on any other non-2xx status, when the server emits an
 *   "error" event, or when the stream ends without a "complete" event.
 */
export async function transcribeAudio(file, onProgress = null) {
  const url = `${API_BASE_URL}/ai/transcribe-audio-stream`;
  const formData = new FormData();
  formData.append("file", file);

  const token = getToken();
  // Only attach the header when a token exists: fetch would otherwise
  // stringify an `undefined` value and send "Authorization: undefined".
  const headers = token ? { Authorization: `Bearer ${token}` } : {};

  // fetch (not XMLHttpRequest) is used so the response body can be read
  // incrementally.
  const response = await fetch(url, { method: "POST", headers, body: formData });

  if (response.status === 401) {
    clearToken();
    window.electronAPI?.navigate("login");
    throw new Error("Session expired, please login again");
  }

  if (!response.ok) {
    let detail;
    try {
      ({ detail } = await response.json());
    } catch (parseError) {
      // Error body was not JSON (e.g. a proxy error page); report the
      // HTTP status instead of a confusing JSON syntax error.
      detail = undefined;
    }
    throw new Error(detail || `HTTP error ${response.status}`);
  }

  // Report progress without letting a faulty callback break the stream.
  const notify = (update) => {
    if (!onProgress) return;
    try {
      onProgress(update);
    } catch (callbackError) {
      console.warn("onProgress callback failed:", callbackError);
    }
  };

  notify({ phase: "processing", progress: 0, message: "處理中..." });

  let result = null;
  let totalChunks = 0;

  // Map "completed chunks" onto the 10-95% band; falls back to 10 when the
  // chunk count is unknown or zero so the bar never gets NaN/Infinity.
  const chunkPercent = (completed) => {
    const ratio = completed / totalChunks;
    if (!Number.isFinite(ratio)) return 10;
    return Math.round(10 + Math.min(Math.max(ratio, 0), 1) * 85);
  };

  // Apply one decoded event to the progress state / final result.
  const handleEvent = (data) => {
    switch (data.event) {
      case "start":
      case "segmenting":
        notify({ phase: "processing", progress: 5, message: data.message });
        break;

      case "segments_ready":
        totalChunks = data.total;
        notify({
          phase: "transcribing",
          progress: 10,
          total: totalChunks,
          current: 0,
          message: data.message,
        });
        break;

      case "chunk_start":
        notify({
          phase: "transcribing",
          progress: chunkPercent(data.chunk - 1),
          total: totalChunks,
          current: data.chunk,
          message: data.message,
        });
        break;

      case "chunk_done":
        notify({
          phase: "transcribing",
          progress: chunkPercent(data.chunk),
          total: totalChunks,
          current: data.chunk,
          message: data.message,
        });
        break;

      case "chunk_error":
        console.warn(`Chunk ${data.chunk} error: ${data.message}`);
        break;

      case "error":
        // Must propagate to the caller; it is thrown outside the JSON
        // parsing guard so it is not mistaken for a parse error.
        throw new Error(data.message || "Transcription failed");

      case "complete": {
        // Callers read `chunks_failed`; when the payload does not carry it,
        // derive it from total minus processed.
        const missing = Number(data.chunks_total) - Number(data.chunks_processed);
        result = {
          transcript: data.transcript,
          chunks_processed: data.chunks_processed,
          chunks_total: data.chunks_total,
          chunks_failed: data.chunks_failed ?? (missing > 0 ? missing : 0),
          total_duration_seconds: data.duration,
          language: "zh",
        };
        notify({ phase: "complete", progress: 100, message: "轉錄完成" });
        break;
      }

      default:
        break;
    }
  };

  // Decode a single line of the stream; only "data: " lines carry events.
  const processLine = (line) => {
    if (!line.startsWith("data: ")) return;

    let data;
    try {
      data = JSON.parse(line.slice(6));
    } catch (parseError) {
      console.warn("SSE parse error:", parseError, line);
      return;
    }
    if (data === null || typeof data !== "object") return;

    handleEvent(data);
  };

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  try {
    let chunk = await reader.read();
    while (!chunk.done) {
      buffer += decoder.decode(chunk.value, { stream: true });
      const lines = buffer.split("\n");
      buffer = lines.pop() ?? ""; // Keep the incomplete last line buffered.
      lines.forEach(processLine);
      chunk = await reader.read();
    }

    // Flush bytes still held by the decoder, then any unterminated line.
    buffer += decoder.decode();
    if (buffer.trim()) {
      buffer.split("\n").forEach(processLine);
    }
  } catch (streamError) {
    // Stop downloading once the transcription is known to have failed; a
    // failure to cancel adds nothing useful to the original error.
    reader.cancel().catch(() => {});
    throw streamError;
  }

  if (!result) {
    throw new Error("Transcription failed - no result received");
  }
  return result;
}
|
||||
|
||||
/**
 * Legacy non-streaming version (fallback): upload an audio file with
 * XMLHttpRequest and resolve with the parsed JSON response once the whole
 * transcription has finished.
 *
 * @param {Blob} file - Audio file, sent as the multipart field "file".
 * @param {?function(Object): void} [onProgress] - Optional callback receiving
 *   `{ phase: "uploading", progress }` while the request body is being sent
 *   and `{ phase: "processing", progress: 0 }` once the upload has completed.
 * @returns {Promise<Object>} Resolves with the JSON body of a 2xx response.
 * @throws {Error} On HTTP 401 (after clearing the token and navigating to the
 *   login page), on any other non-2xx status, on an unparsable 2xx body, on
 *   network failure, or when the 10-minute timeout elapses.
 */
export async function transcribeAudioLegacy(file, onProgress = null) {
  const url = `${API_BASE_URL}/ai/transcribe-audio`;
  const formData = new FormData();
  formData.append("file", file);

  const token = getToken();

  // XMLHttpRequest is callback-based, hence the explicit Promise wrapper.
  return new Promise((resolve, reject) => {
    const xhr = new XMLHttpRequest();

    // All listeners are registered before send() so no event can be missed.
    if (onProgress) {
      xhr.upload.addEventListener("progress", (event) => {
        if (event.lengthComputable && event.total > 0) {
          const percentComplete = Math.round((event.loaded / event.total) * 100);
          onProgress({ phase: "uploading", progress: percentComplete });
        }
      });

      // Notify processing phase after upload completes. "load" is used
      // rather than "loadend" because "loadend" also fires after a failed,
      // aborted or timed-out upload, which must not be reported as progress.
      xhr.upload.addEventListener("load", () => {
        onProgress({ phase: "processing", progress: 0 });
      });
    }

    xhr.addEventListener("load", () => {
      if (xhr.status >= 200 && xhr.status < 300) {
        try {
          resolve(JSON.parse(xhr.responseText));
        } catch (e) {
          reject(new Error("Invalid response format"));
        }
        return;
      }

      if (xhr.status === 401) {
        clearToken();
        window.electronAPI?.navigate("login");
        reject(new Error("Session expired, please login again"));
        return;
      }

      try {
        const error = JSON.parse(xhr.responseText);
        reject(new Error(error.detail || `HTTP error ${xhr.status}`));
      } catch (e) {
        // Error body missing or not JSON: fall back to the status code.
        reject(new Error(`HTTP error ${xhr.status}`));
      }
    });

    xhr.addEventListener("error", () => {
      reject(new Error("Network error"));
    });

    xhr.addEventListener("timeout", () => {
      reject(new Error("Request timeout"));
    });

    xhr.open("POST", url, true);
    xhr.timeout = 600000; // 10 minutes for large files
    if (token) {
      xhr.setRequestHeader("Authorization", `Bearer ${token}`);
    }
    xhr.send(formData);
  });
}
|
||||
|
||||
// Export API
|
||||
export async function exportMeeting(id) {
|
||||
return request(`/meetings/${id}/export`, {
|
||||
|
||||
Reference in New Issue
Block a user