feat: Add Dify audio transcription with VAD chunking and SSE progress

- Add audio file upload transcription via Dify STT API
- Implement VAD-based audio segmentation in sidecar (3-min chunks)
- Add SSE endpoint for real-time transcription progress updates
- Fix chunk size enforcement for reliable uploads
- Add retry logic with exponential backoff for API calls
- Support Python 3.13+ with audioop-lts package
- Update frontend with Chinese progress messages and chunk display
- Improve start.sh health check with retry loop

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
egg
2025-12-11 21:00:27 +08:00
parent e790f48967
commit 263eb1c394
10 changed files with 1008 additions and 16 deletions

View File

@@ -141,6 +141,231 @@ export async function summarizeTranscript(transcript) {
});
}
/**
 * Upload an audio file and follow its transcription through a streamed
 * (Server-Sent-Events style) response.
 *
 * The file is POSTed as multipart form data to `/ai/transcribe-audio-stream`.
 * The response body is read incrementally; every `data: <json>` line is
 * decoded and translated into an `onProgress` update.
 *
 * @param {*} file - Value appended to the form under the "file" field
 *   (anything `FormData.append` accepts, typically a File/Blob).
 * @param {?function(Object): void} [onProgress=null] - Optional callback that
 *   receives `{ phase, progress, message, total?, current? }` objects. An
 *   exception thrown by the callback is logged and does not abort the
 *   transcription.
 * @returns {Promise<{transcript: *, chunks_processed: *, chunks_total: *,
 *   total_duration_seconds: *, language: string}>} Result built from the
 *   server's "complete" event.
 * @throws {Error} When the server answers 401 (the token is cleared and the
 *   app is sent to the login screen first), answers any other non-2xx status,
 *   emits an "error" event, or closes the stream without a "complete" event.
 */
export async function transcribeAudio(file, onProgress = null) {
  const url = `${API_BASE_URL}/ai/transcribe-audio-stream`;
  const formData = new FormData();
  formData.append("file", file);
  const token = getToken();

  // Progress reporting is best-effort: a faulty listener must not kill the job.
  const notify = (update) => {
    if (!onProgress) {
      return;
    }
    try {
      onProgress(update);
    } catch (err) {
      console.warn("Progress callback error:", err);
    }
  };

  const response = await fetch(url, {
    method: "POST",
    // Leave the header out when there is no token; a value of `undefined`
    // would be serialized by fetch as the literal string "undefined".
    headers: token ? { Authorization: `Bearer ${token}` } : {},
    body: formData,
  });

  if (response.status === 401) {
    clearToken();
    window.electronAPI?.navigate("login");
    throw new Error("Session expired, please login again");
  }

  if (!response.ok) {
    let detail;
    try {
      detail = (await response.json())?.detail;
    } catch (err) {
      // Body is not JSON (e.g. a proxy error page): fall back to the status.
      detail = undefined;
    }
    throw new Error(detail || `HTTP error ${response.status}`);
  }

  notify({ phase: "processing", progress: 0, message: "處理中..." });

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  let result = null;
  let totalChunks = 0;

  // Chunk work is mapped onto the 10%..95% band. Until "segments_ready" has
  // announced a positive total, stay at the start of the band instead of
  // dividing by zero.
  const chunkProgress = (completedChunks) =>
    totalChunks > 0 ? Math.round(10 + (completedChunks / totalChunks) * 85) : 10;

  // Applies one decoded event. Throws for the server's "error" event so the
  // caller receives the server-provided message.
  const handleEvent = (data) => {
    // `?.` tolerates a payload of `null`; non-object payloads simply match no case.
    switch (data?.event) {
      case "start":
      case "segmenting":
        notify({ phase: "processing", progress: 5, message: data.message });
        break;
      case "segments_ready":
        totalChunks = data.total;
        notify({
          phase: "transcribing",
          progress: 10,
          total: totalChunks,
          current: 0,
          message: data.message,
        });
        break;
      case "chunk_start":
        notify({
          phase: "transcribing",
          progress: chunkProgress(data.chunk - 1),
          total: totalChunks,
          current: data.chunk,
          message: data.message,
        });
        break;
      case "chunk_done":
        notify({
          phase: "transcribing",
          progress: chunkProgress(data.chunk),
          total: totalChunks,
          current: data.chunk,
          message: data.message,
        });
        break;
      case "chunk_error":
        // A single failed chunk is not fatal; the stream keeps going.
        console.warn(`Chunk ${data.chunk} error: ${data.message}`);
        break;
      case "error":
        throw new Error(data.message || "Transcription failed");
      case "complete":
        result = {
          transcript: data.transcript,
          chunks_processed: data.chunks_processed,
          chunks_total: data.chunks_total,
          total_duration_seconds: data.duration,
          language: "zh",
        };
        notify({ phase: "complete", progress: 100, message: "轉錄完成" });
        break;
      default:
        break;
    }
  };

  // Only malformed JSON is tolerated here; event handling happens outside the
  // try so a server "error" event is not mistaken for a parse problem.
  const processLine = (line) => {
    if (!line.startsWith("data: ")) {
      return;
    }
    let data;
    try {
      data = JSON.parse(line.slice(6));
    } catch (err) {
      console.warn("SSE parse error:", err, line);
      return;
    }
    handleEvent(data);
  };

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) {
        break;
      }
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      buffer = lines.pop() ?? ""; // Keep the incomplete trailing line buffered
      lines.forEach(processLine);
    }
    // Flush bytes still held by the streaming decoder, then any final line
    // that arrived without a trailing newline.
    buffer += decoder.decode();
    if (buffer.trim()) {
      buffer.split("\n").forEach(processLine);
    }
  } catch (err) {
    // Stop downloading the rest of the stream; a failing cancel must not
    // mask the original error.
    await reader.cancel().catch(() => {});
    throw err;
  }

  if (!result) {
    throw new Error("Transcription failed - no result received");
  }
  return result;
}
/**
 * Legacy non-streaming transcription upload, kept as a fallback.
 *
 * Sends the file as multipart form data to `/ai/transcribe-audio` through
 * XMLHttpRequest so upload progress can be reported, and settles with the
 * parsed JSON body of the response.
 *
 * @param {*} file - Value appended to the form under the "file" field.
 * @param {?function(Object): void} [onProgress=null] - Optional callback;
 *   receives `{ phase: "uploading", progress }` while the body is being sent
 *   and `{ phase: "processing", progress: 0 }` once the upload phase ends.
 * @returns {Promise<Object>} Parsed JSON response for a 2xx status.
 * @throws {Error} On 401 (after clearing the token and navigating to login),
 *   any other non-2xx status, an unparsable 2xx body, a network error, or
 *   when the 10-minute timeout elapses.
 */
export async function transcribeAudioLegacy(file, onProgress = null) {
  const endpoint = `${API_BASE_URL}/ai/transcribe-audio`;
  const payload = new FormData();
  payload.append("file", file);
  const authToken = getToken();

  return new Promise((resolve, reject) => {
    const xhr = new XMLHttpRequest();

    // Upload percentage, only when the total size is known and someone listens.
    const reportUpload = (event) => {
      if (!event.lengthComputable || !onProgress) {
        return;
      }
      const percent = Math.round((event.loaded / event.total) * 100);
      onProgress({ phase: "uploading", progress: percent });
    };

    // Settles the promise according to the final HTTP status.
    const settleFromResponse = () => {
      const { status } = xhr;

      if (status === 401) {
        clearToken();
        window.electronAPI?.navigate("login");
        reject(new Error("Session expired, please login again"));
        return;
      }

      if (status >= 200 && status < 300) {
        try {
          resolve(JSON.parse(xhr.responseText));
        } catch (parseFailure) {
          reject(new Error("Invalid response format"));
        }
        return;
      }

      // Any other status: prefer the server-provided detail when the body is JSON.
      try {
        const body = JSON.parse(xhr.responseText);
        reject(new Error(body.detail || `HTTP error ${status}`));
      } catch (parseFailure) {
        reject(new Error(`HTTP error ${status}`));
      }
    };

    xhr.upload.addEventListener("progress", reportUpload);
    xhr.addEventListener("load", settleFromResponse);
    xhr.addEventListener("error", () => reject(new Error("Network error")));
    xhr.addEventListener("timeout", () => reject(new Error("Request timeout")));

    xhr.open("POST", endpoint, true);
    xhr.timeout = 600000; // 10 minutes for large files
    if (authToken) {
      xhr.setRequestHeader("Authorization", `Bearer ${authToken}`);
    }
    xhr.send(payload);

    // Once the upload phase is over, tell the listener the server is working.
    if (onProgress) {
      xhr.upload.addEventListener("loadend", () => {
        onProgress({ phase: "processing", progress: 0 });
      });
    }
  });
}
// Export API
export async function exportMeeting(id) {
return request(`/meetings/${id}/export`, {