commit 0720b1d0ccb6f965d52b3bb7a45fc7b9940aa301 Author: beabigegg Date: Thu Aug 7 09:57:20 2025 +0800 first_upload diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..4b5bae3 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,19 @@ +.git +.gitignore +.idea +.vscode +__pycache__ +.venv +venv +env +.env +*.pyc +*.pyo +*.pyd +*.db +/uploads +/demucs_separated +Dockerfile +docker-compose.yml +README.md +啟動方式.txt diff --git a/.env b/.env new file mode 100644 index 0000000..9d69b09 --- /dev/null +++ b/.env @@ -0,0 +1,10 @@ +# Dify API Configuration +DIFY_API_KEY="app-VGhuWOymkg3LVw4LfUiy2cey" +DIFY_API_BASE_URL="https://dify.theaken.com/v1" + +# Celery Configuration +CELERY_BROKER_URL="redis://localhost:6379/0" +CELERY_RESULT_BACKEND="redis://localhost:6379/0" + +# Flask App Configuration +FLASK_RUN_PORT=12000 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4523d44 --- /dev/null +++ b/.gitignore @@ -0,0 +1,147 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if have platform-specific dependencies, it may be better to +# ignore it. +# Pipfile.lock + +# PEP 582; used by PDM, PEP 582 compatible installers +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# Personal +.idea/ +.vscode/ +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? 
+ +# Demucs and Uploads +/demucs_separated/ +/uploads/ +/啟動方式.txt diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..7c29082 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,36 @@ +# Use an official Python runtime as a parent image +FROM python:3.10-slim + +# Set the working directory in the container +WORKDIR /app + +# Install system dependencies required by some Python packages +# ffmpeg is crucial for audio/video processing +RUN apt-get update && apt-get install -y --no-install-recommends \ + ffmpeg \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +# Copy the requirements file into the container at /app +COPY requirements.txt . + +# Install any needed packages specified in requirements.txt +# This command will install the GPU version of PyTorch if the base image has CUDA support +# and the host machine has NVIDIA drivers. Otherwise, it can be adapted for CPU. +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the rest of the application's code into the container +COPY . . + +# Make port 12000 available to the world outside this container +# This is the port the app will run on, as defined in the .env file +EXPOSE 12000 + +# Define environment variable to ensure python outputs everything without buffering +ENV PYTHONUNBUFFERED 1 + +# Run app.py when the container launches +# Use Gunicorn for a production-ready WSGI server +# The command will be specified in docker-compose.yml to allow for different entrypoints +# for the web server and celery worker. 
+CMD ["gunicorn", "--bind", "0.0.0.0:12000", "app:app"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..5a1ee6c --- /dev/null +++ b/README.md @@ -0,0 +1,141 @@ +# AI Meeting Assistant + +AI Meeting Assistant 是一個功能強大的 Web 應用程式,旨在簡化和自動化會議後續處理流程。它提供了一系列工具,包括從影片中提取音訊、將音訊轉錄為文字、對逐字稿進行多語言翻譯,以及生成專業的會議結論。 + + +## ✨ 功能特色 + +- **影片轉音訊**: 從常見的影片格式(MP4, MOV, AVI)中快速提取 `.wav` 音訊檔案。 +- **AI 音訊轉文字**: 使用 OpenAI Whisper 模型將音訊轉錄為帶有精確時間戳的逐字稿。 + - **AI 人聲分離**: 內建 Demucs 模型,可在轉錄前分離人聲,有效處理背景嘈雜的音訊。 + - **多語言支援**: 支援中文、英文、日文、韓文等多種語言的自動偵測與轉錄。 +- **AI 逐段翻譯**: 整合 Dify AI 平台,提供高品質的逐字稿全文對照翻譯。 +- **AI 會議結論**: + - **初版摘要**: 自動生成會議的重點、待辦事項和結論。 + - **迭代修改**: 可根據您的指示,對已生成的結論進行多次修改和完善。 +- **現代化介面**: 採用 Bootstrap 5 構建,提供清晰、專業且響應式的使用者體驗。 +- **Docker 化部署**: 提供 Docker 和 Docker Compose 設定,實現一鍵啟動所有服務(Web, Celery, Redis),並支援 CPU 和 GPU 環境。 + +## 🛠️ 環境要求 + +在本地端執行此專案之前,請確保您已安裝以下軟體: + +- **Python**: 3.9 或更高版本。 +- **Redis**: 用於 Celery 的訊息代理和後端。 +- **FFmpeg**: 一個處理多媒體內容的開源工具集。請確保 `ffmpeg` 指令可在您的終端機中直接執行。 + - **Windows**: 可從 [官網](https://ffmpeg.org/download.html) 下載,並將其 `bin` 目錄加入系統的 `PATH` 環境變數。 + - **macOS (使用 Homebrew)**: `brew install ffmpeg` + - **Linux (Debian/Ubuntu)**: `sudo apt update && sudo apt install ffmpeg` + +## 🚀 本地端啟動指南 + +1. **克隆專案** + ```bash + git clone + cd AI_meeting_assistant + ``` + +2. **建立並啟用虛擬環境** + ```bash + python -m venv venv + # Windows + venv\Scripts\activate + # macOS / Linux + source venv/bin/activate + ``` + +3. **安裝 Python 依賴套件** + ```bash + pip install -r requirements.txt + ``` + *注意:`requirements.txt` 預設安裝 GPU 版本的 PyTorch。如果您的電腦沒有 NVIDIA GPU,請在安裝前將 `requirements.txt` 中的 `torch` 和 `torchaudio` 相關行替換為 CPU 版本:* + ``` + # torch --extra-index-url https://download.pytorch.org/whl/cu118 + # torchaudio --extra-index-url https://download.pytorch.org/whl/cu118 + torch + torchaudio + ``` + +4. **設定環境變數** + - 將 `.env.example` 檔案複製為 `.env`。 + - 在 `.env` 檔案中填入您的 Dify API 金鑰。 + ``` + DIFY_API_KEY="your_dify_api_key_here" + ``` + +5. 
**啟動服務** + 您需要開啟 **三個獨立的終端機**,並在每個終端機中啟用虛擬環境 (`venv`): + + - **終端機 1: 啟動 Redis** + 如果尚未執行,請啟動您的 Redis 伺服器。 + ```bash + redis-server + ``` + + - **終端機 2: 啟動 Celery Worker** + 此背景處理器將執行所有耗時的 AI 任務。 + ```bash + celery -A tasks.celery worker --loglevel=info --pool=solo + ``` + *在 Windows 上,可能需要使用 `gevent` 或 `eventlet` 作為執行池:`celery -A tasks.celery worker --loglevel=info -P gevent`* + + - **終端機 3: 啟動 Flask Web 應用** + ```bash + flask run + ``` + 應用程式將在 `.env` 檔案中指定的 Port(預設為 `12000`)上執行。 + +6. **訪問應用** + 在瀏覽器中開啟 `http://127.0.0.1:12000`。 + +## 🐳 Docker 部署指南 + +使用 Docker 是部署此應用的推薦方式,因為它能自動處理所有服務和依賴。 + +1. **安裝 Docker 和 Docker Compose** + 請確保您的系統已安裝 [Docker](https://www.docker.com/products/docker-desktop/)。 + +2. **設定環境變數** + - **重要**: Docker 使用的是容器網路,因此需要將 Redis 的主機名稱指向 Docker Compose 中定義的服務名稱。請確認 `.env` 檔案中的 `CELERY_BROKER_URL` 和 `CELERY_RESULT_BACKEND` 指向 `redis`: + ``` + CELERY_BROKER_URL="redis://redis:6379/0" + CELERY_RESULT_BACKEND="redis://redis:6379/0" + ``` + +3. **建構並啟動容器** + 在專案根目錄下執行以下指令: + ```bash + docker-compose up --build + ``` + - **GPU 支援**: 如果您的系統支援 NVIDIA GPU 且已安裝 `nvidia-docker-toolkit`,Docker Compose 將會自動使用 `Dockerfile` 中為 GPU 優化的設定來建構映像。 + - **CPU 使用者**: 如果您沒有 GPU,Docker 會自動退回使用 CPU 版本的 PyTorch,無需任何修改。 + +4. **訪問應用** + 在瀏覽器中開啟 `http://localhost:12000`。 + +5. **停止服務** + 若要停止所有容器,請在終端機中按下 `Ctrl + C`,然後執行: + ```bash + docker-compose down + ``` + +## 📂 專案結構 + +``` +. 
import os
import uuid
from flask import Flask, request, jsonify, render_template, send_from_directory, url_for
from werkzeug.utils import secure_filename
from dotenv import load_dotenv
from tasks import (
    celery,
    extract_audio_task,
    transcribe_audio_task,
    translate_segments_task,
    summarize_text_task
)

# --- Flask app setup ---
load_dotenv()
app = Flask(__name__)

project_root = os.path.dirname(os.path.abspath(__file__))
UPLOAD_FOLDER = os.path.join(project_root, 'uploads')

os.makedirs(UPLOAD_FOLDER, exist_ok=True)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = 1024 * 1024 * 1024  # cap uploads at 1 GB


def save_uploaded_file(file_key='file'):
    """Store one uploaded file under UPLOAD_FOLDER with a collision-proof name.

    Returns a ``(file_path, error)`` pair: on success ``error`` is None; on
    failure ``file_path`` is None and ``error`` is a ``(response, status)``
    tuple the caller can ``return`` directly from the view.
    """
    if file_key not in request.files:
        return None, (jsonify({'error': '請求中沒有檔案部分'}), 400)
    file = request.files[file_key]
    if file.filename == '':
        return None, (jsonify({'error': '未選擇檔案'}), 400)
    # Keep only the (sanitised) extension and generate a UUID basename so
    # simultaneous uploads of identically named files never overwrite each other.
    original_filename = secure_filename(file.filename)
    file_extension = os.path.splitext(original_filename)[1]
    unique_filename = f"{uuid.uuid4()}{file_extension}"
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], unique_filename)
    file.save(file_path)
    return file_path, None


# --- API routes ---
@app.route('/')
def index():
    """Serve the single-page frontend."""
    return render_template('index.html')


@app.route('/extract_audio', methods=['POST'])
def handle_extract_audio():
    """Queue extraction of a .wav audio track from an uploaded video."""
    input_path, error = save_uploaded_file()
    if error:
        return error

    output_audio_path = os.path.splitext(input_path)[0] + ".wav"
    task = extract_audio_task.delay(input_path, output_audio_path)
    return jsonify({'task_id': task.id, 'status_url': url_for('get_task_status', task_id=task.id)}), 202


@app.route('/transcribe_audio', methods=['POST'])
def handle_transcribe_audio():
    """Queue Whisper transcription, optionally preceded by Demucs vocal separation."""
    input_path, error = save_uploaded_file()
    if error:
        return error

    language = request.form.get('language', 'auto')
    use_demucs = request.form.get('use_demucs') == 'on'

    output_txt_path = os.path.splitext(input_path)[0] + ".txt"
    task = transcribe_audio_task.delay(input_path, output_txt_path, language, use_demucs)
    return jsonify({'task_id': task.id, 'status_url': url_for('get_task_status', task_id=task.id)}), 202


@app.route('/translate_text', methods=['POST'])
def handle_translate_text():
    """Queue segment-by-segment translation of an uploaded transcript file."""
    input_path, error = save_uploaded_file()
    if error:
        return error

    target_language = request.form.get('target_language', '繁體中文')
    output_txt_path = os.path.splitext(input_path)[0] + "_translated.txt"
    task = translate_segments_task.delay(input_path, output_txt_path, target_language)
    return jsonify({'task_id': task.id, 'status_url': url_for('get_task_status', task_id=task.id)}), 202


@app.route('/summarize_text', methods=['POST'])
def handle_summarize_text():
    """Queue meeting summarisation (initial draft, or a revision round when
    ``conversation_id`` / ``revision_instruction`` are supplied)."""
    data = request.get_json()
    if not data or 'text_content' not in data:
        return jsonify({'error': '請求中缺少 text_content'}), 400

    text_content = data['text_content']
    target_language = data.get('target_language', '繁體中文')
    conversation_id = data.get('conversation_id')
    revision_instruction = data.get('revision_instruction')

    task = summarize_text_task.delay(text_content, target_language, conversation_id, revision_instruction)
    return jsonify({'task_id': task.id, 'status_url': url_for('get_task_status', task_id=task.id)}), 202


# --- Generic status polling and downloads ---
# NOTE: the <task_id>/<filename> URL converters were stripped from the
# committed dump; without them Flask raises TypeError on every request
# because the view functions take an argument the rule never captures.
@app.route('/status/<task_id>')
def get_task_status(task_id):
    """Report the Celery task state; attach a download URL once finished."""
    task = celery.AsyncResult(task_id)
    response_data = {'state': task.state, 'info': task.info if isinstance(task.info, dict) else str(task.info)}

    if task.state == 'SUCCESS' and isinstance(task.info, dict) and task.info.get('result_path'):
        response_data['info']['download_url'] = url_for('download_file', filename=os.path.basename(task.info['result_path']))

    return jsonify(response_data)


@app.route('/download/<filename>')
def download_file(filename):
    """Serve a finished result file; send_from_directory rejects path traversal."""
    return send_from_directory(app.config['UPLOAD_FOLDER'], filename, as_attachment=True)


# --- Main entry point (local development only; Docker serves via gunicorn) ---
if __name__ == '__main__':
    port = int(os.environ.get("FLASK_RUN_PORT", 5000))
    # debug=True is intentional for the dev server; never expose it publicly.
    app.run(host='0.0.0.0', port=port, debug=True)
+ volumes: + - .:/app + - ./uploads:/app/uploads + - ./demucs_separated:/app/demucs_separated + env_file: + - .env + depends_on: + - redis + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + # For Windows, you might need to use -P gevent + command: celery -A tasks.celery worker --loglevel=info --pool=solo + +volumes: + redis_data: diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..1f10ae8 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,18 @@ +Flask==2.2.5 +celery==5.3.6 +redis==4.5.4 +# For NVIDIA GPU (CUDA 11.8) support, use these lines: +torch --extra-index-url https://download.pytorch.org/whl/cu118 +torchaudio --extra-index-url https://download.pytorch.org/whl/cu118 +# For CPU-only, comment out the two lines above and uncomment the two lines below: +# torch +# torchaudio +openai-whisper +moviepy +opencc-python-reimplemented +ffmpeg-python +python-dotenv +gunicorn +demucs +soundfile +gevent # Added for celery on windows diff --git a/static/css/style.css b/static/css/style.css new file mode 100644 index 0000000..9971d18 --- /dev/null +++ b/static/css/style.css @@ -0,0 +1,38 @@ +/* static/css/style.css */ +body { + background-color: #f8f9fa; +} + +.container { + max-width: 960px; +} + +.card-header-tabs { + margin-bottom: -1px; +} + +.nav-link { + color: #6c757d; +} + +.nav-link.active { + color: #000; + background-color: #fff; + border-color: #dee2e6 #dee2e6 #fff; +} + +.result-preview { + white-space: pre-wrap; + word-wrap: break-word; + max-height: 400px; + overflow-y: auto; + font-family: 'Courier New', Courier, monospace; +} + +.action-btn:disabled { + cursor: not-allowed; +} + +.progress-bar { + transition: width 0.6s ease; +} diff --git a/static/js/script.js b/static/js/script.js new file mode 100644 index 0000000..b00f9b0 --- /dev/null +++ b/static/js/script.js @@ -0,0 +1,275 @@ +document.addEventListener('DOMContentLoaded', function() { + // --- Global variables --- + 
let statusInterval; + let currentTaskType = ''; + let summaryConversationId = null; + let lastSummaryText = ''; + + // --- DOM Elements --- + const progressContainer = document.getElementById('progress-container'); + const statusText = document.getElementById('status-text'); + const progressBar = document.getElementById('progress-bar'); + const resultContainer = document.getElementById('result-container'); + const textResultPreview = document.getElementById('text-result-preview'); + const downloadLink = document.getElementById('download-link'); + const revisionArea = document.getElementById('revision-area'); + const allActionButtons = document.querySelectorAll('.action-btn'); + + // --- Tab Switching Logic --- + const tabButtons = document.querySelectorAll('#myTab button'); + tabButtons.forEach(button => { + button.addEventListener('shown.bs.tab', function() { + resetUiForNewTask(); + }); + }); + + // --- Event Listeners for all action buttons --- + allActionButtons.forEach(button => { + button.addEventListener('click', handleActionClick); + }); + + function handleActionClick(event) { + const button = event.currentTarget; + currentTaskType = button.dataset.task; + + resetUiForNewTask(); + button.disabled = true; + button.innerHTML = ' 處理中...'; + progressContainer.style.display = 'block'; + + if (currentTaskType === 'summarize_text') { + const fileInput = document.getElementById('summary-file-input'); + const file = fileInput.files[0]; + + if (file) { + const reader = new FileReader(); + reader.onload = function(e) { + const fileContent = e.target.result; + startSummarizeTask(fileContent); + }; + reader.onerror = function() { + handleError("讀取檔案時發生錯誤。"); + }; + reader.readAsText(file); + } else { + const textContent = document.getElementById('summary-source-text').value; + if (!textContent.trim()) { + alert('請貼上文字或選擇檔案!'); + resetButtons(); + return; + } + startSummarizeTask(textContent); + } + return; + } + + let endpoint = ''; + let formData = new FormData(); + 
let body = null; + let fileInput; + + switch (currentTaskType) { + case 'extract_audio': + endpoint = '/extract_audio'; + fileInput = document.getElementById('video-file'); + break; + + case 'transcribe_audio': + endpoint = '/transcribe_audio'; + fileInput = document.getElementById('audio-file'); + formData.append('language', document.getElementById('lang-select').value); + if (document.getElementById('use-demucs').checked) { + formData.append('use_demucs', 'on'); + } + break; + + case 'translate_text': + endpoint = '/translate_text'; + fileInput = document.getElementById('transcript-file'); + formData.append('target_language', document.getElementById('translate-lang-select').value); + break; + + case 'revise_summary': + endpoint = '/summarize_text'; + const instruction = document.getElementById('revision-instruction').value; + if (!lastSummaryText) { alert('請先生成初版結論!'); resetButtons(); return; } + if (!instruction.trim()) { alert('請輸入修改指示!'); resetButtons(); return; } + body = JSON.stringify({ + text_content: lastSummaryText, + revision_instruction: instruction, + target_language: document.getElementById('summary-lang-select').value, + conversation_id: summaryConversationId + }); + startFetchTask(endpoint, body, { 'Content-Type': 'application/json' }); + return; + + default: + console.error('Unknown task type:', currentTaskType); + resetButtons(); + return; + } + + if (!fileInput || !fileInput.files[0]) { + alert('請選擇一個檔案!'); + resetButtons(); + return; + } + formData.append('file', fileInput.files[0]); + body = formData; + + startFetchTask(endpoint, body); + } + + function startSummarizeTask(textContent) { + summaryConversationId = null; + lastSummaryText = textContent; + const body = JSON.stringify({ + text_content: textContent, + target_language: document.getElementById('summary-lang-select').value + }); + startFetchTask('/summarize_text', body, { 'Content-Type': 'application/json' }); + } + + function startFetchTask(endpoint, body, headers = {}) { + 
updateProgress(0, '準備上傳與處理...'); + fetch(endpoint, { + method: 'POST', + body: body, + headers: headers + }) + .then(response => { + if (!response.ok) { + return response.json().then(err => { throw new Error(err.error || '伺服器錯誤') }); + } + return response.json(); + }) + .then(data => { + if (data.task_id) { + statusInterval = setInterval(() => checkTaskStatus(data.status_url), 2000); + } else { + handleError(data.error || '未能啟動背景任務'); + } + }) + .catch(error => { + handleError(error.message || '請求失敗'); + }); + } + + function checkTaskStatus(statusUrl) { + fetch(statusUrl) + .then(response => response.json()) + .then(data => { + const info = data.info || {}; + if (data.state === 'PROGRESS') { + updateProgress(info.current, info.status, info.total); + const previewContent = info.content || info.summary || info.preview; + if (previewContent) { + resultContainer.style.display = 'block'; + textResultPreview.textContent = previewContent; + textResultPreview.style.display = 'block'; + } + } else if (data.state === 'SUCCESS') { + clearInterval(statusInterval); + updateProgress(100, info.status || '完成!', 100); + displayResult(info); + resetButtons(); + } else if (data.state === 'FAILURE') { + clearInterval(statusInterval); + handleError(info.exc_message || '任務執行失敗'); + } + }) + .catch(error => { + clearInterval(statusInterval); + handleError('查詢進度時發生網路錯誤: ' + error); + }); + } + + function updateProgress(current, text, total = 100) { + const percent = total > 0 ? 
Math.round((current / total) * 100) : 0; + progressBar.style.width = percent + '%'; + progressBar.setAttribute('aria-valuenow', percent); + progressBar.textContent = percent + '%'; + statusText.textContent = text; + } + + function displayResult(info) { + resultContainer.style.display = 'block'; + + const content = info.content || info.summary; + if (content) { + textResultPreview.textContent = content; + textResultPreview.style.display = 'block'; + lastSummaryText = content; + } else { + textResultPreview.style.display = 'none'; + } + + if (info.download_url) { + downloadLink.href = info.download_url; + downloadLink.style.display = 'inline-block'; + } + + if (currentTaskType === 'summarize_text' || currentTaskType === 'revise_summary') { + revisionArea.style.display = 'block'; + summaryConversationId = info.conversation_id; + } + } + + function handleError(message) { + statusText.textContent = `錯誤:${message}`; + progressBar.classList.add('bg-danger'); + resetButtons(); + } + + function resetUiForNewTask() { + if (statusInterval) clearInterval(statusInterval); + + progressContainer.style.display = 'none'; + resultContainer.style.display = 'none'; + textResultPreview.style.display = 'none'; + textResultPreview.textContent = ''; + downloadLink.style.display = 'none'; + revisionArea.style.display = 'none'; + + progressBar.style.width = '0%'; + progressBar.setAttribute('aria-valuenow', 0); + progressBar.textContent = '0%'; + progressBar.classList.remove('bg-danger'); + statusText.textContent = ''; + + resetButtons(); + } + + function resetButtons() { + allActionButtons.forEach(button => { + button.disabled = false; + const task = button.dataset.task; + let iconHtml = ''; + let text = ''; + + switch(task) { + case 'extract_audio': + iconHtml = ''; + text = '開始轉換'; + break; + case 'transcribe_audio': + iconHtml = ''; + text = '開始轉錄'; + break; + case 'translate_text': + iconHtml = ''; + text = '開始翻譯'; + break; + case 'summarize_text': + iconHtml = ''; + text = '產生初版結論'; + 
break; + case 'revise_summary': + iconHtml = ''; + text = '根據指示產生修改版'; + break; + } + button.innerHTML = iconHtml + text; + }); + } +}); diff --git a/tasks.py b/tasks.py new file mode 100644 index 0000000..69a6c6b --- /dev/null +++ b/tasks.py @@ -0,0 +1,248 @@ +import os +import sys +import whisper +import torch +import shutil +import subprocess +import time +import re +import json +import requests +from celery import Celery, Task +from opencc import OpenCC +from moviepy import VideoFileClip +from dotenv import load_dotenv + +# Load environment variables from .env file +load_dotenv() + +# ========== Dify API Configuration ========== +DIFY_API_KEY = os.environ.get("DIFY_API_KEY") +DIFY_API_BASE_URL = os.environ.get("DIFY_API_BASE_URL") + +# ========== Dify API Client Function ========== +def ask_dify(prompt: str, user_id: str = "default-tk-user-resume", inputs: dict = None, response_mode: str = "streaming", conversation_id: str = None, timeout_seconds: int = 1200) -> dict: + if not DIFY_API_KEY or not DIFY_API_BASE_URL: + return {"answer": "❌ 錯誤:DIFY_API_KEY 或 DIFY_API_BASE_URL 未在環境變數中設定。", "conversation_id": conversation_id} + + if inputs is None: + inputs = {} + url = f"{DIFY_API_BASE_URL}/chat-messages" + headers = {"Authorization": f"Bearer {DIFY_API_KEY}", "Content-Type": "application/json"} + payload = {"inputs": inputs, "query": prompt, "user": user_id, "response_mode": response_mode} + if conversation_id: + payload["conversation_id"] = conversation_id + returned_conversation_id = conversation_id + error_from_stream_message = None + print(f"\n--- [ASK_DIFY] Sending request to Dify ---") + try: + is_streaming_request = (response_mode == "streaming") + response = requests.post(url, headers=headers, json=payload, timeout=timeout_seconds, stream=is_streaming_request) + response.raise_for_status() + if is_streaming_request: + full_answer_chunks = [] + for line in response.iter_lines(): + if line: + decoded_line = line.decode('utf-8') + if 
decoded_line.startswith("data:"): + try: + data_json_str = decoded_line[len("data:"):] + data_obj = json.loads(data_json_str) + event_type = data_obj.get("event") + if event_type == "agent_message" or event_type == "message": + if "answer" in data_obj and data_obj["answer"] is not None: + full_answer_chunks.append(data_obj["answer"]) + elif event_type == "message_end": + if "conversation_id" in data_obj: + returned_conversation_id = data_obj["conversation_id"] + break + elif event_type == "error": + error_from_stream_message = data_obj.get('message', 'Dify API 返回未知的流式錯誤') + returned_conversation_id = data_obj.get("conversation_id", returned_conversation_id) + break + except json.JSONDecodeError: + pass + if error_from_stream_message: + clean_error_answer = re.sub(r".*?\s*", "", error_from_stream_message, flags=re.DOTALL).strip() + return {"answer": f"❌ Dify API 流處理錯誤: {clean_error_answer}", "conversation_id": returned_conversation_id} + raw_answer = "".join(full_answer_chunks) if full_answer_chunks else "⚠️ 流式響應未包含有效回答或內容為空" + else: + response_data = response.json() + raw_answer = response_data.get("answer", "⚠️ 回應中未找到 'answer' 欄位或內容為空") + returned_conversation_id = response_data.get("conversation_id", returned_conversation_id) + clean_answer = re.sub(r".*?\s*", "", raw_answer, flags=re.DOTALL).strip() + return {"answer": clean_answer, "conversation_id": returned_conversation_id} + except requests.exceptions.Timeout: + return {"answer": f"⚠️ 請求 Dify API 逾時 (超過 {timeout_seconds} 秒)", "conversation_id": conversation_id} + except requests.exceptions.HTTPError as http_err: + error_message_detail = f" - 原始響應: {http_err.response.text}" + final_error_message = f"❌ Dify API HTTP 錯誤: {http_err.response.status_code}{error_message_detail}" + return {"answer": final_error_message, "conversation_id": conversation_id} + except Exception as e: + return {"answer": f"❌ 處理 Dify API 請求或響應時發生未預期錯誤: {type(e).__name__}: {str(e)}", "conversation_id": conversation_id} +# ========== Dify 
API Client Function END ========== + + +# ========== Celery 設定 ========== +celery = Celery( + 'tasks', + broker=os.environ.get('CELERY_BROKER_URL', 'redis://localhost:6379/0'), + backend=os.environ.get('CELERY_RESULT_BACKEND', 'redis://localhost:6379/0') +) + +class ProgressTask(Task): + def update_progress(self, current, total, status, extra_info=None): + meta = {'current': current, 'total': total, 'status': status} + if extra_info: + meta.update(extra_info) + self.update_state(state='PROGRESS', meta=meta) + +# ========== Demucs 輔助函式 ========== +def separate_vocals_with_demucs(self, audio_path, project_root): + self.update_progress(10, 100, "🎛️ 使用 Demucs 分離人聲...") + output_dir = os.path.join(project_root, 'demucs_separated') + cmd = [ + sys.executable, '-m', 'demucs.separate', # <-- 將 'python' 改為 sys.executable + '-n', 'htdemucs_ft', + '--two-stems=vocals', + '-o', output_dir, + audio_path + ] + try: + result = subprocess.run(cmd, check=True, capture_output=True) + print("Demucs stdout:", result.stdout) + except subprocess.CalledProcessError as e: + stderr_str = "No stderr output" + if e.stderr: + try: + stderr_str = e.stderr.decode('utf-8') + except UnicodeDecodeError: + stderr_str = e.stderr.decode(sys.getdefaultencoding(), errors='replace') + print("Demucs stderr:", stderr_str) + raise RuntimeError(f"Demucs 人聲分離失敗: {stderr_str}") + original_filename_base = os.path.splitext(os.path.basename(audio_path))[0] + vocals_path = os.path.join(output_dir, 'htdemucs_ft', original_filename_base, 'vocals.wav') + if not os.path.exists(vocals_path): + raise FileNotFoundError(f"找不到 Demucs 分離出的人聲音訊檔案: {vocals_path}") + return vocals_path + +# ========== Task 1: 影片轉音訊 ========== +@celery.task(base=ProgressTask, bind=True) +def extract_audio_task(self, input_video_path, output_audio_path): + try: + self.update_progress(0, 100, "準備轉換影片...") + video_clip = VideoFileClip(input_video_path) + self.update_progress(50, 100, "正在提取音訊...") + 
video_clip.audio.write_audiofile(output_audio_path, codec="pcm_s16le") + video_clip.close() + self.update_progress(100, 100, "音訊提取完成!") + return {'status': '完成', 'result_path': output_audio_path} + except Exception as e: + self.update_state(state='FAILURE', meta={'exc_type': type(e).__name__, 'exc_message': str(e)}) + return {'status': '錯誤', 'error': str(e)} + +# ========== Task 2: 音訊轉文字 (Whisper) ========== +@celery.task(base=ProgressTask, bind=True) +def transcribe_audio_task(self, input_audio_path, output_txt_path, language, use_demucs=False): + try: + self.update_progress(0, 100, "準備開始轉錄...") + current_audio_path = input_audio_path + if use_demucs: + project_root = os.path.dirname(os.path.dirname(output_txt_path)) # uploads folder is inside project_root + current_audio_path = separate_vocals_with_demucs(self, input_audio_path, project_root) + self.update_progress(25, 100, "✅ 人聲分離完成,準備載入 Whisper...") + + device = "cuda" if torch.cuda.is_available() else "cpu" + progress_after_load = 40 if use_demucs else 20 + self.update_progress(progress_after_load - 10, 100, f"載入 Whisper 'medium' 模型 (使用 {device})...") + model = whisper.load_model("medium", device=device) + self.update_progress(progress_after_load, 100, "模型載入完畢,開始轉錄音訊...") + transcription_result = model.transcribe( + audio=current_audio_path, + language=language if language != 'auto' else None, + fp16=(device == "cuda"), + verbose=False + ) + self.update_progress(85, 100, "轉錄完成,進行繁體轉換與格式化...") + cc = OpenCC('s2twp') + with open(output_txt_path, "w", encoding="utf-8") as f_out: + for segment in transcription_result["segments"]: + start_time_s = int(segment["start"]) + end_time_s = int(segment["end"]) + text_content = cc.convert(segment["text"].strip()) + formatted_line = f"[{start_time_s:04d}s - {end_time_s:04d}s] {text_content}\n" + f_out.write(formatted_line) + self.update_progress(100, 100, "逐字稿完成!") + return {'status': '完成', 'result_path': output_txt_path} + except Exception as e: + import traceback + 
traceback.print_exc() + self.update_state(state='FAILURE', meta={'exc_type': type(e).__name__, 'exc_message': str(e)}) + return {'status': '錯誤', 'error': str(e)} + +# ========== Task 3: 逐段翻譯 (Dify) ========== +glossary = {"LLM": "大型語言模型", "prompt": "提示詞", "API": "應用程式介面"} +def generate_translation_prompt_for_dify(english_text_segment, target_language): + glossary_entries = "\n".join([f"- {k}: {v}" for k, v in glossary.items()]) + return ( + f"動作: 翻譯\n目標語言: {target_language}\n參考字典:\n{glossary_entries}\n\n" + f"需翻譯內容or需總結內容:\n---\n{english_text_segment}\n---\n你的結果:\n" + ) +def split_segments(text_content): + pattern = re.compile(r"(\[\d{4}s\s*-\s*\d{4}s\])(.*?)(?=\n\[\d{4}s|$)", re.DOTALL) + return [f"{match.group(1).strip()} {match.group(2).strip()}" for match in pattern.finditer(text_content)] +def parse_segment(segment_text): + match = re.match(r"(\[\d{4}s\s*-\s*\d{4}s\])\s*(.*)", segment_text.strip(), re.DOTALL) + return (match.group(1), match.group(2)) if match else (None, segment_text.strip()) + +@celery.task(base=ProgressTask, bind=True) +def translate_segments_task(self, input_txt_path, output_txt_path, target_language): + try: + with open(input_txt_path, "r", encoding="utf-8") as f: + segments = split_segments(f.read()) + total_segments = len(segments) + if total_segments == 0: + self.update_progress(100, 100, "完成,但輸入檔無內容。") + return {'status': '完成', 'result_path': None, 'content': '(輸入檔案為空)'} + conversation_id = None + full_translated_content = "" + for idx, segment_text in enumerate(segments): + timestamp, original_text = parse_segment(segment_text) + status_msg = f"正在翻譯第 {idx + 1}/{total_segments} 段..." 
+ self.update_progress(idx, total_segments, status_msg, {'preview': full_translated_content}) + if not original_text.strip(): + translated_text = "(原始內容為空)" + else: + prompt = generate_translation_prompt_for_dify(original_text, target_language) + response = ask_dify(prompt, conversation_id=conversation_id) + translated_text = response.get("answer", "翻譯失敗") + conversation_id = response.get("conversation_id") + line_break = "\n\n" if full_translated_content else "" + segment_result = f"{timestamp}\n{original_text}\n👉{translated_text}" + full_translated_content += f"{line_break}{segment_result}" + with open(output_txt_path, "w", encoding="utf-8") as f: + f.write(full_translated_content) + self.update_progress(total_segments, total_segments, "全部翻譯完成!") + return {'status': '完成', 'result_path': output_txt_path, 'content': full_translated_content} + except Exception as e: + self.update_state(state='FAILURE', meta={'exc_type': type(e).__name__, 'exc_message': str(e)}) + return {'status': '錯誤', 'error': str(e)} + +# ========== Task 4: 會議結論整理 (Dify) ========== +@celery.task(base=ProgressTask, bind=True) +def summarize_text_task(self, text_content, target_language, conversation_id=None, revision_instruction=None): + try: + self.update_progress(1, 100, "準備提示詞...") + if revision_instruction: + prompt = f"Existing Summary:\n---\n{text_content}\n---\n\nUser's Revision Instructions:\n---\n{revision_instruction}\n---\nPlease provide the revised meeting summary in {target_language}:" + else: + prompt = f"Please act as a professional meeting analyst and summarize the following meeting transcript into key points, action items, and conclusions. 
The summary should be in {target_language}.\n\nTranscript:\n---\n{text_content}\n---" + self.update_progress(20, 100, "正在請求 Dify API...") + response = ask_dify(prompt, conversation_id=conversation_id) + summary = response.get("answer", "總結失敗") + new_conv_id = response.get("conversation_id") + self.update_progress(100, 100, "總結已生成!") + return {'status': '完成', 'summary': summary, 'conversation_id': new_conv_id} + except Exception as e: + self.update_state(state='FAILURE', meta={'exc_type': type(e).__name__, 'exc_message': str(e)}) + return {'status': '錯誤', 'error': str(e)} diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..b37dd5d --- /dev/null +++ b/templates/index.html @@ -0,0 +1,168 @@ + + + + + + AI Meeting Assistant + + + + + +
+
+

AI 會議助手

+

一個強大的工具,用於轉錄、翻譯和總結您的會議內容。

+
+ +
+
+ +
+
+
+ +
+
影片轉音訊 (.wav)
+

從影片檔案中提取音軌,以便進行後續處理。

+
+ + +
+ +
+ + +
+
音訊轉文字 (Whisper)
+

將音訊檔案轉錄成帶有時間戳的逐字稿。

+
+ + +
+
+
+ + +
+
+
+ + +
+ +
+ + +
+
逐段翻譯 (Dify)
+

將逐字稿檔案進行逐段對照翻譯。

+
+ + +
+
+ + +
+ +
+ + +
+
會議結論整理 (Dify)
+

從逐字稿或貼上的文字中生成會議摘要。

+
+ + +
+
+ + +
+
+ + +
+ +
+
+
+
+ + + + + +
+ + + + +