Document_Translator/app/api/files.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
檔案管理 API

Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""

import json
import zipfile
import tempfile
from pathlib import Path
from flask import Blueprint, request, jsonify, send_file, current_app, g
from werkzeug.utils import secure_filename
from app.utils.decorators import jwt_login_required, rate_limit
from app.utils.validators import validate_file, validate_languages, validate_job_uuid
from app.utils.helpers import (
    save_uploaded_file,
    create_response,
    format_file_size,
    generate_download_token
)
from app.utils.exceptions import ValidationError, FileProcessingError
from app.utils.logger import get_logger
from app.models.job import TranslationJob
from app.models.log import SystemLog

# Blueprint that groups the file endpoints defined in this module; every route
# below is declared relative to the '/files' URL prefix.
files_bp = Blueprint('files', __name__, url_prefix='/files')
# Module-level logger obtained from the project's logger helper.
logger = get_logger(__name__)


def _discard_job(db, job):
    """Delete a job row whose uploaded file could not be stored."""
    db.session.delete(job)
    db.session.commit()


def _start_translation(job, db):
    """Kick off translation for *job*.

    First tries to queue the job through the Celery task. If queuing raises,
    falls back to running ``TranslationService.translate_document`` inline.
    Any failure is logged and recorded on the job as status ``FAILED``; this
    helper deliberately does not re-raise, so the upload itself still
    succeeds.
    """
    try:
        from app.tasks.translation import process_translation_job

        # Preferred path: asynchronous processing through Celery.
        try:
            task = process_translation_job.delay(job.id)
            logger.info(f"Translation task queued with Celery: {task.id} for job {job.job_uuid}")
        except Exception as celery_error:
            logger.warning(f"Celery not available, falling back to synchronous processing: {str(celery_error)}")

            # Celery could not take the job: translate within this request.
            try:
                from app.services.translation_service import TranslationService
                service = TranslationService()

                logger.info(f"Starting synchronous translation for job {job.job_uuid}")
                result = service.translate_document(job.job_uuid)
                logger.info(f"Synchronous translation completed for job {job.job_uuid}: {result}")

            except Exception as sync_error:
                logger.error(f"Synchronous translation failed for job {job.job_uuid}: {str(sync_error)}")
                job.update_status('FAILED', error_message=f"翻譯處理失敗: {str(sync_error)}")
                db.session.commit()

    except Exception as e:
        logger.error(f"Failed to process translation for job {job.job_uuid}: {str(e)}")
        job.update_status('FAILED', error_message=f"任務處理失敗: {str(e)}")
        db.session.commit()


@files_bp.route('/upload', methods=['POST'])
@jwt_login_required
@rate_limit(max_requests=20, per_seconds=3600)  # at most 20 uploads per hour
def upload_file():
    """Accept an uploaded document and create a translation job for it.

    Reads from the multipart request:
        file: the document to translate (required).
        source_language: form field, defaults to ``'auto'``.
        target_languages: form field holding a JSON string, defaults to ``'[]'``.

    Returns:
        A JSON response built with ``create_response``. On success it carries
        the job UUID, file details, status and queue position. Failures are
        reported as 400 (missing file, malformed ``target_languages``,
        ``ValidationError``) or 500 (``FileProcessingError`` and any other
        exception).
    """
    # Imported here rather than at module level, as in the original code, and
    # bound before the ``try`` so the error handler can always reach it.
    from app import db

    try:
        # A file part is mandatory.
        if 'file' not in request.files:
            return jsonify(create_response(
                success=False,
                error='NO_FILE',
                message='未選擇檔案'
            )), 400

        file_obj = request.files['file']

        # Validate the upload; raises ValidationError on rejection.
        file_info = validate_file(file_obj)

        # Translation settings supplied alongside the file.
        source_language = request.form.get('source_language', 'auto')
        target_languages_str = request.form.get('target_languages', '[]')

        try:
            target_languages = json.loads(target_languages_str)
        except json.JSONDecodeError:
            return jsonify(create_response(
                success=False,
                error='INVALID_TARGET_LANGUAGES',
                message='目標語言格式錯誤'
            )), 400

        # Validate the language settings.
        lang_info = validate_languages(source_language, target_languages)

        # Create the job record; the file path is filled in once the file is stored.
        job = TranslationJob(
            user_id=g.current_user_id,
            original_filename=file_info['filename'],
            file_extension=file_info['file_extension'],
            file_size=file_info['file_size'],
            file_path='',
            source_language=lang_info['source_language'],
            target_languages=lang_info['target_languages'],
            status='PENDING'
        )

        # Persist first so that job.job_uuid is available for storing the file.
        db.session.add(job)
        db.session.commit()

        # Store the file. The job row is already committed, so it has to be
        # removed again both when the helper reports failure and when it
        # raises; otherwise a PENDING job without a file would be left behind.
        try:
            file_result = save_uploaded_file(file_obj, job.job_uuid)
        except Exception:
            _discard_job(db, job)
            raise

        if not file_result['success']:
            _discard_job(db, job)
            raise FileProcessingError(f"檔案儲存失敗: {file_result['error']}")

        # Record where the file was stored.
        job.file_path = file_result['file_path']

        # Register the original file on the job.
        job.add_original_file(
            filename=file_result['filename'],
            file_path=file_result['file_path'],
            file_size=file_result['file_size']
        )

        db.session.commit()

        # Queue position is computed before translation is triggered.
        queue_position = TranslationJob.get_queue_position(job.job_uuid)

        SystemLog.info(
            'files.upload',
            f'File uploaded successfully: {file_info["filename"]}',
            user_id=g.current_user_id,
            job_id=job.id,
            extra_data={
                'filename': file_info['filename'],
                'file_size': file_info['file_size'],
                'source_language': source_language,
                'target_languages': target_languages
            }
        )

        logger.info(f"File uploaded successfully: {job.job_uuid} - {file_info['filename']}")

        # Trigger translation; failures are recorded on the job, not raised.
        _start_translation(job, db)

        return jsonify(create_response(
            success=True,
            data={
                'job_uuid': job.job_uuid,
                'original_filename': job.original_filename,
                'file_size': job.file_size,
                'file_size_formatted': format_file_size(job.file_size),
                'source_language': job.source_language,
                'target_languages': job.target_languages,
                'status': job.status,
                'queue_position': queue_position,
                'created_at': job.created_at.isoformat()
            },
            message='檔案上傳成功，已加入翻譯佇列'
        ))

    except ValidationError as e:
        logger.warning(f"File upload validation error: {str(e)}")
        return jsonify(create_response(
            success=False,
            error=e.error_code,
            message=str(e)
        )), 400

    except FileProcessingError as e:
        logger.error(f"File processing error: {str(e)}")
        return jsonify(create_response(
            success=False,
            error='FILE_PROCESSING_ERROR',
            message=str(e)
        )), 500

    except Exception as e:
        logger.error(f"File upload error: {str(e)}")

        # A failed flush/commit leaves the session in a state that rejects
        # further work until it is rolled back. Presumably SystemLog uses the
        # same session (TODO confirm); rolling back is harmless either way.
        try:
            db.session.rollback()
        except Exception as rollback_error:
            logger.error(f"Session rollback failed: {str(rollback_error)}")

        SystemLog.error(
            'files.upload_error',
            f'File upload failed: {str(e)}',
            user_id=g.current_user_id,
            extra_data={'error': str(e)}
        )

        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='檔案上傳失敗'
        )), 500


@files_bp.route('/<job_uuid>/download/<language_code>', methods=['GET'])
@jwt_login_required
def download_file(job_uuid, language_code):
    """Send the translated file of a job for one language as an attachment.

    Args:
        job_uuid: UUID of the translation job, taken from the URL.
        language_code: language code of the translated file, taken from the URL.

    Returns:
        The file as an attachment on success; otherwise a JSON error response
        with status 400, 403, 404 or 500.
    """
    def _fail(code, text, status):
        # Uniform JSON error payload paired with its HTTP status.
        return jsonify(create_response(
            success=False,
            error=code,
            message=text
        )), status

    try:
        # Rejects malformed UUIDs by raising ValidationError.
        validate_job_uuid(job_uuid)

        job = TranslationJob.query.filter_by(job_uuid=job_uuid).first()
        if not job:
            return _fail('JOB_NOT_FOUND', '任務不存在', 404)

        # Only the job owner or an administrator may download.
        is_owner = job.user_id == g.current_user_id
        if not is_owner and not g.is_admin:
            return _fail('PERMISSION_DENIED', '無權限存取此檔案', 403)

        # Files are only served once the job has completed.
        if job.status != 'COMPLETED':
            return _fail('JOB_NOT_COMPLETED', '任務尚未完成', 400)

        # First translated record matching the requested language, if any.
        translated_file = next(
            (
                record for record in job.files
                if record.file_type == 'TRANSLATED'
                and record.language_code == language_code
            ),
            None
        )
        if not translated_file:
            return _fail('FILE_NOT_FOUND', f'找不到 {language_code} 的翻譯檔案', 404)

        # The database record may outlive the file on disk.
        file_path = Path(translated_file.file_path)
        if not file_path.exists():
            logger.error(f"File not found on disk: {file_path}")
            return _fail('FILE_NOT_FOUND_ON_DISK', '檔案在伺服器上不存在', 404)

        # Record the download.
        download_details = {
            'filename': translated_file.filename,
            'language_code': language_code,
            'file_size': translated_file.file_size
        }
        SystemLog.info(
            'files.download',
            f'File downloaded: {translated_file.filename}',
            user_id=g.current_user_id,
            job_id=job.id,
            extra_data=download_details
        )
        logger.info(f"File downloaded: {job.job_uuid} - {language_code}")

        return send_file(
            str(file_path),
            as_attachment=True,
            download_name=translated_file.filename,
            mimetype='application/octet-stream'
        )

    except ValidationError as e:
        return _fail(e.error_code, str(e), 400)

    except Exception as e:
        logger.error(f"File download error: {str(e)}")
        return _fail('SYSTEM_ERROR', '檔案下載失敗', 500)


@files_bp.route('/<job_uuid>/download/original', methods=['GET'])
@jwt_login_required
def download_original_file(job_uuid):
    """Send the originally uploaded file of a job as an attachment.

    Args:
        job_uuid: UUID of the translation job, taken from the URL.

    Returns:
        The file as an attachment (named after the job's original filename)
        on success; otherwise a JSON error response with status 400, 403,
        404 or 500.
    """
    def _error(error_code, text, http_status):
        # Uniform JSON error payload paired with its HTTP status.
        payload = create_response(
            success=False,
            error=error_code,
            message=text
        )
        return jsonify(payload), http_status

    try:
        # Rejects malformed UUIDs by raising ValidationError.
        validate_job_uuid(job_uuid)

        job = TranslationJob.query.filter_by(job_uuid=job_uuid).first()
        if not job:
            return _error('JOB_NOT_FOUND', '任務不存在', 404)

        # Only the job owner or an administrator may download.
        requester_owns_job = job.user_id == g.current_user_id
        if not requester_owns_job and not g.is_admin:
            return _error('PERMISSION_DENIED', '無權限存取此檔案', 403)

        original_file = job.get_original_file()
        if not original_file:
            return _error('ORIGINAL_FILE_NOT_FOUND', '找不到原始檔案記錄', 404)

        # The database record may outlive the file on disk.
        file_path = Path(original_file.file_path)
        if not file_path.exists():
            logger.error(f"Original file not found on disk: {file_path}")
            return _error('FILE_NOT_FOUND_ON_DISK', '原始檔案在伺服器上不存在', 404)

        # Record the download.
        download_details = {
            'filename': original_file.filename,
            'file_size': original_file.file_size
        }
        SystemLog.info(
            'files.download_original',
            f'Original file downloaded: {original_file.filename}',
            user_id=g.current_user_id,
            job_id=job.id,
            extra_data=download_details
        )
        logger.info(f"Original file downloaded: {job.job_uuid}")

        return send_file(
            str(file_path),
            as_attachment=True,
            download_name=job.original_filename,
            mimetype='application/octet-stream'
        )

    except ValidationError as e:
        return _error(e.error_code, str(e), 400)

    except Exception as e:
        logger.error(f"Original file download error: {str(e)}")
        return _error('SYSTEM_ERROR', '原始檔案下載失敗', 500)


@files_bp.route('/supported-formats', methods=['GET'])
def get_supported_formats():
    """Return the supported upload formats and the maximum upload size.

    Returns:
        A JSON response listing each supported extension with its display
        name, description and icon, plus the size limit read from the
        ``MAX_CONTENT_LENGTH`` app setting (default 26214400) as a raw number
        and as a formatted string. A JSON error with status 500 is returned
        if anything fails.
    """
    try:
        # (extension, display name, description, icon) for every format.
        format_table = (
            ('.docx', 'Word 文件 (.docx)', 'Microsoft Word 2007+ 格式', 'file-word'),
            ('.doc', 'Word 文件 (.doc)', 'Microsoft Word 97-2003 格式', 'file-word'),
            ('.pptx', 'PowerPoint 簡報 (.pptx)', 'Microsoft PowerPoint 2007+ 格式', 'file-powerpoint'),
            ('.xlsx', 'Excel 試算表 (.xlsx)', 'Microsoft Excel 2007+ 格式', 'file-excel'),
            ('.xls', 'Excel 試算表 (.xls)', 'Microsoft Excel 97-2003 格式', 'file-excel'),
            ('.pdf', 'PDF 文件 (.pdf)', 'Portable Document Format', 'file-pdf'),
        )
        formats = {
            extension: {
                'name': display_name,
                'description': description,
                'icon': icon
            }
            for extension, display_name, description, icon in format_table
        }

        size_limit = current_app.config.get('MAX_CONTENT_LENGTH', 26214400)

        payload = {
            'supported_formats': formats,
            'max_file_size': size_limit,
            'max_file_size_formatted': format_file_size(size_limit)
        }
        return jsonify(create_response(success=True, data=payload))

    except Exception as e:
        logger.error(f"Get supported formats error: {str(e)}")

        failure = create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='取得支援格式失敗'
        )
        return jsonify(failure), 500


@files_bp.route('/supported-languages', methods=['GET'])
def get_supported_languages():
    """Return the languages reported by the helper of the same name.

    Returns:
        A JSON response with the helper's result under
        ``supported_languages``, or a JSON error with status 500 if the
        import or the call fails.
    """
    try:
        # Imported locally under an alias: the helper shares this view's name.
        from app.utils.helpers import get_supported_languages as _load_languages

        payload = {'supported_languages': _load_languages()}
        return jsonify(create_response(success=True, data=payload))

    except Exception as e:
        logger.error(f"Get supported languages error: {str(e)}")

        failure = create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='取得支援語言失敗'
        )
        return jsonify(failure), 500


def _remove_temp_file(path):
    """Delete a temporary file; a file that is already gone is not an error."""
    try:
        Path(path).unlink()
    except FileNotFoundError:
        pass
    except OSError as cleanup_error:
        # Best-effort cleanup: never let it break the request.
        logger.warning(f"Failed to remove temporary file {path}: {cleanup_error}")


@files_bp.route('/<job_uuid>/download/batch', methods=['GET'])
@jwt_login_required
def download_batch_files(job_uuid):
    """Send the original file and all translated files of a job as one ZIP.

    The archive places the original under ``original/`` and each translated
    file under a folder named after its language code. It is built in a
    uniquely named temporary file, which is removed once the response has
    been closed or immediately if the request fails before sending.

    Args:
        job_uuid: UUID of the translation job, taken from the URL.

    Returns:
        The ZIP as an attachment on success; otherwise a JSON error response
        with status 400, 403, 404 or 500.
    """
    try:
        # Rejects malformed UUIDs by raising ValidationError.
        validate_job_uuid(job_uuid)

        job = TranslationJob.query.filter_by(job_uuid=job_uuid).first()

        if not job:
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_FOUND',
                message='任務不存在'
            )), 404

        # Only the job owner or an administrator may download.
        if job.user_id != g.current_user_id and not g.is_admin:
            return jsonify(create_response(
                success=False,
                error='PERMISSION_DENIED',
                message='無權限存取此檔案'
            )), 403

        # Files are only served once the job has completed.
        if job.status != 'COMPLETED':
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_COMPLETED',
                message='任務尚未完成'
            )), 400

        translated_files = job.get_translated_files()

        if not translated_files:
            return jsonify(create_response(
                success=False,
                error='NO_TRANSLATED_FILES',
                message='沒有找到翻譯檔案'
            )), 404

        # Name shown to the client. It is no longer used as a filesystem path,
        # so its content cannot influence where the archive is written.
        zip_filename = f"{job.original_filename.split('.')[0]}_translations_{job.job_uuid[:8]}.zip"

        # Reserve a unique temporary path so that concurrent downloads of the
        # same job never write to the same file.
        with tempfile.NamedTemporaryFile(
            prefix='translations_', suffix='.zip', delete=False
        ) as reserved:
            zip_path = Path(reserved.name)

        handed_to_response = False
        try:
            files_added = 0
            with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zip_file:
                # Original file, when it is still present on disk.
                original_file = job.get_original_file()
                if original_file and Path(original_file.file_path).exists():
                    zip_file.write(
                        original_file.file_path,
                        f"original/{original_file.filename}"
                    )
                    files_added += 1

                # Translated files, one folder per language. Archive names are
                # tracked so the same entry is never written twice.
                added_files = set()
                for tf in translated_files:
                    file_path = Path(tf.file_path)
                    if not file_path.exists():
                        logger.warning(f"Translation file not found: {tf.file_path}")
                        continue

                    archive_name = f"{tf.language_code}/{tf.filename}"
                    if archive_name not in added_files:
                        zip_file.write(str(file_path), archive_name)
                        added_files.add(archive_name)
                        files_added += 1

            if files_added == 0:
                return jsonify(create_response(
                    success=False,
                    error='NO_FILES_TO_ZIP',
                    message='沒有可用的檔案進行壓縮'
                )), 404

            # Record the download.
            SystemLog.info(
                'files.download_batch',
                f'Batch files downloaded: {zip_filename}',
                user_id=g.current_user_id,
                job_id=job.id,
                extra_data={
                    'zip_filename': zip_filename,
                    'files_count': files_added,
                    'job_uuid': job_uuid
                }
            )

            logger.info(f"Batch files downloaded: {job.job_uuid} - {files_added} files in ZIP")

            response = send_file(
                str(zip_path),
                as_attachment=True,
                download_name=zip_filename,
                mimetype='application/zip'
            )
            # Delete the archive only after the response has been closed, so
            # the file is still there while it is being streamed.
            response.call_on_close(lambda: _remove_temp_file(zip_path))
            handed_to_response = True
            return response

        finally:
            # Every path that did not hand the file to a response cleans up here.
            if not handed_to_response:
                _remove_temp_file(zip_path)

    except ValidationError as e:
        return jsonify(create_response(
            success=False,
            error=e.error_code,
            message=str(e)
        )), 400

    except Exception as e:
        logger.error(f"Batch download error: {str(e)}")

        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='批量下載失敗'
        )), 500


@files_bp.route('/<job_uuid>/download/combine', methods=['GET'])
@jwt_login_required
def download_combine_file(job_uuid):
    """Send the combined file of a job as an attachment.

    The combined file is the first job file whose ``file_type`` is
    ``'combined'``. If none exists, it is the first file other than the
    original upload whose name contains "combine" (case-insensitive).

    Args:
        job_uuid: UUID of the translation job, taken from the URL.

    Returns:
        The file as an attachment on success; otherwise a JSON error response
        with status 400, 404 or 500.
    """
    try:
        # Rejects malformed UUIDs by raising ValidationError.
        validate_job_uuid(job_uuid)

        current_user_id = g.current_user_id

        # NOTE(review): the other download endpoints in this module also let
        # administrators through (g.is_admin) and answer 403 for foreign jobs;
        # this one only ever finds the caller's own jobs. Confirm whether that
        # difference is intended.
        job = TranslationJob.query.filter_by(
            job_uuid=job_uuid,
            user_id=current_user_id
        ).first()

        if not job:
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_FOUND',
                message='任務不存在'
            )), 404

        # Files are only served once the job has completed.
        if job.status != 'COMPLETED':
            return jsonify(create_response(
                success=False,
                error='JOB_NOT_COMPLETED',
                message='任務尚未完成'
            )), 400

        # The name-based fallback must never pick the original upload: a user
        # file that happens to be called e.g. "combined_report.docx" would
        # otherwise be served as the combined result.
        original_file = job.get_original_file()

        def _is_original(record):
            return original_file is not None and (
                record is original_file
                or record.file_path == original_file.file_path
            )

        # An explicit type marker wins over the filename heuristic.
        combine_file = next(
            (record for record in job.files if record.file_type == 'combined'),
            None
        )
        if combine_file is None:
            combine_file = next(
                (
                    record for record in job.files
                    if not _is_original(record)
                    and 'combine' in (record.filename or '').lower()
                ),
                None
            )

        if not combine_file:
            return jsonify(create_response(
                success=False,
                error='COMBINE_FILE_NOT_FOUND',
                message='找不到合併檔案'
            )), 404

        # The database record may outlive the file on disk.
        file_path = Path(combine_file.file_path)
        if not file_path.exists():
            return jsonify(create_response(
                success=False,
                error='FILE_NOT_FOUND',
                message='合併檔案已被刪除'
            )), 404

        logger.info(f"Combine file downloaded: {job.job_uuid} - {combine_file.filename}")

        return send_file(
            str(file_path),
            as_attachment=True,
            download_name=combine_file.filename,
            mimetype='application/octet-stream'
        )

    except ValidationError as e:
        return jsonify(create_response(
            success=False,
            error=e.error_code,
            message=str(e)
        )), 400

    except Exception as e:
        logger.error(f"Combine file download error: {str(e)}")

        return jsonify(create_response(
            success=False,
            error='SYSTEM_ERROR',
            message='合併檔案下載失敗'
        )), 500