Files
Document_Translator/app/utils/validators.py
2025-09-02 10:31:35 +08:00

203 lines
6.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
驗證工具模組
Author: PANJIT IT Team
Created: 2024-01-28
Modified: 2024-01-28
"""
import os
from pathlib import Path
from flask import current_app
from .exceptions import ValidationError
def validate_file(file_obj):
    """Validate an uploaded file object.

    Checks, in order: a file was supplied, it has a filename, its extension
    is in the allowed set, its size does not exceed the limit, and it is not
    empty.

    Args:
        file_obj: Uploaded file. Must expose ``filename``, ``seek`` and
            ``tell`` (presumably a Werkzeug ``FileStorage`` -- confirm
            against the caller).

    Returns:
        dict with ``filename``, ``file_extension`` (lower-cased suffix,
        leading dot included), ``file_size`` (bytes) and ``valid`` (True).

    Raises:
        ValidationError: with code ``NO_FILE``, ``NO_FILENAME``,
            ``INVALID_FILE_TYPE``, ``FILE_TOO_LARGE`` or ``EMPTY_FILE``.
    """
    if not file_obj:
        raise ValidationError("未選擇檔案", "NO_FILE")
    if not file_obj.filename:
        raise ValidationError("檔案名稱為空", "NO_FILENAME")

    # Check the file extension.
    file_ext = Path(file_obj.filename).suffix.lower()
    allowed_extensions = current_app.config.get(
        'ALLOWED_EXTENSIONS',
        {'.docx', '.doc', '.pptx', '.xlsx', '.xls', '.pdf'},
    )
    if file_ext not in allowed_extensions:
        # Sort so the message is stable; set iteration order varies between
        # interpreter runs.
        supported = ', '.join(sorted(allowed_extensions))
        raise ValidationError(
            f"不支援的檔案類型: {file_ext},支援的格式: {supported}",
            "INVALID_FILE_TYPE"
        )

    # Flask ships MAX_CONTENT_LENGTH = None in its default config, so the
    # key can exist with a None value; fall back explicitly in that case.
    max_size = current_app.config.get('MAX_CONTENT_LENGTH')
    if max_size is None:
        max_size = 26214400  # 25 MB

    # Measure the size by seeking to the end, then rewind for later readers.
    file_obj.seek(0, os.SEEK_END)
    file_size = file_obj.tell()
    file_obj.seek(0)

    if file_size > max_size:
        raise ValidationError(
            f"檔案大小超過限制 ({format_file_size(max_size)})",
            "FILE_TOO_LARGE"
        )
    if file_size == 0:
        raise ValidationError("檔案為空", "EMPTY_FILE")

    return {
        'filename': file_obj.filename,
        'file_extension': file_ext,
        'file_size': file_size,
        'valid': True
    }
def validate_languages(source_language, target_languages):
    """Validate the source/target language selection.

    Args:
        source_language: Language code of the source text, ``'auto'``, or a
            falsy value (treated as ``'auto'`` in the result).
        target_languages: Non-empty list of at most 10 language codes.

    Returns:
        dict with ``source_language`` (``'auto'`` when none was given),
        ``target_languages`` (the list as passed in),
        ``supported_languages`` (code -> display name) and ``valid`` (True).

    Raises:
        ValidationError: with code ``INVALID_SOURCE_LANGUAGE``,
            ``NO_TARGET_LANGUAGES``, ``TOO_MANY_TARGET_LANGUAGES``,
            ``INVALID_TARGET_LANGUAGE`` or ``SOURCE_TARGET_OVERLAP``.
    """
    # Supported language codes and their display names.
    supported_languages = {
        'auto': '自動偵測',
        'zh-CN': '簡體中文',
        'zh-TW': '繁體中文',
        'en': '英文',
        'ja': '日文',
        'ko': '韓文',
        'vi': '越南文',
        'th': '泰文',
        'id': '印尼文',
        'ms': '馬來文',
        'es': '西班牙文',
        'fr': '法文',
        'de': '德文',
        'ru': '俄文'
    }

    # Validate the source language. The isinstance guard keeps unhashable
    # values (e.g. a list) from raising TypeError in the dict lookup.
    if source_language and (
        not isinstance(source_language, str)
        or source_language not in supported_languages
    ):
        raise ValidationError(
            f"不支援的來源語言: {source_language}",
            "INVALID_SOURCE_LANGUAGE"
        )

    # Validate the target languages; an empty list is already falsy here.
    if not target_languages or not isinstance(target_languages, list):
        raise ValidationError("必須指定至少一個目標語言", "NO_TARGET_LANGUAGES")
    if len(target_languages) > 10:  # at most 10 target languages
        raise ValidationError("目標語言數量過多最多支援10個", "TOO_MANY_TARGET_LANGUAGES")

    # 'auto' means "detect the language", which is only meaningful for the
    # source side, so it is rejected as a target. Non-string entries are
    # rejected up front instead of crashing the lookup or the join below.
    invalid_languages = [
        lang for lang in target_languages
        if not isinstance(lang, str)
        or lang == 'auto'
        or lang not in supported_languages
    ]
    if invalid_languages:
        raise ValidationError(
            f"不支援的目標語言: {', '.join(map(str, invalid_languages))}",
            "INVALID_TARGET_LANGUAGE"
        )

    # The source language must not also be a target.
    if source_language and source_language != 'auto' and source_language in target_languages:
        raise ValidationError(
            "目標語言不能包含來源語言",
            "SOURCE_TARGET_OVERLAP"
        )

    return {
        'source_language': source_language or 'auto',
        'target_languages': target_languages,
        'supported_languages': supported_languages,
        'valid': True
    }
def validate_job_uuid(job_uuid):
    """Validate that *job_uuid* is a well-formed UUID string.

    Args:
        job_uuid: The job identifier to check.

    Returns:
        True when ``uuid.UUID`` accepts the string.

    Raises:
        ValidationError: code ``INVALID_UUID`` when the value is empty, is
            not a string, or cannot be parsed as a UUID.
    """
    import uuid

    if not job_uuid:
        raise ValidationError("任務UUID不能為空", "INVALID_UUID")
    # uuid.UUID() expects a string; other types fail with AttributeError or
    # TypeError rather than ValueError, so reject them explicitly.
    if not isinstance(job_uuid, str):
        raise ValidationError("任務UUID格式錯誤", "INVALID_UUID")
    try:
        uuid.UUID(job_uuid)
    except ValueError as exc:
        raise ValidationError("任務UUID格式錯誤", "INVALID_UUID") from exc
    return True
def validate_pagination(page, per_page):
    """Normalise and validate pagination parameters.

    Falsy inputs fall back to the defaults (page 1, 20 items per page);
    anything else is converted with ``int()``.

    Args:
        page: Requested page number (any value ``int()`` accepts).
        per_page: Requested page size (any value ``int()`` accepts).

    Returns:
        Tuple ``(page, per_page)`` as integers.

    Raises:
        ValidationError: ``INVALID_PAGINATION`` when a value cannot be
            converted, ``INVALID_PAGE`` when page < 1, ``INVALID_PER_PAGE``
            when per_page is outside 1..100.
    """
    try:
        page_number = int(page or 1)
        page_size = int(per_page or 20)
    except (ValueError, TypeError):
        raise ValidationError("分頁參數必須為數字", "INVALID_PAGINATION")

    if page_number < 1:
        raise ValidationError("頁數必須大於0", "INVALID_PAGE")
    if not 1 <= page_size <= 100:
        raise ValidationError("每頁項目數必須在1-100之間", "INVALID_PER_PAGE")

    return page_number, page_size
def format_file_size(size_bytes):
    """Render a byte count as a human-readable string.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (TB) is reached, then formatted with one decimal place. Zero is
    special-cased as ``"0 B"``.

    Args:
        size_bytes: Size in bytes.

    Returns:
        A string such as ``"512.0 B"``, ``"1.5 KB"`` or ``"25.0 MB"``.
    """
    if size_bytes == 0:
        return "0 B"

    amount = size_bytes
    for unit in ("B", "KB", "MB", "GB"):
        # Stop at the first unit in which the amount is no longer >= 1024.
        if not amount >= 1024:
            return f"{amount:.1f} {unit}"
        amount /= 1024.0
    # Anything that survived four divisions is reported in the largest unit.
    return f"{amount:.1f} TB"
def sanitize_filename(filename):
    """Return a filesystem-friendly version of *filename*.

    Only the final path component is used (via ``Path.stem`` and
    ``Path.suffix``). In the stem, characters other than word characters,
    whitespace, dots and hyphens become underscores, whitespace runs become
    a single underscore, leading/trailing dots and underscores are removed,
    and the result is capped at 100 characters. The extension is cleaned
    with the same character whitelist.

    Args:
        filename: Original file name (str or path-like).

    Returns:
        The sanitised name. If nothing usable remains of the stem, the
        placeholder ``"file"`` is used so the result is never empty or a
        bare extension such as ``".pdf"``.
    """
    import re

    # Split into stem and extension.
    parsed = Path(filename)
    name = parsed.stem
    ext = parsed.suffix

    # Replace unsafe characters in the stem.
    safe_name = re.sub(r'[^\w\s.-]', '_', name)
    safe_name = re.sub(r'\s+', '_', safe_name)  # whitespace -> underscore
    safe_name = safe_name.strip('._')  # drop leading/trailing dots and underscores

    # Limit the stem length.
    safe_name = safe_name[:100]

    # A stem made only of unsafe characters collapses to ''; without this
    # fallback the result would be a bare extension such as ".pdf".
    if not safe_name:
        safe_name = 'file'

    # The extension comes from the same untrusted input, so clean it too.
    safe_ext = re.sub(r'[^\w.-]', '_', ext)

    return f"{safe_name}{safe_ext}"
def _parse_iso_datetime(value, message, code):
    """Parse an ISO 8601 string into a datetime or raise ValidationError."""
    from datetime import datetime

    try:
        # Spell a 'Z' suffix out as an explicit UTC offset;
        # datetime.fromisoformat() only accepts 'Z' from Python 3.11 on.
        return datetime.fromisoformat(value.replace('Z', '+00:00'))
    except (ValueError, AttributeError, TypeError) as exc:
        # AttributeError/TypeError cover non-string inputs.
        raise ValidationError(message, code) from exc


def validate_date_range(start_date, end_date):
    """Parse and validate an optional start/end date pair.

    Args:
        start_date: ISO 8601 string, or a falsy value to leave it unset.
        end_date: ISO 8601 string, or a falsy value to leave it unset.

    Returns:
        Tuple ``(start_date, end_date)``; each element is the parsed
        ``datetime`` or the original falsy value that was passed in.

    Raises:
        ValidationError: ``INVALID_START_DATE`` / ``INVALID_END_DATE`` when
            a value cannot be parsed, ``INVALID_DATE_RANGE`` when the start
            is later than the end.
    """
    from datetime import timezone

    if start_date:
        start_date = _parse_iso_datetime(
            start_date, "開始日期格式錯誤", "INVALID_START_DATE"
        )
    if end_date:
        end_date = _parse_iso_datetime(
            end_date, "結束日期格式錯誤", "INVALID_END_DATE"
        )

    if start_date and end_date:
        range_start, range_end = start_date, end_date
        # Python refuses to order a naive datetime against an aware one.
        # When only one side carries an offset, treat the naive side as UTC
        # for this comparison only; the returned values stay untouched.
        if (range_start.utcoffset() is None) != (range_end.utcoffset() is None):
            if range_start.utcoffset() is None:
                range_start = range_start.replace(tzinfo=timezone.utc)
            else:
                range_end = range_end.replace(tzinfo=timezone.utc)
        if range_start > range_end:
            raise ValidationError("開始日期不能晚於結束日期", "INVALID_DATE_RANGE")

    return start_date, end_date