Add PDF translation API, utilities, docs, and config

Introduces core backend and frontend infrastructure for a PDF translation interface. Adds API endpoints for translation, PDF testing, and AI provider testing; implements PDF text extraction, cost tracking, and pricing logic in the lib directory; adds reusable UI components; and provides comprehensive documentation (SDD, environment setup, Claude instructions). Updates Tailwind and global styles, and includes a sample test PDF and configuration files.
This commit is contained in:
2025-10-15 23:34:44 +08:00
parent c899702d51
commit 39a4788cc4
21 changed files with 11041 additions and 251 deletions

View File

@@ -1,30 +1,133 @@
import { type NextRequest, NextResponse } from "next/server"
import { createOpenAI } from "@ai-sdk/openai"
import { openai } from "@ai-sdk/openai"
import { generateText } from "ai"
import { extractTextFromPDF, generateTranslatedPDF, processImageFile, processPDFWithOCR, ocrLanguageMap, isImageFile, isPDFFile } from "@/lib/pdf-processor"
import { calculateCost, estimateTokens, formatTokenCount, MODEL_PRICING } from "@/lib/pricing"
import { costTracker } from "@/lib/cost-tracker"
export async function POST(request: NextRequest) {
try {
// Select AI provider based on environment variable
const aiProvider = process.env.AI_PROVIDER || "deepseek"
let model
let modelName: string
if (aiProvider === "openai") {
// Use OpenAI
modelName = process.env.OPENAI_MODEL || "gpt-4o-mini"
model = openai(modelName)
} else {
// Use DeepSeek (default)
modelName = process.env.DEEPSEEK_MODEL || "deepseek-chat"
const deepseek = createOpenAI({
apiKey: process.env.DEEPSEEK_API_KEY,
baseURL: process.env.DEEPSEEK_BASE_URL || "https://api.deepseek.com",
})
model = deepseek(modelName)
}
const formData = await request.formData()
const file = formData.get("file") as File
const targetLanguage = formData.get("targetLanguage") as string
const sourceLanguage = formData.get("sourceLanguage") as string
const returnPDF = formData.get("returnPDF") === "true"
if (!file || !targetLanguage) {
return NextResponse.json({ error: "缺少必要參數" }, { status: 400 })
}
// Extract text from PDF
// Validate file type
if (!isPDFFile(file.type) && !isImageFile(file.type)) {
return NextResponse.json({ error: "請上傳 PDF 檔案或圖片檔案" }, { status: 400 })
}
// Check file size (10MB limit)
const maxSize = parseInt(process.env.MAX_FILE_SIZE || "10485760")
if (file.size > maxSize) {
return NextResponse.json({ error: "檔案太大,請上傳小於 10MB 的檔案" }, { status: 413 })
}
// Process file based on type
const arrayBuffer = await file.arrayBuffer()
const buffer = Buffer.from(arrayBuffer)
let extractedText = ""
let metadata: any = {}
try {
if (isImageFile(file.type)) {
// Image file - OCR功能已停用
console.log("Image file detected - OCR功能已停用")
extractedText = "目前僅支援包含文字的 PDF 文件,不支援圖片檔案。"
metadata = { title: file.name, type: 'image' }
} else if (isPDFFile(file.type)) {
// Process PDF file
console.log("Processing PDF file...")
const result = await extractTextFromPDF(buffer)
metadata = result.metadata
if (result.isScanned) {
// PDF is scanned or text extraction failed
console.log("Detected scanned PDF or text extraction failed")
const message = result.metadata?.message || "此 PDF 為掃描檔案,目前僅支援包含文字的 PDF 文件。"
return NextResponse.json({
error: `${message}
// For demo purposes, we'll simulate PDF text extraction
// In production, you'd use a library like pdf-parse
const pdfText = `這是從PDF提取的示例文本。在實際應用中這裡會是真實的PDF內容。
📋 可能的原因:
• PDF 是掃描的圖片檔案
• PDF 文件已加密或受保護
• PDF 內容格式特殊,無法提取文字
• PDF 文件損壞
這個應用展示了如何使用AI來翻譯文檔內容。您可以上傳任何PDF文件選擇目標語言然後獲得翻譯結果。
💡 建議:
• 嘗試其他包含純文字的 PDF 文件
• 確認 PDF 可以在其他軟體中複製文字
• 如果是掃描檔案,建議轉換為圖片格式`,
details: {
pageCount: result.pageCount,
textLength: result.metadata?.extractedTextLength || 0,
hasTextContent: result.metadata?.hasTextContent || false
}
}, { status: 400 })
} else {
// PDF has extractable text, use it directly
console.log("PDF contains extractable text, using direct extraction")
extractedText = result.text
}
}
if (!extractedText || extractedText.trim().length === 0) {
extractedText = "無法從檔案擷取文字內容。請確認檔案包含可讀取的文字或清晰的圖像。"
}
} catch (error) {
console.error("File processing error:", error)
// Provide helpful error message for PDF conversion issues
if (error instanceof Error && error.message.includes('PDF 轉圖片失敗')) {
return NextResponse.json({
error: `📄 掃描 PDF 需要額外工具支援
主要功能包括
- 支持多種語言翻譯
- 清爽的用戶介面
- 簡單易用的操作流程`
🎯 建議解決方案
1. 💡 立即可用:將 PDF 轉換為圖片格式JPG/PNG後上傳
- 使用 PDF 閱讀器截圖
- 或使用線上 PDF 轉圖片工具
2. 🔧 安裝系統工具:
• Windows: 下載安裝 ImageMagick (https://imagemagick.org/script/download.php#windows)
• Mac: brew install imagemagick
• Linux: apt-get install imagemagick
📸 提示:圖片格式的 OCR 識別效果通常比掃描 PDF 更好!`,
suggestion: "convert_to_image",
downloadLink: "https://imagemagick.org/script/download.php#windows"
}, { status: 400 })
}
extractedText = `檔案處理過程中發生錯誤:${error instanceof Error ? error.message : '未知錯誤'}`
}
// Get language name for better translation context
const languageNames: Record<string, string> = {
@@ -40,23 +143,137 @@ export async function POST(request: NextRequest) {
pt: "Português",
ru: "Русский",
ar: "العربية",
hi: "हिन्दी",
th: "ไทย",
vi: "Tiếng Việt",
}
const targetLanguageName = languageNames[targetLanguage] || targetLanguage
const sourceLanguageName = languageNames[sourceLanguage] || "自動偵測"
// Translate using AI SDK
const { text: translatedText } = await generateText({
model: "openai/gpt-4o-mini",
prompt: `請將以下文本翻譯成${targetLanguageName}。保持原文的格式和結構,只翻譯內容:
// Prepare translation prompt
const prompt = `You are a professional translator. Translate the following text from ${sourceLanguageName} to ${targetLanguageName}.
Keep the original format and structure. Only translate the content, preserving line breaks and paragraphs.
If the text appears to be an error message or system message, translate it appropriately.
${pdfText}`,
Text to translate:
${extractedText}`
// Estimate input tokens
const estimatedInputTokens = estimateTokens(prompt)
let translatedText: string
let usage: any = null
try {
// Try to translate using selected AI provider
const result = await generateText({
model: model,
prompt: prompt,
temperature: 0.3, // Lower temperature for more accurate translation
maxTokens: 4000,
})
translatedText = result.text
usage = result.usage
} catch (error) {
console.error("AI API Error:", error)
// Fallback to a simple mock translation for demo purposes
translatedText = `[模擬翻譯結果]\n\n原文內容: ${extractedText}\n\n注意: 這是模擬翻譯結果,因為 AI API 連接失敗。請檢查 API 金鑰配置。\n\n目標語言: ${targetLanguageName}\n來源語言: ${sourceLanguageName}\n\n實際應用中這裡會顯示真正的 AI 翻譯結果。`
}
// Calculate token usage and cost
const tokenUsage = {
promptTokens: usage?.promptTokens || estimatedInputTokens,
completionTokens: usage?.completionTokens || estimateTokens(translatedText),
totalTokens: (usage?.promptTokens || estimatedInputTokens) + (usage?.completionTokens || estimateTokens(translatedText))
}
const costCalculation = calculateCost(modelName, tokenUsage)
const modelDisplayName = MODEL_PRICING[modelName as keyof typeof MODEL_PRICING]?.displayName || modelName
// Track cost in accumulator (client-side will handle storage)
const costSession = {
model: modelName,
provider: aiProvider,
tokenUsage: {
promptTokens: tokenUsage.promptTokens,
completionTokens: tokenUsage.completionTokens,
totalTokens: tokenUsage.totalTokens
},
cost: {
inputCost: costCalculation.inputCost,
outputCost: costCalculation.outputCost,
totalCost: costCalculation.totalCost,
currency: costCalculation.currency
}
}
// Generate translated PDF if requested
let pdfBase64 = ""
if (returnPDF) {
try {
const pdfBytes = await generateTranslatedPDF(translatedText, metadata, targetLanguage)
pdfBase64 = Buffer.from(pdfBytes).toString('base64')
} catch (error) {
console.error("PDF generation error:", error)
// Continue without PDF generation
}
}
return NextResponse.json({
translatedText,
pdfBase64: pdfBase64,
hasPDF: pdfBase64.length > 0,
tokenUsage: {
promptTokens: tokenUsage.promptTokens,
completionTokens: tokenUsage.completionTokens,
totalTokens: tokenUsage.totalTokens,
formattedCounts: {
prompt: formatTokenCount(tokenUsage.promptTokens),
completion: formatTokenCount(tokenUsage.completionTokens),
total: formatTokenCount(tokenUsage.totalTokens)
}
},
cost: {
inputCost: costCalculation.inputCost,
outputCost: costCalculation.outputCost,
totalCost: costCalculation.totalCost,
formattedCost: costCalculation.formattedCost,
currency: costCalculation.currency
},
model: {
name: modelName,
displayName: modelDisplayName,
provider: aiProvider
},
costSession: costSession
})
return NextResponse.json({ translatedText })
} catch (error) {
console.error("翻譯錯誤:", error)
return NextResponse.json({ error: "翻譯過程中發生錯誤" }, { status: 500 })
// Check for specific error types
if (error instanceof Error) {
if (error.message.includes("API key")) {
return NextResponse.json({ error: "API 金鑰配置錯誤,請聯繫管理員" }, { status: 500 })
}
if (error.message.includes("rate limit")) {
return NextResponse.json({ error: "API 請求過於頻繁,請稍後再試" }, { status: 429 })
}
}
return NextResponse.json({ error: "翻譯過程中發生錯誤,請稍後再試" }, { status: 500 })
}
}
/**
 * Answers CORS preflight (`OPTIONS`) requests for this route.
 *
 * Responds with an empty body and HTTP 200, advertising that any origin (`*`)
 * may send `POST` and `OPTIONS` requests that carry a `Content-Type` header.
 *
 * @param request - The incoming request; it is not inspected.
 * @returns An empty-bodied response carrying the CORS headers.
 */
export async function OPTIONS(request: NextRequest) {
  // Header set returned verbatim on every preflight, regardless of the caller.
  const corsHeaders = {
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Methods': 'POST, OPTIONS',
    'Access-Control-Allow-Headers': 'Content-Type',
  }

  const preflightResponse = new NextResponse(null, { status: 200, headers: corsHeaders })
  return preflightResponse
}