Add PDF translation API, utilities, docs, and config

Introduces core backend and frontend infrastructure for a PDF translation interface. Adds API endpoints for translation, PDF testing, and AI provider testing; implements PDF text extraction, cost tracking, and pricing logic in the lib directory; adds reusable UI components; and provides comprehensive documentation (SDD, environment setup, Claude instructions). Updates Tailwind and global styles, and includes a sample test PDF and configuration files.
2025-10-15 23:34:44 +08:00
parent c899702d51
commit 39a4788cc4
21 changed files with 11041 additions and 251 deletions
--- a/lib/pricing.ts
+++ b/lib/pricing.ts
@@ -0,0 +1,98 @@
+// Pricing configuration for different AI models
+// Prices are per 1M tokens
+
+/**
+ * Price table keyed by model identifier.
+ *
+ * `input` and `output` are the prices, in `currency`, for one million
+ * prompt tokens and one million completion tokens respectively.
+ * `displayName` is the human-readable label for the model.
+ */
+export const MODEL_PRICING = {
+  // DeepSeek: $0.14 in / $0.28 out per 1M tokens (the cheapest entry here)
+  'deepseek-chat': {
+    input: 0.14,
+    output: 0.28,
+    currency: 'USD',
+    displayName: 'DeepSeek Chat',
+  },
+
+  // OpenAI: $0.15 in / $0.60 out per 1M tokens
+  'gpt-4o-mini': {
+    input: 0.15,
+    output: 0.6,
+    currency: 'USD',
+    displayName: 'GPT-4o Mini',
+  },
+
+  // OpenAI: $5.00 in / $15.00 out per 1M tokens
+  'gpt-4o': {
+    input: 5,
+    output: 15,
+    currency: 'USD',
+    displayName: 'GPT-4o',
+  },
+
+  // OpenAI: $0.50 in / $1.50 out per 1M tokens
+  'gpt-3.5-turbo': {
+    input: 0.5,
+    output: 1.5,
+    currency: 'USD',
+    displayName: 'GPT-3.5 Turbo',
+  },
+}
+
+/**
+ * Token counts passed to `calculateCost`.
+ */
+export interface TokenUsage {
+  /** Number of prompt tokens; `calculateCost` bills these at the model's `input` price. */
+  promptTokens: number
+  /** Number of completion tokens; `calculateCost` bills these at the model's `output` price. */
+  completionTokens: number
+  /**
+   * NOTE(review): presumably `promptTokens + completionTokens`, but nothing
+   * in this file reads or enforces it — confirm with callers.
+   */
+  totalTokens: number
+}
+
+/**
+ * Result of `calculateCost`: the cost of a request broken down by token
+ * direction, plus a display string.
+ */
+export interface CostCalculation {
+  /** Cost of the prompt tokens: `promptTokens / 1M` times the model's `input` price. */
+  inputCost: number
+  /** Cost of the completion tokens: `completionTokens / 1M` times the model's `output` price. */
+  outputCost: number
+  /** Sum of `inputCost` and `outputCost`. */
+  totalCost: number
+  /** Currency code copied from the pricing entry that was applied. */
+  currency: string
+  /** `totalCost` prefixed with `$`: six decimals when below 0.01, four otherwise. */
+  formattedCost: string
+}
+
+/**
+ * Computes the cost of a request from its token usage and the per-1M-token
+ * prices in `MODEL_PRICING`.
+ *
+ * A model without its own entry in `MODEL_PRICING` is priced at the
+ * `deepseek-chat` rates.
+ *
+ * @param model - Model identifier, matched against the keys of `MODEL_PRICING`.
+ * @param tokenUsage - Prompt and completion token counts; `totalTokens` is not read.
+ * @returns The input, output and total cost, the currency of the pricing entry
+ *   used, and a `$`-prefixed display string (six decimals below 0.01, four
+ *   otherwise).
+ */
+export function calculateCost(
+  model: string,
+  tokenUsage: TokenUsage
+): CostCalculation {
+  // Look up own keys only. A plain `MODEL_PRICING[model] || fallback` also
+  // matches inherited members such as `toString` or `constructor`, which are
+  // truthy functions and would turn every cost into NaN.
+  const isKnownModel = Object.prototype.hasOwnProperty.call(MODEL_PRICING, model)
+  const pricing = isKnownModel
+    ? MODEL_PRICING[model as keyof typeof MODEL_PRICING]
+    : MODEL_PRICING['deepseek-chat']
+
+  // Prices are quoted per 1M tokens, so scale the raw counts down first
+  const inputCost = (tokenUsage.promptTokens / 1_000_000) * pricing.input
+  const outputCost = (tokenUsage.completionTokens / 1_000_000) * pricing.output
+  const totalCost = inputCost + outputCost
+
+  // Sub-cent totals need more decimals to show anything other than zero
+  const formattedCost = totalCost < 0.01
+    ? `$${totalCost.toFixed(6)}`
+    : `$${totalCost.toFixed(4)}`
+
+  return {
+    inputCost,
+    outputCost,
+    totalCost,
+    currency: pricing.currency,
+    formattedCost
+  }
+}
+
+/**
+ * Roughly estimates how many tokens `text` will consume.
+ *
+ * Heuristic: about 2 characters per token for Chinese, Japanese and Korean
+ * characters, and about 4 characters per token for everything else. The two
+ * groups are counted separately, so a few CJK characters in a mostly Latin
+ * text no longer double the estimate for the whole text.
+ *
+ * @param text - Text to estimate; an empty string yields 0.
+ * @returns The estimated token count, rounded up to a whole number.
+ */
+export function estimateTokens(text: string): number {
+  // CJK ideographs, hiragana, katakana and Hangul syllables. Every range is
+  // in the BMP, so each match is exactly one UTF-16 code unit of text.length.
+  const cjkPattern = /[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/g
+
+  // With the `g` flag, match() returns every occurrence (or null for none)
+  const cjkChars = text.match(cjkPattern)?.length ?? 0
+  const otherChars = text.length - cjkChars
+
+  return Math.ceil(cjkChars / 2 + otherChars / 4)
+}
+
+/**
+ * Formats a token count as a compact string.
+ *
+ * - below 1,000: the number as-is (e.g. `999`)
+ * - 1,000 up to just under one million: thousands with one decimal (e.g. `1.5K`)
+ * - otherwise: millions with two decimals (e.g. `2.50M`)
+ *
+ * Counts just under one million that would round to `1000.0K` are shown as
+ * `1.00M` instead.
+ *
+ * @param tokens - Token count to format.
+ * @returns The formatted count.
+ */
+export function formatTokenCount(tokens: number): string {
+  if (tokens >= 1_000 && tokens < 1_000_000) {
+    const thousands = (tokens / 1_000).toFixed(1)
+    // toFixed rounds, so e.g. 999_999 becomes "1000.0"; roll that over to M
+    if (thousands !== '1000.0') {
+      return `${thousands}K`
+    }
+  }
+  if (tokens >= 1_000) {
+    return `${(tokens / 1_000_000).toFixed(2)}M`
+  }
+  return tokens.toString()
+}