Add PDF translation API, utilities, docs, and config
Introduces core backend and frontend infrastructure for a PDF translation interface. Adds API endpoints for translation, PDF testing, and AI provider testing; implements PDF text extraction, cost tracking, and pricing logic in the lib directory; adds reusable UI components; and provides comprehensive documentation (SDD, environment setup, Claude instructions). Updates Tailwind and global styles, and includes a sample test PDF and configuration files.
This commit is contained in:
98
lib/pricing.ts
Normal file
98
lib/pricing.ts
Normal file
@@ -0,0 +1,98 @@
|
||||
/**
 * Pricing configuration for the supported AI models.
 *
 * Keys are model identifiers. `input` and `output` are prices per
 * 1M (one million) tokens, expressed in the entry's `currency`;
 * `displayName` is the human-readable label for the model.
 */
export const MODEL_PRICING = {
  // DeepSeek pricing (very cost-effective)
  'deepseek-chat': {
    input: 0.14, // $0.14 per 1M input tokens
    output: 0.28, // $0.28 per 1M output tokens
    currency: 'USD',
    displayName: 'DeepSeek Chat'
  },

  // OpenAI pricing
  'gpt-4o-mini': {
    input: 0.15, // $0.15 per 1M input tokens
    output: 0.60, // $0.60 per 1M output tokens
    currency: 'USD',
    displayName: 'GPT-4o Mini'
  },

  'gpt-4o': {
    input: 5.00, // $5.00 per 1M input tokens
    output: 15.00, // $15.00 per 1M output tokens
    currency: 'USD',
    displayName: 'GPT-4o'
  },

  'gpt-3.5-turbo': {
    input: 0.50, // $0.50 per 1M input tokens
    output: 1.50, // $1.50 per 1M output tokens
    currency: 'USD',
    displayName: 'GPT-3.5 Turbo'
  }
}
|
||||
|
||||
/** Token counts for a single model call, as consumed by `calculateCost`. */
export interface TokenUsage {
  /** Tokens sent to the model; billed at the model's input rate. */
  promptTokens: number
  /** Tokens generated by the model; billed at the model's output rate. */
  completionTokens: number
  /**
   * Overall token count. Presumably promptTokens + completionTokens;
   * `calculateCost` does not read this field.
   */
  totalTokens: number
}
|
||||
|
||||
/** Cost breakdown returned by `calculateCost`. */
export interface CostCalculation {
  /** Cost attributed to prompt (input) tokens. */
  inputCost: number
  /** Cost attributed to completion (output) tokens. */
  outputCost: number
  /** Sum of `inputCost` and `outputCost`. */
  totalCost: number
  /** Currency code taken from the pricing entry that was applied (e.g. 'USD'). */
  currency: string
  /** Display string for the total; `calculateCost` produces a `$`-prefixed amount. */
  formattedCost: string
}
|
||||
|
||||
/**
 * Computes the cost of a model call from its token usage.
 *
 * @param model - Model identifier to look up in `MODEL_PRICING`. Identifiers
 *   that are not listed there fall back to the 'deepseek-chat' pricing.
 * @param tokenUsage - Prompt and completion token counts for the call.
 * @returns Input, output and total cost, the currency of the applied pricing
 *   entry, and a `$`-prefixed display string (6 decimals when the total is
 *   below 0.01, otherwise 4).
 */
export function calculateCost(
  model: string,
  tokenUsage: TokenUsage
): CostCalculation {
  // Only accept keys defined on MODEL_PRICING itself. A bare index lookup
  // would also resolve inherited keys such as 'toString' or 'constructor',
  // which are truthy, bypass the fallback and turn every cost into NaN.
  const isKnownModel = Object.prototype.hasOwnProperty.call(MODEL_PRICING, model)
  const pricing = isKnownModel
    ? MODEL_PRICING[model as keyof typeof MODEL_PRICING]
    : MODEL_PRICING['deepseek-chat']

  // Calculate costs (convert from per 1M tokens to actual usage)
  const inputCost = (tokenUsage.promptTokens / 1_000_000) * pricing.input
  const outputCost = (tokenUsage.completionTokens / 1_000_000) * pricing.output
  const totalCost = inputCost + outputCost

  // Sub-cent totals get extra decimal places so they do not display as zero
  const formattedCost = totalCost < 0.01
    ? `$${totalCost.toFixed(6)}`
    : `$${totalCost.toFixed(4)}`

  return {
    inputCost,
    outputCost,
    totalCost,
    currency: pricing.currency,
    formattedCost
  }
}
|
||||
|
||||
// CJK ideographs, Hiragana, Katakana and Hangul syllables (all in the BMP,
// so each match is exactly one UTF-16 code unit of `text.length`).
const CJK_CHAR_PATTERN = /[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/g

/**
 * Roughly estimates how many tokens a text will consume.
 *
 * Heuristic: about 2 characters per token for Chinese/Japanese/Korean
 * characters and about 4 characters per token for everything else. Each
 * class is counted separately, so a mostly-Latin text that contains a few
 * CJK characters is not estimated as if it were entirely CJK. This is a
 * simplified estimation, not a real tokenizer.
 *
 * @param text - The text to estimate.
 * @returns The estimated token count, rounded up; 0 for an empty string.
 */
export function estimateTokens(text: string): number {
  const cjkMatches = text.match(CJK_CHAR_PATTERN)
  const cjkCount = cjkMatches ? cjkMatches.length : 0
  const otherCount = text.length - cjkCount

  // Halves and quarters are exact in floating point, so the sum is exact
  // before rounding up.
  return Math.ceil(cjkCount / 2 + otherCount / 4)
}
|
||||
|
||||
/**
 * Formats a token count compactly for display.
 *
 * - one million and above: two decimals with an `M` suffix (e.g. `2.50M`)
 * - one thousand and above: one decimal with a `K` suffix (e.g. `1.5K`)
 * - below one thousand: the number as-is
 *
 * @param tokens - The token count to format.
 * @returns The formatted count.
 */
export function formatTokenCount(tokens: number): string {
  const asMillions = (): string => `${(tokens / 1_000_000).toFixed(2)}M`

  if (tokens >= 1_000_000) {
    return asMillions()
  }

  if (tokens >= 1_000) {
    const thousands = (tokens / 1_000).toFixed(1)
    // Counts just under one million (e.g. 999_999) round up to "1000.0";
    // show them as "1.00M" rather than the misleading "1000.0K".
    return thousands === '1000.0' ? asMillions() : `${thousands}K`
  }

  return tokens.toString()
}
|
Reference in New Issue
Block a user