Introduces core backend and frontend infrastructure for a PDF translation interface. Adds API endpoints for translation, PDF testing, and AI provider testing; implements PDF text extraction, cost tracking, and pricing logic in the lib directory; adds reusable UI components; and provides comprehensive documentation (SDD, environment setup, Claude instructions). Updates Tailwind and global styles, and includes a sample test PDF and configuration files.
98 lines
2.5 KiB
TypeScript
98 lines
2.5 KiB
TypeScript
// Pricing configuration for different AI models.
// Prices are per 1M tokens.

/**
 * Per-model pricing table, keyed by model identifier.
 *
 * `input` and `output` are prices per 1M tokens, expressed in `currency`.
 * `displayName` is a human-readable label for the model.
 */
export const MODEL_PRICING = {
  // DeepSeek pricing (very cost-effective)
  'deepseek-chat': {
    input: 0.14, // $0.14 per 1M input tokens
    output: 0.28, // $0.28 per 1M output tokens
    currency: 'USD',
    displayName: 'DeepSeek Chat'
  },

  // OpenAI pricing
  'gpt-4o-mini': {
    input: 0.15, // $0.15 per 1M input tokens
    output: 0.60, // $0.60 per 1M output tokens
    currency: 'USD',
    displayName: 'GPT-4o Mini'
  },

  'gpt-4o': {
    input: 5.00, // $5.00 per 1M input tokens
    output: 15.00, // $15.00 per 1M output tokens
    currency: 'USD',
    displayName: 'GPT-4o'
  },

  'gpt-3.5-turbo': {
    input: 0.50, // $0.50 per 1M input tokens
    output: 1.50, // $1.50 per 1M output tokens
    currency: 'USD',
    displayName: 'GPT-3.5 Turbo'
  }
}
|
|
|
|
/** Token counts reported for a single model request. */
export interface TokenUsage {
  /** Number of tokens in the prompt (billed at the model's input rate). */
  promptTokens: number
  /** Number of tokens in the completion (billed at the model's output rate). */
  completionTokens: number
  /**
   * Overall token count for the request.
   * NOTE(review): presumably promptTokens + completionTokens; nothing in this
   * file enforces that — confirm against the producer of this object.
   */
  totalTokens: number
}
|
|
|
|
/** Cost breakdown for a single model request. */
export interface CostCalculation {
  /** Cost attributed to prompt (input) tokens. */
  inputCost: number
  /** Cost attributed to completion (output) tokens. */
  outputCost: number
  /** Sum of the input and output cost. */
  totalCost: number
  /** Currency code the cost amounts are expressed in (e.g. 'USD'). */
  currency: string
  /** Display-ready rendering of the total cost. */
  formattedCost: string
}
|
|
|
|
/**
 * Computes the cost of a request from its token usage.
 *
 * Prices in `MODEL_PRICING` are quoted per 1M tokens, so each token count is
 * divided by 1,000,000 before being multiplied by the model's rate.
 *
 * @param model - Model identifier used as a key into `MODEL_PRICING`.
 *   Identifiers that are not keys of the table fall back to the
 *   `'deepseek-chat'` rates.
 * @param tokenUsage - Token counts for the request. Only `promptTokens` and
 *   `completionTokens` are read; `totalTokens` is ignored.
 * @returns The input, output and total cost, the pricing currency, and a
 *   `$`-prefixed display string (6 decimals below $0.01, otherwise 4).
 */
export function calculateCost(
  model: string,
  tokenUsage: TokenUsage
): CostCalculation {
  // Only own keys count as known models. A plain index lookup would also
  // resolve inherited names such as 'toString' or 'constructor' to truthy
  // Object.prototype members, bypassing the fallback and yielding NaN costs.
  const isKnownModel = Object.prototype.hasOwnProperty.call(MODEL_PRICING, model)
  const pricing = isKnownModel
    ? MODEL_PRICING[model as keyof typeof MODEL_PRICING]
    : MODEL_PRICING['deepseek-chat']

  // Calculate costs (convert from per 1M tokens to actual usage)
  const inputCost = (tokenUsage.promptTokens / 1_000_000) * pricing.input
  const outputCost = (tokenUsage.completionTokens / 1_000_000) * pricing.output
  const totalCost = inputCost + outputCost

  // Sub-cent totals get extra decimal places so they do not render as $0.0000
  const formattedCost = totalCost < 0.01
    ? `$${totalCost.toFixed(6)}`
    : `$${totalCost.toFixed(4)}`

  return {
    inputCost,
    outputCost,
    totalCost,
    currency: pricing.currency,
    formattedCost
  }
}
|
|
|
|
/**
 * Roughly estimates how many tokens a piece of text will consume.
 *
 * Heuristic: about 2 characters per token for Chinese, Japanese (kana) and
 * Korean (Hangul syllable) characters, and about 4 characters per token for
 * everything else. Each class of character is weighted separately, so a few
 * CJK characters inside mostly-Latin text do not inflate the whole estimate.
 * This is a simplified estimation, not a real tokenizer.
 *
 * @param text - Text to estimate. Length is measured in UTF-16 code units.
 * @returns The estimated token count, rounded up; 0 for an empty string.
 */
export function estimateTokens(text: string): number {
  // CJK unified ideographs, hiragana, katakana and Hangul syllables
  const cjkCharCount =
    text.match(/[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/g)?.length ?? 0

  // English and other non-CJK text
  const otherCharCount = text.length - cjkCharCount

  return Math.ceil(cjkCharCount / 2 + otherCharCount / 4)
}
|
|
|
|
/**
 * Formats a token count as a compact, human-readable string.
 *
 * - 1,000,000 and above: millions with two decimals, e.g. `"2.50M"`
 * - 1,000 and above: thousands with one decimal, e.g. `"1.5K"`
 * - otherwise: the number as-is, e.g. `"999"`
 *
 * @param tokens - Token count to format.
 * @returns The formatted count.
 */
export function formatTokenCount(tokens: number): string {
  if (tokens >= 1_000) {
    const thousands = (tokens / 1_000).toFixed(1)

    // Counts just under 1M (e.g. 999,999) round up to "1000.0" thousands;
    // promote those to the millions form instead of emitting "1000.0K".
    if (tokens < 1_000_000 && Number(thousands) < 1_000) {
      return `${thousands}K`
    }

    return `${(tokens / 1_000_000).toFixed(2)}M`
  }

  return tokens.toString()
}