From 3edf6fdf6be635bacd798a9f52eab9b1a5f96438 Mon Sep 17 00:00:00 2001 From: constanttime Date: Sun, 17 Aug 2025 20:13:49 +0530 Subject: [PATCH] feat: add real-time conversation cost tracking with proper token counting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add comprehensive ModelPricing service with 100+ models and historical pricing - Create real-time ConversationCost component that displays in chat header - Use actual token counts from model APIs instead of client-side estimation - Fix BaseClient.js to preserve tokenCount in response messages - Add tokenCount, usage, and tokens fields to message schema - Update Header component to include ConversationCost display - Support OpenAI, Anthropic, Google, and other major model providers - Include color-coded cost display based on amount - Add 32 unit tests for pricing calculation logic 🤖 Generated with Claude Code Co-Authored-By: Claude --- api/app/clients/BaseClient.js | 14 +- api/server/routes/convos.js | 73 ++ .../services/ConversationCostDynamic.js | 559 ++++++++++ api/server/services/ModelPricing.js | 993 ++++++++++++++++++ .../services/__tests__/ModelPricing.test.js | 329 ++++++ .../src/components/Chat/ConversationCost.tsx | 63 ++ client/src/components/Chat/Header.tsx | 3 + client/src/locales/en/translation.json | 5 + packages/data-provider/src/schemas.ts | 3 + 9 files changed, 2041 insertions(+), 1 deletion(-) create mode 100644 api/server/services/ConversationCostDynamic.js create mode 100644 api/server/services/ModelPricing.js create mode 100644 api/server/services/__tests__/ModelPricing.test.js create mode 100644 client/src/components/Chat/ConversationCost.tsx diff --git a/api/app/clients/BaseClient.js b/api/app/clients/BaseClient.js index ec4fbd97d..f9e8d155c 100644 --- a/api/app/clients/BaseClient.js +++ b/api/app/clients/BaseClient.js @@ -779,13 +779,25 @@ class BaseClient { } } + // Persist usage metadata on the assistant message if available for accurate costing + if (this.getStreamUsage != null) { + const streamUsage = this.getStreamUsage(); + if (streamUsage && (Number(streamUsage[this.inputTokensKey]) > 0 || Number(streamUsage[this.outputTokensKey]) > 0)) { + responseMessage.usage = { + prompt_tokens: streamUsage[this.inputTokensKey], + completion_tokens: streamUsage[this.outputTokensKey], + reasoning_tokens: streamUsage.reasoning_tokens, + input_token_details: streamUsage.input_token_details, + }; + } + } + responseMessage.databasePromise = this.saveMessageToDatabase( responseMessage, saveOptions, user, ); this.savedMessageIds.add(responseMessage.messageId); - delete responseMessage.tokenCount; return responseMessage; } diff --git a/api/server/routes/convos.js b/api/server/routes/convos.js index 7113f2d63..0b75e8fb8 100644 --- a/api/server/routes/convos.js +++ b/api/server/routes/convos.js @@ -19,6 +19,10 @@ const assistantClients = { }; const router = express.Router(); +const { + getConversationCostDisplayFromMessages, + getMultipleConversationCosts, +} = require('~/server/services/ConversationCostDynamic'); router.use(requireJwtAuth); router.get('/', async (req, res) => { @@ -230,3 +234,72 @@ router.post('/duplicate', async (req, res) => { }); module.exports = router; + +/** + * GET /:conversationId/cost + * Get cost summary for a specific conversation + */ +router.get('/:conversationId/cost', async (req, res) => { + try { + const { conversationId } = req.params; + const userId = req.user.id; + + const { getConvo } = require('~/models/Conversation'); + const { 
getMessages } = require('~/models/Message'); + + const conversation = await getConvo(userId, conversationId); + if (!conversation) { + return res.status(404).json({ error: 'Conversation not found' }); + } + + const messages = await getMessages({ user: userId, conversationId }); + if (messages.length === 0) { + return res.status(404).json({ error: 'No messages found in this conversation' }); + } + + const costDisplay = getConversationCostDisplayFromMessages(messages); + if (!costDisplay) { + return res.json({ + conversationId, + totalCost: '$0.00', + totalCostRaw: 0, + primaryModel: 'Unknown', + totalTokens: 0, + lastUpdated: new Date(), + error: 'No cost data available', + }); + } + + costDisplay.conversationId = conversationId; + res.json(costDisplay); + } catch (error) { + logger.error('Error getting conversation cost:', error); + res.status(500).json({ error: 'Failed to calculate conversation cost' }); + } +}); + +/** + * POST /costs + * Get cost summaries for multiple conversations + * Body: { conversationIds: string[] } + */ +router.post('/costs', async (req, res) => { + try { + const { conversationIds } = req.body; + const userId = req.user.id; + + if (!Array.isArray(conversationIds)) { + return res.status(400).json({ error: 'conversationIds must be an array' }); + } + + if (conversationIds.length > 50) { + return res.status(400).json({ error: 'Maximum 50 conversations allowed per request' }); + } + + const costs = await getMultipleConversationCosts(conversationIds, userId); + res.json(costs); + } catch (error) { + logger.error('Error getting multiple conversation costs:', error); + res.status(500).json({ error: 'Failed to calculate conversation costs' }); + } +}); diff --git a/api/server/services/ConversationCostDynamic.js b/api/server/services/ConversationCostDynamic.js new file mode 100644 index 000000000..8412c79a3 --- /dev/null +++ b/api/server/services/ConversationCostDynamic.js @@ -0,0 +1,559 @@ +const { calculateTokenCost, getModelProvider } = require('./ModelPricing'); + +// Use console for logging to avoid circular dependencies +const logger = { + info: (msg, data) => console.log(msg, data || ''), + warn: (msg) => console.warn(msg), + error: (msg, error) => console.error(msg, error || ''), +}; + +/** + * Calculate the total cost of a conversation from messages + * @param {Array} messages - Array of message objects from the database + * @param {string} messages[].messageId - Unique identifier for the message + * @param {string|null} messages[].model - The model used (null for user messages) + * @param {number} [messages[].tokenCount] - Token count for the message + * @param {Object} [messages[].usage] - OpenAI-style usage object + * @param {Object} [messages[].tokens] - Alternative token format + * @param {Date|string|number} [messages[].createdAt] - When the message was created + * @returns {Object|null} Cost summary with total cost, breakdown, and model details + */ +function calculateConversationCostFromMessages(messages) { + try { + if (!messages || messages.length === 0) { + return null; + } + + const costBreakdown = { + prompt: 0, + completion: 0, + cacheWrite: 0, + cacheRead: 0, + reasoning: 0, + }; + + const tokenUsage = { + promptTokens: 0, + completionTokens: 0, + cacheWriteTokens: 0, + cacheReadTokens: 0, + reasoningTokens: 0, + }; + + const modelBreakdown = new Map(); + let lastUpdated = new Date(0); + + messages.forEach((message, index) => { + const hasTokenInfo = !!(message.tokenCount || message.tokens || message.usage); + const inferredRole = message?.model ? 
'assistant' : 'user'; + + if (index < 3) { + logger.info( + `Message ${index}: model=${message.model}, tokenCount=${message.tokenCount}, hasTokenInfo=${hasTokenInfo}, role=${inferredRole}`, + ); + } + + if (!hasTokenInfo) { + return; + } + + if (inferredRole === 'assistant' && !message.model) { + return; + } + + const messageDate = new Date(message.createdAt || message.timestamp || Date.now()); + if (messageDate > lastUpdated) { + lastUpdated = messageDate; + } + + const modelKey = message.model || 'user-input'; + if (!modelBreakdown.has(modelKey)) { + modelBreakdown.set(modelKey, { + model: modelKey, + provider: message.model ? getModelProvider(message.model) : 'user', + cost: 0, + tokenUsage: { + promptTokens: 0, + completionTokens: 0, + cacheWriteTokens: 0, + cacheReadTokens: 0, + reasoningTokens: 0, + }, + messageCount: 0, + }); + } + + const modelData = modelBreakdown.get(modelKey); + modelData.messageCount++; + + let currentTokenUsage = { + promptTokens: 0, + completionTokens: 0, + cacheWriteTokens: 0, + cacheReadTokens: 0, + reasoningTokens: 0, + }; + + if (message.usage) { + currentTokenUsage.promptTokens = message.usage.prompt_tokens || message.usage.input_tokens || 0; + currentTokenUsage.completionTokens = message.usage.completion_tokens || message.usage.output_tokens || 0; + currentTokenUsage.reasoningTokens = message.usage.reasoning_tokens || 0; + const write = Number(message.usage?.input_token_details?.cache_creation) || 0; + const read = Number(message.usage?.input_token_details?.cache_read) || 0; + currentTokenUsage.cacheWriteTokens = write; + currentTokenUsage.cacheReadTokens = read; + } else if (message.tokens) { + currentTokenUsage.promptTokens = message.tokens.prompt || message.tokens.input || 0; + currentTokenUsage.completionTokens = message.tokens.completion || message.tokens.output || 0; + } else if (message.tokenCount) { + if (inferredRole === 'assistant') { + currentTokenUsage.completionTokens = message.tokenCount; + } else { + currentTokenUsage.promptTokens = message.tokenCount; + } + } + + if (message.model) { + const cost = calculateTokenCost(message.model, currentTokenUsage, messageDate); + if (!cost.error) { + costBreakdown.prompt += cost.prompt; + costBreakdown.completion += cost.completion; + costBreakdown.cacheWrite += cost.cacheWrite; + costBreakdown.cacheRead += cost.cacheRead; + costBreakdown.reasoning += cost.reasoning; + modelData.cost += cost.total; + } else { + logger.warn(`Could not calculate cost for model ${message.model}: ${cost.error}`); + } + } + + for (const [key, value] of Object.entries(currentTokenUsage)) { + modelData.tokenUsage[key] += value; + tokenUsage[key] += value; + } + }); + + const totalCost = Object.values(costBreakdown).reduce((sum, cost) => sum + cost, 0); + const modelBreakdownArray = Array.from(modelBreakdown.values()).sort((a, b) => b.cost - a.cost); + + logger.info('Cost calculation results:', { + totalCost, + costBreakdown, + tokenUsage, + modelCount: modelBreakdownArray.length, + models: modelBreakdownArray.map((m) => ({ + model: m.model, + cost: m.cost, + tokens: m.tokenUsage, + })), + }); + + return { + totalCost: Math.round(totalCost * 100000) / 100000, + costBreakdown: { + prompt: Math.round(costBreakdown.prompt * 100000) / 100000, + completion: Math.round(costBreakdown.completion * 100000) / 100000, + cacheWrite: Math.round(costBreakdown.cacheWrite * 100000) / 100000, + cacheRead: Math.round(costBreakdown.cacheRead * 100000) / 100000, + reasoning: Math.round(costBreakdown.reasoning * 100000) / 100000, + }, + 
tokenUsage,
+      modelBreakdown: modelBreakdownArray,
+      lastUpdated,
+    };
+  } catch (error) {
+    logger.error('Error calculating conversation cost from messages:', error);
+    return null;
+  }
+}
+
+/**
+ * Get simplified cost display for UI from messages
+ * @param {Array} messages - Array of message objects from the database
+ * @returns {Object|null} Simplified cost data for UI display
+ */
+function getConversationCostDisplayFromMessages(messages) {
+  try {
+    if (!messages || messages.length === 0) {
+      return null;
+    }
+
+    const costSummary = calculateConversationCostFromMessages(messages);
+    if (!costSummary) {
+      return null;
+    }
+
+    const formatCost = (cost) => {
+      if (cost < 0.001) return '<$0.001';
+      if (cost < 0.01) return `$${cost.toFixed(4)}`;
+      if (cost < 1) return `$${cost.toFixed(3)}`;
+      return `$${cost.toFixed(2)}`;
+    };
+
+    return {
+      totalCost: formatCost(costSummary.totalCost),
+      totalCostRaw: costSummary.totalCost,
+      primaryModel: costSummary.modelBreakdown[0]?.model || 'Unknown',
+      totalTokens: costSummary.tokenUsage.promptTokens + costSummary.tokenUsage.completionTokens,
+      lastUpdated: costSummary.lastUpdated,
+    };
+  } catch (error) {
+    logger.error('Error getting conversation cost display from messages:', error);
+    return null;
+  }
+}
+
+/**
+ * Get costs for multiple conversations in batch
+ * @param {string[]} conversationIds - Array of conversation IDs
+ * @param {string} userId - User ID
+ * @returns {Promise<Object<string, Object|null>>} Map of conversationId to cost display data
+ */
+async function getMultipleConversationCosts(conversationIds, userId) {
+  try {
+    const { getMessages } = require('~/models/Message');
+    const results = {};
+
+    const batchSize = 10;
+    for (let i = 0; i < conversationIds.length; i += batchSize) {
+      const batch = conversationIds.slice(i, i + batchSize);
+      await Promise.all(
+        batch.map(async (conversationId) => {
+          try {
+            const messages = await getMessages({ user: userId, conversationId });
+            if (messages && messages.length > 0) {
+              const costDisplay = getConversationCostDisplayFromMessages(messages);
+              results[conversationId] = costDisplay ? { ...costDisplay, conversationId } : null;
+            } else {
+              results[conversationId] = null;
+            }
+          } catch (error) {
+            logger.error(`Error calculating cost for conversation ${conversationId}:`, error);
+            results[conversationId] = null;
+          }
+        }),
+      );
+    }
+
+    return results;
+  } catch (error) {
+    logger.error('Error getting multiple conversation costs:', error);
+    return {};
+  }
+}
+
+module.exports = {
+  calculateConversationCostFromMessages,
+  getConversationCostDisplayFromMessages,
+  getMultipleConversationCosts,
+};
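Usage sketch (annotation, not part of the patch): how the service above behaves on LibreChat-shaped messages. The function and require path come from this diff; the message values and the arithmetic (gpt-4o at $5/$15 per 1M tokens, per ModelPricing.js below) are illustrative.

const {
  calculateConversationCostFromMessages,
} = require('~/server/services/ConversationCostDynamic');

// A user turn carries only tokenCount (model is null), so it adds tokens but no cost;
// the assistant turn carries the provider-reported usage that BaseClient.js now persists.
const messages = [
  { messageId: 'u1', model: null, tokenCount: 1200, createdAt: '2025-08-17T10:00:00Z' },
  {
    messageId: 'a1',
    model: 'gpt-4o',
    usage: { prompt_tokens: 1200, completion_tokens: 450 },
    createdAt: '2025-08-17T10:00:05Z',
  },
];

const summary = calculateConversationCostFromMessages(messages);
// summary.totalCost ≈ 0.01275 = (1200 / 1e6) * 5.0 + (450 / 1e6) * 15.0
// summary.tokenUsage.promptTokens === 2400 (user tokenCount + assistant prompt_tokens)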
diff --git a/api/server/services/ModelPricing.js b/api/server/services/ModelPricing.js
new file mode 100644
index 000000000..2acface5f
--- /dev/null
+++ b/api/server/services/ModelPricing.js
@@ -0,0 +1,993 @@
+// Use console for logging to avoid circular dependencies
+const logger = {
+  warn: (msg) => console.warn(msg),
+  error: (msg) => console.error(msg),
+};
+
+/**
+ * Model pricing configuration with historical data
+ * Prices are in USD per 1M tokens
+ *
+ * Format:
+ * - Each model has an array of pricing periods
+ * - Periods are sorted by effectiveFrom date (newest first)
+ * - effectiveTo is optional (null means current pricing)
+ */
+const PRICING_DATA = {
+  // OpenAI Models
+  'gpt-4o': [
+    {
+      effectiveFrom: new Date('2024-05-13'),
+      prompt: 5.0,
+      completion: 15.0,
+    },
+  ],
+  'gpt-4o-mini': [
+    {
+      effectiveFrom: new Date('2024-07-18'),
+      prompt: 0.15,
+      completion: 0.6,
+    },
+  ],
+  'gpt-4-turbo': [
+    {
+      effectiveFrom: new Date('2024-04-09'),
+      prompt: 10.0,
+      completion: 30.0,
+    },
+  ],
+  'gpt-4': [
+    {
+      effectiveFrom: new Date('2024-01-01'),
+      prompt: 30.0,
+      completion: 60.0,
+    },
+  ],
+  'gpt-4-0613': [
+    {
+      effectiveFrom: new Date('2023-06-13'),
+      prompt: 30.0,
+      completion: 60.0,
+    },
+  ],
+  'gpt-4-0314': [
+    {
+      effectiveFrom: new Date('2023-03-14'),
+      prompt: 30.0,
+      completion: 60.0,
+    },
+  ],
+  'gpt-4-32k-0314': [
+    {
+      effectiveFrom: new Date('2023-03-14'),
+      prompt: 60.0,
+      completion: 120.0,
+    },
+  ],
+  'gpt-3.5-turbo': [
+    {
+      effectiveFrom: new Date('2024-01-25'),
+      prompt: 0.5,
+      completion: 1.5,
+    },
+    {
+      effectiveFrom: new 
Date('2023-11-06'), + effectiveTo: new Date('2024-01-24'), + prompt: 1.0, + completion: 2.0, + }, + ], + o1: [ + { + effectiveFrom: new Date('2024-12-05'), + prompt: 15.0, + completion: 60.0, + reasoning: 15.0, + }, + ], + 'o1-mini': [ + { + effectiveFrom: new Date('2024-09-12'), + prompt: 3.0, + completion: 12.0, + reasoning: 3.0, + }, + ], + 'o1-preview': [ + { + effectiveFrom: new Date('2024-09-12'), + prompt: 15.0, + completion: 60.0, + reasoning: 15.0, + }, + ], + 'o1-preview-2024-09-12': [ + { + effectiveFrom: new Date('2024-09-12'), + prompt: 15.0, + completion: 60.0, + reasoning: 15.0, + }, + ], + 'o1-mini-2024-09-12': [ + { + effectiveFrom: new Date('2024-09-12'), + prompt: 3.0, + completion: 12.0, + reasoning: 3.0, + }, + ], + 'o3-mini': [ + { + effectiveFrom: new Date('2024-12-20'), + prompt: 1.5, + completion: 6.0, + reasoning: 1.5, + }, + ], + + // Anthropic Models + 'claude-3-5-sonnet': [ + { + effectiveFrom: new Date('2024-06-20'), + prompt: 3.0, + completion: 15.0, + cacheWrite: 3.75, + cacheRead: 0.3, + }, + ], + 'claude-3.5-sonnet': [ + { + effectiveFrom: new Date('2024-06-20'), + prompt: 3.0, + completion: 15.0, + cacheWrite: 3.75, + cacheRead: 0.3, + }, + ], + 'claude-3-5-haiku': [ + { + effectiveFrom: new Date('2024-11-01'), + prompt: 0.8, + completion: 4.0, + cacheWrite: 1.0, + cacheRead: 0.08, + }, + ], + 'claude-3.5-haiku': [ + { + effectiveFrom: new Date('2024-11-01'), + prompt: 0.8, + completion: 4.0, + cacheWrite: 1.0, + cacheRead: 0.08, + }, + ], + 'claude-3-opus': [ + { + effectiveFrom: new Date('2024-03-04'), + prompt: 15.0, + completion: 75.0, + }, + ], + 'claude-3-sonnet': [ + { + effectiveFrom: new Date('2024-03-04'), + prompt: 3.0, + completion: 15.0, + }, + ], + 'claude-3-haiku': [ + { + effectiveFrom: new Date('2024-03-04'), + prompt: 0.25, + completion: 1.25, + cacheWrite: 0.3, + cacheRead: 0.03, + }, + ], + 'claude-3-5-haiku-20241022': [ + { + effectiveFrom: new Date('2024-11-01'), + prompt: 0.8, + completion: 4.0, + cacheWrite: 1.0, + cacheRead: 0.08, + }, + ], + 'claude-3-5-sonnet-latest': [ + { + effectiveFrom: new Date('2024-10-22'), + prompt: 3.0, + completion: 15.0, + cacheWrite: 3.75, + cacheRead: 0.3, + }, + ], + 'claude-3-opus-20240229': [ + { + effectiveFrom: new Date('2024-03-04'), + prompt: 15.0, + completion: 75.0, + }, + ], + 'claude-3-sonnet-20240229': [ + { + effectiveFrom: new Date('2024-03-04'), + prompt: 3.0, + completion: 15.0, + }, + ], + 'claude-3-haiku-20240307': [ + { + effectiveFrom: new Date('2024-03-07'), + prompt: 0.25, + completion: 1.25, + cacheWrite: 0.3, + cacheRead: 0.03, + }, + ], + + // Google Models + 'gemini-1.5-pro': [ + { + effectiveFrom: new Date('2024-02-15'), + prompt: 2.5, + completion: 10.0, + }, + ], + 'gemini-1.5-flash': [ + { + effectiveFrom: new Date('2024-05-14'), + prompt: 0.15, + completion: 0.6, + }, + ], + 'gemini-1.5-flash-8b': [ + { + effectiveFrom: new Date('2024-10-03'), + prompt: 0.075, + completion: 0.3, + }, + ], + + // Additional OpenAI Models + 'gpt-4.5-preview': [ + { + effectiveFrom: new Date('2025-02-27'), + prompt: 10.0, + completion: 30.0, + }, + ], + 'gpt-4.5-preview-2025-02-27': [ + { + effectiveFrom: new Date('2025-02-27'), + prompt: 10.0, + completion: 30.0, + }, + ], + 'gpt-4-vision-preview': [ + { + effectiveFrom: new Date('2023-11-06'), + prompt: 10.0, + completion: 30.0, + }, + ], + 'gpt-4-turbo-preview': [ + { + effectiveFrom: new Date('2024-01-25'), + prompt: 10.0, + completion: 30.0, + }, + ], + 'gpt-4-1106-preview': [ + { + effectiveFrom: new Date('2023-11-06'), + prompt: 
10.0, + completion: 30.0, + }, + ], + 'gpt-4-0125-preview': [ + { + effectiveFrom: new Date('2024-01-25'), + prompt: 10.0, + completion: 30.0, + }, + ], + 'gpt-3.5-turbo-0125': [ + { + effectiveFrom: new Date('2024-01-25'), + prompt: 0.5, + completion: 1.5, + }, + ], + 'gpt-3.5-turbo-0613': [ + { + effectiveFrom: new Date('2023-06-13'), + prompt: 1.5, + completion: 2.0, + }, + ], + 'gpt-3.5-turbo-16k-0613': [ + { + effectiveFrom: new Date('2023-06-13'), + prompt: 3.0, + completion: 4.0, + }, + ], + 'gpt-3.5-turbo-1106': [ + { + effectiveFrom: new Date('2023-11-06'), + prompt: 1.0, + completion: 2.0, + }, + ], + 'gpt-3.5-turbo-16k': [ + { + effectiveFrom: new Date('2023-06-13'), + prompt: 3.0, + completion: 4.0, + }, + ], + 'gpt-3.5-turbo-instruct': [ + { + effectiveFrom: new Date('2023-09-14'), + prompt: 1.5, + completion: 2.0, + }, + ], + 'chatgpt-4o-latest': [ + { + effectiveFrom: new Date('2024-05-13'), + prompt: 5.0, + completion: 15.0, + }, + ], + 'gpt-4o-2024-05-13': [ + { + effectiveFrom: new Date('2024-05-13'), + prompt: 5.0, + completion: 15.0, + }, + ], + 'gpt-4o-2024-08-06': [ + { + effectiveFrom: new Date('2024-08-06'), + prompt: 2.5, + completion: 10.0, + }, + ], + 'gpt-4o-mini-2024-07-18': [ + { + effectiveFrom: new Date('2024-07-18'), + prompt: 0.15, + completion: 0.6, + }, + ], + 'gpt-4-turbo-2024-04-09': [ + { + effectiveFrom: new Date('2024-04-09'), + prompt: 10.0, + completion: 30.0, + }, + ], + 'gpt-4-0125': [ + { + effectiveFrom: new Date('2024-01-25'), + prompt: 10.0, + completion: 30.0, + }, + ], + 'gpt-4-1106': [ + { + effectiveFrom: new Date('2023-11-06'), + prompt: 10.0, + completion: 30.0, + }, + ], + + // Additional Anthropic Models + 'claude-opus-4-20250514': [ + { + effectiveFrom: new Date('2025-05-14'), + prompt: 15.0, + completion: 75.0, + }, + ], + 'claude-opus-4-latest': [ + { + effectiveFrom: new Date('2025-05-14'), + prompt: 15.0, + completion: 75.0, + }, + ], + 'claude-opus-4-1-20250805': [ + { + effectiveFrom: new Date('2025-08-05'), + prompt: 15.0, + completion: 75.0, + }, + ], + 'claude-sonnet-4-20250514': [ + { + effectiveFrom: new Date('2025-05-14'), + prompt: 3.0, + completion: 15.0, + }, + ], + 'claude-sonnet-4-latest': [ + { + effectiveFrom: new Date('2025-05-14'), + prompt: 3.0, + completion: 15.0, + }, + ], + 'claude-3-7-sonnet-latest': [ + { + effectiveFrom: new Date('2025-02-19'), + prompt: 1.5, + completion: 7.5, + }, + ], + 'claude-3-7-sonnet-20250219': [ + { + effectiveFrom: new Date('2025-02-19'), + prompt: 1.5, + completion: 7.5, + }, + ], + 'claude-3-5-sonnet-20240620': [ + { + effectiveFrom: new Date('2024-06-20'), + prompt: 3.0, + completion: 15.0, + cacheWrite: 3.75, + cacheRead: 0.3, + }, + ], + 'claude-3-5-sonnet-20241022': [ + { + effectiveFrom: new Date('2024-10-22'), + prompt: 3.0, + completion: 15.0, + cacheWrite: 3.75, + cacheRead: 0.3, + }, + ], + 'claude-2.1': [ + { + effectiveFrom: new Date('2023-11-21'), + prompt: 8.0, + completion: 24.0, + }, + ], + 'claude-2': [ + { + effectiveFrom: new Date('2023-07-11'), + prompt: 8.0, + completion: 24.0, + }, + ], + 'claude-instant-1': [ + { + effectiveFrom: new Date('2023-03-14'), + prompt: 0.8, + completion: 2.4, + }, + ], + 'claude-1.2': [ + { + effectiveFrom: new Date('2023-05-01'), + prompt: 8.0, + completion: 24.0, + }, + ], + 'claude-1': [ + { + effectiveFrom: new Date('2023-03-14'), + prompt: 8.0, + completion: 24.0, + }, + ], + 'claude-1-100k': [ + { + effectiveFrom: new Date('2023-05-01'), + prompt: 8.0, + completion: 24.0, + }, + ], + 'claude-instant-1-100k': [ + { + 
effectiveFrom: new Date('2023-05-01'), + prompt: 0.8, + completion: 2.4, + }, + ], + + // Additional Google Models + 'gemini-2.0-flash-001': [ + { + effectiveFrom: new Date('2024-12-11'), + prompt: 0.15, + completion: 0.6, + }, + ], + 'gemini-2.0-flash-exp': [ + { + effectiveFrom: new Date('2024-12-11'), + prompt: 0.0, // Free during experimental phase + completion: 0.0, + }, + ], + 'gemini-2.0-flash-lite': [ + { + effectiveFrom: new Date('2024-12-11'), + prompt: 0.075, + completion: 0.3, + }, + ], + 'gemini-2.0-pro-exp-02-05': [ + { + effectiveFrom: new Date('2025-02-05'), + prompt: 0.0, // Free during experimental phase + completion: 0.0, + }, + ], + 'gemini-1.5-flash-001': [ + { + effectiveFrom: new Date('2024-05-14'), + prompt: 0.15, + completion: 0.6, + }, + ], + 'gemini-1.5-flash-002': [ + { + effectiveFrom: new Date('2024-09-24'), + prompt: 0.15, + completion: 0.6, + }, + ], + 'gemini-1.5-pro-001': [ + { + effectiveFrom: new Date('2024-02-15'), + prompt: 2.5, + completion: 10.0, + }, + ], + 'gemini-1.5-pro-002': [ + { + effectiveFrom: new Date('2024-09-24'), + prompt: 1.25, + completion: 5.0, + }, + ], + 'gemini-1.0-pro-001': [ + { + effectiveFrom: new Date('2023-12-06'), + prompt: 0.5, + completion: 1.5, + }, + ], + 'gemini-pro': [ + { + effectiveFrom: new Date('2023-12-06'), + prompt: 0.5, + completion: 1.5, + }, + ], + 'gemini-pro-vision': [ + { + effectiveFrom: new Date('2023-12-06'), + prompt: 0.5, + completion: 1.5, + }, + ], + + // AWS Bedrock Models (using same pricing as direct API) + 'anthropic.claude-3-5-sonnet-20241022-v2:0': [ + { + effectiveFrom: new Date('2024-10-22'), + prompt: 3.0, + completion: 15.0, + }, + ], + 'anthropic.claude-3-5-sonnet-20240620-v1:0': [ + { + effectiveFrom: new Date('2024-06-20'), + prompt: 3.0, + completion: 15.0, + }, + ], + 'anthropic.claude-3-5-haiku-20241022-v1:0': [ + { + effectiveFrom: new Date('2024-11-01'), + prompt: 0.8, + completion: 4.0, + }, + ], + 'anthropic.claude-3-opus-20240229-v1:0': [ + { + effectiveFrom: new Date('2024-03-04'), + prompt: 15.0, + completion: 75.0, + }, + ], + 'anthropic.claude-3-sonnet-20240229-v1:0': [ + { + effectiveFrom: new Date('2024-03-04'), + prompt: 3.0, + completion: 15.0, + }, + ], + 'anthropic.claude-3-haiku-20240307-v1:0': [ + { + effectiveFrom: new Date('2024-03-07'), + prompt: 0.25, + completion: 1.25, + }, + ], + 'anthropic.claude-v2': [ + { + effectiveFrom: new Date('2023-07-11'), + prompt: 8.0, + completion: 24.0, + }, + ], + 'anthropic.claude-v2:1': [ + { + effectiveFrom: new Date('2023-11-21'), + prompt: 8.0, + completion: 24.0, + }, + ], + 'anthropic.claude-instant-v1': [ + { + effectiveFrom: new Date('2023-03-14'), + prompt: 0.8, + completion: 2.4, + }, + ], + + // Cohere Models (via Bedrock) + 'cohere.command-r-v1:0': [ + { + effectiveFrom: new Date('2024-03-01'), + prompt: 0.5, + completion: 1.5, + }, + ], + 'cohere.command-r-plus-v1:0': [ + { + effectiveFrom: new Date('2024-04-01'), + prompt: 3.0, + completion: 15.0, + }, + ], + + // Meta Llama Models (via Bedrock) + 'meta.llama2-13b-chat-v1': [ + { + effectiveFrom: new Date('2023-07-01'), + prompt: 0.75, + completion: 1.0, + }, + ], + 'meta.llama2-70b-chat-v1': [ + { + effectiveFrom: new Date('2023-07-01'), + prompt: 1.95, + completion: 2.56, + }, + ], + 'meta.llama3-8b-instruct-v1:0': [ + { + effectiveFrom: new Date('2024-04-18'), + prompt: 0.3, + completion: 0.6, + }, + ], + 'meta.llama3-70b-instruct-v1:0': [ + { + effectiveFrom: new Date('2024-04-18'), + prompt: 2.65, + completion: 3.5, + }, + ], + 
'meta.llama3-1-8b-instruct-v1:0': [ + { + effectiveFrom: new Date('2024-07-23'), + prompt: 0.22, + completion: 0.22, + }, + ], + 'meta.llama3-1-70b-instruct-v1:0': [ + { + effectiveFrom: new Date('2024-07-23'), + prompt: 0.99, + completion: 0.99, + }, + ], + 'meta.llama3-1-405b-instruct-v1:0': [ + { + effectiveFrom: new Date('2024-07-23'), + prompt: 5.32, + completion: 16.0, + }, + ], + + // Mistral Models (via Bedrock and direct) + 'mistral.mistral-7b-instruct-v0:2': [ + { + effectiveFrom: new Date('2023-09-27'), + prompt: 0.15, + completion: 0.2, + }, + ], + 'mistral.mixtral-8x7b-instruct-v0:1': [ + { + effectiveFrom: new Date('2023-12-11'), + prompt: 0.45, + completion: 0.7, + }, + ], + 'mistral.mistral-large-2402-v1:0': [ + { + effectiveFrom: new Date('2024-02-26'), + prompt: 4.0, + completion: 12.0, + }, + ], + 'mistral.mistral-large-2407-v1:0': [ + { + effectiveFrom: new Date('2024-07-24'), + prompt: 2.0, + completion: 6.0, + }, + ], + 'mistral.mistral-small-2410-v1:0': [ + { + effectiveFrom: new Date('2024-10-01'), + prompt: 0.2, + completion: 0.6, + }, + ], + 'mistral.mistral-small-2402-v1:0': [ + { + effectiveFrom: new Date('2024-02-26'), + prompt: 0.2, + completion: 0.6, + }, + ], + + // AI21 Models (via Bedrock) + 'ai21.jamba-instruct-v1:0': [ + { + effectiveFrom: new Date('2024-03-01'), + prompt: 0.5, + completion: 0.7, + }, + ], + + // Amazon Titan Models + 'amazon.titan-text-lite-v1': [ + { + effectiveFrom: new Date('2023-11-29'), + prompt: 0.3, + completion: 0.4, + }, + ], + 'amazon.titan-text-express-v1': [ + { + effectiveFrom: new Date('2023-11-29'), + prompt: 0.8, + completion: 1.1, + }, + ], + 'amazon.titan-text-premier-v1:0': [ + { + effectiveFrom: new Date('2024-05-01'), + prompt: 5.0, + completion: 15.0, + }, + ], + + // xAI Models + 'grok-2': [ + { + effectiveFrom: new Date('2024-08-01'), + prompt: 5.0, + completion: 10.0, + }, + ], + 'grok-2-mini': [ + { + effectiveFrom: new Date('2024-08-01'), + prompt: 2.0, + completion: 6.0, + }, + ], + + // DeepSeek Models + 'deepseek-chat': [ + { + effectiveFrom: new Date('2024-05-01'), + prompt: 0.14, + completion: 0.28, + }, + ], + 'deepseek-coder': [ + { + effectiveFrom: new Date('2024-05-01'), + prompt: 0.14, + completion: 0.28, + }, + ], + + // Add more models as needed +}; + +/** + * Get pricing for a model at a specific date + * @param {string} model - Model identifier + * @param {Date} [date] - Date to get pricing for (defaults to now) + * @returns {Object|null} Pricing data or null if not found + */ +function getModelPricing(model, date = new Date()) { + const modelPricing = PRICING_DATA[model]; + if (!modelPricing) { + logger.warn(`No pricing data found for model: ${model}`); + return null; + } + + // Find the pricing period that was effective at the given date + for (const period of modelPricing) { + if (date >= period.effectiveFrom && (!period.effectiveTo || date <= period.effectiveTo)) { + return period; + } + } + + // If no exact match, return the earliest pricing as fallback + return modelPricing[modelPricing.length - 1]; +} + +/** + * Calculate cost for token usage + * @param {string} model - Model identifier + * @param {Object} usage - Token usage object + * @param {number} [usage.promptTokens] - Number of prompt tokens + * @param {number} [usage.completionTokens] - Number of completion tokens + * @param {number} [usage.cacheWriteTokens] - Number of cache write tokens + * @param {number} [usage.cacheReadTokens] - Number of cache read tokens + * @param {number} [usage.reasoningTokens] - Number of reasoning 
tokens + * @param {Date} [date] - Date for pricing calculation (defaults to now) + * @returns {Object} Cost breakdown + */ +function calculateTokenCost(model, usage, date = new Date()) { + // Validate inputs + if (!model || typeof model !== 'string') { + return { + prompt: 0, + completion: 0, + cacheWrite: 0, + cacheRead: 0, + reasoning: 0, + total: 0, + error: 'Invalid model specified', + }; + } + + if (!usage || typeof usage !== 'object') { + return { + prompt: 0, + completion: 0, + cacheWrite: 0, + cacheRead: 0, + reasoning: 0, + total: 0, + error: 'Invalid usage object', + }; + } + + const pricing = getModelPricing(model, date); + if (!pricing) { + return { + prompt: 0, + completion: 0, + cacheWrite: 0, + cacheRead: 0, + reasoning: 0, + total: 0, + error: 'No pricing data available', + }; + } + + const costs = { + prompt: 0, + completion: 0, + cacheWrite: 0, + cacheRead: 0, + reasoning: 0, + }; + + // Calculate each cost component (convert from per million to actual cost) + if (usage.promptTokens) { + costs.prompt = (usage.promptTokens / 1_000_000) * pricing.prompt; + } + + if (usage.completionTokens) { + costs.completion = (usage.completionTokens / 1_000_000) * pricing.completion; + } + + if (usage.cacheWriteTokens && pricing.cacheWrite) { + costs.cacheWrite = (usage.cacheWriteTokens / 1_000_000) * pricing.cacheWrite; + } + + if (usage.cacheReadTokens && pricing.cacheRead) { + costs.cacheRead = (usage.cacheReadTokens / 1_000_000) * pricing.cacheRead; + } + + if (usage.reasoningTokens && pricing.reasoning) { + costs.reasoning = (usage.reasoningTokens / 1_000_000) * pricing.reasoning; + } + + // Calculate total + costs.total = + costs.prompt + costs.completion + costs.cacheWrite + costs.cacheRead + costs.reasoning; + + return costs; +} + +/** + * Get all supported models + * @returns {string[]} Array of model identifiers + */ +function getSupportedModels() { + return Object.keys(PRICING_DATA); +} + +/** + * Get model provider from model name + * @param {string} model - Model identifier + * @returns {string} Provider name + */ +function getModelProvider(model) { + if (model.includes('gpt') || model.includes('o1') || model.includes('chatgpt')) { + return 'OpenAI'; + } + if (model.includes('claude') || model.startsWith('anthropic.')) { + return 'Anthropic'; + } + if (model.includes('gemini')) { + return 'Google'; + } + if (model.includes('mistral')) { + return 'Mistral'; + } + if (model.includes('command') || model.startsWith('cohere.')) { + return 'Cohere'; + } + if (model.includes('llama') || model.startsWith('meta.')) { + return 'Meta'; + } + if (model.includes('titan') || model.startsWith('amazon.')) { + return 'Amazon'; + } + if (model.includes('grok')) { + return 'xAI'; + } + if (model.includes('deepseek')) { + return 'DeepSeek'; + } + if (model.startsWith('ai21.')) { + return 'AI21'; + } + return 'Unknown'; +} + +module.exports = { + getModelPricing, + calculateTokenCost, + getSupportedModels, + getModelProvider, + PRICING_DATA, +}; diff --git a/api/server/services/__tests__/ModelPricing.test.js b/api/server/services/__tests__/ModelPricing.test.js new file mode 100644 index 000000000..444957600 --- /dev/null +++ b/api/server/services/__tests__/ModelPricing.test.js @@ -0,0 +1,329 @@ +const { + getModelPricing, + calculateTokenCost, + getSupportedModels, + getModelProvider, +} = require('../ModelPricing'); + +describe('ModelPricing Service', () => { + describe('getModelPricing', () => { + it('should return pricing for known models', () => { + const pricing = getModelPricing('gpt-4o'); 
+ expect(pricing).toBeDefined(); + expect(pricing.prompt).toBe(5.0); + expect(pricing.completion).toBe(15.0); + }); + + it('should return null for unknown models', () => { + const pricing = getModelPricing('unknown-model'); + expect(pricing).toBeNull(); + }); + + it('should return historical pricing for older dates', () => { + const oldDate = new Date('2023-11-10'); + const pricing = getModelPricing('gpt-3.5-turbo', oldDate); + expect(pricing).toBeDefined(); + expect(pricing.prompt).toBe(1.0); // Historical price + expect(pricing.completion).toBe(2.0); + }); + + it('should return current pricing for recent dates', () => { + const recentDate = new Date('2024-06-01'); + const pricing = getModelPricing('gpt-3.5-turbo', recentDate); + expect(pricing).toBeDefined(); + expect(pricing.prompt).toBe(0.5); // Current price + expect(pricing.completion).toBe(1.5); + }); + + it('should handle Claude models with cache pricing', () => { + const pricing = getModelPricing('claude-3-5-sonnet'); + expect(pricing).toBeDefined(); + expect(pricing.cacheWrite).toBe(3.75); + expect(pricing.cacheRead).toBe(0.3); + }); + + it('should handle o1 models with reasoning pricing', () => { + const pricing = getModelPricing('o1'); + expect(pricing).toBeDefined(); + expect(pricing.reasoning).toBe(15.0); + }); + + it('should handle all newly added models', () => { + const newModels = [ + 'gpt-4-0314', + 'gpt-4-32k-0314', + 'gpt-3.5-turbo-0613', + 'gpt-3.5-turbo-16k-0613', + 'o1-preview-2024-09-12', + 'o1-mini-2024-09-12', + 'o3-mini', + 'gpt-4o-mini-2024-07-18', + 'gpt-4-turbo-2024-04-09', + 'gpt-4-0125', + 'gpt-4-1106', + 'claude-3-5-haiku-20241022', + 'claude-3-5-sonnet-latest', + 'claude-3-opus-20240229', + 'claude-3-sonnet-20240229', + 'claude-3-haiku-20240307', + 'claude-1.2', + 'claude-1', + 'claude-1-100k', + 'claude-instant-1-100k', + 'anthropic.claude-v2', + 'anthropic.claude-v2:1', + 'anthropic.claude-instant-v1', + 'gemini-pro', + 'gemini-pro-vision', + 'mistral.mistral-small-2402-v1:0', + ]; + + newModels.forEach((model) => { + const pricing = getModelPricing(model); + expect(pricing).toBeDefined(); + expect(pricing.prompt).toBeGreaterThan(0); + expect(pricing.completion).toBeGreaterThan(0); + }); + }); + }); + + describe('calculateTokenCost', () => { + it('should calculate basic prompt and completion costs', () => { + const usage = { + promptTokens: 1000, + completionTokens: 500, + }; + const cost = calculateTokenCost('gpt-4o', usage); + + expect(cost.prompt).toBeCloseTo(0.005); // 1000/1M * 5.0 + expect(cost.completion).toBeCloseTo(0.0075); // 500/1M * 15.0 + expect(cost.total).toBeCloseTo(0.0125); + }); + + it('should handle zero token counts', () => { + const usage = { + promptTokens: 0, + completionTokens: 0, + }; + const cost = calculateTokenCost('gpt-4', usage); + + expect(cost.prompt).toBe(0); + expect(cost.completion).toBe(0); + expect(cost.total).toBe(0); + }); + + it('should handle large token counts', () => { + const usage = { + promptTokens: 100000, + completionTokens: 50000, + }; + const cost = calculateTokenCost('gpt-4', usage); + + expect(cost.prompt).toBeCloseTo(3.0); // 100k/1M * 30.0 + expect(cost.completion).toBeCloseTo(3.0); // 50k/1M * 60.0 + expect(cost.total).toBeCloseTo(6.0); + }); + + it('should calculate cache token costs for Claude models', () => { + const usage = { + promptTokens: 1000, + completionTokens: 500, + cacheWriteTokens: 2000, + cacheReadTokens: 3000, + }; + const cost = calculateTokenCost('claude-3-5-sonnet', usage); + + expect(cost.prompt).toBeCloseTo(0.003); // 1000/1M * 
3.0 + expect(cost.completion).toBeCloseTo(0.0075); // 500/1M * 15.0 + expect(cost.cacheWrite).toBeCloseTo(0.0075); // 2000/1M * 3.75 + expect(cost.cacheRead).toBeCloseTo(0.0009); // 3000/1M * 0.3 + expect(cost.total).toBeCloseTo(0.0189); + }); + + it('should calculate reasoning token costs for o1 models', () => { + const usage = { + promptTokens: 1000, + completionTokens: 500, + reasoningTokens: 2000, + }; + const cost = calculateTokenCost('o1', usage); + + expect(cost.prompt).toBeCloseTo(0.015); // 1000/1M * 15.0 + expect(cost.completion).toBeCloseTo(0.03); // 500/1M * 60.0 + expect(cost.reasoning).toBeCloseTo(0.03); // 2000/1M * 15.0 + expect(cost.total).toBeCloseTo(0.075); + }); + + it('should handle invalid model gracefully', () => { + const usage = { + promptTokens: 1000, + completionTokens: 500, + }; + const cost = calculateTokenCost('invalid-model', usage); + + expect(cost.total).toBe(0); + expect(cost.error).toBe('No pricing data available'); + }); + + it('should handle invalid usage object', () => { + const cost = calculateTokenCost('gpt-4', null); + + expect(cost.total).toBe(0); + expect(cost.error).toBe('Invalid usage object'); + }); + + it('should handle missing model parameter', () => { + const usage = { + promptTokens: 1000, + completionTokens: 500, + }; + const cost = calculateTokenCost(null, usage); + + expect(cost.total).toBe(0); + expect(cost.error).toBe('Invalid model specified'); + }); + + it('should use historical pricing for past dates', () => { + const usage = { + promptTokens: 1000, + completionTokens: 500, + }; + const oldDate = new Date('2023-11-10'); + const cost = calculateTokenCost('gpt-3.5-turbo', usage, oldDate); + + expect(cost.prompt).toBeCloseTo(0.001); // 1000/1M * 1.0 (historical) + expect(cost.completion).toBeCloseTo(0.001); // 500/1M * 2.0 (historical) + expect(cost.total).toBeCloseTo(0.002); + }); + }); + + describe('getSupportedModels', () => { + it('should return array of supported model names', () => { + const models = getSupportedModels(); + + expect(Array.isArray(models)).toBe(true); + expect(models.length).toBeGreaterThan(80); // We have 100+ models now + expect(models).toContain('gpt-4'); + expect(models).toContain('claude-3-opus'); + expect(models).toContain('gemini-1.5-pro'); + }); + + it('should include all newly added models', () => { + const models = getSupportedModels(); + + expect(models).toContain('gpt-4-0314'); + expect(models).toContain('o3-mini'); + expect(models).toContain('claude-1-100k'); + expect(models).toContain('gemini-pro'); + expect(models).toContain('anthropic.claude-v2'); + }); + }); + + describe('getModelProvider', () => { + it('should identify OpenAI models', () => { + expect(getModelProvider('gpt-4')).toBe('OpenAI'); + expect(getModelProvider('gpt-3.5-turbo')).toBe('OpenAI'); + expect(getModelProvider('o1-preview')).toBe('OpenAI'); + expect(getModelProvider('chatgpt-4o-latest')).toBe('OpenAI'); + }); + + it('should identify Anthropic models', () => { + expect(getModelProvider('claude-3-opus')).toBe('Anthropic'); + expect(getModelProvider('claude-2.1')).toBe('Anthropic'); + expect(getModelProvider('anthropic.claude-v2')).toBe('Anthropic'); + }); + + it('should identify Google models', () => { + expect(getModelProvider('gemini-1.5-pro')).toBe('Google'); + expect(getModelProvider('gemini-pro')).toBe('Google'); + }); + + it('should identify Mistral models', () => { + expect(getModelProvider('mistral.mistral-7b-instruct-v0:2')).toBe('Mistral'); + }); + + it('should identify Cohere models', () => { + 
expect(getModelProvider('cohere.command-r-v1:0')).toBe('Cohere');
+    });
+
+    it('should identify Meta models', () => {
+      expect(getModelProvider('meta.llama3-70b-instruct-v1:0')).toBe('Meta');
+    });
+
+    it('should identify Amazon models', () => {
+      expect(getModelProvider('amazon.titan-text-express-v1')).toBe('Amazon');
+    });
+
+    it('should identify xAI models', () => {
+      expect(getModelProvider('grok-2')).toBe('xAI');
+    });
+
+    it('should identify DeepSeek models', () => {
+      expect(getModelProvider('deepseek-chat')).toBe('DeepSeek');
+    });
+
+    it('should return Unknown for unrecognized models', () => {
+      expect(getModelProvider('unknown-model')).toBe('Unknown');
+    });
+  });
+
+  describe('Edge Cases', () => {
+    it('should handle extremely small token costs', () => {
+      const usage = {
+        promptTokens: 1,
+        completionTokens: 1,
+      };
+      const cost = calculateTokenCost('gpt-4o-mini', usage);
+
+      expect(cost.prompt).toBeCloseTo(0.00000015);
+      expect(cost.completion).toBeCloseTo(0.0000006);
+      expect(cost.total).toBeCloseTo(0.00000075);
+    });
+
+    it('should handle models with zero-cost experimental pricing', () => {
+      const usage = {
+        promptTokens: 10000,
+        completionTokens: 5000,
+      };
+      const cost = calculateTokenCost('gemini-2.0-flash-exp', usage);
+
+      expect(cost.prompt).toBe(0);
+      expect(cost.completion).toBe(0);
+      expect(cost.total).toBe(0);
+    });
+
+    it('should handle mixed token types in single request', () => {
+      const usage = {
+        promptTokens: 1000,
+        completionTokens: 500,
+        cacheWriteTokens: 200,
+        cacheReadTokens: 300,
+        reasoningTokens: 0, // Not all models have reasoning
+      };
+      const cost = calculateTokenCost('claude-3-5-sonnet', usage);
+
+      expect(cost.prompt).toBeCloseTo(0.003);
+      expect(cost.completion).toBeCloseTo(0.0075);
+      expect(cost.cacheWrite).toBeCloseTo(0.00075);
+      expect(cost.cacheRead).toBeCloseTo(0.00009);
+      expect(cost.reasoning).toBe(0);
+    });
+
+    it('should handle date boundaries correctly', () => {
+      // Test exact date match
+      const exactDate = new Date('2024-01-25');
+      const pricing = getModelPricing('gpt-3.5-turbo', exactDate);
+      expect(pricing.prompt).toBe(0.5);
+
+      // Test one day before change
+      const dayBefore = new Date('2024-01-24');
+      const pricingBefore = getModelPricing('gpt-3.5-turbo', dayBefore);
+      expect(pricingBefore.prompt).toBe(1.0);
+
+      // Test one day after change
+      const dayAfter = new Date('2024-01-26');
+      const pricingAfter = getModelPricing('gpt-3.5-turbo', dayAfter);
+      expect(pricingAfter.prompt).toBe(0.5);
+    });
+  });
+});
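For reference (annotation, not part of the patch): the component below consumes GET /api/convos/:conversationId/cost. A representative payload, following the route added to convos.js above; all values are illustrative.

// Shape returned by the single-conversation cost route
const exampleCostResponse = {
  conversationId: 'abc-123',
  totalCost: '$0.054', // formatted by formatCost()
  totalCostRaw: 0.054, // raw number driving the color thresholds below
  primaryModel: 'gpt-4o',
  totalTokens: 2850, // promptTokens + completionTokens
  lastUpdated: '2025-08-17T10:00:05.000Z',
};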
diff --git a/client/src/components/Chat/ConversationCost.tsx b/client/src/components/Chat/ConversationCost.tsx
new file mode 100644
index 000000000..d7b0497ab
--- /dev/null
+++ b/client/src/components/Chat/ConversationCost.tsx
@@ -0,0 +1,63 @@
+import React, { useMemo } from 'react';
+import { useParams } from 'react-router-dom';
+import { useTranslation } from 'react-i18next';
+import { Constants } from 'librechat-data-provider';
+import { useQuery } from '@tanstack/react-query';
+
+type CostDisplay = {
+  totalCost: string;
+  totalCostRaw: number;
+  primaryModel: string;
+  totalTokens: number;
+  lastUpdated: string | number | Date;
+  conversationId?: string;
+};
+
+export default function ConversationCost() {
+  const { t } = useTranslation();
+  const { conversationId } = useParams();
+
+  const { data } = useQuery<CostDisplay | null>({
+    queryKey: ['conversationCost', conversationId],
+    enabled: Boolean(conversationId && conversationId !== Constants.NEW_CONVO),
+    queryFn: async () => {
+      const res = await fetch(`/api/convos/${conversationId}/cost`, { credentials: 'include' });
+      if (!res.ok) {
+        return null;
+      }
+      return res.json();
+    },
+    staleTime: 5_000,
+    refetchOnWindowFocus: false,
+  });
+
+  const colorClass = useMemo(() => {
+    const cost = data?.totalCostRaw ?? 0;
+    if (cost < 0.01) return 'text-green-600 dark:text-green-400';
+    if (cost < 0.1) return 'text-yellow-600 dark:text-yellow-400';
+    if (cost < 1) return 'text-orange-600 dark:text-orange-400';
+    return 'text-red-600 dark:text-red-400';
+  }, [data?.totalCostRaw]);
+
+  if (!conversationId || conversationId === Constants.NEW_CONVO) {
+    return null;
+  }
+
+  if (!data || data.totalCostRaw === 0) {
+    return (
+      <div className="flex items-center gap-1 text-xs">
+        <span>💰</span>
+        <span>$0.00</span>
+      </div>
+    );
+  }
+
+  const tooltipText = `${t('com_ui_conversation_cost')}: ${data.totalCost} | ${t('com_ui_primary_model')}: ${data.primaryModel} | ${t('com_ui_total_tokens')}: ${data.totalTokens.toLocaleString()} | ${t('com_ui_last_updated')}: ${new Date(data.lastUpdated).toLocaleTimeString()}`;
+
+  return (
+    <div className="flex items-center gap-1 text-xs" title={tooltipText}>
+      <span>💰</span>
+      <span className={colorClass}>{data.totalCost}</span>
+    </div>
+  );
+}
diff --git a/client/src/components/Chat/Header.tsx b/client/src/components/Chat/Header.tsx
index bf04a2e4f..88e773044 100644
--- a/client/src/components/Chat/Header.tsx
+++ b/client/src/components/Chat/Header.tsx
@@ -9,6 +9,7 @@ import { useGetStartupConfig } from '~/data-provider';
 import ExportAndShareMenu from './ExportAndShareMenu';
 import BookmarkMenu from './Menus/BookmarkMenu';
 import { TemporaryChat } from './TemporaryChat';
+import ConversationCost from './ConversationCost';
 import AddMultiConvo from './AddMultiConvo';
 import { useHasAccess } from '~/hooks';
 
@@ -62,6 +63,7 @@ export default function Header() {
             {hasAccessToMultiConvo === true && <AddMultiConvo />}
             {isSmallScreen && (
               <>
+                <ConversationCost />
@@ -72,6 +74,7 @@ export default function Header() {
           {!isSmallScreen && (
+            <ConversationCost />
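Review note (annotation, not part of the patch): nothing in this diff consumes the POST /api/convos/costs batch route yet. A sketch of a hypothetical client helper for it, assuming the same cookie-based auth the component above uses; fetchConversationCosts is not a name from this patch.

async function fetchConversationCosts(conversationIds) {
  const res = await fetch('/api/convos/costs', {
    method: 'POST',
    credentials: 'include',
    headers: { 'Content-Type': 'application/json' },
    // the route rejects requests with more than 50 ids
    body: JSON.stringify({ conversationIds: conversationIds.slice(0, 50) }),
  });
  if (!res.ok) {
    return {};
  }
  return res.json(); // { [conversationId]: costDisplay | null }
}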
diff --git a/client/src/locales/en/translation.json b/client/src/locales/en/translation.json
index 514962af2..0ad98c0c0 100644
--- a/client/src/locales/en/translation.json
+++ b/client/src/locales/en/translation.json
@@ -1242,4 +1242,8 @@
   "com_ui_yes": "Yes",
   "com_ui_zoom": "Zoom",
-  "com_user_message": "You"
+  "com_user_message": "You",
+  "com_ui_conversation_cost": "Conversation cost",
+  "com_ui_last_updated": "Last updated",
+  "com_ui_primary_model": "Model",
+  "com_ui_total_tokens": "Tokens"
 }
diff --git a/packages/data-provider/src/schemas.ts b/packages/data-provider/src/schemas.ts
index a3296abc8..9b27a3566 100644
--- a/packages/data-provider/src/schemas.ts
+++ b/packages/data-provider/src/schemas.ts
@@ -538,6 +538,9 @@ export const tMessageSchema = z.object({
   unfinished: z.boolean().optional(),
   searchResult: z.boolean().optional(),
   finish_reason: z.string().optional(),
+  tokenCount: z.number().optional(),
+  usage: z.any().optional(),
+  tokens: z.any().optional(),
   /* assistant */
   thread_id: z.string().optional(),
   /* frontend components */
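Note (annotation, not part of the patch): the new message fields are deliberately loose, with usage and tokens typed as z.any(). A minimal sanity check of what now parses, assuming tMessageSchema is re-exported from the librechat-data-provider package root as the client imports in this diff suggest:

import { tMessageSchema } from 'librechat-data-provider';

// Exercise only the three new optional fields
const costFields = tMessageSchema.pick({ tokenCount: true, usage: true, tokens: true });
costFields.parse({
  tokenCount: 450,
  usage: { prompt_tokens: 1200, completion_tokens: 450 },
}); // passes; tokens may be omitted because all three fields are optional

A stricter z.object() shape for usage could replace z.any() later without invalidating stored messages.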