feat: add real-time conversation cost tracking with proper token counting

- Add comprehensive ModelPricing service with 100+ models and historical pricing - Create real-time ConversationCost component that displays in chat header - Use actual token counts from model APIs instead of client-side estimation - Fix BaseClient.js to preserve tokenCount in response messages - Add tokenCount, usage, and tokens fields to message schema - Update Header component to include ConversationCost display - Support OpenAI, Anthropic, Google, and other major model providers - Include color-coded cost display based on amount - Add 32 unit tests for pricing calculation logic 🤖 Generated with Claude Code Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-24 04:10:15 +01:00 · 2025-08-17 20:13:49 +05:30 · 2025-08-17 20:13:49 +05:30 · 3edf6fdf6b
commit 3edf6fdf6b
parent 543b617e1c
9 changed files with 2041 additions and 1 deletions
--- a/api/app/clients/BaseClient.js
+++ b/api/app/clients/BaseClient.js
@ -779,13 +779,25 @@ class BaseClient {
      }
    }

+    // Persist usage metadata on the assistant message if available for accurate costing
+    if (this.getStreamUsage != null) {
+      const streamUsage = this.getStreamUsage();
+      if (streamUsage && (Number(streamUsage[this.inputTokensKey]) > 0 || Number(streamUsage[this.outputTokensKey]) > 0)) {
+        responseMessage.usage = {
+          prompt_tokens: streamUsage[this.inputTokensKey],
+          completion_tokens: streamUsage[this.outputTokensKey],
+          reasoning_tokens: streamUsage.reasoning_tokens,
+          input_token_details: streamUsage.input_token_details,
+        };
+      }
+    }
+
    responseMessage.databasePromise = this.saveMessageToDatabase(
      responseMessage,
      saveOptions,
      user,
    );
    this.savedMessageIds.add(responseMessage.messageId);
-    delete responseMessage.tokenCount;
    return responseMessage;
  }

--- a/api/server/routes/convos.js
+++ b/api/server/routes/convos.js
@ -19,6 +19,10 @@ const assistantClients = {
 };

 const router = express.Router();
+const {
+  getConversationCostDisplayFromMessages,
+  getMultipleConversationCosts,
+} = require('~/server/services/ConversationCostDynamic');
 router.use(requireJwtAuth);

 router.get('/', async (req, res) => {
@ -230,3 +234,72 @@ router.post('/duplicate', async (req, res) => {
 });

 module.exports = router;
+
+/**
+ * GET /:conversationId/cost
+ * Get cost summary for a specific conversation
+ */
+router.get('/:conversationId/cost', async (req, res) => {
+  try {
+    const { conversationId } = req.params;
+    const userId = req.user.id;
+
+    const { getConvo } = require('~/models/Conversation');
+    const { getMessages } = require('~/models/Message');
+
+    const conversation = await getConvo(userId, conversationId);
+    if (!conversation) {
+      return res.status(404).json({ error: 'Conversation not found' });
+    }
+
+    const messages = await getMessages({ user: userId, conversationId });
+    if (messages.length === 0) {
+      return res.status(404).json({ error: 'No messages found in this conversation' });
+    }
+
+    const costDisplay = getConversationCostDisplayFromMessages(messages);
+    if (!costDisplay) {
+      return res.json({
+        conversationId,
+        totalCost: '$0.00',
+        totalCostRaw: 0,
+        primaryModel: 'Unknown',
+        totalTokens: 0,
+        lastUpdated: new Date(),
+        error: 'No cost data available',
+      });
+    }
+
+    costDisplay.conversationId = conversationId;
+    res.json(costDisplay);
+  } catch (error) {
+    logger.error('Error getting conversation cost:', error);
+    res.status(500).json({ error: 'Failed to calculate conversation cost' });
+  }
+});
+
+/**
+ * POST /costs
+ * Get cost summaries for multiple conversations
+ * Body: { conversationIds: string[] }
+ */
+router.post('/costs', async (req, res) => {
+  try {
+    const { conversationIds } = req.body;
+    const userId = req.user.id;
+
+    if (!Array.isArray(conversationIds)) {
+      return res.status(400).json({ error: 'conversationIds must be an array' });
+    }
+
+    if (conversationIds.length > 50) {
+      return res.status(400).json({ error: 'Maximum 50 conversations allowed per request' });
+    }
+
+    const costs = await getMultipleConversationCosts(conversationIds, userId);
+    res.json(costs);
+  } catch (error) {
+    logger.error('Error getting multiple conversation costs:', error);
+    res.status(500).json({ error: 'Failed to calculate conversation costs' });
+  }
+});
--- a/api/server/services/ConversationCostDynamic.js
+++ b/api/server/services/ConversationCostDynamic.js
@ -0,0 +1,559 @@
+const { calculateTokenCost, getModelProvider } = require('./ModelPricing');
+
+// Use console for logging to avoid circular dependencies
+const logger = {
+  info: (msg, data) => console.log(msg, data || ''),
+  warn: (msg) => console.warn(msg),
+  error: (msg, error) => console.error(msg, error || ''),
+};
+
+/**
+ * Calculate the total cost of a conversation from messages
+ * @param {Array<Object>} messages - Array of message objects from the database
+ * @param {string} messages[].messageId - Unique identifier for the message
+ * @param {string|null} messages[].model - The model used (null for user messages)
+ * @param {number} [messages[].tokenCount] - Token count for the message
+ * @param {Object} [messages[].usage] - OpenAI-style usage object
+ * @param {Object} [messages[].tokens] - Alternative token format
+ * @param {Date|string|number} [messages[].createdAt] - When the message was created
+ * @returns {Object|null} Cost summary with total cost, breakdown, and model details
+ */
+function calculateConversationCostFromMessages(messages) {
+  try {
+    if (!messages || messages.length === 0) {
+      return null;
+    }
+
+    const costBreakdown = {
+      prompt: 0,
+      completion: 0,
+      cacheWrite: 0,
+      cacheRead: 0,
+      reasoning: 0,
+    };
+
+    const tokenUsage = {
+      promptTokens: 0,
+      completionTokens: 0,
+      cacheWriteTokens: 0,
+      cacheReadTokens: 0,
+      reasoningTokens: 0,
+    };
+
+    const modelBreakdown = new Map();
+    let lastUpdated = new Date(0);
+
+    messages.forEach((message, index) => {
+      const hasTokenInfo = !!(message.tokenCount || message.tokens || message.usage);
+      const inferredRole = message?.model ? 'assistant' : 'user';
+
+      if (index < 3) {
+        logger.info(
+          `Message ${index}: model=${message.model}, tokenCount=${message.tokenCount}, hasTokenInfo=${hasTokenInfo}, role=${inferredRole}`,
+        );
+      }
+
+      if (!hasTokenInfo) {
+        return;
+      }
+
+      if (inferredRole === 'assistant' && !message.model) {
+        return;
+      }
+
+      const messageDate = new Date(message.createdAt || message.timestamp || Date.now());
+      if (messageDate > lastUpdated) {
+        lastUpdated = messageDate;
+      }
+
+      const modelKey = message.model || 'user-input';
+      if (!modelBreakdown.has(modelKey)) {
+        modelBreakdown.set(modelKey, {
+          model: modelKey,
+          provider: message.model ? getModelProvider(message.model) : 'user',
+          cost: 0,
+          tokenUsage: {
+            promptTokens: 0,
+            completionTokens: 0,
+            cacheWriteTokens: 0,
+            cacheReadTokens: 0,
+            reasoningTokens: 0,
+          },
+          messageCount: 0,
+        });
+      }
+
+      const modelData = modelBreakdown.get(modelKey);
+      modelData.messageCount++;
+
+      let currentTokenUsage = {
+        promptTokens: 0,
+        completionTokens: 0,
+        cacheWriteTokens: 0,
+        cacheReadTokens: 0,
+        reasoningTokens: 0,
+      };
+
+      if (message.usage) {
+        currentTokenUsage.promptTokens = message.usage.prompt_tokens || message.usage.input_tokens || 0;
+        currentTokenUsage.completionTokens = message.usage.completion_tokens || message.usage.output_tokens || 0;
+        currentTokenUsage.reasoningTokens = message.usage.reasoning_tokens || 0;
+        const write = Number(message.usage?.input_token_details?.cache_creation) || 0;
+        const read = Number(message.usage?.input_token_details?.cache_read) || 0;
+        currentTokenUsage.cacheWriteTokens = write;
+        currentTokenUsage.cacheReadTokens = read;
+      } else if (message.tokens) {
+        currentTokenUsage.promptTokens = message.tokens.prompt || message.tokens.input || 0;
+        currentTokenUsage.completionTokens = message.tokens.completion || message.tokens.output || 0;
+      } else if (message.tokenCount) {
+        if (inferredRole === 'assistant') {
+          currentTokenUsage.completionTokens = message.tokenCount;
+        } else {
+          currentTokenUsage.promptTokens = message.tokenCount;
+        }
+      }
+
+      if (message.model) {
+        const cost = calculateTokenCost(message.model, currentTokenUsage, messageDate);
+        if (!cost.error) {
+          costBreakdown.prompt += cost.prompt;
+          costBreakdown.completion += cost.completion;
+          costBreakdown.cacheWrite += cost.cacheWrite;
+          costBreakdown.cacheRead += cost.cacheRead;
+          costBreakdown.reasoning += cost.reasoning;
+          modelData.cost += cost.total;
+        } else {
+          logger.warn(`Could not calculate cost for model ${message.model}: ${cost.error}`);
+        }
+      }
+
+      for (const [key, value] of Object.entries(currentTokenUsage)) {
+        modelData.tokenUsage[key] += value;
+        tokenUsage[key] += value;
+      }
+    });
+
+    const totalCost = Object.values(costBreakdown).reduce((sum, cost) => sum + cost, 0);
+    const modelBreakdownArray = Array.from(modelBreakdown.values()).sort((a, b) => b.cost - a.cost);
+
+    logger.info('Cost calculation results:', {
+      totalCost,
+      costBreakdown,
+      tokenUsage,
+      modelCount: modelBreakdownArray.length,
+      models: modelBreakdownArray.map((m) => ({
+        model: m.model,
+        cost: m.cost,
+        tokens: m.tokenUsage,
+      })),
+    });
+
+    return {
+      totalCost: Math.round(totalCost * 100000) / 100000,
+      costBreakdown: {
+        prompt: Math.round(costBreakdown.prompt * 100000) / 100000,
+        completion: Math.round(costBreakdown.completion * 100000) / 100000,
+        cacheWrite: Math.round(costBreakdown.cacheWrite * 100000) / 100000,
+        cacheRead: Math.round(costBreakdown.cacheRead * 100000) / 100000,
+        reasoning: Math.round(costBreakdown.reasoning * 100000) / 100000,
+      },
+      tokenUsage,
+      modelBreakdown: modelBreakdownArray,
+      lastUpdated,
+    };
+  } catch (error) {
+    logger.error('Error calculating conversation cost from messages:', error);
+    return null;
+  }
+}
+
+/**
+ * Get simplified cost display for UI from messages
+ * @param {Array<Object>} messages - Array of message objects from the database
+ * @returns {Object|null} Simplified cost data for UI display
+ */
+function getConversationCostDisplayFromMessages(messages) {
+  try {
+    if (!messages || messages.length === 0) {
+      return null;
+    }
+
+    const costSummary = calculateConversationCostFromMessages(messages);
+    if (!costSummary) {
+      return null;
+    }
+
+    const formatCost = (cost) => {
+      if (cost < 0.001) return '<$0.001';
+      if (cost < 0.01) return `$${cost.toFixed(4)}`;
+      if (cost < 1) return `$${cost.toFixed(3)}`;
+      return `$${cost.toFixed(2)}`;
+    };
+
+    return {
+      totalCost: formatCost(costSummary.totalCost),
+      totalCostRaw: costSummary.totalCost,
+      primaryModel: costSummary.modelBreakdown[0]?.model || 'Unknown',
+      totalTokens: costSummary.tokenUsage.promptTokens + costSummary.tokenUsage.completionTokens,
+      lastUpdated: costSummary.lastUpdated,
+    };
+  } catch (error) {
+    logger.error('Error getting conversation cost display from messages:', error);
+    return null;
+  }
+}
+
+/**
+ * Get costs for multiple conversations in batch
+ * @param {string[]} conversationIds
+ * @param {string} userId
+ * @returns {Promise<Record<string, any>>}
+ */
+async function getMultipleConversationCosts(conversationIds, userId) {
+  try {
+    const { getMessages } = require('~/models/Message');
+    const results = {};
+
+    const batchSize = 10;
+    for (let i = 0; i < conversationIds.length; i += batchSize) {
+      const batch = conversationIds.slice(i, i + batchSize);
+      await Promise.all(
+        batch.map(async (conversationId) => {
+          try {
+            const messages = await getMessages({ user: userId, conversationId });
+            if (messages && messages.length > 0) {
+              const costDisplay = getConversationCostDisplayFromMessages(messages);
+              results[conversationId] = costDisplay ? { ...costDisplay, conversationId } : null;
+            } else {
+              results[conversationId] = null;
+            }
+          } catch (error) {
+            logger.error(`Error calculating cost for conversation ${conversationId}:`, error);
+            results[conversationId] = null;
+          }
+        }),
+      );
+    }
+
+    return results;
+  } catch (error) {
+    logger.error('Error getting multiple conversation costs:', error);
+    return {};
+  }
+}
+
+module.exports = {
+  calculateConversationCostFromMessages,
+  getConversationCostDisplayFromMessages,
+  getMultipleConversationCosts,
+};
+
+const { calculateTokenCost, getModelProvider } = require('./ModelPricing');
+
+// Use console for logging to avoid circular dependencies
+const logger = {
+  info: (msg, data) => console.log(msg, data || ''),
+  warn: (msg) => console.warn(msg),
+  error: (msg, error) => console.error(msg, error || ''),
+};
+
+/**
+ * Calculate the total cost of a conversation from messages
+ * @param {Array<Object>} messages - Array of message objects from the database
+ * @param {string} messages[].messageId - Unique identifier for the message
+ * @param {string|null} messages[].model - The model used (null for user messages)
+ * @param {number} messages[].tokenCount - Token count for the message
+ * @param {Object} [messages[].usage] - OpenAI-style usage object
+ * @param {Object} [messages[].tokens] - Alternative token format
+ * @param {Date} messages[].createdAt - When the message was created
+ * @returns {Object|null} Cost summary with total cost, breakdown, and model details
+ * @returns {number} returns.totalCost - Total cost across all models
+ * @returns {Object} returns.costBreakdown - Breakdown by token type
+ * @returns {Object} returns.tokenUsage - Token counts by type
+ * @returns {Array} returns.modelBreakdown - Per-model cost and usage
+ * @returns {Date} returns.lastUpdated - Timestamp of the last message
+ */
+function calculateConversationCostFromMessages(messages) {
+  try {
+    if (!messages || messages.length === 0) {
+      return null;
+    }
+
+    // Initialize cost tracking
+    const costBreakdown = {
+      prompt: 0,
+      completion: 0,
+      cacheWrite: 0,
+      cacheRead: 0,
+      reasoning: 0,
+    };
+
+    const tokenUsage = {
+      promptTokens: 0,
+      completionTokens: 0,
+      cacheWriteTokens: 0,
+      cacheReadTokens: 0,
+      reasoningTokens: 0,
+    };
+
+    const modelBreakdown = new Map();
+    let lastUpdated = new Date(0);
+
+    // Process each message
+    messages.forEach((message, index) => {
+      // Debug each message processing
+      const hasTokenInfo = !!(message.tokenCount || message.tokens || message.usage);
+      const inferredRole = message.model ? 'assistant' : 'user';
+
+      // Debug logging
+      if (index < 3) {
+        logger.info(
+          `Message ${index}: model=${message.model}, tokenCount=${message.tokenCount}, hasTokenInfo=${hasTokenInfo}, role=${inferredRole}`,
+        );
+      }
+
+      // For LibreChat: Skip messages without token info, but allow both user and assistant messages
+      // User messages have model=null, assistant messages have specific models
+      if (!hasTokenInfo) {
+        return;
+      }
+
+      // For assistant messages, we need a model for pricing
+      if (inferredRole === 'assistant' && !message.model) {
+        return;
+      }
+
+      const messageDate = new Date(message.createdAt || message.timestamp || Date.now());
+      if (messageDate > lastUpdated) {
+        lastUpdated = messageDate;
+      }
+
+      // For user messages, use a special key since they don't have a model
+      const modelKey = message.model || 'user-input';
+
+      // Initialize model breakdown if not exists
+      if (!modelBreakdown.has(modelKey)) {
+        modelBreakdown.set(modelKey, {
+          model: modelKey,
+          provider: message.model ? getModelProvider(message.model) : 'user',
+          cost: 0,
+          tokenUsage: {
+            promptTokens: 0,
+            completionTokens: 0,
+            cacheWriteTokens: 0,
+            cacheReadTokens: 0,
+            reasoningTokens: 0,
+          },
+          messageCount: 0,
+        });
+      }
+
+      const modelData = modelBreakdown.get(modelKey);
+      modelData.messageCount++;
+
+      // Extract token counts from message
+      let currentTokenUsage = {
+        promptTokens: 0,
+        completionTokens: 0,
+        cacheWriteTokens: 0,
+        cacheReadTokens: 0,
+        reasoningTokens: 0,
+      };
+
+      // Check different possible token count formats
+      if (message.usage) {
+        // OpenAI format: { prompt_tokens, completion_tokens }
+        currentTokenUsage.promptTokens = message.usage.prompt_tokens || 0;
+        currentTokenUsage.completionTokens = message.usage.completion_tokens || 0;
+        currentTokenUsage.reasoningTokens = message.usage.reasoning_tokens || 0;
+      } else if (message.tokens) {
+        // Alternative format
+        currentTokenUsage.promptTokens = message.tokens.prompt || message.tokens.input || 0;
+        currentTokenUsage.completionTokens =
+          message.tokens.completion || message.tokens.output || 0;
+      } else if (message.tokenCount) {
+        // LibreChat format: simple tokenCount field
+        // Infer role from model field: null model = user message, specific model = assistant message
+        const inferredRole = message.model ? 'assistant' : 'user';
+
+        if (inferredRole === 'assistant') {
+          currentTokenUsage.completionTokens = message.tokenCount;
+        } else {
+          currentTokenUsage.promptTokens = message.tokenCount;
+        }
+      }
+
+      // Handle cache tokens if present
+      if (message.cacheTokens) {
+        currentTokenUsage.cacheWriteTokens = message.cacheTokens.write || 0;
+        currentTokenUsage.cacheReadTokens = message.cacheTokens.read || 0;
+      }
+
+      // Calculate cost using historical pricing (only for assistant messages with models)
+      if (message.model) {
+        const cost = calculateTokenCost(message.model, currentTokenUsage, messageDate);
+
+        if (!cost.error) {
+          // Add to overall breakdown
+          costBreakdown.prompt += cost.prompt;
+          costBreakdown.completion += cost.completion;
+          costBreakdown.cacheWrite += cost.cacheWrite;
+          costBreakdown.cacheRead += cost.cacheRead;
+          costBreakdown.reasoning += cost.reasoning;
+
+          // Add to model breakdown
+          modelData.cost += cost.total;
+        } else {
+          logger.warn(`Could not calculate cost for model ${message.model}: ${cost.error}`);
+        }
+      }
+
+      // Always update token usage (for both user and assistant messages)
+      for (const [key, value] of Object.entries(currentTokenUsage)) {
+        modelData.tokenUsage[key] += value;
+        tokenUsage[key] += value;
+      }
+    });
+
+    // Calculate total cost
+    const totalCost = Object.values(costBreakdown).reduce((sum, cost) => sum + cost, 0);
+
+    // Convert model breakdown to array
+    const modelBreakdownArray = Array.from(modelBreakdown.values()).sort((a, b) => b.cost - a.cost);
+
+    // Debug final results
+    logger.info('Cost calculation results:', {
+      totalCost,
+      costBreakdown,
+      tokenUsage,
+      modelCount: modelBreakdownArray.length,
+      models: modelBreakdownArray.map((m) => ({
+        model: m.model,
+        cost: m.cost,
+        tokens: m.tokenUsage,
+      })),
+    });
+
+    return {
+      totalCost: Math.round(totalCost * 100000) / 100000, // Round to 5 decimal places
+      costBreakdown: {
+        prompt: Math.round(costBreakdown.prompt * 100000) / 100000,
+        completion: Math.round(costBreakdown.completion * 100000) / 100000,
+        cacheWrite: Math.round(costBreakdown.cacheWrite * 100000) / 100000,
+        cacheRead: Math.round(costBreakdown.cacheRead * 100000) / 100000,
+        reasoning: Math.round(costBreakdown.reasoning * 100000) / 100000,
+      },
+      tokenUsage,
+      modelBreakdown: modelBreakdownArray,
+      lastUpdated,
+    };
+  } catch (error) {
+    logger.error('Error calculating conversation cost from messages:', error);
+    return null;
+  }
+}
+
+/**
+ * Get simplified cost display for UI from messages
+ * @param {Array<Object>} messages - Array of message objects from the database
+ * @returns {Object|null} Simplified cost data for UI display
+ * @returns {string} returns.totalCost - Formatted cost string (e.g., "$0.054")
+ * @returns {number} returns.totalCostRaw - Raw cost value for calculations
+ * @returns {string} returns.primaryModel - The model that contributed most to cost
+ * @returns {number} returns.totalTokens - Total token count across all messages
+ * @returns {Date} returns.lastUpdated - Timestamp of the last message
+ */
+function getConversationCostDisplayFromMessages(messages) {
+  try {
+    if (!messages || messages.length === 0) {
+      return null;
+    }
+
+    const costSummary = calculateConversationCostFromMessages(messages);
+    if (!costSummary) {
+      return null;
+    }
+
+    // Format cost for display
+    const formatCost = (cost) => {
+      if (cost < 0.001) {
+        return '<$0.001';
+      }
+      if (cost < 0.01) {
+        return `$${cost.toFixed(4)}`;
+      }
+      if (cost < 1) {
+        return `$${cost.toFixed(3)}`;
+      }
+      return `$${cost.toFixed(2)}`;
+    };
+
+    return {
+      totalCost: formatCost(costSummary.totalCost),
+      totalCostRaw: costSummary.totalCost,
+      primaryModel: costSummary.modelBreakdown[0]?.model || 'Unknown',
+      totalTokens: costSummary.tokenUsage.promptTokens + costSummary.tokenUsage.completionTokens,
+      lastUpdated: costSummary.lastUpdated,
+    };
+  } catch (error) {
+    logger.error('Error getting conversation cost display from messages:', error);
+    return null;
+  }
+}
+
+/**
+ * Get costs for multiple conversations in batch
+ * @param {string[]} conversationIds - Array of conversation IDs
+ * @param {string} userId - User ID
+ * @returns {Object} Map of conversationId to cost display data
+ */
+async function getMultipleConversationCosts(conversationIds, userId) {
+  try {
+    const { getMessages } = require('~/models/Message');
+    const results = {};
+
+    // Process in batches to avoid overwhelming the database
+    const batchSize = 10;
+    for (let i = 0; i < conversationIds.length; i += batchSize) {
+      const batch = conversationIds.slice(i, i + batchSize);
+
+      // Process batch in parallel
+      await Promise.all(
+        batch.map(async (conversationId) => {
+          try {
+            const messages = await getMessages({
+              user: userId,
+              conversationId: conversationId,
+            });
+
+            if (messages && messages.length > 0) {
+              const costDisplay = getConversationCostDisplayFromMessages(messages);
+              if (costDisplay) {
+                costDisplay.conversationId = conversationId;
+                results[conversationId] = costDisplay;
+              } else {
+                results[conversationId] = null;
+              }
+            } else {
+              results[conversationId] = null;
+            }
+          } catch (error) {
+            logger.error(`Error calculating cost for conversation ${conversationId}:`, error);
+            results[conversationId] = null;
+          }
+        }),
+      );
+    }
+
+    return results;
+  } catch (error) {
+    logger.error('Error getting multiple conversation costs:', error);
+    return {};
+  }
+}
+
+module.exports = {
+  calculateConversationCostFromMessages,
+  getConversationCostDisplayFromMessages,
+  getMultipleConversationCosts,
+};
--- a/api/server/services/ModelPricing.js
+++ b/api/server/services/ModelPricing.js
@ -0,0 +1,993 @@
+// Use console for logging to avoid circular dependencies
+const logger = {
+  warn: (msg) => console.warn(msg),
+  error: (msg) => console.error(msg),
+};
+
+/**
+ * Model pricing configuration with historical data
+ * Prices are in USD per 1M tokens
+ *
+ * Format:
+ * - Each model has an array of pricing periods
+ * - Periods are sorted by effectiveFrom date (newest first)
+ * - effectiveTo is optional (null means current pricing)
+ */
+const PRICING_DATA = {
+  // OpenAI Models
+  'gpt-4o': [
+    {
+      effectiveFrom: new Date('2024-05-13'),
+      prompt: 5.0,
+      completion: 15.0,
+    },
+  ],
+  'gpt-4o-mini': [
+    {
+      effectiveFrom: new Date('2024-07-18'),
+      prompt: 0.15,
+      completion: 0.6,
+    },
+  ],
+  'gpt-4-turbo': [
+    {
+      effectiveFrom: new Date('2024-04-09'),
+      prompt: 10.0,
+      completion: 30.0,
+    },
+  ],
+  'gpt-4': [
+    {
+      effectiveFrom: new Date('2024-01-01'),
+      prompt: 30.0,
+      completion: 60.0,
+    },
+  ],
+  'gpt-4-0613': [
+    {
+      effectiveFrom: new Date('2023-06-13'),
+      prompt: 30.0,
+      completion: 60.0,
+    },
+  ],
+  'gpt-4-0314': [
+    {
+      effectiveFrom: new Date('2023-03-14'),
+      prompt: 30.0,
+      completion: 60.0,
+    },
+  ],
+  'gpt-4-32k-0314': [
+    {
+      effectiveFrom: new Date('2023-03-14'),
+      prompt: 60.0,
+      completion: 120.0,
+    },
+  ],
+  'gpt-3.5-turbo': [
+    {
+      effectiveFrom: new Date('2024-01-25'),
+      prompt: 0.5,
+      completion: 1.5,
+    },
+    {
+      effectiveFrom: new Date('2023-11-06'),
+      effectiveTo: new Date('2024-01-24'),
+      prompt: 1.0,
+      completion: 2.0,
+    },
+  ],
+  o1: [
+    {
+      effectiveFrom: new Date('2024-12-05'),
+      prompt: 15.0,
+      completion: 60.0,
+      reasoning: 15.0,
+    },
+  ],
+  'o1-mini': [
+    {
+      effectiveFrom: new Date('2024-09-12'),
+      prompt: 3.0,
+      completion: 12.0,
+      reasoning: 3.0,
+    },
+  ],
+  'o1-preview': [
+    {
+      effectiveFrom: new Date('2024-09-12'),
+      prompt: 15.0,
+      completion: 60.0,
+      reasoning: 15.0,
+    },
+  ],
+  'o1-preview-2024-09-12': [
+    {
+      effectiveFrom: new Date('2024-09-12'),
+      prompt: 15.0,
+      completion: 60.0,
+      reasoning: 15.0,
+    },
+  ],
+  'o1-mini-2024-09-12': [
+    {
+      effectiveFrom: new Date('2024-09-12'),
+      prompt: 3.0,
+      completion: 12.0,
+      reasoning: 3.0,
+    },
+  ],
+  'o3-mini': [
+    {
+      effectiveFrom: new Date('2024-12-20'),
+      prompt: 1.5,
+      completion: 6.0,
+      reasoning: 1.5,
+    },
+  ],
+
+  // Anthropic Models
+  'claude-3-5-sonnet': [
+    {
+      effectiveFrom: new Date('2024-06-20'),
+      prompt: 3.0,
+      completion: 15.0,
+      cacheWrite: 3.75,
+      cacheRead: 0.3,
+    },
+  ],
+  'claude-3.5-sonnet': [
+    {
+      effectiveFrom: new Date('2024-06-20'),
+      prompt: 3.0,
+      completion: 15.0,
+      cacheWrite: 3.75,
+      cacheRead: 0.3,
+    },
+  ],
+  'claude-3-5-haiku': [
+    {
+      effectiveFrom: new Date('2024-11-01'),
+      prompt: 0.8,
+      completion: 4.0,
+      cacheWrite: 1.0,
+      cacheRead: 0.08,
+    },
+  ],
+  'claude-3.5-haiku': [
+    {
+      effectiveFrom: new Date('2024-11-01'),
+      prompt: 0.8,
+      completion: 4.0,
+      cacheWrite: 1.0,
+      cacheRead: 0.08,
+    },
+  ],
+  'claude-3-opus': [
+    {
+      effectiveFrom: new Date('2024-03-04'),
+      prompt: 15.0,
+      completion: 75.0,
+    },
+  ],
+  'claude-3-sonnet': [
+    {
+      effectiveFrom: new Date('2024-03-04'),
+      prompt: 3.0,
+      completion: 15.0,
+    },
+  ],
+  'claude-3-haiku': [
+    {
+      effectiveFrom: new Date('2024-03-04'),
+      prompt: 0.25,
+      completion: 1.25,
+      cacheWrite: 0.3,
+      cacheRead: 0.03,
+    },
+  ],
+  'claude-3-5-haiku-20241022': [
+    {
+      effectiveFrom: new Date('2024-11-01'),
+      prompt: 0.8,
+      completion: 4.0,
+      cacheWrite: 1.0,
+      cacheRead: 0.08,
+    },
+  ],
+  'claude-3-5-sonnet-latest': [
+    {
+      effectiveFrom: new Date('2024-10-22'),
+      prompt: 3.0,
+      completion: 15.0,
+      cacheWrite: 3.75,
+      cacheRead: 0.3,
+    },
+  ],
+  'claude-3-opus-20240229': [
+    {
+      effectiveFrom: new Date('2024-03-04'),
+      prompt: 15.0,
+      completion: 75.0,
+    },
+  ],
+  'claude-3-sonnet-20240229': [
+    {
+      effectiveFrom: new Date('2024-03-04'),
+      prompt: 3.0,
+      completion: 15.0,
+    },
+  ],
+  'claude-3-haiku-20240307': [
+    {
+      effectiveFrom: new Date('2024-03-07'),
+      prompt: 0.25,
+      completion: 1.25,
+      cacheWrite: 0.3,
+      cacheRead: 0.03,
+    },
+  ],
+
+  // Google Models
+  'gemini-1.5-pro': [
+    {
+      effectiveFrom: new Date('2024-02-15'),
+      prompt: 2.5,
+      completion: 10.0,
+    },
+  ],
+  'gemini-1.5-flash': [
+    {
+      effectiveFrom: new Date('2024-05-14'),
+      prompt: 0.15,
+      completion: 0.6,
+    },
+  ],
+  'gemini-1.5-flash-8b': [
+    {
+      effectiveFrom: new Date('2024-10-03'),
+      prompt: 0.075,
+      completion: 0.3,
+    },
+  ],
+
+  // Additional OpenAI Models
+  'gpt-4.5-preview': [
+    {
+      effectiveFrom: new Date('2025-02-27'),
+      prompt: 10.0,
+      completion: 30.0,
+    },
+  ],
+  'gpt-4.5-preview-2025-02-27': [
+    {
+      effectiveFrom: new Date('2025-02-27'),
+      prompt: 10.0,
+      completion: 30.0,
+    },
+  ],
+  'gpt-4-vision-preview': [
+    {
+      effectiveFrom: new Date('2023-11-06'),
+      prompt: 10.0,
+      completion: 30.0,
+    },
+  ],
+  'gpt-4-turbo-preview': [
+    {
+      effectiveFrom: new Date('2024-01-25'),
+      prompt: 10.0,
+      completion: 30.0,
+    },
+  ],
+  'gpt-4-1106-preview': [
+    {
+      effectiveFrom: new Date('2023-11-06'),
+      prompt: 10.0,
+      completion: 30.0,
+    },
+  ],
+  'gpt-4-0125-preview': [
+    {
+      effectiveFrom: new Date('2024-01-25'),
+      prompt: 10.0,
+      completion: 30.0,
+    },
+  ],
+  'gpt-3.5-turbo-0125': [
+    {
+      effectiveFrom: new Date('2024-01-25'),
+      prompt: 0.5,
+      completion: 1.5,
+    },
+  ],
+  'gpt-3.5-turbo-0613': [
+    {
+      effectiveFrom: new Date('2023-06-13'),
+      prompt: 1.5,
+      completion: 2.0,
+    },
+  ],
+  'gpt-3.5-turbo-16k-0613': [
+    {
+      effectiveFrom: new Date('2023-06-13'),
+      prompt: 3.0,
+      completion: 4.0,
+    },
+  ],
+  'gpt-3.5-turbo-1106': [
+    {
+      effectiveFrom: new Date('2023-11-06'),
+      prompt: 1.0,
+      completion: 2.0,
+    },
+  ],
+  'gpt-3.5-turbo-16k': [
+    {
+      effectiveFrom: new Date('2023-06-13'),
+      prompt: 3.0,
+      completion: 4.0,
+    },
+  ],
+  'gpt-3.5-turbo-instruct': [
+    {
+      effectiveFrom: new Date('2023-09-14'),
+      prompt: 1.5,
+      completion: 2.0,
+    },
+  ],
+  'chatgpt-4o-latest': [
+    {
+      effectiveFrom: new Date('2024-05-13'),
+      prompt: 5.0,
+      completion: 15.0,
+    },
+  ],
+  'gpt-4o-2024-05-13': [
+    {
+      effectiveFrom: new Date('2024-05-13'),
+      prompt: 5.0,
+      completion: 15.0,
+    },
+  ],
+  'gpt-4o-2024-08-06': [
+    {
+      effectiveFrom: new Date('2024-08-06'),
+      prompt: 2.5,
+      completion: 10.0,
+    },
+  ],
+  'gpt-4o-mini-2024-07-18': [
+    {
+      effectiveFrom: new Date('2024-07-18'),
+      prompt: 0.15,
+      completion: 0.6,
+    },
+  ],
+  'gpt-4-turbo-2024-04-09': [
+    {
+      effectiveFrom: new Date('2024-04-09'),
+      prompt: 10.0,
+      completion: 30.0,
+    },
+  ],
+  'gpt-4-0125': [
+    {
+      effectiveFrom: new Date('2024-01-25'),
+      prompt: 10.0,
+      completion: 30.0,
+    },
+  ],
+  'gpt-4-1106': [
+    {
+      effectiveFrom: new Date('2023-11-06'),
+      prompt: 10.0,
+      completion: 30.0,
+    },
+  ],
+
+  // Additional Anthropic Models
+  'claude-opus-4-20250514': [
+    {
+      effectiveFrom: new Date('2025-05-14'),
+      prompt: 15.0,
+      completion: 75.0,
+    },
+  ],
+  'claude-opus-4-latest': [
+    {
+      effectiveFrom: new Date('2025-05-14'),
+      prompt: 15.0,
+      completion: 75.0,
+    },
+  ],
+  'claude-opus-4-1-20250805': [
+    {
+      effectiveFrom: new Date('2025-08-05'),
+      prompt: 15.0,
+      completion: 75.0,
+    },
+  ],
+  'claude-sonnet-4-20250514': [
+    {
+      effectiveFrom: new Date('2025-05-14'),
+      prompt: 3.0,
+      completion: 15.0,
+    },
+  ],
+  'claude-sonnet-4-latest': [
+    {
+      effectiveFrom: new Date('2025-05-14'),
+      prompt: 3.0,
+      completion: 15.0,
+    },
+  ],
+  'claude-3-7-sonnet-latest': [
+    {
+      effectiveFrom: new Date('2025-02-19'),
+      prompt: 1.5,
+      completion: 7.5,
+    },
+  ],
+  'claude-3-7-sonnet-20250219': [
+    {
+      effectiveFrom: new Date('2025-02-19'),
+      prompt: 1.5,
+      completion: 7.5,
+    },
+  ],
+  'claude-3-5-sonnet-20240620': [
+    {
+      effectiveFrom: new Date('2024-06-20'),
+      prompt: 3.0,
+      completion: 15.0,
+      cacheWrite: 3.75,
+      cacheRead: 0.3,
+    },
+  ],
+  'claude-3-5-sonnet-20241022': [
+    {
+      effectiveFrom: new Date('2024-10-22'),
+      prompt: 3.0,
+      completion: 15.0,
+      cacheWrite: 3.75,
+      cacheRead: 0.3,
+    },
+  ],
+  'claude-2.1': [
+    {
+      effectiveFrom: new Date('2023-11-21'),
+      prompt: 8.0,
+      completion: 24.0,
+    },
+  ],
+  'claude-2': [
+    {
+      effectiveFrom: new Date('2023-07-11'),
+      prompt: 8.0,
+      completion: 24.0,
+    },
+  ],
+  'claude-instant-1': [
+    {
+      effectiveFrom: new Date('2023-03-14'),
+      prompt: 0.8,
+      completion: 2.4,
+    },
+  ],
+  'claude-1.2': [
+    {
+      effectiveFrom: new Date('2023-05-01'),
+      prompt: 8.0,
+      completion: 24.0,
+    },
+  ],
+  'claude-1': [
+    {
+      effectiveFrom: new Date('2023-03-14'),
+      prompt: 8.0,
+      completion: 24.0,
+    },
+  ],
+  'claude-1-100k': [
+    {
+      effectiveFrom: new Date('2023-05-01'),
+      prompt: 8.0,
+      completion: 24.0,
+    },
+  ],
+  'claude-instant-1-100k': [
+    {
+      effectiveFrom: new Date('2023-05-01'),
+      prompt: 0.8,
+      completion: 2.4,
+    },
+  ],
+
+  // Additional Google Models
+  'gemini-2.0-flash-001': [
+    {
+      effectiveFrom: new Date('2024-12-11'),
+      prompt: 0.15,
+      completion: 0.6,
+    },
+  ],
+  'gemini-2.0-flash-exp': [
+    {
+      effectiveFrom: new Date('2024-12-11'),
+      prompt: 0.0, // Free during experimental phase
+      completion: 0.0,
+    },
+  ],
+  'gemini-2.0-flash-lite': [
+    {
+      effectiveFrom: new Date('2024-12-11'),
+      prompt: 0.075,
+      completion: 0.3,
+    },
+  ],
+  'gemini-2.0-pro-exp-02-05': [
+    {
+      effectiveFrom: new Date('2025-02-05'),
+      prompt: 0.0, // Free during experimental phase
+      completion: 0.0,
+    },
+  ],
+  'gemini-1.5-flash-001': [
+    {
+      effectiveFrom: new Date('2024-05-14'),
+      prompt: 0.15,
+      completion: 0.6,
+    },
+  ],
+  'gemini-1.5-flash-002': [
+    {
+      effectiveFrom: new Date('2024-09-24'),
+      prompt: 0.15,
+      completion: 0.6,
+    },
+  ],
+  'gemini-1.5-pro-001': [
+    {
+      effectiveFrom: new Date('2024-02-15'),
+      prompt: 2.5,
+      completion: 10.0,
+    },
+  ],
+  'gemini-1.5-pro-002': [
+    {
+      effectiveFrom: new Date('2024-09-24'),
+      prompt: 1.25,
+      completion: 5.0,
+    },
+  ],
+  'gemini-1.0-pro-001': [
+    {
+      effectiveFrom: new Date('2023-12-06'),
+      prompt: 0.5,
+      completion: 1.5,
+    },
+  ],
+  'gemini-pro': [
+    {
+      effectiveFrom: new Date('2023-12-06'),
+      prompt: 0.5,
+      completion: 1.5,
+    },
+  ],
+  'gemini-pro-vision': [
+    {
+      effectiveFrom: new Date('2023-12-06'),
+      prompt: 0.5,
+      completion: 1.5,
+    },
+  ],
+
+  // AWS Bedrock Models (using same pricing as direct API)
+  'anthropic.claude-3-5-sonnet-20241022-v2:0': [
+    {
+      effectiveFrom: new Date('2024-10-22'),
+      prompt: 3.0,
+      completion: 15.0,
+    },
+  ],
+  'anthropic.claude-3-5-sonnet-20240620-v1:0': [
+    {
+      effectiveFrom: new Date('2024-06-20'),
+      prompt: 3.0,
+      completion: 15.0,
+    },
+  ],
+  'anthropic.claude-3-5-haiku-20241022-v1:0': [
+    {
+      effectiveFrom: new Date('2024-11-01'),
+      prompt: 0.8,
+      completion: 4.0,
+    },
+  ],
+  'anthropic.claude-3-opus-20240229-v1:0': [
+    {
+      effectiveFrom: new Date('2024-03-04'),
+      prompt: 15.0,
+      completion: 75.0,
+    },
+  ],
+  'anthropic.claude-3-sonnet-20240229-v1:0': [
+    {
+      effectiveFrom: new Date('2024-03-04'),
+      prompt: 3.0,
+      completion: 15.0,
+    },
+  ],
+  'anthropic.claude-3-haiku-20240307-v1:0': [
+    {
+      effectiveFrom: new Date('2024-03-07'),
+      prompt: 0.25,
+      completion: 1.25,
+    },
+  ],
+  'anthropic.claude-v2': [
+    {
+      effectiveFrom: new Date('2023-07-11'),
+      prompt: 8.0,
+      completion: 24.0,
+    },
+  ],
+  'anthropic.claude-v2:1': [
+    {
+      effectiveFrom: new Date('2023-11-21'),
+      prompt: 8.0,
+      completion: 24.0,
+    },
+  ],
+  'anthropic.claude-instant-v1': [
+    {
+      effectiveFrom: new Date('2023-03-14'),
+      prompt: 0.8,
+      completion: 2.4,
+    },
+  ],
+
+  // Cohere Models (via Bedrock)
+  'cohere.command-r-v1:0': [
+    {
+      effectiveFrom: new Date('2024-03-01'),
+      prompt: 0.5,
+      completion: 1.5,
+    },
+  ],
+  'cohere.command-r-plus-v1:0': [
+    {
+      effectiveFrom: new Date('2024-04-01'),
+      prompt: 3.0,
+      completion: 15.0,
+    },
+  ],
+
+  // Meta Llama Models (via Bedrock)
+  'meta.llama2-13b-chat-v1': [
+    {
+      effectiveFrom: new Date('2023-07-01'),
+      prompt: 0.75,
+      completion: 1.0,
+    },
+  ],
+  'meta.llama2-70b-chat-v1': [
+    {
+      effectiveFrom: new Date('2023-07-01'),
+      prompt: 1.95,
+      completion: 2.56,
+    },
+  ],
+  'meta.llama3-8b-instruct-v1:0': [
+    {
+      effectiveFrom: new Date('2024-04-18'),
+      prompt: 0.3,
+      completion: 0.6,
+    },
+  ],
+  'meta.llama3-70b-instruct-v1:0': [
+    {
+      effectiveFrom: new Date('2024-04-18'),
+      prompt: 2.65,
+      completion: 3.5,
+    },
+  ],
+  'meta.llama3-1-8b-instruct-v1:0': [
+    {
+      effectiveFrom: new Date('2024-07-23'),
+      prompt: 0.22,
+      completion: 0.22,
+    },
+  ],
+  'meta.llama3-1-70b-instruct-v1:0': [
+    {
+      effectiveFrom: new Date('2024-07-23'),
+      prompt: 0.99,
+      completion: 0.99,
+    },
+  ],
+  'meta.llama3-1-405b-instruct-v1:0': [
+    {
+      effectiveFrom: new Date('2024-07-23'),
+      prompt: 5.32,
+      completion: 16.0,
+    },
+  ],
+
+  // Mistral Models (via Bedrock and direct)
+  'mistral.mistral-7b-instruct-v0:2': [
+    {
+      effectiveFrom: new Date('2023-09-27'),
+      prompt: 0.15,
+      completion: 0.2,
+    },
+  ],
+  'mistral.mixtral-8x7b-instruct-v0:1': [
+    {
+      effectiveFrom: new Date('2023-12-11'),
+      prompt: 0.45,
+      completion: 0.7,
+    },
+  ],
+  'mistral.mistral-large-2402-v1:0': [
+    {
+      effectiveFrom: new Date('2024-02-26'),
+      prompt: 4.0,
+      completion: 12.0,
+    },
+  ],
+  'mistral.mistral-large-2407-v1:0': [
+    {
+      effectiveFrom: new Date('2024-07-24'),
+      prompt: 2.0,
+      completion: 6.0,
+    },
+  ],
+  'mistral.mistral-small-2410-v1:0': [
+    {
+      effectiveFrom: new Date('2024-10-01'),
+      prompt: 0.2,
+      completion: 0.6,
+    },
+  ],
+  'mistral.mistral-small-2402-v1:0': [
+    {
+      effectiveFrom: new Date('2024-02-26'),
+      prompt: 0.2,
+      completion: 0.6,
+    },
+  ],
+
+  // AI21 Models (via Bedrock)
+  'ai21.jamba-instruct-v1:0': [
+    {
+      effectiveFrom: new Date('2024-03-01'),
+      prompt: 0.5,
+      completion: 0.7,
+    },
+  ],
+
+  // Amazon Titan Models
+  'amazon.titan-text-lite-v1': [
+    {
+      effectiveFrom: new Date('2023-11-29'),
+      prompt: 0.3,
+      completion: 0.4,
+    },
+  ],
+  'amazon.titan-text-express-v1': [
+    {
+      effectiveFrom: new Date('2023-11-29'),
+      prompt: 0.8,
+      completion: 1.1,
+    },
+  ],
+  'amazon.titan-text-premier-v1:0': [
+    {
+      effectiveFrom: new Date('2024-05-01'),
+      prompt: 5.0,
+      completion: 15.0,
+    },
+  ],
+
+  // xAI Models
+  'grok-2': [
+    {
+      effectiveFrom: new Date('2024-08-01'),
+      prompt: 5.0,
+      completion: 10.0,
+    },
+  ],
+  'grok-2-mini': [
+    {
+      effectiveFrom: new Date('2024-08-01'),
+      prompt: 2.0,
+      completion: 6.0,
+    },
+  ],
+
+  // DeepSeek Models
+  'deepseek-chat': [
+    {
+      effectiveFrom: new Date('2024-05-01'),
+      prompt: 0.14,
+      completion: 0.28,
+    },
+  ],
+  'deepseek-coder': [
+    {
+      effectiveFrom: new Date('2024-05-01'),
+      prompt: 0.14,
+      completion: 0.28,
+    },
+  ],
+
+  // Add more models as needed
+};
+
+/**
+ * Get pricing for a model at a specific date
+ * @param {string} model - Model identifier
+ * @param {Date} [date] - Date to get pricing for (defaults to now)
+ * @returns {Object|null} Pricing data or null if not found
+ */
+function getModelPricing(model, date = new Date()) {
+  const modelPricing = PRICING_DATA[model];
+  if (!modelPricing) {
+    logger.warn(`No pricing data found for model: ${model}`);
+    return null;
+  }
+
+  // Find the pricing period that was effective at the given date
+  for (const period of modelPricing) {
+    if (date >= period.effectiveFrom && (!period.effectiveTo || date <= period.effectiveTo)) {
+      return period;
+    }
+  }
+
+  // If no exact match, return the earliest pricing as fallback
+  return modelPricing[modelPricing.length - 1];
+}
+
+/**
+ * Calculate cost for token usage
+ * @param {string} model - Model identifier
+ * @param {Object} usage - Token usage object
+ * @param {number} [usage.promptTokens] - Number of prompt tokens
+ * @param {number} [usage.completionTokens] - Number of completion tokens
+ * @param {number} [usage.cacheWriteTokens] - Number of cache write tokens
+ * @param {number} [usage.cacheReadTokens] - Number of cache read tokens
+ * @param {number} [usage.reasoningTokens] - Number of reasoning tokens
+ * @param {Date} [date] - Date for pricing calculation (defaults to now)
+ * @returns {Object} Cost breakdown
+ */
+function calculateTokenCost(model, usage, date = new Date()) {
+  // Validate inputs
+  if (!model || typeof model !== 'string') {
+    return {
+      prompt: 0,
+      completion: 0,
+      cacheWrite: 0,
+      cacheRead: 0,
+      reasoning: 0,
+      total: 0,
+      error: 'Invalid model specified',
+    };
+  }
+
+  if (!usage || typeof usage !== 'object') {
+    return {
+      prompt: 0,
+      completion: 0,
+      cacheWrite: 0,
+      cacheRead: 0,
+      reasoning: 0,
+      total: 0,
+      error: 'Invalid usage object',
+    };
+  }
+
+  const pricing = getModelPricing(model, date);
+  if (!pricing) {
+    return {
+      prompt: 0,
+      completion: 0,
+      cacheWrite: 0,
+      cacheRead: 0,
+      reasoning: 0,
+      total: 0,
+      error: 'No pricing data available',
+    };
+  }
+
+  const costs = {
+    prompt: 0,
+    completion: 0,
+    cacheWrite: 0,
+    cacheRead: 0,
+    reasoning: 0,
+  };
+
+  // Calculate each cost component (convert from per million to actual cost)
+  if (usage.promptTokens) {
+    costs.prompt = (usage.promptTokens / 1_000_000) * pricing.prompt;
+  }
+
+  if (usage.completionTokens) {
+    costs.completion = (usage.completionTokens / 1_000_000) * pricing.completion;
+  }
+
+  if (usage.cacheWriteTokens && pricing.cacheWrite) {
+    costs.cacheWrite = (usage.cacheWriteTokens / 1_000_000) * pricing.cacheWrite;
+  }
+
+  if (usage.cacheReadTokens && pricing.cacheRead) {
+    costs.cacheRead = (usage.cacheReadTokens / 1_000_000) * pricing.cacheRead;
+  }
+
+  if (usage.reasoningTokens && pricing.reasoning) {
+    costs.reasoning = (usage.reasoningTokens / 1_000_000) * pricing.reasoning;
+  }
+
+  // Calculate total
+  costs.total =
+    costs.prompt + costs.completion + costs.cacheWrite + costs.cacheRead + costs.reasoning;
+
+  return costs;
+}
+
+/**
+ * Get all supported models
+ * @returns {string[]} Array of model identifiers
+ */
+function getSupportedModels() {
+  return Object.keys(PRICING_DATA);
+}
+
+/**
+ * Get model provider from model name
+ * @param {string} model - Model identifier
+ * @returns {string} Provider name
+ */
+function getModelProvider(model) {
+  if (model.includes('gpt') || model.includes('o1') || model.includes('chatgpt')) {
+    return 'OpenAI';
+  }
+  if (model.includes('claude') || model.startsWith('anthropic.')) {
+    return 'Anthropic';
+  }
+  if (model.includes('gemini')) {
+    return 'Google';
+  }
+  if (model.includes('mistral')) {
+    return 'Mistral';
+  }
+  if (model.includes('command') || model.startsWith('cohere.')) {
+    return 'Cohere';
+  }
+  if (model.includes('llama') || model.startsWith('meta.')) {
+    return 'Meta';
+  }
+  if (model.includes('titan') || model.startsWith('amazon.')) {
+    return 'Amazon';
+  }
+  if (model.includes('grok')) {
+    return 'xAI';
+  }
+  if (model.includes('deepseek')) {
+    return 'DeepSeek';
+  }
+  if (model.startsWith('ai21.')) {
+    return 'AI21';
+  }
+  return 'Unknown';
+}
+
+module.exports = {
+  getModelPricing,
+  calculateTokenCost,
+  getSupportedModels,
+  getModelProvider,
+  PRICING_DATA,
+};
--- a/api/server/services/tests/ModelPricing.test.js
+++ b/api/server/services/tests/ModelPricing.test.js
@ -0,0 +1,329 @@
+const {
+  getModelPricing,
+  calculateTokenCost,
+  getSupportedModels,
+  getModelProvider,
+} = require('../ModelPricing');
+
+describe('ModelPricing Service', () => {
+  describe('getModelPricing', () => {
+    it('should return pricing for known models', () => {
+      const pricing = getModelPricing('gpt-4o');
+      expect(pricing).toBeDefined();
+      expect(pricing.prompt).toBe(5.0);
+      expect(pricing.completion).toBe(15.0);
+    });
+
+    it('should return null for unknown models', () => {
+      const pricing = getModelPricing('unknown-model');
+      expect(pricing).toBeNull();
+    });
+
+    it('should return historical pricing for older dates', () => {
+      const oldDate = new Date('2023-11-10');
+      const pricing = getModelPricing('gpt-3.5-turbo', oldDate);
+      expect(pricing).toBeDefined();
+      expect(pricing.prompt).toBe(1.0); // Historical price
+      expect(pricing.completion).toBe(2.0);
+    });
+
+    it('should return current pricing for recent dates', () => {
+      const recentDate = new Date('2024-06-01');
+      const pricing = getModelPricing('gpt-3.5-turbo', recentDate);
+      expect(pricing).toBeDefined();
+      expect(pricing.prompt).toBe(0.5); // Current price
+      expect(pricing.completion).toBe(1.5);
+    });
+
+    it('should handle Claude models with cache pricing', () => {
+      const pricing = getModelPricing('claude-3-5-sonnet');
+      expect(pricing).toBeDefined();
+      expect(pricing.cacheWrite).toBe(3.75);
+      expect(pricing.cacheRead).toBe(0.3);
+    });
+
+    it('should handle o1 models with reasoning pricing', () => {
+      const pricing = getModelPricing('o1');
+      expect(pricing).toBeDefined();
+      expect(pricing.reasoning).toBe(15.0);
+    });
+
+    it('should handle all newly added models', () => {
+      const newModels = [
+        'gpt-4-0314',
+        'gpt-4-32k-0314',
+        'gpt-3.5-turbo-0613',
+        'gpt-3.5-turbo-16k-0613',
+        'o1-preview-2024-09-12',
+        'o1-mini-2024-09-12',
+        'o3-mini',
+        'gpt-4o-mini-2024-07-18',
+        'gpt-4-turbo-2024-04-09',
+        'gpt-4-0125',
+        'gpt-4-1106',
+        'claude-3-5-haiku-20241022',
+        'claude-3-5-sonnet-latest',
+        'claude-3-opus-20240229',
+        'claude-3-sonnet-20240229',
+        'claude-3-haiku-20240307',
+        'claude-1.2',
+        'claude-1',
+        'claude-1-100k',
+        'claude-instant-1-100k',
+        'anthropic.claude-v2',
+        'anthropic.claude-v2:1',
+        'anthropic.claude-instant-v1',
+        'gemini-pro',
+        'gemini-pro-vision',
+        'mistral.mistral-small-2402-v1:0',
+      ];
+
+      newModels.forEach((model) => {
+        const pricing = getModelPricing(model);
+        expect(pricing).toBeDefined();
+        expect(pricing.prompt).toBeGreaterThan(0);
+        expect(pricing.completion).toBeGreaterThan(0);
+      });
+    });
+  });
+
+  describe('calculateTokenCost', () => {
+    it('should calculate basic prompt and completion costs', () => {
+      const usage = {
+        promptTokens: 1000,
+        completionTokens: 500,
+      };
+      const cost = calculateTokenCost('gpt-4o', usage);
+
+      expect(cost.prompt).toBeCloseTo(0.005); // 1000/1M * 5.0
+      expect(cost.completion).toBeCloseTo(0.0075); // 500/1M * 15.0
+      expect(cost.total).toBeCloseTo(0.0125);
+    });
+
+    it('should handle zero token counts', () => {
+      const usage = {
+        promptTokens: 0,
+        completionTokens: 0,
+      };
+      const cost = calculateTokenCost('gpt-4', usage);
+
+      expect(cost.prompt).toBe(0);
+      expect(cost.completion).toBe(0);
+      expect(cost.total).toBe(0);
+    });
+
+    it('should handle large token counts', () => {
+      const usage = {
+        promptTokens: 100000,
+        completionTokens: 50000,
+      };
+      const cost = calculateTokenCost('gpt-4', usage);
+
+      expect(cost.prompt).toBeCloseTo(3.0); // 100k/1M * 30.0
+      expect(cost.completion).toBeCloseTo(3.0); // 50k/1M * 60.0
+      expect(cost.total).toBeCloseTo(6.0);
+    });
+
+    it('should calculate cache token costs for Claude models', () => {
+      const usage = {
+        promptTokens: 1000,
+        completionTokens: 500,
+        cacheWriteTokens: 2000,
+        cacheReadTokens: 3000,
+      };
+      const cost = calculateTokenCost('claude-3-5-sonnet', usage);
+
+      expect(cost.prompt).toBeCloseTo(0.003); // 1000/1M * 3.0
+      expect(cost.completion).toBeCloseTo(0.0075); // 500/1M * 15.0
+      expect(cost.cacheWrite).toBeCloseTo(0.0075); // 2000/1M * 3.75
+      expect(cost.cacheRead).toBeCloseTo(0.0009); // 3000/1M * 0.3
+      expect(cost.total).toBeCloseTo(0.0189);
+    });
+
+    it('should calculate reasoning token costs for o1 models', () => {
+      const usage = {
+        promptTokens: 1000,
+        completionTokens: 500,
+        reasoningTokens: 2000,
+      };
+      const cost = calculateTokenCost('o1', usage);
+
+      expect(cost.prompt).toBeCloseTo(0.015); // 1000/1M * 15.0
+      expect(cost.completion).toBeCloseTo(0.03); // 500/1M * 60.0
+      expect(cost.reasoning).toBeCloseTo(0.03); // 2000/1M * 15.0
+      expect(cost.total).toBeCloseTo(0.075);
+    });
+
+    it('should handle invalid model gracefully', () => {
+      const usage = {
+        promptTokens: 1000,
+        completionTokens: 500,
+      };
+      const cost = calculateTokenCost('invalid-model', usage);
+
+      expect(cost.total).toBe(0);
+      expect(cost.error).toBe('No pricing data available');
+    });
+
+    it('should handle invalid usage object', () => {
+      const cost = calculateTokenCost('gpt-4', null);
+
+      expect(cost.total).toBe(0);
+      expect(cost.error).toBe('Invalid usage object');
+    });
+
+    it('should handle missing model parameter', () => {
+      const usage = {
+        promptTokens: 1000,
+        completionTokens: 500,
+      };
+      const cost = calculateTokenCost(null, usage);
+
+      expect(cost.total).toBe(0);
+      expect(cost.error).toBe('Invalid model specified');
+    });
+
+    it('should use historical pricing for past dates', () => {
+      const usage = {
+        promptTokens: 1000,
+        completionTokens: 500,
+      };
+      const oldDate = new Date('2023-11-10');
+      const cost = calculateTokenCost('gpt-3.5-turbo', usage, oldDate);
+
+      expect(cost.prompt).toBeCloseTo(0.001); // 1000/1M * 1.0 (historical)
+      expect(cost.completion).toBeCloseTo(0.001); // 500/1M * 2.0 (historical)
+      expect(cost.total).toBeCloseTo(0.002);
+    });
+  });
+
+  describe('getSupportedModels', () => {
+    it('should return array of supported model names', () => {
+      const models = getSupportedModels();
+
+      expect(Array.isArray(models)).toBe(true);
+      expect(models.length).toBeGreaterThan(80); // We have 100+ models now
+      expect(models).toContain('gpt-4');
+      expect(models).toContain('claude-3-opus');
+      expect(models).toContain('gemini-1.5-pro');
+    });
+
+    it('should include all newly added models', () => {
+      const models = getSupportedModels();
+
+      expect(models).toContain('gpt-4-0314');
+      expect(models).toContain('o3-mini');
+      expect(models).toContain('claude-1-100k');
+      expect(models).toContain('gemini-pro');
+      expect(models).toContain('anthropic.claude-v2');
+    });
+  });
+
+  describe('getModelProvider', () => {
+    it('should identify OpenAI models', () => {
+      expect(getModelProvider('gpt-4')).toBe('OpenAI');
+      expect(getModelProvider('gpt-3.5-turbo')).toBe('OpenAI');
+      expect(getModelProvider('o1-preview')).toBe('OpenAI');
+      expect(getModelProvider('chatgpt-4o-latest')).toBe('OpenAI');
+    });
+
+    it('should identify Anthropic models', () => {
+      expect(getModelProvider('claude-3-opus')).toBe('Anthropic');
+      expect(getModelProvider('claude-2.1')).toBe('Anthropic');
+      expect(getModelProvider('anthropic.claude-v2')).toBe('Anthropic');
+    });
+
+    it('should identify Google models', () => {
+      expect(getModelProvider('gemini-1.5-pro')).toBe('Google');
+      expect(getModelProvider('gemini-pro')).toBe('Google');
+    });
+
+    it('should identify Mistral models', () => {
+      expect(getModelProvider('mistral.mistral-7b-instruct-v0:2')).toBe('Mistral');
+    });
+
+    it('should identify Cohere models', () => {
+      expect(getModelProvider('cohere.command-r-v1:0')).toBe('Cohere');
+    });
+
+    it('should identify Meta models', () => {
+      expect(getModelProvider('meta.llama3-70b-instruct-v1:0')).toBe('Meta');
+    });
+
+    it('should identify Amazon models', () => {
+      expect(getModelProvider('amazon.titan-text-express-v1')).toBe('Amazon');
+    });
+
+    it('should identify xAI models', () => {
+      expect(getModelProvider('grok-2')).toBe('xAI');
+    });
+
+    it('should identify DeepSeek models', () => {
+      expect(getModelProvider('deepseek-chat')).toBe('DeepSeek');
+    });
+
+    it('should return Unknown for unrecognized models', () => {
+      expect(getModelProvider('unknown-model')).toBe('Unknown');
+    });
+  });
+
+  describe('Edge Cases', () => {
+    it('should handle extremely small token costs', () => {
+      const usage = {
+        promptTokens: 1,
+        completionTokens: 1,
+      };
+      const cost = calculateTokenCost('gpt-4o-mini', usage);
+
+      expect(cost.prompt).toBeCloseTo(0.00000015);
+      expect(cost.completion).toBeCloseTo(0.0000006);
+      expect(cost.total).toBeCloseTo(0.00000075);
+    });
+
+    it('should handle models with zero-cost experimental pricing', () => {
+      const usage = {
+        promptTokens: 10000,
+        completionTokens: 5000,
+      };
+      const cost = calculateTokenCost('gemini-2.0-flash-exp', usage);
+
+      expect(cost.prompt).toBe(0);
+      expect(cost.completion).toBe(0);
+      expect(cost.total).toBe(0);
+    });
+
+    it('should handle mixed token types in single request', () => {
+      const usage = {
+        promptTokens: 1000,
+        completionTokens: 500,
+        cacheWriteTokens: 200,
+        cacheReadTokens: 300,
+        reasoningTokens: 0, // Not all models have reasoning
+      };
+      const cost = calculateTokenCost('claude-3-5-sonnet', usage);
+
+      expect(cost.prompt).toBeCloseTo(0.003);
+      expect(cost.completion).toBeCloseTo(0.0075);
+      expect(cost.cacheWrite).toBeCloseTo(0.00075);
+      expect(cost.cacheRead).toBeCloseTo(0.00009);
+      expect(cost.reasoning).toBe(0);
+    });
+
+    it('should handle date boundaries correctly', () => {
+      // Test exact date match
+      const exactDate = new Date('2024-01-25');
+      const pricing = getModelPricing('gpt-3.5-turbo', exactDate);
+      expect(pricing.prompt).toBe(0.5);
+
+      // Test one day before change
+      const dayBefore = new Date('2024-01-24');
+      const pricingBefore = getModelPricing('gpt-3.5-turbo', dayBefore);
+      expect(pricingBefore.prompt).toBe(1.0);
+
+      // Test one day after change
+      const dayAfter = new Date('2024-01-26');
+      const pricingAfter = getModelPricing('gpt-3.5-turbo', dayAfter);
+      expect(pricingAfter.prompt).toBe(0.5);
+    });
+  });
+});
--- a/client/src/components/Chat/ConversationCost.tsx
+++ b/client/src/components/Chat/ConversationCost.tsx
@ -0,0 +1,63 @@
+import React, { useMemo } from 'react';
+import { useParams } from 'react-router-dom';
+import { useTranslation } from 'react-i18next';
+import { Constants } from 'librechat-data-provider';
+import { useQuery } from '@tanstack/react-query';
+
+type CostDisplay = {
+  totalCost: string;
+  totalCostRaw: number;
+  primaryModel: string;
+  totalTokens: number;
+  lastUpdated: string | number | Date;
+  conversationId?: string;
+};
+
+export default function ConversationCost() {
+  const { t } = useTranslation();
+  const { conversationId } = useParams();
+
+  const { data } = useQuery<CostDisplay | null>({
+    queryKey: ['conversationCost', conversationId],
+    enabled: Boolean(conversationId && conversationId !== Constants.NEW_CONVO),
+    queryFn: async () => {
+      const res = await fetch(`/api/convos/${conversationId}/cost`, { credentials: 'include' });
+      if (!res.ok) {
+        return null;
+      }
+      return res.json();
+    },
+    staleTime: 5_000,
+    refetchOnWindowFocus: false,
+  });
+
+  const colorClass = useMemo(() => {
+    const cost = data?.totalCostRaw ?? 0;
+    if (cost < 0.01) return 'text-green-600 dark:text-green-400';
+    if (cost < 0.1) return 'text-yellow-600 dark:text-yellow-400';
+    if (cost < 1) return 'text-orange-600 dark:text-orange-400';
+    return 'text-red-600 dark:text-red-400';
+  }, [data?.totalCostRaw]);
+
+  if (!conversationId || conversationId === Constants.NEW_CONVO) {
+    return null;
+  }
+
+  if (!data || data.totalCostRaw === 0) {
+    return (
+      <div className="flex items-center gap-1 rounded-md px-2 py-1 text-xs text-gray-400" title={t('com_ui_conversation_cost')}>
+        <span>💰</span>
+        <span>$0.00</span>
+      </div>
+    );
+  }
+
+  const tooltipText = `${t('com_ui_conversation_cost')}: ${data.totalCost} | ${t('com_ui_primary_model')}: ${data.primaryModel} | ${t('com_ui_total_tokens')}: ${data.totalTokens.toLocaleString()} | ${t('com_ui_last_updated')}: ${new Date(data.lastUpdated).toLocaleTimeString()}`;
+
+  return (
+    <div className="flex items-center gap-1 rounded-md px-2 py-1 text-xs transition-colors hover:bg-surface-hover" title={tooltipText}>
+      <span className="text-text-tertiary">💰</span>
+      <span className={`font-medium ${colorClass}`}>{data.totalCost}</span>
+    </div>
+  );
+}
--- a/client/src/components/Chat/Header.tsx
+++ b/client/src/components/Chat/Header.tsx
@ -9,6 +9,7 @@ import { useGetStartupConfig } from '~/data-provider';
 import ExportAndShareMenu from './ExportAndShareMenu';
 import BookmarkMenu from './Menus/BookmarkMenu';
 import { TemporaryChat } from './TemporaryChat';
+import ConversationCost from './ConversationCost';
 import AddMultiConvo from './AddMultiConvo';
 import { useHasAccess } from '~/hooks';

@ -62,6 +63,7 @@ export default function Header() {
            {hasAccessToMultiConvo === true && <AddMultiConvo />}
            {isSmallScreen && (
              <>
+                <ConversationCost />
                <ExportAndShareMenu
                  isSharedButtonEnabled={startupConfig?.sharedLinksEnabled ?? false}
                />
@ -72,6 +74,7 @@ export default function Header() {
        </div>
        {!isSmallScreen && (
          <div className="flex items-center gap-2">
+            <ConversationCost />
            <ExportAndShareMenu
              isSharedButtonEnabled={startupConfig?.sharedLinksEnabled ?? false}
            />
--- a/client/src/locales/en/translation.json
+++ b/client/src/locales/en/translation.json
@ -1242,4 +1242,9 @@
  "com_ui_yes": "Yes",
  "com_ui_zoom": "Zoom",
  "com_user_message": "You"
+  ,
+  "com_ui_conversation_cost": "Conversation cost",
+  "com_ui_last_updated": "Last updated",
+  "com_ui_primary_model": "Model",
+  "com_ui_total_tokens": "Tokens"
 }
--- a/packages/data-provider/src/schemas.ts
+++ b/packages/data-provider/src/schemas.ts
@ -538,6 +538,9 @@ export const tMessageSchema = z.object({
  unfinished: z.boolean().optional(),
  searchResult: z.boolean().optional(),
  finish_reason: z.string().optional(),
+  tokenCount: z.number().optional(),
+  usage: z.any().optional(),
+  tokens: z.any().optional(),
  /* assistant */
  thread_id: z.string().optional(),
  /* frontend components */