mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-02-02 15:51:49 +01:00
🤖 feat: Support o4-mini and o3 Models (#6928)
* feat: Add support for new OpenAI models (o4-mini, o3) and update related logic
* 🔧 fix: Rename 'resubmitFiles' to 'isResubmission' for consistency across types and hooks
* 🔧 fix: Replace hardcoded 'pending_req' with CacheKeys.PENDING_REQ for consistency in cache handling
* 🔧 fix: Update cache handling to use Time.ONE_MINUTE instead of hardcoded TTL and streamline imports
* 🔧 fix: Enhance message handling logic to correctly identify parent messages and streamline imports in useSSE
This commit is contained in:
parent 88f4ad7c47
commit 52f146dd97

19 changed files with 69 additions and 53 deletions

@@ -108,7 +108,7 @@ class OpenAIClient extends BaseClient {
       this.checkVisionRequest(this.options.attachments);
     }

-    const omniPattern = /\b(o1|o3)\b/i;
+    const omniPattern = /\b(o\d)\b/i;
     this.isOmni = omniPattern.test(this.modelOptions.model);

     const { OPENAI_FORCE_PROMPT } = process.env ?? {};
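
The widened pattern is the heart of the model-support change: rather than enumerating o1 and o3, any single-digit "o" reasoning model (o1, o3, o4-mini, and future siblings) is now treated as an omni model. A minimal sketch, not part of the diff, showing how the new regex behaves:

const omniPattern = /\b(o\d)\b/i;

console.log(omniPattern.test('o4-mini'));     // true
console.log(omniPattern.test('o3'));          // true
console.log(omniPattern.test('o1-preview'));  // true
console.log(omniPattern.test('gpt-4o-mini')); // false ("4o" is digit + "o", not "o" + digit)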

@@ -1237,6 +1237,9 @@ ${convo}
       modelOptions.max_completion_tokens = modelOptions.max_tokens;
       delete modelOptions.max_tokens;
     }
+    if (this.isOmni === true && modelOptions.temperature != null) {
+      delete modelOptions.temperature;
+    }

     if (process.env.OPENAI_ORGANIZATION) {
       opts.organization = process.env.OPENAI_ORGANIZATION;
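
Together with the isOmni flag set above, the request options for these models are normalized before the API call: max_tokens is carried over to max_completion_tokens, and a user-supplied temperature is dropped because o-series models only accept the default. A simplified, self-contained sketch of that adjustment (normalizeOmniOptions is an illustrative name, not a function in the codebase):

function normalizeOmniOptions(modelOptions, isOmni) {
  if (!isOmni) {
    return modelOptions;
  }
  if (modelOptions.max_tokens != null) {
    // o-series models expect max_completion_tokens instead of max_tokens
    modelOptions.max_completion_tokens = modelOptions.max_tokens;
    delete modelOptions.max_tokens;
  }
  if (modelOptions.temperature != null) {
    // a custom temperature is rejected by these models
    delete modelOptions.temperature;
  }
  return modelOptions;
}

// normalizeOmniOptions({ model: 'o4-mini', max_tokens: 1024, temperature: 0.7 }, true)
// -> { model: 'o4-mini', max_completion_tokens: 1024 }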

api/cache/clearPendingReq.js (vendored), 9 changes

@@ -1,7 +1,8 @@
+const { Time, CacheKeys } = require('librechat-data-provider');
+const { isEnabled } = require('~/server/utils');
 const getLogStores = require('./getLogStores');
-const { isEnabled } = require('../server/utils');
+
 const { USE_REDIS, LIMIT_CONCURRENT_MESSAGES } = process.env ?? {};
-const ttl = 1000 * 60 * 1;

 /**
  * Clear or decrement pending requests from the cache.

@@ -28,7 +29,7 @@ const clearPendingReq = async ({ userId, cache: _cache }) => {
     return;
   }

-  const namespace = 'pending_req';
+  const namespace = CacheKeys.PENDING_REQ;
   const cache = _cache ?? getLogStores(namespace);

   if (!cache) {

@@ -39,7 +40,7 @@ const clearPendingReq = async ({ userId, cache: _cache }) => {
   const currentReq = +((await cache.get(key)) ?? 0);

   if (currentReq && currentReq >= 1) {
-    await cache.set(key, currentReq - 1, ttl);
+    await cache.set(key, currentReq - 1, Time.ONE_MINUTE);
   } else {
     await cache.delete(key);
   }
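
The removed local constant (1000 * 60 * 1) and Time.ONE_MINUTE both come to 60,000 ms, so the TTL itself is unchanged; it is simply named via librechat-data-provider now. A compact sketch of the decrement-or-delete flow this file implements (decrementPending is an illustrative name; the real function is clearPendingReq):

const { Time } = require('librechat-data-provider');

async function decrementPending(cache, key) {
  const currentReq = +((await cache.get(key)) ?? 0);
  if (currentReq && currentReq >= 1) {
    // keep the decremented counter alive for one more minute
    await cache.set(key, currentReq - 1, Time.ONE_MINUTE);
  } else {
    await cache.delete(key);
  }
}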

api/cache/getLogStores.js (vendored), 4 changes

@@ -19,7 +19,7 @@ const createViolationInstance = (namespace) => {
 // Serve cache from memory so no need to clear it on startup/exit
 const pending_req = isRedisEnabled
   ? new Keyv({ store: keyvRedis })
-  : new Keyv({ namespace: 'pending_req' });
+  : new Keyv({ namespace: CacheKeys.PENDING_REQ });

 const config = isRedisEnabled
   ? new Keyv({ store: keyvRedis })

@@ -64,7 +64,7 @@ const abortKeys = isRedisEnabled
 const namespaces = {
   [CacheKeys.ROLES]: roles,
   [CacheKeys.CONFIG_STORE]: config,
-  pending_req,
+  [CacheKeys.PENDING_REQ]: pending_req,
   [ViolationTypes.BAN]: new Keyv({ store: keyvMongo, namespace: CacheKeys.BANS, ttl: duration }),
   [CacheKeys.ENCODED_DOMAINS]: new Keyv({
     store: keyvMongo,
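
With both the Keyv store and the namespaces map keyed by the same constant, callers can no longer drift from the registered store. A short usage sketch, assuming the getLogStores export from ~/cache that appears later in this diff:

const { CacheKeys } = require('librechat-data-provider');
const { getLogStores } = require('~/cache');

// Resolves the Keyv instance registered under CacheKeys.PENDING_REQ above,
// whether the caller is the concurrency middleware or clearPendingReq.
const pendingCache = getLogStores(CacheKeys.PENDING_REQ);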

@@ -76,7 +76,9 @@ const tokenValues = Object.assign(
   '4k': { prompt: 1.5, completion: 2 },
   '16k': { prompt: 3, completion: 4 },
   'gpt-3.5-turbo-1106': { prompt: 1, completion: 2 },
+  'o4-mini': { prompt: 1.1, completion: 4.4 },
   'o3-mini': { prompt: 1.1, completion: 4.4 },
+  o3: { prompt: 10, completion: 40 },
   'o1-mini': { prompt: 1.1, completion: 4.4 },
   'o1-preview': { prompt: 15, completion: 60 },
   o1: { prompt: 15, completion: 60 },
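
The new entries appear to track OpenAI's per-million-token pricing at the time of the commit: $1.10 / $4.40 for o4-mini and $10 / $40 for o3. Assuming the multipliers are read as USD per 1M tokens, a rough cost estimate for a single o4-mini request looks like this (illustrative only):

const tokenValues = {
  'o4-mini': { prompt: 1.1, completion: 4.4 },
  o3: { prompt: 10, completion: 40 },
};

// 2,000 prompt tokens and 500 completion tokens on o4-mini:
const cost =
  (2000 / 1e6) * tokenValues['o4-mini'].prompt +     // 0.0022
  (500 / 1e6) * tokenValues['o4-mini'].completion;   // 0.0022
console.log(cost.toFixed(4)); // "0.0044"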

@@ -165,6 +165,15 @@ describe('getMultiplier', () => {
     );
   });
+
+  it('should return correct multipliers for o4-mini and o3', () => {
+    ['o4-mini', 'o3'].forEach((model) => {
+      const prompt = getMultiplier({ model, tokenType: 'prompt' });
+      const completion = getMultiplier({ model, tokenType: 'completion' });
+      expect(prompt).toBe(tokenValues[model].prompt);
+      expect(completion).toBe(tokenValues[model].completion);
+    });
+  });

   it('should return defaultRate if tokenType is provided but not found in tokenValues', () => {
     expect(getMultiplier({ valueKey: '8k', tokenType: 'unknownType' })).toBe(defaultRate);
   });

@@ -58,7 +58,7 @@ const payloadParser = ({ req, agent, endpoint }) => {

 const legacyContentEndpoints = new Set([KnownEndpoints.groq, KnownEndpoints.deepseek]);

-const noSystemModelRegex = [/\bo1\b/gi];
+const noSystemModelRegex = [/\b(o\d)\b/gi];

 // const { processMemory, memoryInstructions } = require('~/server/services/Endpoints/agents/memory');
 // const { getFormattedMemories } = require('~/models/Memory');
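
noSystemModelRegex presumably lists models whose payload must not include a system message; widening /\bo1\b/ to /\b(o\d)\b/ extends that treatment to o3 and o4-mini. A hedged sketch of how such a list might be consulted (the helper below is illustrative, not code from the repo):

const noSystemModelRegex = [/\b(o\d)\b/gi];

// Rebuild each pattern so the sticky lastIndex a /g regex keeps between .test() calls cannot skew results.
const modelDisallowsSystem = (model) =>
  noSystemModelRegex.some((pattern) => new RegExp(pattern.source, 'i').test(model));

console.log(modelDisallowsSystem('o4-mini')); // true
console.log(modelDisallowsSystem('gpt-4o'));  // false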

@@ -975,7 +975,7 @@ class AgentClient extends BaseClient {
         })
       )?.llmConfig ?? clientOptions;
     }
-    if (/\b(o1|o3)\b/i.test(clientOptions.model) && clientOptions.maxTokens != null) {
+    if (/\b(o\d)\b/i.test(clientOptions.model) && clientOptions.maxTokens != null) {
       delete clientOptions.maxTokens;
     }
     try {

@@ -1,4 +1,4 @@
-const { Time } = require('librechat-data-provider');
+const { Time, CacheKeys } = require('librechat-data-provider');
 const clearPendingReq = require('~/cache/clearPendingReq');
 const { logViolation, getLogStores } = require('~/cache');
 const { isEnabled } = require('~/server/utils');

@@ -25,7 +25,7 @@ const {
  * @throws {Error} Throws an error if the user exceeds the concurrent request limit.
  */
 const concurrentLimiter = async (req, res, next) => {
-  const namespace = 'pending_req';
+  const namespace = CacheKeys.PENDING_REQ;
   const cache = getLogStores(namespace);
   if (!cache) {
     return next();
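
Per the JSDoc above, this middleware tracks a per-user pending-request counter in the PENDING_REQ cache and errors once the concurrent limit is exceeded, while clearPendingReq (earlier in this diff) decrements that counter when a request finishes. A rough sketch of the pattern under those assumptions; the key choice and TTL here are illustrative, not the middleware's exact code:

const { Time, CacheKeys } = require('librechat-data-provider');
const { getLogStores } = require('~/cache');

async function allowRequest(userId, limit) {
  const cache = getLogStores(CacheKeys.PENDING_REQ);
  const pending = +((await cache.get(userId)) ?? 0);
  if (pending >= limit) {
    return false; // over the concurrent-message limit; caller should log a violation
  }
  await cache.set(userId, pending + 1, Time.ONE_MINUTE);
  return true;
}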

@@ -2,7 +2,9 @@ const z = require('zod');
 const { EModelEndpoint } = require('librechat-data-provider');

 const openAIModels = {
+  'o4-mini': 200000,
   'o3-mini': 195000, // -5000 from max
+  o3: 200000,
   o1: 195000, // -5000 from max
   'o1-mini': 127500, // -500 from max
   'o1-preview': 127500, // -500 from max

@@ -340,6 +340,15 @@ describe('getModelMaxTokens', () => {
     expect(getModelMaxTokens('o1-preview-something')).toBe(o1PreviewTokens);
     expect(getModelMaxTokens('openai/o1-preview-something')).toBe(o1PreviewTokens);
   });
+
+  test('should return correct max context tokens for o4-mini and o3', () => {
+    const o4MiniTokens = maxTokensMap[EModelEndpoint.openAI]['o4-mini'];
+    const o3Tokens = maxTokensMap[EModelEndpoint.openAI]['o3'];
+    expect(getModelMaxTokens('o4-mini')).toBe(o4MiniTokens);
+    expect(getModelMaxTokens('openai/o4-mini')).toBe(o4MiniTokens);
+    expect(getModelMaxTokens('o3')).toBe(o3Tokens);
+    expect(getModelMaxTokens('openai/o3')).toBe(o3Tokens);
+  });
 });

 describe('matchModelName', () => {