Merge branch 'main' into feat/Custom-Token-Rates-for-Endpoints

2026-03-16 20:56:35 +01:00 · 2025-02-27 11:03:23 +01:00 · 2025-02-27 11:03:23 +01:00 · 59a232812d
commit 59a232812d
parent edf23eb2ae 34f967eff8
27 changed files with 568 additions and 244 deletions
--- a/api/app/clients/AnthropicClient.js
+++ b/api/app/clients/AnthropicClient.js
@ -7,8 +7,7 @@ const {
  getResponseSender,
  validateVisionModel,
 } = require('librechat-data-provider');
-const { SplitStreamHandler, GraphEvents } = require('@librechat/agents');
-const { encodeAndFormat } = require('~/server/services/Files/images/encode');
+const { SplitStreamHandler: _Handler, GraphEvents } = require('@librechat/agents');
 const {
  truncateText,
  formatMessage,
@ -24,6 +23,7 @@ const {
 } = require('~/server/services/Endpoints/anthropic/helpers');
 const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils');
 const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
+const { encodeAndFormat } = require('~/server/services/Files/images/encode');
 const Tokenizer = require('~/server/services/Tokenizer');
 const { logger, sendEvent } = require('~/config');
 const { sleep } = require('~/server/utils');
@ -32,6 +32,15 @@ const BaseClient = require('./BaseClient');
 const HUMAN_PROMPT = '\n\nHuman:';
 const AI_PROMPT = '\n\nAssistant:';

+/**
+ * Subclass of the `@librechat/agents` stream handler that reads deltas straight from
+ * Anthropic stream chunks (`delta.text` / `completion` / `delta.thinking`).
+ * NOTE(review): presumably the inherited `handle` consults these two accessors — confirm
+ * against the `@librechat/agents` implementation.
+ */
+class SplitStreamHandler extends _Handler {
+  /**
+   * Extracts the text delta from a chunk.
+   * @param {object} [chunk] - A streamed chunk; may be nullish.
+   * @returns {string} `chunk.delta.text` when it is not nullish, otherwise `chunk.completion`;
+   * any falsy result is replaced by an empty string.
+   */
+  getDeltaContent(chunk) {
+    const text = chunk?.delta?.text ?? chunk?.completion;
+    return text || '';
+  }
+
+  /**
+   * Extracts the reasoning ("thinking") delta from a chunk.
+   * @param {object} [chunk] - A streamed chunk; may be nullish.
+   * @returns {string} `chunk.delta.thinking`, or an empty string when it is missing or falsy.
+   */
+  getReasoningDelta(chunk) {
+    const thinking = chunk?.delta?.thinking;
+    return thinking ? thinking : '';
+  }
+}
+
 /** Helper function to introduce a delay before retrying */
 function delayBeforeRetry(attempts, baseDelay = 1000) {
  return new Promise((resolve) => setTimeout(resolve, baseDelay * attempts));
@ -105,7 +114,9 @@ class AnthropicClient extends BaseClient {

    const modelMatch = matchModelName(this.modelOptions.model, EModelEndpoint.anthropic);
    this.isClaude3 = modelMatch.includes('claude-3');
-    this.isLegacyOutput = !modelMatch.includes('claude-3-5-sonnet');
+    this.isLegacyOutput = !(
+      /claude-3[-.]5-sonnet/.test(modelMatch) || /claude-3[-.]7/.test(modelMatch)
+    );
    this.supportsCacheControl = this.options.promptCache && checkPromptCacheSupport(modelMatch);

    if (
@ -733,10 +744,17 @@ class AnthropicClient extends BaseClient {
      stop_sequences,
      temperature,
      metadata,
-      top_p,
-      top_k,
    };

+    if (!/claude-3[-.]7/.test(model)) {
+      if (top_p !== undefined) {
+        requestOptions.top_p = top_p;
+      }
+      if (top_k !== undefined) {
+        requestOptions.top_k = top_k;
+      }
+    }
+
    if (this.useMessages) {
      requestOptions.messages = payload;
      requestOptions.max_tokens =
@ -798,50 +816,16 @@ class AnthropicClient extends BaseClient {
            }
          });

-          /** @param {string} chunk */
-          const handleChunk = (chunk) => {
-            this.streamHandler.handle({
-              choices: [
-                {
-                  delta: {
-                    content: chunk,
-                  },
-                },
-              ],
-            });
-          };
-          /** @param {string} chunk */
-          const handleReasoningChunk = (chunk) => {
-            this.streamHandler.handle({
-              choices: [
-                {
-                  delta: {
-                    reasoning_content: chunk,
-                  },
-                },
-              ],
-            });
-          };
-
          for await (const completion of response) {
-            // Handle each completion as before
            const type = completion?.type ?? '';
            if (tokenEventTypes.has(type)) {
              logger.debug(`[AnthropicClient] ${type}`, completion);
              this[type] = completion;
            }
-            if (completion?.delta?.thinking) {
-              handleReasoningChunk(completion.delta.thinking);
-            } else if (completion?.delta?.text) {
-              handleChunk(completion.delta.text);
-            } else if (completion.completion) {
-              handleChunk(completion.completion);
-            }
-
+            this.streamHandler.handle(completion);
            await sleep(streamRate);
          }

-          // Successful processing, exit loop
          break;
        } catch (error) {
          attempts += 1;
--- a/api/app/clients/BaseClient.js
+++ b/api/app/clients/BaseClient.js
@ -5,10 +5,11 @@ const {
  isAgentsEndpoint,
  isParamEndpoint,
  EModelEndpoint,
+  excludedKeys,
  ErrorTypes,
  Constants,
 } = require('librechat-data-provider');
-const { getMessages, saveMessage, updateMessage, saveConvo } = require('~/models');
+const { getMessages, saveMessage, updateMessage, saveConvo, getConvo } = require('~/models');
 const { addSpaceIfNeeded, isEnabled } = require('~/server/utils');
 const { truncateToolCallOutputs } = require('./prompts');
 const checkBalance = require('~/models/checkBalance');
@ -55,6 +56,10 @@ class BaseClient {
     * Flag to determine if the client re-submitted the latest assistant message.
     * @type {boolean | undefined} */
    this.continued;
+    /**
+     * Flag to determine if the client has already fetched the conversation while saving new messages.
+     * @type {boolean | undefined} */
+    this.fetchedConvo;
    /** @type {TMessage[]} */
    this.currentMessages = [];
    /** @type {import('librechat-data-provider').VisionModes | undefined} */
@ -863,16 +868,39 @@ class BaseClient {
      return { message: savedMessage };
    }

-    const conversation = await saveConvo(
-      this.options.req,
-      {
-        conversationId: message.conversationId,
-        endpoint: this.options.endpoint,
-        endpointType: this.options.endpointType,
-        ...endpointOptions,
-      },
-      { context: 'api/app/clients/BaseClient.js - saveMessageToDatabase #saveConvo' },
-    );
+    const fieldsToKeep = {
+      conversationId: message.conversationId,
+      endpoint: this.options.endpoint,
+      endpointType: this.options.endpointType,
+      ...endpointOptions,
+    };
+
+    const existingConvo =
+      this.fetchedConvo === true
+        ? null
+        : await getConvo(this.options.req?.user?.id, message.conversationId);
+
+    const unsetFields = {};
+    if (existingConvo != null) {
+      this.fetchedConvo = true;
+      for (const key in existingConvo) {
+        if (!key) {
+          continue;
+        }
+        if (excludedKeys.has(key)) {
+          continue;
+        }
+
+        if (endpointOptions?.[key] === undefined) {
+          unsetFields[key] = 1;
+        }
+      }
+    }
+
+    const conversation = await saveConvo(this.options.req, fieldsToKeep, {
+      context: 'api/app/clients/BaseClient.js - saveMessageToDatabase #saveConvo',
+      unsetFields,
+    });

    return { message: savedMessage, conversation };
  }
--- a/api/app/clients/OpenAIClient.js
+++ b/api/app/clients/OpenAIClient.js
@ -109,12 +109,7 @@ class OpenAIClient extends BaseClient {
    const omniPattern = /\b(o1|o3)\b/i;
    this.isOmni = omniPattern.test(this.modelOptions.model);

-    const { OPENROUTER_API_KEY, OPENAI_FORCE_PROMPT } = process.env ?? {};
-    if (OPENROUTER_API_KEY && !this.azure) {
-      this.apiKey = OPENROUTER_API_KEY;
-      this.useOpenRouter = true;
-    }
-
+    const { OPENAI_FORCE_PROMPT } = process.env ?? {};
    const { reverseProxyUrl: reverseProxy } = this.options;

    if (!this.useOpenRouter && reverseProxy && reverseProxy.includes(KnownEndpoints.openrouter)) {
--- a/api/app/clients/specs/AnthropicClient.test.js
+++ b/api/app/clients/specs/AnthropicClient.test.js
@ -1,3 +1,4 @@
+const { SplitStreamHandler } = require('@librechat/agents');
 const { anthropicSettings } = require('librechat-data-provider');
 const AnthropicClient = require('~/app/clients/AnthropicClient');

@ -405,4 +406,278 @@ describe('AnthropicClient', () => {
      expect(Number.isNaN(result)).toBe(false);
    });
  });
+
+  describe('maxOutputTokens handling for different models', () => {
+    /** Legacy default for `maxOutputTokens`, read at call time inside each test. */
+    const legacyDefault = () => anthropicSettings.legacy.maxOutputTokens.default;
+
+    /**
+     * Applies `requested` as `maxOutputTokens` for each model in turn on ONE shared client and
+     * asserts the value held by the client after every `setOptions` call.
+     * @param {string[]} models - Model names, applied in the given order.
+     * @param {number} requested - The `maxOutputTokens` value passed to `setOptions`.
+     * @param {number} expected - The value `client.modelOptions.maxOutputTokens` must equal.
+     */
+    const expectMaxOutputTokens = (models, requested, expected) => {
+      const client = new AnthropicClient('test-api-key');
+      for (const model of models) {
+        client.setOptions({ modelOptions: { model, maxOutputTokens: requested } });
+        expect(client.modelOptions.maxOutputTokens).toBe(expected);
+      }
+    };
+
+    it('should not cap maxOutputTokens for Claude 3.5 Sonnet models', () => {
+      const requested = legacyDefault() * 10;
+      // Hyphen notation first, then decimal notation
+      expectMaxOutputTokens(['claude-3-5-sonnet', 'claude-3.5-sonnet'], requested, requested);
+    });
+
+    it('should not cap maxOutputTokens for Claude 3.7 models', () => {
+      const requested = legacyDefault() * 2;
+      // Hyphen notation first, then decimal notation
+      expectMaxOutputTokens(['claude-3-7-sonnet', 'claude-3.7-sonnet'], requested, requested);
+    });
+
+    it('should cap maxOutputTokens for Claude 3.5 Haiku models', () => {
+      const cap = legacyDefault();
+      // Hyphen notation first, then decimal notation
+      expectMaxOutputTokens(['claude-3-5-haiku', 'claude-3.5-haiku'], cap * 2, cap);
+    });
+
+    it('should cap maxOutputTokens for Claude 3 Haiku and Opus models', () => {
+      const cap = legacyDefault();
+      // Haiku first, then Opus
+      expectMaxOutputTokens(['claude-3-haiku', 'claude-3-opus'], cap * 2, cap);
+    });
+  });
+
+  describe('topK/topP parameters for different models', () => {
+    beforeEach(() => {
+      // Stub the base handler's `handle` so streamed chunks are not actually processed
+      jest.spyOn(SplitStreamHandler.prototype, 'handle').mockImplementation(() => {});
+    });
+
+    afterEach(() => {
+      jest.restoreAllMocks();
+    });
+
+    /**
+     * Shared setup for every test in this suite: builds a client for `model` with
+     * `temperature: 0.7`, `topK: 10`, `topP: 0.9`, stubs `createResponse` with a short fake
+     * stream, runs `sendCompletion`, and returns whatever was passed to `getClient`.
+     * @param {string} model - The model name placed in `modelOptions.model`.
+     * @returns {Promise<object | null>} The options captured from the `getClient` call, or
+     * `null` if `getClient` was never invoked.
+     */
+    const captureRequestOptions = async (model) => {
+      const client = new AnthropicClient('test-api-key');
+
+      // Minimal fake stream: start event, one text delta, closing delta
+      async function* mockAsyncGenerator() {
+        yield { type: 'message_start', message: { usage: {} } };
+        yield { delta: { text: 'Test response' } };
+        yield { type: 'message_delta', usage: {} };
+      }
+
+      jest.spyOn(client, 'createResponse').mockImplementation(() => mockAsyncGenerator());
+
+      client.setOptions({
+        modelOptions: {
+          model,
+          temperature: 0.7,
+          topK: 10,
+          topP: 0.9,
+        },
+      });
+
+      // Record the request options instead of constructing a real SDK client
+      let capturedOptions = null;
+      jest.spyOn(client, 'getClient').mockImplementation((options) => {
+        capturedOptions = options;
+        return {};
+      });
+
+      const payload = [{ role: 'user', content: 'Test message' }];
+      await client.sendCompletion(payload, {});
+
+      return capturedOptions;
+    };
+
+    it('should include top_k and top_p parameters for non-claude-3.7 models', async () => {
+      const capturedOptions = await captureRequestOptions('claude-3-opus');
+
+      expect(capturedOptions).toHaveProperty('top_k', 10);
+      expect(capturedOptions).toHaveProperty('top_p', 0.9);
+    });
+
+    it('should include top_k and top_p parameters for claude-3-5-sonnet models', async () => {
+      const capturedOptions = await captureRequestOptions('claude-3-5-sonnet');
+
+      expect(capturedOptions).toHaveProperty('top_k', 10);
+      expect(capturedOptions).toHaveProperty('top_p', 0.9);
+    });
+
+    it('should not include top_k and top_p parameters for claude-3-7-sonnet models', async () => {
+      const capturedOptions = await captureRequestOptions('claude-3-7-sonnet');
+
+      expect(capturedOptions).not.toHaveProperty('top_k');
+      expect(capturedOptions).not.toHaveProperty('top_p');
+    });
+
+    it('should not include top_k and top_p parameters for models with decimal notation (claude-3.7)', async () => {
+      const capturedOptions = await captureRequestOptions('claude-3.7-sonnet');
+
+      expect(capturedOptions).not.toHaveProperty('top_k');
+      expect(capturedOptions).not.toHaveProperty('top_p');
+    });
+  });
 });
--- a/api/app/clients/specs/OpenAIClient.test.js
+++ b/api/app/clients/specs/OpenAIClient.test.js
@ -202,14 +202,6 @@ describe('OpenAIClient', () => {
      expect(client.modelOptions.temperature).toBe(0.7);
    });

-    it('should set apiKey and useOpenRouter if OPENROUTER_API_KEY is present', () => {
-      process.env.OPENROUTER_API_KEY = 'openrouter-key';
-      client.setOptions({});
-      expect(client.apiKey).toBe('openrouter-key');
-      expect(client.useOpenRouter).toBe(true);
-      delete process.env.OPENROUTER_API_KEY; // Cleanup
-    });
-
    it('should set FORCE_PROMPT based on OPENAI_FORCE_PROMPT or reverseProxyUrl', () => {
      process.env.OPENAI_FORCE_PROMPT = 'true';
      client.setOptions({});
@ -534,7 +526,6 @@ describe('OpenAIClient', () => {
    afterEach(() => {
      delete process.env.AZURE_OPENAI_DEFAULT_MODEL;
      delete process.env.AZURE_USE_MODEL_AS_DEPLOYMENT_NAME;
-      delete process.env.OPENROUTER_API_KEY;
    });

    it('should call getCompletion and fetchEventSource when using a text/instruct model', async () => {
--- a/api/models/Conversation.js
+++ b/api/models/Conversation.js
@ -104,10 +104,16 @@ module.exports = {
        update.expiredAt = null;
      }

+      /** @type {{ $set: Partial<TConversation>; $unset?: Record<keyof TConversation, number> }} */
+      const updateOperation = { $set: update };
+      if (metadata && metadata.unsetFields && Object.keys(metadata.unsetFields).length > 0) {
+        updateOperation.$unset = metadata.unsetFields;
+      }
+
      /** Note: the resulting Model object is necessary for Meilisearch operations */
      const conversation = await Conversation.findOneAndUpdate(
        { conversationId, user: req.user.id },
-        update,
+        updateOperation,
        {
          new: true,
          upsert: true,
--- a/api/models/Token.js
+++ b/api/models/Token.js
@ -13,6 +13,13 @@ const Token = mongoose.model('Token', tokenSchema);
 */
 async function fixIndexes() {
  try {
+    if (
+      process.env.NODE_ENV === 'CI' ||
+      process.env.NODE_ENV === 'development' ||
+      process.env.NODE_ENV === 'test'
+    ) {
+      return;
+    }
    const indexes = await Token.collection.indexes();
    logger.debug('Existing Token Indexes:', JSON.stringify(indexes, null, 2));
    const unwantedTTLIndexes = indexes.filter(
--- a/api/models/schema/convoSchema.js
+++ b/api/models/schema/convoSchema.js
@ -20,8 +20,6 @@ const convoSchema = mongoose.Schema(
      index: true,
    },
    messages: [{ type: mongoose.Schema.Types.ObjectId, ref: 'Message' }],
-    // google only
-    examples: { type: [{ type: mongoose.Schema.Types.Mixed }], default: undefined },
    agentOptions: {
      type: mongoose.Schema.Types.Mixed,
    },
@ -48,12 +46,12 @@ if (process.env.MEILI_HOST && process.env.MEILI_MASTER_KEY) {
  convoSchema.plugin(mongoMeili, {
    host: process.env.MEILI_HOST,
    apiKey: process.env.MEILI_MASTER_KEY,
-    indexName: 'convos', // Will get created automatically if it doesn't exist already
+    /** Note: Will get created automatically if it doesn't exist already */
+    indexName: 'convos',
    primaryKey: 'conversationId',
  });
 }

-// Create TTL index
 convoSchema.index({ expiredAt: 1 }, { expireAfterSeconds: 0 });
 convoSchema.index({ createdAt: 1, updatedAt: 1 });
 convoSchema.index({ conversationId: 1, user: 1 }, { unique: true });
--- a/api/models/schema/defaults.js
+++ b/api/models/schema/defaults.js
@ -1,3 +1,5 @@
+const mongoose = require('mongoose');
+
 const conversationPreset = {
  // endpoint: [azureOpenAI, openAI, anthropic, chatGPTBrowser]
  endpoint: {
@ -24,6 +26,7 @@ const conversationPreset = {
    required: false,
  },
  // for google only
+  examples: { type: [{ type: mongoose.Schema.Types.Mixed }], default: undefined },
  modelLabel: {
    type: String,
    required: false,
@ -129,56 +132,6 @@ const conversationPreset = {
  },
 };

-const agentOptions = {
-  model: {
-    type: String,
-    required: false,
-  },
-  // for azureOpenAI, openAI only
-  chatGptLabel: {
-    type: String,
-    required: false,
-  },
-  modelLabel: {
-    type: String,
-    required: false,
-  },
-  promptPrefix: {
-    type: String,
-    required: false,
-  },
-  temperature: {
-    type: Number,
-    required: false,
-  },
-  top_p: {
-    type: Number,
-    required: false,
-  },
-  // for google only
-  topP: {
-    type: Number,
-    required: false,
-  },
-  topK: {
-    type: Number,
-    required: false,
-  },
-  maxOutputTokens: {
-    type: Number,
-    required: false,
-  },
-  presence_penalty: {
-    type: Number,
-    required: false,
-  },
-  frequency_penalty: {
-    type: Number,
-    required: false,
-  },
-};
-
 module.exports = {
  conversationPreset,
-  agentOptions,
 };
--- a/api/models/schema/presetSchema.js
+++ b/api/models/schema/presetSchema.js
@ -23,8 +23,6 @@ const presetSchema = mongoose.Schema(
    order: {
      type: Number,
    },
-    // google only
-    examples: [{ type: mongoose.Schema.Types.Mixed }],
    ...conversationPreset,
    agentOptions: {
      type: mongoose.Schema.Types.Mixed,
--- a/api/server/services/Endpoints/anthropic/llm.js
+++ b/api/server/services/Endpoints/anthropic/llm.js
@ -43,14 +43,21 @@ function getLLMConfig(apiKey, options = {}) {
    model: mergedOptions.model,
    stream: mergedOptions.stream,
    temperature: mergedOptions.temperature,
-    topP: mergedOptions.topP,
-    topK: mergedOptions.topK,
    stopSequences: mergedOptions.stop,
    maxTokens:
      mergedOptions.maxOutputTokens || anthropicSettings.maxOutputTokens.reset(mergedOptions.model),
    clientOptions: {},
  };

+  if (!/claude-3[-.]7/.test(mergedOptions.model)) {
+    if (mergedOptions.topP !== undefined) {
+      requestOptions.topP = mergedOptions.topP;
+    }
+    if (mergedOptions.topK !== undefined) {
+      requestOptions.topK = mergedOptions.topK;
+    }
+  }
+
  const supportsCacheControl =
    systemOptions.promptCache === true && checkPromptCacheSupport(requestOptions.model);
  const headers = getClaudeHeaders(requestOptions.model, supportsCacheControl);
--- a/api/server/services/Endpoints/anthropic/llm.spec.js
+++ b/api/server/services/Endpoints/anthropic/llm.spec.js
@ -0,0 +1,112 @@
+const { anthropicSettings } = require('librechat-data-provider');
+const { getLLMConfig } = require('~/server/services/Endpoints/anthropic/llm');
+
+jest.mock('https-proxy-agent', () => ({
+  HttpsProxyAgent: jest.fn().mockImplementation((proxy) => ({ proxy })),
+}));
+
+describe('getLLMConfig', () => {
+  /**
+   * Calls `getLLMConfig` with the fixed test API key and returns only its `llmConfig`.
+   * @param {object} [modelOptions] - Passed through as `options.modelOptions`.
+   * @param {object} [extraOptions] - Extra top-level options (e.g. `proxy`, `reverseProxyUrl`).
+   * @returns {object} The `llmConfig` property of the result.
+   */
+  const buildLLMConfig = (modelOptions = {}, extraOptions = {}) =>
+    getLLMConfig('test-api-key', { modelOptions, ...extraOptions }).llmConfig;
+
+  it('should create a basic configuration with default values', () => {
+    const llmConfig = buildLLMConfig();
+
+    expect(llmConfig).toHaveProperty('apiKey', 'test-api-key');
+    expect(llmConfig).toHaveProperty('model', anthropicSettings.model.default);
+    expect(llmConfig).toHaveProperty('stream', true);
+    expect(llmConfig).toHaveProperty('maxTokens');
+  });
+
+  it('should include proxy settings when provided', () => {
+    const llmConfig = buildLLMConfig({}, { proxy: 'http://proxy:8080' });
+
+    expect(llmConfig.clientOptions).toHaveProperty('httpAgent');
+    expect(llmConfig.clientOptions.httpAgent).toHaveProperty('proxy', 'http://proxy:8080');
+  });
+
+  it('should include reverse proxy URL when provided', () => {
+    const llmConfig = buildLLMConfig({}, { reverseProxyUrl: 'http://reverse-proxy' });
+
+    expect(llmConfig.clientOptions).toHaveProperty('baseURL', 'http://reverse-proxy');
+  });
+
+  it('should include topK and topP for non-Claude-3.7 models', () => {
+    const llmConfig = buildLLMConfig({ model: 'claude-3-opus', topK: 10, topP: 0.9 });
+
+    expect(llmConfig).toHaveProperty('topK', 10);
+    expect(llmConfig).toHaveProperty('topP', 0.9);
+  });
+
+  it('should include topK and topP for Claude-3.5 models', () => {
+    const llmConfig = buildLLMConfig({ model: 'claude-3-5-sonnet', topK: 10, topP: 0.9 });
+
+    expect(llmConfig).toHaveProperty('topK', 10);
+    expect(llmConfig).toHaveProperty('topP', 0.9);
+  });
+
+  it('should NOT include topK and topP for Claude-3-7 models (hyphen notation)', () => {
+    const llmConfig = buildLLMConfig({ model: 'claude-3-7-sonnet', topK: 10, topP: 0.9 });
+
+    expect(llmConfig).not.toHaveProperty('topK');
+    expect(llmConfig).not.toHaveProperty('topP');
+  });
+
+  it('should NOT include topK and topP for Claude-3.7 models (decimal notation)', () => {
+    const llmConfig = buildLLMConfig({ model: 'claude-3.7-sonnet', topK: 10, topP: 0.9 });
+
+    expect(llmConfig).not.toHaveProperty('topK');
+    expect(llmConfig).not.toHaveProperty('topP');
+  });
+
+  it('should handle custom maxOutputTokens', () => {
+    const llmConfig = buildLLMConfig({ model: 'claude-3-opus', maxOutputTokens: 2048 });
+
+    expect(llmConfig).toHaveProperty('maxTokens', 2048);
+  });
+
+  it('should handle promptCache setting', () => {
+    const llmConfig = buildLLMConfig({ model: 'claude-3-5-sonnet', promptCache: true });
+
+    // Header values depend on the actual helper function, so they are not checked here;
+    // this only verifies that a config is produced when promptCache is set
+    expect(llmConfig).toBeDefined();
+  });
+});
--- a/api/server/services/ModelService.js
+++ b/api/server/services/ModelService.js
@ -129,9 +129,6 @@ const fetchOpenAIModels = async (opts, _models = []) => {
    //   .split('/deployments')[0]
    //   .concat(`/models?api-version=${azure.azureOpenAIApiVersion}`);
    // apiKey = azureOpenAIApiKey;
-  } else if (process.env.OPENROUTER_API_KEY) {
-    reverseProxyUrl = 'https://openrouter.ai/api/v1';
-    apiKey = process.env.OPENROUTER_API_KEY;
  }

  if (reverseProxyUrl) {
@ -218,7 +215,7 @@ const getOpenAIModels = async (opts) => {
    return models;
  }

-  if (userProvidedOpenAI && !process.env.OPENROUTER_API_KEY) {
+  if (userProvidedOpenAI) {
    return models;
  }

--- a/api/server/services/ModelService.spec.js
+++ b/api/server/services/ModelService.spec.js
@ -161,22 +161,6 @@ describe('getOpenAIModels', () => {
    expect(models).toEqual(expect.arrayContaining(['openai-model', 'openai-model-2']));
  });

-  it('attempts to use OPENROUTER_API_KEY if set', async () => {
-    process.env.OPENROUTER_API_KEY = 'test-router-key';
-    const expectedModels = ['model-router-1', 'model-router-2'];
-
-    axios.get.mockResolvedValue({
-      data: {
-        data: expectedModels.map((id) => ({ id })),
-      },
-    });
-
-    const models = await getOpenAIModels({ user: 'user456' });
-
-    expect(models).toEqual(expect.arrayContaining(expectedModels));
-    expect(axios.get).toHaveBeenCalled();
-  });
-
  it('utilizes proxy configuration when PROXY is set', async () => {
    axios.get.mockResolvedValue({
      data: {