⚙️ feat: Adjust Rate of Stream Progress (#3244)

* chore: bump data-provider and add MESSAGES CacheKey

* refactor: avoid saving messages while streaming, save partial text to cache instead

* fix(ci): processChunks

* chore: logging aborted request to debug

* feat: set stream rate for token processing

* chore: specify default stream rate

* fix(ci): Update AppService.js to use optional chaining for endpointLocals assignment

* refactor: abstract the error handler

* feat: streamRate for assistants; refactor: update default rate for token

* refactor: update error handling in assistants/errors.js

* refactor: update error handling in assistants/errors.js
Danny Avila committed 2024-07-17 10:47:17 -04:00 (via GitHub) · commit 5d40d0a37a · parent 1c282d1517 · 29 changed files with 661 additions and 309 deletions
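At a glance, the change paces token streaming instead of flushing every chunk as fast as it arrives: handlers sleep for a configurable number of milliseconds (`streamRate`) between chunks, and intermediate text is written to a cache rather than saved to the database on every update. A minimal sketch of the pacing idea, assuming a `sleep` helper that resolves after the given delay (names here are illustrative, not the exact LibreChat internals):

```js
// Minimal sketch: pace chunk delivery by sleeping `streamRate` ms between sends.
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

async function streamWithRate(chunks, sendChunk, streamRate = 1) {
  for (const chunk of chunks) {
    sendChunk(chunk);
    await sleep(streamRate);
  }
}
```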


@ -67,17 +67,18 @@ const AppService = async (app) => {
handleRateLimits(config?.rateLimits);
const endpointLocals = {};
const endpoints = config?.endpoints;
if (config?.endpoints?.[EModelEndpoint.azureOpenAI]) {
if (endpoints?.[EModelEndpoint.azureOpenAI]) {
endpointLocals[EModelEndpoint.azureOpenAI] = azureConfigSetup(config);
checkAzureVariables();
}
if (config?.endpoints?.[EModelEndpoint.azureOpenAI]?.assistants) {
if (endpoints?.[EModelEndpoint.azureOpenAI]?.assistants) {
endpointLocals[EModelEndpoint.azureAssistants] = azureAssistantsDefaults();
}
if (config?.endpoints?.[EModelEndpoint.azureAssistants]) {
if (endpoints?.[EModelEndpoint.azureAssistants]) {
endpointLocals[EModelEndpoint.azureAssistants] = assistantsConfigSetup(
config,
EModelEndpoint.azureAssistants,
@ -85,7 +86,7 @@ const AppService = async (app) => {
);
}
if (config?.endpoints?.[EModelEndpoint.assistants]) {
if (endpoints?.[EModelEndpoint.assistants]) {
endpointLocals[EModelEndpoint.assistants] = assistantsConfigSetup(
config,
EModelEndpoint.assistants,
@ -93,6 +94,19 @@ const AppService = async (app) => {
);
}
if (endpoints?.[EModelEndpoint.openAI]) {
endpointLocals[EModelEndpoint.openAI] = endpoints[EModelEndpoint.openAI];
}
if (endpoints?.[EModelEndpoint.google]) {
endpointLocals[EModelEndpoint.google] = endpoints[EModelEndpoint.google];
}
if (endpoints?.[EModelEndpoint.anthropic]) {
endpointLocals[EModelEndpoint.anthropic] = endpoints[EModelEndpoint.anthropic];
}
if (endpoints?.[EModelEndpoint.gptPlugins]) {
endpointLocals[EModelEndpoint.gptPlugins] = endpoints[EModelEndpoint.gptPlugins];
}
app.locals = {
...defaultLocals,
modelSpecs: config.modelSpecs,
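With these additions, AppService copies the per-endpoint config blocks (openAI, google, anthropic, gptPlugins) into `app.locals`, so request handlers can read `streamRate` without re-parsing the config. The resulting shape might look roughly like this (values are illustrative; the actual contents come from the user's config file):

```js
// Illustrative only: the keys mirror EModelEndpoint values, and `all` (when present)
// is consulted last by the client initializers, so it overrides per-endpoint rates.
app.locals = {
  // ...defaultLocals,
  openAI: { streamRate: 25 },
  anthropic: { streamRate: 35 },
  all: { streamRate: 30 },
};
```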


@ -19,11 +19,27 @@ const initializeClient = async ({ req, res, endpointOption }) => {
checkUserKeyExpiry(expiresAt, EModelEndpoint.anthropic);
}
const clientOptions = {};
/** @type {undefined | TBaseEndpoint} */
const anthropicConfig = req.app.locals[EModelEndpoint.anthropic];
if (anthropicConfig) {
clientOptions.streamRate = anthropicConfig.streamRate;
}
/** @type {undefined | TBaseEndpoint} */
const allConfig = req.app.locals.all;
if (allConfig) {
clientOptions.streamRate = allConfig.streamRate;
}
const client = new AnthropicClient(anthropicApiKey, {
req,
res,
reverseProxyUrl: ANTHROPIC_REVERSE_PROXY ?? null,
proxy: PROXY ?? null,
...clientOptions,
...endpointOption,
});
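Note the assignment order: the Anthropic-specific `streamRate` is applied first and then overwritten whenever an `all` config exists, so `all` takes precedence. The same pattern repeats in the custom, Google, plugins, and OpenAI initializers below; expressed as a standalone helper it would be roughly (illustrative, not an actual LibreChat export):

```js
// Illustrative: resolve the effective stream rate, mirroring the assignment order above.
// If an `all` config exists it wins, even when its streamRate is undefined.
function resolveStreamRate(locals, endpoint) {
  let streamRate = locals[endpoint]?.streamRate;
  if (locals.all) {
    streamRate = locals.all.streamRate;
  }
  return streamRate;
}
```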


@ -114,9 +114,16 @@ const initializeClient = async ({ req, res, endpointOption }) => {
contextStrategy: endpointConfig.summarize ? 'summarize' : null,
directEndpoint: endpointConfig.directEndpoint,
titleMessageRole: endpointConfig.titleMessageRole,
streamRate: endpointConfig.streamRate,
endpointTokenConfig,
};
/** @type {undefined | TBaseEndpoint} */
const allConfig = req.app.locals.all;
if (allConfig) {
customOptions.streamRate = allConfig.streamRate;
}
const clientOptions = {
reverseProxyUrl: baseURL ?? null,
proxy: PROXY ?? null,


@ -27,11 +27,27 @@ const initializeClient = async ({ req, res, endpointOption }) => {
[AuthKeys.GOOGLE_API_KEY]: GOOGLE_KEY,
};
const clientOptions = {};
/** @type {undefined | TBaseEndpoint} */
const allConfig = req.app.locals.all;
/** @type {undefined | TBaseEndpoint} */
const googleConfig = req.app.locals[EModelEndpoint.google];
if (googleConfig) {
clientOptions.streamRate = googleConfig.streamRate;
}
if (allConfig) {
clientOptions.streamRate = allConfig.streamRate;
}
const client = new GoogleClient(credentials, {
req,
res,
reverseProxyUrl: GOOGLE_REVERSE_PROXY ?? null,
proxy: PROXY ?? null,
...clientOptions,
...endpointOption,
});


@ -8,6 +8,8 @@ jest.mock('~/server/services/UserService', () => ({
getUserKey: jest.fn().mockImplementation(() => ({})),
}));
const app = { locals: {} };
describe('google/initializeClient', () => {
afterEach(() => {
jest.clearAllMocks();
@ -23,6 +25,7 @@ describe('google/initializeClient', () => {
const req = {
body: { key: expiresAt },
user: { id: '123' },
app,
};
const res = {};
const endpointOption = { modelOptions: { model: 'default-model' } };
@ -44,6 +47,7 @@ describe('google/initializeClient', () => {
const req = {
body: { key: null },
user: { id: '123' },
app,
};
const res = {};
const endpointOption = { modelOptions: { model: 'default-model' } };
@ -66,6 +70,7 @@ describe('google/initializeClient', () => {
const req = {
body: { key: expiresAt },
user: { id: '123' },
app,
};
const res = {};
const endpointOption = { modelOptions: { model: 'default-model' } };


@ -86,6 +86,9 @@ const initializeClient = async ({ req, res, endpointOption }) => {
clientOptions.titleModel = azureConfig.titleModel;
clientOptions.titleMethod = azureConfig.titleMethod ?? 'completion';
const azureRate = modelName.includes('gpt-4') ? 30 : 17;
clientOptions.streamRate = azureConfig.streamRate ?? azureRate;
const groupName = modelGroupMap[modelName].group;
clientOptions.addParams = azureConfig.groupMap[groupName].addParams;
clientOptions.dropParams = azureConfig.groupMap[groupName].dropParams;
@ -98,6 +101,19 @@ const initializeClient = async ({ req, res, endpointOption }) => {
apiKey = clientOptions.azure.azureOpenAIApiKey;
}
/** @type {undefined | TBaseEndpoint} */
const pluginsConfig = req.app.locals[EModelEndpoint.gptPlugins];
if (!useAzure && pluginsConfig) {
clientOptions.streamRate = pluginsConfig.streamRate;
}
/** @type {undefined | TBaseEndpoint} */
const allConfig = req.app.locals.all;
if (allConfig) {
clientOptions.streamRate = allConfig.streamRate;
}
if (!apiKey) {
throw new Error(`${endpoint} API key not provided. Please provide it again.`);
}


@ -76,6 +76,10 @@ const initializeClient = async ({ req, res, endpointOption }) => {
clientOptions.titleConvo = azureConfig.titleConvo;
clientOptions.titleModel = azureConfig.titleModel;
const azureRate = modelName.includes('gpt-4') ? 30 : 17;
clientOptions.streamRate = azureConfig.streamRate ?? azureRate;
clientOptions.titleMethod = azureConfig.titleMethod ?? 'completion';
const groupName = modelGroupMap[modelName].group;
@ -90,6 +94,19 @@ const initializeClient = async ({ req, res, endpointOption }) => {
apiKey = clientOptions.azure.azureOpenAIApiKey;
}
/** @type {undefined | TBaseEndpoint} */
const openAIConfig = req.app.locals[EModelEndpoint.openAI];
if (!isAzureOpenAI && openAIConfig) {
clientOptions.streamRate = openAIConfig.streamRate;
}
/** @type {undefined | TBaseEndpoint} */
const allConfig = req.app.locals.all;
if (allConfig) {
clientOptions.streamRate = allConfig.streamRate;
}
if (userProvidesKey && !apiKey) {
throw new Error(
JSON.stringify({


@ -1,5 +1,6 @@
const WebSocket = require('ws');
const { Message } = require('~/models/Message');
const { CacheKeys } = require('librechat-data-provider');
const { getLogStores } = require('~/cache');
/**
* @param {string[]} voiceIds - Array of voice IDs
@ -104,6 +105,8 @@ function createChunkProcessor(messageId) {
throw new Error('Message ID is required');
}
const messageCache = getLogStores(CacheKeys.MESSAGES);
/**
* @returns {Promise<{ text: string, isFinished: boolean }[] | string>}
*/
@ -116,14 +119,17 @@ function createChunkProcessor(messageId) {
return `No change in message after ${MAX_NO_CHANGE_COUNT} attempts`;
}
const message = await Message.findOne({ messageId }, 'text unfinished').lean();
/** @type { string | { text: string; complete: boolean } } */
const message = await messageCache.get(messageId);
if (!message || !message.text) {
if (!message) {
notFoundCount++;
return [];
}
const { text, unfinished } = message;
const text = typeof message === 'string' ? message : message.text;
const complete = typeof message === 'string' ? false : message.complete;
if (text === processedText) {
noChangeCount++;
}
@ -131,7 +137,7 @@ function createChunkProcessor(messageId) {
const remainingText = text.slice(processedText.length);
const chunks = [];
if (unfinished && remainingText.length >= 20) {
if (!complete && remainingText.length >= 20) {
const separatorIndex = findLastSeparatorIndex(remainingText);
if (separatorIndex !== -1) {
const chunkText = remainingText.slice(0, separatorIndex + 1);
@ -141,7 +147,7 @@ function createChunkProcessor(messageId) {
chunks.push({ text: remainingText, isFinished: false });
processedText = text;
}
} else if (!unfinished && remainingText.trim().length > 0) {
} else if (complete && remainingText.trim().length > 0) {
chunks.push({ text: remainingText.trim(), isFinished: true });
processedText = text;
}
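createChunkProcessor now reads intermediate text from the MESSAGES cache instead of querying the Message collection, and a plain string cache entry is treated as an incomplete message. A hypothetical consumer (names are illustrative) would poll it until the message finishes or an error string comes back:

```js
// Hypothetical polling loop: returned arrays hold { text, isFinished } chunks,
// while a string return signals the not-found / no-change limits were hit.
async function drainChunks(processChunks, onChunk, pollMs = 250) {
  for (;;) {
    const result = await processChunks();
    if (typeof result === 'string') {
      throw new Error(result); // e.g. 'Message not found after 6 attempts'
    }
    for (const { text, isFinished } of result) {
      onChunk(text);
      if (isFinished) {
        return;
      }
    }
    await new Promise((resolve) => setTimeout(resolve, pollMs));
  }
}
```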


@ -1,89 +1,145 @@
const { createChunkProcessor, splitTextIntoChunks } = require('./streamAudio');
const { Message } = require('~/models/Message');
jest.mock('~/models/Message', () => ({
Message: {
findOne: jest.fn().mockReturnValue({
lean: jest.fn(),
}),
},
}));
jest.mock('keyv');
const globalCache = {};
jest.mock('~/cache/getLogStores', () => {
return jest.fn().mockImplementation(() => {
const EventEmitter = require('events');
const { CacheKeys } = require('librechat-data-provider');
class KeyvMongo extends EventEmitter {
constructor(url = 'mongodb://127.0.0.1:27017', options) {
super();
this.ttlSupport = false;
url = url ?? {};
if (typeof url === 'string') {
url = { url };
}
if (url.uri) {
url = { url: url.uri, ...url };
}
this.opts = {
url,
collection: 'keyv',
...url,
...options,
};
}
get = async (key) => {
return new Promise((resolve) => {
resolve(globalCache[key] || null);
});
};
set = async (key, value) => {
return new Promise((resolve) => {
globalCache[key] = value;
resolve(true);
});
};
}
return new KeyvMongo('', {
namespace: CacheKeys.MESSAGES,
ttl: 0,
});
});
});
describe('processChunks', () => {
let processChunks;
let mockMessageCache;
beforeEach(() => {
jest.resetAllMocks();
mockMessageCache = {
get: jest.fn(),
};
require('~/cache/getLogStores').mockReturnValue(mockMessageCache);
processChunks = createChunkProcessor('message-id');
Message.findOne.mockClear();
Message.findOne().lean.mockClear();
});
it('should return an empty array when the message is not found', async () => {
Message.findOne().lean.mockResolvedValueOnce(null);
mockMessageCache.get.mockResolvedValueOnce(null);
const result = await processChunks();
expect(result).toEqual([]);
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
expect(Message.findOne().lean).toHaveBeenCalled();
expect(mockMessageCache.get).toHaveBeenCalledWith('message-id');
});
it('should return an empty array when the message does not have a text property', async () => {
Message.findOne().lean.mockResolvedValueOnce({ unfinished: true });
it('should return an error message after MAX_NOT_FOUND_COUNT attempts', async () => {
mockMessageCache.get.mockResolvedValue(null);
for (let i = 0; i < 6; i++) {
await processChunks();
}
const result = await processChunks();
expect(result).toEqual([]);
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
expect(Message.findOne().lean).toHaveBeenCalled();
expect(result).toBe('Message not found after 6 attempts');
});
it('should return chunks for an unfinished message with separators', async () => {
it('should return chunks for an incomplete message with separators', async () => {
const messageText = 'This is a long message. It should be split into chunks. Lol hi mom';
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: true });
mockMessageCache.get.mockResolvedValueOnce({ text: messageText, complete: false });
const result = await processChunks();
expect(result).toEqual([
{ text: 'This is a long message. It should be split into chunks.', isFinished: false },
]);
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
expect(Message.findOne().lean).toHaveBeenCalled();
});
it('should return chunks for an unfinished message without separators', async () => {
it('should return chunks for an incomplete message without separators', async () => {
const messageText = 'This is a long message without separators hello there my friend';
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: true });
mockMessageCache.get.mockResolvedValueOnce({ text: messageText, complete: false });
const result = await processChunks();
expect(result).toEqual([{ text: messageText, isFinished: false }]);
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
expect(Message.findOne().lean).toHaveBeenCalled();
});
it('should return the remaining text as a chunk for a finished message', async () => {
it('should return the remaining text as a chunk for a complete message', async () => {
const messageText = 'This is a finished message.';
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false });
mockMessageCache.get.mockResolvedValueOnce({ text: messageText, complete: true });
const result = await processChunks();
expect(result).toEqual([{ text: messageText, isFinished: true }]);
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
expect(Message.findOne().lean).toHaveBeenCalled();
});
it('should return an empty array for a finished message with no remaining text', async () => {
it('should return an empty array for a complete message with no remaining text', async () => {
const messageText = 'This is a finished message.';
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false });
mockMessageCache.get.mockResolvedValueOnce({ text: messageText, complete: true });
await processChunks();
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false });
mockMessageCache.get.mockResolvedValueOnce({ text: messageText, complete: true });
const result = await processChunks();
expect(result).toEqual([]);
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
expect(Message.findOne().lean).toHaveBeenCalledTimes(2);
});
it('should return an error message after MAX_NO_CHANGE_COUNT attempts with no change', async () => {
const messageText = 'This is a message that does not change.';
mockMessageCache.get.mockResolvedValue({ text: messageText, complete: false });
for (let i = 0; i < 11; i++) {
await processChunks();
}
const result = await processChunks();
expect(result).toBe('No change in message after 10 attempts');
});
it('should handle string messages as incomplete', async () => {
const messageText = 'This is a message as a string.';
mockMessageCache.get.mockResolvedValueOnce(messageText);
const result = await processChunks();
expect(result).toEqual([{ text: messageText, isFinished: false }]);
});
});


@ -1,17 +1,19 @@
const throttle = require('lodash/throttle');
const {
Time,
CacheKeys,
StepTypes,
ContentTypes,
ToolCallTypes,
// StepStatus,
MessageContentTypes,
AssistantStreamEvents,
Constants,
} = require('librechat-data-provider');
const { retrieveAndProcessFile } = require('~/server/services/Files/process');
const { processRequiredActions } = require('~/server/services/ToolService');
const { saveMessage, updateMessageText } = require('~/models/Message');
const { createOnProgress, sendMessage } = require('~/server/utils');
const { createOnProgress, sendMessage, sleep } = require('~/server/utils');
const { processMessages } = require('~/server/services/Threads');
const { getLogStores } = require('~/cache');
const { logger } = require('~/config');
/**
@ -68,8 +70,8 @@ class StreamRunManager {
this.attachedFileIds = fields.attachedFileIds;
/** @type {undefined | Promise<ChatCompletion>} */
this.visionPromise = fields.visionPromise;
/** @type {boolean} */
this.savedInitialMessage = false;
/** @type {number} */
this.streamRate = fields.streamRate ?? Constants.DEFAULT_STREAM_RATE;
/**
* @type {Object.<AssistantStreamEvents, (event: AssistantStreamEvent) => Promise<void>>}
@ -139,11 +141,11 @@ class StreamRunManager {
return this.intermediateText;
}
/** Saves the initial intermediate message
* @returns {Promise<void>}
/** Returns the current, intermediate message
* @returns {TMessage}
*/
async saveInitialMessage() {
return saveMessage(this.req, {
getIntermediateMessage() {
return {
conversationId: this.finalMessage.conversationId,
messageId: this.finalMessage.messageId,
parentMessageId: this.parentMessageId,
@ -155,7 +157,7 @@ class StreamRunManager {
sender: 'Assistant',
unfinished: true,
error: false,
});
};
}
/* <------------------ Main Event Handlers ------------------> */
@ -347,6 +349,8 @@ class StreamRunManager {
type: ContentTypes.TOOL_CALL,
index,
});
await sleep(this.streamRate);
}
};
@ -444,6 +448,7 @@ class StreamRunManager {
if (content && content.type === MessageContentTypes.TEXT) {
this.intermediateText += content.text.value;
onProgress(content.text.value);
await sleep(this.streamRate);
}
}
@ -589,21 +594,14 @@ class StreamRunManager {
const index = this.getStepIndex(stepKey);
this.orderedRunSteps.set(index, message_creation);
const messageCache = getLogStores(CacheKeys.MESSAGES);
// Create the Factory Function to stream the message
const { onProgress: progressCallback } = createOnProgress({
onProgress: throttle(
() => {
if (!this.savedInitialMessage) {
this.saveInitialMessage();
this.savedInitialMessage = true;
} else {
updateMessageText({
messageId: this.finalMessage.messageId,
text: this.getText(),
});
}
messageCache.set(this.finalMessage.messageId, this.getText(), Time.FIVE_MINUTES);
},
2000,
3000,
{ trailing: false },
),
});
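Instead of saving or updating the message document while the run streams, the progress callback now writes the accumulated text to the MESSAGES cache, throttled to the leading edge of a 3-second window (`{ trailing: false }` drops the final trailing call). A minimal sketch of that pattern, assuming a lodash-style `throttle` and a cache exposing `set(key, value, ttl)`:

```js
const throttle = require('lodash/throttle');

// Write the latest text at most once per windowMs while tokens stream in.
function makeProgressWriter(cache, messageId, getText, ttlMs, windowMs = 3000) {
  return throttle(() => cache.set(messageId, getText(), ttlMs), windowMs, { trailing: false });
}
```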


@ -51,6 +51,7 @@ function assistantsConfigSetup(config, assistantsEndpoint, prevConfig = {}) {
excludedIds: parsedConfig.excludedIds,
privateAssistants: parsedConfig.privateAssistants,
timeoutMs: parsedConfig.timeoutMs,
streamRate: parsedConfig.streamRate,
};
}
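A `streamRate` set on the assistants endpoint config therefore flows through `app.locals` into `StreamRunManager`, which falls back to `Constants.DEFAULT_STREAM_RATE` when it is unset. A hedged sketch of that hand-off (not the exact call site):

```js
// Illustrative: the endpoint config resolved at startup supplies the rate the
// run manager sleeps between streamed chunks; unset values use DEFAULT_STREAM_RATE.
const assistantsConfig = req.app.locals[EModelEndpoint.assistants];
const runManager = new StreamRunManager({
  req,
  res,
  streamRate: assistantsConfig?.streamRate,
  // ...other fields omitted
});
```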