⚙️ feat: Adjust Rate of Stream Progress (#3244)

* chore: bump data-provider and add MESSAGES CacheKey
* refactor: avoid saving messages while streaming, save partial text to cache instead
* fix(ci): processChunks
* chore: logging aborted request to debug
* feat: set stream rate for token processing
* chore: specify default stream rate
* fix(ci): Update AppService.js to use optional chaining for endpointLocals assignment
* refactor: abstract the error handler
* feat: streamRate for assistants; refactor: update default rate for token
* refactor: update error handling in assistants/errors.js
* refactor: update error handling in assistants/errors.js
2026-03-03 14:50:19 +01:00 · 2024-07-17 10:47:17 -04:00 · 2024-07-17 10:47:17 -04:00 · 5d40d0a37a
commit 5d40d0a37a
parent 1c282d1517
29 changed files with 661 additions and 309 deletions
--- a/api/app/clients/GoogleClient.js
+++ b/api/app/clients/GoogleClient.js
@@ -13,10 +13,12 @@ const {
  endpointSettings,
  EModelEndpoint,
  VisionModes,
+  Constants,
  AuthKeys,
 } = require('librechat-data-provider');
 const { encodeAndFormat } = require('~/server/services/Files/images');
 const { getModelMaxTokens } = require('~/utils');
+const { sleep } = require('~/server/utils');
 const { logger } = require('~/config');
 const {
  formatMessage,
@@ -620,8 +622,9 @@ class GoogleClient extends BaseClient {
  }

  async getCompletion(_payload, options = {}) {
-    const { onProgress, abortController } = options;
    const { parameters, instances } = _payload;
+    const { onProgress, abortController } = options;
+    const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
    const { messages: _messages, context, examples: _examples } = instances?.[0] ?? {};

    let examples;
@@ -701,6 +704,7 @@ class GoogleClient extends BaseClient {
          delay,
        });
        reply += chunkText;
+        await sleep(streamRate);
      }
      return reply;
    }
@@ -712,10 +716,17 @@ class GoogleClient extends BaseClient {
      safetySettings: safetySettings,
    });

-    let delay = this.isGenerativeModel ? 12 : 8;
-    if (modelName.includes('flash')) {
-      delay = 5;
+    let delay = this.options.streamRate || 8;
+
+    if (!this.options.streamRate) {
+      if (this.isGenerativeModel) {
+        delay = 12;
+      }
+      if (modelName.includes('flash')) {
+        delay = 5;
+      }
    }
+
    for await (const chunk of stream) {
      const chunkText = chunk?.content ?? chunk;
      await this.generateTextStream(chunkText, onProgress, {