⚙️ feat: Adjust Rate of Stream Progress (#3244)

* chore: bump data-provider and add MESSAGES CacheKey

* refactor: avoid saving messages while streaming, save partial text to cache instead (pattern sketched after this list)

* fix(ci): processChunks

* chore: log aborted requests for debugging

* feat: set stream rate for token processing (pacing pattern sketched after this list)

* chore: specify default stream rate

* fix(ci): Update AppService.js to use optional chaining for endpointLocals assignment

* refactor: abstract the error handler

* feat: streamRate for assistants; refactor: update default rate for token processing

* refactor: update error handling in assistants/errors.js

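
The "save partial text to cache" bullet above suggests staging the accumulating response in a keyed cache on each chunk and only persisting the message once the stream ends. A minimal sketch of that pattern, assuming a generic async-iterable stream and a simple key/value cache client; `cache.set`, the key shape, and `streamToCache` are illustrative names, not the project's actual API:

```js
// Sketch: accumulate streamed text and stage it in a cache instead of
// saving a partial message to the database on every chunk.
async function streamToCache({ messageId, stream, cache }) {
  let text = '';
  for await (const chunk of stream) {
    text += chunk;
    // Overwrite the message-scoped cache entry with the latest partial text.
    await cache.set(`MESSAGES:${messageId}`, text);
  }
  // Persist the final text once, after the stream has finished or was aborted.
  return text;
}
```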
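
For the stream-rate bullets, the underlying pattern is a short delay between processed chunks so tokens are forwarded at a steadier pace, with the rate read per endpoint and falling back to a default. A hedged sketch of that idea; the `DEFAULT_STREAM_RATE` value, the config shape, and `sendToken` are assumptions, though the optional-chaining read mirrors the AppService fix noted above:

```js
// Sketch: forward streamed tokens, pausing `streamRate` ms between chunks.
const DEFAULT_STREAM_RATE = 1; // assumed default, in milliseconds

const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

async function forwardTokens(stream, sendToken, endpointConfig) {
  // Optional chaining guards against a missing per-endpoint config entry.
  const streamRate = endpointConfig?.streamRate ?? DEFAULT_STREAM_RATE;
  for await (const token of stream) {
    sendToken(token);
    await sleep(streamRate); // throttle how quickly tokens reach the client
  }
}
```
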
Danny Avila 2024-07-17 10:47:17 -04:00 committed by GitHub
parent 1c282d1517
commit 5d40d0a37a
29 changed files with 661 additions and 309 deletions

@@ -1,7 +1,9 @@
 const { z } = require('zod');
 const axios = require('axios');
 const { Ollama } = require('ollama');
+const { Constants } = require('librechat-data-provider');
 const { deriveBaseURL } = require('~/utils');
+const { sleep } = require('~/server/utils');
 const { logger } = require('~/config');

 const ollamaPayloadSchema = z.object({
@@ -40,6 +42,7 @@ const getValidBase64 = (imageUrl) => {
 class OllamaClient {
   constructor(options = {}) {
     const host = deriveBaseURL(options.baseURL ?? 'http://localhost:11434');
+    this.streamRate = options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
     /** @type {Ollama} */
     this.client = new Ollama({ host });
   }
@@ -136,6 +139,8 @@ class OllamaClient {
           stream.controller.abort();
           break;
         }
+
+        await sleep(this.streamRate);
       }
     }
     // TODO: regular completion
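
Given the constructor change above, a caller could pass a per-endpoint rate when instantiating the client, falling back to `Constants.DEFAULT_STREAM_RATE` when omitted. A usage sketch; the require path, export shape, and the 25 ms value are illustrative assumptions:

```js
// Usage sketch for the OllamaClient shown in the diff above
// (module path and export shape are assumed).
const { OllamaClient } = require('~/app/clients/OllamaClient');

const client = new OllamaClient({
  baseURL: 'http://localhost:11434', // host is derived via deriveBaseURL
  streamRate: 25, // assumed ms between streamed chunks; omit for the default
});
```

Pacing each loop iteration this way presumably trades a little raw throughput for smoother client-side rendering and less pressure on downstream message handling.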