⚙️ feat: Adjust Rate of Stream Progress (#3244)

* chore: bump data-provider and add MESSAGES CacheKey

* refactor: avoid saving messages while streaming, save partial text to cache instead

* fix(ci): processChunks

* chore: logging aborted request to debug

* feat: set stream rate for token processing

* chore: specify default stream rate

* fix(ci): Update AppService.js to use optional chaining for endpointLocals assignment

* refactor: abstract the error handler

* feat: streamRate for assistants; refactor: update default rate for token

* refactor: update error handling in assistants/errors.js

* refactor: update error handling in assistants/errors.js
This commit is contained in:
Danny Avila 2024-07-17 10:47:17 -04:00 committed by GitHub
parent 1c282d1517
commit 5d40d0a37a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
29 changed files with 661 additions and 309 deletions

View file

@ -13,10 +13,12 @@ const {
endpointSettings,
EModelEndpoint,
VisionModes,
Constants,
AuthKeys,
} = require('librechat-data-provider');
const { encodeAndFormat } = require('~/server/services/Files/images');
const { getModelMaxTokens } = require('~/utils');
const { sleep } = require('~/server/utils');
const { logger } = require('~/config');
const {
formatMessage,
@ -620,8 +622,9 @@ class GoogleClient extends BaseClient {
}
async getCompletion(_payload, options = {}) {
const { onProgress, abortController } = options;
const { parameters, instances } = _payload;
const { onProgress, abortController } = options;
const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
const { messages: _messages, context, examples: _examples } = instances?.[0] ?? {};
let examples;
@ -701,6 +704,7 @@ class GoogleClient extends BaseClient {
delay,
});
reply += chunkText;
await sleep(streamRate);
}
return reply;
}
@ -712,10 +716,17 @@ class GoogleClient extends BaseClient {
safetySettings: safetySettings,
});
let delay = this.isGenerativeModel ? 12 : 8;
if (modelName.includes('flash')) {
delay = 5;
let delay = this.options.streamRate || 8;
if (!this.options.streamRate) {
if (this.isGenerativeModel) {
delay = 12;
}
if (modelName.includes('flash')) {
delay = 5;
}
}
for await (const chunk of stream) {
const chunkText = chunk?.content ?? chunk;
await this.generateTextStream(chunkText, onProgress, {