Mirror of https://github.com/danny-avila/LibreChat.git, synced 2025-12-20 10:20:15 +01:00
⚙️ feat: Adjust Rate of Stream Progress (#3244)
* chore: bump data-provider and add MESSAGES CacheKey
* refactor: avoid saving messages while streaming, save partial text to cache instead
* fix(ci): processChunks
* chore: logging aborted request to debug
* feat: set stream rate for token processing
* chore: specify default stream rate
* fix(ci): Update AppService.js to use optional chaining for endpointLocals assignment
* refactor: abstract the error handler
* feat: streamRate for assistants; refactor: update default rate for token
* refactor: update error handling in assistants/errors.js
* refactor: update error handling in assistants/errors.js
This commit is contained in:
parent 1c282d1517
commit 5d40d0a37a
29 changed files with 661 additions and 309 deletions
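The hunks below are from GoogleClient.js (one of the 29 changed files). In outline, the commit makes the token stream pace configurable: a streamRate option is read from the client options, defaulting to Constants.DEFAULT_STREAM_RATE from librechat-data-provider, and a promise-based sleep is awaited between chunks so partial text reaches the client at that rate. A minimal, self-contained sketch of the pattern, with placeholder names and a placeholder default value rather than LibreChat's actual API:

// Illustrative sketch of sleep-based stream throttling; placeholder names and values.
const DEFAULT_STREAM_RATE = 1; // ms between chunks (placeholder, not the real constant)

const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

async function streamWithRate(chunks, onProgress, options = {}) {
  const streamRate = options.streamRate ?? DEFAULT_STREAM_RATE;
  let reply = '';
  for await (const chunk of chunks) {
    const chunkText = chunk?.content ?? chunk;
    onProgress(chunkText); // flush partial text to the client
    reply += chunkText;
    await sleep(streamRate); // pace delivery between chunks
  }
  return reply;
}

Raising streamRate slows how quickly partial text appears in the UI; leaving it unset falls back to the endpoint default.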
@@ -13,10 +13,12 @@ const {
   endpointSettings,
   EModelEndpoint,
   VisionModes,
+  Constants,
   AuthKeys,
 } = require('librechat-data-provider');
 const { encodeAndFormat } = require('~/server/services/Files/images');
 const { getModelMaxTokens } = require('~/utils');
+const { sleep } = require('~/server/utils');
 const { logger } = require('~/config');
 const {
   formatMessage,
@@ -620,8 +622,9 @@ class GoogleClient extends BaseClient {
   }

   async getCompletion(_payload, options = {}) {
-    const { onProgress, abortController } = options;
     const { parameters, instances } = _payload;
+    const { onProgress, abortController } = options;
+    const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
     const { messages: _messages, context, examples: _examples } = instances?.[0] ?? {};

     let examples;
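In getCompletion, the rate is resolved with nullish coalescing, so only null or undefined fall back to Constants.DEFAULT_STREAM_RATE; an explicit 0 is kept. A standalone illustration of that resolution (the default value below is a stand-in, not the constant defined in librechat-data-provider):

// Nullish-coalescing fallback as used above; stand-in default value.
const DEFAULT_STREAM_RATE = 1;

const resolveStreamRate = (options = {}) => options.streamRate ?? DEFAULT_STREAM_RATE;

console.log(resolveStreamRate({ streamRate: 25 })); // 25
console.log(resolveStreamRate({ streamRate: 0 }));  // 0  (?? keeps defined falsy values)
console.log(resolveStreamRate({}));                 // 1  (falls back to the default)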
@@ -701,6 +704,7 @@ class GoogleClient extends BaseClient {
         delay,
       });
       reply += chunkText;
+      await sleep(streamRate);
     }
     return reply;
   }
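The new await sleep(streamRate) pauses the loop between chunks. sleep is imported from ~/server/utils; a minimal sketch of such a helper, assuming a setTimeout-based promise (the actual implementation in the repo may differ):

// Assumed shape of the '~/server/utils' sleep helper: resolve after ms milliseconds.
function sleep(ms = 0) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}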
@@ -712,10 +716,17 @@ class GoogleClient extends BaseClient {
       safetySettings: safetySettings,
     });

-    let delay = this.isGenerativeModel ? 12 : 8;
-    if (modelName.includes('flash')) {
-      delay = 5;
+    let delay = this.options.streamRate || 8;
+
+    if (!this.options.streamRate) {
+      if (this.isGenerativeModel) {
+        delay = 12;
+      }
+      if (modelName.includes('flash')) {
+        delay = 5;
+      }
     }
+
     for await (const chunk of stream) {
       const chunkText = chunk?.content ?? chunk;
       await this.generateTextStream(chunkText, onProgress, {
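Note the asymmetry with getCompletion: here the override uses ||, so a streamRate of 0 counts as unset and the per-model defaults apply (12 for generative models, 5 for 'flash' models, 8 otherwise). A standalone restatement of that fallback logic, with illustrative names only:

// Illustrative restatement of the delay fallback above; not the repo's code.
function resolveDelay({ streamRate, isGenerativeModel, modelName }) {
  let delay = streamRate || 8;
  if (!streamRate) {
    if (isGenerativeModel) {
      delay = 12;
    }
    if (modelName.includes('flash')) {
      delay = 5;
    }
  }
  return delay;
}

console.log(resolveDelay({ streamRate: 30, isGenerativeModel: true, modelName: 'gemini-pro' }));      // 30
console.log(resolveDelay({ streamRate: 0, isGenerativeModel: true, modelName: 'gemini-1.5-flash' })); // 5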