mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-31 23:58:50 +01:00
⚙️ feat: Adjust Rate of Stream Progress (#3244)
* chore: bump data-provider and add MESSAGES CacheKey * refactor: avoid saving messages while streaming, save partial text to cache instead * fix(ci): processChunks * chore: logging aborted request to debug * feat: set stream rate for token processing * chore: specify default stream rate * fix(ci): Update AppService.js to use optional chaining for endpointLocals assignment * refactor: abstract the error handler * feat: streamRate for assistants; refactor: update default rate for token * refactor: update error handling in assistants/errors.js * refactor: update error handling in assistants/errors.js
This commit is contained in:
parent
1c282d1517
commit
5d40d0a37a
29 changed files with 661 additions and 309 deletions
|
|
@ -1182,8 +1182,10 @@ ${convo}
|
|||
});
|
||||
}
|
||||
|
||||
const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
|
||||
|
||||
if (this.message_file_map && this.isOllama) {
|
||||
const ollamaClient = new OllamaClient({ baseURL });
|
||||
const ollamaClient = new OllamaClient({ baseURL, streamRate });
|
||||
return await ollamaClient.chatCompletion({
|
||||
payload: modelOptions,
|
||||
onProgress,
|
||||
|
|
@ -1221,8 +1223,6 @@ ${convo}
|
|||
}
|
||||
});
|
||||
|
||||
const azureDelay = this.modelOptions.model?.includes('gpt-4') ? 30 : 17;
|
||||
|
||||
for await (const chunk of stream) {
|
||||
const token = chunk.choices[0]?.delta?.content || '';
|
||||
intermediateReply += token;
|
||||
|
|
@ -1232,9 +1232,7 @@ ${convo}
|
|||
break;
|
||||
}
|
||||
|
||||
if (this.azure) {
|
||||
await sleep(azureDelay);
|
||||
}
|
||||
await sleep(streamRate);
|
||||
}
|
||||
|
||||
if (!UnexpectedRoleError) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue