mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-02-23 10:54:11 +01:00
⚙️ feat: Adjust Rate of Stream Progress (#3244)
* chore: bump data-provider and add MESSAGES CacheKey * refactor: avoid saving messages while streaming, save partial text to cache instead * fix(ci): processChunks * chore: logging aborted request to debug * feat: set stream rate for token processing * chore: specify default stream rate * fix(ci): Update AppService.js to use optional chaining for endpointLocals assignment * refactor: abstract the error handler * feat: streamRate for assistants; refactor: update default rate for token * refactor: update error handling in assistants/errors.js * refactor: update error handling in assistants/errors.js
This commit is contained in:
parent
1c282d1517
commit
5d40d0a37a
29 changed files with 661 additions and 309 deletions
|
|
@ -2,8 +2,9 @@ const Anthropic = require('@anthropic-ai/sdk');
|
|||
const { HttpsProxyAgent } = require('https-proxy-agent');
|
||||
const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
|
||||
const {
|
||||
getResponseSender,
|
||||
Constants,
|
||||
EModelEndpoint,
|
||||
getResponseSender,
|
||||
validateVisionModel,
|
||||
} = require('librechat-data-provider');
|
||||
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
|
||||
|
|
@ -16,6 +17,7 @@ const {
|
|||
} = require('./prompts');
|
||||
const spendTokens = require('~/models/spendTokens');
|
||||
const { getModelMaxTokens } = require('~/utils');
|
||||
const { sleep } = require('~/server/utils');
|
||||
const BaseClient = require('./BaseClient');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
|
|
@ -605,6 +607,7 @@ class AnthropicClient extends BaseClient {
|
|||
};
|
||||
|
||||
const maxRetries = 3;
|
||||
const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
|
||||
async function processResponse() {
|
||||
let attempts = 0;
|
||||
|
||||
|
|
@ -627,6 +630,8 @@ class AnthropicClient extends BaseClient {
|
|||
} else if (completion.completion) {
|
||||
handleChunk(completion.completion);
|
||||
}
|
||||
|
||||
await sleep(streamRate);
|
||||
}
|
||||
|
||||
// Successful processing, exit loop
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
const crypto = require('crypto');
|
||||
const fetch = require('node-fetch');
|
||||
const { supportsBalanceCheck, Constants } = require('librechat-data-provider');
|
||||
const { supportsBalanceCheck, Constants, CacheKeys, Time } = require('librechat-data-provider');
|
||||
const { getMessages, saveMessage, updateMessage, saveConvo } = require('~/models');
|
||||
const { addSpaceIfNeeded, isEnabled } = require('~/server/utils');
|
||||
const checkBalance = require('~/models/checkBalance');
|
||||
const { getFiles } = require('~/models/File');
|
||||
const { getLogStores } = require('~/cache');
|
||||
const TextStream = require('./TextStream');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
|
|
@ -540,6 +541,15 @@ class BaseClient {
|
|||
await this.recordTokenUsage({ promptTokens, completionTokens });
|
||||
}
|
||||
this.responsePromise = this.saveMessageToDatabase(responseMessage, saveOptions, user);
|
||||
const messageCache = getLogStores(CacheKeys.MESSAGES);
|
||||
messageCache.set(
|
||||
responseMessageId,
|
||||
{
|
||||
text: responseMessage.text,
|
||||
complete: true,
|
||||
},
|
||||
Time.FIVE_MINUTES,
|
||||
);
|
||||
delete responseMessage.tokenCount;
|
||||
return responseMessage;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,10 +13,12 @@ const {
|
|||
endpointSettings,
|
||||
EModelEndpoint,
|
||||
VisionModes,
|
||||
Constants,
|
||||
AuthKeys,
|
||||
} = require('librechat-data-provider');
|
||||
const { encodeAndFormat } = require('~/server/services/Files/images');
|
||||
const { getModelMaxTokens } = require('~/utils');
|
||||
const { sleep } = require('~/server/utils');
|
||||
const { logger } = require('~/config');
|
||||
const {
|
||||
formatMessage,
|
||||
|
|
@ -620,8 +622,9 @@ class GoogleClient extends BaseClient {
|
|||
}
|
||||
|
||||
async getCompletion(_payload, options = {}) {
|
||||
const { onProgress, abortController } = options;
|
||||
const { parameters, instances } = _payload;
|
||||
const { onProgress, abortController } = options;
|
||||
const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
|
||||
const { messages: _messages, context, examples: _examples } = instances?.[0] ?? {};
|
||||
|
||||
let examples;
|
||||
|
|
@ -701,6 +704,7 @@ class GoogleClient extends BaseClient {
|
|||
delay,
|
||||
});
|
||||
reply += chunkText;
|
||||
await sleep(streamRate);
|
||||
}
|
||||
return reply;
|
||||
}
|
||||
|
|
@ -712,10 +716,17 @@ class GoogleClient extends BaseClient {
|
|||
safetySettings: safetySettings,
|
||||
});
|
||||
|
||||
let delay = this.isGenerativeModel ? 12 : 8;
|
||||
if (modelName.includes('flash')) {
|
||||
delay = 5;
|
||||
let delay = this.options.streamRate || 8;
|
||||
|
||||
if (!this.options.streamRate) {
|
||||
if (this.isGenerativeModel) {
|
||||
delay = 12;
|
||||
}
|
||||
if (modelName.includes('flash')) {
|
||||
delay = 5;
|
||||
}
|
||||
}
|
||||
|
||||
for await (const chunk of stream) {
|
||||
const chunkText = chunk?.content ?? chunk;
|
||||
await this.generateTextStream(chunkText, onProgress, {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
const { z } = require('zod');
|
||||
const axios = require('axios');
|
||||
const { Ollama } = require('ollama');
|
||||
const { Constants } = require('librechat-data-provider');
|
||||
const { deriveBaseURL } = require('~/utils');
|
||||
const { sleep } = require('~/server/utils');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
const ollamaPayloadSchema = z.object({
|
||||
|
|
@ -40,6 +42,7 @@ const getValidBase64 = (imageUrl) => {
|
|||
class OllamaClient {
|
||||
constructor(options = {}) {
|
||||
const host = deriveBaseURL(options.baseURL ?? 'http://localhost:11434');
|
||||
this.streamRate = options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
|
||||
/** @type {Ollama} */
|
||||
this.client = new Ollama({ host });
|
||||
}
|
||||
|
|
@ -136,6 +139,8 @@ class OllamaClient {
|
|||
stream.controller.abort();
|
||||
break;
|
||||
}
|
||||
|
||||
await sleep(this.streamRate);
|
||||
}
|
||||
}
|
||||
// TODO: regular completion
|
||||
|
|
|
|||
|
|
@ -1182,8 +1182,10 @@ ${convo}
|
|||
});
|
||||
}
|
||||
|
||||
const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
|
||||
|
||||
if (this.message_file_map && this.isOllama) {
|
||||
const ollamaClient = new OllamaClient({ baseURL });
|
||||
const ollamaClient = new OllamaClient({ baseURL, streamRate });
|
||||
return await ollamaClient.chatCompletion({
|
||||
payload: modelOptions,
|
||||
onProgress,
|
||||
|
|
@ -1221,8 +1223,6 @@ ${convo}
|
|||
}
|
||||
});
|
||||
|
||||
const azureDelay = this.modelOptions.model?.includes('gpt-4') ? 30 : 17;
|
||||
|
||||
for await (const chunk of stream) {
|
||||
const token = chunk.choices[0]?.delta?.content || '';
|
||||
intermediateReply += token;
|
||||
|
|
@ -1232,9 +1232,7 @@ ${convo}
|
|||
break;
|
||||
}
|
||||
|
||||
if (this.azure) {
|
||||
await sleep(azureDelay);
|
||||
}
|
||||
await sleep(streamRate);
|
||||
}
|
||||
|
||||
if (!UnexpectedRoleError) {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
const OpenAIClient = require('./OpenAIClient');
|
||||
const { CallbackManager } = require('langchain/callbacks');
|
||||
const { CacheKeys, Time } = require('librechat-data-provider');
|
||||
const { BufferMemory, ChatMessageHistory } = require('langchain/memory');
|
||||
const { initializeCustomAgent, initializeFunctionsAgent } = require('./agents');
|
||||
const { addImages, buildErrorInput, buildPromptPrefix } = require('./output_parsers');
|
||||
|
|
@ -11,6 +12,7 @@ const { SelfReflectionTool } = require('./tools');
|
|||
const { isEnabled } = require('~/server/utils');
|
||||
const { extractBaseURL } = require('~/utils');
|
||||
const { loadTools } = require('./tools/util');
|
||||
const { getLogStores } = require('~/cache');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
class PluginsClient extends OpenAIClient {
|
||||
|
|
@ -220,6 +222,13 @@ class PluginsClient extends OpenAIClient {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {TMessage} responseMessage
|
||||
* @param {Partial<TMessage>} saveOptions
|
||||
* @param {string} user
|
||||
* @returns
|
||||
*/
|
||||
async handleResponseMessage(responseMessage, saveOptions, user) {
|
||||
const { output, errorMessage, ...result } = this.result;
|
||||
logger.debug('[PluginsClient][handleResponseMessage] Output:', {
|
||||
|
|
@ -239,6 +248,15 @@ class PluginsClient extends OpenAIClient {
|
|||
}
|
||||
|
||||
this.responsePromise = this.saveMessageToDatabase(responseMessage, saveOptions, user);
|
||||
const messageCache = getLogStores(CacheKeys.MESSAGES);
|
||||
messageCache.set(
|
||||
responseMessage.messageId,
|
||||
{
|
||||
text: responseMessage.text,
|
||||
complete: true,
|
||||
},
|
||||
Time.FIVE_MINUTES,
|
||||
);
|
||||
delete responseMessage.tokenCount;
|
||||
return { ...responseMessage, ...result };
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue