⚙️ feat: Adjust Rate of Stream Progress (#3244)

* chore: bump data-provider and add MESSAGES CacheKey

* refactor: avoid saving messages while streaming, save partial text to cache instead

* fix(ci): processChunks

* chore: logging aborted request to debug

* feat: set stream rate for token processing

* chore: specify default stream rate

* fix(ci): Update AppService.js to use optional chaining for endpointLocals assignment

* refactor: abstract the error handler

* feat: streamRate for assistants; refactor: update default rate for token

* refactor: update error handling in assistants/errors.js

* refactor: update error handling in assistants/errors.js
Danny Avila committed 2024-07-17 10:47:17 -04:00 (via GitHub) · commit 5d40d0a37a · parent 1c282d1517 · 29 changed files with 661 additions and 309 deletions
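At a glance, the change paces token streaming instead of flushing every chunk as fast as it arrives: handlers sleep for a configurable number of milliseconds (`streamRate`) between chunks, and intermediate text is written to a cache rather than saved to the database on every update. A minimal sketch of the pacing idea, assuming a `sleep` helper that resolves after the given delay (names here are illustrative, not the exact LibreChat internals):

```js
// Minimal sketch: pace chunk delivery by sleeping `streamRate` ms between sends.
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

async function streamWithRate(chunks, sendChunk, streamRate = 1) {
  for (const chunk of chunks) {
    sendChunk(chunk);
    await sleep(streamRate);
  }
}
```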


@ -67,17 +67,18 @@ const AppService = async (app) => {
handleRateLimits(config?.rateLimits);
const endpointLocals = {};
const endpoints = config?.endpoints;
if (config?.endpoints?.[EModelEndpoint.azureOpenAI]) {
if (endpoints?.[EModelEndpoint.azureOpenAI]) {
endpointLocals[EModelEndpoint.azureOpenAI] = azureConfigSetup(config);
checkAzureVariables();
}
if (config?.endpoints?.[EModelEndpoint.azureOpenAI]?.assistants) {
if (endpoints?.[EModelEndpoint.azureOpenAI]?.assistants) {
endpointLocals[EModelEndpoint.azureAssistants] = azureAssistantsDefaults();
}
if (config?.endpoints?.[EModelEndpoint.azureAssistants]) {
if (endpoints?.[EModelEndpoint.azureAssistants]) {
endpointLocals[EModelEndpoint.azureAssistants] = assistantsConfigSetup(
config,
EModelEndpoint.azureAssistants,
@ -85,7 +86,7 @@ const AppService = async (app) => {
);
}
if (config?.endpoints?.[EModelEndpoint.assistants]) {
if (endpoints?.[EModelEndpoint.assistants]) {
endpointLocals[EModelEndpoint.assistants] = assistantsConfigSetup(
config,
EModelEndpoint.assistants,
@ -93,6 +94,19 @@ const AppService = async (app) => {
);
}
if (endpoints?.[EModelEndpoint.openAI]) {
endpointLocals[EModelEndpoint.openAI] = endpoints[EModelEndpoint.openAI];
}
if (endpoints?.[EModelEndpoint.google]) {
endpointLocals[EModelEndpoint.google] = endpoints[EModelEndpoint.google];
}
if (endpoints?.[EModelEndpoint.anthropic]) {
endpointLocals[EModelEndpoint.anthropic] = endpoints[EModelEndpoint.anthropic];
}
if (endpoints?.[EModelEndpoint.gptPlugins]) {
endpointLocals[EModelEndpoint.gptPlugins] = endpoints[EModelEndpoint.gptPlugins];
}
app.locals = {
...defaultLocals,
modelSpecs: config.modelSpecs,
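With these additions, AppService copies the per-endpoint config blocks (openAI, google, anthropic, gptPlugins) into `app.locals`, so request handlers can read `streamRate` without re-parsing the config. The resulting shape might look roughly like this (values are illustrative; the actual contents come from the user's config file):

```js
// Illustrative only: the keys mirror EModelEndpoint values, and `all` (when present)
// is consulted last by the client initializers, so it overrides per-endpoint rates.
app.locals = {
  // ...defaultLocals,
  openAI: { streamRate: 25 },
  anthropic: { streamRate: 35 },
  all: { streamRate: 30 },
};
```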


@ -19,11 +19,27 @@ const initializeClient = async ({ req, res, endpointOption }) => {
checkUserKeyExpiry(expiresAt, EModelEndpoint.anthropic);
}
const clientOptions = {};
/** @type {undefined | TBaseEndpoint} */
const anthropicConfig = req.app.locals[EModelEndpoint.anthropic];
if (anthropicConfig) {
clientOptions.streamRate = anthropicConfig.streamRate;
}
/** @type {undefined | TBaseEndpoint} */
const allConfig = req.app.locals.all;
if (allConfig) {
clientOptions.streamRate = allConfig.streamRate;
}
const client = new AnthropicClient(anthropicApiKey, {
req,
res,
reverseProxyUrl: ANTHROPIC_REVERSE_PROXY ?? null,
proxy: PROXY ?? null,
...clientOptions,
...endpointOption,
});
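Note the assignment order: the Anthropic-specific `streamRate` is applied first and then overwritten whenever an `all` config exists, so `all` takes precedence. The same pattern repeats in the custom, Google, plugins, and OpenAI initializers below; expressed as a standalone helper it would be roughly (illustrative, not an actual LibreChat export):

```js
// Illustrative: resolve the effective stream rate, mirroring the assignment order above.
// If an `all` config exists it wins, even when its streamRate is undefined.
function resolveStreamRate(locals, endpoint) {
  let streamRate = locals[endpoint]?.streamRate;
  if (locals.all) {
    streamRate = locals.all.streamRate;
  }
  return streamRate;
}
```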


@ -114,9 +114,16 @@ const initializeClient = async ({ req, res, endpointOption }) => {
contextStrategy: endpointConfig.summarize ? 'summarize' : null,
directEndpoint: endpointConfig.directEndpoint,
titleMessageRole: endpointConfig.titleMessageRole,
streamRate: endpointConfig.streamRate,
endpointTokenConfig,
};
/** @type {undefined | TBaseEndpoint} */
const allConfig = req.app.locals.all;
if (allConfig) {
customOptions.streamRate = allConfig.streamRate;
}
const clientOptions = {
reverseProxyUrl: baseURL ?? null,
proxy: PROXY ?? null,


@ -27,11 +27,27 @@ const initializeClient = async ({ req, res, endpointOption }) => {
[AuthKeys.GOOGLE_API_KEY]: GOOGLE_KEY,
};
const clientOptions = {};
/** @type {undefined | TBaseEndpoint} */
const allConfig = req.app.locals.all;
/** @type {undefined | TBaseEndpoint} */
const googleConfig = req.app.locals[EModelEndpoint.google];
if (googleConfig) {
clientOptions.streamRate = googleConfig.streamRate;
}
if (allConfig) {
clientOptions.streamRate = allConfig.streamRate;
}
const client = new GoogleClient(credentials, {
req,
res,
reverseProxyUrl: GOOGLE_REVERSE_PROXY ?? null,
proxy: PROXY ?? null,
...clientOptions,
...endpointOption,
});


@ -8,6 +8,8 @@ jest.mock('~/server/services/UserService', () => ({
getUserKey: jest.fn().mockImplementation(() => ({})),
}));
const app = { locals: {} };
describe('google/initializeClient', () => {
afterEach(() => {
jest.clearAllMocks();
@ -23,6 +25,7 @@ describe('google/initializeClient', () => {
const req = {
body: { key: expiresAt },
user: { id: '123' },
app,
};
const res = {};
const endpointOption = { modelOptions: { model: 'default-model' } };
@ -44,6 +47,7 @@ describe('google/initializeClient', () => {
const req = {
body: { key: null },
user: { id: '123' },
app,
};
const res = {};
const endpointOption = { modelOptions: { model: 'default-model' } };
@ -66,6 +70,7 @@ describe('google/initializeClient', () => {
const req = {
body: { key: expiresAt },
user: { id: '123' },
app,
};
const res = {};
const endpointOption = { modelOptions: { model: 'default-model' } };


@ -86,6 +86,9 @@ const initializeClient = async ({ req, res, endpointOption }) => {
clientOptions.titleModel = azureConfig.titleModel;
clientOptions.titleMethod = azureConfig.titleMethod ?? 'completion';
const azureRate = modelName.includes('gpt-4') ? 30 : 17;
clientOptions.streamRate = azureConfig.streamRate ?? azureRate;
const groupName = modelGroupMap[modelName].group;
clientOptions.addParams = azureConfig.groupMap[groupName].addParams;
clientOptions.dropParams = azureConfig.groupMap[groupName].dropParams;
@ -98,6 +101,19 @@ const initializeClient = async ({ req, res, endpointOption }) => {
apiKey = clientOptions.azure.azureOpenAIApiKey;
}
/** @type {undefined | TBaseEndpoint} */
const pluginsConfig = req.app.locals[EModelEndpoint.gptPlugins];
if (!useAzure && pluginsConfig) {
clientOptions.streamRate = pluginsConfig.streamRate;
}
/** @type {undefined | TBaseEndpoint} */
const allConfig = req.app.locals.all;
if (allConfig) {
clientOptions.streamRate = allConfig.streamRate;
}
if (!apiKey) {
throw new Error(`${endpoint} API key not provided. Please provide it again.`);
}


@ -76,6 +76,10 @@ const initializeClient = async ({ req, res, endpointOption }) => {
clientOptions.titleConvo = azureConfig.titleConvo;
clientOptions.titleModel = azureConfig.titleModel;
const azureRate = modelName.includes('gpt-4') ? 30 : 17;
clientOptions.streamRate = azureConfig.streamRate ?? azureRate;
clientOptions.titleMethod = azureConfig.titleMethod ?? 'completion';
const groupName = modelGroupMap[modelName].group;
@ -90,6 +94,19 @@ const initializeClient = async ({ req, res, endpointOption }) => {
apiKey = clientOptions.azure.azureOpenAIApiKey;
}
/** @type {undefined | TBaseEndpoint} */
const openAIConfig = req.app.locals[EModelEndpoint.openAI];
if (!isAzureOpenAI && openAIConfig) {
clientOptions.streamRate = openAIConfig.streamRate;
}
/** @type {undefined | TBaseEndpoint} */
const allConfig = req.app.locals.all;
if (allConfig) {
clientOptions.streamRate = allConfig.streamRate;
}
if (userProvidesKey && !apiKey) {
throw new Error(
JSON.stringify({


@ -1,5 +1,6 @@
const WebSocket = require('ws');
const { Message } = require('~/models/Message');
const { CacheKeys } = require('librechat-data-provider');
const { getLogStores } = require('~/cache');
/**
* @param {string[]} voiceIds - Array of voice IDs
@ -104,6 +105,8 @@ function createChunkProcessor(messageId) {
throw new Error('Message ID is required');
}
const messageCache = getLogStores(CacheKeys.MESSAGES);
/**
* @returns {Promise<{ text: string, isFinished: boolean }[] | string>}
*/
@ -116,14 +119,17 @@ function createChunkProcessor(messageId) {
return `No change in message after ${MAX_NO_CHANGE_COUNT} attempts`;
}
const message = await Message.findOne({ messageId }, 'text unfinished').lean();
/** @type { string | { text: string; complete: boolean } } */
const message = await messageCache.get(messageId);
if (!message || !message.text) {
if (!message) {
notFoundCount++;
return [];
}
const { text, unfinished } = message;
const text = typeof message === 'string' ? message : message.text;
const complete = typeof message === 'string' ? false : message.complete;
if (text === processedText) {
noChangeCount++;
}
@ -131,7 +137,7 @@ function createChunkProcessor(messageId) {
const remainingText = text.slice(processedText.length);
const chunks = [];
if (unfinished && remainingText.length >= 20) {
if (!complete && remainingText.length >= 20) {
const separatorIndex = findLastSeparatorIndex(remainingText);
if (separatorIndex !== -1) {
const chunkText = remainingText.slice(0, separatorIndex + 1);
@ -141,7 +147,7 @@ function createChunkProcessor(messageId) {
chunks.push({ text: remainingText, isFinished: false });
processedText = text;
}
} else if (!unfinished && remainingText.trim().length > 0) {
} else if (complete && remainingText.trim().length > 0) {
chunks.push({ text: remainingText.trim(), isFinished: true });
processedText = text;
}
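createChunkProcessor now reads intermediate text from the MESSAGES cache instead of querying the Message collection, and a plain string cache entry is treated as an incomplete message. A hypothetical consumer (names are illustrative) would poll it until the message finishes or an error string comes back:

```js
// Hypothetical polling loop: returned arrays hold { text, isFinished } chunks,
// while a string return signals the not-found / no-change limits were hit.
async function drainChunks(processChunks, onChunk, pollMs = 250) {
  for (;;) {
    const result = await processChunks();
    if (typeof result === 'string') {
      throw new Error(result); // e.g. 'Message not found after 6 attempts'
    }
    for (const { text, isFinished } of result) {
      onChunk(text);
      if (isFinished) {
        return;
      }
    }
    await new Promise((resolve) => setTimeout(resolve, pollMs));
  }
}
```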


@ -1,89 +1,145 @@
const { createChunkProcessor, splitTextIntoChunks } = require('./streamAudio');
const { Message } = require('~/models/Message');
jest.mock('~/models/Message', () => ({
Message: {
findOne: jest.fn().mockReturnValue({
lean: jest.fn(),
}),
},
}));
jest.mock('keyv');
const globalCache = {};
jest.mock('~/cache/getLogStores', () => {
return jest.fn().mockImplementation(() => {
const EventEmitter = require('events');
const { CacheKeys } = require('librechat-data-provider');
class KeyvMongo extends EventEmitter {
constructor(url = 'mongodb://127.0.0.1:27017', options) {
super();
this.ttlSupport = false;
url = url ?? {};
if (typeof url === 'string') {
url = { url };
}
if (url.uri) {
url = { url: url.uri, ...url };
}
this.opts = {
url,
collection: 'keyv',
...url,
...options,
};
}
get = async (key) => {
return new Promise((resolve) => {
resolve(globalCache[key] || null);
});
};
set = async (key, value) => {
return new Promise((resolve) => {
globalCache[key] = value;
resolve(true);
});
};
}
return new KeyvMongo('', {
namespace: CacheKeys.MESSAGES,
ttl: 0,
});
});
});
describe('processChunks', () => {
let processChunks;
let mockMessageCache;
beforeEach(() => {
jest.resetAllMocks();
mockMessageCache = {
get: jest.fn(),
};
require('~/cache/getLogStores').mockReturnValue(mockMessageCache);
processChunks = createChunkProcessor('message-id');
Message.findOne.mockClear();
Message.findOne().lean.mockClear();
});
it('should return an empty array when the message is not found', async () => {
Message.findOne().lean.mockResolvedValueOnce(null);
mockMessageCache.get.mockResolvedValueOnce(null);
const result = await processChunks();
expect(result).toEqual([]);
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
expect(Message.findOne().lean).toHaveBeenCalled();
expect(mockMessageCache.get).toHaveBeenCalledWith('message-id');
});
it('should return an empty array when the message does not have a text property', async () => {
Message.findOne().lean.mockResolvedValueOnce({ unfinished: true });
it('should return an error message after MAX_NOT_FOUND_COUNT attempts', async () => {
mockMessageCache.get.mockResolvedValue(null);
for (let i = 0; i < 6; i++) {
await processChunks();
}
const result = await processChunks();
expect(result).toEqual([]);
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
expect(Message.findOne().lean).toHaveBeenCalled();
expect(result).toBe('Message not found after 6 attempts');
});
it('should return chunks for an unfinished message with separators', async () => {
it('should return chunks for an incomplete message with separators', async () => {
const messageText = 'This is a long message. It should be split into chunks. Lol hi mom';
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: true });
mockMessageCache.get.mockResolvedValueOnce({ text: messageText, complete: false });
const result = await processChunks();
expect(result).toEqual([
{ text: 'This is a long message. It should be split into chunks.', isFinished: false },
]);
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
expect(Message.findOne().lean).toHaveBeenCalled();
});
it('should return chunks for an unfinished message without separators', async () => {
it('should return chunks for an incomplete message without separators', async () => {
const messageText = 'This is a long message without separators hello there my friend';
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: true });
mockMessageCache.get.mockResolvedValueOnce({ text: messageText, complete: false });
const result = await processChunks();
expect(result).toEqual([{ text: messageText, isFinished: false }]);
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
expect(Message.findOne().lean).toHaveBeenCalled();
});
it('should return the remaining text as a chunk for a finished message', async () => {
it('should return the remaining text as a chunk for a complete message', async () => {
const messageText = 'This is a finished message.';
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false });
mockMessageCache.get.mockResolvedValueOnce({ text: messageText, complete: true });
const result = await processChunks();
expect(result).toEqual([{ text: messageText, isFinished: true }]);
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
expect(Message.findOne().lean).toHaveBeenCalled();
});
it('should return an empty array for a finished message with no remaining text', async () => {
it('should return an empty array for a complete message with no remaining text', async () => {
const messageText = 'This is a finished message.';
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false });
mockMessageCache.get.mockResolvedValueOnce({ text: messageText, complete: true });
await processChunks();
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false });
mockMessageCache.get.mockResolvedValueOnce({ text: messageText, complete: true });
const result = await processChunks();
expect(result).toEqual([]);
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
expect(Message.findOne().lean).toHaveBeenCalledTimes(2);
});
it('should return an error message after MAX_NO_CHANGE_COUNT attempts with no change', async () => {
const messageText = 'This is a message that does not change.';
mockMessageCache.get.mockResolvedValue({ text: messageText, complete: false });
for (let i = 0; i < 11; i++) {
await processChunks();
}
const result = await processChunks();
expect(result).toBe('No change in message after 10 attempts');
});
it('should handle string messages as incomplete', async () => {
const messageText = 'This is a message as a string.';
mockMessageCache.get.mockResolvedValueOnce(messageText);
const result = await processChunks();
expect(result).toEqual([{ text: messageText, isFinished: false }]);
});
});


@ -1,17 +1,19 @@
const throttle = require('lodash/throttle');
const {
Time,
CacheKeys,
StepTypes,
ContentTypes,
ToolCallTypes,
// StepStatus,
MessageContentTypes,
AssistantStreamEvents,
Constants,
} = require('librechat-data-provider');
const { retrieveAndProcessFile } = require('~/server/services/Files/process');
const { processRequiredActions } = require('~/server/services/ToolService');
const { saveMessage, updateMessageText } = require('~/models/Message');
const { createOnProgress, sendMessage } = require('~/server/utils');
const { createOnProgress, sendMessage, sleep } = require('~/server/utils');
const { processMessages } = require('~/server/services/Threads');
const { getLogStores } = require('~/cache');
const { logger } = require('~/config');
/**
@ -68,8 +70,8 @@ class StreamRunManager {
this.attachedFileIds = fields.attachedFileIds;
/** @type {undefined | Promise<ChatCompletion>} */
this.visionPromise = fields.visionPromise;
/** @type {boolean} */
this.savedInitialMessage = false;
/** @type {number} */
this.streamRate = fields.streamRate ?? Constants.DEFAULT_STREAM_RATE;
/**
* @type {Object.<AssistantStreamEvents, (event: AssistantStreamEvent) => Promise<void>>}
@ -139,11 +141,11 @@ class StreamRunManager {
return this.intermediateText;
}
/** Saves the initial intermediate message
* @returns {Promise<void>}
/** Returns the current, intermediate message
* @returns {TMessage}
*/
async saveInitialMessage() {
return saveMessage(this.req, {
getIntermediateMessage() {
return {
conversationId: this.finalMessage.conversationId,
messageId: this.finalMessage.messageId,
parentMessageId: this.parentMessageId,
@ -155,7 +157,7 @@ class StreamRunManager {
sender: 'Assistant',
unfinished: true,
error: false,
});
};
}
/* <------------------ Main Event Handlers ------------------> */
@ -347,6 +349,8 @@ class StreamRunManager {
type: ContentTypes.TOOL_CALL,
index,
});
await sleep(this.streamRate);
}
};
@ -444,6 +448,7 @@ class StreamRunManager {
if (content && content.type === MessageContentTypes.TEXT) {
this.intermediateText += content.text.value;
onProgress(content.text.value);
await sleep(this.streamRate);
}
}
@ -589,21 +594,14 @@ class StreamRunManager {
const index = this.getStepIndex(stepKey);
this.orderedRunSteps.set(index, message_creation);
const messageCache = getLogStores(CacheKeys.MESSAGES);
// Create the Factory Function to stream the message
const { onProgress: progressCallback } = createOnProgress({
onProgress: throttle(
() => {
if (!this.savedInitialMessage) {
this.saveInitialMessage();
this.savedInitialMessage = true;
} else {
updateMessageText({
messageId: this.finalMessage.messageId,
text: this.getText(),
});
}
messageCache.set(this.finalMessage.messageId, this.getText(), Time.FIVE_MINUTES);
},
2000,
3000,
{ trailing: false },
),
});
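Instead of saving or updating the message document while the run streams, the progress callback now writes the accumulated text to the MESSAGES cache, throttled to the leading edge of a 3-second window (`{ trailing: false }` drops the final trailing call). A minimal sketch of that pattern, assuming a lodash-style `throttle` and a cache exposing `set(key, value, ttl)`:

```js
const throttle = require('lodash/throttle');

// Write the latest text at most once per windowMs while tokens stream in.
function makeProgressWriter(cache, messageId, getText, ttlMs, windowMs = 3000) {
  return throttle(() => cache.set(messageId, getText(), ttlMs), windowMs, { trailing: false });
}
```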


@ -51,6 +51,7 @@ function assistantsConfigSetup(config, assistantsEndpoint, prevConfig = {}) {
excludedIds: parsedConfig.excludedIds,
privateAssistants: parsedConfig.privateAssistants,
timeoutMs: parsedConfig.timeoutMs,
streamRate: parsedConfig.streamRate,
};
}
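A `streamRate` set on the assistants endpoint config therefore flows through `app.locals` into `StreamRunManager`, which falls back to `Constants.DEFAULT_STREAM_RATE` when it is unset. A hedged sketch of that hand-off (not the exact call site):

```js
// Illustrative: the endpoint config resolved at startup supplies the rate the
// run manager sleeps between streamed chunks; unset values use DEFAULT_STREAM_RATE.
const assistantsConfig = req.app.locals[EModelEndpoint.assistants];
const runManager = new StreamRunManager({
  req,
  res,
  streamRate: assistantsConfig?.streamRate,
  // ...other fields omitted
});
```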