⚙️ feat: Adjust Rate of Stream Progress (#3244)

* chore: bump data-provider and add MESSAGES CacheKey

* refactor: avoid saving messages while streaming, save partial text to cache instead

* fix(ci): processChunks

* chore: logging aborted request to debug

* feat: set stream rate for token processing

* chore: specify default stream rate

* fix(ci): Update AppService.js to use optional chaining for endpointLocals assignment

* refactor: abstract the error handler

* feat: streamRate for assistants; refactor: update default rate for token

* refactor: update error handling in assistants/errors.js

Committed by Danny Avila on 2024-07-17 10:47:17 -04:00 (via GitHub)
parent 1c282d1517
commit 5d40d0a37a
29 changed files with 661 additions and 309 deletions

File: api/server/controllers/AskController.js

@@ -1,7 +1,8 @@
const throttle = require('lodash/throttle');
-const { getResponseSender, Constants, EModelEndpoint } = require('librechat-data-provider');
+const { getResponseSender, Constants, CacheKeys, Time } = require('librechat-data-provider');
const { createAbortController, handleAbortError } = require('~/server/middleware');
const { sendMessage, createOnProgress } = require('~/server/utils');
+const { getLogStores } = require('~/cache');
const { saveMessage } = require('~/models');
const { logger } = require('~/config');
@@ -51,11 +52,13 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
try {
const { client } = await initializeClient({ req, res, endpointOption });
-const unfinished = endpointOption.endpoint === EModelEndpoint.google ? false : true;
+const messageCache = getLogStores(CacheKeys.MESSAGES);
const { onProgress: progressCallback, getPartialText } = createOnProgress({
onProgress: throttle(
({ text: partialText }) => {
-saveMessage(req, {
+/*
+const unfinished = endpointOption.endpoint === EModelEndpoint.google ? false : true;
+messageCache.set(responseMessageId, {
messageId: responseMessageId,
sender,
conversationId,
@@ -65,7 +68,10 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
unfinished,
error: false,
user,
-});
+}, Time.FIVE_MINUTES);
+*/
+messageCache.set(responseMessageId, partialText, Time.FIVE_MINUTES);
},
3000,
{ trailing: false },
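
The change above swaps a throttled database write (saveMessage on every progress tick) for a throttled in-memory cache write; the same swap is applied to EditController below. A minimal sketch of the pattern, assuming only a Keyv-style cache exposing set(key, value, ttlMs) — the helper and its wiring are illustrative, not LibreChat's exact createOnProgress internals:

```js
const throttle = require('lodash/throttle');

const FIVE_MINUTES = 5 * 60 * 1000;

// Returns a progress callback that persists partial text at most once
// per 3s window; { trailing: false } fires on the leading edge and
// drops the calls in between.
function makeProgressHandler(messageCache, responseMessageId) {
  return throttle(
    ({ text: partialText }) => {
      messageCache.set(responseMessageId, partialText, FIVE_MINUTES);
    },
    3000,
    { trailing: false },
  );
}
```

If the request aborts mid-stream, the latest cached text can be recovered without ever having written an unfinished row to the database.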

File: api/server/controllers/EditController.js

@@ -1,7 +1,8 @@
const throttle = require('lodash/throttle');
-const { getResponseSender, EModelEndpoint } = require('librechat-data-provider');
+const { getResponseSender, CacheKeys, Time } = require('librechat-data-provider');
const { createAbortController, handleAbortError } = require('~/server/middleware');
const { sendMessage, createOnProgress } = require('~/server/utils');
+const { getLogStores } = require('~/cache');
const { saveMessage } = require('~/models');
const { logger } = require('~/config');
@@ -51,12 +52,14 @@ const EditController = async (req, res, next, initializeClient) => {
}
};
-const unfinished = endpointOption.endpoint === EModelEndpoint.google ? false : true;
+const messageCache = getLogStores(CacheKeys.MESSAGES);
const { onProgress: progressCallback, getPartialText } = createOnProgress({
generation,
onProgress: throttle(
({ text: partialText }) => {
-saveMessage(req, {
+/*
+const unfinished = endpointOption.endpoint === EModelEndpoint.google ? false : true;
+{
messageId: responseMessageId,
sender,
conversationId,
@@ -67,7 +70,8 @@ const EditController = async (req, res, next, initializeClient) => {
isEdited: true,
error: false,
user,
-});
+} */
+messageCache.set(responseMessageId, partialText, Time.FIVE_MINUTES);
},
3000,
{ trailing: false },
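
The consumer side of the cache is not shown in these hunks: on an abort, a handler would read the partial text back and persist one complete message. A sketch under that assumption (recoverPartialResponse and the injected saveMessage are hypothetical names, and the cache is assumed to expose an async get):

```js
// Hypothetical abort-path helper: recover whatever text reached the
// cache before the client disconnected, then persist it once.
async function recoverPartialResponse(messageCache, responseMessageId, saveMessage) {
  const partialText = await messageCache.get(responseMessageId);
  if (!partialText) {
    return null; // nothing streamed before the abort
  }
  const message = {
    messageId: responseMessageId,
    text: partialText,
    unfinished: true,
    error: false,
  };
  await saveMessage(message);
  return message;
}
```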

File: api/server/controllers/assistants/chatV2.js

@@ -1,12 +1,12 @@
const { v4 } = require('uuid');
const {
+Time,
Constants,
RunStatus,
+CacheKeys,
ContentTypes,
ToolCallTypes,
EModelEndpoint,
-ViolationTypes,
retrievalMimeTypes,
AssistantStreamEvents,
} = require('librechat-data-provider');
@@ -14,12 +14,12 @@ const {
initThread,
recordUsage,
saveUserMessage,
-checkMessageGaps,
addThreadMetadata,
saveAssistantMessage,
} = require('~/server/services/Threads');
-const { sendResponse, sendMessage, sleep, isEnabled, countTokens } = require('~/server/utils');
const { runAssistant, createOnTextProgress } = require('~/server/services/AssistantService');
+const { sendMessage, sleep, isEnabled, countTokens } = require('~/server/utils');
+const { createErrorHandler } = require('~/server/controllers/assistants/errors');
const validateAuthor = require('~/server/middleware/assistants/validateAuthor');
const { createRun, StreamRunManager } = require('~/server/services/Runs');
const { addTitle } = require('~/server/services/Endpoints/assistants');
@@ -44,7 +44,7 @@ const ten_minutes = 1000 * 60 * 10;
const chatV2 = async (req, res) => {
logger.debug('[/assistants/chat/] req.body', req.body);
-/** @type {{ files: MongoFile[]}} */
+/** @type {{files: MongoFile[]}} */
const {
text,
model,
@@ -90,140 +90,20 @@ const chatV2 = async (req, res) => {
/** @type {Run | undefined} - The completed run, undefined if incomplete */
let completedRun;
-const handleError = async (error) => {
-const defaultErrorMessage =
-'The Assistant run failed to initialize. Try sending a message in a new conversation.';
-const messageData = {
-thread_id,
-assistant_id,
-conversationId,
-parentMessageId,
-sender: 'System',
-user: req.user.id,
-shouldSaveMessage: false,
-messageId: responseMessageId,
-endpoint,
-};
+const getContext = () => ({
+openai,
+run_id,
+endpoint,
+cacheKey,
+thread_id,
+completedRun,
+assistant_id,
+conversationId,
+parentMessageId,
+responseMessageId,
+});
-if (error.message === 'Run cancelled') {
-return res.end();
-} else if (error.message === 'Request closed' && completedRun) {
-return;
-} else if (error.message === 'Request closed') {
-logger.debug('[/assistants/chat/] Request aborted on close');
-} else if (/Files.*are invalid/.test(error.message)) {
-const errorMessage = `Files are invalid, or may not have uploaded yet.${
-endpoint === EModelEndpoint.azureAssistants
-? ' If using Azure OpenAI, files are only available in the region of the assistant\'s model at the time of upload.'
-: ''
-}`;
-return sendResponse(req, res, messageData, errorMessage);
-} else if (error?.message?.includes('string too long')) {
-return sendResponse(
-req,
-res,
-messageData,
-'Message too long. The Assistants API has a limit of 32,768 characters per message. Please shorten it and try again.',
-);
-} else if (error?.message?.includes(ViolationTypes.TOKEN_BALANCE)) {
-return sendResponse(req, res, messageData, error.message);
-} else {
-logger.error('[/assistants/chat/]', error);
-}
-if (!openai || !thread_id || !run_id) {
-return sendResponse(req, res, messageData, defaultErrorMessage);
-}
-await sleep(2000);
-try {
-const status = await cache.get(cacheKey);
-if (status === 'cancelled') {
-logger.debug('[/assistants/chat/] Run already cancelled');
-return res.end();
-}
-await cache.delete(cacheKey);
-const cancelledRun = await openai.beta.threads.runs.cancel(thread_id, run_id);
-logger.debug('[/assistants/chat/] Cancelled run:', cancelledRun);
-} catch (error) {
-logger.error('[/assistants/chat/] Error cancelling run', error);
-}
-await sleep(2000);
-let run;
-try {
-run = await openai.beta.threads.runs.retrieve(thread_id, run_id);
-await recordUsage({
-...run.usage,
-model: run.model,
-user: req.user.id,
-conversationId,
-});
-} catch (error) {
-logger.error('[/assistants/chat/] Error fetching or processing run', error);
-}
-let finalEvent;
-try {
-const runMessages = await checkMessageGaps({
-openai,
-run_id,
-endpoint,
-thread_id,
-conversationId,
-latestMessageId: responseMessageId,
-});
-const errorContentPart = {
-text: {
-value:
-error?.message ?? 'There was an error processing your request. Please try again later.',
-},
-type: ContentTypes.ERROR,
-};
-if (!Array.isArray(runMessages[runMessages.length - 1]?.content)) {
-runMessages[runMessages.length - 1].content = [errorContentPart];
-} else {
-const contentParts = runMessages[runMessages.length - 1].content;
-for (let i = 0; i < contentParts.length; i++) {
-const currentPart = contentParts[i];
-/** @type {CodeToolCall | RetrievalToolCall | FunctionToolCall | undefined} */
-const toolCall = currentPart?.[ContentTypes.TOOL_CALL];
-if (
-toolCall &&
-toolCall?.function &&
-!(toolCall?.function?.output || toolCall?.function?.output?.length)
-) {
-contentParts[i] = {
-...currentPart,
-[ContentTypes.TOOL_CALL]: {
-...toolCall,
-function: {
-...toolCall.function,
-output: 'error processing tool',
-},
-},
-};
-}
-}
-runMessages[runMessages.length - 1].content.push(errorContentPart);
-}
-finalEvent = {
-final: true,
-conversation: await getConvo(req.user.id, conversationId),
-runMessages,
-};
-} catch (error) {
-logger.error('[/assistants/chat/] Error finalizing error process', error);
-return sendResponse(req, res, messageData, 'The Assistant run failed');
-}
-return sendResponse(req, res, finalEvent);
-};
+const handleError = createErrorHandler({ req, res, getContext });
try {
res.on('close', async () => {
@@ -490,6 +370,11 @@ const chatV2 = async (req, res) => {
},
};
+/** @type {undefined | TAssistantEndpoint} */
+const config = req.app.locals[endpoint] ?? {};
+/** @type {undefined | TBaseEndpoint} */
+const allConfig = req.app.locals.all;
const streamRunManager = new StreamRunManager({
req,
res,
@@ -499,6 +384,7 @@ const chatV2 = async (req, res) => {
attachedFileIds,
parentMessageId: userMessageId,
responseMessage: openai.responseMessage,
+streamRate: allConfig?.streamRate ?? config.streamRate,
// streamOptions: {
// },
@@ -511,6 +397,16 @@ const chatV2 = async (req, res) => {
response = streamRunManager;
response.text = streamRunManager.intermediateText;
+const messageCache = getLogStores(CacheKeys.MESSAGES);
+messageCache.set(
+responseMessageId,
+{
+complete: true,
+text: response.text,
+},
+Time.FIVE_MINUTES,
+);
};
await processRun();
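
Two things stand out in the chatV2 changes. First, the resolved stream rate gives the endpoint-wide `all` config precedence over the assistants endpoint config (`allConfig?.streamRate ?? config.streamRate`). Second, once the run finishes, the final text is written back to the MESSAGES cache as `{ complete: true, text }`, so downstream consumers can tell a finished response from a partial one. Conceptually, streamRate paces how quickly buffered tokens are flushed to the client; a sketch of that idea, assuming streamRate is a per-chunk delay in milliseconds (the emit/sleep loop is illustrative, not StreamRunManager's actual implementation):

```js
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

// Illustrative pacing loop: a larger streamRate slows the visible
// typing speed without changing what the model generated.
async function streamTokens(tokens, emit, streamRate = 1) {
  for (const token of tokens) {
    emit(token); // e.g. write one SSE chunk to the response
    await sleep(streamRate);
  }
}
```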

File: api/server/controllers/assistants/errors.js (new file)

@@ -0,0 +1,193 @@
// errorHandler.js
const { sendResponse } = require('~/server/utils');
const { logger } = require('~/config');
const getLogStores = require('~/cache/getLogStores');
const { CacheKeys, ViolationTypes, ContentTypes } = require('librechat-data-provider');
const { getConvo } = require('~/models/Conversation');
const { recordUsage, checkMessageGaps } = require('~/server/services/Threads');
/**
* @typedef {Object} ErrorHandlerContext
* @property {OpenAIClient} openai - The OpenAI client
* @property {string} thread_id - The thread ID
* @property {string} run_id - The run ID
* @property {boolean} completedRun - Whether the run has completed
* @property {string} assistant_id - The assistant ID
* @property {string} conversationId - The conversation ID
* @property {string} parentMessageId - The parent message ID
* @property {string} responseMessageId - The response message ID
* @property {string} endpoint - The endpoint being used
* @property {string} cacheKey - The cache key for the current request
*/
/**
* @typedef {Object} ErrorHandlerDependencies
* @property {Express.Request} req - The Express request object
* @property {Express.Response} res - The Express response object
* @property {() => ErrorHandlerContext} getContext - Function to get the current context
* @property {string} [originPath] - The origin path for the error handler
*/
/**
* Creates an error handler function with the given dependencies
* @param {ErrorHandlerDependencies} dependencies - The dependencies for the error handler
* @returns {(error: Error) => Promise<void>} The error handler function
*/
const createErrorHandler = ({ req, res, getContext, originPath = '/assistants/chat/' }) => {
const cache = getLogStores(CacheKeys.ABORT_KEYS);
/**
* Handles errors that occur during the chat process
* @param {Error} error - The error that occurred
* @returns {Promise<void>}
*/
return async (error) => {
const {
openai,
run_id,
endpoint,
cacheKey,
thread_id,
completedRun,
assistant_id,
conversationId,
parentMessageId,
responseMessageId,
} = getContext();
const defaultErrorMessage =
'The Assistant run failed to initialize. Try sending a message in a new conversation.';
const messageData = {
thread_id,
assistant_id,
conversationId,
parentMessageId,
sender: 'System',
user: req.user.id,
shouldSaveMessage: false,
messageId: responseMessageId,
endpoint,
};
if (error.message === 'Run cancelled') {
return res.end();
} else if (error.message === 'Request closed' && completedRun) {
return;
} else if (error.message === 'Request closed') {
logger.debug(`[${originPath}] Request aborted on close`);
} else if (/Files.*are invalid/.test(error.message)) {
const errorMessage = `Files are invalid, or may not have uploaded yet.${
endpoint === 'azureAssistants'
? ' If using Azure OpenAI, files are only available in the region of the assistant\'s model at the time of upload.'
: ''
}`;
return sendResponse(req, res, messageData, errorMessage);
} else if (error?.message?.includes('string too long')) {
return sendResponse(
req,
res,
messageData,
'Message too long. The Assistants API has a limit of 32,768 characters per message. Please shorten it and try again.',
);
} else if (error?.message?.includes(ViolationTypes.TOKEN_BALANCE)) {
return sendResponse(req, res, messageData, error.message);
} else {
logger.error(`[${originPath}]`, error);
}
if (!openai || !thread_id || !run_id) {
return sendResponse(req, res, messageData, defaultErrorMessage);
}
await new Promise((resolve) => setTimeout(resolve, 2000));
try {
const status = await cache.get(cacheKey);
if (status === 'cancelled') {
logger.debug(`[${originPath}] Run already cancelled`);
return res.end();
}
await cache.delete(cacheKey);
const cancelledRun = await openai.beta.threads.runs.cancel(thread_id, run_id);
logger.debug(`[${originPath}] Cancelled run:`, cancelledRun);
} catch (error) {
logger.error(`[${originPath}] Error cancelling run`, error);
}
await new Promise((resolve) => setTimeout(resolve, 2000));
let run;
try {
run = await openai.beta.threads.runs.retrieve(thread_id, run_id);
await recordUsage({
...run.usage,
model: run.model,
user: req.user.id,
conversationId,
});
} catch (error) {
logger.error(`[${originPath}] Error fetching or processing run`, error);
}
let finalEvent;
try {
const runMessages = await checkMessageGaps({
openai,
run_id,
endpoint,
thread_id,
conversationId,
latestMessageId: responseMessageId,
});
const errorContentPart = {
text: {
value:
error?.message ?? 'There was an error processing your request. Please try again later.',
},
type: ContentTypes.ERROR,
};
if (!Array.isArray(runMessages[runMessages.length - 1]?.content)) {
runMessages[runMessages.length - 1].content = [errorContentPart];
} else {
const contentParts = runMessages[runMessages.length - 1].content;
for (let i = 0; i < contentParts.length; i++) {
const currentPart = contentParts[i];
/** @type {CodeToolCall | RetrievalToolCall | FunctionToolCall | undefined} */
const toolCall = currentPart?.[ContentTypes.TOOL_CALL];
if (
toolCall &&
toolCall?.function &&
!(toolCall?.function?.output || toolCall?.function?.output?.length)
) {
contentParts[i] = {
...currentPart,
[ContentTypes.TOOL_CALL]: {
...toolCall,
function: {
...toolCall.function,
output: 'error processing tool',
},
},
};
}
}
runMessages[runMessages.length - 1].content.push(errorContentPart);
}
finalEvent = {
final: true,
conversation: await getConvo(req.user.id, conversationId),
runMessages,
};
} catch (error) {
logger.error(`[${originPath}] Error finalizing error process`, error);
return sendResponse(req, res, messageData, 'The Assistant run failed');
}
return sendResponse(req, res, finalEvent);
};
};
module.exports = { createErrorHandler };
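
Worth noting: createErrorHandler takes getContext as a function rather than a plain context object because values like openai and run_id are only assigned partway through the request; reading them lazily means the handler sees their values at error time, not at creation time. A sketch of the intended wiring (controller and initialize are hypothetical stand-ins, not the actual chatV2 flow):

```js
const { createErrorHandler } = require('~/server/controllers/assistants/errors');

async function controller(req, res) {
  // Mutable locals that are filled in as the request progresses.
  let openai;
  let run_id;
  let completedRun;

  const handleError = createErrorHandler({
    req,
    res,
    // Evaluated lazily on error, so the handler reads the current
    // values instead of the undefined ones from creation time.
    getContext: () => ({ openai, run_id, completedRun /* ...rest of context */ }),
  });

  try {
    openai = await initialize(req); // hypothetical setup step
    // ...create the run, assign run_id, stream the response...
  } catch (error) {
    await handleError(error);
  }
}
```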