mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 00:40:14 +01:00
⚙️ feat: Adjust Rate of Stream Progress (#3244)
* chore: bump data-provider and add MESSAGES CacheKey * refactor: avoid saving messages while streaming, save partial text to cache instead * fix(ci): processChunks * chore: logging aborted request to debug * feat: set stream rate for token processing * chore: specify default stream rate * fix(ci): Update AppService.js to use optional chaining for endpointLocals assignment * refactor: abstract the error handler * feat: streamRate for assistants; refactor: update default rate for token * refactor: update error handling in assistants/errors.js * refactor: update error handling in assistants/errors.js
This commit is contained in:
parent
1c282d1517
commit
5d40d0a37a
29 changed files with 661 additions and 309 deletions
|
|
@ -1,7 +1,8 @@
|
|||
const throttle = require('lodash/throttle');
|
||||
const { getResponseSender, Constants, EModelEndpoint } = require('librechat-data-provider');
|
||||
const { getResponseSender, Constants, CacheKeys, Time } = require('librechat-data-provider');
|
||||
const { createAbortController, handleAbortError } = require('~/server/middleware');
|
||||
const { sendMessage, createOnProgress } = require('~/server/utils');
|
||||
const { getLogStores } = require('~/cache');
|
||||
const { saveMessage } = require('~/models');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
|
|
@ -51,11 +52,13 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
|
|||
|
||||
try {
|
||||
const { client } = await initializeClient({ req, res, endpointOption });
|
||||
const unfinished = endpointOption.endpoint === EModelEndpoint.google ? false : true;
|
||||
const messageCache = getLogStores(CacheKeys.MESSAGES);
|
||||
const { onProgress: progressCallback, getPartialText } = createOnProgress({
|
||||
onProgress: throttle(
|
||||
({ text: partialText }) => {
|
||||
saveMessage(req, {
|
||||
/*
|
||||
const unfinished = endpointOption.endpoint === EModelEndpoint.google ? false : true;
|
||||
messageCache.set(responseMessageId, {
|
||||
messageId: responseMessageId,
|
||||
sender,
|
||||
conversationId,
|
||||
|
|
@ -65,7 +68,10 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
|
|||
unfinished,
|
||||
error: false,
|
||||
user,
|
||||
});
|
||||
}, Time.FIVE_MINUTES);
|
||||
*/
|
||||
|
||||
messageCache.set(responseMessageId, partialText, Time.FIVE_MINUTES);
|
||||
},
|
||||
3000,
|
||||
{ trailing: false },
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
const throttle = require('lodash/throttle');
|
||||
const { getResponseSender, EModelEndpoint } = require('librechat-data-provider');
|
||||
const { getResponseSender, CacheKeys, Time } = require('librechat-data-provider');
|
||||
const { createAbortController, handleAbortError } = require('~/server/middleware');
|
||||
const { sendMessage, createOnProgress } = require('~/server/utils');
|
||||
const { getLogStores } = require('~/cache');
|
||||
const { saveMessage } = require('~/models');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
|
|
@ -51,12 +52,14 @@ const EditController = async (req, res, next, initializeClient) => {
|
|||
}
|
||||
};
|
||||
|
||||
const unfinished = endpointOption.endpoint === EModelEndpoint.google ? false : true;
|
||||
const messageCache = getLogStores(CacheKeys.MESSAGES);
|
||||
const { onProgress: progressCallback, getPartialText } = createOnProgress({
|
||||
generation,
|
||||
onProgress: throttle(
|
||||
({ text: partialText }) => {
|
||||
saveMessage(req, {
|
||||
/*
|
||||
const unfinished = endpointOption.endpoint === EModelEndpoint.google ? false : true;
|
||||
{
|
||||
messageId: responseMessageId,
|
||||
sender,
|
||||
conversationId,
|
||||
|
|
@ -67,7 +70,8 @@ const EditController = async (req, res, next, initializeClient) => {
|
|||
isEdited: true,
|
||||
error: false,
|
||||
user,
|
||||
});
|
||||
} */
|
||||
messageCache.set(responseMessageId, partialText, Time.FIVE_MINUTES);
|
||||
},
|
||||
3000,
|
||||
{ trailing: false },
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
const { v4 } = require('uuid');
|
||||
const {
|
||||
Time,
|
||||
Constants,
|
||||
RunStatus,
|
||||
CacheKeys,
|
||||
ContentTypes,
|
||||
ToolCallTypes,
|
||||
EModelEndpoint,
|
||||
ViolationTypes,
|
||||
retrievalMimeTypes,
|
||||
AssistantStreamEvents,
|
||||
} = require('librechat-data-provider');
|
||||
|
|
@ -14,12 +14,12 @@ const {
|
|||
initThread,
|
||||
recordUsage,
|
||||
saveUserMessage,
|
||||
checkMessageGaps,
|
||||
addThreadMetadata,
|
||||
saveAssistantMessage,
|
||||
} = require('~/server/services/Threads');
|
||||
const { sendResponse, sendMessage, sleep, isEnabled, countTokens } = require('~/server/utils');
|
||||
const { runAssistant, createOnTextProgress } = require('~/server/services/AssistantService');
|
||||
const { sendMessage, sleep, isEnabled, countTokens } = require('~/server/utils');
|
||||
const { createErrorHandler } = require('~/server/controllers/assistants/errors');
|
||||
const validateAuthor = require('~/server/middleware/assistants/validateAuthor');
|
||||
const { createRun, StreamRunManager } = require('~/server/services/Runs');
|
||||
const { addTitle } = require('~/server/services/Endpoints/assistants');
|
||||
|
|
@ -44,7 +44,7 @@ const ten_minutes = 1000 * 60 * 10;
|
|||
const chatV2 = async (req, res) => {
|
||||
logger.debug('[/assistants/chat/] req.body', req.body);
|
||||
|
||||
/** @type {{ files: MongoFile[]}} */
|
||||
/** @type {{files: MongoFile[]}} */
|
||||
const {
|
||||
text,
|
||||
model,
|
||||
|
|
@ -90,140 +90,20 @@ const chatV2 = async (req, res) => {
|
|||
/** @type {Run | undefined} - The completed run, undefined if incomplete */
|
||||
let completedRun;
|
||||
|
||||
const handleError = async (error) => {
|
||||
const defaultErrorMessage =
|
||||
'The Assistant run failed to initialize. Try sending a message in a new conversation.';
|
||||
const messageData = {
|
||||
thread_id,
|
||||
assistant_id,
|
||||
conversationId,
|
||||
parentMessageId,
|
||||
sender: 'System',
|
||||
user: req.user.id,
|
||||
shouldSaveMessage: false,
|
||||
messageId: responseMessageId,
|
||||
endpoint,
|
||||
};
|
||||
const getContext = () => ({
|
||||
openai,
|
||||
run_id,
|
||||
endpoint,
|
||||
cacheKey,
|
||||
thread_id,
|
||||
completedRun,
|
||||
assistant_id,
|
||||
conversationId,
|
||||
parentMessageId,
|
||||
responseMessageId,
|
||||
});
|
||||
|
||||
if (error.message === 'Run cancelled') {
|
||||
return res.end();
|
||||
} else if (error.message === 'Request closed' && completedRun) {
|
||||
return;
|
||||
} else if (error.message === 'Request closed') {
|
||||
logger.debug('[/assistants/chat/] Request aborted on close');
|
||||
} else if (/Files.*are invalid/.test(error.message)) {
|
||||
const errorMessage = `Files are invalid, or may not have uploaded yet.${
|
||||
endpoint === EModelEndpoint.azureAssistants
|
||||
? ' If using Azure OpenAI, files are only available in the region of the assistant\'s model at the time of upload.'
|
||||
: ''
|
||||
}`;
|
||||
return sendResponse(req, res, messageData, errorMessage);
|
||||
} else if (error?.message?.includes('string too long')) {
|
||||
return sendResponse(
|
||||
req,
|
||||
res,
|
||||
messageData,
|
||||
'Message too long. The Assistants API has a limit of 32,768 characters per message. Please shorten it and try again.',
|
||||
);
|
||||
} else if (error?.message?.includes(ViolationTypes.TOKEN_BALANCE)) {
|
||||
return sendResponse(req, res, messageData, error.message);
|
||||
} else {
|
||||
logger.error('[/assistants/chat/]', error);
|
||||
}
|
||||
|
||||
if (!openai || !thread_id || !run_id) {
|
||||
return sendResponse(req, res, messageData, defaultErrorMessage);
|
||||
}
|
||||
|
||||
await sleep(2000);
|
||||
|
||||
try {
|
||||
const status = await cache.get(cacheKey);
|
||||
if (status === 'cancelled') {
|
||||
logger.debug('[/assistants/chat/] Run already cancelled');
|
||||
return res.end();
|
||||
}
|
||||
await cache.delete(cacheKey);
|
||||
const cancelledRun = await openai.beta.threads.runs.cancel(thread_id, run_id);
|
||||
logger.debug('[/assistants/chat/] Cancelled run:', cancelledRun);
|
||||
} catch (error) {
|
||||
logger.error('[/assistants/chat/] Error cancelling run', error);
|
||||
}
|
||||
|
||||
await sleep(2000);
|
||||
|
||||
let run;
|
||||
try {
|
||||
run = await openai.beta.threads.runs.retrieve(thread_id, run_id);
|
||||
await recordUsage({
|
||||
...run.usage,
|
||||
model: run.model,
|
||||
user: req.user.id,
|
||||
conversationId,
|
||||
});
|
||||
} catch (error) {
|
||||
logger.error('[/assistants/chat/] Error fetching or processing run', error);
|
||||
}
|
||||
|
||||
let finalEvent;
|
||||
try {
|
||||
const runMessages = await checkMessageGaps({
|
||||
openai,
|
||||
run_id,
|
||||
endpoint,
|
||||
thread_id,
|
||||
conversationId,
|
||||
latestMessageId: responseMessageId,
|
||||
});
|
||||
|
||||
const errorContentPart = {
|
||||
text: {
|
||||
value:
|
||||
error?.message ?? 'There was an error processing your request. Please try again later.',
|
||||
},
|
||||
type: ContentTypes.ERROR,
|
||||
};
|
||||
|
||||
if (!Array.isArray(runMessages[runMessages.length - 1]?.content)) {
|
||||
runMessages[runMessages.length - 1].content = [errorContentPart];
|
||||
} else {
|
||||
const contentParts = runMessages[runMessages.length - 1].content;
|
||||
for (let i = 0; i < contentParts.length; i++) {
|
||||
const currentPart = contentParts[i];
|
||||
/** @type {CodeToolCall | RetrievalToolCall | FunctionToolCall | undefined} */
|
||||
const toolCall = currentPart?.[ContentTypes.TOOL_CALL];
|
||||
if (
|
||||
toolCall &&
|
||||
toolCall?.function &&
|
||||
!(toolCall?.function?.output || toolCall?.function?.output?.length)
|
||||
) {
|
||||
contentParts[i] = {
|
||||
...currentPart,
|
||||
[ContentTypes.TOOL_CALL]: {
|
||||
...toolCall,
|
||||
function: {
|
||||
...toolCall.function,
|
||||
output: 'error processing tool',
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
}
|
||||
runMessages[runMessages.length - 1].content.push(errorContentPart);
|
||||
}
|
||||
|
||||
finalEvent = {
|
||||
final: true,
|
||||
conversation: await getConvo(req.user.id, conversationId),
|
||||
runMessages,
|
||||
};
|
||||
} catch (error) {
|
||||
logger.error('[/assistants/chat/] Error finalizing error process', error);
|
||||
return sendResponse(req, res, messageData, 'The Assistant run failed');
|
||||
}
|
||||
|
||||
return sendResponse(req, res, finalEvent);
|
||||
};
|
||||
const handleError = createErrorHandler({ req, res, getContext });
|
||||
|
||||
try {
|
||||
res.on('close', async () => {
|
||||
|
|
@ -490,6 +370,11 @@ const chatV2 = async (req, res) => {
|
|||
},
|
||||
};
|
||||
|
||||
/** @type {undefined | TAssistantEndpoint} */
|
||||
const config = req.app.locals[endpoint] ?? {};
|
||||
/** @type {undefined | TBaseEndpoint} */
|
||||
const allConfig = req.app.locals.all;
|
||||
|
||||
const streamRunManager = new StreamRunManager({
|
||||
req,
|
||||
res,
|
||||
|
|
@ -499,6 +384,7 @@ const chatV2 = async (req, res) => {
|
|||
attachedFileIds,
|
||||
parentMessageId: userMessageId,
|
||||
responseMessage: openai.responseMessage,
|
||||
streamRate: allConfig?.streamRate ?? config.streamRate,
|
||||
// streamOptions: {
|
||||
|
||||
// },
|
||||
|
|
@ -511,6 +397,16 @@ const chatV2 = async (req, res) => {
|
|||
|
||||
response = streamRunManager;
|
||||
response.text = streamRunManager.intermediateText;
|
||||
|
||||
const messageCache = getLogStores(CacheKeys.MESSAGES);
|
||||
messageCache.set(
|
||||
responseMessageId,
|
||||
{
|
||||
complete: true,
|
||||
text: response.text,
|
||||
},
|
||||
Time.FIVE_MINUTES,
|
||||
);
|
||||
};
|
||||
|
||||
await processRun();
|
||||
|
|
|
|||
193
api/server/controllers/assistants/errors.js
Normal file
193
api/server/controllers/assistants/errors.js
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
// errorHandler.js
|
||||
const { sendResponse } = require('~/server/utils');
|
||||
const { logger } = require('~/config');
|
||||
const getLogStores = require('~/cache/getLogStores');
|
||||
const { CacheKeys, ViolationTypes, ContentTypes } = require('librechat-data-provider');
|
||||
const { getConvo } = require('~/models/Conversation');
|
||||
const { recordUsage, checkMessageGaps } = require('~/server/services/Threads');
|
||||
|
||||
/**
|
||||
* @typedef {Object} ErrorHandlerContext
|
||||
* @property {OpenAIClient} openai - The OpenAI client
|
||||
* @property {string} thread_id - The thread ID
|
||||
* @property {string} run_id - The run ID
|
||||
* @property {boolean} completedRun - Whether the run has completed
|
||||
* @property {string} assistant_id - The assistant ID
|
||||
* @property {string} conversationId - The conversation ID
|
||||
* @property {string} parentMessageId - The parent message ID
|
||||
* @property {string} responseMessageId - The response message ID
|
||||
* @property {string} endpoint - The endpoint being used
|
||||
* @property {string} cacheKey - The cache key for the current request
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {Object} ErrorHandlerDependencies
|
||||
* @property {Express.Request} req - The Express request object
|
||||
* @property {Express.Response} res - The Express response object
|
||||
* @property {() => ErrorHandlerContext} getContext - Function to get the current context
|
||||
* @property {string} [originPath] - The origin path for the error handler
|
||||
*/
|
||||
|
||||
/**
|
||||
* Creates an error handler function with the given dependencies
|
||||
* @param {ErrorHandlerDependencies} dependencies - The dependencies for the error handler
|
||||
* @returns {(error: Error) => Promise<void>} The error handler function
|
||||
*/
|
||||
const createErrorHandler = ({ req, res, getContext, originPath = '/assistants/chat/' }) => {
|
||||
const cache = getLogStores(CacheKeys.ABORT_KEYS);
|
||||
|
||||
/**
|
||||
* Handles errors that occur during the chat process
|
||||
* @param {Error} error - The error that occurred
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
return async (error) => {
|
||||
const {
|
||||
openai,
|
||||
run_id,
|
||||
endpoint,
|
||||
cacheKey,
|
||||
thread_id,
|
||||
completedRun,
|
||||
assistant_id,
|
||||
conversationId,
|
||||
parentMessageId,
|
||||
responseMessageId,
|
||||
} = getContext();
|
||||
|
||||
const defaultErrorMessage =
|
||||
'The Assistant run failed to initialize. Try sending a message in a new conversation.';
|
||||
const messageData = {
|
||||
thread_id,
|
||||
assistant_id,
|
||||
conversationId,
|
||||
parentMessageId,
|
||||
sender: 'System',
|
||||
user: req.user.id,
|
||||
shouldSaveMessage: false,
|
||||
messageId: responseMessageId,
|
||||
endpoint,
|
||||
};
|
||||
|
||||
if (error.message === 'Run cancelled') {
|
||||
return res.end();
|
||||
} else if (error.message === 'Request closed' && completedRun) {
|
||||
return;
|
||||
} else if (error.message === 'Request closed') {
|
||||
logger.debug(`[${originPath}] Request aborted on close`);
|
||||
} else if (/Files.*are invalid/.test(error.message)) {
|
||||
const errorMessage = `Files are invalid, or may not have uploaded yet.${
|
||||
endpoint === 'azureAssistants'
|
||||
? ' If using Azure OpenAI, files are only available in the region of the assistant\'s model at the time of upload.'
|
||||
: ''
|
||||
}`;
|
||||
return sendResponse(req, res, messageData, errorMessage);
|
||||
} else if (error?.message?.includes('string too long')) {
|
||||
return sendResponse(
|
||||
req,
|
||||
res,
|
||||
messageData,
|
||||
'Message too long. The Assistants API has a limit of 32,768 characters per message. Please shorten it and try again.',
|
||||
);
|
||||
} else if (error?.message?.includes(ViolationTypes.TOKEN_BALANCE)) {
|
||||
return sendResponse(req, res, messageData, error.message);
|
||||
} else {
|
||||
logger.error(`[${originPath}]`, error);
|
||||
}
|
||||
|
||||
if (!openai || !thread_id || !run_id) {
|
||||
return sendResponse(req, res, messageData, defaultErrorMessage);
|
||||
}
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, 2000));
|
||||
|
||||
try {
|
||||
const status = await cache.get(cacheKey);
|
||||
if (status === 'cancelled') {
|
||||
logger.debug(`[${originPath}] Run already cancelled`);
|
||||
return res.end();
|
||||
}
|
||||
await cache.delete(cacheKey);
|
||||
const cancelledRun = await openai.beta.threads.runs.cancel(thread_id, run_id);
|
||||
logger.debug(`[${originPath}] Cancelled run:`, cancelledRun);
|
||||
} catch (error) {
|
||||
logger.error(`[${originPath}] Error cancelling run`, error);
|
||||
}
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, 2000));
|
||||
|
||||
let run;
|
||||
try {
|
||||
run = await openai.beta.threads.runs.retrieve(thread_id, run_id);
|
||||
await recordUsage({
|
||||
...run.usage,
|
||||
model: run.model,
|
||||
user: req.user.id,
|
||||
conversationId,
|
||||
});
|
||||
} catch (error) {
|
||||
logger.error(`[${originPath}] Error fetching or processing run`, error);
|
||||
}
|
||||
|
||||
let finalEvent;
|
||||
try {
|
||||
const runMessages = await checkMessageGaps({
|
||||
openai,
|
||||
run_id,
|
||||
endpoint,
|
||||
thread_id,
|
||||
conversationId,
|
||||
latestMessageId: responseMessageId,
|
||||
});
|
||||
|
||||
const errorContentPart = {
|
||||
text: {
|
||||
value:
|
||||
error?.message ?? 'There was an error processing your request. Please try again later.',
|
||||
},
|
||||
type: ContentTypes.ERROR,
|
||||
};
|
||||
|
||||
if (!Array.isArray(runMessages[runMessages.length - 1]?.content)) {
|
||||
runMessages[runMessages.length - 1].content = [errorContentPart];
|
||||
} else {
|
||||
const contentParts = runMessages[runMessages.length - 1].content;
|
||||
for (let i = 0; i < contentParts.length; i++) {
|
||||
const currentPart = contentParts[i];
|
||||
/** @type {CodeToolCall | RetrievalToolCall | FunctionToolCall | undefined} */
|
||||
const toolCall = currentPart?.[ContentTypes.TOOL_CALL];
|
||||
if (
|
||||
toolCall &&
|
||||
toolCall?.function &&
|
||||
!(toolCall?.function?.output || toolCall?.function?.output?.length)
|
||||
) {
|
||||
contentParts[i] = {
|
||||
...currentPart,
|
||||
[ContentTypes.TOOL_CALL]: {
|
||||
...toolCall,
|
||||
function: {
|
||||
...toolCall.function,
|
||||
output: 'error processing tool',
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
}
|
||||
runMessages[runMessages.length - 1].content.push(errorContentPart);
|
||||
}
|
||||
|
||||
finalEvent = {
|
||||
final: true,
|
||||
conversation: await getConvo(req.user.id, conversationId),
|
||||
runMessages,
|
||||
};
|
||||
} catch (error) {
|
||||
logger.error(`[${originPath}] Error finalizing error process`, error);
|
||||
return sendResponse(req, res, messageData, 'The Assistant run failed');
|
||||
}
|
||||
|
||||
return sendResponse(req, res, finalEvent);
|
||||
};
|
||||
};
|
||||
|
||||
module.exports = { createErrorHandler };
|
||||
Loading…
Add table
Add a link
Reference in a new issue