Mirror of https://github.com/danny-avila/LibreChat.git (synced 2025-12-21 02:40:14 +01:00)
🚀 feat: Assistants Streaming (#2159)
* chore: bump openai to 4.29.0 and npm audit fix
* chore: remove unnecessary stream field from ContentData
* feat: new enum and types for AssistantStreamEvent
* refactor(AssistantService): remove stream field and add conversationId to text ContentData
  - return `finalMessage` and `text` on run completion
  - move `processMessages` to services/Threads to avoid circular dependencies with new stream handling
  - refactor(processMessages/retrieveAndProcessFile): add new `client` field to differentiate new RunClient type
* WIP: new assistants stream handling
* chore: store messages in StreamRunManager
* chore: add additional typedefs
* fix: pass req and openai to StreamRunManager
* fix(AssistantService): pass openai as client to `retrieveAndProcessFile`
* WIP: streaming tool i/o, handle in_progress and completed run steps
* feat(assistants): process required actions with streaming enabled
* chore: condense early return check for useSSE useEffect
* chore: remove unnecessary comments and only handle completed tool calls when not a function
* feat: add TTL for assistants run abort cacheKey
* feat: abort stream runs
* fix(assistants): render streaming cursor
* fix(assistants): hide edit icon as the functionality is not supported
* fix(textArea): handle pasting edge cases: first, when onChange events wouldn't fire; second, when the textarea wouldn't resize
* chore: memoize Conversations
* chore(useTextarea): reverse args order
* fix: load default capabilities when Azure is configured to support assistants but the `assistants` endpoint is not configured
* fix(AssistantSelect): update form assistant model on assistant form select
* fix(actions): handle Azure strict validation for function names to fix CRUD for actions
* chore: remove content data debug log as it fires in rapid succession
* feat: improve UX for assistant errors mid-request
* feat: add tool call localizations and replace any domain separators in Azure action names
* refactor(chat): error out tool calls without outputs during handleError
* fix(ToolService): handle domain separators, allowing Azure use of actions
* refactor(StreamRunManager): types and throw Error if tool submission fails
parent ed64c76053, commit f427ad792a
39 changed files with 1503 additions and 330 deletions
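
The streaming path in the chat route below builds a `handlers` map keyed by `AssistantStreamEvents` and passes it to `StreamRunManager`. The manager's internals are not part of this diff; the following is only a minimal sketch of how such a dispatcher could consume the Assistants streaming API, assuming the bumped OpenAI Node SDK (4.29.0) returns an async-iterable stream of events when `stream: true` is passed to `runs.create`, and that the `AssistantStreamEvents` enum values match the SDK event names (e.g. `thread.run.created`).

// Hypothetical sketch only; not the StreamRunManager implementation from this commit.
// It shows how a per-event handler map (like the `handlers` option in the diff below)
// could be dispatched while iterating an Assistants API stream.
async function streamRun({ openai, thread_id, body, handlers = {} }) {
  // With `stream: true`, the SDK yields AssistantStreamEvent objects,
  // each carrying an `event` name and a `data` payload.
  const stream = await openai.beta.threads.runs.create(thread_id, {
    ...body,
    stream: true,
  });

  for await (const event of stream) {
    // e.g. 'thread.run.created' -> cache `${thread_id}:${run_id}` for aborts
    const handler = handlers[event.event];
    if (handler) {
      await handler(event);
    }
  }
}

A caller would pass the same kind of per-event map the route registers for `ThreadRunCreated` below, which caches `${thread_id}:${run_id}` and sends the initial response.
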
@@ -2,9 +2,9 @@ const { v4 } = require('uuid');
const express = require('express');
const { actionDelimiter } = require('librechat-data-provider');
const { initializeClient } = require('~/server/services/Endpoints/assistants');
const { encryptMetadata, domainParser } = require('~/server/services/ActionService');
const { updateAction, getActions, deleteAction } = require('~/models/Action');
const { updateAssistant, getAssistant } = require('~/models/Assistant');
const { encryptMetadata } = require('~/server/services/ActionService');
const { logger } = require('~/config');

const router = express.Router();

@@ -44,7 +44,10 @@ router.post('/:assistant_id', async (req, res) => {
  let metadata = encryptMetadata(_metadata);

  const { domain } = metadata;
  let { domain } = metadata;
  /* Azure doesn't support periods in function names */
  domain = domainParser(req, domain, true);

  if (!domain) {
    return res.status(400).json({ message: 'No domain provided' });
  }
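
Azure OpenAI doesn't allow periods in function names (per the comment above), so the action's domain is run through `domainParser` before any function names are built or matched. The real parser lives in `~/server/services/ActionService` and is not shown in this diff; the snippet below is only a hypothetical sketch of the idea, using the `actionDelimiter` imported in the first hunk as the separator (the helper names are illustrative).

// Hypothetical illustration; the real domainParser in ActionService covers more cases
// (the commit notes mention handling Azure strict validation and domain separators).
// Azure OpenAI rejects periods in function names, so a domain such as "api.example.com"
// has to be encoded before it can appear inside an action's function name.
const { actionDelimiter } = require('librechat-data-provider');

function encodeDomain(domain) {
  // e.g. "api.example.com" -> "api<actionDelimiter>example<actionDelimiter>com"
  return domain.split('.').join(actionDelimiter);
}

function decodeDomain(encoded) {
  // restore the original hostname when reading a function name back
  return encoded.split(actionDelimiter).join('.');
}

The delete route in the next hunks applies the same parsing before filtering `tools` by `tool.function.name.includes(domain)`, so both directions have to agree on the encoding.
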
@@ -141,9 +144,10 @@ router.post('/:assistant_id', async (req, res) => {
 * @param {string} req.params.action_id - The ID of the action to delete.
 * @returns {Object} 200 - success response - application/json
 */
router.delete('/:assistant_id/:action_id', async (req, res) => {
router.delete('/:assistant_id/:action_id/:model', async (req, res) => {
  try {
    const { assistant_id, action_id } = req.params;
    const { assistant_id, action_id, model } = req.params;
    req.body.model = model;

    /** @type {{ openai: OpenAI }} */
    const { openai } = await initializeClient({ req, res });
@@ -167,6 +171,8 @@ router.delete('/:assistant_id/:action_id', async (req, res) => {
      return true;
    });

    domain = domainParser(req, domain, true);

    const updatedTools = tools.filter(
      (tool) => !(tool.function && tool.function.name.includes(domain)),
    );

@@ -4,8 +4,10 @@ const {
  Constants,
  RunStatus,
  CacheKeys,
  ContentTypes,
  EModelEndpoint,
  ViolationTypes,
  AssistantStreamEvents,
} = require('librechat-data-provider');
const {
  initThread,
@@ -18,8 +20,8 @@ const {
const { runAssistant, createOnTextProgress } = require('~/server/services/AssistantService');
const { addTitle, initializeClient } = require('~/server/services/Endpoints/assistants');
const { sendResponse, sendMessage, sleep, isEnabled, countTokens } = require('~/server/utils');
const { createRun, StreamRunManager } = require('~/server/services/Runs');
const { getTransactions } = require('~/models/Transaction');
const { createRun } = require('~/server/services/Runs');
const checkBalance = require('~/models/checkBalance');
const { getConvo } = require('~/models/Conversation');
const getLogStores = require('~/cache/getLogStores');
@@ -38,6 +40,8 @@ const {

router.post('/abort', handleAbort());

const ten_minutes = 1000 * 60 * 10;

/**
 * @route POST /
 * @desc Chat with an assistant
@@ -147,7 +151,7 @@ router.post('/', validateModel, buildEndpointOption, setHeaders, async (req, res
    return sendResponse(res, messageData, defaultErrorMessage);
  }

  await sleep(3000);
  await sleep(2000);

  try {
    const status = await cache.get(cacheKey);
@@ -187,6 +191,42 @@ router.post('/', validateModel, buildEndpointOption, setHeaders, async (req, res
      latestMessageId: responseMessageId,
    });

    const errorContentPart = {
      text: {
        value:
          error?.message ?? 'There was an error processing your request. Please try again later.',
      },
      type: ContentTypes.ERROR,
    };

    if (!Array.isArray(runMessages[runMessages.length - 1]?.content)) {
      runMessages[runMessages.length - 1].content = [errorContentPart];
    } else {
      const contentParts = runMessages[runMessages.length - 1].content;
      for (let i = 0; i < contentParts.length; i++) {
        const currentPart = contentParts[i];
        /** @type {CodeToolCall | RetrievalToolCall | FunctionToolCall | undefined} */
        const toolCall = currentPart?.[ContentTypes.TOOL_CALL];
        if (
          toolCall &&
          toolCall?.function &&
          !(toolCall?.function?.output || toolCall?.function?.output?.length)
        ) {
          contentParts[i] = {
            ...currentPart,
            [ContentTypes.TOOL_CALL]: {
              ...toolCall,
              function: {
                ...toolCall.function,
                output: 'error processing tool',
              },
            },
          };
        }
      }
      runMessages[runMessages.length - 1].content.push(errorContentPart);
    }

    finalEvent = {
      title: 'New Chat',
      final: true,
@@ -358,53 +398,107 @@ router.post('/', validateModel, buildEndpointOption, setHeaders, async (req, res
      body.instructions = instructions;
    }

    /* NOTE:
     * By default, a Run will use the model and tools configuration specified in Assistant object,
     * but you can override most of these when creating the Run for added flexibility:
     */
    const run = await createRun({
      openai,
      thread_id,
      body,
    });

    run_id = run.id;
    await cache.set(cacheKey, `${thread_id}:${run_id}`);

    sendMessage(res, {
      sync: true,
      conversationId,
      // messages: previousMessages,
      requestMessage,
      responseMessage: {
        user: req.user.id,
        messageId: openai.responseMessage.messageId,
        parentMessageId: userMessageId,
    const sendInitialResponse = () => {
      sendMessage(res, {
        sync: true,
        conversationId,
        assistant_id,
        thread_id,
        model: assistant_id,
      },
    });
        // messages: previousMessages,
        requestMessage,
        responseMessage: {
          user: req.user.id,
          messageId: openai.responseMessage.messageId,
          parentMessageId: userMessageId,
          conversationId,
          assistant_id,
          thread_id,
          model: assistant_id,
        },
      });
    };

    // todo: retry logic
    let response = await runAssistant({ openai, thread_id, run_id });
    logger.debug('[/assistants/chat/] response', response);
    /** @type {RunResponse | typeof StreamRunManager | undefined} */
    let response;

    if (response.run.status === RunStatus.IN_PROGRESS) {
      response = await runAssistant({
    const processRun = async (retry = false) => {
      if (req.app.locals[EModelEndpoint.azureOpenAI]?.assistants) {
        if (retry) {
          response = await runAssistant({
            openai,
            thread_id,
            run_id,
            in_progress: openai.in_progress,
          });
          return;
        }

        /* NOTE:
         * By default, a Run will use the model and tools configuration specified in Assistant object,
         * but you can override most of these when creating the Run for added flexibility:
         */
        const run = await createRun({
          openai,
          thread_id,
          body,
        });

        run_id = run.id;
        await cache.set(cacheKey, `${thread_id}:${run_id}`, ten_minutes);
        sendInitialResponse();

        // todo: retry logic
        response = await runAssistant({ openai, thread_id, run_id });
        return;
      }

      /** @type {{[AssistantStreamEvents.ThreadRunCreated]: (event: ThreadRunCreated) => Promise<void>}} */
      const handlers = {
        [AssistantStreamEvents.ThreadRunCreated]: async (event) => {
          await cache.set(cacheKey, `${thread_id}:${event.data.id}`, ten_minutes);
          run_id = event.data.id;
          sendInitialResponse();
        },
      };

      const streamRunManager = new StreamRunManager({
        req,
        res,
        openai,
        thread_id,
        run_id,
        in_progress: openai.in_progress,
        responseMessage: openai.responseMessage,
        handlers,
        // streamOptions: {

        // },
      });

      await streamRunManager.runAssistant({
        thread_id,
        body,
      });

      response = streamRunManager;
    };

    await processRun();
    logger.debug('[/assistants/chat/] response', {
      run: response.run,
      steps: response.steps,
    });

    if (response.run.status === RunStatus.CANCELLED) {
      logger.debug('[/assistants/chat/] Run cancelled, handled by `abortRun`');
      return res.end();
    }

    if (response.run.status === RunStatus.IN_PROGRESS) {
      processRun(true);
    }

    completedRun = response.run;

    /** @type {ResponseMessage} */
    const responseMessage = {
      ...openai.responseMessage,
      ...response.finalMessage,
      parentMessageId: userMessageId,
      conversationId,
      user: req.user.id,
@@ -413,9 +507,6 @@ router.post('/', validateModel, buildEndpointOption, setHeaders, async (req, res
      model: assistant_id,
    };

    // TODO: token count from usage returned in run
    // TODO: parse responses, save to db, send to user

    sendMessage(res, {
      title: 'New Chat',
      final: true,
@@ -432,7 +523,7 @@ router.post('/', validateModel, buildEndpointOption, setHeaders, async (req, res
    if (parentMessageId === Constants.NO_PARENT && !_thread_id) {
      addTitle(req, {
        text,
        responseText: openai.responseText,
        responseText: response.text,
        conversationId,
        client,
      });
@@ -447,7 +538,7 @@ router.post('/', validateModel, buildEndpointOption, setHeaders, async (req, res

    if (!response.run.usage) {
      await sleep(3000);
      completedRun = await openai.beta.threads.runs.retrieve(thread_id, run.id);
      completedRun = await openai.beta.threads.runs.retrieve(thread_id, response.run.id);
      if (completedRun.usage) {
        await recordUsage({
          ...completedRun.usage,
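
The chat route above now caches `${thread_id}:${run_id}` under the run's abort cache key with a ten-minute TTL, and the commit adds abortable stream runs; the `/abort` handler itself is not shown in this diff. Below is a rough sketch of how such a cached key could be used to cancel an in-flight run, assuming a generic async cache and the SDK's `runs.cancel` method (function and parameter names are illustrative).

// Illustrative sketch only; not the abortRun implementation from this commit.
async function abortCachedRun({ openai, cache, cacheKey }) {
  // the chat route stored `${thread_id}:${run_id}` with a ten-minute TTL
  const cached = await cache.get(cacheKey);
  if (!cached) {
    return null; // nothing in flight, or the TTL already expired
  }

  const [thread_id, run_id] = cached.split(':');
  const run = await openai.beta.threads.runs.cancel(thread_id, run_id);
  await cache.delete(cacheKey);
  // the route above checks for RunStatus.CANCELLED and ends the response early
  return run;
}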