mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 00:40:14 +01:00
🤖 feat: OpenAI Assistants v2 (initial support) (#2781)
* 🤖 Assistants V2 Support: Part 1 - Separated Azure Assistants to its own endpoint - File Search / Vector Store integration is incomplete, but can toggle and use storage from playground - Code Interpreter resource files can be added but not deleted - GPT-4o is supported - Many improvements to the Assistants Endpoint overall data-provider v2 changes copy existing route as v1 chore: rename new endpoint to reduce comparison operations and add new azure filesource api: add azureAssistants part 1 force use of version for assistants/assistantsAzure chore: switch name back to azureAssistants refactor type version: string | number Ensure assistants endpoints have version set fix: isArchived type issue in ConversationListParams refactor: update assistants mutations/queries with endpoint/version definitions, update Assistants Map structure chore: FilePreview component ExtendedFile type assertion feat: isAssistantsEndpoint helper chore: remove unused useGenerations chore(buildTree): type issue chore(Advanced): type issue (unused component, maybe in future) first pass for multi-assistant endpoint rewrite fix(listAssistants): pass params correctly feat: list separate assistants by endpoint fix(useTextarea): access assistantMap correctly fix: assistant endpoint switching, resetting ID fix: broken during rewrite, selecting assistant mention fix: set/invalidate assistants endpoint query data correctly feat: Fix issue with assistant ID not being reset correctly getOpenAIClient helper function feat: add toast for assistant deletion fix: assistants delete right after create issue for azure fix: assistant patching refactor: actions to use getOpenAIClient refactor: consolidate logic into helpers file fix: issue where conversation data was not initially available v1 chat support refactor(spendTokens): only early return if completionTokens isNaN fix(OpenAIClient): ensure spendTokens has all necessary params refactor: route/controller logic fix(assistants/initializeClient): use 
defaultHeaders field fix: sanitize default operation id chore: bump openai package first pass v2 action service feat: retroactive domain parsing for actions added via v1 feat: delete db records of actions/assistants on openai assistant deletion chore: remove vision tools from v2 assistants feat: v2 upload and delete assistant vision images WIP first pass, thread attachments fix: show assistant vision files (save local/firebase copy) v2 image continue fix: annotations fix: refine annotations show analyze as error if is no longer submitting before progress reaches 1 and show file_search as retrieval tool fix: abort run, undefined endpoint issue refactor: consolidate capabilities logic and anticipate versioning frontend version 2 changes fix: query selection and filter add endpoint to unknown filepath add file ids to resource, deleting in progress enable/disable file search remove version log * 🤖 Assistants V2 Support: Part 2 🎹 fix: Autocompletion Chrome Bug on Action API Key Input chore: remove `useOriginNavigate` chore: set correct OpenAI Storage Source fix: azure file deletions, instantiate clients by source for deletion update code interpret files info feat: deleteResourceFileId chore: increase poll interval as azure easily rate limits fix: openai file deletions, TODO: evaluate rejected deletion settled promises to determine which to delete from db records file source icons update table file filters chore: file search info and versioning fix: retrieval update with necessary tool_resources if specified fix(useMentions): add optional chaining in case listMap value is undefined fix: force assistant avatar roundedness fix: azure assistants, check correct flag chore: bump data-provider * fix: merge conflict * ci: fix backend tests due to new updates * chore: update .env.example * meilisearch improvements * localization updates * chore: update comparisons * feat: add additional metadata: endpoint, author ID * chore: azureAssistants ENDPOINTS exclusion warning
This commit is contained in:
parent
af8bcb08d6
commit
1a452121fa
158 changed files with 4184 additions and 1204 deletions
|
|
@ -16,10 +16,28 @@ async function endpointController(req, res) {
|
|||
/** @type {TEndpointsConfig} */
|
||||
const mergedConfig = { ...defaultEndpointsConfig, ...customConfigEndpoints };
|
||||
if (mergedConfig[EModelEndpoint.assistants] && req.app.locals?.[EModelEndpoint.assistants]) {
|
||||
const { disableBuilder, retrievalModels, capabilities, ..._rest } =
|
||||
const { disableBuilder, retrievalModels, capabilities, version, ..._rest } =
|
||||
req.app.locals[EModelEndpoint.assistants];
|
||||
|
||||
mergedConfig[EModelEndpoint.assistants] = {
|
||||
...mergedConfig[EModelEndpoint.assistants],
|
||||
version,
|
||||
retrievalModels,
|
||||
disableBuilder,
|
||||
capabilities,
|
||||
};
|
||||
}
|
||||
|
||||
if (
|
||||
mergedConfig[EModelEndpoint.azureAssistants] &&
|
||||
req.app.locals?.[EModelEndpoint.azureAssistants]
|
||||
) {
|
||||
const { disableBuilder, retrievalModels, capabilities, version, ..._rest } =
|
||||
req.app.locals[EModelEndpoint.azureAssistants];
|
||||
|
||||
mergedConfig[EModelEndpoint.azureAssistants] = {
|
||||
...mergedConfig[EModelEndpoint.azureAssistants],
|
||||
version,
|
||||
retrievalModels,
|
||||
disableBuilder,
|
||||
capabilities,
|
||||
|
|
|
|||
650
api/server/controllers/assistants/chatV1.js
Normal file
650
api/server/controllers/assistants/chatV1.js
Normal file
|
|
@ -0,0 +1,650 @@
|
|||
const { v4 } = require('uuid');
|
||||
const {
|
||||
Constants,
|
||||
RunStatus,
|
||||
CacheKeys,
|
||||
ContentTypes,
|
||||
EModelEndpoint,
|
||||
ViolationTypes,
|
||||
ImageVisionTool,
|
||||
checkOpenAIStorage,
|
||||
AssistantStreamEvents,
|
||||
} = require('librechat-data-provider');
|
||||
const {
|
||||
initThread,
|
||||
recordUsage,
|
||||
saveUserMessage,
|
||||
checkMessageGaps,
|
||||
addThreadMetadata,
|
||||
saveAssistantMessage,
|
||||
} = require('~/server/services/Threads');
|
||||
const { sendResponse, sendMessage, sleep, isEnabled, countTokens } = require('~/server/utils');
|
||||
const { runAssistant, createOnTextProgress } = require('~/server/services/AssistantService');
|
||||
const { formatMessage, createVisionPrompt } = require('~/app/clients/prompts');
|
||||
const { createRun, StreamRunManager } = require('~/server/services/Runs');
|
||||
const { addTitle } = require('~/server/services/Endpoints/assistants');
|
||||
const { getTransactions } = require('~/models/Transaction');
|
||||
const checkBalance = require('~/models/checkBalance');
|
||||
const { getConvo } = require('~/models/Conversation');
|
||||
const getLogStores = require('~/cache/getLogStores');
|
||||
const { getModelMaxTokens } = require('~/utils');
|
||||
const { getOpenAIClient } = require('./helpers');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
const { handleAbortError } = require('~/server/middleware');
|
||||
|
||||
/** Abort-cache TTL for an active run (10 minutes). */
const ten_minutes = 1000 * 60 * 10;

/**
 * @route POST /
 * @desc Chat with an assistant (v1 Assistants API flow: thread init, run
 *       creation/streaming, abort handling, balance check, usage recording).
 * @access Public
 * @param {Express.Request} req - The request object, containing the request data.
 * @param {Express.Response} res - The response object, used to send back a response.
 * @returns {void}
 */
const chatV1 = async (req, res) => {
  logger.debug('[/assistants/chat/] req.body', req.body);

  const {
    text,
    model,
    endpoint,
    files = [],
    promptPrefix,
    assistant_id,
    instructions,
    thread_id: _thread_id,
    messageId: _messageId,
    conversationId: convoId,
    parentMessageId: _parentId = Constants.NO_PARENT,
  } = req.body;

  /** @type {Partial<TAssistantEndpoint>} */
  const assistantsConfig = req.app.locals?.[endpoint];

  // Enforce the endpoint's allow/deny lists for assistant IDs before doing any work.
  if (assistantsConfig) {
    const { supportedIds, excludedIds } = assistantsConfig;
    const error = { message: 'Assistant not supported' };
    if (supportedIds?.length && !supportedIds.includes(assistant_id)) {
      return await handleAbortError(res, req, error, {
        sender: 'System',
        conversationId: convoId,
        messageId: v4(),
        parentMessageId: _messageId,
        error,
      });
    } else if (excludedIds?.length && excludedIds.includes(assistant_id)) {
      return await handleAbortError(res, req, error, {
        sender: 'System',
        conversationId: convoId,
        messageId: v4(),
        parentMessageId: _messageId,
        // fix: include `error` like the supportedIds branch above, so the
        // client receives the abort message for excluded assistants too.
        error,
      });
    }
  }

  /** @type {OpenAIClient} */
  let openai;
  /** @type {string|undefined} - the current thread id */
  let thread_id = _thread_id;
  /** @type {string|undefined} - the current run id */
  let run_id;
  /** @type {string|undefined} - the parent messageId */
  let parentMessageId = _parentId;
  /** @type {TMessage[]} */
  let previousMessages = [];
  /** @type {import('librechat-data-provider').TConversation | null} */
  let conversation = null;
  /** @type {string[]} */
  let file_ids = [];
  /** @type {Set<string>} */
  let attachedFileIds = new Set();
  /** @type {TMessage | null} */
  let requestMessage = null;
  /** @type {undefined | Promise<ChatCompletion>} */
  let visionPromise;

  const userMessageId = v4();
  const responseMessageId = v4();

  /** @type {string} - The conversation UUID - created if undefined */
  const conversationId = convoId ?? v4();

  const cache = getLogStores(CacheKeys.ABORT_KEYS);
  const cacheKey = `${req.user.id}:${conversationId}`;

  /** @type {Run | undefined} - The completed run, undefined if incomplete */
  let completedRun;

  /**
   * Central error path: classifies known error messages, attempts to cancel
   * the in-flight run, records whatever usage the run accrued, and sends a
   * final event reconstructed via `checkMessageGaps`.
   */
  const handleError = async (error) => {
    const defaultErrorMessage =
      'The Assistant run failed to initialize. Try sending a message in a new conversation.';
    const messageData = {
      thread_id,
      assistant_id,
      conversationId,
      parentMessageId,
      sender: 'System',
      user: req.user.id,
      shouldSaveMessage: false,
      messageId: responseMessageId,
      endpoint,
    };

    if (error.message === 'Run cancelled') {
      return res.end();
    } else if (error.message === 'Request closed' && completedRun) {
      // Client disconnected after the run completed; nothing left to do.
      return;
    } else if (error.message === 'Request closed') {
      logger.debug('[/assistants/chat/] Request aborted on close');
    } else if (/Files.*are invalid/.test(error.message)) {
      const errorMessage = `Files are invalid, or may not have uploaded yet.${
        endpoint === EModelEndpoint.azureAssistants
          ? ' If using Azure OpenAI, files are only available in the region of the assistant\'s model at the time of upload.'
          : ''
      }`;
      return sendResponse(res, messageData, errorMessage);
    } else if (error?.message?.includes('string too long')) {
      return sendResponse(
        res,
        messageData,
        'Message too long. The Assistants API has a limit of 32,768 characters per message. Please shorten it and try again.',
      );
    } else if (error?.message?.includes(ViolationTypes.TOKEN_BALANCE)) {
      return sendResponse(res, messageData, error.message);
    } else {
      logger.error('[/assistants/chat/]', error);
    }

    // Without a client/thread/run there is nothing to cancel or recover.
    if (!openai || !thread_id || !run_id) {
      return sendResponse(res, messageData, defaultErrorMessage);
    }

    await sleep(2000);

    try {
      const status = await cache.get(cacheKey);
      if (status === 'cancelled') {
        logger.debug('[/assistants/chat/] Run already cancelled');
        return res.end();
      }
      await cache.delete(cacheKey);
      const cancelledRun = await openai.beta.threads.runs.cancel(thread_id, run_id);
      logger.debug('[/assistants/chat/] Cancelled run:', cancelledRun);
    } catch (error) {
      logger.error('[/assistants/chat/] Error cancelling run', error);
    }

    // Give the API a moment to settle before retrieving final run state.
    await sleep(2000);

    let run;
    try {
      run = await openai.beta.threads.runs.retrieve(thread_id, run_id);
      await recordUsage({
        ...run.usage,
        model: run.model,
        user: req.user.id,
        conversationId,
      });
    } catch (error) {
      logger.error('[/assistants/chat/] Error fetching or processing run', error);
    }

    let finalEvent;
    try {
      const runMessages = await checkMessageGaps({
        openai,
        run_id,
        endpoint,
        thread_id,
        conversationId,
        latestMessageId: responseMessageId,
      });

      const errorContentPart = {
        text: {
          value:
            error?.message ?? 'There was an error processing your request. Please try again later.',
        },
        type: ContentTypes.ERROR,
      };

      if (!Array.isArray(runMessages[runMessages.length - 1]?.content)) {
        runMessages[runMessages.length - 1].content = [errorContentPart];
      } else {
        const contentParts = runMessages[runMessages.length - 1].content;
        // Mark any tool call that never produced output so the client does not
        // render a perpetually-pending tool, then append the error part.
        for (let i = 0; i < contentParts.length; i++) {
          const currentPart = contentParts[i];
          /** @type {CodeToolCall | RetrievalToolCall | FunctionToolCall | undefined} */
          const toolCall = currentPart?.[ContentTypes.TOOL_CALL];
          if (
            toolCall &&
            toolCall?.function &&
            !(toolCall?.function?.output || toolCall?.function?.output?.length)
          ) {
            contentParts[i] = {
              ...currentPart,
              [ContentTypes.TOOL_CALL]: {
                ...toolCall,
                function: {
                  ...toolCall.function,
                  output: 'error processing tool',
                },
              },
            };
          }
        }
        runMessages[runMessages.length - 1].content.push(errorContentPart);
      }

      finalEvent = {
        final: true,
        conversation: await getConvo(req.user.id, conversationId),
        runMessages,
      };
    } catch (error) {
      logger.error('[/assistants/chat/] Error finalizing error process', error);
      return sendResponse(res, messageData, 'The Assistant run failed');
    }

    return sendResponse(res, finalEvent);
  };

  try {
    res.on('close', async () => {
      if (!completedRun) {
        await handleError(new Error('Request closed'));
      }
    });

    if (convoId && !_thread_id) {
      completedRun = true;
      throw new Error('Missing thread_id for existing conversation');
    }

    if (!assistant_id) {
      completedRun = true;
      throw new Error('Missing assistant_id');
    }

    /**
     * When CHECK_BALANCE is enabled, estimates prompt tokens (prior
     * transactions + current text + a small buffer) capped at the model's
     * context window, and verifies the user can afford the run.
     */
    const checkBalanceBeforeRun = async () => {
      if (!isEnabled(process.env.CHECK_BALANCE)) {
        return;
      }
      const transactions =
        (await getTransactions({
          user: req.user.id,
          context: 'message',
          conversationId,
        })) ?? [];

      const totalPreviousTokens = Math.abs(
        transactions.reduce((acc, curr) => acc + curr.rawAmount, 0),
      );

      // TODO: make promptBuffer a config option; buffer for titles, needs buffer for system instructions
      const promptBuffer = parentMessageId === Constants.NO_PARENT && !_thread_id ? 200 : 0;
      // 5 is added for labels
      let promptTokens = (await countTokens(text + (promptPrefix ?? ''))) + 5;
      promptTokens += totalPreviousTokens + promptBuffer;
      // Count tokens up to the current context window
      promptTokens = Math.min(promptTokens, getModelMaxTokens(model));

      await checkBalance({
        req,
        res,
        txData: {
          model,
          user: req.user.id,
          tokenType: 'prompt',
          amount: promptTokens,
        },
      });
    };

    const { openai: _openai, client } = await getOpenAIClient({
      req,
      res,
      endpointOption: req.body.endpointOption,
      initAppClient: true,
    });

    openai = _openai;

    if (previousMessages.length) {
      parentMessageId = previousMessages[previousMessages.length - 1].messageId;
    }

    let userMessage = {
      role: 'user',
      content: text,
      metadata: {
        messageId: userMessageId,
      },
    };

    /** @type {CreateRunBody | undefined} */
    const body = {
      assistant_id,
      model,
    };

    if (promptPrefix) {
      body.additional_instructions = promptPrefix;
    }

    if (instructions) {
      body.instructions = instructions;
    }

    /**
     * Collects file IDs from the request and (for existing conversations)
     * from the stored conversation record, populating `file_ids`,
     * `userMessage.file_ids`, and `attachedFileIds`.
     */
    const getRequestFileIds = async () => {
      let thread_file_ids = [];
      if (convoId) {
        const convo = await getConvo(req.user.id, convoId);
        if (convo && convo.file_ids) {
          thread_file_ids = convo.file_ids;
        }
      }

      file_ids = files.map(({ file_id }) => file_id);
      if (file_ids.length || thread_file_ids.length) {
        userMessage.file_ids = file_ids;
        attachedFileIds = new Set([...file_ids, ...thread_file_ids]);
      }
    };

    /**
     * If the assistant has the image-vision tool and any attachment lives
     * outside OpenAI storage, kicks off a vision completion (`visionPromise`)
     * and appends instructions telling the assistant to use the vision tool.
     * Returns the processed files, or undefined when vision is not needed.
     */
    const addVisionPrompt = async () => {
      if (!req.body.endpointOption.attachments) {
        return;
      }

      /** @type {MongoFile[]} */
      const attachments = await req.body.endpointOption.attachments;
      if (attachments && attachments.every((attachment) => checkOpenAIStorage(attachment.source))) {
        return;
      }

      const assistant = await openai.beta.assistants.retrieve(assistant_id);
      const visionToolIndex = assistant.tools.findIndex(
        (tool) => tool?.function && tool?.function?.name === ImageVisionTool.function.name,
      );

      if (visionToolIndex === -1) {
        return;
      }

      let visionMessage = {
        role: 'user',
        content: '',
      };
      // NOTE: shadows the request-level `files`; these are the client-processed images.
      const files = await client.addImageURLs(visionMessage, attachments);
      if (!visionMessage.image_urls?.length) {
        return;
      }

      const imageCount = visionMessage.image_urls.length;
      const plural = imageCount > 1;
      visionMessage.content = createVisionPrompt(plural);
      visionMessage = formatMessage({ message: visionMessage, endpoint: EModelEndpoint.openAI });

      // Fire-and-hold: awaited later by the run/stream manager via `visionPromise`.
      visionPromise = openai.chat.completions.create({
        model: 'gpt-4-vision-preview',
        messages: [visionMessage],
        max_tokens: 4000,
      });

      const pluralized = plural ? 's' : '';
      body.additional_instructions = `${
        body.additional_instructions ? `${body.additional_instructions}\n` : ''
      }The user has uploaded ${imageCount} image${pluralized}.
        Use the \`${ImageVisionTool.function.name}\` tool to retrieve ${
          plural ? '' : 'a '
        }detailed text description${pluralized} for ${plural ? 'each' : 'the'} image${pluralized}.`;

      return files;
    };

    /**
     * Creates (or reuses) the thread with the user message, wires up text
     * progress streaming, saves the user message, and seeds the conversation
     * object for the final event.
     */
    const initializeThread = async () => {
      /** @type {[ undefined | MongoFile[]]}*/
      const [processedFiles] = await Promise.all([addVisionPrompt(), getRequestFileIds()]);
      // TODO: may allow multiple messages to be created beforehand in a future update
      const initThreadBody = {
        messages: [userMessage],
        metadata: {
          user: req.user.id,
          conversationId,
        },
      };

      if (processedFiles) {
        // Non-OpenAI-storage files were handled by the vision path; strip them
        // from the thread message so the Assistants API never sees their IDs.
        for (const file of processedFiles) {
          if (!checkOpenAIStorage(file.source)) {
            attachedFileIds.delete(file.file_id);
            const index = file_ids.indexOf(file.file_id);
            if (index > -1) {
              file_ids.splice(index, 1);
            }
          }
        }

        userMessage.file_ids = file_ids;
      }

      const result = await initThread({ openai, body: initThreadBody, thread_id });
      thread_id = result.thread_id;

      createOnTextProgress({
        openai,
        conversationId,
        userMessageId,
        messageId: responseMessageId,
        thread_id,
      });

      requestMessage = {
        user: req.user.id,
        text,
        messageId: userMessageId,
        parentMessageId,
        // TODO: make sure client sends correct format for `files`, use zod
        files,
        file_ids,
        conversationId,
        isCreatedByUser: true,
        assistant_id,
        thread_id,
        model: assistant_id,
        endpoint,
      };

      previousMessages.push(requestMessage);

      /* asynchronous */
      saveUserMessage({ ...requestMessage, model });

      conversation = {
        conversationId,
        endpoint,
        promptPrefix: promptPrefix,
        instructions: instructions,
        assistant_id,
        // model,
      };

      if (file_ids.length) {
        conversation.file_ids = file_ids;
      }
    };

    const promises = [initializeThread(), checkBalanceBeforeRun()];
    await Promise.all(promises);

    /** Streams the initial sync event once a run id exists. */
    const sendInitialResponse = () => {
      sendMessage(res, {
        sync: true,
        conversationId,
        // messages: previousMessages,
        requestMessage,
        responseMessage: {
          user: req.user.id,
          messageId: openai.responseMessage.messageId,
          parentMessageId: userMessageId,
          conversationId,
          assistant_id,
          thread_id,
          model: assistant_id,
        },
      });
    };

    /** @type {RunResponse | typeof StreamRunManager | undefined} */
    let response;

    /**
     * Executes the run. Azure uses the non-streaming poll path (with a retry
     * branch for in-progress runs); everything else streams via
     * StreamRunManager. Sets the outer `response` and `run_id`.
     */
    const processRun = async (retry = false) => {
      if (endpoint === EModelEndpoint.azureAssistants) {
        body.model = openai._options.model;
        openai.attachedFileIds = attachedFileIds;
        openai.visionPromise = visionPromise;
        if (retry) {
          response = await runAssistant({
            openai,
            thread_id,
            run_id,
            in_progress: openai.in_progress,
          });
          return;
        }

        /* NOTE:
         * By default, a Run will use the model and tools configuration specified in Assistant object,
         * but you can override most of these when creating the Run for added flexibility:
         */
        const run = await createRun({
          openai,
          thread_id,
          body,
        });

        run_id = run.id;
        await cache.set(cacheKey, `${thread_id}:${run_id}`, ten_minutes);
        sendInitialResponse();

        // todo: retry logic
        response = await runAssistant({ openai, thread_id, run_id });
        return;
      }

      /** @type {{[AssistantStreamEvents.ThreadRunCreated]: (event: ThreadRunCreated) => Promise<void>}} */
      const handlers = {
        [AssistantStreamEvents.ThreadRunCreated]: async (event) => {
          await cache.set(cacheKey, `${thread_id}:${event.data.id}`, ten_minutes);
          run_id = event.data.id;
          sendInitialResponse();
        },
      };

      const streamRunManager = new StreamRunManager({
        req,
        res,
        openai,
        handlers,
        thread_id,
        visionPromise,
        attachedFileIds,
        responseMessage: openai.responseMessage,
        // streamOptions: {

        // },
      });

      await streamRunManager.runAssistant({
        thread_id,
        body,
      });

      response = streamRunManager;
    };

    await processRun();
    logger.debug('[/assistants/chat/] response', {
      run: response.run,
      steps: response.steps,
    });

    if (response.run.status === RunStatus.CANCELLED) {
      logger.debug('[/assistants/chat/] Run cancelled, handled by `abortRun`');
      return res.end();
    }

    if (response.run.status === RunStatus.IN_PROGRESS) {
      // fix: await the retry. Previously this promise floated, so the code
      // below finalized with the stale in-progress `response` while the retry
      // still ran, and a retry rejection became an unhandled rejection
      // instead of flowing to handleError.
      await processRun(true);
    }

    completedRun = response.run;

    /** @type {ResponseMessage} */
    const responseMessage = {
      ...(response.responseMessage ?? response.finalMessage),
      parentMessageId: userMessageId,
      conversationId,
      user: req.user.id,
      assistant_id,
      thread_id,
      model: assistant_id,
      endpoint,
    };

    // Send the final event and close the HTTP response before the slower
    // persistence/usage bookkeeping below.
    sendMessage(res, {
      final: true,
      conversation,
      requestMessage: {
        parentMessageId,
        thread_id,
      },
    });
    res.end();

    await saveAssistantMessage({ ...responseMessage, model });

    if (parentMessageId === Constants.NO_PARENT && !_thread_id) {
      // Title generation only for brand-new conversations; intentionally not awaited.
      addTitle(req, {
        text,
        responseText: response.text,
        conversationId,
        client,
      });
    }

    await addThreadMetadata({
      openai,
      thread_id,
      messageId: responseMessage.messageId,
      messages: response.messages,
    });

    if (!response.run.usage) {
      // Usage may lag behind run completion; retry once after a short wait.
      await sleep(3000);
      completedRun = await openai.beta.threads.runs.retrieve(thread_id, response.run.id);
      if (completedRun.usage) {
        await recordUsage({
          ...completedRun.usage,
          user: req.user.id,
          model: completedRun.model ?? model,
          conversationId,
        });
      }
    } else {
      await recordUsage({
        ...response.run.usage,
        user: req.user.id,
        model: response.run.model ?? model,
        conversationId,
      });
    }
  } catch (error) {
    await handleError(error);
  }
};

module.exports = chatV1;
|
||||
618
api/server/controllers/assistants/chatV2.js
Normal file
618
api/server/controllers/assistants/chatV2.js
Normal file
|
|
@ -0,0 +1,618 @@
|
|||
const { v4 } = require('uuid');
|
||||
const {
|
||||
Constants,
|
||||
RunStatus,
|
||||
CacheKeys,
|
||||
ContentTypes,
|
||||
ToolCallTypes,
|
||||
EModelEndpoint,
|
||||
ViolationTypes,
|
||||
retrievalMimeTypes,
|
||||
AssistantStreamEvents,
|
||||
} = require('librechat-data-provider');
|
||||
const {
|
||||
initThread,
|
||||
recordUsage,
|
||||
saveUserMessage,
|
||||
checkMessageGaps,
|
||||
addThreadMetadata,
|
||||
saveAssistantMessage,
|
||||
} = require('~/server/services/Threads');
|
||||
const { sendResponse, sendMessage, sleep, isEnabled, countTokens } = require('~/server/utils');
|
||||
const { runAssistant, createOnTextProgress } = require('~/server/services/AssistantService');
|
||||
const { createRun, StreamRunManager } = require('~/server/services/Runs');
|
||||
const { addTitle } = require('~/server/services/Endpoints/assistants');
|
||||
const { getTransactions } = require('~/models/Transaction');
|
||||
const checkBalance = require('~/models/checkBalance');
|
||||
const { getConvo } = require('~/models/Conversation');
|
||||
const getLogStores = require('~/cache/getLogStores');
|
||||
const { getModelMaxTokens } = require('~/utils');
|
||||
const { getOpenAIClient } = require('./helpers');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
const { handleAbortError } = require('~/server/middleware');
|
||||
|
||||
const ten_minutes = 1000 * 60 * 10;
|
||||
|
||||
/**
|
||||
* @route POST /
|
||||
* @desc Chat with an assistant
|
||||
* @access Public
|
||||
* @param {Express.Request} req - The request object, containing the request data.
|
||||
* @param {Express.Response} res - The response object, used to send back a response.
|
||||
* @returns {void}
|
||||
*/
|
||||
const chatV2 = async (req, res) => {
|
||||
logger.debug('[/assistants/chat/] req.body', req.body);
|
||||
|
||||
/** @type {{ files: MongoFile[]}} */
|
||||
const {
|
||||
text,
|
||||
model,
|
||||
endpoint,
|
||||
files = [],
|
||||
promptPrefix,
|
||||
assistant_id,
|
||||
instructions,
|
||||
thread_id: _thread_id,
|
||||
messageId: _messageId,
|
||||
conversationId: convoId,
|
||||
parentMessageId: _parentId = Constants.NO_PARENT,
|
||||
} = req.body;
|
||||
|
||||
/** @type {Partial<TAssistantEndpoint>} */
|
||||
const assistantsConfig = req.app.locals?.[endpoint];
|
||||
|
||||
if (assistantsConfig) {
|
||||
const { supportedIds, excludedIds } = assistantsConfig;
|
||||
const error = { message: 'Assistant not supported' };
|
||||
if (supportedIds?.length && !supportedIds.includes(assistant_id)) {
|
||||
return await handleAbortError(res, req, error, {
|
||||
sender: 'System',
|
||||
conversationId: convoId,
|
||||
messageId: v4(),
|
||||
parentMessageId: _messageId,
|
||||
error,
|
||||
});
|
||||
} else if (excludedIds?.length && excludedIds.includes(assistant_id)) {
|
||||
return await handleAbortError(res, req, error, {
|
||||
sender: 'System',
|
||||
conversationId: convoId,
|
||||
messageId: v4(),
|
||||
parentMessageId: _messageId,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/** @type {OpenAIClient} */
|
||||
let openai;
|
||||
/** @type {string|undefined} - the current thread id */
|
||||
let thread_id = _thread_id;
|
||||
/** @type {string|undefined} - the current run id */
|
||||
let run_id;
|
||||
/** @type {string|undefined} - the parent messageId */
|
||||
let parentMessageId = _parentId;
|
||||
/** @type {TMessage[]} */
|
||||
let previousMessages = [];
|
||||
/** @type {import('librechat-data-provider').TConversation | null} */
|
||||
let conversation = null;
|
||||
/** @type {string[]} */
|
||||
let file_ids = [];
|
||||
/** @type {Set<string>} */
|
||||
let attachedFileIds = new Set();
|
||||
/** @type {TMessage | null} */
|
||||
let requestMessage = null;
|
||||
|
||||
const userMessageId = v4();
|
||||
const responseMessageId = v4();
|
||||
|
||||
/** @type {string} - The conversation UUID - created if undefined */
|
||||
const conversationId = convoId ?? v4();
|
||||
|
||||
const cache = getLogStores(CacheKeys.ABORT_KEYS);
|
||||
const cacheKey = `${req.user.id}:${conversationId}`;
|
||||
|
||||
/** @type {Run | undefined} - The completed run, undefined if incomplete */
|
||||
let completedRun;
|
||||
|
||||
/**
 * Handles any error raised while initializing or executing the assistant run:
 * maps known error types to user-facing messages, attempts to cancel the
 * in-flight run, records token usage accrued by the partial run, and emits a
 * final event containing any recovered run messages.
 * Closure variables (thread_id, run_id, openai, completedRun, cache, cacheKey,
 * etc.) come from the enclosing chat handler.
 * @param {Error} error - The error thrown during the run lifecycle.
 */
const handleError = async (error) => {
  const defaultErrorMessage =
    'The Assistant run failed to initialize. Try sending a message in a new conversation.';
  // Minimal payload reused by every error response below; never persisted.
  const messageData = {
    thread_id,
    assistant_id,
    conversationId,
    parentMessageId,
    sender: 'System',
    user: req.user.id,
    shouldSaveMessage: false,
    messageId: responseMessageId,
    endpoint,
  };

  if (error.message === 'Run cancelled') {
    return res.end();
  } else if (error.message === 'Request closed' && completedRun) {
    // Client disconnected after the run completed; nothing left to do.
    return;
  } else if (error.message === 'Request closed') {
    logger.debug('[/assistants/chat/] Request aborted on close');
  } else if (/Files.*are invalid/.test(error.message)) {
    const errorMessage = `Files are invalid, or may not have uploaded yet.${
      endpoint === EModelEndpoint.azureAssistants
        ? ' If using Azure OpenAI, files are only available in the region of the assistant\'s model at the time of upload.'
        : ''
    }`;
    return sendResponse(res, messageData, errorMessage);
  } else if (error?.message?.includes('string too long')) {
    return sendResponse(
      res,
      messageData,
      'Message too long. The Assistants API has a limit of 32,768 characters per message. Please shorten it and try again.',
    );
  } else if (error?.message?.includes(ViolationTypes.TOKEN_BALANCE)) {
    return sendResponse(res, messageData, error.message);
  } else {
    logger.error('[/assistants/chat/]', error);
  }

  // Without a client/thread/run there is nothing to cancel or recover from.
  if (!openai || !thread_id || !run_id) {
    return sendResponse(res, messageData, defaultErrorMessage);
  }

  await sleep(2000);

  try {
    // If an abort request already cancelled the run, just end the response.
    const status = await cache.get(cacheKey);
    if (status === 'cancelled') {
      logger.debug('[/assistants/chat/] Run already cancelled');
      return res.end();
    }
    await cache.delete(cacheKey);
    const cancelledRun = await openai.beta.threads.runs.cancel(thread_id, run_id);
    logger.debug('[/assistants/chat/] Cancelled run:', cancelledRun);
  } catch (error) {
    logger.error('[/assistants/chat/] Error cancelling run', error);
  }

  await sleep(2000);

  let run;
  try {
    // Record whatever token usage the partial run accrued before failing.
    run = await openai.beta.threads.runs.retrieve(thread_id, run_id);
    await recordUsage({
      ...run.usage,
      model: run.model,
      user: req.user.id,
      conversationId,
    });
  } catch (error) {
    logger.error('[/assistants/chat/] Error fetching or processing run', error);
  }

  let finalEvent;
  try {
    // Recover any messages the run created that the client may have missed.
    const runMessages = await checkMessageGaps({
      openai,
      run_id,
      endpoint,
      thread_id,
      conversationId,
      latestMessageId: responseMessageId,
    });

    const errorContentPart = {
      text: {
        value:
          error?.message ?? 'There was an error processing your request. Please try again later.',
      },
      type: ContentTypes.ERROR,
    };

    if (!Array.isArray(runMessages[runMessages.length - 1]?.content)) {
      runMessages[runMessages.length - 1].content = [errorContentPart];
    } else {
      const contentParts = runMessages[runMessages.length - 1].content;
      for (let i = 0; i < contentParts.length; i++) {
        const currentPart = contentParts[i];
        /** @type {CodeToolCall | RetrievalToolCall | FunctionToolCall | undefined} */
        const toolCall = currentPart?.[ContentTypes.TOOL_CALL];
        if (
          toolCall &&
          toolCall?.function &&
          !(toolCall?.function?.output || toolCall?.function?.output?.length)
        ) {
          // Mark unfinished function tool calls so the client doesn't wait on them.
          contentParts[i] = {
            ...currentPart,
            [ContentTypes.TOOL_CALL]: {
              ...toolCall,
              function: {
                ...toolCall.function,
                output: 'error processing tool',
              },
            },
          };
        }
      }
      runMessages[runMessages.length - 1].content.push(errorContentPart);
    }

    finalEvent = {
      final: true,
      conversation: await getConvo(req.user.id, conversationId),
      runMessages,
    };
  } catch (error) {
    logger.error('[/assistants/chat/] Error finalizing error process', error);
    return sendResponse(res, messageData, 'The Assistant run failed');
  }

  return sendResponse(res, finalEvent);
};
|
||||
|
||||
try {
|
||||
res.on('close', async () => {
|
||||
if (!completedRun) {
|
||||
await handleError(new Error('Request closed'));
|
||||
}
|
||||
});
|
||||
|
||||
if (convoId && !_thread_id) {
|
||||
completedRun = true;
|
||||
throw new Error('Missing thread_id for existing conversation');
|
||||
}
|
||||
|
||||
if (!assistant_id) {
|
||||
completedRun = true;
|
||||
throw new Error('Missing assistant_id');
|
||||
}
|
||||
|
||||
/**
 * Pre-run balance check: estimates the prompt token cost (including prior
 * spend on this conversation) and verifies the user's balance can cover it.
 * No-op unless the CHECK_BALANCE env flag is enabled.
 */
const checkBalanceBeforeRun = async () => {
  if (!isEnabled(process.env.CHECK_BALANCE)) {
    return;
  }
  const transactions =
    (await getTransactions({
      user: req.user.id,
      context: 'message',
      conversationId,
    })) ?? [];

  // Spend transactions carry negative rawAmount values; sum then take magnitude.
  const totalPreviousTokens = Math.abs(
    transactions.reduce((acc, curr) => acc + curr.rawAmount, 0),
  );

  // TODO: make promptBuffer a config option; buffer for titles, needs buffer for system instructions
  // Extra buffer only applies to brand-new conversations (no parent, no thread).
  const promptBuffer = parentMessageId === Constants.NO_PARENT && !_thread_id ? 200 : 0;
  // 5 is added for labels
  let promptTokens = (await countTokens(text + (promptPrefix ?? ''))) + 5;
  promptTokens += totalPreviousTokens + promptBuffer;
  // Count tokens up to the current context window
  promptTokens = Math.min(promptTokens, getModelMaxTokens(model));

  await checkBalance({
    req,
    res,
    txData: {
      model,
      user: req.user.id,
      tokenType: 'prompt',
      amount: promptTokens,
    },
  });
};
|
||||
|
||||
const { openai: _openai, client } = await getOpenAIClient({
|
||||
req,
|
||||
res,
|
||||
endpointOption: req.body.endpointOption,
|
||||
initAppClient: true,
|
||||
});
|
||||
|
||||
openai = _openai;
|
||||
|
||||
if (previousMessages.length) {
|
||||
parentMessageId = previousMessages[previousMessages.length - 1].messageId;
|
||||
}
|
||||
|
||||
let userMessage = {
|
||||
role: 'user',
|
||||
content: [
|
||||
{
|
||||
type: ContentTypes.TEXT,
|
||||
text,
|
||||
},
|
||||
],
|
||||
metadata: {
|
||||
messageId: userMessageId,
|
||||
},
|
||||
};
|
||||
|
||||
/** @type {CreateRunBody | undefined} */
|
||||
const body = {
|
||||
assistant_id,
|
||||
model,
|
||||
};
|
||||
|
||||
if (promptPrefix) {
|
||||
body.additional_instructions = promptPrefix;
|
||||
}
|
||||
|
||||
if (instructions) {
|
||||
body.instructions = instructions;
|
||||
}
|
||||
|
||||
/**
 * Collects file IDs from the request and from an existing conversation, and
 * attaches uploaded files to the user message:
 * - images are embedded as `image_file` content parts,
 * - every file is attached with the code_interpreter tool,
 * - non-image files whose MIME type supports retrieval also get file_search.
 * Mutates the enclosing scope's `attachedFileIds`, `file_ids`, and `userMessage`.
 */
const getRequestFileIds = async () => {
  let thread_file_ids = [];
  if (convoId) {
    const convo = await getConvo(req.user.id, convoId);
    if (convo && convo.file_ids) {
      thread_file_ids = convo.file_ids;
    }
  }

  if (files.length || thread_file_ids.length) {
    attachedFileIds = new Set([...file_ids, ...thread_file_ids]);

    for (const file of files) {
      file_ids.push(file.file_id);
      if (file.type.startsWith('image')) {
        userMessage.content.push({
          type: ContentTypes.IMAGE_FILE,
          [ContentTypes.IMAGE_FILE]: { file_id: file.file_id },
        });
      }

      if (!userMessage.attachments) {
        userMessage.attachments = [];
      }

      /* Fix: the original tracked a separate `attachmentIndex` that was only
       * incremented for non-image files (the image branch `continue`d before
       * the increment), while an attachment was pushed for EVERY file — so as
       * soon as an image preceded a document, the file_search tool was pushed
       * onto the wrong attachment. Holding a reference to the attachment just
       * pushed removes the index desync entirely. */
      const attachment = {
        file_id: file.file_id,
        tools: [{ type: ToolCallTypes.CODE_INTERPRETER }],
      };
      userMessage.attachments.push(attachment);

      // Images are not eligible for file_search; skip the MIME check.
      if (file.type.startsWith('image')) {
        continue;
      }

      const mimeType = file.type;
      const isSupportedByRetrieval = retrievalMimeTypes.some((regex) => regex.test(mimeType));
      if (isSupportedByRetrieval) {
        attachment.tools.push({
          type: ToolCallTypes.FILE_SEARCH,
        });
      }
    }
  }
};
|
||||
|
||||
/**
 * Initializes (or reuses) the OpenAI thread for this conversation: gathers
 * request file IDs, creates/extends the thread with the user message, wires up
 * text-progress streaming, persists the user message asynchronously, and
 * builds the conversation metadata to save.
 * Mutates the enclosing scope's `thread_id`, `requestMessage`, `conversation`,
 * and `previousMessages`.
 */
const initializeThread = async () => {
  await getRequestFileIds();

  // TODO: may allow multiple messages to be created beforehand in a future update
  const initThreadBody = {
    messages: [userMessage],
    metadata: {
      user: req.user.id,
      conversationId,
    },
  };

  // Reuses the existing thread when thread_id is set; otherwise creates one.
  const result = await initThread({ openai, body: initThreadBody, thread_id });
  thread_id = result.thread_id;

  // Stream intermediate text deltas to the client as the run progresses.
  createOnTextProgress({
    openai,
    conversationId,
    userMessageId,
    messageId: responseMessageId,
    thread_id,
  });

  requestMessage = {
    user: req.user.id,
    text,
    messageId: userMessageId,
    parentMessageId,
    // TODO: make sure client sends correct format for `files`, use zod
    files,
    file_ids,
    conversationId,
    isCreatedByUser: true,
    assistant_id,
    thread_id,
    // The assistant ID doubles as the "model" for assistants-endpoint messages.
    model: assistant_id,
    endpoint,
  };

  previousMessages.push(requestMessage);

  /* asynchronous */
  saveUserMessage({ ...requestMessage, model });

  conversation = {
    conversationId,
    endpoint,
    promptPrefix: promptPrefix,
    instructions: instructions,
    assistant_id,
    // model,
  };

  if (file_ids.length) {
    conversation.file_ids = file_ids;
  }
};
|
||||
|
||||
const promises = [initializeThread(), checkBalanceBeforeRun()];
|
||||
await Promise.all(promises);
|
||||
|
||||
/**
 * Sends the initial sync event to the client once a run exists, pairing the
 * saved request message with a stub response message the client can render
 * while streaming continues.
 */
const sendInitialResponse = () => {
  sendMessage(res, {
    sync: true,
    conversationId,
    // messages: previousMessages,
    requestMessage,
    responseMessage: {
      user: req.user.id,
      messageId: openai.responseMessage.messageId,
      parentMessageId: userMessageId,
      conversationId,
      assistant_id,
      thread_id,
      // The assistant ID doubles as the "model" for assistants-endpoint messages.
      model: assistant_id,
    },
  });
};
|
||||
|
||||
/** @type {RunResponse | typeof StreamRunManager | undefined} */
|
||||
let response;
|
||||
|
||||
/**
 * Creates and executes the assistant run. Azure uses the legacy polling path
 * (`createRun` + `runAssistant`); the OpenAI endpoint streams via
 * StreamRunManager. The thread/run pair is cached so an abort request can
 * cancel it. Sets the enclosing scope's `response` and `run_id`.
 * @param {boolean} [retry=false] - Azure-only: resume polling an existing run
 *   instead of creating a new one.
 */
const processRun = async (retry = false) => {
  if (endpoint === EModelEndpoint.azureAssistants) {
    body.model = openai._options.model;
    openai.attachedFileIds = attachedFileIds;
    if (retry) {
      response = await runAssistant({
        openai,
        thread_id,
        run_id,
        in_progress: openai.in_progress,
      });
      return;
    }

    /* NOTE:
     * By default, a Run will use the model and tools configuration specified in Assistant object,
     * but you can override most of these when creating the Run for added flexibility:
     */
    const run = await createRun({
      openai,
      thread_id,
      body,
    });

    run_id = run.id;
    // Cache the thread/run pair so `abortRun` can cancel it on user request.
    await cache.set(cacheKey, `${thread_id}:${run_id}`, ten_minutes);
    sendInitialResponse();

    // todo: retry logic
    response = await runAssistant({ openai, thread_id, run_id });
    return;
  }

  /** @type {{[AssistantStreamEvents.ThreadRunCreated]: (event: ThreadRunCreated) => Promise<void>}} */
  const handlers = {
    // Once the streamed run is created, cache its ID and notify the client.
    [AssistantStreamEvents.ThreadRunCreated]: async (event) => {
      await cache.set(cacheKey, `${thread_id}:${event.data.id}`, ten_minutes);
      run_id = event.data.id;
      sendInitialResponse();
    },
  };

  const streamRunManager = new StreamRunManager({
    req,
    res,
    openai,
    handlers,
    thread_id,
    attachedFileIds,
    responseMessage: openai.responseMessage,
    // streamOptions: {

    // },
  });

  await streamRunManager.runAssistant({
    thread_id,
    body,
  });

  response = streamRunManager;
};
|
||||
|
||||
await processRun();
|
||||
logger.debug('[/assistants/chat/] response', {
|
||||
run: response.run,
|
||||
steps: response.steps,
|
||||
});
|
||||
|
||||
if (response.run.status === RunStatus.CANCELLED) {
|
||||
logger.debug('[/assistants/chat/] Run cancelled, handled by `abortRun`');
|
||||
return res.end();
|
||||
}
|
||||
|
||||
if (response.run.status === RunStatus.IN_PROGRESS) {
|
||||
processRun(true);
|
||||
}
|
||||
|
||||
completedRun = response.run;
|
||||
|
||||
/** @type {ResponseMessage} */
|
||||
const responseMessage = {
|
||||
...(response.responseMessage ?? response.finalMessage),
|
||||
parentMessageId: userMessageId,
|
||||
conversationId,
|
||||
user: req.user.id,
|
||||
assistant_id,
|
||||
thread_id,
|
||||
model: assistant_id,
|
||||
endpoint,
|
||||
};
|
||||
|
||||
sendMessage(res, {
|
||||
final: true,
|
||||
conversation,
|
||||
requestMessage: {
|
||||
parentMessageId,
|
||||
thread_id,
|
||||
},
|
||||
});
|
||||
res.end();
|
||||
|
||||
await saveAssistantMessage({ ...responseMessage, model });
|
||||
|
||||
if (parentMessageId === Constants.NO_PARENT && !_thread_id) {
|
||||
addTitle(req, {
|
||||
text,
|
||||
responseText: response.text,
|
||||
conversationId,
|
||||
client,
|
||||
});
|
||||
}
|
||||
|
||||
await addThreadMetadata({
|
||||
openai,
|
||||
thread_id,
|
||||
messageId: responseMessage.messageId,
|
||||
messages: response.messages,
|
||||
});
|
||||
|
||||
if (!response.run.usage) {
|
||||
await sleep(3000);
|
||||
completedRun = await openai.beta.threads.runs.retrieve(thread_id, response.run.id);
|
||||
if (completedRun.usage) {
|
||||
await recordUsage({
|
||||
...completedRun.usage,
|
||||
user: req.user.id,
|
||||
model: completedRun.model ?? model,
|
||||
conversationId,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
await recordUsage({
|
||||
...response.run.usage,
|
||||
user: req.user.id,
|
||||
model: response.run.model ?? model,
|
||||
conversationId,
|
||||
});
|
||||
}
|
||||
} catch (error) {
|
||||
await handleError(error);
|
||||
}
|
||||
};
|
||||
|
||||
module.exports = chatV2;
|
||||
158
api/server/controllers/assistants/helpers.js
Normal file
158
api/server/controllers/assistants/helpers.js
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
const { EModelEndpoint, CacheKeys, defaultAssistantsVersion } = require('librechat-data-provider');
|
||||
const {
|
||||
initializeClient: initAzureClient,
|
||||
} = require('~/server/services/Endpoints/azureAssistants');
|
||||
const { initializeClient } = require('~/server/services/Endpoints/assistants');
|
||||
const { getLogStores } = require('~/cache');
|
||||
|
||||
/**
 * Resolves the assistants API version for the current request.
 * Precedence: the `/v*` segment of `req.baseUrl`, then `req.body.version`,
 * then the cached endpoint config (falling back to `defaultAssistantsVersion`).
 * @param {Express.Request} req
 * @param {string} [endpoint] - Assistants endpoint name, used for the config lookup.
 * @returns {Promise<string>} A two-character version tag such as 'v1' or 'v2'.
 * @throws {Error} If no valid `v*` version can be determined.
 */
const getCurrentVersion = async (req, endpoint) => {
  const index = req.baseUrl.lastIndexOf('/v');
  let version = index !== -1 ? req.baseUrl.substring(index + 1, index + 3) : null;
  if (!version && req.body.version) {
    version = `v${req.body.version}`;
  }
  if (!version && endpoint) {
    const cache = getLogStores(CacheKeys.CONFIG_STORE);
    const cachedEndpointsConfig = await cache.get(CacheKeys.ENDPOINT_CONFIG);
    version = `v${
      cachedEndpointsConfig?.[endpoint]?.version ?? defaultAssistantsVersion[endpoint]
    }`;
  }
  /* Fix: the original condition used `&&` (`!version?.startsWith('v') &&
   * version.length !== 2`), which both let malformed versions such as 'v12'
   * through and threw a raw TypeError on `version.length` when version was
   * null. Any of the three failures below must reject the version. */
  if (!version || !version.startsWith('v') || version.length !== 2) {
    throw new Error(`[${req.baseUrl}] Invalid version: ${version}`);
  }
  return version;
};
|
||||
|
||||
/**
 * Lists assistants for the non-Azure path.
 *
 * Resolves the OpenAI client for the current request and forwards the query
 * parameters to the Assistants API list endpoint.
 *
 * @async
 * @param {object} params - The parameters object.
 * @param {object} params.req - The request object, used for initializing the client.
 * @param {object} params.res - The response object, used for initializing the client.
 * @param {string} params.version - The API version to use.
 * @param {object} params.query - The query parameters to list assistants (e.g., limit, order).
 * @returns {Promise<object>} Resolves to the `openai.beta.assistants.list` response.
 */
const listAssistants = async ({ req, res, version, query }) => {
  const client = await getOpenAIClient({ req, res, version });
  return client.openai.beta.assistants.list(query);
};
|
||||
|
||||
/**
 * Lists assistants across every Azure-configured assistant group.
 *
 * For each group, temporarily sets the request's model to the group's first
 * configured model (needed to reach the shared instance), fetches that group's
 * assistants in parallel, then maps each assistant's deployment name back to a
 * user-facing model name and merges everything into a single list response.
 *
 * @async
 * @param {object} params - The parameters object.
 * @param {object} params.req - The request object, used for initializing the client and manipulating the request body.
 * @param {object} params.res - The response object, used for initializing the client.
 * @param {string} params.version - The API version to use.
 * @param {TAzureConfig} params.azureConfig - The Azure configuration object containing assistantGroups and groupMap.
 * @param {object} params.query - The query parameters to list assistants (e.g., limit, order).
 * @returns {Promise<AssistantListResponse>} Merged assistant data with resolved model names.
 */
const listAssistantsForAzure = async ({ req, res, version, azureConfig = {}, query }) => {
  const { groupMap, assistantGroups } = azureConfig;

  /** @type {Array<TAzureGroup>} */
  const groups = [];
  /** @type {Array<[string, TAzureModelConfig]>} */
  const groupModelTuples = [];
  const listPromises = [];

  for (const groupName of assistantGroups) {
    const group = groupMap[groupName];
    groups.push(group);

    const modelTuples = Object.entries(group?.models);
    groupModelTuples.push(modelTuples);

    /* The specified model is only necessary to
      fetch assistants for the shared instance */
    req.body.model = modelTuples[0][0];
    listPromises.push(listAssistants({ req, res, version, query }));
  }

  const resolvedQueries = await Promise.all(listPromises);
  const data = resolvedQueries.flatMap((queryResult, i) =>
    queryResult.data.map((assistant) => {
      const deploymentName = assistant.model;
      const group = groups[i];
      const modelTuples = groupModelTuples[i];
      const defaultModel = modelTuples[0][0];

      if (group.deploymentName === deploymentName) {
        return { ...assistant, model: defaultModel };
      }

      // Map the deployment name back to the first matching configured model.
      const match = modelTuples.find(
        ([, modelConfig]) => modelConfig.deploymentName === deploymentName,
      );
      if (match) {
        return { ...assistant, model: match[0] };
      }

      return { ...assistant, model: defaultModel };
    }),
  );

  return {
    first_id: data[0]?.id,
    last_id: data[data.length - 1]?.id,
    object: 'list',
    has_more: false,
    data,
  };
};
|
||||
|
||||
/**
 * Initializes the OpenAI (or Azure OpenAI) assistants client for the request.
 * @param {object} params
 * @param {object} params.req - Express request; endpoint is read from body/query unless overridden.
 * @param {object} params.res - Express response.
 * @param {object} [params.endpointOption]
 * @param {boolean} [params.initAppClient]
 * @param {string} [params.overrideEndpoint]
 * @returns {Promise<object|undefined>} The initialized client bundle, or
 *   undefined for unrecognized endpoints (matching original behavior).
 * @throws {Error} If no endpoint can be determined from the request.
 */
async function getOpenAIClient({ req, res, endpointOption, initAppClient, overrideEndpoint }) {
  const endpoint = overrideEndpoint ?? req.body.endpoint ?? req.query.endpoint;
  /* Fix: fail fast. The original validated `endpoint` only AFTER calling
   * getCurrentVersion(req, endpoint), so a missing endpoint could surface as a
   * confusing "Invalid version" error instead of the intended message. */
  if (!endpoint) {
    throw new Error(`[${req.baseUrl}] Endpoint is required`);
  }
  const version = await getCurrentVersion(req, endpoint);

  let result;
  if (endpoint === EModelEndpoint.assistants) {
    result = await initializeClient({ req, res, version, endpointOption, initAppClient });
  } else if (endpoint === EModelEndpoint.azureAssistants) {
    result = await initAzureClient({ req, res, version, endpointOption, initAppClient });
  }

  return result;
}
|
||||
|
||||
/**
 * Fetches the list of assistants for the endpoint named in the query string,
 * dispatching to the Azure-specific path when applicable.
 * @param {object} req - Express request; pagination options read from `req.query`.
 * @param {object} res - Express response (used only for client initialization).
 * @returns {Promise<AssistantListResponse|undefined>} The assistant list, or
 *   undefined for unrecognized endpoints.
 */
const fetchAssistants = async (req, res) => {
  const { limit = 100, order = 'desc', after, before, endpoint } = req.query;
  const version = await getCurrentVersion(req, endpoint);
  const query = { limit, order, after, before };

  /** @type {AssistantListResponse} */
  let body;

  switch (endpoint) {
    case EModelEndpoint.assistants:
      ({ body } = await listAssistants({ req, res, version, query }));
      break;
    case EModelEndpoint.azureAssistants: {
      const azureConfig = req.app.locals[EModelEndpoint.azureOpenAI];
      body = await listAssistantsForAzure({ req, res, version, azureConfig, query });
      break;
    }
    default:
      break;
  }

  return body;
};
|
||||
|
||||
module.exports = {
|
||||
getOpenAIClient,
|
||||
fetchAssistants,
|
||||
getCurrentVersion,
|
||||
};
|
||||
262
api/server/controllers/assistants/v1.js
Normal file
262
api/server/controllers/assistants/v1.js
Normal file
|
|
@ -0,0 +1,262 @@
|
|||
const { FileContext } = require('librechat-data-provider');
|
||||
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
|
||||
const { deleteAssistantActions } = require('~/server/services/ActionService');
|
||||
const { uploadImageBuffer } = require('~/server/services/Files/process');
|
||||
const { updateAssistant, getAssistants } = require('~/models/Assistant');
|
||||
const { getOpenAIClient, fetchAssistants } = require('./helpers');
|
||||
const { deleteFileByFilter } = require('~/models/File');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
/**
 * Create an assistant.
 * @route POST /assistants
 * @param {AssistantCreateParams} req.body - The assistant creation parameters.
 * @returns {Assistant} 201 - success response - application/json
 */
const createAssistant = async (req, res) => {
  try {
    const { openai } = await getOpenAIClient({ req, res });

    const { tools = [], endpoint, ...assistantData } = req.body;
    // Resolve tool names (strings) to their registered definitions; drop unknowns.
    assistantData.tools = tools
      .map((tool) => (typeof tool === 'string' ? req.app.locals.availableTools[tool] : tool))
      .filter(Boolean);

    // Azure expects the deployment name as the model; remember the original
    // identifier so the response reflects what the user actually selected.
    let azureModelIdentifier = null;
    if (openai.locals?.azureOptions) {
      azureModelIdentifier = assistantData.model;
      assistantData.model = openai.locals.azureOptions.azureOpenAIApiDeploymentName;
    }

    assistantData.metadata = {
      author: req.user.id,
      endpoint,
    };

    const assistant = await openai.beta.assistants.create(assistantData);
    if (azureModelIdentifier) {
      assistant.model = azureModelIdentifier;
    }
    logger.debug('/assistants/', assistant);
    res.status(201).json(assistant);
  } catch (error) {
    logger.error('[/assistants] Error creating assistant', error);
    res.status(500).json({ error: error.message });
  }
};
|
||||
|
||||
/**
 * Retrieves an assistant by ID.
 * @route GET /assistants/:id
 * @param {string} req.params.id - Assistant identifier.
 * @returns {Assistant} 200 - success response - application/json
 */
const retrieveAssistant = async (req, res) => {
  try {
    /* NOTE: not actually being used right now */
    const { openai } = await getOpenAIClient({ req, res });

    const assistant = await openai.beta.assistants.retrieve(req.params.id);
    res.json(assistant);
  } catch (error) {
    logger.error('[/assistants/:id] Error retrieving assistant', error);
    res.status(500).json({ error: error.message });
  }
};
|
||||
|
||||
/**
 * Modifies an assistant.
 * @route PATCH /assistants/:id
 * @param {object} req - Express Request
 * @param {object} req.params - Request params
 * @param {string} req.params.id - Assistant identifier.
 * @param {AssistantUpdateParams} req.body - The assistant update parameters.
 * @returns {Assistant} 200 - success response - application/json
 */
const patchAssistant = async (req, res) => {
  try {
    const { openai } = await getOpenAIClient({ req, res });

    const assistant_id = req.params.id;
    // `endpoint` is client-side routing metadata; never forward it to the API.
    const { endpoint: _e, ...updateData } = req.body;
    // Resolve tool names (strings) to their registered definitions; drop unknowns.
    updateData.tools = (updateData.tools ?? [])
      .map((tool) => (typeof tool === 'string' ? req.app.locals.availableTools[tool] : tool))
      .filter(Boolean);

    // Azure expects the deployment name as the model identifier.
    if (openai.locals?.azureOptions && updateData.model) {
      updateData.model = openai.locals.azureOptions.azureOpenAIApiDeploymentName;
    }

    const updatedAssistant = await openai.beta.assistants.update(assistant_id, updateData);
    res.json(updatedAssistant);
  } catch (error) {
    logger.error('[/assistants/:id] Error updating assistant', error);
    res.status(500).json({ error: error.message });
  }
};
|
||||
|
||||
/**
 * Deletes an assistant, cleaning up its associated actions on success.
 * @route DELETE /assistants/:id
 * @param {object} req - Express Request
 * @param {object} req.params - Request params
 * @param {string} req.params.id - Assistant identifier.
 * @returns {Assistant} 200 - success response - application/json
 */
const deleteAssistant = async (req, res) => {
  try {
    const { openai } = await getOpenAIClient({ req, res });

    const { id: assistant_id } = req.params;
    const deletionStatus = await openai.beta.assistants.del(assistant_id);
    if (deletionStatus?.deleted) {
      // Remove any actions tied to the deleted assistant.
      await deleteAssistantActions({ req, assistant_id });
    }
    res.json(deletionStatus);
  } catch (error) {
    logger.error('[/assistants/:id] Error deleting assistant', error);
    res.status(500).json({ error: 'Error deleting assistant' });
  }
};
|
||||
|
||||
/**
 * Returns a list of assistants, filtered by the endpoint's configured
 * supported/excluded assistant IDs when such a config exists.
 * @route GET /assistants
 * @param {object} req - Express Request
 * @param {AssistantListParams} req.query - The assistant list parameters for pagination and sorting.
 * @returns {AssistantListResponse} 200 - success response - application/json
 */
const listAssistants = async (req, res) => {
  try {
    const body = await fetchAssistants(req, res);

    /** @type {Partial<TAssistantEndpoint>|undefined} */
    const assistantsConfig = req.app.locals?.[req.query.endpoint];
    if (assistantsConfig) {
      const { supportedIds, excludedIds } = assistantsConfig;
      // `supportedIds` (allow-list) takes precedence over `excludedIds`.
      if (supportedIds?.length) {
        const allowed = new Set(supportedIds);
        body.data = body.data.filter((assistant) => allowed.has(assistant.id));
      } else if (excludedIds?.length) {
        const blocked = new Set(excludedIds);
        body.data = body.data.filter((assistant) => !blocked.has(assistant.id));
      }
    }

    res.json(body);
  } catch (error) {
    logger.error('[/assistants] Error listing assistants', error);
    res.status(500).json({ message: 'Error listing assistants' });
  }
};
|
||||
|
||||
/**
 * Returns a list of the user's assistant documents (metadata saved to database).
 * @route GET /assistants/documents
 * @returns {AssistantDocument[]} 200 - success response - application/json
 */
const getAssistantDocuments = async (req, res) => {
  try {
    const documents = await getAssistants({ user: req.user.id });
    res.json(documents);
  } catch (error) {
    logger.error('[/assistants/documents] Error listing assistant documents', error);
    res.status(500).json({ error: error.message });
  }
};
|
||||
|
||||
/**
 * Uploads and updates an avatar for a specific assistant.
 * Flow: upload the new image, best-effort delete of the previous avatar
 * (storage file + DB record), then persist the avatar in parallel to the local
 * Assistant document and the assistant's OpenAI metadata.
 * @route POST /avatar/:assistant_id
 * @param {object} req - Express Request
 * @param {object} req.params - Request params
 * @param {string} req.params.assistant_id - The ID of the assistant.
 * @param {Express.Multer.File} req.file - The avatar image file.
 * @param {object} req.body - Request body
 * @param {string} [req.body.metadata] - Optional metadata for the assistant's avatar.
 * @returns {Object} 200 - success response - application/json
 */
const uploadAssistantAvatar = async (req, res) => {
  try {
    const { assistant_id } = req.params;
    if (!assistant_id) {
      return res.status(400).json({ message: 'Assistant ID is required' });
    }

    // `metadata` arrives as a JSON string (multipart form field); parsed below.
    let { metadata: _metadata = '{}' } = req.body;
    const { openai } = await getOpenAIClient({ req, res });

    const image = await uploadImageBuffer({
      req,
      context: FileContext.avatar,
      metadata: {
        buffer: req.file.buffer,
      },
    });

    try {
      _metadata = JSON.parse(_metadata);
    } catch (error) {
      // Malformed metadata is non-fatal; fall back to an empty object.
      logger.error('[/avatar/:assistant_id] Error parsing metadata', error);
      _metadata = {};
    }

    // Best-effort removal of the previous avatar (storage + DB record);
    // failures are logged but do not block the new avatar from being saved.
    if (_metadata.avatar && _metadata.avatar_source) {
      const { deleteFile } = getStrategyFunctions(_metadata.avatar_source);
      try {
        await deleteFile(req, { filepath: _metadata.avatar });
        await deleteFileByFilter({ filepath: _metadata.avatar });
      } catch (error) {
        logger.error('[/avatar/:assistant_id] Error deleting old avatar', error);
      }
    }

    const metadata = {
      ..._metadata,
      avatar: image.filepath,
      avatar_source: req.app.locals.fileStrategy,
    };

    // Persist to the local Assistant document and OpenAI metadata in parallel.
    const promises = [];
    promises.push(
      updateAssistant(
        { assistant_id },
        {
          avatar: {
            filepath: image.filepath,
            source: req.app.locals.fileStrategy,
          },
          user: req.user.id,
        },
      ),
    );
    promises.push(openai.beta.assistants.update(assistant_id, { metadata }));

    const resolved = await Promise.all(promises);
    // Respond with the OpenAI update result (second promise).
    res.status(201).json(resolved[1]);
  } catch (error) {
    const message = 'An error occurred while updating the Assistant Avatar';
    logger.error(message, error);
    res.status(500).json({ message });
  }
};
|
||||
|
||||
module.exports = {
|
||||
createAssistant,
|
||||
retrieveAssistant,
|
||||
patchAssistant,
|
||||
deleteAssistant,
|
||||
listAssistants,
|
||||
getAssistantDocuments,
|
||||
uploadAssistantAvatar,
|
||||
};
|
||||
208
api/server/controllers/assistants/v2.js
Normal file
208
api/server/controllers/assistants/v2.js
Normal file
|
|
@ -0,0 +1,208 @@
|
|||
const { ToolCallTypes } = require('librechat-data-provider');
|
||||
const { validateAndUpdateTool } = require('~/server/services/ActionService');
|
||||
const { getOpenAIClient } = require('./helpers');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
/**
 * Create an assistant.
 * @route POST /assistants
 * @param {AssistantCreateParams} req.body - The assistant creation parameters.
 * @returns {Assistant} 201 - success response - application/json
 */
const createAssistant = async (req, res) => {
  try {
    /** @type {{ openai: OpenAIClient }} */
    const { openai } = await getOpenAIClient({ req, res });

    const { tools = [], endpoint, ...assistantData } = req.body;
    // Map tool names (strings) to their registered definitions; drop unknowns.
    assistantData.tools = tools
      .map((tool) => (typeof tool === 'string' ? req.app.locals.availableTools[tool] : tool))
      .filter(Boolean);

    // Azure requires the deployment name as the model; keep the user-facing
    // model identifier so the response reflects the user's selection.
    let azureModelIdentifier = null;
    if (openai.locals?.azureOptions) {
      azureModelIdentifier = assistantData.model;
      assistantData.model = openai.locals.azureOptions.azureOpenAIApiDeploymentName;
    }

    assistantData.metadata = {
      author: req.user.id,
      endpoint,
    };

    const assistant = await openai.beta.assistants.create(assistantData);
    if (azureModelIdentifier) {
      assistant.model = azureModelIdentifier;
    }
    logger.debug('/assistants/', assistant);
    res.status(201).json(assistant);
  } catch (error) {
    logger.error('[/assistants] Error creating assistant', error);
    res.status(500).json({ error: error.message });
  }
};
|
||||
|
||||
/**
 * Modifies an assistant.
 * Resolves/validates tools (syncing action-based function tools), carries over
 * existing `tool_resources` when file_search is enabled but none were provided,
 * and maps the model to the Azure deployment name when applicable.
 * @param {object} params
 * @param {Express.Request} params.req
 * @param {OpenAIClient} params.openai
 * @param {string} params.assistant_id
 * @param {AssistantUpdateParams} params.updateData
 * @returns {Promise<Assistant>} The updated assistant.
 */
const updateAssistant = async ({ req, openai, assistant_id, updateData }) => {
  const tools = [];

  let hasFileSearch = false;
  for (const tool of updateData.tools ?? []) {
    // Tools may arrive as registered tool names (strings) or full definitions.
    let actualTool = typeof tool === 'string' ? req.app.locals.availableTools[tool] : tool;

    if (!actualTool) {
      continue;
    }

    if (actualTool.type === ToolCallTypes.FILE_SEARCH) {
      hasFileSearch = true;
    }

    // Non-function tools (code_interpreter, file_search) pass through as-is.
    if (!actualTool.function) {
      tools.push(actualTool);
      continue;
    }

    // Function tools (actions) must be validated/synced before being saved;
    // tools that fail validation are dropped.
    const updatedTool = await validateAndUpdateTool({ req, tool: actualTool, assistant_id });
    if (updatedTool) {
      tools.push(updatedTool);
    }
  }

  // file_search requires tool_resources; carry over the assistant's existing
  // resources when the update didn't specify any.
  if (hasFileSearch && !updateData.tool_resources) {
    const assistant = await openai.beta.assistants.retrieve(assistant_id);
    updateData.tool_resources = assistant.tool_resources ?? null;
  }

  // Ensure a file_search entry exists so the API accepts the tool.
  if (hasFileSearch && !updateData.tool_resources?.file_search) {
    updateData.tool_resources = {
      ...(updateData.tool_resources ?? {}),
      file_search: {
        vector_store_ids: [],
      },
    };
  }

  updateData.tools = tools;

  // Azure expects the deployment name as the model identifier.
  if (openai.locals?.azureOptions && updateData.model) {
    updateData.model = openai.locals.azureOptions.azureOpenAIApiDeploymentName;
  }

  return await openai.beta.assistants.update(assistant_id, updateData);
};
|
||||
|
||||
/**
 * Adds a file ID to one of an assistant's tool resources
 * (e.g. `code_interpreter`), then persists the change.
 * @param {object} params
 * @param {Express.Request} params.req
 * @param {OpenAIClient} params.openai
 * @param {string} params.assistant_id
 * @param {string} params.tool_resource - Key of the resource to update.
 * @param {string} params.file_id - File ID to append.
 * @returns {Promise<Assistant>} The updated assistant.
 */
const addResourceFileId = async ({ req, openai, assistant_id, tool_resource, file_id }) => {
  const assistant = await openai.beta.assistants.retrieve(assistant_id);
  // `tool_resources` is nullable in the Assistants API; a destructuring
  // default only covers `undefined`, so guard `null` explicitly.
  const tool_resources = assistant.tool_resources ?? {};
  const resource = tool_resources[tool_resource] ?? {};
  // `file_ids` may be absent on a freshly-created resource entry.
  resource.file_ids = [...(resource.file_ids ?? []), file_id];
  tool_resources[tool_resource] = resource;

  return await updateAssistant({
    req,
    openai,
    assistant_id,
    updateData: { tools: assistant.tools, tool_resources },
  });
};
|
||||
|
||||
/**
 * Deletes a file ID from an assistant's tool resources, then persists the
 * change. If `tool_resource` is omitted (or names a missing resource), every
 * resource is scanned and the first occurrence of the file ID is removed.
 * @param {object} params
 * @param {Express.Request} params.req
 * @param {OpenAIClient} params.openai
 * @param {string} params.assistant_id
 * @param {string} [params.tool_resource] - Preferred resource key to delete from.
 * @param {string} params.file_id - File ID to remove.
 * @returns {Promise<Assistant>} The updated assistant.
 */
const deleteResourceFileId = async ({ req, openai, assistant_id, tool_resource, file_id }) => {
  const assistant = await openai.beta.assistants.retrieve(assistant_id);
  // `tool_resources` is nullable in the Assistants API; a destructuring
  // default only covers `undefined`, so guard `null` explicitly.
  const tool_resources = assistant.tool_resources ?? {};

  /** Removes the first occurrence of `file_id`; returns true when removed. */
  const removeFileId = (resource) => {
    const ids = resource?.file_ids;
    // Some resources (e.g. file_search) track vector_store_ids, not file_ids.
    if (!Array.isArray(ids)) {
      return false;
    }
    const index = ids.indexOf(file_id);
    if (index === -1) {
      return false;
    }
    ids.splice(index, 1);
    return true;
  };

  if (tool_resource && tool_resources[tool_resource]) {
    removeFileId(tool_resources[tool_resource]);
  } else {
    // Fallback: scan all resources and stop at the first match.
    for (const resourceKey of Object.keys(tool_resources)) {
      if (removeFileId(tool_resources[resourceKey])) {
        break;
      }
    }
  }

  return await updateAssistant({
    req,
    openai,
    assistant_id,
    updateData: { tools: assistant.tools, tool_resources },
  });
};
|
||||
|
||||
/**
 * Modifies an assistant.
 * @route PATCH /assistants/:id
 * @param {object} req - Express Request
 * @param {object} req.params - Request params
 * @param {string} req.params.id - Assistant identifier.
 * @param {AssistantUpdateParams} req.body - The assistant update parameters.
 * @returns {Assistant} 200 - success response - application/json
 */
const patchAssistant = async (req, res) => {
  try {
    const { openai } = await getOpenAIClient({ req, res });
    const { id: assistant_id } = req.params;
    // `endpoint` is client-side routing metadata, not an OpenAI field.
    const { endpoint: _e, ...updateData } = req.body;
    updateData.tools ??= [];
    const updated = await updateAssistant({ req, openai, assistant_id, updateData });
    res.json(updated);
  } catch (error) {
    logger.error('[/assistants/:id] Error updating assistant', error);
    res.status(500).json({ error: error.message });
  }
};
|
||||
|
||||
module.exports = {
|
||||
patchAssistant,
|
||||
createAssistant,
|
||||
updateAssistant,
|
||||
addResourceFileId,
|
||||
deleteResourceFileId,
|
||||
};
|
||||
Loading…
Add table
Add a link
Reference in a new issue