🔧 fix(assistants): Vision minor fix & Add Docs (#2196)

* 👓 fix(assistants): Only Retrieve Assistant Data for Vision Requests if attachments exist in Host Storage

* docs: add `image_vision` capability
Danny Avila, 2024-03-25 00:02:54 -04:00, committed by GitHub
parent 798e8763d0
commit f86d80de59
3 changed files with 33 additions and 25 deletions

@@ -363,16 +363,24 @@ router.post('/', validateModel, buildEndpointOption, setHeaders, async (req, res
       return;
     }
 
+    /** @type {MongoFile[]} */
+    const attachments = await req.body.endpointOption.attachments;
+
+    if (
+      attachments &&
+      attachments.every((attachment) => attachment.source === FileSources.openai)
+    ) {
+      return;
+    }
+
     const assistant = await openai.beta.assistants.retrieve(assistant_id);
     const visionToolIndex = assistant.tools.findIndex(
-      (tool) => tool.function.name === ImageVisionTool.function.name,
+      (tool) => tool?.function && tool?.function?.name === ImageVisionTool.function.name,
     );
     if (visionToolIndex === -1) {
       return;
     }
 
-    /** @type {MongoFile[]} */
-    const attachments = await req.body.endpointOption.attachments;
     let visionMessage = {
       role: 'user',
       content: '',
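
In short, the hunk above moves the attachment lookup ahead of the assistant retrieval: when every attachment was uploaded straight to OpenAI's file storage, there are no host-stored files for the unofficial image-vision path to process, so the route returns before calling `openai.beta.assistants.retrieve`. Below is a minimal sketch of that guard in isolation — the `FileSources` values and the attachment shape are simplified stand-ins for illustration, not the actual LibreChat modules.

```js
// Sketch only: a stand-in for the FileSources values referenced above.
const FileSources = { openai: 'openai', local: 'local' };

// Mirrors the guard added in the hunk: skip the vision work entirely when
// every attachment already lives in OpenAI's storage.
function shouldSkipVision(attachments) {
  return Boolean(
    attachments &&
      attachments.every((attachment) => attachment.source === FileSources.openai),
  );
}

console.log(shouldSkipVision([{ source: 'openai' }])); // true  -> return early
console.log(shouldSkipVision([{ source: 'local' }, { source: 'openai' }])); // false -> keep processing
```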

@@ -126,7 +126,7 @@ endpoints:
     # (optional) Models that support retrieval, will default to latest known OpenAI models that support the feature
     # retrievalModels: ["gpt-4-turbo-preview"]
     # (optional) Assistant Capabilities available to all users. Omit the ones you wish to exclude. Defaults to list below.
-    # capabilities: ["code_interpreter", "retrieval", "actions", "tools"]
+    # capabilities: ["code_interpreter", "retrieval", "actions", "tools", "image_vision"]
   custom:
     - name: "Mistral"
       apiKey: "${MISTRAL_API_KEY}"
@@ -475,7 +475,7 @@ endpoints:
     # (optional) Models that support retrieval, will default to latest known OpenAI models that support the feature
     # retrievalModels: ["gpt-4-turbo-preview"]
     # (optional) Assistant Capabilities available to all users. Omit the ones you wish to exclude. Defaults to list below.
-    # capabilities: ["code_interpreter", "retrieval", "actions", "tools"]
+    # capabilities: ["code_interpreter", "retrieval", "actions", "tools", "image_vision"]
 ```
 
 > This configuration enables the builder interface for assistants, sets a polling interval of 500ms to check for run updates, and establishes a timeout of 10 seconds for assistant run operations.
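
Put together, the configuration that sentence describes would look roughly like the sketch below — only the `assistants` keys from the example above are shown, as an illustration rather than a complete librechat.yaml.

```yaml
endpoints:
  assistants:
    disableBuilder: false  # keep the Assistants builder UI enabled
    pollIntervalMs: 500    # check run status every 500 ms
    timeoutMs: 10000       # give up on an assistant run after 10 seconds
    # include "image_vision" to enable the unofficial vision support added in this PR
    capabilities: ["code_interpreter", "image_vision", "retrieval", "actions", "tools"]
```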
@@ -538,9 +538,10 @@ In addition to custom endpoints, you can configure settings specific to the assi
 > Specifies the assistant capabilities available to all users for the assistants endpoint.
 
 - **Type**: Array/List of Strings
-- **Example**: `capabilities: ["code_interpreter", "retrieval", "actions", "tools"]`
+- **Example**: `capabilities: ["code_interpreter", "retrieval", "actions", "tools", "image_vision"]`
 - **Description**: Defines the assistant capabilities that are available to all users for the assistants endpoint. You can omit the capabilities you wish to exclude from the list. The available capabilities are:
   - `code_interpreter`: Enables code interpretation capabilities for the assistant.
+  - `image_vision`: Enables unofficial vision support for uploaded images.
   - `retrieval`: Enables retrieval capabilities for the assistant.
   - `actions`: Enables action capabilities for the assistant.
   - `tools`: Enables tool capabilities for the assistant.
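
Because the list is opt-out by omission, excluding a capability simply means leaving it out of the array. For example, a deployment that wants code interpretation and vision but neither retrieval nor actions could use something like the following (illustrative sketch, not a recommendation):

```yaml
endpoints:
  assistants:
    # "retrieval" and "actions" are omitted, so those capabilities are unavailable to users
    capabilities: ["code_interpreter", "image_vision", "tools"]
```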

@@ -24,25 +24,6 @@ registration:
   socialLogins: ['github', 'google', 'discord', 'openid', 'facebook']
   # allowedDomains:
   # - "gmail.com"
-
-# fileConfig:
-#   endpoints:
-#     assistants:
-#       fileLimit: 5
-#       fileSizeLimit: 10 # Maximum size for an individual file in MB
-#       totalSizeLimit: 50 # Maximum total size for all files in a single request in MB
-#       supportedMimeTypes:
-#         - "image/.*"
-#         - "application/pdf"
-#     openAI:
-#       disabled: true # Disables file uploading to the OpenAI endpoint
-#     default:
-#       totalSizeLimit: 20
-#     YourCustomEndpointName:
-#       fileLimit: 2
-#       fileSizeLimit: 5
-#   serverFileSizeLimit: 100 # Global server file size limit in MB
-#   avatarSizeLimit: 2 # Limit for user avatar image size in MB
 # rateLimits:
 #   fileUploads:
 #     ipMax: 100
@@ -62,7 +43,7 @@ endpoints:
   #   # (optional) Models that support retrieval, will default to latest known OpenAI models that support the feature
   #   retrievalModels: ["gpt-4-turbo-preview"]
   #   # (optional) Assistant Capabilities available to all users. Omit the ones you wish to exclude. Defaults to list below.
-  #   capabilities: ["code_interpreter", "retrieval", "actions", "tools"]
+  #   capabilities: ["code_interpreter", "retrieval", "actions", "tools", "image_vision"]
   custom:
     # Groq Example
     - name: 'groq'
@@ -135,5 +116,23 @@ endpoints:
       # Recommended: Drop the stop parameter from the request as Openrouter models use a variety of stop tokens.
       dropParams: ['stop']
       modelDisplayLabel: 'OpenRouter'
+
+# fileConfig:
+#   endpoints:
+#     assistants:
+#       fileLimit: 5
+#       fileSizeLimit: 10 # Maximum size for an individual file in MB
+#       totalSizeLimit: 50 # Maximum total size for all files in a single request in MB
+#       supportedMimeTypes:
+#         - "image/.*"
+#         - "application/pdf"
+#     openAI:
+#       disabled: true # Disables file uploading to the OpenAI endpoint
+#     default:
+#       totalSizeLimit: 20
+#     YourCustomEndpointName:
+#       fileLimit: 2
+#       fileSizeLimit: 5
+#   serverFileSizeLimit: 100 # Global server file size limit in MB
+#   avatarSizeLimit: 2 # Limit for user avatar image size in MB
 
 # See the Custom Configuration Guide for more information:
 # https://docs.librechat.ai/install/configuration/custom_config.html
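
For completeness, activating the commented `fileConfig` block above is just a matter of uncommenting the keys you need. A trimmed, uncommented sketch that keeps only the assistants limits and restricts uploads to images (the numbers are the example's own, not recommendations):

```yaml
fileConfig:
  endpoints:
    assistants:
      fileLimit: 5
      fileSizeLimit: 10   # MB per individual file
      totalSizeLimit: 50  # MB per request
      supportedMimeTypes:
        - "image/.*"
  serverFileSizeLimit: 100  # MB, global server cap
  avatarSizeLimit: 2        # MB, user avatar images
```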