🪙 feat: Configure Max Context and Output Tokens (#2648)

* chore: make frequent 'error' log into 'debug' log

* feat: add maxContextTokens as a conversation field

* refactor(settings): increase popover height

* feat: add DynamicInputNumber and maxContextTokens to all endpoints that support it (frontend), fix schema

* feat: maxContextTokens handling (backend)

* style: revert popover height

* feat: max tokens

* fix: Ollama Vision firebase compatibility

* fix: Ollama Vision, use message_file_map to determine multimodal request

* refactor: bring back MobileNav and improve title styling
This commit is contained in:
Danny Avila 2024-05-09 13:27:13 -04:00 committed by GitHub
parent 5293b73b6d
commit 6ba7f60eec
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
26 changed files with 420 additions and 22 deletions

View file

@@ -75,7 +75,9 @@ class AnthropicClient extends BaseClient {
this.options.attachments?.then((attachments) => this.checkVisionRequest(attachments));
this.maxContextTokens =
getModelMaxTokens(this.modelOptions.model, EModelEndpoint.anthropic) ?? 100000;
this.options.maxContextTokens ??
getModelMaxTokens(this.modelOptions.model, EModelEndpoint.anthropic) ??
100000;
this.maxResponseTokens = this.modelOptions.maxOutputTokens || 1500;
this.maxPromptTokens =
this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;
@@ -652,6 +654,7 @@ class AnthropicClient extends BaseClient {
getSaveOptions() {
return {
maxContextTokens: this.options.maxContextTokens,
promptPrefix: this.options.promptPrefix,
modelLabel: this.options.modelLabel,
resendFiles: this.options.resendFiles,

View file

@@ -138,7 +138,10 @@ class GoogleClient extends BaseClient {
!isGenerativeModel && !isChatModel && /code|text/.test(this.modelOptions.model);
const { isTextModel } = this;
this.maxContextTokens = getModelMaxTokens(this.modelOptions.model, EModelEndpoint.google);
this.maxContextTokens =
this.options.maxContextTokens ??
getModelMaxTokens(this.modelOptions.model, EModelEndpoint.google);
// The max prompt tokens is determined by the max context tokens minus the max response tokens.
// Earlier messages will be dropped until the prompt is within the limit.
this.maxResponseTokens = this.modelOptions.maxOutputTokens || settings.maxOutputTokens.default;

View file

@@ -161,11 +161,13 @@ class OpenAIClient extends BaseClient {
model.startsWith('text-chat') || model.startsWith('text-davinci-002-render');
this.maxContextTokens =
this.options.maxContextTokens ??
getModelMaxTokens(
model,
this.options.endpointType ?? this.options.endpoint,
this.options.endpointTokenConfig,
) ?? 4095; // 1 less than maximum
) ??
4095; // 1 less than maximum
if (this.shouldSummarize) {
this.maxContextTokens = Math.floor(this.maxContextTokens / 2);
@@ -407,6 +409,7 @@ class OpenAIClient extends BaseClient {
getSaveOptions() {
return {
maxContextTokens: this.options.maxContextTokens,
chatGptLabel: this.options.chatGptLabel,
promptPrefix: this.options.promptPrefix,
resendFiles: this.options.resendFiles,
@@ -435,7 +438,11 @@ class OpenAIClient extends BaseClient {
* @returns {Promise<MongoFile[]>}
*/
async addImageURLs(message, attachments) {
const { files, image_urls } = await encodeAndFormat(this.options.req, attachments);
const { files, image_urls } = await encodeAndFormat(
this.options.req,
attachments,
this.options.endpoint,
);
message.image_urls = image_urls.length ? image_urls : undefined;
return files;
}
@@ -1158,7 +1165,7 @@ ${convo}
});
}
if (this.options.attachments && this.options.endpoint?.toLowerCase() === 'ollama') {
if (this.message_file_map && this.options.endpoint?.toLowerCase() === 'ollama') {
const ollamaClient = new OllamaClient({ baseURL });
return await ollamaClient.chatCompletion({
payload: modelOptions,

View file

@@ -40,7 +40,8 @@ class FakeClient extends BaseClient {
};
}
this.maxContextTokens = getModelMaxTokens(this.modelOptions.model) ?? 4097;
this.maxContextTokens =
this.options.maxContextTokens ?? getModelMaxTokens(this.modelOptions.model) ?? 4097;
}
buildMessages() {}
getTokenCount(str) {

View file

@@ -348,7 +348,7 @@ module.exports = function mongoMeili(schema, options) {
try {
meiliDoc = await client.index('convos').getDocument(doc.conversationId);
} catch (error) {
logger.error(
logger.debug(
'[MeiliMongooseModel.findOneAndUpdate] Convo not found in MeiliSearch and will index ' +
doc.conversationId,
error,

View file

@@ -104,6 +104,12 @@ const conversationPreset = {
type: String,
},
tools: { type: [{ type: String }], default: undefined },
maxContextTokens: {
type: Number,
},
max_tokens: {
type: Number,
},
};
const agentOptions = {

View file

@@ -1,5 +1,14 @@
const buildOptions = (endpoint, parsedBody) => {
const { modelLabel, promptPrefix, resendFiles, iconURL, greeting, spec, ...rest } = parsedBody;
const {
modelLabel,
promptPrefix,
maxContextTokens,
resendFiles,
iconURL,
greeting,
spec,
...rest
} = parsedBody;
const endpointOption = {
endpoint,
modelLabel,
@@ -8,6 +17,7 @@ const buildOptions = (endpoint, parsedBody) => {
iconURL,
greeting,
spec,
maxContextTokens,
modelOptions: {
...rest,
},

View file

@@ -1,6 +1,15 @@
const buildOptions = (endpoint, parsedBody, endpointType) => {
const { chatGptLabel, promptPrefix, resendFiles, imageDetail, iconURL, greeting, spec, ...rest } =
parsedBody;
const {
chatGptLabel,
promptPrefix,
maxContextTokens,
resendFiles,
imageDetail,
iconURL,
greeting,
spec,
...rest
} = parsedBody;
const endpointOption = {
endpoint,
endpointType,
@@ -11,6 +20,7 @@ const buildOptions = (endpoint, parsedBody, endpointType) => {
iconURL,
greeting,
spec,
maxContextTokens,
modelOptions: {
...rest,
},

View file

@@ -7,6 +7,7 @@ const buildOptions = (endpoint, parsedBody) => {
iconURL,
greeting,
spec,
maxContextTokens,
...modelOptions
} = parsedBody;
const endpointOption = {
@@ -21,6 +22,7 @@ const buildOptions = (endpoint, parsedBody) => {
iconURL,
greeting,
spec,
maxContextTokens,
modelOptions,
};

View file

@@ -1,6 +1,15 @@
const buildOptions = (endpoint, parsedBody) => {
const { chatGptLabel, promptPrefix, resendFiles, imageDetail, iconURL, greeting, spec, ...rest } =
parsedBody;
const {
chatGptLabel,
promptPrefix,
maxContextTokens,
resendFiles,
imageDetail,
iconURL,
greeting,
spec,
...rest
} = parsedBody;
const endpointOption = {
endpoint,
chatGptLabel,
@@ -10,6 +19,7 @@ const buildOptions = (endpoint, parsedBody) => {
iconURL,
greeting,
spec,
maxContextTokens,
modelOptions: {
...rest,
},

View file

@@ -23,7 +23,7 @@ async function fetchImageToBase64(url) {
}
}
const base64Only = new Set([EModelEndpoint.google, EModelEndpoint.anthropic]);
const base64Only = new Set([EModelEndpoint.google, EModelEndpoint.anthropic, 'Ollama', 'ollama']);
/**
* Encodes and formats the given files.