Merge branch 'main' into feature/entra-id-azure-integration
Commit 23ac2556da
193 changed files with 3845 additions and 692 deletions
@@ -1,6 +1,6 @@
 {
   "name": "@librechat/api",
-  "version": "1.4.1",
+  "version": "1.5.0",
   "type": "commonjs",
   "description": "MCP services for LibreChat",
   "main": "dist/index.js",
@@ -80,7 +80,7 @@
   "@azure/storage-blob": "^12.27.0",
   "@keyv/redis": "^4.3.3",
   "@langchain/core": "^0.3.62",
-  "@librechat/agents": "^2.4.85",
+  "@librechat/agents": "^2.4.90",
   "@librechat/data-schemas": "*",
   "@modelcontextprotocol/sdk": "^1.17.1",
   "axios": "^1.12.1",
@@ -383,9 +383,11 @@ ${memory ?? 'No existing memories'}`;
     });
+
     const config = {
       runName: 'MemoryRun',
       configurable: {
        user_id: userId,
-       thread_id: conversationId,
+       provider: llmConfig?.provider,
+       thread_id: `memory-run-${conversationId}`,
      },
      streamMode: 'values',
      recursionLimit: 3,
@@ -6,15 +6,27 @@ describe('isEmailDomainAllowed', () => {
    jest.clearAllMocks();
  });

-  it('should return false if email is falsy', async () => {
+  it('should return true if email is falsy and no domain restrictions exist', async () => {
    const email = '';
    const result = isEmailDomainAllowed(email);
-    expect(result).toBe(false);
+    expect(result).toBe(true);
  });

+  it('should return true if domain is not present in the email and no domain restrictions exist', async () => {
+    const email = 'test';
+    const result = isEmailDomainAllowed(email);
+    expect(result).toBe(true);
+  });
+
+  it('should return false if email is falsy and domain restrictions exist', async () => {
+    const email = '';
+    const result = isEmailDomainAllowed(email, ['domain1.com']);
+    expect(result).toBe(false);
+  });
+
-  it('should return false if domain is not present in the email', async () => {
+  it('should return false if domain is not present in the email and domain restrictions exist', async () => {
    const email = 'test';
-    const result = isEmailDomainAllowed(email);
+    const result = isEmailDomainAllowed(email, ['domain1.com']);
    expect(result).toBe(false);
  });
@@ -3,6 +3,12 @@
  * @param allowedDomains
  */
 export function isEmailDomainAllowed(email: string, allowedDomains?: string[] | null): boolean {
+  /** If no domain restrictions are configured, allow all */
+  if (!allowedDomains || !Array.isArray(allowedDomains) || !allowedDomains.length) {
+    return true;
+  }
+
+  /** If restrictions exist, validate email format */
   if (!email) {
     return false;
   }

@@ -13,12 +19,6 @@ export function isEmailDomainAllowed(email: string, allowedDomains?: string[] |
     return false;
   }

-  if (!allowedDomains) {
-    return true;
-  } else if (!Array.isArray(allowedDomains) || !allowedDomains.length) {
-    return true;
-  }
-
   return allowedDomains.some((allowedDomain) => allowedDomain?.toLowerCase() === domain);
 }
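For context, a minimal usage sketch (not part of the diff) of the reordered checks, mirroring the cases in the updated spec; the relative import path is an assumption:

import { isEmailDomainAllowed } from './isEmailDomainAllowed';

isEmailDomainAllowed('', undefined);                       // true:  no restrictions configured, allow all
isEmailDomainAllowed('test', undefined);                   // true:  no restrictions, format not validated
isEmailDomainAllowed('', ['domain1.com']);                 // false: restrictions exist, email is falsy
isEmailDomainAllowed('test', ['domain1.com']);             // false: restrictions exist, no domain part
isEmailDomainAllowed('user@domain1.com', ['domain1.com']); // true:  restrictions exist, domain matches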
@@ -245,8 +245,8 @@ describe('getLLMConfig', () => {
      },
    });

-    // The actual anthropicSettings.maxOutputTokens.reset('claude-3-opus') returns 4096
-    expect(result.llmConfig).toHaveProperty('maxTokens', 4096);
+    // The actual anthropicSettings.maxOutputTokens.reset('claude-3-opus') returns 8192
+    expect(result.llmConfig).toHaveProperty('maxTokens', 8192);
  });

  it('should handle both proxy and reverseProxyUrl', () => {

@@ -698,9 +698,17 @@ describe('getLLMConfig', () => {
      { model: 'claude-3.5-sonnet-20241022', expectedMaxTokens: 8192 },
      { model: 'claude-3-7-sonnet', expectedMaxTokens: 8192 },
      { model: 'claude-3.7-sonnet-20250109', expectedMaxTokens: 8192 },
-      { model: 'claude-3-opus', expectedMaxTokens: 4096 },
-      { model: 'claude-3-haiku', expectedMaxTokens: 4096 },
-      { model: 'claude-2.1', expectedMaxTokens: 4096 },
+      { model: 'claude-3-opus', expectedMaxTokens: 8192 },
+      { model: 'claude-3-haiku', expectedMaxTokens: 8192 },
+      { model: 'claude-2.1', expectedMaxTokens: 8192 },
+      { model: 'claude-sonnet-4-5', expectedMaxTokens: 64000 },
+      { model: 'claude-sonnet-4-5-20250929', expectedMaxTokens: 64000 },
+      { model: 'claude-haiku-4-5', expectedMaxTokens: 64000 },
+      { model: 'claude-haiku-4-5-20251001', expectedMaxTokens: 64000 },
+      { model: 'claude-opus-4-1', expectedMaxTokens: 32000 },
+      { model: 'claude-opus-4-1-20250805', expectedMaxTokens: 32000 },
+      { model: 'claude-sonnet-4-20250514', expectedMaxTokens: 64000 },
+      { model: 'claude-opus-4-0', expectedMaxTokens: 32000 },
    ];

    testCases.forEach(({ model, expectedMaxTokens }) => {

@@ -729,6 +737,222 @@ describe('getLLMConfig', () => {
    });
  });

+  describe('Claude 4.x Model maxOutputTokens Defaults', () => {
+    it('should default Claude Sonnet 4.x models to 64K tokens', () => {
+      const testCases = ['claude-sonnet-4-5', 'claude-sonnet-4-5-20250929', 'claude-sonnet-4.5'];
+
+      testCases.forEach((model) => {
+        const result = getLLMConfig('test-key', {
+          modelOptions: { model },
+        });
+        expect(result.llmConfig.maxTokens).toBe(64000);
+      });
+    });
+
+    it('should default Claude Haiku 4.x models to 64K tokens', () => {
+      const testCases = ['claude-haiku-4-5', 'claude-haiku-4-5-20251001', 'claude-haiku-4.5'];
+
+      testCases.forEach((model) => {
+        const result = getLLMConfig('test-key', {
+          modelOptions: { model },
+        });
+        expect(result.llmConfig.maxTokens).toBe(64000);
+      });
+    });
+
+    it('should default Claude Opus 4.x models to 32K tokens', () => {
+      const testCases = ['claude-opus-4-1', 'claude-opus-4-1-20250805', 'claude-opus-4.1'];
+
+      testCases.forEach((model) => {
+        const result = getLLMConfig('test-key', {
+          modelOptions: { model },
+        });
+        expect(result.llmConfig.maxTokens).toBe(32000);
+      });
+    });
+
+    it('should default future Claude 4.x Sonnet/Haiku models to 64K (future-proofing)', () => {
+      const testCases = ['claude-sonnet-4-20250514', 'claude-sonnet-4-9', 'claude-haiku-4-8'];
+
+      testCases.forEach((model) => {
+        const result = getLLMConfig('test-key', {
+          modelOptions: { model },
+        });
+        expect(result.llmConfig.maxTokens).toBe(64000);
+      });
+    });
+
+    it('should default future Claude 4.x Opus models to 32K (future-proofing)', () => {
+      const testCases = ['claude-opus-4-0', 'claude-opus-4-7'];
+
+      testCases.forEach((model) => {
+        const result = getLLMConfig('test-key', {
+          modelOptions: { model },
+        });
+        expect(result.llmConfig.maxTokens).toBe(32000);
+      });
+    });
+
+    it('should handle explicit maxOutputTokens override for Claude 4.x models', () => {
+      const result = getLLMConfig('test-key', {
+        modelOptions: {
+          model: 'claude-sonnet-4-5',
+          maxOutputTokens: 64000, // Explicitly set to 64K
+        },
+      });
+
+      expect(result.llmConfig.maxTokens).toBe(64000);
+    });
+
+    it('should handle undefined maxOutputTokens for Claude 4.x (use reset default)', () => {
+      const testCases = [
+        { model: 'claude-sonnet-4-5', expected: 64000 },
+        { model: 'claude-haiku-4-5', expected: 64000 },
+        { model: 'claude-opus-4-1', expected: 32000 },
+      ];
+
+      testCases.forEach(({ model, expected }) => {
+        const result = getLLMConfig('test-key', {
+          modelOptions: {
+            model,
+            maxOutputTokens: undefined,
+          },
+        });
+        expect(result.llmConfig.maxTokens).toBe(expected);
+      });
+    });
+
+    it('should handle Claude 4 Sonnet/Haiku with thinking enabled', () => {
+      const testCases = ['claude-sonnet-4-5', 'claude-haiku-4-5'];
+
+      testCases.forEach((model) => {
+        const result = getLLMConfig('test-key', {
+          modelOptions: {
+            model,
+            thinking: true,
+            thinkingBudget: 10000,
+          },
+        });
+
+        expect(result.llmConfig.thinking).toMatchObject({
+          type: 'enabled',
+          budget_tokens: 10000,
+        });
+        expect(result.llmConfig.maxTokens).toBe(64000);
+      });
+    });
+
+    it('should handle Claude 4 Opus with thinking enabled', () => {
+      const result = getLLMConfig('test-key', {
+        modelOptions: {
+          model: 'claude-opus-4-1',
+          thinking: true,
+          thinkingBudget: 10000,
+        },
+      });
+
+      expect(result.llmConfig.thinking).toMatchObject({
+        type: 'enabled',
+        budget_tokens: 10000,
+      });
+      expect(result.llmConfig.maxTokens).toBe(32000);
+    });
+
+    it('should respect model-specific maxOutputTokens for Claude 4.x models', () => {
+      const testCases = [
+        { model: 'claude-sonnet-4-5', maxOutputTokens: 50000, expected: 50000 },
+        { model: 'claude-haiku-4-5', maxOutputTokens: 40000, expected: 40000 },
+        { model: 'claude-opus-4-1', maxOutputTokens: 20000, expected: 20000 },
+      ];
+
+      testCases.forEach(({ model, maxOutputTokens, expected }) => {
+        const result = getLLMConfig('test-key', {
+          modelOptions: {
+            model,
+            maxOutputTokens,
+          },
+        });
+        expect(result.llmConfig.maxTokens).toBe(expected);
+      });
+    });
+
+    it('should future-proof Claude 5.x Sonnet models with 64K default', () => {
+      const testCases = [
+        'claude-sonnet-5',
+        'claude-sonnet-5-0',
+        'claude-sonnet-5-2-20260101',
+        'claude-sonnet-5.5',
+      ];
+
+      testCases.forEach((model) => {
+        const result = getLLMConfig('test-key', {
+          modelOptions: { model },
+        });
+        expect(result.llmConfig.maxTokens).toBe(64000);
+      });
+    });
+
+    it('should future-proof Claude 5.x Haiku models with 64K default', () => {
+      const testCases = [
+        'claude-haiku-5',
+        'claude-haiku-5-0',
+        'claude-haiku-5-2-20260101',
+        'claude-haiku-5.5',
+      ];
+
+      testCases.forEach((model) => {
+        const result = getLLMConfig('test-key', {
+          modelOptions: { model },
+        });
+        expect(result.llmConfig.maxTokens).toBe(64000);
+      });
+    });
+
+    it('should future-proof Claude 5.x Opus models with 32K default', () => {
+      const testCases = [
+        'claude-opus-5',
+        'claude-opus-5-0',
+        'claude-opus-5-2-20260101',
+        'claude-opus-5.5',
+      ];
+
+      testCases.forEach((model) => {
+        const result = getLLMConfig('test-key', {
+          modelOptions: { model },
+        });
+        expect(result.llmConfig.maxTokens).toBe(32000);
+      });
+    });
+
+    it('should future-proof Claude 6-9.x models with correct defaults', () => {
+      const testCases = [
+        // Claude 6.x
+        { model: 'claude-sonnet-6', expected: 64000 },
+        { model: 'claude-haiku-6-0', expected: 64000 },
+        { model: 'claude-opus-6-1', expected: 32000 },
+        // Claude 7.x
+        { model: 'claude-sonnet-7-20270101', expected: 64000 },
+        { model: 'claude-haiku-7.5', expected: 64000 },
+        { model: 'claude-opus-7', expected: 32000 },
+        // Claude 8.x
+        { model: 'claude-sonnet-8', expected: 64000 },
+        { model: 'claude-haiku-8-2', expected: 64000 },
+        { model: 'claude-opus-8-latest', expected: 32000 },
+        // Claude 9.x
+        { model: 'claude-sonnet-9', expected: 64000 },
+        { model: 'claude-haiku-9', expected: 64000 },
+        { model: 'claude-opus-9', expected: 32000 },
+      ];
+
+      testCases.forEach(({ model, expected }) => {
+        const result = getLLMConfig('test-key', {
+          modelOptions: { model },
+        });
+        expect(result.llmConfig.maxTokens).toBe(expected);
+      });
+    });
+  });
+
  describe('Parameter Boundary and Validation Logic', () => {
    it('should handle temperature boundary values', () => {
      const testCases = [

@@ -784,7 +1008,7 @@ describe('getLLMConfig', () => {
    it('should handle maxOutputTokens boundary values', () => {
      const testCases = [
        { model: 'claude-3-opus', maxOutputTokens: 1, expected: 1 }, // min
-        { model: 'claude-3-opus', maxOutputTokens: 4096, expected: 4096 }, // max for legacy
+        { model: 'claude-3-opus', maxOutputTokens: 8192, expected: 8192 }, // default for claude-3
        { model: 'claude-3-5-sonnet', maxOutputTokens: 1, expected: 1 }, // min
        { model: 'claude-3-5-sonnet', maxOutputTokens: 200000, expected: 200000 }, // max for new
        { model: 'claude-3-7-sonnet', maxOutputTokens: 8192, expected: 8192 }, // default
@@ -34,7 +34,6 @@ function getLLMConfig(

   const defaultOptions = {
     model: anthropicSettings.model.default,
-    maxOutputTokens: anthropicSettings.maxOutputTokens.default,
     stream: true,
   };
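With maxOutputTokens dropped from defaultOptions, the effective default is resolved per model. A sketch of the presumed behavior, based on the comment in the updated spec (anthropicSettings is exported by librechat-data-provider):

import { anthropicSettings } from 'librechat-data-provider';

// Model-aware defaults, per the updated tests:
anthropicSettings.maxOutputTokens.reset('claude-3-opus');     // 8192
anthropicSettings.maxOutputTokens.reset('claude-sonnet-4-5'); // presumably 64000, matching the new spec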
@@ -30,7 +30,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
      apiKey: 'sk-xxxx',
      model: 'claude-sonnet-4',
      stream: true,
-      maxTokens: 8192,
+      maxTokens: 64000,
      modelKwargs: {
        metadata: {
          user_id: 'some_user_id',
@@ -1,4 +1,9 @@
-import { Verbosity, ReasoningEffort, ReasoningSummary } from 'librechat-data-provider';
+import {
+  Verbosity,
+  EModelEndpoint,
+  ReasoningEffort,
+  ReasoningSummary,
+} from 'librechat-data-provider';
 import type { RequestInit } from 'undici';
 import type { OpenAIParameters, AzureOptions } from '~/types';
 import { getOpenAIConfig } from './config';

@@ -103,12 +108,89 @@ describe('getOpenAIConfig', () => {

    const result = getOpenAIConfig(mockApiKey, { modelOptions });

+    /** When no endpoint is specified, it's treated as non-openAI/azureOpenAI, so uses reasoning object */
    expect(result.llmConfig.reasoning).toEqual({
      effort: ReasoningEffort.high,
      summary: ReasoningSummary.detailed,
    });
    expect((result.llmConfig as Record<string, unknown>).reasoning_effort).toBeUndefined();
  });

+  it('should use reasoning_effort for openAI endpoint without useResponsesApi', () => {
+    const modelOptions = {
+      reasoning_effort: ReasoningEffort.high,
+      reasoning_summary: ReasoningSummary.detailed,
+    };
+
+    const result = getOpenAIConfig(mockApiKey, { modelOptions }, EModelEndpoint.openAI);
+
+    expect((result.llmConfig as Record<string, unknown>).reasoning_effort).toBe(
+      ReasoningEffort.high,
+    );
+    expect(result.llmConfig.reasoning).toBeUndefined();
+  });
+
+  it('should use reasoning_effort for azureOpenAI endpoint without useResponsesApi', () => {
+    const modelOptions = {
+      reasoning_effort: ReasoningEffort.high,
+      reasoning_summary: ReasoningSummary.detailed,
+    };
+
+    const result = getOpenAIConfig(mockApiKey, { modelOptions }, EModelEndpoint.azureOpenAI);
+
+    expect((result.llmConfig as Record<string, unknown>).reasoning_effort).toBe(
+      ReasoningEffort.high,
+    );
+    expect(result.llmConfig.reasoning).toBeUndefined();
+  });
+
+  it('should use reasoning object for openAI endpoint with useResponsesApi=true', () => {
+    const modelOptions = {
+      reasoning_effort: ReasoningEffort.high,
+      reasoning_summary: ReasoningSummary.detailed,
+      useResponsesApi: true,
+    };
+
+    const result = getOpenAIConfig(mockApiKey, { modelOptions }, EModelEndpoint.openAI);
+
+    expect(result.llmConfig.reasoning).toEqual({
+      effort: ReasoningEffort.high,
+      summary: ReasoningSummary.detailed,
+    });
+    expect((result.llmConfig as Record<string, unknown>).reasoning_effort).toBeUndefined();
+  });
+
+  it('should use reasoning object for azureOpenAI endpoint with useResponsesApi=true', () => {
+    const modelOptions = {
+      reasoning_effort: ReasoningEffort.high,
+      reasoning_summary: ReasoningSummary.detailed,
+      useResponsesApi: true,
+    };
+
+    const result = getOpenAIConfig(mockApiKey, { modelOptions }, EModelEndpoint.azureOpenAI);
+
+    expect(result.llmConfig.reasoning).toEqual({
+      effort: ReasoningEffort.high,
+      summary: ReasoningSummary.detailed,
+    });
+    expect((result.llmConfig as Record<string, unknown>).reasoning_effort).toBeUndefined();
+  });
+
+  it('should use reasoning object for non-openAI/azureOpenAI endpoints', () => {
+    const modelOptions = {
+      reasoning_effort: ReasoningEffort.high,
+      reasoning_summary: ReasoningSummary.detailed,
+    };
+
+    const result = getOpenAIConfig(mockApiKey, { modelOptions }, 'custom-endpoint');
+
+    expect(result.llmConfig.reasoning).toEqual({
+      effort: ReasoningEffort.high,
+      summary: ReasoningSummary.detailed,
+    });
+    expect((result.llmConfig as Record<string, unknown>).reasoning_effort).toBeUndefined();
+  });
+
  it('should handle OpenRouter configuration', () => {
    const reverseProxyUrl = 'https://openrouter.ai/api/v1';

@@ -655,6 +737,27 @@ describe('getOpenAIConfig', () => {
    ).toBeUndefined();
  });

+  it('should create correct Azure baseURL when response api is selected', () => {
+    const azure = {
+      azureOpenAIApiInstanceName: 'test-instance',
+      azureOpenAIApiDeploymentName: 'test-deployment',
+      azureOpenAIApiVersion: '2023-08-15',
+      azureOpenAIApiKey: 'azure-key',
+    };
+
+    const result = getOpenAIConfig(mockApiKey, {
+      azure,
+      modelOptions: { useResponsesApi: true },
+      reverseProxyUrl:
+        'https://${INSTANCE_NAME}.openai.azure.com/openai/deployments/${DEPLOYMENT_NAME}',
+    });
+
+    expect(result.configOptions?.baseURL).toBe(
+      'https://test-instance.openai.azure.com/openai/v1',
+    );
+    expect(result.configOptions?.baseURL).not.toContain('deployments');
+  });
+
  it('should handle Azure with organization from environment', () => {
    const originalOrg = process.env.OPENAI_ORGANIZATION;
    process.env.OPENAI_ORGANIZATION = 'test-org-123';
@@ -68,6 +68,7 @@ export function getOpenAIConfig(
   azure,
   apiKey,
   baseURL,
+  endpoint,
   streaming,
   addParams,
   dropParams,

@@ -112,8 +113,10 @@ export function getOpenAIConfig(
    return;
  }

+  const updatedUrl = configOptions.baseURL?.replace(/\/deployments(?:\/.*)?$/, '/v1');
+
  configOptions.baseURL = constructAzureURL({
-    baseURL: configOptions.baseURL || 'https://${INSTANCE_NAME}.openai.azure.com/openai/v1',
+    baseURL: updatedUrl || 'https://${INSTANCE_NAME}.openai.azure.com/openai/v1',
    azureOptions: azure,
  });
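A minimal sketch (not part of the diff) of what the new updatedUrl rewrite does to a deployments-style Azure baseURL before constructAzureURL substitutes the placeholders:

// Deployments-style URL, as exercised by the new spec test
const baseURL = 'https://${INSTANCE_NAME}.openai.azure.com/openai/deployments/${DEPLOYMENT_NAME}';
// Strip the trailing /deployments/... segment and point at the /v1 route
const updatedUrl = baseURL.replace(/\/deployments(?:\/.*)?$/, '/v1');
// => 'https://${INSTANCE_NAME}.openai.azure.com/openai/v1'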
@@ -1,4 +1,4 @@
-import { removeNullishValues } from 'librechat-data-provider';
+import { EModelEndpoint, removeNullishValues } from 'librechat-data-provider';
 import type { BindToolsInput } from '@langchain/core/language_models/chat_models';
 import type { AzureOpenAIInput } from '@langchain/openai';
 import type { OpenAI } from 'openai';

@@ -79,6 +79,7 @@ export function getOpenAILLMConfig({
   azure,
   apiKey,
   baseURL,
+  endpoint,
   streaming,
   addParams,
   dropParams,

@@ -88,6 +89,7 @@ export function getOpenAILLMConfig({
   apiKey: string;
   streaming: boolean;
   baseURL?: string | null;
+  endpoint?: EModelEndpoint | string | null;
   modelOptions: Partial<t.OpenAIParameters>;
   addParams?: Record<string, unknown>;
   dropParams?: string[];

@@ -155,7 +157,8 @@ export function getOpenAILLMConfig({

   if (
     hasReasoningParams({ reasoning_effort, reasoning_summary }) &&
-    (llmConfig.useResponsesApi === true || useOpenRouter)
+    (llmConfig.useResponsesApi === true ||
+      (endpoint !== EModelEndpoint.openAI && endpoint !== EModelEndpoint.azureOpenAI))
   ) {
     llmConfig.reasoning = removeNullishValues(
       {
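The new condition in isolation, as an illustrative sketch (usesReasoningObject is a hypothetical name, not in the diff):

import { EModelEndpoint } from 'librechat-data-provider';

/** Mirrors the branch above: nested `reasoning` object vs. flat `reasoning_effort`. */
function usesReasoningObject(endpoint?: string | null, useResponsesApi?: boolean): boolean {
  return (
    useResponsesApi === true ||
    (endpoint !== EModelEndpoint.openAI && endpoint !== EModelEndpoint.azureOpenAI)
  );
}

usesReasoningObject(EModelEndpoint.openAI);       // false: keeps flat reasoning_effort
usesReasoningObject(EModelEndpoint.azureOpenAI);  // false: keeps flat reasoning_effort
usesReasoningObject(EModelEndpoint.openAI, true); // true: Responses API uses the reasoning object
usesReasoningObject('custom-endpoint');           // true: other endpoints use the reasoning object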
@@ -2,7 +2,7 @@ import { Providers } from '@librechat/agents';
 import { isOpenAILikeProvider, isDocumentSupportedProvider } from 'librechat-data-provider';
 import type { IMongoFile } from '@librechat/data-schemas';
 import type { Request } from 'express';
-import type { StrategyFunctions, DocumentResult } from '~/types/files';
+import type { StrategyFunctions, DocumentResult, AnthropicDocumentBlock } from '~/types/files';
 import { validatePdf } from '~/files/validation';
 import { getFileStream } from './utils';

@@ -69,16 +69,21 @@ export async function encodeAndFormatDocuments(
    }

    if (provider === Providers.ANTHROPIC) {
-      result.documents.push({
+      const document: AnthropicDocumentBlock = {
        type: 'document',
        source: {
          type: 'base64',
          media_type: 'application/pdf',
          data: content,
        },
        cache_control: { type: 'ephemeral' },
        citations: { enabled: true },
-      });
+      };
+
+      if (file.filename) {
+        document.context = `File: "${file.filename}"`;
+      }
+
+      result.documents.push(document);
    } else if (useResponsesApi) {
      result.documents.push({
        type: 'input_file',
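For illustration (not part of the diff), the block now pushed for an Anthropic PDF, with context set only when the file has a name; the payload and filename below are placeholders:

import type { AnthropicDocumentBlock } from '~/types/files';

const document: AnthropicDocumentBlock = {
  type: 'document',
  source: { type: 'base64', media_type: 'application/pdf', data: '<base64-pdf-data>' },
  cache_control: { type: 'ephemeral' },
  citations: { enabled: true },
};
// Only when file.filename is present:
document.context = 'File: "report.pdf"';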
@@ -46,29 +46,51 @@ export interface VideoResult {
   }>;
 }

+/** Anthropic document block format */
+export interface AnthropicDocumentBlock {
+  type: 'document';
+  source: {
+    type: string;
+    media_type: string;
+    data: string;
+  };
+  context?: string;
+  title?: string;
+  cache_control?: { type: string };
+  citations?: { enabled: boolean };
+}
+
+/** Google document block format */
+export interface GoogleDocumentBlock {
+  type: 'document';
+  mimeType: string;
+  data: string;
+}
+
+/** OpenAI file block format */
+export interface OpenAIFileBlock {
+  type: 'file';
+  file: {
+    filename: string;
+    file_data: string;
+  };
+}
+
+/** OpenAI Responses API file format */
+export interface OpenAIInputFileBlock {
+  type: 'input_file';
+  filename: string;
+  file_data: string;
+}
+
+export type DocumentBlock =
+  | AnthropicDocumentBlock
+  | GoogleDocumentBlock
+  | OpenAIFileBlock
+  | OpenAIInputFileBlock;
+
 export interface DocumentResult {
-  documents: Array<{
-    type: 'document' | 'file' | 'input_file';
-    /** Anthropic File Format, `document` */
-    source?: {
-      type: string;
-      media_type: string;
-      data: string;
-    };
-    cache_control?: { type: string };
-    citations?: { enabled: boolean };
-    /** Google File Format, `document` */
-    mimeType?: string;
-    data?: string;
-    /** OpenAI File Format, `file` */
-    file?: {
-      filename?: string;
-      file_data?: string;
-    };
-    /** OpenAI Responses API File Format, `input_file` */
-    filename?: string;
-    file_data?: string;
-  }>;
+  documents: DocumentBlock[];
   files: Array<{
     file_id?: string;
     temp_file_id?: string;
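Since the Anthropic and Google shapes share the type: 'document' discriminant, narrowing the new DocumentBlock union needs a structural check as well. A sketch (isAnthropicBlock is a hypothetical helper, not in the diff):

import type { DocumentBlock, AnthropicDocumentBlock } from '~/types/files';

function isAnthropicBlock(block: DocumentBlock): block is AnthropicDocumentBlock {
  // `source` exists only on the Anthropic shape; Google uses flat mimeType/data
  return block.type === 'document' && 'source' in block;
}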
@@ -10,6 +10,7 @@ export * from './key';
 export * from './llm';
 export * from './math';
 export * from './openid';
+export * from './sanitizeTitle';
 export * from './tempChatRetention';
 export * from './text';
 export { default as Tokenizer } from './tokenizer';
packages/api/src/utils/sanitizeTitle.spec.ts (new file, 217 lines)
@@ -0,0 +1,217 @@
+import { sanitizeTitle } from './sanitizeTitle';
+
+describe('sanitizeTitle', () => {
+  describe('Happy Path', () => {
+    it('should remove a single think block and return the clean title', () => {
+      const input = '<think>This is reasoning about the topic</think> User Hi Greeting';
+      expect(sanitizeTitle(input)).toBe('User Hi Greeting');
+    });
+
+    it('should handle thinking block at the start', () => {
+      const input = '<think>reasoning here</think> Clean Title Text';
+      expect(sanitizeTitle(input)).toBe('Clean Title Text');
+    });
+
+    it('should handle thinking block at the end', () => {
+      const input = 'Clean Title Text <think>reasoning here</think>';
+      expect(sanitizeTitle(input)).toBe('Clean Title Text');
+    });
+
+    it('should handle title without any thinking blocks', () => {
+      const input = 'Just a Normal Title';
+      expect(sanitizeTitle(input)).toBe('Just a Normal Title');
+    });
+  });
+
+  describe('Multiple Blocks', () => {
+    it('should remove multiple think blocks', () => {
+      const input =
+        '<think>reason 1</think> Intro <think>reason 2</think> Middle <think>reason 3</think> Final';
+      expect(sanitizeTitle(input)).toBe('Intro Middle Final');
+    });
+
+    it('should handle consecutive think blocks', () => {
+      const input = '<think>r1</think><think>r2</think>Title';
+      expect(sanitizeTitle(input)).toBe('Title');
+    });
+  });
+
+  describe('Case Insensitivity', () => {
+    it('should handle uppercase THINK tags', () => {
+      const input = '<THINK>reasoning</THINK> Title';
+      expect(sanitizeTitle(input)).toBe('Title');
+    });
+
+    it('should handle mixed case Think tags', () => {
+      const input = '<Think>reasoning</ThInk> Title';
+      expect(sanitizeTitle(input)).toBe('Title');
+    });
+
+    it('should handle mixed case closing tag', () => {
+      const input = '<think>reasoning</THINK> Title';
+      expect(sanitizeTitle(input)).toBe('Title');
+    });
+  });
+
+  describe('Attributes in Tags', () => {
+    it('should remove think tags with attributes', () => {
+      const input = '<think reason="complex logic">reasoning here</think> Title';
+      expect(sanitizeTitle(input)).toBe('Title');
+    });
+
+    it('should handle multiple attributes', () => {
+      const input = '<think reason="test" type="deep" id="1">reasoning</think> Title';
+      expect(sanitizeTitle(input)).toBe('Title');
+    });
+
+    it('should handle single-quoted attributes', () => {
+      const input = "<think reason='explanation'>content</think> Title";
+      expect(sanitizeTitle(input)).toBe('Title');
+    });
+
+    it('should handle unquoted attributes', () => {
+      const input = '<think x=y>reasoning</think> Title';
+      expect(sanitizeTitle(input)).toBe('Title');
+    });
+  });
+
+  describe('Newlines and Multiline Content', () => {
+    it('should handle newlines within the think block', () => {
+      const input = `<think>
+      This is a long reasoning
+      spanning multiple lines
+      with various thoughts
+      </think> Clean Title`;
+      expect(sanitizeTitle(input)).toBe('Clean Title');
+    });
+
+    it('should handle newlines around tags', () => {
+      const input = `
+      <think>reasoning</think>
+      My Title
+      `;
+      expect(sanitizeTitle(input)).toBe('My Title');
+    });
+
+    it('should handle mixed whitespace', () => {
+      const input = '<think>\n\t reasoning \t\n</think>\n Title';
+      expect(sanitizeTitle(input)).toBe('Title');
+    });
+  });
+
+  describe('Whitespace Normalization', () => {
+    it('should collapse multiple spaces', () => {
+      const input = '<think>x</think>  Multiple  Spaces';
+      expect(sanitizeTitle(input)).toBe('Multiple Spaces');
+    });
+
+    it('should collapse mixed whitespace', () => {
+      const input = 'Start \n\t Middle <think>x</think> \n End';
+      expect(sanitizeTitle(input)).toBe('Start Middle End');
+    });
+
+    it('should trim leading whitespace', () => {
+      const input = '  <think>reasoning</think> Title';
+      expect(sanitizeTitle(input)).toBe('Title');
+    });
+
+    it('should trim trailing whitespace', () => {
+      const input = 'Title <think>reasoning</think> \n ';
+      expect(sanitizeTitle(input)).toBe('Title');
+    });
+  });
+
+  describe('Empty and Fallback Cases', () => {
+    it('should return fallback for empty string', () => {
+      expect(sanitizeTitle('')).toBe('Untitled Conversation');
+    });
+
+    it('should return fallback when only whitespace remains', () => {
+      const input = '<think>thinking</think> \n\t\r\n ';
+      expect(sanitizeTitle(input)).toBe('Untitled Conversation');
+    });
+
+    it('should return fallback when only think blocks exist', () => {
+      const input = '<think>just thinking</think><think>more thinking</think>';
+      expect(sanitizeTitle(input)).toBe('Untitled Conversation');
+    });
+
+    it('should return fallback for non-string whitespace', () => {
+      expect(sanitizeTitle('  ')).toBe('Untitled Conversation');
+    });
+  });
+
+  describe('Edge Cases and Real-World', () => {
+    it('should handle long reasoning blocks', () => {
+      const longReasoning =
+        'This is a very long reasoning block ' + 'with lots of text. '.repeat(50);
+      const input = `<think>${longReasoning}</think> Final Title`;
+      expect(sanitizeTitle(input)).toBe('Final Title');
+    });
+
+    it('should handle nested-like patterns', () => {
+      const input = '<think>outer <think>inner</think> end</think> Title';
+      const result = sanitizeTitle(input);
+      expect(result).toContain('Title');
+    });
+
+    it('should handle malformed tags missing closing', () => {
+      const input = '<think>unclosed reasoning. Title';
+      const result = sanitizeTitle(input);
+      expect(result).toContain('Title');
+      expect(result).toContain('<think>');
+    });
+
+    it('should handle real-world LLM example', () => {
+      const input =
+        '<think>\nThe user is asking for a greeting. I should provide a friendly response.\n</think> User Hi Greeting';
+      expect(sanitizeTitle(input)).toBe('User Hi Greeting');
+    });
+
+    it('should handle real-world with attributes', () => {
+      const input = '<think reasoning="multi-step">\nStep 1\nStep 2\n</think> Project Status';
+      expect(sanitizeTitle(input)).toBe('Project Status');
+    });
+  });
+
+  describe('Idempotency', () => {
+    it('should be idempotent', () => {
+      const input = '<think>reasoning</think> My Title';
+      const once = sanitizeTitle(input);
+      const twice = sanitizeTitle(once);
+      expect(once).toBe(twice);
+      expect(once).toBe('My Title');
+    });
+
+    it('should be idempotent with fallback', () => {
+      const input = '<think>only thinking</think>';
+      const once = sanitizeTitle(input);
+      const twice = sanitizeTitle(once);
+      expect(once).toBe(twice);
+      expect(once).toBe('Untitled Conversation');
+    });
+  });
+
+  describe('Return Type Safety', () => {
+    it('should always return a string', () => {
+      expect(typeof sanitizeTitle('<think>x</think> Title')).toBe('string');
+      expect(typeof sanitizeTitle('No blocks')).toBe('string');
+      expect(typeof sanitizeTitle('')).toBe('string');
+    });
+
+    it('should never return empty', () => {
+      expect(sanitizeTitle('')).not.toBe('');
+      expect(sanitizeTitle('  ')).not.toBe('');
+      expect(sanitizeTitle('<think>x</think>')).not.toBe('');
+    });
+
+    it('should never return null or undefined', () => {
+      expect(sanitizeTitle('test')).not.toBeNull();
+      expect(sanitizeTitle('test')).not.toBeUndefined();
+      expect(sanitizeTitle('')).not.toBeNull();
+      expect(sanitizeTitle('')).not.toBeUndefined();
+    });
+  });
+});
packages/api/src/utils/sanitizeTitle.ts (new file, 30 lines)
@@ -0,0 +1,30 @@
+/**
+ * Sanitizes LLM-generated chat titles by removing <think>...</think> reasoning blocks.
+ *
+ * This function strips out all reasoning blocks (with optional attributes and newlines)
+ * and returns a clean title. If the result is empty, a fallback is returned.
+ *
+ * @param rawTitle - The raw LLM-generated title string, potentially containing <think> blocks.
+ * @returns A sanitized title string, never empty (fallback used if needed).
+ */
+export function sanitizeTitle(rawTitle: string): string {
+  const DEFAULT_FALLBACK = 'Untitled Conversation';
+
+  // Step 1: Input Validation
+  if (!rawTitle || typeof rawTitle !== 'string') {
+    return DEFAULT_FALLBACK;
+  }
+
+  // Step 2: Build and apply the regex to remove all <think>...</think> blocks
+  const thinkBlockRegex = /<think\b[^>]*>[\s\S]*?<\/think>/gi;
+  const cleaned = rawTitle.replace(thinkBlockRegex, '');
+
+  // Step 3: Normalize whitespace (collapse multiple spaces/newlines to single space)
+  const normalized = cleaned.replace(/\s+/g, ' ');
+
+  // Step 4: Trim leading and trailing whitespace
+  const trimmed = normalized.trim();
+
+  // Step 5: Return trimmed result or fallback if empty
+  return trimmed.length > 0 ? trimmed : DEFAULT_FALLBACK;
+}
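A quick usage sketch (not part of the diff), with expected results taken from the accompanying spec:

import { sanitizeTitle } from './sanitizeTitle';

sanitizeTitle('<think>reasoning here</think> Clean Title Text'); // 'Clean Title Text'
sanitizeTitle('Start \n\t Middle <think>x</think> \n End');      // 'Start Middle End'
sanitizeTitle('<think>only thinking</think>');                   // 'Untitled Conversation'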
@@ -40,10 +40,10 @@ const openAIModels = {
   'gpt-5': 400000,
   'gpt-5-mini': 400000,
   'gpt-5-nano': 400000,
+  'gpt-5-pro': 400000,
   'gpt-4o': 127500, // -500 from max
   'gpt-4o-mini': 127500, // -500 from max
   'gpt-4o-2024-05-13': 127500, // -500 from max
   'gpt-4o-2024-08-06': 127500, // -500 from max
   'gpt-4-turbo': 127500, // -500 from max
   'gpt-4-vision': 127500, // -500 from max
   'gpt-3.5-turbo': 16375, // -10 from max

@@ -60,9 +60,11 @@ const mistralModels = {
   'mistral-7b': 31990, // -10 from max
   'mistral-small': 31990, // -10 from max
   'mixtral-8x7b': 31990, // -10 from max
+  'mixtral-8x22b': 65536,
   'mistral-large': 131000,
   'mistral-large-2402': 127500,
   'mistral-large-2407': 127500,
+  'mistral-nemo': 131000,
   'pixtral-large': 131000,
   'mistral-saba': 32000,
   codestral: 256000,

@@ -75,6 +77,7 @@ const cohereModels = {
   'command-light-nightly': 8182, // -10 from max
   command: 4086, // -10 from max
   'command-nightly': 8182, // -10 from max
+  'command-text': 4086, // -10 from max
   'command-r': 127500, // -500 from max
   'command-r-plus': 127500, // -500 from max
 };

@@ -127,14 +130,17 @@ const anthropicModels = {
   'claude-3.7-sonnet': 200000,
   'claude-3-5-sonnet-latest': 200000,
   'claude-3.5-sonnet-latest': 200000,
+  'claude-haiku-4-5': 200000,
   'claude-sonnet-4': 1000000,
   'claude-opus-4': 200000,
   'claude-4': 200000,
 };

 const deepseekModels = {
-  'deepseek-reasoner': 128000,
   deepseek: 128000,
+  'deepseek-reasoner': 128000,
+  'deepseek-r1': 128000,
+  'deepseek-v3': 128000,
+  'deepseek.r1': 128000,
 };

@@ -200,32 +206,57 @@ const metaModels = {
   'llama2:70b': 4000,
 };

-const ollamaModels = {
+const qwenModels = {
   qwen: 32000,
+  'qwen2.5': 32000,
+  'qwen-turbo': 1000000,
+  'qwen-plus': 131000,
+  'qwen-max': 32000,
+  'qwq-32b': 32000,
+  // Qwen3 models
+  qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context)
+  'qwen3-8b': 128000,
+  'qwen3-14b': 40960,
+  'qwen3-30b-a3b': 40960,
+  'qwen3-32b': 40960,
+  'qwen3-235b-a22b': 40960,
+  // Qwen3 VL (Vision-Language) models
+  'qwen3-vl-8b-thinking': 256000,
+  'qwen3-vl-8b-instruct': 262144,
+  'qwen3-vl-30b-a3b': 262144,
+  'qwen3-vl-235b-a22b': 131072,
+  // Qwen3 specialized models
+  'qwen3-max': 256000,
+  'qwen3-coder': 262144,
+  'qwen3-coder-30b-a3b': 262144,
+  'qwen3-coder-plus': 128000,
+  'qwen3-coder-flash': 128000,
+  'qwen3-next-80b-a3b': 262144,
 };

 const ai21Models = {
-  'ai21.j2-mid-v1': 8182, // -10 from max
-  'ai21.j2-ultra-v1': 8182, // -10 from max
-  'ai21.jamba-instruct-v1:0': 255500, // -500 from max
+  'j2-mid': 8182, // -10 from max
+  'j2-ultra': 8182, // -10 from max
+  'jamba-instruct': 255500, // -500 from max
 };

 const amazonModels = {
-  'amazon.titan-text-lite-v1': 4000,
-  'amazon.titan-text-express-v1': 8000,
-  'amazon.titan-text-premier-v1:0': 31500, // -500 from max
+  // Amazon Titan models
+  'titan-text-lite': 4000,
+  'titan-text-express': 8000,
+  'titan-text-premier': 31500, // -500 from max
   // Amazon Nova models
   // https://aws.amazon.com/ai/generative-ai/nova/
-  'amazon.nova-micro-v1:0': 127000, // -1000 from max,
-  'amazon.nova-lite-v1:0': 295000, // -5000 from max,
-  'amazon.nova-pro-v1:0': 295000, // -5000 from max,
-  'amazon.nova-premier-v1:0': 995000, // -5000 from max,
+  'nova-micro': 127000, // -1000 from max
+  'nova-lite': 295000, // -5000 from max
+  'nova-pro': 295000, // -5000 from max
+  'nova-premier': 995000, // -5000 from max
 };

 const bedrockModels = {
   ...anthropicModels,
   ...mistralModels,
   ...cohereModels,
-  ...ollamaModels,
   ...deepseekModels,
   ...metaModels,
   ...ai21Models,

@@ -254,6 +285,7 @@ const aggregateModels = {
   ...googleModels,
   ...bedrockModels,
   ...xAIModels,
+  ...qwenModels,
   // misc.
   kimi: 131000,
   // GPT-OSS

@@ -289,6 +321,7 @@ export const modelMaxOutputs = {
   'gpt-5': 128000,
   'gpt-5-mini': 128000,
   'gpt-5-nano': 128000,
+  'gpt-5-pro': 128000,
   'gpt-oss-20b': 131000,
   'gpt-oss-120b': 131000,
   system_default: 32000,

@@ -299,6 +332,7 @@ const anthropicMaxOutputs = {
   'claude-3-haiku': 4096,
   'claude-3-sonnet': 4096,
   'claude-3-opus': 4096,
+  'claude-haiku-4-5': 64000,
   'claude-opus-4': 32000,
   'claude-sonnet-4': 64000,
   'claude-3.5-sonnet': 8192,