👤 feat: AWS Bedrock Custom Inference Profiles (#11308)

* feat: add support for inferenceProfiles mapping * fix: remove friendly name since api requires actual model id for validation alongside inference profile * docs: more generic description in docs * chore: address comments * chore: update peer dependency versions in package.json - Bump @aws-sdk/client-bedrock-runtime from ^3.941.0 to ^3.970.0 - Update @librechat/agents from ^3.0.78 to ^3.0.79 * fix: update @librechat/agents dependency to version 3.0.80 * test: add unit tests for inference profile configuration in initializeBedrock function - Introduced tests to validate the applicationInferenceProfile setting based on model configuration. - Ensured correct handling of environment variables and fallback scenarios for inference profile ARNs. - Added cases for empty inferenceProfiles and absence of bedrock config to confirm expected behavior. * fix: update bedrock endpoint schema reference in config - Changed the bedrock endpoint reference from baseEndpointSchema to bedrockEndpointSchema for improved clarity and accuracy in configuration. * test: add unit tests for Bedrock endpoint configuration - Introduced tests to validate the configuration of Bedrock endpoints with models and inference profiles. - Added scenarios for both complete and minimal configurations to ensure expected behavior. - Enhanced coverage for the handling of inference profiles without a models array. --------- Co-authored-by: Danny Avila <danny@librechat.ai>
2026-01-28 21:26:13 +01:00 · 2026-01-16 10:52:58 -08:00 · 2026-01-16 10:52:58 -08:00 · bd49693afc
commit bd49693afc
parent cc32895d13
10 changed files with 2081 additions and 1776 deletions
--- a/api/package.json
+++ b/api/package.json
@ -36,7 +36,7 @@
  "dependencies": {
    "@anthropic-ai/sdk": "^0.71.0",
    "@anthropic-ai/vertex-sdk": "^0.14.0",
-    "@aws-sdk/client-bedrock-runtime": "^3.941.0",
+    "@aws-sdk/client-bedrock-runtime": "^3.970.0",
    "@aws-sdk/client-s3": "^3.758.0",
    "@aws-sdk/s3-request-presigner": "^3.758.0",
    "@azure/identity": "^4.7.0",
@ -45,7 +45,7 @@
    "@google/genai": "^1.19.0",
    "@keyv/redis": "^4.3.3",
    "@langchain/core": "^0.3.80",
-    "@librechat/agents": "^3.0.78",
+    "@librechat/agents": "^3.0.80",
    "@librechat/api": "*",
    "@librechat/data-schemas": "*",
    "@microsoft/microsoft-graph-client": "^3.0.7",
--- a/api/server/services/Config/loadConfigModels.js
+++ b/api/server/services/Config/loadConfigModels.js
@ -28,6 +28,11 @@ async function loadConfigModels(req) {
    modelsConfig[EModelEndpoint.azureAssistants] = azureConfig.assistantModels;
  }
  const bedrockConfig = appConfig.endpoints?.[EModelEndpoint.bedrock];
  if (bedrockConfig?.models && Array.isArray(bedrockConfig.models)) {
    modelsConfig[EModelEndpoint.bedrock] = bedrockConfig.models;
  }
  if (!Array.isArray(appConfig.endpoints?.[EModelEndpoint.custom])) {
    return modelsConfig;
  }
--- a/librechat.example.yaml
+++ b/librechat.example.yaml
@ -446,6 +446,21 @@ endpoints:
  # AWS Bedrock Example
  # Note: Bedrock endpoint is configured via environment variables
  # bedrock:
  #   # Models Configuration
  #   # Specify which models are available (equivalent to BEDROCK_AWS_MODELS env variable)
  #   models:
  #     - "anthropic.claude-3-7-sonnet-20250219-v1:0"
  #     - "anthropic.claude-3-5-sonnet-20241022-v2:0"
  #
  #   # Inference Profiles Configuration
  #   # Maps model IDs to their inference profile ARNs
  #   # IMPORTANT: The model ID (key) MUST be a valid AWS Bedrock model ID and must exactly match the model selected for the request (e.g. one you've added to the models list above)
  #   # The ARN (value) is the inference profile you wish to map to for that model
  #   # Both the model ID and ARN are sent to AWS - the model ID for validation/metadata, the ARN for routing
  #   inferenceProfiles:
  #     "us.anthropic.claude-sonnet-4-20250514-v1:0": "${BEDROCK_INFERENCE_PROFILE_CLAUDE_SONNET}"
  #     "anthropic.claude-3-7-sonnet-20250219-v1:0": "arn:aws:bedrock:us-west-2:123456789012:application-inference-profile/abc123"
  #
  #   # Guardrail Configuration
  #   guardrailConfig:
  #     guardrailIdentifier: "your-guardrail-id"
@ -457,7 +472,6 @@ endpoints:
  #     # - "disabled": No trace information (default)
  #     # Trace output is logged to application log files for compliance auditing
  #     trace: "enabled"
 # Example modelSpecs configuration showing grouping options
 # The 'group' field organizes model specs in the UI selector:
 # - If 'group' matches an endpoint name (e.g., "openAI", "groq"), the spec appears nested under that endpoint
--- a/package-lock.json
+++ b/package-lock.json
--- a/packages/api/package.json
+++ b/packages/api/package.json
@ -79,7 +79,7 @@
  },
  "peerDependencies": {
    "@anthropic-ai/vertex-sdk": "^0.14.0",
-    "@aws-sdk/client-bedrock-runtime": "^3.941.0",
+    "@aws-sdk/client-bedrock-runtime": "^3.970.0",
    "@aws-sdk/client-s3": "^3.758.0",
    "@azure/identity": "^4.7.0",
    "@azure/search-documents": "^12.0.0",
@ -87,7 +87,7 @@
    "@google/genai": "^1.19.0",
    "@keyv/redis": "^4.3.3",
    "@langchain/core": "^0.3.80",
-    "@librechat/agents": "^3.0.78",
+    "@librechat/agents": "^3.0.80",
    "@librechat/data-schemas": "*",
    "@modelcontextprotocol/sdk": "^1.25.2",
    "@smithy/node-http-handler": "^4.4.5",
--- a/packages/api/src/app/AppService.spec.ts
+++ b/packages/api/src/app/AppService.spec.ts
@ -611,6 +611,78 @@ describe('AppService', () => {
    );
  });
  it('should correctly configure Bedrock endpoint with models and inferenceProfiles', async () => {
    /** Model IDs shared by the model list, the profile keys, and the title model. */
    const claude37 = 'us.anthropic.claude-3-7-sonnet-20250219-v1:0';
    const sonnet45 = 'us.anthropic.claude-sonnet-4-5-20250929-v1:0';
    const opus45 = 'global.anthropic.claude-opus-4-5-20251101-v1:0';

    /** Factories hand out fresh copies so the input config and the expectation share no references. */
    const buildModels = () => [claude37, sonnet45, opus45];
    const buildProfiles = () => ({
      [claude37]: 'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/abc123',
      [sonnet45]: '${BEDROCK_SONNET_45_PROFILE}',
    });
    const buildRegions = () => ['us-east-1', 'us-west-2'];

    const config: Partial<TCustomConfig> = {
      endpoints: {
        [EModelEndpoint.bedrock]: {
          models: buildModels(),
          inferenceProfiles: buildProfiles(),
          availableRegions: buildRegions(),
          titleConvo: true,
          titleModel: claude37,
        },
      },
    };

    const result = await AppService({ config });

    /** Every configured Bedrock field is expected on the resulting endpoint entry. */
    const expectedBedrock = expect.objectContaining({
      models: expect.arrayContaining(buildModels()),
      inferenceProfiles: expect.objectContaining(buildProfiles()),
      availableRegions: expect.arrayContaining(buildRegions()),
      titleConvo: true,
      titleModel: claude37,
    });
    expect(result).toEqual(
      expect.objectContaining({
        endpoints: expect.objectContaining({ [EModelEndpoint.bedrock]: expectedBedrock }),
      }),
    );
  });
  it('should configure Bedrock endpoint with only inferenceProfiles (no models array)', async () => {
    /** Minimal config: a single profile mapping and no `models` array. */
    const modelId = 'us.anthropic.claude-3-7-sonnet-20250219-v1:0';
    const profileRef = '${BEDROCK_INFERENCE_PROFILE_ARN}';
    const config: Partial<TCustomConfig> = {
      endpoints: {
        [EModelEndpoint.bedrock]: { inferenceProfiles: { [modelId]: profileRef } },
      },
    };

    const result = await AppService({ config });

    const expectedBedrock = expect.objectContaining({
      inferenceProfiles: expect.objectContaining({ [modelId]: profileRef }),
    });
    expect(result).toEqual(
      expect.objectContaining({
        endpoints: expect.objectContaining({ [EModelEndpoint.bedrock]: expectedBedrock }),
      }),
    );
  });
  it('should correctly configure all endpoint when specified', async () => {
    const config: Partial<TCustomConfig> = {
      endpoints: {
--- a/packages/api/src/endpoints/bedrock/initialize.spec.ts
+++ b/packages/api/src/endpoints/bedrock/initialize.spec.ts
@ -313,4 +313,304 @@ describe('initializeBedrock', () => {
      expect(typeof result.configOptions).toBe('object');
    });
  });
  describe('Inference Profile Configuration', () => {
    it('should set applicationInferenceProfile when model has matching inference profile config', async () => {
      /** The requested model is the same ID the profile is registered under. */
      const modelId = 'us.anthropic.claude-3-7-sonnet-20250219-v1:0';
      const profileArn =
        'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/abc123';
      const mockParams = createMockParams({
        config: {
          endpoints: {
            [EModelEndpoint.bedrock]: {
              inferenceProfiles: { [modelId]: profileArn },
            },
          },
        },
        model_parameters: { model: modelId },
      });

      const { llmConfig } = (await initializeBedrock(mockParams)) as BedrockLLMConfigResult;

      expect(llmConfig).toHaveProperty('applicationInferenceProfile', profileArn);
    });
    it('should NOT set applicationInferenceProfile when model has no matching config', async () => {
      /** A profile exists only for Sonnet 4.5, while the request asks for Claude 3.7. */
      const configuredModel = 'us.anthropic.claude-sonnet-4-5-20250929-v1:0';
      const requestedModel = 'us.anthropic.claude-3-7-sonnet-20250219-v1:0';
      const mockParams = createMockParams({
        config: {
          endpoints: {
            [EModelEndpoint.bedrock]: {
              inferenceProfiles: {
                [configuredModel]:
                  'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/xyz789',
              },
            },
          },
        },
        model_parameters: { model: requestedModel },
      });

      const { llmConfig } = (await initializeBedrock(mockParams)) as BedrockLLMConfigResult;

      expect(llmConfig).not.toHaveProperty('applicationInferenceProfile');
    });
    it('should resolve environment variable in inference profile ARN', async () => {
      // Fixture ARN uses the 123456789012 placeholder account ID, matching the other
      // fixtures in this suite, instead of an identifier that looks like a real account.
      const inferenceProfileArn =
        'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/env-resolved';
      // The config below references this variable through `${...}` syntax.
      process.env.BEDROCK_INFERENCE_PROFILE_ARN = inferenceProfileArn;
      const params = createMockParams({
        config: {
          endpoints: {
            [EModelEndpoint.bedrock]: {
              inferenceProfiles: {
                'us.anthropic.claude-3-7-sonnet-20250219-v1:0': '${BEDROCK_INFERENCE_PROFILE_ARN}',
              },
            },
          },
        },
        model_parameters: {
          model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
        },
      });
      const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
      // The placeholder must be replaced by the value read from the environment.
      expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', inferenceProfileArn);
    });
    it('should use direct ARN when no env variable syntax is used', async () => {
      const modelId = 'us.anthropic.claude-3-7-sonnet-20250219-v1:0';
      /** Plain ARN containing no `${...}` placeholder. */
      const literalArn =
        'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/direct123';
      const mockParams = createMockParams({
        config: {
          endpoints: {
            [EModelEndpoint.bedrock]: {
              inferenceProfiles: { [modelId]: literalArn },
            },
          },
        },
        model_parameters: { model: modelId },
      });

      const { llmConfig } = (await initializeBedrock(mockParams)) as BedrockLLMConfigResult;

      expect(llmConfig).toHaveProperty('applicationInferenceProfile', literalArn);
    });
    it('should fall back to original string when env variable is not set', async () => {
      const modelId = 'us.anthropic.claude-3-7-sonnet-20250219-v1:0';
      const unresolvedRef = '${NONEXISTENT_PROFILE_ARN}';
      // Guarantee the referenced variable is absent before initializing
      delete process.env.NONEXISTENT_PROFILE_ARN;
      const mockParams = createMockParams({
        config: {
          endpoints: {
            [EModelEndpoint.bedrock]: {
              inferenceProfiles: { [modelId]: unresolvedRef },
            },
          },
        },
        model_parameters: { model: modelId },
      });

      const { llmConfig } = (await initializeBedrock(mockParams)) as BedrockLLMConfigResult;

      // With no matching env var, the `${VAR}` placeholder is expected back verbatim
      expect(llmConfig).toHaveProperty('applicationInferenceProfile', unresolvedRef);
    });
    it('should resolve multiple different env variables for different models', async () => {
      const claude37Model = 'us.anthropic.claude-3-7-sonnet-20250219-v1:0';
      const sonnet45Model = 'us.anthropic.claude-sonnet-4-5-20250929-v1:0';
      const claude37Profile =
        'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/claude37';
      const sonnet45Profile =
        'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/sonnet45';
      // Each model's profile entry points at its own environment variable
      process.env.CLAUDE_37_PROFILE = claude37Profile;
      process.env.SONNET_45_PROFILE = sonnet45Profile;
      const mockParams = createMockParams({
        config: {
          endpoints: {
            [EModelEndpoint.bedrock]: {
              inferenceProfiles: {
                [claude37Model]: '${CLAUDE_37_PROFILE}',
                [sonnet45Model]: '${SONNET_45_PROFILE}',
              },
            },
          },
        },
        model_parameters: { model: claude37Model },
      });

      const { llmConfig } = (await initializeBedrock(mockParams)) as BedrockLLMConfigResult;

      // Only the variable mapped to the requested model should be used
      expect(llmConfig).toHaveProperty('applicationInferenceProfile', claude37Profile);
    });
    it('should handle env variable with whitespace around it', async () => {
      const modelId = 'us.anthropic.claude-3-7-sonnet-20250219-v1:0';
      const resolvedArn =
        'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/trimmed';
      /** Placeholder padded with two spaces on each side. */
      const paddedRef = '  ${TRIMMED_PROFILE_ARN}  ';
      process.env.TRIMMED_PROFILE_ARN = resolvedArn;
      const mockParams = createMockParams({
        config: {
          endpoints: {
            [EModelEndpoint.bedrock]: {
              inferenceProfiles: { [modelId]: paddedRef },
            },
          },
        },
        model_parameters: { model: modelId },
      });

      const { llmConfig } = (await initializeBedrock(mockParams)) as BedrockLLMConfigResult;

      expect(llmConfig).toHaveProperty('applicationInferenceProfile', resolvedArn);
    });
    it('should NOT set applicationInferenceProfile when inferenceProfiles config is empty', async () => {
      /** The Bedrock endpoint is configured, but its profile map has no entries. */
      const mockParams = createMockParams({
        config: {
          endpoints: {
            [EModelEndpoint.bedrock]: { inferenceProfiles: {} },
          },
        },
        model_parameters: { model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0' },
      });

      const { llmConfig } = (await initializeBedrock(mockParams)) as BedrockLLMConfigResult;

      expect(llmConfig).not.toHaveProperty('applicationInferenceProfile');
    });
    it('should NOT set applicationInferenceProfile when no bedrock config exists', async () => {
      /** Empty app config: there is no `endpoints` section at all. */
      const mockParams = createMockParams({
        config: {},
        model_parameters: { model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0' },
      });

      const { llmConfig } = (await initializeBedrock(mockParams)) as BedrockLLMConfigResult;

      expect(llmConfig).not.toHaveProperty('applicationInferenceProfile');
    });
    it('should handle multiple inference profiles and select the correct one', async () => {
      const sonnet45Model = 'us.anthropic.claude-sonnet-4-5-20250929-v1:0';
      const claude37Model = 'us.anthropic.claude-3-7-sonnet-20250219-v1:0';
      const opus45Model = 'global.anthropic.claude-opus-4-5-20251101-v1:0';
      const sonnet45Profile =
        'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/sonnet45';
      const claude37Profile =
        'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/claude37';
      const mockParams = createMockParams({
        config: {
          endpoints: {
            [EModelEndpoint.bedrock]: {
              inferenceProfiles: {
                [sonnet45Model]: sonnet45Profile,
                [claude37Model]: claude37Profile,
                [opus45Model]:
                  'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/opus45',
              },
            },
          },
        },
        model_parameters: { model: claude37Model },
      });

      const { llmConfig } = (await initializeBedrock(mockParams)) as BedrockLLMConfigResult;

      // Of the three registered profiles, the one keyed by the requested model is expected
      expect(llmConfig).toHaveProperty('applicationInferenceProfile', claude37Profile);
    });
    it('should work alongside guardrailConfig', async () => {
      const modelId = 'us.anthropic.claude-3-7-sonnet-20250219-v1:0';
      const profileArn =
        'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/abc123';
      const guardrailConfig = {
        guardrailIdentifier: 'test-guardrail',
        guardrailVersion: '1',
      };
      const mockParams = createMockParams({
        config: {
          endpoints: {
            [EModelEndpoint.bedrock]: {
              inferenceProfiles: { [modelId]: profileArn },
              guardrailConfig,
            },
          },
        },
        model_parameters: { model: modelId },
      });

      const { llmConfig } = (await initializeBedrock(mockParams)) as BedrockLLMConfigResult;

      // Both settings must be present on the same llmConfig
      expect(llmConfig).toHaveProperty('applicationInferenceProfile', profileArn);
      expect(llmConfig).toHaveProperty('guardrailConfig', guardrailConfig);
    });
    it('should preserve the original model ID in llmConfig.model', async () => {
      const modelId = 'us.anthropic.claude-3-7-sonnet-20250219-v1:0';
      const profileArn =
        'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/abc123';
      const mockParams = createMockParams({
        config: {
          endpoints: {
            [EModelEndpoint.bedrock]: {
              inferenceProfiles: { [modelId]: profileArn },
            },
          },
        },
        model_parameters: { model: modelId },
      });

      const { llmConfig } = (await initializeBedrock(mockParams)) as BedrockLLMConfigResult;

      // The profile is reported separately; `model` must still hold the requested model ID
      expect(llmConfig).toHaveProperty('model', modelId);
      expect(llmConfig).toHaveProperty('applicationInferenceProfile', profileArn);
    });
  });
 });
--- a/packages/api/src/endpoints/bedrock/initialize.ts
+++ b/packages/api/src/endpoints/bedrock/initialize.ts
@ -4,6 +4,7 @@ import { BedrockRuntimeClient } from '@aws-sdk/client-bedrock-runtime';
 import {
  AuthType,
  EModelEndpoint,
  extractEnvVariable,
  bedrockInputParser,
  bedrockOutputParser,
  removeNullishValues,
@ -13,6 +14,7 @@ import type {
  InitializeResultBase,
  BedrockCredentials,
  GuardrailConfiguration,
  InferenceProfileConfig,
 } from '~/types';
 import { checkUserKeyExpiry } from '~/utils';
@ -49,7 +51,10 @@ export async function initializeBedrock({
  void endpoint;
  const appConfig = req.config;
  const bedrockConfig = appConfig?.endpoints?.[EModelEndpoint.bedrock] as
-    | ({ guardrailConfig?: GuardrailConfiguration } & Record<string, unknown>)
+    | ({
        guardrailConfig?: GuardrailConfiguration;
        inferenceProfiles?: InferenceProfileConfig;
      } & Record<string, unknown>)
    | undefined;
  const {
@ -105,17 +110,25 @@ export async function initializeBedrock({
      }),
    ),
  ) as InitializeResultBase['llmConfig'] & {
    model?: string;
    region?: string;
    client?: BedrockRuntimeClient;
    credentials?: BedrockCredentials;
    endpointHost?: string;
    guardrailConfig?: GuardrailConfiguration;
    applicationInferenceProfile?: string;
  };
  if (bedrockConfig?.guardrailConfig) {
    llmConfig.guardrailConfig = bedrockConfig.guardrailConfig;
  }
  const model = model_parameters?.model as string | undefined;
  if (model && bedrockConfig?.inferenceProfiles?.[model]) {
    const applicationInferenceProfile = extractEnvVariable(bedrockConfig.inferenceProfiles[model]);
    llmConfig.applicationInferenceProfile = applicationInferenceProfile;
  }
  /** Only include credentials if they're complete (accessKeyId and secretAccessKey are both set) */
  const hasCompleteCredentials =
    credentials &&
--- a/packages/api/src/types/bedrock.ts
+++ b/packages/api/src/types/bedrock.ts
@ -21,6 +21,13 @@ export interface GuardrailConfiguration {
  trace?: 'enabled' | 'disabled' | 'enabled_full';
 }
 /**
 * AWS Bedrock inference profile mapping.
 * Each key is a model ID; each value is the inference profile ARN for that model
 * (or a `${ENV_VAR}` reference that is resolved when the endpoint is initialized).
 * @see https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles.html
 */
 export type InferenceProfileConfig = { [modelId: string]: string };
 /**
 * Configuration options for Bedrock LLM
 */
@ -36,6 +43,8 @@ export interface BedrockConfigOptions {
  endpointHost?: string;
  /** Guardrail configuration for content filtering */
  guardrailConfig?: GuardrailConfiguration;
  /** Inference profile ARNs keyed by Bedrock model ID */
  inferenceProfiles?: InferenceProfileConfig;
 }
 /**
@ -48,6 +57,7 @@ export interface BedrockLLMConfigResult {
    credentials?: BedrockCredentials;
    endpointHost?: string;
    guardrailConfig?: GuardrailConfiguration;
    applicationInferenceProfile?: string;
  };
  configOptions: Record<string, unknown>;
 }
--- a/packages/data-provider/src/config.ts
+++ b/packages/data-provider/src/config.ts
@ -212,6 +212,8 @@ export type TBaseEndpoint = z.infer<typeof baseEndpointSchema>;
 /** Bedrock-only options (all optional): region list, model list, and model-ID-to-profile map. */
 const bedrockEndpointExtras = z.object({
    availableRegions: z.array(z.string()).optional(),
    models: z.array(z.string()).optional(),
    inferenceProfiles: z.record(z.string(), z.string()).optional(),
 });

 /** Base endpoint schema merged with the Bedrock-only options above. */
 export const bedrockEndpointSchema = baseEndpointSchema.merge(bedrockEndpointExtras);
@ -983,7 +985,7 @@ export const configSchema = z.object({
      [EModelEndpoint.assistants]: assistantEndpointSchema.optional(),
      [EModelEndpoint.agents]: agentsEndpointSchema.optional(),
      [EModelEndpoint.custom]: customEndpointsSchema.optional(),
-      [EModelEndpoint.bedrock]: baseEndpointSchema.optional(),
+      [EModelEndpoint.bedrock]: bedrockEndpointSchema.optional(),
    })
    .strict()
    .refine((data) => Object.keys(data).length > 0, {