👤 feat: AWS Bedrock Custom Inference Profiles (#11308)

* feat: add support for inferenceProfiles mapping

* fix: remove friendly name, since the API requires the actual model ID for validation alongside the inference profile

* docs: more generic description in docs

* chore: address comments

* chore: update peer dependency versions in package.json

- Bump @aws-sdk/client-bedrock-runtime from ^3.941.0 to ^3.970.0
- Update @librechat/agents from ^3.0.78 to ^3.0.79

* fix: update @librechat/agents dependency to version 3.0.80

* test: add unit tests for inference profile configuration in initializeBedrock function

- Introduced tests to validate the applicationInferenceProfile setting based on model configuration.
- Ensured correct handling of environment variables and fallback scenarios for inference profile ARNs.
- Added cases for empty inferenceProfiles and absence of bedrock config to confirm expected behavior.

* fix: update bedrock endpoint schema reference in config

- Changed the bedrock endpoint reference from baseEndpointSchema to bedrockEndpointSchema for improved clarity and accuracy in configuration.

* test: add unit tests for Bedrock endpoint configuration

- Introduced tests to validate the configuration of Bedrock endpoints with models and inference profiles.
- Added scenarios for both complete and minimal configurations to ensure expected behavior.
- Enhanced coverage for the handling of inference profiles without a models array.

---------

Co-authored-by: Danny Avila <danny@librechat.ai>
This commit is contained in:
Dustin Healy 2026-01-16 10:52:58 -08:00 committed by Danny Avila
parent 75c02a1a18
commit bb220f1af9
No known key found for this signature in database
GPG key ID: BF31EEB2C5CA0956
10 changed files with 2081 additions and 1776 deletions

View file

@ -313,4 +313,304 @@ describe('initializeBedrock', () => {
expect(typeof result.configOptions).toBe('object');
});
});
/**
 * Tests how `initializeBedrock` populates `llmConfig.applicationInferenceProfile`
 * from the Bedrock endpoint's `inferenceProfiles` mapping (model ID -> profile ARN
 * or `${ENV_VAR}` placeholder).
 */
describe('Inference Profile Configuration', () => {
  /** Model ID requested in every test of this suite. */
  const CLAUDE_37_MODEL = 'us.anthropic.claude-3-7-sonnet-20250219-v1:0';
  /** A second model ID, used to populate non-matching / additional mapping entries. */
  const SONNET_45_MODEL = 'us.anthropic.claude-sonnet-4-5-20250929-v1:0';

  /** Every environment variable that tests in this suite set or delete. */
  const PROFILE_ENV_KEYS = [
    'BEDROCK_INFERENCE_PROFILE_ARN',
    'NONEXISTENT_PROFILE_ARN',
    'CLAUDE_37_PROFILE',
    'SONNET_45_PROFILE',
    'TRIMMED_PROFILE_ARN',
  ] as const;

  /** Snapshot of the variables above, taken before each test. */
  const savedEnv = new Map<string, string | undefined>();

  beforeEach(() => {
    for (const key of PROFILE_ENV_KEYS) {
      savedEnv.set(key, process.env[key]);
    }
  });

  // Restore the snapshot so env values written by one test cannot leak into
  // another test (or another suite running in the same worker).
  afterEach(() => {
    for (const key of PROFILE_ENV_KEYS) {
      const previous = savedEnv.get(key);
      if (previous === undefined) {
        delete process.env[key];
      } else {
        process.env[key] = previous;
      }
    }
    savedEnv.clear();
  });

  it('should set applicationInferenceProfile when model has matching inference profile config', async () => {
    const inferenceProfileArn =
      'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/abc123';
    const params = createMockParams({
      config: {
        endpoints: {
          [EModelEndpoint.bedrock]: {
            inferenceProfiles: {
              [CLAUDE_37_MODEL]: inferenceProfileArn,
            },
          },
        },
      },
      model_parameters: {
        model: CLAUDE_37_MODEL,
      },
    });
    const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
    expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', inferenceProfileArn);
  });

  it('should NOT set applicationInferenceProfile when model has no matching config', async () => {
    const params = createMockParams({
      config: {
        endpoints: {
          [EModelEndpoint.bedrock]: {
            inferenceProfiles: {
              [SONNET_45_MODEL]:
                'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/xyz789',
            },
          },
        },
      },
      model_parameters: {
        model: CLAUDE_37_MODEL, // Different model than the one mapped above
      },
    });
    const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
    expect(result.llmConfig).not.toHaveProperty('applicationInferenceProfile');
  });

  it('should resolve environment variable in inference profile ARN', async () => {
    // Placeholder account ID, consistent with the other fixtures in this suite.
    const inferenceProfileArn =
      'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/envprofile';
    process.env.BEDROCK_INFERENCE_PROFILE_ARN = inferenceProfileArn;
    const params = createMockParams({
      config: {
        endpoints: {
          [EModelEndpoint.bedrock]: {
            inferenceProfiles: {
              [CLAUDE_37_MODEL]: '${BEDROCK_INFERENCE_PROFILE_ARN}',
            },
          },
        },
      },
      model_parameters: {
        model: CLAUDE_37_MODEL,
      },
    });
    const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
    expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', inferenceProfileArn);
  });

  it('should use direct ARN when no env variable syntax is used', async () => {
    const directArn =
      'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/direct123';
    const params = createMockParams({
      config: {
        endpoints: {
          [EModelEndpoint.bedrock]: {
            inferenceProfiles: {
              [CLAUDE_37_MODEL]: directArn,
            },
          },
        },
      },
      model_parameters: {
        model: CLAUDE_37_MODEL,
      },
    });
    const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
    expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', directArn);
  });

  it('should fall back to original string when env variable is not set', async () => {
    // Ensure the env var is not set
    delete process.env.NONEXISTENT_PROFILE_ARN;
    const params = createMockParams({
      config: {
        endpoints: {
          [EModelEndpoint.bedrock]: {
            inferenceProfiles: {
              [CLAUDE_37_MODEL]: '${NONEXISTENT_PROFILE_ARN}',
            },
          },
        },
      },
      model_parameters: {
        model: CLAUDE_37_MODEL,
      },
    });
    const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
    // Should return the original ${VAR} string when env var doesn't exist
    expect(result.llmConfig).toHaveProperty(
      'applicationInferenceProfile',
      '${NONEXISTENT_PROFILE_ARN}',
    );
  });

  it('should resolve multiple different env variables for different models', async () => {
    const claude37Arn =
      'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/claude37';
    const sonnet45Arn =
      'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/sonnet45';
    process.env.CLAUDE_37_PROFILE = claude37Arn;
    process.env.SONNET_45_PROFILE = sonnet45Arn;
    const params = createMockParams({
      config: {
        endpoints: {
          [EModelEndpoint.bedrock]: {
            inferenceProfiles: {
              [CLAUDE_37_MODEL]: '${CLAUDE_37_PROFILE}',
              [SONNET_45_MODEL]: '${SONNET_45_PROFILE}',
            },
          },
        },
      },
      model_parameters: {
        model: CLAUDE_37_MODEL,
      },
    });
    const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
    expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', claude37Arn);
  });

  it('should handle env variable with whitespace around it', async () => {
    const inferenceProfileArn =
      'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/trimmed';
    process.env.TRIMMED_PROFILE_ARN = inferenceProfileArn;
    const params = createMockParams({
      config: {
        endpoints: {
          [EModelEndpoint.bedrock]: {
            inferenceProfiles: {
              // Surrounding spaces are intentional: the placeholder must still resolve.
              [CLAUDE_37_MODEL]: ' ${TRIMMED_PROFILE_ARN} ',
            },
          },
        },
      },
      model_parameters: {
        model: CLAUDE_37_MODEL,
      },
    });
    const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
    expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', inferenceProfileArn);
  });

  it('should NOT set applicationInferenceProfile when inferenceProfiles config is empty', async () => {
    const params = createMockParams({
      config: {
        endpoints: {
          [EModelEndpoint.bedrock]: {
            inferenceProfiles: {},
          },
        },
      },
      model_parameters: {
        model: CLAUDE_37_MODEL,
      },
    });
    const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
    expect(result.llmConfig).not.toHaveProperty('applicationInferenceProfile');
  });

  it('should NOT set applicationInferenceProfile when no bedrock config exists', async () => {
    const params = createMockParams({
      config: {},
      model_parameters: {
        model: CLAUDE_37_MODEL,
      },
    });
    const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
    expect(result.llmConfig).not.toHaveProperty('applicationInferenceProfile');
  });

  it('should handle multiple inference profiles and select the correct one', async () => {
    const sonnet45Arn =
      'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/sonnet45';
    const claude37Arn =
      'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/claude37';
    const params = createMockParams({
      config: {
        endpoints: {
          [EModelEndpoint.bedrock]: {
            inferenceProfiles: {
              [SONNET_45_MODEL]: sonnet45Arn,
              [CLAUDE_37_MODEL]: claude37Arn,
              'global.anthropic.claude-opus-4-5-20251101-v1:0':
                'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/opus45',
            },
          },
        },
      },
      model_parameters: {
        model: CLAUDE_37_MODEL,
      },
    });
    const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
    expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', claude37Arn);
  });

  it('should work alongside guardrailConfig', async () => {
    const inferenceProfileArn =
      'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/abc123';
    const guardrailConfig = {
      guardrailIdentifier: 'test-guardrail',
      guardrailVersion: '1',
    };
    const params = createMockParams({
      config: {
        endpoints: {
          [EModelEndpoint.bedrock]: {
            inferenceProfiles: {
              [CLAUDE_37_MODEL]: inferenceProfileArn,
            },
            guardrailConfig,
          },
        },
      },
      model_parameters: {
        model: CLAUDE_37_MODEL,
      },
    });
    const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
    expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', inferenceProfileArn);
    expect(result.llmConfig).toHaveProperty('guardrailConfig', guardrailConfig);
  });

  it('should preserve the original model ID in llmConfig.model', async () => {
    const inferenceProfileArn =
      'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/abc123';
    const params = createMockParams({
      config: {
        endpoints: {
          [EModelEndpoint.bedrock]: {
            inferenceProfiles: {
              [CLAUDE_37_MODEL]: inferenceProfileArn,
            },
          },
        },
      },
      model_parameters: {
        model: CLAUDE_37_MODEL,
      },
    });
    const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
    // Model ID should remain unchanged - only applicationInferenceProfile should be set
    expect(result.llmConfig).toHaveProperty('model', CLAUDE_37_MODEL);
    expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', inferenceProfileArn);
  });
});
});

View file

@ -4,6 +4,7 @@ import { BedrockRuntimeClient } from '@aws-sdk/client-bedrock-runtime';
import {
AuthType,
EModelEndpoint,
extractEnvVariable,
bedrockInputParser,
bedrockOutputParser,
removeNullishValues,
@ -13,6 +14,7 @@ import type {
InitializeResultBase,
BedrockCredentials,
GuardrailConfiguration,
InferenceProfileConfig,
} from '~/types';
import { checkUserKeyExpiry } from '~/utils';
@ -49,7 +51,10 @@ export async function initializeBedrock({
void endpoint;
const appConfig = req.config;
const bedrockConfig = appConfig?.endpoints?.[EModelEndpoint.bedrock] as
| ({ guardrailConfig?: GuardrailConfiguration } & Record<string, unknown>)
| ({
guardrailConfig?: GuardrailConfiguration;
inferenceProfiles?: InferenceProfileConfig;
} & Record<string, unknown>)
| undefined;
const {
@ -105,17 +110,25 @@ export async function initializeBedrock({
}),
),
) as InitializeResultBase['llmConfig'] & {
model?: string;
region?: string;
client?: BedrockRuntimeClient;
credentials?: BedrockCredentials;
endpointHost?: string;
guardrailConfig?: GuardrailConfiguration;
applicationInferenceProfile?: string;
};
if (bedrockConfig?.guardrailConfig) {
llmConfig.guardrailConfig = bedrockConfig.guardrailConfig;
}
const model = model_parameters?.model as string | undefined;
if (model && bedrockConfig?.inferenceProfiles?.[model]) {
const applicationInferenceProfile = extractEnvVariable(bedrockConfig.inferenceProfiles[model]);
llmConfig.applicationInferenceProfile = applicationInferenceProfile;
}
/** Only include credentials if they're complete (accessKeyId and secretAccessKey are both set) */
const hasCompleteCredentials =
credentials &&