feat: AWS Bedrock Custom Inference Profiles (#11308)

* feat: add support for inferenceProfiles mapping (see the config sketch below)

* fix: remove friendly name, since the API requires the actual model ID for validation alongside the inference profile

* docs: make the description more generic

* chore: address comments

* chore: update peer dependency versions in package.json

- Bump @aws-sdk/client-bedrock-runtime from ^3.941.0 to ^3.970.0
- Update @librechat/agents from ^3.0.78 to ^3.0.79

* fix: update @librechat/agents dependency to version 3.0.80

* test: add unit tests for inference profile configuration in initializeBedrock function

- Introduced tests to validate the applicationInferenceProfile setting based on model configuration.
- Ensured correct handling of environment variables and fallback scenarios for inference profile ARNs.
- Added cases for empty inferenceProfiles and absence of bedrock config to confirm expected behavior.

* fix: update bedrock endpoint schema reference in config

- Changed the bedrock endpoint reference in the config schema from baseEndpointSchema to bedrockEndpointSchema so the bedrock-specific fields are validated correctly.

* test: add unit tests for Bedrock endpoint configuration

- Introduced tests to validate the configuration of Bedrock endpoints with models and inference profiles.
- Added scenarios for both complete and minimal configurations to ensure expected behavior.
- Enhanced coverage for the handling of inference profiles without a models array.

---------

Co-authored-by: Danny Avila <danny@librechat.ai>
Authored by Dustin Healy on 2026-01-16 10:52:58 -08:00; committed by Danny Avila
parent cc32895d13
commit bd49693afc
10 changed files with 2081 additions and 1776 deletions
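
Before the diffs, a hedged sketch of the kind of configuration the new inferenceProfiles mapping enables. The shape mirrors the TCustomConfig objects used in the tests added by this commit; the model IDs, ARN, and env placeholder are illustrative, and the import path assumes the usual librechat-data-provider exports.

// Illustrative only — mirrors the test config below, not a documented librechat.yaml snippet.
import { EModelEndpoint, type TCustomConfig } from 'librechat-data-provider';

const config: Partial<TCustomConfig> = {
  endpoints: {
    [EModelEndpoint.bedrock]: {
      models: ['us.anthropic.claude-3-7-sonnet-20250219-v1:0'],
      // Map each Bedrock model ID to an application inference profile ARN,
      // either inline or as a ${ENV_VAR} placeholder resolved at request time.
      inferenceProfiles: {
        'us.anthropic.claude-3-7-sonnet-20250219-v1:0':
          'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/abc123',
        'us.anthropic.claude-sonnet-4-5-20250929-v1:0': '${BEDROCK_SONNET_45_PROFILE}',
      },
    },
  },
};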


@@ -79,7 +79,7 @@
},
"peerDependencies": {
"@anthropic-ai/vertex-sdk": "^0.14.0",
"@aws-sdk/client-bedrock-runtime": "^3.941.0",
"@aws-sdk/client-bedrock-runtime": "^3.970.0",
"@aws-sdk/client-s3": "^3.758.0",
"@azure/identity": "^4.7.0",
"@azure/search-documents": "^12.0.0",
@@ -87,7 +87,7 @@
"@google/genai": "^1.19.0",
"@keyv/redis": "^4.3.3",
"@langchain/core": "^0.3.80",
"@librechat/agents": "^3.0.78",
"@librechat/agents": "^3.0.80",
"@librechat/data-schemas": "*",
"@modelcontextprotocol/sdk": "^1.25.2",
"@smithy/node-http-handler": "^4.4.5",


@@ -611,6 +611,78 @@ describe('AppService', () => {
);
});
it('should correctly configure Bedrock endpoint with models and inferenceProfiles', async () => {
const config: Partial<TCustomConfig> = {
endpoints: {
[EModelEndpoint.bedrock]: {
models: [
'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
'global.anthropic.claude-opus-4-5-20251101-v1:0',
],
inferenceProfiles: {
'us.anthropic.claude-3-7-sonnet-20250219-v1:0':
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/abc123',
'us.anthropic.claude-sonnet-4-5-20250929-v1:0': '${BEDROCK_SONNET_45_PROFILE}',
},
availableRegions: ['us-east-1', 'us-west-2'],
titleConvo: true,
titleModel: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
},
},
};
const result = await AppService({ config });
expect(result).toEqual(
expect.objectContaining({
endpoints: expect.objectContaining({
[EModelEndpoint.bedrock]: expect.objectContaining({
models: expect.arrayContaining([
'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
'global.anthropic.claude-opus-4-5-20251101-v1:0',
]),
inferenceProfiles: expect.objectContaining({
'us.anthropic.claude-3-7-sonnet-20250219-v1:0':
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/abc123',
'us.anthropic.claude-sonnet-4-5-20250929-v1:0': '${BEDROCK_SONNET_45_PROFILE}',
}),
availableRegions: expect.arrayContaining(['us-east-1', 'us-west-2']),
titleConvo: true,
titleModel: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
}),
}),
}),
);
});
it('should configure Bedrock endpoint with only inferenceProfiles (no models array)', async () => {
const config: Partial<TCustomConfig> = {
endpoints: {
[EModelEndpoint.bedrock]: {
inferenceProfiles: {
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': '${BEDROCK_INFERENCE_PROFILE_ARN}',
},
},
},
};
const result = await AppService({ config });
expect(result).toEqual(
expect.objectContaining({
endpoints: expect.objectContaining({
[EModelEndpoint.bedrock]: expect.objectContaining({
inferenceProfiles: expect.objectContaining({
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': '${BEDROCK_INFERENCE_PROFILE_ARN}',
}),
}),
}),
}),
);
});
it('should correctly configure all endpoint when specified', async () => {
const config: Partial<TCustomConfig> = {
endpoints: {


@@ -313,4 +313,304 @@ describe('initializeBedrock', () => {
expect(typeof result.configOptions).toBe('object');
});
});
describe('Inference Profile Configuration', () => {
it('should set applicationInferenceProfile when model has matching inference profile config', async () => {
const inferenceProfileArn =
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/abc123';
const params = createMockParams({
config: {
endpoints: {
[EModelEndpoint.bedrock]: {
inferenceProfiles: {
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': inferenceProfileArn,
},
},
},
},
model_parameters: {
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
},
});
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', inferenceProfileArn);
});
it('should NOT set applicationInferenceProfile when model has no matching config', async () => {
const params = createMockParams({
config: {
endpoints: {
[EModelEndpoint.bedrock]: {
inferenceProfiles: {
'us.anthropic.claude-sonnet-4-5-20250929-v1:0':
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/xyz789',
},
},
},
},
model_parameters: {
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0', // Different model
},
});
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
expect(result.llmConfig).not.toHaveProperty('applicationInferenceProfile');
});
it('should resolve environment variable in inference profile ARN', async () => {
const inferenceProfileArn =
'arn:aws:bedrock:us-east-1:951834775723:application-inference-profile/yjr1elcyt29s';
process.env.BEDROCK_INFERENCE_PROFILE_ARN = inferenceProfileArn;
const params = createMockParams({
config: {
endpoints: {
[EModelEndpoint.bedrock]: {
inferenceProfiles: {
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': '${BEDROCK_INFERENCE_PROFILE_ARN}',
},
},
},
},
model_parameters: {
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
},
});
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', inferenceProfileArn);
});
it('should use direct ARN when no env variable syntax is used', async () => {
const directArn =
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/direct123';
const params = createMockParams({
config: {
endpoints: {
[EModelEndpoint.bedrock]: {
inferenceProfiles: {
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': directArn,
},
},
},
},
model_parameters: {
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
},
});
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', directArn);
});
it('should fall back to original string when env variable is not set', async () => {
// Ensure the env var is not set
delete process.env.NONEXISTENT_PROFILE_ARN;
const params = createMockParams({
config: {
endpoints: {
[EModelEndpoint.bedrock]: {
inferenceProfiles: {
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': '${NONEXISTENT_PROFILE_ARN}',
},
},
},
},
model_parameters: {
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
},
});
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
// Should return the original ${VAR} string when env var doesn't exist
expect(result.llmConfig).toHaveProperty(
'applicationInferenceProfile',
'${NONEXISTENT_PROFILE_ARN}',
);
});
it('should resolve multiple different env variables for different models', async () => {
const claude37Arn =
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/claude37';
const sonnet45Arn =
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/sonnet45';
process.env.CLAUDE_37_PROFILE = claude37Arn;
process.env.SONNET_45_PROFILE = sonnet45Arn;
const params = createMockParams({
config: {
endpoints: {
[EModelEndpoint.bedrock]: {
inferenceProfiles: {
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': '${CLAUDE_37_PROFILE}',
'us.anthropic.claude-sonnet-4-5-20250929-v1:0': '${SONNET_45_PROFILE}',
},
},
},
},
model_parameters: {
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
},
});
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', claude37Arn);
});
it('should handle env variable with whitespace around it', async () => {
const inferenceProfileArn =
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/trimmed';
process.env.TRIMMED_PROFILE_ARN = inferenceProfileArn;
const params = createMockParams({
config: {
endpoints: {
[EModelEndpoint.bedrock]: {
inferenceProfiles: {
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': ' ${TRIMMED_PROFILE_ARN} ',
},
},
},
},
model_parameters: {
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
},
});
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', inferenceProfileArn);
});
it('should NOT set applicationInferenceProfile when inferenceProfiles config is empty', async () => {
const params = createMockParams({
config: {
endpoints: {
[EModelEndpoint.bedrock]: {
inferenceProfiles: {},
},
},
},
model_parameters: {
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
},
});
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
expect(result.llmConfig).not.toHaveProperty('applicationInferenceProfile');
});
it('should NOT set applicationInferenceProfile when no bedrock config exists', async () => {
const params = createMockParams({
config: {},
model_parameters: {
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
},
});
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
expect(result.llmConfig).not.toHaveProperty('applicationInferenceProfile');
});
it('should handle multiple inference profiles and select the correct one', async () => {
const sonnet45Arn =
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/sonnet45';
const claude37Arn =
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/claude37';
const params = createMockParams({
config: {
endpoints: {
[EModelEndpoint.bedrock]: {
inferenceProfiles: {
'us.anthropic.claude-sonnet-4-5-20250929-v1:0': sonnet45Arn,
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': claude37Arn,
'global.anthropic.claude-opus-4-5-20251101-v1:0':
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/opus45',
},
},
},
},
model_parameters: {
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
},
});
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', claude37Arn);
});
it('should work alongside guardrailConfig', async () => {
const inferenceProfileArn =
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/abc123';
const guardrailConfig = {
guardrailIdentifier: 'test-guardrail',
guardrailVersion: '1',
};
const params = createMockParams({
config: {
endpoints: {
[EModelEndpoint.bedrock]: {
inferenceProfiles: {
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': inferenceProfileArn,
},
guardrailConfig,
},
},
},
model_parameters: {
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
},
});
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', inferenceProfileArn);
expect(result.llmConfig).toHaveProperty('guardrailConfig', guardrailConfig);
});
it('should preserve the original model ID in llmConfig.model', async () => {
const inferenceProfileArn =
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/abc123';
const params = createMockParams({
config: {
endpoints: {
[EModelEndpoint.bedrock]: {
inferenceProfiles: {
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': inferenceProfileArn,
},
},
},
},
model_parameters: {
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
},
});
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
// Model ID should remain unchanged - only applicationInferenceProfile should be set
expect(result.llmConfig).toHaveProperty(
'model',
'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
);
expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', inferenceProfileArn);
});
});
});


@@ -4,6 +4,7 @@ import { BedrockRuntimeClient } from '@aws-sdk/client-bedrock-runtime';
import {
AuthType,
EModelEndpoint,
extractEnvVariable,
bedrockInputParser,
bedrockOutputParser,
removeNullishValues,
@@ -13,6 +14,7 @@ import type {
InitializeResultBase,
BedrockCredentials,
GuardrailConfiguration,
InferenceProfileConfig,
} from '~/types';
import { checkUserKeyExpiry } from '~/utils';
@@ -49,7 +51,10 @@ export async function initializeBedrock({
void endpoint;
const appConfig = req.config;
const bedrockConfig = appConfig?.endpoints?.[EModelEndpoint.bedrock] as
- | ({ guardrailConfig?: GuardrailConfiguration } & Record<string, unknown>)
+ | ({
+ guardrailConfig?: GuardrailConfiguration;
+ inferenceProfiles?: InferenceProfileConfig;
+ } & Record<string, unknown>)
| undefined;
const {
@@ -105,17 +110,25 @@
}),
),
) as InitializeResultBase['llmConfig'] & {
model?: string;
region?: string;
client?: BedrockRuntimeClient;
credentials?: BedrockCredentials;
endpointHost?: string;
guardrailConfig?: GuardrailConfiguration;
applicationInferenceProfile?: string;
};
if (bedrockConfig?.guardrailConfig) {
llmConfig.guardrailConfig = bedrockConfig.guardrailConfig;
}
const model = model_parameters?.model as string | undefined;
if (model && bedrockConfig?.inferenceProfiles?.[model]) {
const applicationInferenceProfile = extractEnvVariable(bedrockConfig.inferenceProfiles[model]);
llmConfig.applicationInferenceProfile = applicationInferenceProfile;
}
/** Only include credentials if they're complete (accessKeyId and secretAccessKey are both set) */
const hasCompleteCredentials =
credentials &&
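
To make the new lookup in initializeBedrock concrete: a rough standalone sketch of the resolution behavior the tests above pin down. resolveProfile is a hypothetical helper written for illustration; the shipped code delegates to extractEnvVariable from librechat-data-provider, whose exact implementation may differ.

// Hypothetical sketch — mirrors the behavior the unit tests exercise, not the real extractEnvVariable.
function resolveProfile(configuredValue: string): string {
  // Literal ARNs pass through unchanged.
  const match = configuredValue.trim().match(/^\$\{([A-Za-z0-9_]+)\}$/);
  if (!match) {
    return configuredValue;
  }
  // ${ENV_VAR} placeholders resolve from process.env, falling back to the original string when unset.
  const name = match[1];
  return (name ? process.env[name] : undefined) ?? configuredValue;
}

// resolveProfile('arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/abc123') -> the ARN as-is
// resolveProfile('${BEDROCK_INFERENCE_PROFILE_ARN}') -> env value if set, otherwise the placeholder string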


@@ -21,6 +21,13 @@ export interface GuardrailConfiguration {
trace?: 'enabled' | 'disabled' | 'enabled_full';
}
/**
* AWS Bedrock Inference Profile configuration
* Maps model IDs to their inference profile ARNs
* @see https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles.html
*/
export type InferenceProfileConfig = Record<string, string>;
/**
* Configuration options for Bedrock LLM
*/
@@ -36,6 +43,8 @@ export interface BedrockConfigOptions {
endpointHost?: string;
/** Guardrail configuration for content filtering */
guardrailConfig?: GuardrailConfiguration;
/** Inference profile ARNs keyed by model ID / friendly name */
inferenceProfiles?: InferenceProfileConfig;
}
/**
@@ -48,6 +57,7 @@ export interface BedrockLLMConfigResult {
credentials?: BedrockCredentials;
endpointHost?: string;
guardrailConfig?: GuardrailConfiguration;
applicationInferenceProfile?: string;
};
configOptions: Record<string, unknown>;
}
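
A minimal sketch of the new InferenceProfileConfig type in use (values are placeholders; the import path follows the ~/types alias seen in this package's diff above):

// InferenceProfileConfig is a plain string-to-string record:
// model ID (or configured name) -> application inference profile ARN, inline or as a ${ENV_VAR} placeholder.
import type { InferenceProfileConfig } from '~/types'; // path as aliased in this package

const profiles: InferenceProfileConfig = {
  'us.anthropic.claude-3-7-sonnet-20250219-v1:0': '${BEDROCK_INFERENCE_PROFILE_ARN}',
};
// Passed to the endpoint config as BedrockConfigOptions['inferenceProfiles'].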


@@ -212,6 +212,8 @@ export type TBaseEndpoint = z.infer<typeof baseEndpointSchema>;
export const bedrockEndpointSchema = baseEndpointSchema.merge(
z.object({
availableRegions: z.array(z.string()).optional(),
models: z.array(z.string()).optional(),
inferenceProfiles: z.record(z.string(), z.string()).optional(),
}),
);
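
As a rough illustration of the extended schema in use — assuming, as the minimal-config test above suggests, that the base endpoint fields are all optional, and that bedrockEndpointSchema is exported from the data-provider package:

import { bedrockEndpointSchema } from 'librechat-data-provider'; // assumed export location

const parsed = bedrockEndpointSchema.parse({
  availableRegions: ['us-east-1', 'us-west-2'],
  models: ['us.anthropic.claude-3-7-sonnet-20250219-v1:0'],
  inferenceProfiles: {
    'us.anthropic.claude-3-7-sonnet-20250219-v1:0': '${BEDROCK_INFERENCE_PROFILE_ARN}',
  },
});
// parsed.inferenceProfiles, if present, is a Record<string, string>;
// ${ENV_VAR} placeholders are resolved later, when initializeBedrock builds the LLM config.
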
@@ -983,7 +985,7 @@ export const configSchema = z.object({
[EModelEndpoint.assistants]: assistantEndpointSchema.optional(),
[EModelEndpoint.agents]: agentsEndpointSchema.optional(),
[EModelEndpoint.custom]: customEndpointsSchema.optional(),
- [EModelEndpoint.bedrock]: baseEndpointSchema.optional(),
+ [EModelEndpoint.bedrock]: bedrockEndpointSchema.optional(),
})
.strict()
.refine((data) => Object.keys(data).length > 0, {