mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-01-28 21:26:13 +01:00
👤 feat: AWS Bedrock Custom Inference Profiles (#11308)
* feat: add support for inferenceProfiles mapping
* fix: remove friendly name since api requires actual model id for validation alongside inference profile
* docs: more generic description in docs
* chore: address comments
* chore: update peer dependency versions in package.json
  - Bump @aws-sdk/client-bedrock-runtime from ^3.941.0 to ^3.970.0
  - Update @librechat/agents from ^3.0.78 to ^3.0.79
* fix: update @librechat/agents dependency to version 3.0.80
* test: add unit tests for inference profile configuration in initializeBedrock function
  - Introduced tests to validate the applicationInferenceProfile setting based on model configuration.
  - Ensured correct handling of environment variables and fallback scenarios for inference profile ARNs.
  - Added cases for empty inferenceProfiles and absence of bedrock config to confirm expected behavior.
* fix: update bedrock endpoint schema reference in config
  - Changed the bedrock endpoint reference from baseEndpointSchema to bedrockEndpointSchema for improved clarity and accuracy in configuration.
* test: add unit tests for Bedrock endpoint configuration
  - Introduced tests to validate the configuration of Bedrock endpoints with models and inference profiles.
  - Added scenarios for both complete and minimal configurations to ensure expected behavior.
  - Enhanced coverage for the handling of inference profiles without a models array.
---------
Co-authored-by: Danny Avila <danny@librechat.ai>
This commit is contained in:
parent
cc32895d13
commit
bd49693afc
10 changed files with 2081 additions and 1776 deletions
|
|
@ -36,7 +36,7 @@
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@anthropic-ai/sdk": "^0.71.0",
|
"@anthropic-ai/sdk": "^0.71.0",
|
||||||
"@anthropic-ai/vertex-sdk": "^0.14.0",
|
"@anthropic-ai/vertex-sdk": "^0.14.0",
|
||||||
"@aws-sdk/client-bedrock-runtime": "^3.941.0",
|
"@aws-sdk/client-bedrock-runtime": "^3.970.0",
|
||||||
"@aws-sdk/client-s3": "^3.758.0",
|
"@aws-sdk/client-s3": "^3.758.0",
|
||||||
"@aws-sdk/s3-request-presigner": "^3.758.0",
|
"@aws-sdk/s3-request-presigner": "^3.758.0",
|
||||||
"@azure/identity": "^4.7.0",
|
"@azure/identity": "^4.7.0",
|
||||||
|
|
@ -45,7 +45,7 @@
|
||||||
"@google/genai": "^1.19.0",
|
"@google/genai": "^1.19.0",
|
||||||
"@keyv/redis": "^4.3.3",
|
"@keyv/redis": "^4.3.3",
|
||||||
"@langchain/core": "^0.3.80",
|
"@langchain/core": "^0.3.80",
|
||||||
"@librechat/agents": "^3.0.78",
|
"@librechat/agents": "^3.0.80",
|
||||||
"@librechat/api": "*",
|
"@librechat/api": "*",
|
||||||
"@librechat/data-schemas": "*",
|
"@librechat/data-schemas": "*",
|
||||||
"@microsoft/microsoft-graph-client": "^3.0.7",
|
"@microsoft/microsoft-graph-client": "^3.0.7",
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,11 @@ async function loadConfigModels(req) {
|
||||||
modelsConfig[EModelEndpoint.azureAssistants] = azureConfig.assistantModels;
|
modelsConfig[EModelEndpoint.azureAssistants] = azureConfig.assistantModels;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const bedrockConfig = appConfig.endpoints?.[EModelEndpoint.bedrock];
|
||||||
|
if (bedrockConfig?.models && Array.isArray(bedrockConfig.models)) {
|
||||||
|
modelsConfig[EModelEndpoint.bedrock] = bedrockConfig.models;
|
||||||
|
}
|
||||||
|
|
||||||
if (!Array.isArray(appConfig.endpoints?.[EModelEndpoint.custom])) {
|
if (!Array.isArray(appConfig.endpoints?.[EModelEndpoint.custom])) {
|
||||||
return modelsConfig;
|
return modelsConfig;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -446,6 +446,21 @@ endpoints:
|
||||||
# AWS Bedrock Example
|
# AWS Bedrock Example
|
||||||
# Note: Bedrock endpoint is configured via environment variables
|
# Note: Bedrock endpoint is configured via environment variables
|
||||||
# bedrock:
|
# bedrock:
|
||||||
|
# # Models Configuration
|
||||||
|
# # Specify which models are available (equivalent to BEDROCK_AWS_MODELS env variable)
|
||||||
|
# models:
|
||||||
|
# - "anthropic.claude-3-7-sonnet-20250219-v1:0"
|
||||||
|
# - "anthropic.claude-3-5-sonnet-20241022-v2:0"
|
||||||
|
#
|
||||||
|
# # Inference Profiles Configuration
|
||||||
|
# # Maps model IDs to their inference profile ARNs
|
||||||
|
# # IMPORTANT: The model ID (key) MUST be a valid AWS Bedrock model ID and must exactly match an available model (listed under `models` above or in the BEDROCK_AWS_MODELS env variable)
|
||||||
|
# # The ARN (value) is the inference profile you wish to map to for that model
|
||||||
|
# # Both the model ID and ARN are sent to AWS - the model ID for validation/metadata, the ARN for routing
|
||||||
|
# inferenceProfiles:
|
||||||
|
# "us.anthropic.claude-sonnet-4-20250514-v1:0": "${BEDROCK_INFERENCE_PROFILE_CLAUDE_SONNET}"
|
||||||
|
# "anthropic.claude-3-7-sonnet-20250219-v1:0": "arn:aws:bedrock:us-west-2:123456789012:application-inference-profile/abc123"
|
||||||
|
#
|
||||||
# # Guardrail Configuration
|
# # Guardrail Configuration
|
||||||
# guardrailConfig:
|
# guardrailConfig:
|
||||||
# guardrailIdentifier: "your-guardrail-id"
|
# guardrailIdentifier: "your-guardrail-id"
|
||||||
|
|
@ -457,7 +472,6 @@ endpoints:
|
||||||
# # - "disabled": No trace information (default)
|
# # - "disabled": No trace information (default)
|
||||||
# # Trace output is logged to application log files for compliance auditing
|
# # Trace output is logged to application log files for compliance auditing
|
||||||
# trace: "enabled"
|
# trace: "enabled"
|
||||||
|
|
||||||
# Example modelSpecs configuration showing grouping options
|
# Example modelSpecs configuration showing grouping options
|
||||||
# The 'group' field organizes model specs in the UI selector:
|
# The 'group' field organizes model specs in the UI selector:
|
||||||
# - If 'group' matches an endpoint name (e.g., "openAI", "groq"), the spec appears nested under that endpoint
|
# - If 'group' matches an endpoint name (e.g., "openAI", "groq"), the spec appears nested under that endpoint
|
||||||
|
|
|
||||||
3425
package-lock.json
generated
3425
package-lock.json
generated
File diff suppressed because it is too large
Load diff
|
|
@ -79,7 +79,7 @@
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"@anthropic-ai/vertex-sdk": "^0.14.0",
|
"@anthropic-ai/vertex-sdk": "^0.14.0",
|
||||||
"@aws-sdk/client-bedrock-runtime": "^3.941.0",
|
"@aws-sdk/client-bedrock-runtime": "^3.970.0",
|
||||||
"@aws-sdk/client-s3": "^3.758.0",
|
"@aws-sdk/client-s3": "^3.758.0",
|
||||||
"@azure/identity": "^4.7.0",
|
"@azure/identity": "^4.7.0",
|
||||||
"@azure/search-documents": "^12.0.0",
|
"@azure/search-documents": "^12.0.0",
|
||||||
|
|
@ -87,7 +87,7 @@
|
||||||
"@google/genai": "^1.19.0",
|
"@google/genai": "^1.19.0",
|
||||||
"@keyv/redis": "^4.3.3",
|
"@keyv/redis": "^4.3.3",
|
||||||
"@langchain/core": "^0.3.80",
|
"@langchain/core": "^0.3.80",
|
||||||
"@librechat/agents": "^3.0.78",
|
"@librechat/agents": "^3.0.80",
|
||||||
"@librechat/data-schemas": "*",
|
"@librechat/data-schemas": "*",
|
||||||
"@modelcontextprotocol/sdk": "^1.25.2",
|
"@modelcontextprotocol/sdk": "^1.25.2",
|
||||||
"@smithy/node-http-handler": "^4.4.5",
|
"@smithy/node-http-handler": "^4.4.5",
|
||||||
|
|
|
||||||
|
|
@ -611,6 +611,78 @@ describe('AppService', () => {
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should correctly configure Bedrock endpoint with models and inferenceProfiles', async () => {
|
||||||
|
const config: Partial<TCustomConfig> = {
|
||||||
|
endpoints: {
|
||||||
|
[EModelEndpoint.bedrock]: {
|
||||||
|
models: [
|
||||||
|
'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
|
||||||
|
'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
|
||||||
|
'global.anthropic.claude-opus-4-5-20251101-v1:0',
|
||||||
|
],
|
||||||
|
inferenceProfiles: {
|
||||||
|
'us.anthropic.claude-3-7-sonnet-20250219-v1:0':
|
||||||
|
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/abc123',
|
||||||
|
'us.anthropic.claude-sonnet-4-5-20250929-v1:0': '${BEDROCK_SONNET_45_PROFILE}',
|
||||||
|
},
|
||||||
|
availableRegions: ['us-east-1', 'us-west-2'],
|
||||||
|
titleConvo: true,
|
||||||
|
titleModel: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
const result = await AppService({ config });
|
||||||
|
|
||||||
|
expect(result).toEqual(
|
||||||
|
expect.objectContaining({
|
||||||
|
endpoints: expect.objectContaining({
|
||||||
|
[EModelEndpoint.bedrock]: expect.objectContaining({
|
||||||
|
models: expect.arrayContaining([
|
||||||
|
'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
|
||||||
|
'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
|
||||||
|
'global.anthropic.claude-opus-4-5-20251101-v1:0',
|
||||||
|
]),
|
||||||
|
inferenceProfiles: expect.objectContaining({
|
||||||
|
'us.anthropic.claude-3-7-sonnet-20250219-v1:0':
|
||||||
|
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/abc123',
|
||||||
|
'us.anthropic.claude-sonnet-4-5-20250929-v1:0': '${BEDROCK_SONNET_45_PROFILE}',
|
||||||
|
}),
|
||||||
|
availableRegions: expect.arrayContaining(['us-east-1', 'us-west-2']),
|
||||||
|
titleConvo: true,
|
||||||
|
titleModel: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
|
||||||
|
}),
|
||||||
|
}),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should configure Bedrock endpoint with only inferenceProfiles (no models array)', async () => {
|
||||||
|
const config: Partial<TCustomConfig> = {
|
||||||
|
endpoints: {
|
||||||
|
[EModelEndpoint.bedrock]: {
|
||||||
|
inferenceProfiles: {
|
||||||
|
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': '${BEDROCK_INFERENCE_PROFILE_ARN}',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
const result = await AppService({ config });
|
||||||
|
|
||||||
|
expect(result).toEqual(
|
||||||
|
expect.objectContaining({
|
||||||
|
endpoints: expect.objectContaining({
|
||||||
|
[EModelEndpoint.bedrock]: expect.objectContaining({
|
||||||
|
inferenceProfiles: expect.objectContaining({
|
||||||
|
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': '${BEDROCK_INFERENCE_PROFILE_ARN}',
|
||||||
|
}),
|
||||||
|
}),
|
||||||
|
}),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
it('should correctly configure all endpoint when specified', async () => {
|
it('should correctly configure all endpoint when specified', async () => {
|
||||||
const config: Partial<TCustomConfig> = {
|
const config: Partial<TCustomConfig> = {
|
||||||
endpoints: {
|
endpoints: {
|
||||||
|
|
|
||||||
|
|
@ -313,4 +313,304 @@ describe('initializeBedrock', () => {
|
||||||
expect(typeof result.configOptions).toBe('object');
|
expect(typeof result.configOptions).toBe('object');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('Inference Profile Configuration', () => {
|
||||||
|
it('should set applicationInferenceProfile when model has matching inference profile config', async () => {
|
||||||
|
const inferenceProfileArn =
|
||||||
|
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/abc123';
|
||||||
|
|
||||||
|
const params = createMockParams({
|
||||||
|
config: {
|
||||||
|
endpoints: {
|
||||||
|
[EModelEndpoint.bedrock]: {
|
||||||
|
inferenceProfiles: {
|
||||||
|
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': inferenceProfileArn,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
model_parameters: {
|
||||||
|
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
|
||||||
|
|
||||||
|
expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', inferenceProfileArn);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should NOT set applicationInferenceProfile when model has no matching config', async () => {
|
||||||
|
const params = createMockParams({
|
||||||
|
config: {
|
||||||
|
endpoints: {
|
||||||
|
[EModelEndpoint.bedrock]: {
|
||||||
|
inferenceProfiles: {
|
||||||
|
'us.anthropic.claude-sonnet-4-5-20250929-v1:0':
|
||||||
|
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/xyz789',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
model_parameters: {
|
||||||
|
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0', // Different model
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
|
||||||
|
|
||||||
|
expect(result.llmConfig).not.toHaveProperty('applicationInferenceProfile');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should resolve environment variable in inference profile ARN', async () => {
|
||||||
|
const inferenceProfileArn =
|
||||||
|
'arn:aws:bedrock:us-east-1:951834775723:application-inference-profile/yjr1elcyt29s';
|
||||||
|
process.env.BEDROCK_INFERENCE_PROFILE_ARN = inferenceProfileArn;
|
||||||
|
|
||||||
|
const params = createMockParams({
|
||||||
|
config: {
|
||||||
|
endpoints: {
|
||||||
|
[EModelEndpoint.bedrock]: {
|
||||||
|
inferenceProfiles: {
|
||||||
|
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': '${BEDROCK_INFERENCE_PROFILE_ARN}',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
model_parameters: {
|
||||||
|
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
|
||||||
|
|
||||||
|
expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', inferenceProfileArn);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should use direct ARN when no env variable syntax is used', async () => {
|
||||||
|
const directArn =
|
||||||
|
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/direct123';
|
||||||
|
|
||||||
|
const params = createMockParams({
|
||||||
|
config: {
|
||||||
|
endpoints: {
|
||||||
|
[EModelEndpoint.bedrock]: {
|
||||||
|
inferenceProfiles: {
|
||||||
|
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': directArn,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
model_parameters: {
|
||||||
|
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
|
||||||
|
|
||||||
|
expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', directArn);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should fall back to original string when env variable is not set', async () => {
|
||||||
|
// Ensure the env var is not set
|
||||||
|
delete process.env.NONEXISTENT_PROFILE_ARN;
|
||||||
|
|
||||||
|
const params = createMockParams({
|
||||||
|
config: {
|
||||||
|
endpoints: {
|
||||||
|
[EModelEndpoint.bedrock]: {
|
||||||
|
inferenceProfiles: {
|
||||||
|
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': '${NONEXISTENT_PROFILE_ARN}',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
model_parameters: {
|
||||||
|
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
|
||||||
|
|
||||||
|
// Should return the original ${VAR} string when env var doesn't exist
|
||||||
|
expect(result.llmConfig).toHaveProperty(
|
||||||
|
'applicationInferenceProfile',
|
||||||
|
'${NONEXISTENT_PROFILE_ARN}',
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should resolve multiple different env variables for different models', async () => {
|
||||||
|
const claude37Arn =
|
||||||
|
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/claude37';
|
||||||
|
const sonnet45Arn =
|
||||||
|
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/sonnet45';
|
||||||
|
|
||||||
|
process.env.CLAUDE_37_PROFILE = claude37Arn;
|
||||||
|
process.env.SONNET_45_PROFILE = sonnet45Arn;
|
||||||
|
|
||||||
|
const params = createMockParams({
|
||||||
|
config: {
|
||||||
|
endpoints: {
|
||||||
|
[EModelEndpoint.bedrock]: {
|
||||||
|
inferenceProfiles: {
|
||||||
|
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': '${CLAUDE_37_PROFILE}',
|
||||||
|
'us.anthropic.claude-sonnet-4-5-20250929-v1:0': '${SONNET_45_PROFILE}',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
model_parameters: {
|
||||||
|
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
|
||||||
|
|
||||||
|
expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', claude37Arn);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle env variable with whitespace around it', async () => {
|
||||||
|
const inferenceProfileArn =
|
||||||
|
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/trimmed';
|
||||||
|
process.env.TRIMMED_PROFILE_ARN = inferenceProfileArn;
|
||||||
|
|
||||||
|
const params = createMockParams({
|
||||||
|
config: {
|
||||||
|
endpoints: {
|
||||||
|
[EModelEndpoint.bedrock]: {
|
||||||
|
inferenceProfiles: {
|
||||||
|
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': ' ${TRIMMED_PROFILE_ARN} ',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
model_parameters: {
|
||||||
|
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
|
||||||
|
|
||||||
|
expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', inferenceProfileArn);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should NOT set applicationInferenceProfile when inferenceProfiles config is empty', async () => {
|
||||||
|
const params = createMockParams({
|
||||||
|
config: {
|
||||||
|
endpoints: {
|
||||||
|
[EModelEndpoint.bedrock]: {
|
||||||
|
inferenceProfiles: {},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
model_parameters: {
|
||||||
|
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
|
||||||
|
|
||||||
|
expect(result.llmConfig).not.toHaveProperty('applicationInferenceProfile');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should NOT set applicationInferenceProfile when no bedrock config exists', async () => {
|
||||||
|
const params = createMockParams({
|
||||||
|
config: {},
|
||||||
|
model_parameters: {
|
||||||
|
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
|
||||||
|
|
||||||
|
expect(result.llmConfig).not.toHaveProperty('applicationInferenceProfile');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle multiple inference profiles and select the correct one', async () => {
|
||||||
|
const sonnet45Arn =
|
||||||
|
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/sonnet45';
|
||||||
|
const claude37Arn =
|
||||||
|
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/claude37';
|
||||||
|
|
||||||
|
const params = createMockParams({
|
||||||
|
config: {
|
||||||
|
endpoints: {
|
||||||
|
[EModelEndpoint.bedrock]: {
|
||||||
|
inferenceProfiles: {
|
||||||
|
'us.anthropic.claude-sonnet-4-5-20250929-v1:0': sonnet45Arn,
|
||||||
|
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': claude37Arn,
|
||||||
|
'global.anthropic.claude-opus-4-5-20251101-v1:0':
|
||||||
|
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/opus45',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
model_parameters: {
|
||||||
|
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
|
||||||
|
|
||||||
|
expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', claude37Arn);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should work alongside guardrailConfig', async () => {
|
||||||
|
const inferenceProfileArn =
|
||||||
|
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/abc123';
|
||||||
|
const guardrailConfig = {
|
||||||
|
guardrailIdentifier: 'test-guardrail',
|
||||||
|
guardrailVersion: '1',
|
||||||
|
};
|
||||||
|
|
||||||
|
const params = createMockParams({
|
||||||
|
config: {
|
||||||
|
endpoints: {
|
||||||
|
[EModelEndpoint.bedrock]: {
|
||||||
|
inferenceProfiles: {
|
||||||
|
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': inferenceProfileArn,
|
||||||
|
},
|
||||||
|
guardrailConfig,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
model_parameters: {
|
||||||
|
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
|
||||||
|
|
||||||
|
expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', inferenceProfileArn);
|
||||||
|
expect(result.llmConfig).toHaveProperty('guardrailConfig', guardrailConfig);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should preserve the original model ID in llmConfig.model', async () => {
|
||||||
|
const inferenceProfileArn =
|
||||||
|
'arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/abc123';
|
||||||
|
|
||||||
|
const params = createMockParams({
|
||||||
|
config: {
|
||||||
|
endpoints: {
|
||||||
|
[EModelEndpoint.bedrock]: {
|
||||||
|
inferenceProfiles: {
|
||||||
|
'us.anthropic.claude-3-7-sonnet-20250219-v1:0': inferenceProfileArn,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
model_parameters: {
|
||||||
|
model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = (await initializeBedrock(params)) as BedrockLLMConfigResult;
|
||||||
|
|
||||||
|
// Model ID should remain unchanged - only applicationInferenceProfile should be set
|
||||||
|
expect(result.llmConfig).toHaveProperty(
|
||||||
|
'model',
|
||||||
|
'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
|
||||||
|
);
|
||||||
|
expect(result.llmConfig).toHaveProperty('applicationInferenceProfile', inferenceProfileArn);
|
||||||
|
});
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ import { BedrockRuntimeClient } from '@aws-sdk/client-bedrock-runtime';
|
||||||
import {
|
import {
|
||||||
AuthType,
|
AuthType,
|
||||||
EModelEndpoint,
|
EModelEndpoint,
|
||||||
|
extractEnvVariable,
|
||||||
bedrockInputParser,
|
bedrockInputParser,
|
||||||
bedrockOutputParser,
|
bedrockOutputParser,
|
||||||
removeNullishValues,
|
removeNullishValues,
|
||||||
|
|
@ -13,6 +14,7 @@ import type {
|
||||||
InitializeResultBase,
|
InitializeResultBase,
|
||||||
BedrockCredentials,
|
BedrockCredentials,
|
||||||
GuardrailConfiguration,
|
GuardrailConfiguration,
|
||||||
|
InferenceProfileConfig,
|
||||||
} from '~/types';
|
} from '~/types';
|
||||||
import { checkUserKeyExpiry } from '~/utils';
|
import { checkUserKeyExpiry } from '~/utils';
|
||||||
|
|
||||||
|
|
@ -49,7 +51,10 @@ export async function initializeBedrock({
|
||||||
void endpoint;
|
void endpoint;
|
||||||
const appConfig = req.config;
|
const appConfig = req.config;
|
||||||
const bedrockConfig = appConfig?.endpoints?.[EModelEndpoint.bedrock] as
|
const bedrockConfig = appConfig?.endpoints?.[EModelEndpoint.bedrock] as
|
||||||
| ({ guardrailConfig?: GuardrailConfiguration } & Record<string, unknown>)
|
| ({
|
||||||
|
guardrailConfig?: GuardrailConfiguration;
|
||||||
|
inferenceProfiles?: InferenceProfileConfig;
|
||||||
|
} & Record<string, unknown>)
|
||||||
| undefined;
|
| undefined;
|
||||||
|
|
||||||
const {
|
const {
|
||||||
|
|
@ -105,17 +110,25 @@ export async function initializeBedrock({
|
||||||
}),
|
}),
|
||||||
),
|
),
|
||||||
) as InitializeResultBase['llmConfig'] & {
|
) as InitializeResultBase['llmConfig'] & {
|
||||||
|
model?: string;
|
||||||
region?: string;
|
region?: string;
|
||||||
client?: BedrockRuntimeClient;
|
client?: BedrockRuntimeClient;
|
||||||
credentials?: BedrockCredentials;
|
credentials?: BedrockCredentials;
|
||||||
endpointHost?: string;
|
endpointHost?: string;
|
||||||
guardrailConfig?: GuardrailConfiguration;
|
guardrailConfig?: GuardrailConfiguration;
|
||||||
|
applicationInferenceProfile?: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
if (bedrockConfig?.guardrailConfig) {
|
if (bedrockConfig?.guardrailConfig) {
|
||||||
llmConfig.guardrailConfig = bedrockConfig.guardrailConfig;
|
llmConfig.guardrailConfig = bedrockConfig.guardrailConfig;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const model = model_parameters?.model as string | undefined;
|
||||||
|
if (model && bedrockConfig?.inferenceProfiles?.[model]) {
|
||||||
|
const applicationInferenceProfile = extractEnvVariable(bedrockConfig.inferenceProfiles[model]);
|
||||||
|
llmConfig.applicationInferenceProfile = applicationInferenceProfile;
|
||||||
|
}
|
||||||
|
|
||||||
/** Only include credentials if they're complete (accessKeyId and secretAccessKey are both set) */
|
/** Only include credentials if they're complete (accessKeyId and secretAccessKey are both set) */
|
||||||
const hasCompleteCredentials =
|
const hasCompleteCredentials =
|
||||||
credentials &&
|
credentials &&
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,13 @@ export interface GuardrailConfiguration {
|
||||||
trace?: 'enabled' | 'disabled' | 'enabled_full';
|
trace?: 'enabled' | 'disabled' | 'enabled_full';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* AWS Bedrock Inference Profile configuration
|
||||||
|
* Maps model IDs to their inference profile ARNs
|
||||||
|
* @see https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles.html
|
||||||
|
*/
|
||||||
|
export type InferenceProfileConfig = Record<string, string>;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Configuration options for Bedrock LLM
|
* Configuration options for Bedrock LLM
|
||||||
*/
|
*/
|
||||||
|
|
@ -36,6 +43,8 @@ export interface BedrockConfigOptions {
|
||||||
endpointHost?: string;
|
endpointHost?: string;
|
||||||
/** Guardrail configuration for content filtering */
|
/** Guardrail configuration for content filtering */
|
||||||
guardrailConfig?: GuardrailConfiguration;
|
guardrailConfig?: GuardrailConfiguration;
|
||||||
|
/** Inference profile ARNs keyed by AWS Bedrock model ID */
|
||||||
|
inferenceProfiles?: InferenceProfileConfig;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -48,6 +57,7 @@ export interface BedrockLLMConfigResult {
|
||||||
credentials?: BedrockCredentials;
|
credentials?: BedrockCredentials;
|
||||||
endpointHost?: string;
|
endpointHost?: string;
|
||||||
guardrailConfig?: GuardrailConfiguration;
|
guardrailConfig?: GuardrailConfiguration;
|
||||||
|
applicationInferenceProfile?: string;
|
||||||
};
|
};
|
||||||
configOptions: Record<string, unknown>;
|
configOptions: Record<string, unknown>;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -212,6 +212,8 @@ export type TBaseEndpoint = z.infer<typeof baseEndpointSchema>;
|
||||||
export const bedrockEndpointSchema = baseEndpointSchema.merge(
|
export const bedrockEndpointSchema = baseEndpointSchema.merge(
|
||||||
z.object({
|
z.object({
|
||||||
availableRegions: z.array(z.string()).optional(),
|
availableRegions: z.array(z.string()).optional(),
|
||||||
|
models: z.array(z.string()).optional(),
|
||||||
|
inferenceProfiles: z.record(z.string(), z.string()).optional(),
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
@ -983,7 +985,7 @@ export const configSchema = z.object({
|
||||||
[EModelEndpoint.assistants]: assistantEndpointSchema.optional(),
|
[EModelEndpoint.assistants]: assistantEndpointSchema.optional(),
|
||||||
[EModelEndpoint.agents]: agentsEndpointSchema.optional(),
|
[EModelEndpoint.agents]: agentsEndpointSchema.optional(),
|
||||||
[EModelEndpoint.custom]: customEndpointsSchema.optional(),
|
[EModelEndpoint.custom]: customEndpointsSchema.optional(),
|
||||||
[EModelEndpoint.bedrock]: baseEndpointSchema.optional(),
|
[EModelEndpoint.bedrock]: bedrockEndpointSchema.optional(),
|
||||||
})
|
})
|
||||||
.strict()
|
.strict()
|
||||||
.refine((data) => Object.keys(data).length > 0, {
|
.refine((data) => Object.keys(data).length > 0, {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue