mirror of https://github.com/danny-avila/LibreChat.git (synced 2025-12-16 16:30:15 +01:00)
🅰️ feat: Azure AI Studio, Models as a Service Support (#1902)
* feat(data-provider): add Azure serverless inference handling through librechat.yaml
* feat(azureOpenAI): serverless inference handling in api
* docs: update docs with new azureOpenAI endpoint config fields and serverless inference endpoint setup
* chore: remove unnecessary checks for apiKey as schema would not allow apiKey to be undefined
* ci(azureOpenAI): update tests for serverless configurations
parent 6d6b3c9c1d
commit 08d4b3cc8a
9 changed files with 460 additions and 26 deletions
```diff
@@ -740,22 +740,34 @@ class OpenAIClient extends BaseClient {
     /** @type {TAzureConfig | undefined} */
     const azureConfig = this.options?.req?.app?.locals?.[EModelEndpoint.azureOpenAI];
 
-    if (this.azure && azureConfig) {
+    const resetTitleOptions =
+      (this.azure && azureConfig) ||
+      (azureConfig && this.options.endpoint === EModelEndpoint.azureOpenAI);
+
+    if (resetTitleOptions) {
       const { modelGroupMap, groupMap } = azureConfig;
       const {
         azureOptions,
         baseURL,
         headers = {},
+        serverless,
       } = mapModelToAzureConfig({
         modelName: modelOptions.model,
         modelGroupMap,
         groupMap,
       });
-      this.azure = azureOptions;
 
       this.options.headers = resolveHeaders(headers);
       this.options.reverseProxyUrl = baseURL ?? null;
       this.langchainProxy = extractBaseURL(this.options.reverseProxyUrl);
       this.apiKey = azureOptions.azureOpenAIApiKey;
+
+      const groupName = modelGroupMap[modelOptions.model].group;
+      this.options.addParams = azureConfig.groupMap[groupName].addParams;
+      this.options.dropParams = azureConfig.groupMap[groupName].dropParams;
+      this.options.forcePrompt = azureConfig.groupMap[groupName].forcePrompt;
+      this.azure = !serverless && azureOptions;
     }
 
     const titleChatCompletion = async () => {
```
```diff
@@ -1011,22 +1023,33 @@ ${convo}
     /** @type {TAzureConfig | undefined} */
     const azureConfig = this.options?.req?.app?.locals?.[EModelEndpoint.azureOpenAI];
 
-    if (this.azure && this.isVisionModel && azureConfig) {
+    if (
+      (this.azure && this.isVisionModel && azureConfig) ||
+      (azureConfig && this.isVisionModel && this.options.endpoint === EModelEndpoint.azureOpenAI)
+    ) {
       const { modelGroupMap, groupMap } = azureConfig;
       const {
         azureOptions,
         baseURL,
         headers = {},
+        serverless,
       } = mapModelToAzureConfig({
         modelName: modelOptions.model,
         modelGroupMap,
         groupMap,
       });
-      this.azure = azureOptions;
-      this.azureEndpoint = genAzureChatCompletion(this.azure, modelOptions.model, this);
       opts.defaultHeaders = resolveHeaders(headers);
       this.langchainProxy = extractBaseURL(baseURL);
       this.apiKey = azureOptions.azureOpenAIApiKey;
+
+      const groupName = modelGroupMap[modelOptions.model].group;
+      this.options.addParams = azureConfig.groupMap[groupName].addParams;
+      this.options.dropParams = azureConfig.groupMap[groupName].dropParams;
+      // Note: `forcePrompt` not re-assigned as only chat models are vision models
+
+      this.azure = !serverless && azureOptions;
+      this.azureEndpoint =
+        !serverless && genAzureChatCompletion(this.azure, modelOptions.model, this);
     }
 
     if (this.azure || this.options.azure) {
```
```diff
@@ -77,19 +77,27 @@ const initializeClient = async ({ req, res, endpointOption }) => {
       azureOptions,
       baseURL,
       headers = {},
+      serverless,
     } = mapModelToAzureConfig({
       modelName,
       modelGroupMap,
       groupMap,
     });
-    clientOptions.azure = azureOptions;
-    clientOptions.titleConvo = azureConfig.titleConvo;
-    clientOptions.titleModel = azureConfig.titleModel;
-    clientOptions.titleMethod = azureConfig.titleMethod ?? 'completion';
 
     clientOptions.reverseProxyUrl = baseURL ?? clientOptions.reverseProxyUrl;
     clientOptions.headers = resolveHeaders({ ...headers, ...(clientOptions.headers ?? {}) });
 
-    apiKey = clientOptions.azure.azureOpenAIApiKey;
+    clientOptions.titleConvo = azureConfig.titleConvo;
+    clientOptions.titleModel = azureConfig.titleModel;
+    clientOptions.titleMethod = azureConfig.titleMethod ?? 'completion';
+
+    const groupName = modelGroupMap[modelName].group;
+    clientOptions.addParams = azureConfig.groupMap[groupName].addParams;
+    clientOptions.dropParams = azureConfig.groupMap[groupName].dropParams;
+    clientOptions.forcePrompt = azureConfig.groupMap[groupName].forcePrompt;
+
+    apiKey = azureOptions.azureOpenAIApiKey;
+    clientOptions.azure = !serverless && azureOptions;
   } else if (useAzure || (apiKey && apiKey.includes('{"azure') && !clientOptions.azure)) {
     clientOptions.azure = isUserProvided ? JSON.parse(userKey) : getAzureCredentials();
     apiKey = clientOptions.azure.azureOpenAIApiKey;
```
```diff
@@ -65,19 +65,27 @@ const initializeClient = async ({ req, res, endpointOption }) => {
       azureOptions,
       baseURL,
       headers = {},
+      serverless,
     } = mapModelToAzureConfig({
       modelName,
       modelGroupMap,
       groupMap,
     });
-    clientOptions.azure = azureOptions;
-    clientOptions.titleConvo = azureConfig.titleConvo;
-    clientOptions.titleModel = azureConfig.titleModel;
-    clientOptions.titleMethod = azureConfig.titleMethod ?? 'completion';
 
     clientOptions.reverseProxyUrl = baseURL ?? clientOptions.reverseProxyUrl;
     clientOptions.headers = resolveHeaders({ ...headers, ...(clientOptions.headers ?? {}) });
 
-    apiKey = clientOptions.azure.azureOpenAIApiKey;
+    clientOptions.titleConvo = azureConfig.titleConvo;
+    clientOptions.titleModel = azureConfig.titleModel;
+    clientOptions.titleMethod = azureConfig.titleMethod ?? 'completion';
+
+    const groupName = modelGroupMap[modelName].group;
+    clientOptions.addParams = azureConfig.groupMap[groupName].addParams;
+    clientOptions.dropParams = azureConfig.groupMap[groupName].dropParams;
+    clientOptions.forcePrompt = azureConfig.groupMap[groupName].forcePrompt;
+
+    apiKey = azureOptions.azureOpenAIApiKey;
+    clientOptions.azure = !serverless && azureOptions;
   } else if (isAzureOpenAI) {
     clientOptions.azure = isUserProvided ? JSON.parse(userKey) : getAzureCredentials();
     apiKey = clientOptions.azure.azureOpenAIApiKey;
```
@@ -2,7 +2,7 @@

**Azure OpenAI Integration for LibreChat**

LibreChat boasts compatibility with Azure OpenAI API services, treating the endpoint as a first-class citizen. To properly utilize Azure OpenAI within LibreChat, it's crucial to configure the [`librechat.yaml` file](./custom_config.md#azure-openai-object-structure) according to your specific needs. This document guides you through the essential setup process, which allows seamless use of multiple deployments and models with as much flexibility as needed.

## Setup
@@ -22,6 +22,8 @@ To properly integrate Azure OpenAI with LibreChat, specific fields must be accur

### Group-Level Configuration

This is a breakdown of the fields configurable in the Custom Config (`librechat.yaml`) file. For more information on each field, see the [Azure OpenAI section in the Custom Config Docs](./custom_config.md#azure-openai-object-structure).

1. **group** (String, Required): Unique identifier name for a group of models. Duplicate group names are not allowed and will result in validation errors.

2. **apiKey** (String, Required): Must be a valid API key for Azure OpenAI services. It can be a direct key string or an environment variable reference (e.g., `${WESTUS_API_KEY}`).
@@ -36,6 +38,14 @@ To properly integrate Azure OpenAI with LibreChat, specific fields must be accur

7. **additionalHeaders** (Object, Optional): Specifies any extra headers for Azure OpenAI API requests as key-value pairs. Environment variable references can be included as values.

8. **serverless** (Boolean, Optional): Specifies whether the group is a serverless inference chat-completions endpoint from the [Azure Model Catalog](https://ai.azure.com/explore), for which only a model identifier, `baseURL`, and `apiKey` are needed. For more info, see [serverless inference endpoints](#serverless-inference-endpoints).

9. **addParams** (Object, Optional): Adds or overrides additional parameters for Azure OpenAI API requests. Useful for specifying API-specific options as key-value pairs.

10. **dropParams** (Array/List, Optional): Allows for the exclusion of certain default parameters from Azure OpenAI API requests. Useful for APIs that do not accept or recognize specific parameters. This should be specified as a list of strings.

11. **forcePrompt** (Boolean, Optional): Dictates whether to send a `prompt` parameter instead of `messages` in the request body. This option is useful when needing to format the request in a manner consistent with OpenAI's API expectations, particularly for scenarios preferring a single text payload. A combined example follows this list.
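
The following is a sketch of a hypothetical group entry combining several of the fields above; the group name, instance name, deployment name, and header values are placeholders, not recommendations:

```yaml
endpoints:
  azureOpenAI:
    groups:
      - group: "eastus-example" # hypothetical group name
        apiKey: "${EASTUS_API_KEY}" # environment variable reference
        instanceName: "my-instance" # placeholder instance name
        deploymentName: "my-gpt-4-turbo-deployment" # placeholder deployment
        version: "2023-12-01-preview"
        additionalHeaders:
          x-api-key: "${EASTUS_EXTRA_KEY}" # hypothetical extra header
        addParams:
          safe_prompt: true # example API-specific parameter
        dropParams: ["stop", "user"]
        models:
          gpt-4-turbo: true # inherits the group's deploymentName/version
```
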
### Model-Level Configuration

Within each group, the `models` field must contain a mapping of model identifiers to either boolean values or object configurations.
@@ -44,9 +54,12 @@ Within each group, the `models` field must contain a mapping of records, or mode

```yaml
models:
  # Object setting: must include at least "deploymentName" and/or "version"
  gpt-4-vision-preview: # Must match OpenAI Model name
    deploymentName: "arbitrary-deployment-name"
    version: "2024-02-15-preview" # version can be any that supports vision
  # Boolean setting, must be "true"
  gpt-4-turbo: true
```

- See [Model Deployments](#model-deployments) for more examples.
@@ -55,16 +68,20 @@ models:

- If a model is configured as an object, it can specify its own `deploymentName` and `version`. If these are not provided, the model inherits the group's `deploymentName` and `version`.

- If the group represents a [serverless inference endpoint](#serverless-inference-endpoints), its single model should be set to `true` to add it to the models list.

### Special Considerations

1. **Unique Names**: Both model and group names must be unique across the entire configuration. Duplicate names lead to validation failures.

2. **Missing Required Fields**: Lack of the required `deploymentName` or `version`, either at the group level (for boolean-flagged models) or within the models' configurations (if not inheriting or explicitly specified), will result in validation errors, unless the group represents a [serverless inference endpoint](#serverless-inference-endpoints).

3. **Environment Variable References**: The configuration supports environment variable references (e.g., `${VARIABLE_NAME}`). Ensure that all referenced variables are present in your environment to avoid runtime errors; the absence of defined environment variables referenced in the config will cause errors. `${INSTANCE_NAME}` and `${DEPLOYMENT_NAME}` are unique placeholders that do not correspond to environment variables, but instead to the instance and deployment name of the currently selected model. To avoid potential conflicts, it is not recommended to use `INSTANCE_NAME` and `DEPLOYMENT_NAME` as environment variable names. See the sketch after this list.

4. **Error Handling**: Any issues in the config, like duplicate names, undefined environment variables, or missing required fields, will invalidate the setup and generate descriptive error messages aiming for prompt resolution. You will not be allowed to run the server with an invalid configuration.

5. **Model identifiers**: An unknown model (to the project) can be used as a model identifier, but it must match a known model to reflect its known context length, which is crucial for message/token handling; e.g., `gpt-7000` will be valid but default to a 4k token limit, whereas `gpt-4-turbo` will be recognized as having a 128k context limit.
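
To illustrate the placeholder behavior from item 3, here is a hypothetical group whose `baseURL` uses the special placeholders; LibreChat fills them in with the selected model's instance and deployment names, while `${WESTUS_API_KEY}` must exist in your environment:

```yaml
- group: "placeholder-example" # hypothetical group
  apiKey: "${WESTUS_API_KEY}" # resolved from the environment
  instanceName: "my-instance" # placeholder instance name
  deploymentName: "my-deployment" # placeholder deployment name
  version: "2023-12-01-preview"
  # ${INSTANCE_NAME} and ${DEPLOYMENT_NAME} are substituted per selected model,
  # not read from the environment:
  baseURL: "https://gateway.example.com/${INSTANCE_NAME}/${DEPLOYMENT_NAME}"
  models:
    gpt-4-turbo: true
```
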
Applying these setup requirements thoughtfully will ensure a correct and efficient integration of Azure OpenAI services with LibreChat through the `librechat.yaml` configuration. Always validate your configuration against the latest schema definitions and guidelines to maintain compatibility and functionality.
@@ -225,6 +242,50 @@ DALLE2_SYSTEM_PROMPT="Your DALL-E-2 System Prompt here"

- The `DALLE_REVERSE_PROXY` environment variable is ignored when Azure credentials (DALLEx_AZURE_API_VERSION and DALLEx_BASEURL) for DALL-E are configured.

### Serverless Inference Endpoints

Through the `librechat.yaml` file, you can configure Azure AI Studio serverless inference endpoints to access models from the [Azure Model Catalog](https://ai.azure.com/explore). Only a model identifier, `baseURL`, and `apiKey` are needed, along with the `serverless` field to indicate the special handling these endpoints need.

- You will need to follow the instructions in the compatible model cards to set up **MaaS** ("Models as a Service") access on Azure AI Studio.

- For reference, here are 2 known compatible model cards:

    - [Mistral-large](https://aka.ms/aistudio/landing/mistral-large) | [Llama-2-70b-chat](https://aka.ms/aistudio/landing/Llama-2-70b-chat)

- You can also review [the technical blog for the "Mistral-large" model release](https://techcommunity.microsoft.com/t5/ai-machine-learning-blog/mistral-large-mistral-ai-s-flagship-llm-debuts-on-azure-ai/ba-p/4066996) for more info.

- Then, you will need to add them to your azureOpenAI config in the `librechat.yaml` file.

- Here are example configurations for both Mistral-large and Llama-2-70b-chat:

```yaml
endpoints:
  azureOpenAI:
    groups:
      # serverless examples
      - group: "mistral-inference"
        apiKey: "${AZURE_MISTRAL_API_KEY}" # arbitrary env var name
        baseURL: "https://Mistral-large-vnpet-serverless.region.inference.ai.azure.com/v1/chat/completions"
        serverless: true
        models:
          mistral-large: true
      - group: "llama-70b-chat"
        apiKey: "${AZURE_LLAMA2_70B_API_KEY}" # arbitrary env var name
        baseURL: "https://Llama-2-70b-chat-qmvyb-serverless.region.inference.ai.azure.com/v1/chat/completions"
        serverless: true
        models:
          llama-70b-chat: true
```

**Notes**:

- Make sure to add the appropriate suffix for your deployment, either "/v1/chat/completions" or "/v1/completions".
- If using "/v1/completions" (without "chat"), you need to set the `forcePrompt` field to `true` in your [group config](#group-level-configuration); see the sketch after these notes.
- Compatibility with LibreChat relies on parity with OpenAI API specs. At the time of writing, these are typically **"Pay-as-you-go"** or "Models as a Service" (MaaS) deployments on Azure AI Studio that are OpenAI-SDK-compatible, with either v1/completions or v1/chat/completions endpoint handling.
- At the moment, only ["Mistral-large"](https://azure.microsoft.com/en-us/blog/microsoft-and-mistral-ai-announce-new-partnership-to-accelerate-ai-innovation-and-introduce-mistral-large-first-on-azure/) and [Llama-2 Chat models](https://techcommunity.microsoft.com/t5/ai-machine-learning-blog/announcing-llama-2-inference-apis-and-hosted-fine-tuning-through/ba-p/3979227) are compatible from the Azure model catalog. You can filter by "Chat completion" under inference tasks to see the full list; however, real-time endpoint models have not been tested.
- These serverless inference endpoints/models are likely not compatible with OpenAI function calling, which enables the use of Plugins. As they have not yet been tested, they are available on the Plugins endpoint, although they are not expected to work.
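
For instance, a hypothetical serverless group pointed at a "/v1/completions" endpoint would pair that suffix with `forcePrompt` (the group name, URL, and env var below are placeholders):

```yaml
- group: "serverless-completions-example" # hypothetical group
  apiKey: "${EXAMPLE_SERVERLESS_KEY}" # arbitrary env var name
  baseURL: "https://example-model-serverless.region.inference.ai.azure.com/v1/completions"
  serverless: true
  forcePrompt: true # required when using the non-chat "/v1/completions" suffix
  models:
    example-model: true
```
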
---

## ⚠️ Legacy Setup ⚠️
@@ -403,4 +464,8 @@ To use Azure with the Plugins endpoint, make sure the following environment vari

**Important:**

- If using `AZURE_OPENAI_BASEURL`, do not specify instance and deployment names in place of the placeholders, as the plugin request will fail.

**Generate images with Azure OpenAI Service (DALL-E)**

See the [current Azure DALL-E guide](#generate-images-with-azure-openai-service-dall-e) as it applies to legacy configurations.
@@ -843,6 +843,47 @@ endpoints:

- **Note**: It's recommended to use a custom environment variable reference for this field's values, as shown in the example.
- **Note**: The `api-key` header value is sent on every request.

#### **serverless**:

> Indicates the use of a serverless inference endpoint for Azure OpenAI chat completions.

- Type: Boolean
- **Optional**
- **Description**: When set to `true`, specifies that the group is configured to use serverless inference endpoints as an Azure "Models as a Service" model.
- **Example**: `serverless: true`
- **Note**: [More info here](./azure_openai.md#serverless-inference-endpoints)
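
A minimal sketch of a group using this flag (the group name, URL, and env var below are placeholders):

```yaml
- group: "my-serverless-group" # hypothetical group
  apiKey: "${MY_SERVERLESS_KEY}" # arbitrary env var name
  baseURL: "https://my-model-serverless.region.inference.ai.azure.com/v1/chat/completions"
  serverless: true
  models:
    my-model: true
```
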
#### **addParams**:

> Adds additional parameters to requests.

- Type: Object/Dictionary
- **Description**: Adds or overrides parameters. Useful for specifying API-specific options.
- **Example**:
```yaml
addParams:
  safe_prompt: true
```

#### **dropParams**:

> Removes [default parameters](#default-parameters) from requests.

- Type: Array/List of Strings
- **Description**: Excludes specified [default parameters](#default-parameters). Useful for APIs that do not accept or recognize certain parameters.
- **Example**: `dropParams: ["stop", "user", "frequency_penalty", "presence_penalty"]`
- **Note**: For a list of default parameters sent with every request, see the ["Default Parameters"](#default-parameters) section below.

#### **forcePrompt**:

> If `true`, sends a `prompt` parameter instead of `messages`.

- Type: Boolean
- Example: `forcePrompt: false`
- **Note**: This combines all messages into a single text payload, [following OpenAI format](https://github.com/pvicente/openai-python/blob/main/chatml.md), and uses the `/completions` endpoint of your baseURL rather than `/chat/completions`.

#### **models**:

> Configuration for individual models within a group.
```diff
@@ -1,6 +1,6 @@
 {
   "name": "librechat-data-provider",
-  "version": "0.4.4",
+  "version": "0.4.5",
   "description": "data services for librechat apps",
   "main": "dist/index.js",
   "module": "dist/index.es.js",
```
```diff
@@ -188,13 +188,147 @@ describe('validateAzureGroups', () => {
         },
       },
     ];
     // @ts-expect-error This error is expected because the 'instanceName' property is intentionally left out.
     const { isValid, errors } = validateAzureGroups(configs);
     expect(isValid).toBe(false);
     expect(errors.length).toBe(1);
   });
 });
 
+describe('validateAzureGroups for Serverless Configurations', () => {
+  const originalEnv = process.env;
+
+  beforeEach(() => {
+    jest.resetModules();
+    process.env = { ...originalEnv };
+  });
+
+  afterAll(() => {
+    process.env = originalEnv;
+  });
+
+  it('should validate a correct serverless configuration', () => {
+    const configs = [
+      {
+        group: 'serverless-group',
+        apiKey: '${SERVERLESS_API_KEY}',
+        baseURL: 'https://serverless.example.com/v1/completions',
+        serverless: true,
+        models: {
+          'model-serverless': true,
+        },
+      },
+    ];
+
+    const { isValid, errors } = validateAzureGroups(configs);
+
+    expect(isValid).toBe(true);
+    expect(errors.length).toBe(0);
+  });
+
+  it('should return invalid for a serverless configuration missing baseURL', () => {
+    const configs = [
+      {
+        group: 'serverless-group',
+        apiKey: '${SERVERLESS_API_KEY}',
+        serverless: true,
+        models: {
+          'model-serverless': true,
+        },
+      },
+    ];
+
+    const { isValid, errors } = validateAzureGroups(configs);
+    expect(isValid).toBe(false);
+    expect(errors).toEqual(
+      expect.arrayContaining([
+        expect.stringContaining(
+          'Group "serverless-group" is serverless but missing mandatory "baseURL."',
+        ),
+      ]),
+    );
+  });
+
+  it('should throw an error when environment variable for apiKey is not set', () => {
+    process.env.SERVERLESS_API_KEY = '';
+
+    expect(() => {
+      mapModelToAzureConfig({
+        modelName: 'model-serverless',
+        modelGroupMap: {
+          'model-serverless': {
+            group: 'serverless-group',
+          },
+        },
+        groupMap: {
+          'serverless-group': {
+            apiKey: '${SERVERLESS_API_KEY}',
+            baseURL: 'https://serverless.example.com/v1/completions',
+            serverless: true,
+            models: { 'model-serverless': true },
+          },
+        },
+      });
+    }).toThrow('Azure configuration environment variable "${SERVERLESS_API_KEY}" was not found.');
+  });
+
+  it('should correctly extract environment variables and prepare serverless config', () => {
+    process.env.SERVERLESS_API_KEY = 'abc123';
+
+    const { azureOptions, baseURL, serverless } = mapModelToAzureConfig({
+      modelName: 'model-serverless',
+      modelGroupMap: {
+        'model-serverless': {
+          group: 'serverless-group',
+        },
+      },
+      groupMap: {
+        'serverless-group': {
+          apiKey: '${SERVERLESS_API_KEY}',
+          baseURL: 'https://serverless.example.com/v1/completions',
+          serverless: true,
+          models: { 'model-serverless': true },
+        },
+      },
+    });
+
+    expect(azureOptions.azureOpenAIApiKey).toEqual('abc123');
+    expect(baseURL).toEqual('https://serverless.example.com/v1/completions');
+    expect(serverless).toBe(true);
+  });
+
+  it('should ensure serverless flag triggers appropriate validations and mappings', () => {
+    const configs = [
+      {
+        group: 'serverless-group-2',
+        apiKey: '${NEW_SERVERLESS_API_KEY}',
+        baseURL: 'https://new-serverless.example.com/v1/completions',
+        serverless: true,
+        models: {
+          'new-model-serverless': true,
+        },
+      },
+    ];
+
+    process.env.NEW_SERVERLESS_API_KEY = 'def456';
+
+    const { isValid, errors, modelGroupMap, groupMap } = validateAzureGroups(configs);
+    expect(isValid).toBe(true);
+    expect(errors.length).toBe(0);
+
+    const { azureOptions, baseURL, serverless } = mapModelToAzureConfig({
+      modelName: 'new-model-serverless',
+      modelGroupMap,
+      groupMap,
+    });
+
+    expect(azureOptions).toEqual({
+      azureOpenAIApiKey: 'def456',
+    });
+    expect(baseURL).toEqual('https://new-serverless.example.com/v1/completions');
+    expect(serverless).toBe(true);
+  });
+});
+
 describe('validateAzureGroups with modelGroupMap and groupMap', () => {
   const originalEnv = process.env;
```
```diff
@@ -396,6 +530,8 @@ describe('validateAzureGroups with modelGroupMap and groupMap', () => {
   it('should list all expected models in both modelGroupMap and groupMap', () => {
     process.env.WESTUS_API_KEY = 'westus-key';
     process.env.EASTUS_API_KEY = 'eastus-key';
+    process.env.AZURE_MISTRAL_API_KEY = 'mistral-key';
+    process.env.AZURE_LLAMA2_70B_API_KEY = 'llama-key';
 
     const validConfigs: TAzureGroups = [
       {
```
```diff
@@ -436,6 +572,26 @@ describe('validateAzureGroups with modelGroupMap and groupMap', () => {
           'x-api-key': 'x-api-key-value',
         },
       },
+      {
+        group: 'mistral-inference',
+        apiKey: '${AZURE_MISTRAL_API_KEY}',
+        baseURL:
+          'https://Mistral-large-vnpet-serverless.region.inference.ai.azure.com/v1/chat/completions',
+        serverless: true,
+        models: {
+          'mistral-large': true,
+        },
+      },
+      {
+        group: 'llama-70b-chat',
+        apiKey: '${AZURE_LLAMA2_70B_API_KEY}',
+        baseURL:
+          'https://Llama-2-70b-chat-qmvyb-serverless.region.inference.ai.azure.com/v1/chat/completions',
+        serverless: true,
+        models: {
+          'llama-70b-chat': true,
+        },
+      },
     ];
     const { isValid, modelGroupMap, groupMap, modelNames } = validateAzureGroups(validConfigs);
     expect(isValid).toBe(true);
```
```diff
@@ -446,6 +602,8 @@ describe('validateAzureGroups with modelGroupMap and groupMap', () => {
       'gpt-4',
       'gpt-4-1106-preview',
       'gpt-4-turbo',
+      'mistral-large',
+      'llama-70b-chat',
     ]);
 
     // Check modelGroupMap
```
```diff
@@ -484,6 +642,34 @@ describe('validateAzureGroups with modelGroupMap and groupMap', () => {
       }),
     );
 
+    // Check groupMap for 'mistral-inference'
+    expect(groupMap).toHaveProperty('mistral-inference');
+    expect(groupMap['mistral-inference']).toEqual(
+      expect.objectContaining({
+        apiKey: '${AZURE_MISTRAL_API_KEY}',
+        baseURL:
+          'https://Mistral-large-vnpet-serverless.region.inference.ai.azure.com/v1/chat/completions',
+        serverless: true,
+        models: expect.objectContaining({
+          'mistral-large': true,
+        }),
+      }),
+    );
+
+    // Check groupMap for 'llama-70b-chat'
+    expect(groupMap).toHaveProperty('llama-70b-chat');
+    expect(groupMap['llama-70b-chat']).toEqual(
+      expect.objectContaining({
+        apiKey: '${AZURE_LLAMA2_70B_API_KEY}',
+        baseURL:
+          'https://Llama-2-70b-chat-qmvyb-serverless.region.inference.ai.azure.com/v1/chat/completions',
+        serverless: true,
+        models: expect.objectContaining({
+          'llama-70b-chat': true,
+        }),
+      }),
+    );
+
     const { azureOptions: azureOptions1 } = mapModelToAzureConfig({
       modelName: 'gpt-4-vision-preview',
       modelGroupMap,
```
```diff
@@ -563,5 +749,39 @@ describe('validateAzureGroups with modelGroupMap and groupMap', () => {
       azureOpenAIApiDeploymentName: 'gpt-4-1106-preview',
       azureOpenAIApiVersion: '2023-12-01-preview',
     });
+
+    const {
+      azureOptions: azureOptions7,
+      serverless: serverlessMistral,
+      baseURL: mistralEndpoint,
+    } = mapModelToAzureConfig({
+      modelName: 'mistral-large',
+      modelGroupMap,
+      groupMap,
+    });
+    expect(serverlessMistral).toBe(true);
+    expect(mistralEndpoint).toBe(
+      'https://Mistral-large-vnpet-serverless.region.inference.ai.azure.com/v1/chat/completions',
+    );
+    expect(azureOptions7).toEqual({
+      azureOpenAIApiKey: 'mistral-key',
+    });
+
+    const {
+      azureOptions: azureOptions8,
+      serverless: serverlessLlama,
+      baseURL: llamaEndpoint,
+    } = mapModelToAzureConfig({
+      modelName: 'llama-70b-chat',
+      modelGroupMap,
+      groupMap,
+    });
+    expect(serverlessLlama).toBe(true);
+    expect(llamaEndpoint).toBe(
+      'https://Llama-2-70b-chat-qmvyb-serverless.region.inference.ai.azure.com/v1/chat/completions',
+    );
+    expect(azureOptions8).toEqual({
+      azureOpenAIApiKey: 'llama-key',
+    });
   });
 });
```
```diff
@@ -71,6 +71,8 @@ export function validateAzureGroups(configs: TAzureGroups): TValidatedAzureConfi
       baseURL,
       additionalHeaders,
       models,
+      serverless,
       ...rest
     } = group;
 
     if (groupMap[groupName]) {
```
```diff
@@ -78,6 +80,18 @@ export function validateAzureGroups(configs: TAzureGroups): TValidatedAzureConfi
       return { isValid: false, modelNames, modelGroupMap, groupMap, errors };
     }
 
+    if (serverless && !baseURL) {
+      errors.push(`Group "${groupName}" is serverless but missing mandatory "baseURL."`);
+      return { isValid: false, modelNames, modelGroupMap, groupMap, errors };
+    }
+
+    if (!instanceName && !serverless) {
+      errors.push(
+        `Group "${groupName}" is missing an "instanceName" for non-serverless configuration.`,
+      );
+      return { isValid: false, modelNames, modelGroupMap, groupMap, errors };
+    }
+
     groupMap[groupName] = {
       apiKey,
       instanceName,
```
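
For reference, these hypothetical `librechat.yaml` groups would trip the two new validation checks above (names and env vars are placeholders):

```yaml
groups:
  # fails: serverless group without the mandatory baseURL
  - group: "serverless-missing-baseurl"
    apiKey: "${SOME_KEY}"
    serverless: true
    models:
      some-model: true
  # fails: non-serverless group without an instanceName
  - group: "standard-missing-instance"
    apiKey: "${SOME_KEY}"
    version: "2023-12-01-preview"
    deploymentName: "gpt-4"
    models:
      gpt-4: true
```
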
```diff
@@ -86,6 +100,8 @@ export function validateAzureGroups(configs: TAzureGroups): TValidatedAzureConfi
       baseURL,
       additionalHeaders,
       models,
+      serverless,
       ...rest,
     };
 
     for (const modelName in group.models) {
```
```diff
@@ -99,6 +115,13 @@ export function validateAzureGroups(configs: TAzureGroups): TValidatedAzureConfi
       return { isValid: false, modelNames, modelGroupMap, groupMap, errors };
     }
 
+    if (serverless) {
+      modelGroupMap[modelName] = {
+        group: groupName,
+      };
+      continue;
+    }
+
     if (typeof model === 'boolean') {
       // For boolean models, check if group-level deploymentName and version are present.
       if (!group.deploymentName || !group.version) {
```
```diff
@@ -138,15 +161,16 @@ export function validateAzureGroups(configs: TAzureGroups): TValidatedAzureConfi
 
 type AzureOptions = {
   azureOpenAIApiKey: string;
-  azureOpenAIApiInstanceName: string;
-  azureOpenAIApiDeploymentName: string;
-  azureOpenAIApiVersion: string;
+  azureOpenAIApiInstanceName?: string;
+  azureOpenAIApiDeploymentName?: string;
+  azureOpenAIApiVersion?: string;
 };
 
 type MappedAzureConfig = {
   azureOptions: AzureOptions;
   baseURL?: string;
   headers?: Record<string, string>;
+  serverless?: boolean;
 };
 
 export function mapModelToAzureConfig({
```
```diff
@@ -168,6 +192,47 @@ export function mapModelToAzureConfig({
     );
   }
 
+  const instanceName = groupConfig.instanceName;
+
+  if (!instanceName && !groupConfig.serverless) {
+    throw new Error(
+      `Group "${modelConfig.group}" is missing an instanceName for non-serverless configuration.`,
+    );
+  }
+
+  if (groupConfig.serverless && !groupConfig.baseURL) {
+    throw new Error(
+      `Group "${modelConfig.group}" is missing the required base URL for serverless configuration.`,
+    );
+  }
+
+  if (groupConfig.serverless) {
+    const result: MappedAzureConfig = {
+      azureOptions: {
+        azureOpenAIApiKey: extractEnvVariable(groupConfig.apiKey),
+      },
+      baseURL: extractEnvVariable(groupConfig.baseURL as string),
+      serverless: true,
+    };
+
+    const apiKeyValue = result.azureOptions.azureOpenAIApiKey;
+    if (typeof apiKeyValue === 'string' && envVarRegex.test(apiKeyValue)) {
+      throw new Error(`Azure configuration environment variable "${apiKeyValue}" was not found.`);
+    }
+
+    if (groupConfig.additionalHeaders) {
+      result.headers = groupConfig.additionalHeaders;
+    }
+
+    return result;
+  }
+
+  if (!instanceName) {
+    throw new Error(
+      `Group "${modelConfig.group}" is missing an instanceName for non-serverless configuration.`,
+    );
+  }
+
   const modelDetails = groupConfig.models[modelName];
   const deploymentName =
     typeof modelDetails === 'object'
```
```diff
@@ -186,7 +251,7 @@ export function mapModelToAzureConfig({
 
   const azureOptions: AzureOptions = {
     azureOpenAIApiKey: extractEnvVariable(groupConfig.apiKey),
-    azureOpenAIApiInstanceName: extractEnvVariable(groupConfig.instanceName),
+    azureOpenAIApiInstanceName: extractEnvVariable(instanceName),
     azureOpenAIApiDeploymentName: extractEnvVariable(deploymentName),
     azureOpenAIApiVersion: extractEnvVariable(version),
   };
```
```diff
@@ -19,8 +19,12 @@ export type TAzureModelConfig = z.infer<typeof modelConfigSchema>;
 
 export const azureBaseSchema = z.object({
   apiKey: z.string(),
-  instanceName: z.string(),
+  serverless: z.boolean().optional(),
+  instanceName: z.string().optional(),
   deploymentName: z.string().optional(),
+  addParams: z.record(z.any()).optional(),
+  dropParams: z.array(z.string()).optional(),
+  forcePrompt: z.boolean().optional(),
   version: z.string().optional(),
   baseURL: z.string().optional(),
   additionalHeaders: z.record(z.any()).optional(),
```
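
Under the updated schema, both of the following group shapes validate (hypothetical names and values); note that `instanceName` is only enforced for non-serverless groups by `validateAzureGroups`, not by the base schema itself:

```yaml
groups:
  # standard group: instanceName still required in practice (non-serverless)
  - group: "standard-group"
    apiKey: "${WESTUS_API_KEY}"
    instanceName: "my-instance"
    deploymentName: "my-gpt-4-turbo-deployment"
    version: "2023-12-01-preview"
    models:
      gpt-4-turbo: true
  # serverless group: instanceName omitted, baseURL mandatory
  - group: "serverless-group"
    apiKey: "${SERVERLESS_API_KEY}"
    baseURL: "https://serverless.example.com/v1/chat/completions"
    serverless: true
    models:
      my-serverless-model: true
```
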