diff --git a/api/server/services/Config/loadConfigModels.spec.js b/api/server/services/Config/loadConfigModels.spec.js
index 1b7dec5fd7..828ecd881e 100644
--- a/api/server/services/Config/loadConfigModels.spec.js
+++ b/api/server/services/Config/loadConfigModels.spec.js
@@ -46,6 +46,15 @@ const exampleConfig = {
         fetch: false,
       },
     },
+    {
+      name: 'MLX',
+      apiKey: 'user_provided',
+      baseURL: 'http://localhost:8080/v1/',
+      models: {
+        default: ['Meta-Llama-3-8B-Instruct-4bit'],
+        fetch: false,
+      },
+    },
   ],
   },
 };
diff --git a/client/public/assets/mlx.png b/client/public/assets/mlx.png
new file mode 100644
index 0000000000..06a77c9b6c
Binary files /dev/null and b/client/public/assets/mlx.png differ
diff --git a/client/src/components/Chat/Menus/Endpoints/UnknownIcon.tsx b/client/src/components/Chat/Menus/Endpoints/UnknownIcon.tsx
index 8999f1e42a..e35c4d3e54 100644
--- a/client/src/components/Chat/Menus/Endpoints/UnknownIcon.tsx
+++ b/client/src/components/Chat/Menus/Endpoints/UnknownIcon.tsx
@@ -9,6 +9,7 @@ const knownEndpointAssets = {
   [KnownEndpoints.fireworks]: '/assets/fireworks.png',
   [KnownEndpoints.groq]: '/assets/groq.png',
   [KnownEndpoints.mistral]: '/assets/mistral.png',
+  [KnownEndpoints.mlx]: '/assets/mlx.png',
   [KnownEndpoints.ollama]: '/assets/ollama.png',
   [KnownEndpoints.openrouter]: '/assets/openrouter.png',
   [KnownEndpoints.perplexity]: '/assets/perplexity.png',
diff --git a/docs/install/configuration/ai_endpoints.md b/docs/install/configuration/ai_endpoints.md
index 5cce8a6d37..5690886603 100644
--- a/docs/install/configuration/ai_endpoints.md
+++ b/docs/install/configuration/ai_endpoints.md
@@ -271,6 +271,46 @@ Some of the endpoints are marked as **Known,** which means they might have speci
 
 ![image](https://github.com/danny-avila/LibreChat/assets/110412045/ddb4b2f3-608e-4034-9a27-3e94fc512034)
 
+## Apple MLX
+> MLX API key: ignored - [MLX OpenAI Compatibility](https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/SERVER.md)
+
+**Notes:**
+
+- **Known:** icon provided.
+
+- The API is mostly strict and may reject unrecognized parameters.
+- Only one model is served at a time; to use another model, run a separate server instance as a different endpoint with a different `baseURL` (see the server command below).
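+
+For example, you can launch the server like this before starting LibreChat (a minimal sketch; the model name is illustrative, and the port must match the `baseURL` below, 8080 being the server default):
+
+```sh
+mlx_lm.server --model mlx-community/Meta-Llama-3-8B-Instruct-4bit --port 8080
+```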
+
+```yaml
+    - name: "MLX"
+      apiKey: "mlx"
+      baseURL: "http://localhost:8080/v1/"
+      models:
+        default: [
+          "Meta-Llama-3-8B-Instruct-4bit"
+          ]
+        fetch: false # fetching list of models is not supported
+      titleConvo: true
+      titleModel: "current_model"
+      summarize: false
+      summaryModel: "current_model"
+      forcePrompt: false
+      modelDisplayLabel: "Apple MLX"
+      addParams:
+        max_tokens: 2000
+        stop: [
+          "<|eot_id|>"
+        ]
+```
+
+![image](https://github.com/danny-avila/LibreChat/blob/ae9d88b68c95fdb46787bca1df69407d2dd4e8dc/client/public/assets/mlx.png)
+
 ## Ollama
 > Ollama API key: Required but ignored - [Ollama OpenAI Compatibility](https://github.com/ollama/ollama/blob/main/docs/openai.md)
 
diff --git a/docs/install/configuration/custom_config.md b/docs/install/configuration/custom_config.md
index 3a46b898ff..228d005fc2 100644
--- a/docs/install/configuration/custom_config.md
+++ b/docs/install/configuration/custom_config.md
@@ -1213,6 +1213,7 @@ Each endpoint in the `custom` array should have the following structure:
     - "Perplexity"
     - "together.ai"
     - "Ollama"
+    - "MLX"
 
 ### **models**
 
diff --git a/docs/install/configuration/mlx.md b/docs/install/configuration/mlx.md
new file mode 100644
index 0000000000..3890271527
--- /dev/null
+++ b/docs/install/configuration/mlx.md
@@ -0,0 +1,38 @@
+---
+title: Apple MLX
+description: Using LibreChat with Apple MLX
+weight: -6
+---
+## MLX
+Use [MLX](https://ml-explore.github.io/mlx/build/html/index.html) for:
+
+* Running large language models on local Apple Silicon hardware (M1, M2, M3 ARM chips with unified CPU/GPU memory)
+
+### 1. Install MLX on macOS
+#### Mac M series only
+MLX supports GPU acceleration on the Apple Metal backend via the `mlx-lm` Python package. Follow the instructions at [Install the `mlx-lm` package](https://github.com/ml-explore/mlx-examples/tree/main/llms)
+
+### 2. Load Models with MLX
+MLX can load common Hugging Face models directly, but it is recommended to use the converted and tested quantized models provided by the [mlx-community](https://huggingface.co/mlx-community), chosen according to your hardware capability.
+
+1. Browse the available models on [Hugging Face](https://huggingface.co/models?search=mlx-community)
+2. Copy the model identifier `<author>/<model>` from the model page (e.g. `mlx-community/Meta-Llama-3-8B-Instruct-4bit`)
+3. Check the model size; models that fit in CPU/GPU unified memory perform best.
+4. Follow the instructions to launch the model server: [Run OpenAI Compatible Server Locally](https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/SERVER.md)
+
+```mlx_lm.server --model <author>/<model>```
+
+### 3. Configure LibreChat
+Use the `librechat.yaml` [configuration file (guide here)](./ai_endpoints.md) to add MLX as a separate endpoint; a full example with Llama-3 is provided there, and a minimal entry is sketched below.
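+
+A minimal entry, trimmed from the full example in the guide linked above (the model name is illustrative; the port must match your `mlx_lm.server` invocation, 8080 by default):
+
+```yaml
+    - name: "MLX"
+      apiKey: "mlx"
+      baseURL: "http://localhost:8080/v1/"
+      models:
+        default: ["Meta-Llama-3-8B-Instruct-4bit"]
+        fetch: false # fetching the model list is not supported
+      modelDisplayLabel: "Apple MLX"
+```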
\ No newline at end of file
diff --git a/docs/install/index.md b/docs/install/index.md
index c4676a62b9..82568f0c7c 100644
--- a/docs/install/index.md
+++ b/docs/install/index.md
@@ -24,6 +24,7 @@ weight: 1
   * 🤖 [AI Setup](./configuration/ai_setup.md)
   * 🚅 [LiteLLM](./configuration/litellm.md)
   * 🦙 [Ollama](./configuration/ollama.md)
+  * 🍎 [Apple MLX](./configuration/mlx.md)
   * 💸 [Free AI APIs](./configuration/free_ai_apis.md)
   * 🛂 [Authentication System](./configuration/user_auth_system.md)
   * 🍃 [Online MongoDB](./configuration/mongodb.md)
diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts
index 0d30d23051..9198831968 100644
--- a/packages/data-provider/src/config.ts
+++ b/packages/data-provider/src/config.ts
@@ -299,6 +299,7 @@ export enum KnownEndpoints {
   fireworks = 'fireworks',
   groq = 'groq',
   mistral = 'mistral',
+  mlx = 'mlx',
   ollama = 'ollama',
   openrouter = 'openrouter',
   perplexity = 'perplexity',