Mirror of https://github.com/danny-avila/LibreChat.git (synced 2025-12-16 16:30:15 +01:00)

🦙 docs: fix litellm.md (#2566)

Parent: 463ca5d613
Commit: ba4fa6150e
1 changed file with 257 additions and 255 deletions

Use **[LiteLLM Proxy](https://docs.litellm.ai/docs/simple_proxy)** for:

* Authentication & Spend Tracking Virtual Keys

## Start LiteLLM Proxy Server

## 1. Uncomment desired sections in docker-compose.override.yml

The override file contains sections for the LiteLLM features below.

Minimum working `docker-compose.override.yml` example:
```yaml
litellm:
  image: ghcr.io/berriai/litellm:main-latest
  volumes:
    # ...
  environment:
    GOOGLE_APPLICATION_CREDENTIALS: /app/application_default_credentials.json
```
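For context, the full `litellm` service section that this minimum example abbreviates typically also mounts the config file from step 2 and exposes the proxy port. A sketch is below; the exact paths, port, and command flags are assumptions based on LiteLLM's Docker image, not part of this commit, so check them against your override file:

```yaml
litellm:
  image: ghcr.io/berriai/litellm:main-latest
  volumes:
    # Assumed path: the config created in step 2, mounted into the container
    - ./litellm/litellm-config.yml:/app/config.yaml
    # Only needed for GCP/Vertex AI models (see the Google note below)
    - ./litellm/application_default_credentials.json:/app/application_default_credentials.json
  # The image's entrypoint is the proxy CLI, so flags go in `command`
  command: ["--config", "/app/config.yaml", "--port", "4000"]
  ports:
    - "4000:4000"
  environment:
    GOOGLE_APPLICATION_CREDENTIALS: /app/application_default_credentials.json
```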

### Caching with Redis

LiteLLM supports in-memory, Redis, and S3 caching. Note: caching currently only works with exact matching.
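If you enable the Redis section of the override file, the sketch below shows the general shape: a `redis` service plus connection details for the `litellm` service. The service name, image tag, and `REDIS_*` variables here are illustrative assumptions; LiteLLM's caching docs list the variables it actually reads.

```yaml
# Hypothetical Redis section for docker-compose.override.yml
redis:
  image: redis:7-alpine

litellm:
  environment:
    # Assumed: LiteLLM picks these up when cache_params.type is "redis"
    REDIS_HOST: redis
    REDIS_PORT: "6379"
```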

### Performance Monitoring with Langfuse

LiteLLM supports various logging and observability options. The settings below enable Langfuse, which provides a `cache_hit` tag showing which conversations used the cache.
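Concretely, enabling Langfuse takes two pieces: the `success_callback: ["langfuse"]` entry in `litellm-config.yml` (shown in the config examples under step 2) and Langfuse credentials in the `litellm` service's environment. The variable names below follow Langfuse's standard naming, but treat them as assumptions to verify:

```yaml
litellm:
  environment:
    # Standard Langfuse SDK credential variables (assumed names)
    LANGFUSE_PUBLIC_KEY: pk-lf-...
    LANGFUSE_SECRET_KEY: sk-lf-...
    # Change if self-hosting Langfuse instead of using Langfuse Cloud
    LANGFUSE_HOST: https://cloud.langfuse.com
```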

## 2. Create a Config for LiteLLM proxy

LiteLLM requires a configuration file in addition to the override file. Within LibreChat, this will be `litellm/litellm-config.yml`. The file below has options to proxy to various providers and to enable load balancing, Redis caching, and Langfuse monitoring. Review the LiteLLM documentation for other configuration options.

More information on LiteLLM configurations here: **[docs.litellm.ai/docs/simple_proxy](https://docs.litellm.ai/docs/simple_proxy)**

### Working Example of incorporating OpenAI, Azure OpenAI, AWS Bedrock, and GCP

Please note that `...` stands for a secret or a value you should not share (API key, custom tenant endpoint, etc.).

You can potentially use env variables for these too, ex: `api_key: "os.environ/AZURE_API_KEY" # does os.getenv("AZURE_API_KEY")`
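For instance, one of the Bedrock entries from Example A below could reference environment variables instead of inline secrets; this sketch assumes `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` are set in the litellm container:

```yaml
- model_name: claude-3-haiku
  litellm_params:
    model: bedrock/anthropic.claude-3-haiku-20240307-v1:0
    aws_region_name: us-east-1
    # "os.environ/VAR" makes LiteLLM call os.getenv("VAR") at startup
    aws_access_key_id: "os.environ/AWS_ACCESS_KEY_ID"
    aws_secret_access_key: "os.environ/AWS_SECRET_ACCESS_KEY"
```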
??? abstract "Example A"

    ```yaml
    model_list:
      # https://litellm.vercel.app/docs/proxy/quick_start

      # Anthropic
      - model_name: claude-3-haiku
        litellm_params:
          model: bedrock/anthropic.claude-3-haiku-20240307-v1:0
          aws_region_name: us-east-1
          aws_access_key_id: A...
          aws_secret_access_key: ...

      - model_name: claude-3-sonnet
        litellm_params:
          model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
          aws_region_name: us-east-1
          aws_access_key_id: A...
          aws_secret_access_key: ...

      - model_name: claude-3-opus
        litellm_params:
          model: bedrock/anthropic.claude-3-opus-20240229-v1:0
          aws_region_name: us-west-2
          aws_access_key_id: A...
          aws_secret_access_key: ...

      - model_name: claude-v2
        litellm_params:
          model: bedrock/anthropic.claude-v2:1
          aws_region_name: us-east-1
          aws_access_key_id: A...
          aws_secret_access_key: ...

      - model_name: claude-instant
        litellm_params:
          model: bedrock/anthropic.claude-instant-v1
          aws_region_name: us-east-1
          aws_access_key_id: A...
          aws_secret_access_key: ...

      # Llama
      - model_name: llama2-13b
        litellm_params:
          model: bedrock/meta.llama2-13b-chat-v1
          aws_region_name: us-east-1
          aws_access_key_id: A...
          aws_secret_access_key: ...

      - model_name: llama2-70b
        litellm_params:
          model: bedrock/meta.llama2-70b-chat-v1
          aws_region_name: us-east-1
          aws_access_key_id: A...
          aws_secret_access_key: ...

      - model_name: llama3-8b
        litellm_params:
          model: bedrock/meta.llama3-8b-instruct-v1:0
          aws_region_name: us-east-1
          aws_access_key_id: A...
          aws_secret_access_key: ...

      - model_name: llama3-70b
        litellm_params:
          model: bedrock/meta.llama3-70b-instruct-v1:0
          aws_region_name: us-east-1
          aws_access_key_id: A...
          aws_secret_access_key: ...

      # Mistral
      - model_name: mistral-7b-instruct
        litellm_params:
          model: bedrock/mistral.mistral-7b-instruct-v0:2
          aws_region_name: us-east-1
          aws_access_key_id: A...
          aws_secret_access_key: ...

      - model_name: mixtral-8x7b-instruct
        litellm_params:
          model: bedrock/mistral.mixtral-8x7b-instruct-v0:1
          aws_region_name: us-east-1
          aws_access_key_id: A...
          aws_secret_access_key: ...

      - model_name: mixtral-large
        litellm_params:
          model: bedrock/mistral.mistral-large-2402-v1:0
          aws_region_name: us-east-1
          aws_access_key_id: A...
          aws_secret_access_key: ...

      # Cohere
      - model_name: cohere-command-v14
        litellm_params:
          model: bedrock/cohere.command-text-v14
          aws_region_name: us-east-1
          aws_access_key_id: A...
          aws_secret_access_key: ...

      - model_name: cohere-command-light-v14
        litellm_params:
          model: bedrock/cohere.command-light-text-v14
          aws_region_name: us-east-1
          aws_access_key_id: A...
          aws_secret_access_key: ...

      # AI21 Labs
      - model_name: ai21-j2-mid
        litellm_params:
          model: bedrock/ai21.j2-mid-v1
          aws_region_name: us-east-1
          aws_access_key_id: A...
          aws_secret_access_key: ...

      - model_name: ai21-j2-ultra
        litellm_params:
          model: bedrock/ai21.j2-ultra-v1
          aws_region_name: us-east-1
          aws_access_key_id: A...
          aws_secret_access_key: ...

      # Amazon
      - model_name: amazon-titan-lite
        litellm_params:
          model: bedrock/amazon.titan-text-lite-v1
          aws_region_name: us-east-1
          aws_access_key_id: A...
          aws_secret_access_key: ...

      - model_name: amazon-titan-express
        litellm_params:
          model: bedrock/amazon.titan-text-express-v1
          aws_region_name: us-east-1
          aws_access_key_id: A...
          aws_secret_access_key: ...

      # Azure
      - model_name: azure-gpt-4-turbo-preview
        litellm_params:
          model: azure/gpt-4-turbo-preview
          api_base: https://tenant-name.openai.azure.com/
          api_key: ...

      - model_name: azure-gpt-3.5-turbo
        litellm_params:
          model: azure/gpt-35-turbo
          api_base: https://tenant-name.openai.azure.com/
          api_key: ...

      - model_name: azure-gpt-4
        litellm_params:
          model: azure/gpt-4
          api_base: https://tenant-name.openai.azure.com/
          api_key: ...

      - model_name: azure-gpt-3.5-turbo-16k
        litellm_params:
          model: azure/gpt-35-turbo-16k
          api_base: https://tenant-name.openai.azure.com/
          api_key: ...

      - model_name: azure-gpt-4-32k
        litellm_params:
          model: azure/gpt-4-32k
          api_base: https://tenant-name.openai.azure.com/
          api_key: ...

      # OpenAI
      - model_name: gpt-4-turbo
        litellm_params:
          model: gpt-4-turbo
          api_key: ...

      - model_name: old-gpt-4-turbo-preview
        litellm_params:
          model: gpt-4-turbo-preview
          api_key: ...

      - model_name: gpt-3.5-turbo
        litellm_params:
          model: gpt-3.5-turbo
          api_key: ...

      - model_name: gpt-4
        litellm_params:
          model: gpt-4
          api_key: ...

      - model_name: gpt-3.5-turbo-16k
        litellm_params:
          model: gpt-3.5-turbo-16k
          api_key: ...

      - model_name: gpt-4-32k
        litellm_params:
          model: gpt-4-32k
          api_key: ...

      - model_name: gpt-4-vision-preview
        litellm_params:
          model: gpt-4-vision-preview
          api_key: ...

      # Google
      # NOTE: For Google - see above about the required "GOOGLE_APPLICATION_CREDENTIALS" environment variable and volume mount
      - model_name: google-chat-bison
        litellm_params:
          model: vertex_ai/chat-bison
          vertex_project: gcp-proj-name
          vertex_location: us-central1

      - model_name: google-chat-bison-32k
        litellm_params:
          model: vertex_ai/chat-bison-32k
          vertex_project: gcp-proj-name
          vertex_location: us-central1

      - model_name: google-gemini-pro-1.0
        litellm_params:
          model: vertex_ai/gemini-pro
          vertex_project: gcp-proj-name
          vertex_location: us-central1

      - model_name: google-gemini-pro-1.5-preview
        litellm_params:
          model: vertex_ai/gemini-1.5-pro-preview-0409
          vertex_project: gcp-proj-name
          vertex_location: us-central1

    # NOTE: It may be a good idea to comment out "success_callback", "cache", and "cache_params" (and the lines under them) when you first start, until this works!
    litellm_settings:
      success_callback: ["langfuse"]
      cache: True
      cache_params:
        type: "redis"
        supported_call_types: ["acompletion", "completion", "embedding", "aembedding"]
    general_settings:
      master_key: sk_live_SetToRandomValue
    ```

### Example of a few Different Options (ex: rpm, stream, ollama)

??? abstract "Example B"

    ```yaml
    model_list:
      - model_name: gpt-3.5-turbo
        litellm_params:
          model: azure/gpt-turbo-small-eu
          api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
          api_key:
          rpm: 6  # Rate limit for this deployment: in requests per minute (rpm)
      - model_name: gpt-3.5-turbo
        litellm_params:
          model: azure/gpt-turbo-small-ca
          api_base: https://my-endpoint-canada-berri992.openai.azure.com/
          api_key:
          rpm: 6
      - model_name: gpt-3.5-turbo
        litellm_params:
          model: azure/gpt-turbo-large
          api_base: https://openai-france-1234.openai.azure.com/
          api_key:
          rpm: 1440
      - model_name: mixtral
        litellm_params:
          model: openai/mixtral:8x7b-instruct-v0.1-q5_K_M  # use openai/* for Ollama's OpenAI API compatibility
          api_base: http://ollama:11434/v1
          stream: True
      - model_name: mistral
        litellm_params:
          model: openai/mistral  # use openai/* for Ollama's OpenAI API compatibility
          api_base: http://ollama:11434/v1
          stream: True
    litellm_settings:
      success_callback: ["langfuse"]
      cache: True
      cache_params:
        type: "redis"
        supported_call_types: ["acompletion", "completion", "embedding", "aembedding"]
    general_settings:
      master_key: sk_live_SetToRandomValue
    ```
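Note that the three Azure entries above deliberately share the `model_name` `gpt-3.5-turbo`: LiteLLM groups same-named deployments and load-balances requests across them, respecting each entry's `rpm` limit. The routing behavior can be changed via a `router_settings` block; the key and strategy names below are assumptions to check against LiteLLM's routing docs:

```yaml
# Hypothetical addition to litellm-config.yml
router_settings:
  # Other assumed options include "simple-shuffle" (default) and "least-busy"
  routing_strategy: usage-based-routing
```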

## 3. Configure LibreChat

Use the `librechat.yaml` [Configuration file (guide here)](./ai_endpoints.md) to add Reverse Proxies as separate endpoints.

Here is an example config:

```yaml
custom:
  - name: "Lite LLM"
    # A placeholder - otherwise it becomes the default (OpenAI) key
    # ...
    forcePrompt: false
    modelDisplayLabel: "Lite LLM"
```
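Since the diff shows only the edges of this endpoint definition, here is a hedged sketch of a complete `custom` entry. The `apiKey`, `baseURL`, and model names are illustrative assumptions: the key must be one LiteLLM accepts (its `general_settings.master_key` or a virtual key), the URL must point at your litellm container, and the models must exist in your `model_list`.

```yaml
custom:
  - name: "Lite LLM"
    # Must be a key LiteLLM accepts; a placeholder becomes the default (OpenAI) key
    apiKey: "sk_live_SetToRandomValue"
    # Assumes LibreChat and LiteLLM share a Docker network, LiteLLM on port 4000
    baseURL: "http://litellm:4000"
    models:
      default: ["gpt-3.5-turbo", "claude-3-haiku"]
      fetch: true  # pull the model list from the proxy at startup
    titleConvo: true
    titleModel: "gpt-3.5-turbo"
    forcePrompt: false
    modelDisplayLabel: "Lite LLM"
```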
---

## Why use LiteLLM?

1. **Access to Multiple LLMs**: It allows calling over 100 LLMs from platforms like Huggingface, Bedrock, TogetherAI, etc., using OpenAI's ChatCompletions and Completions format.