From ba4fa6150e5fc08f53512aa137d364eb8eede5ad Mon Sep 17 00:00:00 2001
From: Fuegovic <32828263+fuegovic@users.noreply.github.com>
Date: Sun, 28 Apr 2024 08:33:51 -0400
Subject: [PATCH] =?UTF-8?q?=F0=9F=A6=99=20docs:=20fix=20litellm.md=20(#256?=
 =?UTF-8?q?6)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/install/configuration/litellm.md | 512 +++++++++++++------------
 1 file changed, 257 insertions(+), 255 deletions(-)

diff --git a/docs/install/configuration/litellm.md b/docs/install/configuration/litellm.md
index ec2ead6315..5cdcff8dec 100644
--- a/docs/install/configuration/litellm.md
+++ b/docs/install/configuration/litellm.md
@@ -12,11 +12,13 @@ Use **[LiteLLM Proxy](https://docs.litellm.ai/docs/simple_proxy)** for:
 * Authentication & Spend Tracking Virtual Keys
 
 ## Start LiteLLM Proxy Server
-### 1. Uncomment desired sections in docker-compose.override.yml
+
+## 1. Uncomment desired sections in docker-compose.override.yml
 The override file contains sections for the LiteLLM features below.
 
 Minimal working `docker-compose.override.yml` example:
-```
+
+```yaml
 litellm:
   image: ghcr.io/berriai/litellm:main-latest
   volumes:
@@ -32,313 +34,314 @@ litellm:
     GOOGLE_APPLICATION_CREDENTIALS: /app/application_default_credentials.json
 ```
 
-#### Caching with Redis
+### Caching with Redis
 LiteLLM supports in-memory, Redis, and S3 caching. Note: caching currently only works with exact matching.
 
-#### Performance Monitoring with Langfuse
+### Performance Monitoring with Langfuse
 LiteLLM supports various logging and observability options. The settings below enable Langfuse, which provides a cache_hit tag showing which conversations used the cache.
 
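+Neither section is reproduced in full here, so as a rough sketch: the Redis pieces pair a `redis` service in `docker-compose.override.yml` with matching `cache_params` in the LiteLLM config, and Langfuse only needs its API keys in the container environment. The service name, password, and key values below are placeholder assumptions, not the project's actual defaults:
+
+```yaml
+# docker-compose.override.yml (sketch): Redis for caching, Langfuse keys for logging
+redis:
+  image: redis:7-alpine
+  command: redis-server --requirepass RedisChangeMe   # assumed password, pick your own
+
+# add to the litellm service's environment for the Langfuse callback:
+#   LANGFUSE_PUBLIC_KEY: pk-lf-...
+#   LANGFUSE_SECRET_KEY: sk-lf-...
+
+# litellm/litellm-config.yml (sketch): point the cache at that Redis service
+# litellm_settings:
+#   cache: True
+#   cache_params:
+#     type: "redis"
+#     host: "redis"            # compose service name above
+#     port: 6379
+#     password: "RedisChangeMe"
+```
+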
-### 2. Create a Config for LiteLLM proxy
+## 2. Create a Config for LiteLLM proxy
 LiteLLM requires a configuration file in addition to the override file. Within LibreChat, this will be `litellm/litellm-config.yml`. The file below has options to enable the LLM proxy for various providers, load balancing, Redis caching, and Langfuse monitoring. Review the documentation for other configuration options. More information on LiteLLM configurations here: **[docs.litellm.ai/docs/simple_proxy](https://docs.litellm.ai/docs/simple_proxy)**
 
-#### Working Example of incorporating OpenAI, Azure OpenAI, AWS Bedrock, and GCP
+### Working Example of incorporating OpenAI, Azure OpenAI, AWS Bedrock, and GCP
 Please note that `...` denotes a secret or a value you should not share (API key, custom tenant endpoint, etc.).
 You can also use env variables for these, e.g.:
 `api_key: "os.environ/AZURE_API_KEY" # does os.getenv("AZURE_API_KEY")`
 
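+The same `os.environ/` prefix works for other `litellm_params` values too, so as a sketch, a Bedrock entry with no inline secrets (the variable names are illustrative and must be set in the litellm container's environment):
+
+```yaml
+# sketch: secrets pulled from the environment instead of hard-coded
+model_list:
+  - model_name: claude-3-haiku
+    litellm_params:
+      model: bedrock/anthropic.claude-3-haiku-20240307-v1:0
+      aws_region_name: us-east-1
+      aws_access_key_id: "os.environ/AWS_ACCESS_KEY_ID"
+      aws_secret_access_key: "os.environ/AWS_SECRET_ACCESS_KEY"
+```
+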
-```yaml
-model_list:
-  # https://litellm.vercel.app/docs/proxy/quick_start
-  - model_name: claude-3-haiku
-    litellm_params:
-      model: bedrock/anthropic.claude-3-haiku-20240307-v1:0
-      aws_region_name: us-east-1
-      aws_access_key_id: A...
-      aws_secret_access_key: ...
-  - model_name: claude-3-sonnet
-    litellm_params:
-      model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
-      aws_region_name: us-east-1
-      aws_access_key_id: A...
-      aws_secret_access_key: ...
+??? abstract "Example A"
-  - model_name: claude-3-opus
-    litellm_params:
-      model: bedrock/anthropic.claude-3-opus-20240229-v1:0
-      aws_region_name: us-west-2
-      aws_access_key_id: A...
-      aws_secret_access_key: ...
+
+    ```yaml
+    model_list:
+      # https://litellm.vercel.app/docs/proxy/quick_start
-  - model_name: claude-v2
-    litellm_params:
-      model: bedrock/anthropic.claude-v2:1
-      aws_region_name: us-east-1
-      aws_access_key_id: A...
-      aws_secret_access_key: ...
+      # Anthropic
+      - model_name: claude-3-haiku
+        litellm_params:
+          model: bedrock/anthropic.claude-3-haiku-20240307-v1:0
+          aws_region_name: us-east-1
+          aws_access_key_id: A...
+          aws_secret_access_key: ...
-  - model_name: claude-instant
-    litellm_params:
-      model: bedrock/anthropic.claude-instant-v1
-      aws_region_name: us-east-1
-      aws_access_key_id: A...
-      aws_secret_access_key: ...
+      - model_name: claude-3-sonnet
+        litellm_params:
+          model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
+          aws_region_name: us-east-1
+          aws_access_key_id: A...
+          aws_secret_access_key: ...
-  - model_name: llama2-13b
-    litellm_params:
-      model: bedrock/meta.llama2-13b-chat-v1
-      aws_region_name: us-east-1
-      aws_access_key_id: A...
-      aws_secret_access_key: ...
+      - model_name: claude-3-opus
+        litellm_params:
+          model: bedrock/anthropic.claude-3-opus-20240229-v1:0
+          aws_region_name: us-west-2
+          aws_access_key_id: A...
+          aws_secret_access_key: ...
-  - model_name: llama2-70b
-    litellm_params:
-      model: bedrock/meta.llama2-70b-chat-v1
-      aws_region_name: us-east-1
-      aws_access_key_id: A...
-      aws_secret_access_key: ...
+      - model_name: claude-v2
+        litellm_params:
+          model: bedrock/anthropic.claude-v2:1
+          aws_region_name: us-east-1
+          aws_access_key_id: A...
+          aws_secret_access_key: ...
-  - model_name: llama3-8b
-    litellm_params:
-      model: bedrock/meta.llama3-8b-instruct-v1:0
-      aws_region_name: us-east-1
-      aws_access_key_id: A...
-      aws_secret_access_key: ...
+      - model_name: claude-instant
+        litellm_params:
+          model: bedrock/anthropic.claude-instant-v1
+          aws_region_name: us-east-1
+          aws_access_key_id: A...
+          aws_secret_access_key: ...
-  - model_name: llama3-70b
-    litellm_params:
-      model: bedrock/meta.llama3-70b-instruct-v1:0
-      aws_region_name: us-east-1
-      aws_access_key_id: A...
-      aws_secret_access_key: ...
+      # Llama
+      - model_name: llama2-13b
+        litellm_params:
+          model: bedrock/meta.llama2-13b-chat-v1
+          aws_region_name: us-east-1
+          aws_access_key_id: A...
+          aws_secret_access_key: ...
+      - model_name: llama2-70b
+        litellm_params:
+          model: bedrock/meta.llama2-70b-chat-v1
+          aws_region_name: us-east-1
+          aws_access_key_id: A...
+          aws_secret_access_key: ...
-  - model_name: mistral-7b-instruct
-    litellm_params:
-      model: bedrock/mistral.mistral-7b-instruct-v0:2
-      aws_region_name: us-east-1
-      aws_access_key_id: A...
-      aws_secret_access_key: ...
+      - model_name: llama3-8b
+        litellm_params:
+          model: bedrock/meta.llama3-8b-instruct-v1:0
+          aws_region_name: us-east-1
+          aws_access_key_id: A...
+          aws_secret_access_key: ...
-  - model_name: mixtral-8x7b-instruct
-    litellm_params:
-      model: bedrock/mistral.mixtral-8x7b-instruct-v0:1
-      aws_region_name: us-east-1
-      aws_access_key_id: A...
-      aws_secret_access_key: ...
+      - model_name: llama3-70b
+        litellm_params:
+          model: bedrock/meta.llama3-70b-instruct-v1:0
+          aws_region_name: us-east-1
+          aws_access_key_id: A...
+          aws_secret_access_key: ...
-  - model_name: mixtral-large
-    litellm_params:
-      model: bedrock/mistral.mistral-large-2402-v1:0
-      aws_region_name: us-east-1
-      aws_access_key_id: A...
-      aws_secret_access_key: ...
+      # Mistral
+      - model_name: mistral-7b-instruct
+        litellm_params:
+          model: bedrock/mistral.mistral-7b-instruct-v0:2
+          aws_region_name: us-east-1
+          aws_access_key_id: A...
+          aws_secret_access_key: ...
-  - model_name: cohere-command-v14
-    litellm_params:
-      model: bedrock/cohere.command-text-v14
-      aws_region_name: us-east-1
-      aws_access_key_id: A...
-      aws_secret_access_key: ...
+      - model_name: mixtral-8x7b-instruct
+        litellm_params:
+          model: bedrock/mistral.mixtral-8x7b-instruct-v0:1
+          aws_region_name: us-east-1
+          aws_access_key_id: A...
+          aws_secret_access_key: ...
-  - model_name: cohere-command-light-v14
-    litellm_params:
-      model: bedrock/cohere.command-light-text-v14
-      aws_region_name: us-east-1
-      aws_access_key_id: A...
-      aws_secret_access_key: ...
+      - model_name: mixtral-large
+        litellm_params:
+          model: bedrock/mistral.mistral-large-2402-v1:0
+          aws_region_name: us-east-1
+          aws_access_key_id: A...
+          aws_secret_access_key: ...
-  - model_name: ai21-j2-mid
-    litellm_params:
-      model: bedrock/ai21.j2-mid-v1
-      aws_region_name: us-east-1
-      aws_access_key_id: A...
-      aws_secret_access_key: ...
+      # Cohere
+      - model_name: cohere-command-v14
+        litellm_params:
+          model: bedrock/cohere.command-text-v14
+          aws_region_name: us-east-1
+          aws_access_key_id: A...
+          aws_secret_access_key: ...
-  - model_name: ai21-j2-ultra
-    litellm_params:
-      model: bedrock/ai21.j2-ultra-v1
-      aws_region_name: us-east-1
-      aws_access_key_id: A...
-      aws_secret_access_key: ...
+      - model_name: cohere-command-light-v14
+        litellm_params:
+          model: bedrock/cohere.command-light-text-v14
+          aws_region_name: us-east-1
+          aws_access_key_id: A...
+          aws_secret_access_key: ...
-  - model_name: amazon-titan-lite
-    litellm_params:
-      model: bedrock/amazon.titan-text-lite-v1
-      aws_region_name: us-east-1
-      aws_access_key_id: A...
-      aws_secret_access_key: ...
+      # AI21 Labs
+      - model_name: ai21-j2-mid
+        litellm_params:
+          model: bedrock/ai21.j2-mid-v1
+          aws_region_name: us-east-1
+          aws_access_key_id: A...
+          aws_secret_access_key: ...
-  - model_name: amazon-titan-express
-    litellm_params:
-      model: bedrock/amazon.titan-text-express-v1
-      aws_region_name: us-east-1
-      aws_access_key_id: A...
-      aws_secret_access_key: ...
+      - model_name: ai21-j2-ultra
+        litellm_params:
+          model: bedrock/ai21.j2-ultra-v1
+          aws_region_name: us-east-1
+          aws_access_key_id: A...
+          aws_secret_access_key: ...
+      # Amazon
+      - model_name: amazon-titan-lite
+        litellm_params:
+          model: bedrock/amazon.titan-text-lite-v1
+          aws_region_name: us-east-1
+          aws_access_key_id: A...
+          aws_secret_access_key: ...
+      - model_name: amazon-titan-express
+        litellm_params:
+          model: bedrock/amazon.titan-text-express-v1
+          aws_region_name: us-east-1
+          aws_access_key_id: A...
+          aws_secret_access_key: ...
+      # Azure
+      - model_name: azure-gpt-4-turbo-preview
+        litellm_params:
+          model: azure/gpt-4-turbo-preview
+          api_base: https://tenant-name.openai.azure.com/
+          api_key: ...
-  - model_name: azure-gpt-4-turbo-preview
-    litellm_params:
-      model: azure/gpt-4-turbo-preview
-      api_base: https://tenant-name.openai.azure.com/
-      api_key: ...
+      - model_name: azure-gpt-3.5-turbo
+        litellm_params:
+          model: azure/gpt-35-turbo
+          api_base: https://tenant-name.openai.azure.com/
+          api_key: ...
-  - model_name: azure-gpt-3.5-turbo
-    litellm_params:
-      model: azure/gpt-35-turbo
-      api_base: https://tenant-name.openai.azure.com/
-      api_key: ...
+      - model_name: azure-gpt-4
+        litellm_params:
+          model: azure/gpt-4
+          api_base: https://tenant-name.openai.azure.com/
+          api_key: ...
-  - model_name: azure-gpt-4
-    litellm_params:
-      model: azure/gpt-4
-      api_base: https://tenant-name.openai.azure.com/
-      api_key: ...
+      - model_name: azure-gpt-3.5-turbo-16k
+        litellm_params:
+          model: azure/gpt-35-turbo-16k
+          api_base: https://tenant-name.openai.azure.com/
+          api_key: ...
-  - model_name: azure-gpt-3.5-turbo-16k
-    litellm_params:
-      model: azure/gpt-35-turbo-16k
-      api_base: https://tenant-name.openai.azure.com/
-      api_key: ...
+      - model_name: azure-gpt-4-32k
+        litellm_params:
+          model: azure/gpt-4-32k
+          api_base: https://tenant-name.openai.azure.com/
+          api_key: ...
-  - model_name: azure-gpt-4-32k
-    litellm_params:
-      model: azure/gpt-4-32k
-      api_base: https://tenant-name.openai.azure.com/
-      api_key: ...
+      # OpenAI
+      - model_name: gpt-4-turbo
+        litellm_params:
+          model: gpt-4-turbo
+          api_key: ...
+      - model_name: old-gpt-4-turbo-preview
+        litellm_params:
+          model: gpt-4-turbo-preview
+          api_key: ...
+      - model_name: gpt-3.5-turbo
+        litellm_params:
+          model: gpt-3.5-turbo
+          api_key: ...
-  - model_name: gpt-4-turbo
-    litellm_params:
-      model: gpt-4-turbo
-      api_key: ...
+      - model_name: gpt-4
+        litellm_params:
+          model: gpt-4
+          api_key: ...
-  - model_name: old-gpt-4-turbo-preview
-    litellm_params:
-      model: gpt-4-turbo-preview
-      api_key: ...
+      - model_name: gpt-3.5-turbo-16k
+        litellm_params:
+          model: gpt-3.5-turbo-16k
+          api_key: ...
-  - model_name: gpt-3.5-turbo
-    litellm_params:
-      model: gpt-3.5-turbo
-      api_key: ...
+      - model_name: gpt-4-32k
+        litellm_params:
+          model: gpt-4-32k
+          api_key: ...
-  - model_name: gpt-4
-    litellm_params:
-      model: gpt-4
-      api_key: ...
+      - model_name: gpt-4-vision-preview
+        litellm_params:
+          model: gpt-4-vision-preview
+          api_key: ...
-  - model_name: gpt-3.5-turbo-16k
-    litellm_params:
-      model: gpt-3.5-turbo-16k
-      api_key: ...
+      # Google
+      # NOTE: For Google - see above about required auth "GOOGLE_APPLICATION_CREDENTIALS" environment and volume mount
+      - model_name: google-chat-bison
+        litellm_params:
+          model: vertex_ai/chat-bison
+          vertex_project: gcp-proj-name
+          vertex_location: us-central1
-  - model_name: gpt-4-32k
-    litellm_params:
-      model: gpt-4-32k
-      api_key: ...
+      - model_name: google-chat-bison-32k
+        litellm_params:
+          model: vertex_ai/chat-bison-32k
+          vertex_project: gcp-proj-name
+          vertex_location: us-central1
-  - model_name: gpt-4-vision-preview
-    litellm_params:
-      model: gpt-4-vision-preview
-      api_key: ...
+      - model_name: google-gemini-pro-1.0
+        litellm_params:
+          model: vertex_ai/gemini-pro
+          vertex_project: gcp-proj-name
+          vertex_location: us-central1
+      - model_name: google-gemini-pro-1.5-preview
+        litellm_params:
+          model: vertex_ai/gemini-1.5-pro-preview-0409
+          vertex_project: gcp-proj-name
+          vertex_location: us-central1
+
+    # NOTE: It may be a good idea to comment out "success_callback", "cache", and "cache_params" (and the lines under them) when you first start, until this works!
+    litellm_settings:
+      success_callback: ["langfuse"]
+      cache: True
+      cache_params:
+        type: "redis"
+        supported_call_types: ["acompletion", "completion", "embedding", "aembedding"]
+    general_settings:
+      master_key: sk_live_SetToRandomValue
+    ```

-  # NOTE: For Google - see above about required auth "GOOGLE_APPLICATION_CREDENTIALS" envronment and volume mount
-  - model_name: google-chat-bison
-    litellm_params:
-      model: vertex_ai/chat-bison
-      vertex_project: gcp-proj-name
-      vertex_location: us-central1
+### Example of a Few Different Options (e.g. rpm, stream, Ollama)

-  # NOTE: For Google - see above about required auth "GOOGLE_APPLICATION_CREDENTIALS" envronment and volume mount
-  - model_name: google-chat-bison-32k
-    litellm_params:
-      model: vertex_ai/chat-bison-32k
-      vertex_project: gcp-proj-name
-      vertex_location: us-central1
+??? abstract "Example B"
+
+    ```yaml
+    model_list:
+      - model_name: gpt-3.5-turbo
+        litellm_params:
+          model: azure/gpt-turbo-small-eu
+          api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
+          api_key:
+          rpm: 6      # Rate limit for this deployment: in requests per minute (rpm)
+      - model_name: gpt-3.5-turbo
+        litellm_params:
+          model: azure/gpt-turbo-small-ca
+          api_base: https://my-endpoint-canada-berri992.openai.azure.com/
+          api_key:
+          rpm: 6
+      - model_name: gpt-3.5-turbo
+        litellm_params:
+          model: azure/gpt-turbo-large
+          api_base: https://openai-france-1234.openai.azure.com/
+          api_key:
+          rpm: 1440
+      - model_name: mixtral
+        litellm_params:
+          model: openai/mixtral:8x7b-instruct-v0.1-q5_K_M # use openai/* for ollama's openai api compatibility
+          api_base: http://ollama:11434/v1
+          stream: True
+      - model_name: mistral
+        litellm_params:
+          model: openai/mistral # use openai/* for ollama's openai api compatibility
+          api_base: http://ollama:11434/v1
+          stream: True
+    litellm_settings:
+      success_callback: ["langfuse"]
+      cache: True
+      cache_params:
+        type: "redis"
+        supported_call_types: ["acompletion", "completion", "embedding", "aembedding"]
+    general_settings:
+      master_key: sk_live_SetToRandomValue
+    ```
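+
+    The `api_base: http://ollama:11434/v1` entries above assume an `ollama` service reachable on the same Docker network. A minimal sketch of such a service for `docker-compose.override.yml` follows; the image tag and volume path are assumptions, and each model must first be pulled (e.g. `ollama pull mistral`):
+
+    ```yaml
+    ollama:
+      image: ollama/ollama:latest
+      volumes:
+        - ./ollama:/root/.ollama   # assumed host path for downloaded model weights
+    ```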
-
-  # NOTE: For Google - see above about required auth "GOOGLE_APPLICATION_CREDENTIALS" envronment and volume mount
-  - model_name: google-gemini-pro-1.0
-    litellm_params:
-      model: vertex_ai/gemini-pro
-      vertex_project: gcp-proj-name
-      vertex_location: us-central1
-
-  # NOTE: For Google - see above about required auth "GOOGLE_APPLICATION_CREDENTIALS" envronment and volume mount
-  - model_name: google-gemini-pro-1.5-preview
-    litellm_params:
-      model: vertex_ai/gemini-1.5-pro-preview-0409
-      vertex_project: gcp-proj-name
-      vertex_location: us-central1
-
-# NOTE: It may be a good idea to comment out "success_callback", "cache", "cache_params" (both lines under) when you first start until this works!
-litellm_settings:
-  success_callback: ["langfuse"]
-  cache: True
-  cache_params:
-    type: "redis"
-    supported_call_types: ["acompletion", "completion", "embedding", "aembedding"]
-general_settings:
-  master_key: sk_live_SetToRandomValue
-```
-
-#### Example of a few Different Options (ex: rpm, stream, ollama)
-```yaml
-
-model_list:
-  - model_name: gpt-3.5-turbo
-    litellm_params:
-      model: azure/gpt-turbo-small-eu
-      api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
-      api_key:
-      rpm: 6      # Rate limit for this deployment: in requests per minute (rpm)
-  - model_name: gpt-3.5-turbo
-    litellm_params:
-      model: azure/gpt-turbo-small-ca
-      api_base: https://my-endpoint-canada-berri992.openai.azure.com/
-      api_key:
-      rpm: 6
-  - model_name: gpt-3.5-turbo
-    litellm_params:
-      model: azure/gpt-turbo-large
-      api_base: https://openai-france-1234.openai.azure.com/
-      api_key:
-      rpm: 1440
-  - model_name: mixtral
-    litellm_params:
-      model: openai/mixtral:8x7b-instruct-v0.1-q5_K_M # use openai/* for ollama's openai api compatibility
-      api_base: http://ollama:11434/v1
-      stream: True
-  - model_name: mistral
-    litellm_params:
-      model: openai/mistral # use openai/* for ollama's openai api compatibility
-      api_base: http://ollama:11434/v1
-      stream: True
-litellm_settings:
-  success_callback: ["langfuse"]
-  cache: True
-  cache_params:
-    type: "redis"
-    supported_call_types: ["acompletion", "completion", "embedding", "aembedding"]
-general_settings:
-  master_key: sk_live_SetToRandomValue
-```
-
-
-
-### 3. Configure LibreChat
+## 3. Configure LibreChat
 
 Use `librechat.yaml` [Configuration file (guide here)](./ai_endpoints.md) to add reverse proxies as separate endpoints.
 
 Here is an example config:
 
-```
+```yaml
 custom:
   - name: "Lite LLM" # A placeholder - otherwise it becomes the default (OpenAI) key
@@ -358,9 +361,8 @@ custom:
     forcePrompt: false
     modelDisplayLabel: "Lite LLM"
 ```
----
 
-### Why use LiteLLM?
+## Why use LiteLLM?
 
 1. **Access to Multiple LLMs**: It allows calling over 100 LLMs from platforms like Huggingface, Bedrock, TogetherAI, etc., using OpenAI's ChatCompletions and Completions format.