diff --git a/.env.example b/.env.example index a17cb64ed..5f0e40ac3 100644 --- a/.env.example +++ b/.env.example @@ -144,7 +144,7 @@ GOOGLE_KEY=user_provided #============# OPENAI_API_KEY=user_provided -# OPENAI_MODELS=gpt-4o,gpt-3.5-turbo-0125,gpt-3.5-turbo-0301,gpt-3.5-turbo,gpt-4,gpt-4-0613,gpt-4-vision-preview,gpt-3.5-turbo-0613,gpt-3.5-turbo-16k-0613,gpt-4-0125-preview,gpt-4-turbo-preview,gpt-4-1106-preview,gpt-3.5-turbo-1106,gpt-3.5-turbo-instruct,gpt-3.5-turbo-instruct-0914,gpt-3.5-turbo-16k +# OPENAI_MODELS=gpt-4o,gpt-4o-mini,gpt-3.5-turbo-0125,gpt-3.5-turbo-0301,gpt-3.5-turbo,gpt-4,gpt-4-0613,gpt-4-vision-preview,gpt-3.5-turbo-0613,gpt-3.5-turbo-16k-0613,gpt-4-0125-preview,gpt-4-turbo-preview,gpt-4-1106-preview,gpt-3.5-turbo-1106,gpt-3.5-turbo-instruct,gpt-3.5-turbo-instruct-0914,gpt-3.5-turbo-16k DEBUG_OPENAI=false @@ -166,7 +166,7 @@ DEBUG_OPENAI=false ASSISTANTS_API_KEY=user_provided # ASSISTANTS_BASE_URL= -# ASSISTANTS_MODELS=gpt-4o,gpt-3.5-turbo-0125,gpt-3.5-turbo-16k-0613,gpt-3.5-turbo-16k,gpt-3.5-turbo,gpt-4,gpt-4-0314,gpt-4-32k-0314,gpt-4-0613,gpt-3.5-turbo-0613,gpt-3.5-turbo-1106,gpt-4-0125-preview,gpt-4-turbo-preview,gpt-4-1106-preview +# ASSISTANTS_MODELS=gpt-4o,gpt-4o-mini,gpt-3.5-turbo-0125,gpt-3.5-turbo-16k-0613,gpt-3.5-turbo-16k,gpt-3.5-turbo,gpt-4,gpt-4-0314,gpt-4-32k-0314,gpt-4-0613,gpt-3.5-turbo-0613,gpt-3.5-turbo-1106,gpt-4-0125-preview,gpt-4-turbo-preview,gpt-4-1106-preview #==========================# # Azure Assistants API # @@ -188,7 +188,7 @@ ASSISTANTS_API_KEY=user_provided # Plugins # #============# -# PLUGIN_MODELS=gpt-4o,gpt-4,gpt-4-turbo-preview,gpt-4-0125-preview,gpt-4-1106-preview,gpt-4-0613,gpt-3.5-turbo,gpt-3.5-turbo-0125,gpt-3.5-turbo-1106,gpt-3.5-turbo-0613 +# PLUGIN_MODELS=gpt-4o,gpt-4o-mini,gpt-4,gpt-4-turbo-preview,gpt-4-0125-preview,gpt-4-1106-preview,gpt-4-0613,gpt-3.5-turbo,gpt-3.5-turbo-0125,gpt-3.5-turbo-1106,gpt-3.5-turbo-0613 DEBUG_PLUGINS=true diff --git a/README.md b/README.md index 0fbb1c92b..93f80444a 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ - 🔄 Edit, Resubmit, and Continue Messages with Conversation branching - 🌿 Fork Messages & Conversations for Advanced Context control - 💬 Multimodal Chat: - - Upload and analyze images with Claude 3, GPT-4 (including `gpt-4o`), and Gemini Vision 📸 + - Upload and analyze images with Claude 3, GPT-4 (including `gpt-4o` and `gpt-4o-mini`), and Gemini Vision 📸 - Chat with Files using Custom Endpoints, OpenAI, Azure, Anthropic, & Google. 🗃️ - Advanced Agents with Files, Code Interpreter, Tools, and API Actions 🔦 - Available through the [OpenAI Assistants API](https://platform.openai.com/docs/assistants/overview) 🌤️ diff --git a/api/models/tx.js b/api/models/tx.js index ccd865fc8..778d2ce04 100644 --- a/api/models/tx.js +++ b/api/models/tx.js @@ -12,6 +12,7 @@ const tokenValues = { '4k': { prompt: 1.5, completion: 2 }, '16k': { prompt: 3, completion: 4 }, 'gpt-3.5-turbo-1106': { prompt: 1, completion: 2 }, + 'gpt-4o-mini': { prompt: 0.15, completion: 0.6 }, 'gpt-4o': { prompt: 5, completion: 15 }, 'gpt-4-1106': { prompt: 10, completion: 30 }, 'gpt-3.5-turbo-0125': { prompt: 0.5, completion: 1.5 }, @@ -54,6 +55,8 @@ const getValueKey = (model, endpoint) => { return 'gpt-3.5-turbo-1106'; } else if (modelName.includes('gpt-3.5')) { return '4k'; + } else if (modelName.includes('gpt-4o-mini')) { + return 'gpt-4o-mini'; } else if (modelName.includes('gpt-4o')) { return 'gpt-4o'; } else if (modelName.includes('gpt-4-vision')) { diff --git a/api/models/tx.spec.js b/api/models/tx.spec.js index 560b7da33..d59a79a33 100644 --- a/api/models/tx.spec.js +++ b/api/models/tx.spec.js @@ -49,6 +49,12 @@ describe('getValueKey', () => { expect(getValueKey('gpt-4o-0125')).toBe('gpt-4o'); }); + it('should return "gpt-4o-mini" for model type of "gpt-4o-mini"', () => { + expect(getValueKey('gpt-4o-mini-2024-07-18')).toBe('gpt-4o-mini'); + expect(getValueKey('openai/gpt-4o-mini')).toBe('gpt-4o-mini'); + expect(getValueKey('gpt-4o-mini-0718')).toBe('gpt-4o-mini'); + }); + it('should return "claude-3-5-sonnet" for model type of "claude-3-5-sonnet-"', () => { expect(getValueKey('claude-3-5-sonnet-20240620')).toBe('claude-3-5-sonnet'); expect(getValueKey('anthropic/claude-3-5-sonnet')).toBe('claude-3-5-sonnet'); @@ -109,6 +115,19 @@ describe('getMultiplier', () => { ); }); + it('should return the correct multiplier for gpt-4o-mini', () => { + const valueKey = getValueKey('gpt-4o-mini-2024-07-18'); + expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe( + tokenValues['gpt-4o-mini'].prompt, + ); + expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe( + tokenValues['gpt-4o-mini'].completion, + ); + expect(getMultiplier({ valueKey, tokenType: 'completion' })).not.toBe( + tokenValues['gpt-4-1106'].completion, + ); + }); + it('should derive the valueKey from the model if not provided for new models', () => { expect( getMultiplier({ tokenType: 'prompt', model: 'gpt-3.5-turbo-1106-some-other-info' }), diff --git a/api/utils/tokens.js b/api/utils/tokens.js index 1ff0c4340..dec669be2 100644 --- a/api/utils/tokens.js +++ b/api/utils/tokens.js @@ -1,45 +1,6 @@ const z = require('zod'); const { EModelEndpoint } = require('librechat-data-provider'); -const models = [ - 'text-davinci-003', - 'text-davinci-002', - 'text-davinci-001', - 'text-curie-001', - 'text-babbage-001', - 'text-ada-001', - 'davinci', - 'curie', - 'babbage', - 'ada', - 'code-davinci-002', - 'code-davinci-001', - 'code-cushman-002', - 'code-cushman-001', - 'davinci-codex', - 'cushman-codex', - 'text-davinci-edit-001', - 'code-davinci-edit-001', - 'text-embedding-ada-002', - 'text-similarity-davinci-001', - 'text-similarity-curie-001', - 'text-similarity-babbage-001', - 'text-similarity-ada-001', - 'text-search-davinci-doc-001', - 'text-search-curie-doc-001', - 'text-search-babbage-doc-001', - 'text-search-ada-doc-001', - 'code-search-babbage-code-001', - 'code-search-ada-code-001', - 'gpt2', - 'gpt-4', - 'gpt-4-0314', - 'gpt-4-32k', - 'gpt-4-32k-0314', - 'gpt-3.5-turbo', - 'gpt-3.5-turbo-0301', -]; - const openAIModels = { 'gpt-4': 8187, // -5 from max 'gpt-4-0613': 8187, // -5 from max @@ -49,6 +10,7 @@ const openAIModels = { 'gpt-4-1106': 127990, // -10 from max 'gpt-4-0125': 127990, // -10 from max 'gpt-4o': 127990, // -10 from max + 'gpt-4o-mini': 127990, // -10 from max 'gpt-4-turbo': 127990, // -10 from max 'gpt-4-vision': 127990, // -10 from max 'gpt-3.5-turbo': 16375, // -10 from max @@ -101,7 +63,6 @@ const anthropicModels = { const aggregateModels = { ...openAIModels, ...googleModels, ...anthropicModels, ...cohereModels }; -// Order is important here: by model series and context size (gpt-4 then gpt-3, ascending) const maxTokensMap = { [EModelEndpoint.azureOpenAI]: openAIModels, [EModelEndpoint.openAI]: aggregateModels, @@ -110,6 +71,24 @@ const maxTokensMap = { [EModelEndpoint.anthropic]: anthropicModels, }; +/** + * Finds the first matching pattern in the tokens map. + * @param {string} modelName + * @param {Record} tokensMap + * @returns {string|null} + */ +function findMatchingPattern(modelName, tokensMap) { + const keys = Object.keys(tokensMap); + for (let i = keys.length - 1; i >= 0; i--) { + const modelKey = keys[i]; + if (modelName.includes(modelKey)) { + return modelKey; + } + } + + return null; +} + /** * Retrieves the maximum tokens for a given model name. If the exact model name isn't found, * it searches for partial matches within the model name, checking keys in reverse order. @@ -143,12 +122,11 @@ function getModelMaxTokens(modelName, endpoint = EModelEndpoint.openAI, endpoint return tokensMap[modelName]; } - const keys = Object.keys(tokensMap); - for (let i = keys.length - 1; i >= 0; i--) { - if (modelName.includes(keys[i])) { - const result = tokensMap[keys[i]]; - return result?.context ?? result; - } + const matchedPattern = findMatchingPattern(modelName, tokensMap); + + if (matchedPattern) { + const result = tokensMap[matchedPattern]; + return result?.context ?? result; } return undefined; @@ -181,15 +159,8 @@ function matchModelName(modelName, endpoint = EModelEndpoint.openAI) { return modelName; } - const keys = Object.keys(tokensMap); - for (let i = keys.length - 1; i >= 0; i--) { - const modelKey = keys[i]; - if (modelName.includes(modelKey)) { - return modelKey; - } - } - - return modelName; + const matchedPattern = findMatchingPattern(modelName, tokensMap); + return matchedPattern || modelName; } const modelSchema = z.object({ @@ -241,8 +212,47 @@ function processModelData(input) { return tokenConfig; } +const tiktokenModels = new Set([ + 'text-davinci-003', + 'text-davinci-002', + 'text-davinci-001', + 'text-curie-001', + 'text-babbage-001', + 'text-ada-001', + 'davinci', + 'curie', + 'babbage', + 'ada', + 'code-davinci-002', + 'code-davinci-001', + 'code-cushman-002', + 'code-cushman-001', + 'davinci-codex', + 'cushman-codex', + 'text-davinci-edit-001', + 'code-davinci-edit-001', + 'text-embedding-ada-002', + 'text-similarity-davinci-001', + 'text-similarity-curie-001', + 'text-similarity-babbage-001', + 'text-similarity-ada-001', + 'text-search-davinci-doc-001', + 'text-search-curie-doc-001', + 'text-search-babbage-doc-001', + 'text-search-ada-doc-001', + 'code-search-babbage-code-001', + 'code-search-ada-code-001', + 'gpt2', + 'gpt-4', + 'gpt-4-0314', + 'gpt-4-32k', + 'gpt-4-32k-0314', + 'gpt-3.5-turbo', + 'gpt-3.5-turbo-0301', +]); + module.exports = { - tiktokenModels: new Set(models), + tiktokenModels, maxTokensMap, inputSchema, modelSchema, diff --git a/packages/data-provider/package.json b/packages/data-provider/package.json index 393dacd05..8c9a7d442 100644 --- a/packages/data-provider/package.json +++ b/packages/data-provider/package.json @@ -1,6 +1,6 @@ { "name": "librechat-data-provider", - "version": "0.7.2", + "version": "0.7.3", "description": "data services for librechat apps", "main": "dist/index.js", "module": "dist/index.es.js", diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index cacfdd7b0..d977f6970 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -12,6 +12,8 @@ export const defaultSocialLogins = ['google', 'facebook', 'openid', 'github', 'd export const defaultRetrievalModels = [ 'gpt-4o', 'gpt-4o-2024-05-13', + 'gpt-4o-mini', + 'gpt-4o-mini-2024-07-18', 'gpt-4-turbo-preview', 'gpt-3.5-turbo-0125', 'gpt-4-0125-preview', @@ -530,7 +532,7 @@ const sharedOpenAIModels = [ export const defaultModels = { [EModelEndpoint.azureAssistants]: sharedOpenAIModels, - [EModelEndpoint.assistants]: ['gpt-4o', ...sharedOpenAIModels], + [EModelEndpoint.assistants]: ['gpt-4o-mini', 'gpt-4o', ...sharedOpenAIModels], [EModelEndpoint.google]: [ 'gemini-pro', 'gemini-pro-vision', @@ -559,13 +561,12 @@ export const defaultModels = { 'claude-instant-1-100k', ], [EModelEndpoint.openAI]: [ + 'gpt-4o-mini', 'gpt-4o', ...sharedOpenAIModels, 'gpt-4-vision-preview', 'gpt-3.5-turbo-instruct-0914', - 'gpt-3.5-turbo-0301', 'gpt-3.5-turbo-instruct', - 'text-davinci-003', ], }; @@ -621,6 +622,7 @@ export const supportsBalanceCheck = { export const visionModels = [ 'gpt-4o', + 'gpt-4o-mini', 'gpt-4-turbo', 'gpt-4-vision', 'llava', diff --git a/packages/data-provider/src/schemas.ts b/packages/data-provider/src/schemas.ts index ea4624295..049e84a87 100644 --- a/packages/data-provider/src/schemas.ts +++ b/packages/data-provider/src/schemas.ts @@ -219,7 +219,7 @@ export enum EAgent { export const agentOptionSettings = { model: { - default: 'gpt-4o', + default: 'gpt-4o-mini', }, temperature: { min: 0,