LibreChat/api/utils/tokens.spec.js

1416 lines
59 KiB
JavaScript
Raw Normal View History

const { EModelEndpoint } = require('librechat-data-provider');
const {
🎚️ feat: Anthropic Parameter Set Support via Custom Endpoints (#9415) * refactor: modularize openai llm config logic into new getOpenAILLMConfig function (#9412) * ✈️ refactor: Migrate Anthropic's getLLMConfig to TypeScript (#9413) * refactor: move tokens.js over to packages/api and update imports * refactor: port tokens.js to typescript * refactor: move helpers.js over to packages/api and update imports * refactor: port helpers.js to typescript * refactor: move anthropic/llm.js over to packages/api and update imports * refactor: port anthropic/llm.js to typescript with supporting types in types/anthropic.ts and updated tests in llm.spec.js * refactor: move llm.spec.js over to packages/api and update import * refactor: port llm.spec.js over to typescript * 📝 Add Prompt Parameter Support for Anthropic Custom Endpoints (#9414) feat: add anthropic llm config support for openai-like (custom) endpoints * fix: missed compiler / type issues from addition of getAnthropicLLMConfig * refactor: update tokens.ts to export constants and functions, enhance type definitions, and adjust default values * WIP: first pass, decouple `llmConfig` from `configOptions` * chore: update import path for OpenAI configuration from 'llm' to 'config' * refactor: enhance type definitions for ThinkingConfig and update modelOptions in AnthropicConfigOptions * refactor: cleanup type, introduce openai transform from alt provider * chore: integrate removeNullishValues in Google llmConfig and update OpenAI exports * chore: bump version of @librechat/api to 1.3.5 in package.json and package-lock.json * refactor: update customParams type in OpenAIConfigOptions to use TConfig['customParams'] * refactor: enhance transformToOpenAIConfig to include fromEndpoint and improve config extraction * refactor: conform userId field for anthropic/openai, cleanup anthropic typing * ci: add backward compatibility tests for getOpenAIConfig with various endpoints and configurations * ci: replace userId with user in 
clientOptions for getLLMConfig * test: add Azure OpenAI endpoint tests for various configurations in getOpenAIConfig * refactor: defaultHeaders retrieval for prompt caching for anthropic-based custom endpoint (litellm) * test: add unit tests for getOpenAIConfig with various Anthropic model configurations * test: enhance Anthropic compatibility tests with addParams and dropParams handling * chore: update @librechat/agents dependency to version 2.4.78 in package.json and package-lock.json * chore: update @librechat/agents dependency to version 2.4.79 in package.json and package-lock.json --------- Co-authored-by: Danny Avila <danny@librechat.ai>
2025-09-08 11:35:29 -07:00
maxTokensMap,
matchModelName,
processModelData,
getModelMaxTokens,
maxOutputTokensMap,
findMatchingPattern,
🎚️ feat: Anthropic Parameter Set Support via Custom Endpoints (#9415) * refactor: modularize openai llm config logic into new getOpenAILLMConfig function (#9412) * ✈️ refactor: Migrate Anthropic's getLLMConfig to TypeScript (#9413) * refactor: move tokens.js over to packages/api and update imports * refactor: port tokens.js to typescript * refactor: move helpers.js over to packages/api and update imports * refactor: port helpers.js to typescript * refactor: move anthropic/llm.js over to packages/api and update imports * refactor: port anthropic/llm.js to typescript with supporting types in types/anthropic.ts and updated tests in llm.spec.js * refactor: move llm.spec.js over to packages/api and update import * refactor: port llm.spec.js over to typescript * 📝 Add Prompt Parameter Support for Anthropic Custom Endpoints (#9414) feat: add anthropic llm config support for openai-like (custom) endpoints * fix: missed compiler / type issues from addition of getAnthropicLLMConfig * refactor: update tokens.ts to export constants and functions, enhance type definitions, and adjust default values * WIP: first pass, decouple `llmConfig` from `configOptions` * chore: update import path for OpenAI configuration from 'llm' to 'config' * refactor: enhance type definitions for ThinkingConfig and update modelOptions in AnthropicConfigOptions * refactor: cleanup type, introduce openai transform from alt provider * chore: integrate removeNullishValues in Google llmConfig and update OpenAI exports * chore: bump version of @librechat/api to 1.3.5 in package.json and package-lock.json * refactor: update customParams type in OpenAIConfigOptions to use TConfig['customParams'] * refactor: enhance transformToOpenAIConfig to include fromEndpoint and improve config extraction * refactor: conform userId field for anthropic/openai, cleanup anthropic typing * ci: add backward compatibility tests for getOpenAIConfig with various endpoints and configurations * ci: replace userId with user in 
clientOptions for getLLMConfig * test: add Azure OpenAI endpoint tests for various configurations in getOpenAIConfig * refactor: defaultHeaders retrieval for prompt caching for anthropic-based custom endpoint (litellm) * test: add unit tests for getOpenAIConfig with various Anthropic model configurations * test: enhance Anthropic compatibility tests with addParams and dropParams handling * chore: update @librechat/agents dependency to version 2.4.78 in package.json and package-lock.json * chore: update @librechat/agents dependency to version 2.4.79 in package.json and package-lock.json --------- Co-authored-by: Danny Avila <danny@librechat.ai>
2025-09-08 11:35:29 -07:00
} = require('@librechat/api');
feat: ConversationSummaryBufferMemory (#973) * refactor: pass model in message edit payload, use encoder in standalone util function * feat: add summaryBuffer helper * refactor(api/messages): use new countTokens helper and add auth middleware at top * wip: ConversationSummaryBufferMemory * refactor: move pre-generation helpers to prompts dir * chore: remove console log * chore: remove test as payload will no longer carry tokenCount * chore: update getMessagesWithinTokenLimit JSDoc * refactor: optimize getMessagesForConversation and also break on summary, feat(ci): getMessagesForConversation tests * refactor(getMessagesForConvo): count '00000000-0000-0000-0000-000000000000' as root message * chore: add newer model to token map * fix: condition was point to prop of array instead of message prop * refactor(BaseClient): use object for refineMessages param, rename 'summary' to 'summaryMessage', add previous_summary refactor(getMessagesWithinTokenLimit): replace text and tokenCount if should summarize, summary, and summaryTokenCount are present fix/refactor(handleContextStrategy): use the right comparison length for context diff, and replace payload first message when a summary is present * chore: log previous_summary if debugging * refactor(formatMessage): assume if role is defined that it's a valid value * refactor(getMessagesWithinTokenLimit): remove summary logic refactor(handleContextStrategy): add usePrevSummary logic in case only summary was pruned refactor(loadHistory): initial message query will return all ordered messages but keep track of the latest summary refactor(getMessagesForConversation): use object for single param, edit jsdoc, edit all files using the method refactor(ChatGPTClient): order messages before buildPrompt is called, TODO: add convoSumBuffMemory logic * fix: undefined handling and summarizing only when shouldRefineContext is true * chore(BaseClient): fix test results omitting system role for summaries and test edge case * chore: export 
summaryBuffer from index file * refactor(OpenAIClient/BaseClient): move refineMessages to subclass, implement LLM initialization for summaryBuffer * feat: add OPENAI_SUMMARIZE to enable summarizing, refactor: rename client prop 'shouldRefineContext' to 'shouldSummarize', change contextStrategy value to 'summarize' from 'refine' * refactor: rename refineMessages method to summarizeMessages for clarity * chore: clarify summary future intent in .env.example * refactor(initializeLLM): handle case for either 'model' or 'modelName' being passed * feat(gptPlugins): enable summarization for plugins * refactor(gptPlugins): utilize new initializeLLM method and formatting methods for messages, use payload array for currentMessages and assign pastMessages sooner * refactor(agents): use ConversationSummaryBufferMemory for both agent types * refactor(formatMessage): optimize original method for langchain, add helper function for langchain messages, add JSDocs and tests * refactor(summaryBuffer): add helper to createSummaryBufferMemory, and use new formatting helpers * fix: forgot to spread formatMessages also took opportunity to pluralize filename * refactor: pass memory to tools, namely openapi specs. 
not used and may never be used by new method but added for testing * ci(formatMessages): add more exhaustive checks for langchain messages * feat: add debug env var for OpenAI * chore: delete unnecessary comments * chore: add extra note about summary feature * fix: remove tokenCount from payload instructions * fix: test fail * fix: only pass instructions to payload when defined or not empty object * refactor: fromPromptMessages is deprecated, use renamed method fromMessages * refactor: use 'includes' instead of 'startsWith' for extended OpenRouter compatibility * fix(PluginsClient.buildPromptBody): handle undefined message strings * chore: log langchain titling error * feat: getModelMaxTokens helper * feat: tokenSplit helper * feat: summary prompts updated * fix: optimize _CUT_OFF_SUMMARIZER prompt * refactor(summaryBuffer): use custom summary prompt, allow prompt to be passed, pass humanPrefix and aiPrefix to memory, along with any future variables, rename messagesToRefine to context * fix(summaryBuffer): handle edge case where messagesToRefine exceeds summary context, refactor(BaseClient): allow custom maxContextTokens to be passed to getMessagesWithinTokenLimit, add defined check before unshifting summaryMessage, update shouldSummarize based on this refactor(OpenAIClient): use getModelMaxTokens, use cut-off message method for summary if no messages were left after pruning * fix(handleContextStrategy): handle case where incoming prompt is bigger than model context * chore: rename refinedContent to splitText * chore: remove unnecessary debug log
2023-09-26 21:02:28 -04:00
describe('getModelMaxTokens', () => {
test('should return correct tokens for exact match', () => {
feat(Google): Support all Text/Chat Models, Response streaming, `PaLM` -> `Google` 🤖 (#1316) * feat: update PaLM icons * feat: add additional google models * POC: formatting inputs for Vertex AI streaming * refactor: move endpoints services outside of /routes dir to /services/Endpoints * refactor: shorten schemas import * refactor: rename PALM to GOOGLE * feat: make Google editable endpoint * feat: reusable Ask and Edit controllers based off Anthropic * chore: organize imports/logic * fix(parseConvo): include examples in googleSchema * fix: google only allows odd number of messages to be sent * fix: pass proxy to AnthropicClient * refactor: change `google` altName to `Google` * refactor: update getModelMaxTokens and related functions to handle maxTokensMap with nested endpoint model key/values * refactor: google Icon and response sender changes (Codey and Google logo instead of PaLM in all cases) * feat: google support for maxTokensMap * feat: google updated endpoints with Ask/Edit controllers, buildOptions, and initializeClient * feat(GoogleClient): now builds prompt for text models and supports real streaming from Vertex AI through langchain * chore(GoogleClient): remove comments, left before for reference in git history * docs: update google instructions (WIP) * docs(apis_and_tokens.md): add images to google instructions * docs: remove typo apis_and_tokens.md * Update apis_and_tokens.md * feat(Google): use default settings map, fully support context for both text and chat models, fully support examples for chat models * chore: update more PaLM references to Google * chore: move playwright out of workflows to avoid failing tests
2023-12-10 14:54:13 -05:00
expect(getModelMaxTokens('gpt-4-32k-0613')).toBe(
maxTokensMap[EModelEndpoint.openAI]['gpt-4-32k-0613'],
);
feat: ConversationSummaryBufferMemory (#973) * refactor: pass model in message edit payload, use encoder in standalone util function * feat: add summaryBuffer helper * refactor(api/messages): use new countTokens helper and add auth middleware at top * wip: ConversationSummaryBufferMemory * refactor: move pre-generation helpers to prompts dir * chore: remove console log * chore: remove test as payload will no longer carry tokenCount * chore: update getMessagesWithinTokenLimit JSDoc * refactor: optimize getMessagesForConversation and also break on summary, feat(ci): getMessagesForConversation tests * refactor(getMessagesForConvo): count '00000000-0000-0000-0000-000000000000' as root message * chore: add newer model to token map * fix: condition was point to prop of array instead of message prop * refactor(BaseClient): use object for refineMessages param, rename 'summary' to 'summaryMessage', add previous_summary refactor(getMessagesWithinTokenLimit): replace text and tokenCount if should summarize, summary, and summaryTokenCount are present fix/refactor(handleContextStrategy): use the right comparison length for context diff, and replace payload first message when a summary is present * chore: log previous_summary if debugging * refactor(formatMessage): assume if role is defined that it's a valid value * refactor(getMessagesWithinTokenLimit): remove summary logic refactor(handleContextStrategy): add usePrevSummary logic in case only summary was pruned refactor(loadHistory): initial message query will return all ordered messages but keep track of the latest summary refactor(getMessagesForConversation): use object for single param, edit jsdoc, edit all files using the method refactor(ChatGPTClient): order messages before buildPrompt is called, TODO: add convoSumBuffMemory logic * fix: undefined handling and summarizing only when shouldRefineContext is true * chore(BaseClient): fix test results omitting system role for summaries and test edge case * chore: export 
summaryBuffer from index file * refactor(OpenAIClient/BaseClient): move refineMessages to subclass, implement LLM initialization for summaryBuffer * feat: add OPENAI_SUMMARIZE to enable summarizing, refactor: rename client prop 'shouldRefineContext' to 'shouldSummarize', change contextStrategy value to 'summarize' from 'refine' * refactor: rename refineMessages method to summarizeMessages for clarity * chore: clarify summary future intent in .env.example * refactor(initializeLLM): handle case for either 'model' or 'modelName' being passed * feat(gptPlugins): enable summarization for plugins * refactor(gptPlugins): utilize new initializeLLM method and formatting methods for messages, use payload array for currentMessages and assign pastMessages sooner * refactor(agents): use ConversationSummaryBufferMemory for both agent types * refactor(formatMessage): optimize original method for langchain, add helper function for langchain messages, add JSDocs and tests * refactor(summaryBuffer): add helper to createSummaryBufferMemory, and use new formatting helpers * fix: forgot to spread formatMessages also took opportunity to pluralize filename * refactor: pass memory to tools, namely openapi specs. 
not used and may never be used by new method but added for testing * ci(formatMessages): add more exhaustive checks for langchain messages * feat: add debug env var for OpenAI * chore: delete unnecessary comments * chore: add extra note about summary feature * fix: remove tokenCount from payload instructions * fix: test fail * fix: only pass instructions to payload when defined or not empty object * refactor: fromPromptMessages is deprecated, use renamed method fromMessages * refactor: use 'includes' instead of 'startsWith' for extended OpenRouter compatibility * fix(PluginsClient.buildPromptBody): handle undefined message strings * chore: log langchain titling error * feat: getModelMaxTokens helper * feat: tokenSplit helper * feat: summary prompts updated * fix: optimize _CUT_OFF_SUMMARIZER prompt * refactor(summaryBuffer): use custom summary prompt, allow prompt to be passed, pass humanPrefix and aiPrefix to memory, along with any future variables, rename messagesToRefine to context * fix(summaryBuffer): handle edge case where messagesToRefine exceeds summary context, refactor(BaseClient): allow custom maxContextTokens to be passed to getMessagesWithinTokenLimit, add defined check before unshifting summaryMessage, update shouldSummarize based on this refactor(OpenAIClient): use getModelMaxTokens, use cut-off message method for summary if no messages were left after pruning * fix(handleContextStrategy): handle case where incoming prompt is bigger than model context * chore: rename refinedContent to splitText * chore: remove unnecessary debug log
2023-09-26 21:02:28 -04:00
});
test('should return correct tokens for partial match', () => {
feat(Google): Support all Text/Chat Models, Response streaming, `PaLM` -> `Google` 🤖 (#1316) * feat: update PaLM icons * feat: add additional google models * POC: formatting inputs for Vertex AI streaming * refactor: move endpoints services outside of /routes dir to /services/Endpoints * refactor: shorten schemas import * refactor: rename PALM to GOOGLE * feat: make Google editable endpoint * feat: reusable Ask and Edit controllers based off Anthropic * chore: organize imports/logic * fix(parseConvo): include examples in googleSchema * fix: google only allows odd number of messages to be sent * fix: pass proxy to AnthropicClient * refactor: change `google` altName to `Google` * refactor: update getModelMaxTokens and related functions to handle maxTokensMap with nested endpoint model key/values * refactor: google Icon and response sender changes (Codey and Google logo instead of PaLM in all cases) * feat: google support for maxTokensMap * feat: google updated endpoints with Ask/Edit controllers, buildOptions, and initializeClient * feat(GoogleClient): now builds prompt for text models and supports real streaming from Vertex AI through langchain * chore(GoogleClient): remove comments, left before for reference in git history * docs: update google instructions (WIP) * docs(apis_and_tokens.md): add images to google instructions * docs: remove typo apis_and_tokens.md * Update apis_and_tokens.md * feat(Google): use default settings map, fully support context for both text and chat models, fully support examples for chat models * chore: update more PaLM references to Google * chore: move playwright out of workflows to avoid failing tests
2023-12-10 14:54:13 -05:00
expect(getModelMaxTokens('gpt-4-32k-unknown')).toBe(
maxTokensMap[EModelEndpoint.openAI]['gpt-4-32k'],
);
feat: ConversationSummaryBufferMemory (#973) * refactor: pass model in message edit payload, use encoder in standalone util function * feat: add summaryBuffer helper * refactor(api/messages): use new countTokens helper and add auth middleware at top * wip: ConversationSummaryBufferMemory * refactor: move pre-generation helpers to prompts dir * chore: remove console log * chore: remove test as payload will no longer carry tokenCount * chore: update getMessagesWithinTokenLimit JSDoc * refactor: optimize getMessagesForConversation and also break on summary, feat(ci): getMessagesForConversation tests * refactor(getMessagesForConvo): count '00000000-0000-0000-0000-000000000000' as root message * chore: add newer model to token map * fix: condition was point to prop of array instead of message prop * refactor(BaseClient): use object for refineMessages param, rename 'summary' to 'summaryMessage', add previous_summary refactor(getMessagesWithinTokenLimit): replace text and tokenCount if should summarize, summary, and summaryTokenCount are present fix/refactor(handleContextStrategy): use the right comparison length for context diff, and replace payload first message when a summary is present * chore: log previous_summary if debugging * refactor(formatMessage): assume if role is defined that it's a valid value * refactor(getMessagesWithinTokenLimit): remove summary logic refactor(handleContextStrategy): add usePrevSummary logic in case only summary was pruned refactor(loadHistory): initial message query will return all ordered messages but keep track of the latest summary refactor(getMessagesForConversation): use object for single param, edit jsdoc, edit all files using the method refactor(ChatGPTClient): order messages before buildPrompt is called, TODO: add convoSumBuffMemory logic * fix: undefined handling and summarizing only when shouldRefineContext is true * chore(BaseClient): fix test results omitting system role for summaries and test edge case * chore: export 
summaryBuffer from index file * refactor(OpenAIClient/BaseClient): move refineMessages to subclass, implement LLM initialization for summaryBuffer * feat: add OPENAI_SUMMARIZE to enable summarizing, refactor: rename client prop 'shouldRefineContext' to 'shouldSummarize', change contextStrategy value to 'summarize' from 'refine' * refactor: rename refineMessages method to summarizeMessages for clarity * chore: clarify summary future intent in .env.example * refactor(initializeLLM): handle case for either 'model' or 'modelName' being passed * feat(gptPlugins): enable summarization for plugins * refactor(gptPlugins): utilize new initializeLLM method and formatting methods for messages, use payload array for currentMessages and assign pastMessages sooner * refactor(agents): use ConversationSummaryBufferMemory for both agent types * refactor(formatMessage): optimize original method for langchain, add helper function for langchain messages, add JSDocs and tests * refactor(summaryBuffer): add helper to createSummaryBufferMemory, and use new formatting helpers * fix: forgot to spread formatMessages also took opportunity to pluralize filename * refactor: pass memory to tools, namely openapi specs. 
not used and may never be used by new method but added for testing * ci(formatMessages): add more exhaustive checks for langchain messages * feat: add debug env var for OpenAI * chore: delete unnecessary comments * chore: add extra note about summary feature * fix: remove tokenCount from payload instructions * fix: test fail * fix: only pass instructions to payload when defined or not empty object * refactor: fromPromptMessages is deprecated, use renamed method fromMessages * refactor: use 'includes' instead of 'startsWith' for extended OpenRouter compatibility * fix(PluginsClient.buildPromptBody): handle undefined message strings * chore: log langchain titling error * feat: getModelMaxTokens helper * feat: tokenSplit helper * feat: summary prompts updated * fix: optimize _CUT_OFF_SUMMARIZER prompt * refactor(summaryBuffer): use custom summary prompt, allow prompt to be passed, pass humanPrefix and aiPrefix to memory, along with any future variables, rename messagesToRefine to context * fix(summaryBuffer): handle edge case where messagesToRefine exceeds summary context, refactor(BaseClient): allow custom maxContextTokens to be passed to getMessagesWithinTokenLimit, add defined check before unshifting summaryMessage, update shouldSummarize based on this refactor(OpenAIClient): use getModelMaxTokens, use cut-off message method for summary if no messages were left after pruning * fix(handleContextStrategy): handle case where incoming prompt is bigger than model context * chore: rename refinedContent to splitText * chore: remove unnecessary debug log
2023-09-26 21:02:28 -04:00
});
test('should return correct tokens for partial match (OpenRouter)', () => {
feat(Google): Support all Text/Chat Models, Response streaming, `PaLM` -> `Google` 🤖 (#1316) * feat: update PaLM icons * feat: add additional google models * POC: formatting inputs for Vertex AI streaming * refactor: move endpoints services outside of /routes dir to /services/Endpoints * refactor: shorten schemas import * refactor: rename PALM to GOOGLE * feat: make Google editable endpoint * feat: reusable Ask and Edit controllers based off Anthropic * chore: organize imports/logic * fix(parseConvo): include examples in googleSchema * fix: google only allows odd number of messages to be sent * fix: pass proxy to AnthropicClient * refactor: change `google` altName to `Google` * refactor: update getModelMaxTokens and related functions to handle maxTokensMap with nested endpoint model key/values * refactor: google Icon and response sender changes (Codey and Google logo instead of PaLM in all cases) * feat: google support for maxTokensMap * feat: google updated endpoints with Ask/Edit controllers, buildOptions, and initializeClient * feat(GoogleClient): now builds prompt for text models and supports real streaming from Vertex AI through langchain * chore(GoogleClient): remove comments, left before for reference in git history * docs: update google instructions (WIP) * docs(apis_and_tokens.md): add images to google instructions * docs: remove typo apis_and_tokens.md * Update apis_and_tokens.md * feat(Google): use default settings map, fully support context for both text and chat models, fully support examples for chat models * chore: update more PaLM references to Google * chore: move playwright out of workflows to avoid failing tests
2023-12-10 14:54:13 -05:00
expect(getModelMaxTokens('openai/gpt-4-32k')).toBe(
maxTokensMap[EModelEndpoint.openAI]['gpt-4-32k'],
);
feat: ConversationSummaryBufferMemory (#973) * refactor: pass model in message edit payload, use encoder in standalone util function * feat: add summaryBuffer helper * refactor(api/messages): use new countTokens helper and add auth middleware at top * wip: ConversationSummaryBufferMemory * refactor: move pre-generation helpers to prompts dir * chore: remove console log * chore: remove test as payload will no longer carry tokenCount * chore: update getMessagesWithinTokenLimit JSDoc * refactor: optimize getMessagesForConversation and also break on summary, feat(ci): getMessagesForConversation tests * refactor(getMessagesForConvo): count '00000000-0000-0000-0000-000000000000' as root message * chore: add newer model to token map * fix: condition was point to prop of array instead of message prop * refactor(BaseClient): use object for refineMessages param, rename 'summary' to 'summaryMessage', add previous_summary refactor(getMessagesWithinTokenLimit): replace text and tokenCount if should summarize, summary, and summaryTokenCount are present fix/refactor(handleContextStrategy): use the right comparison length for context diff, and replace payload first message when a summary is present * chore: log previous_summary if debugging * refactor(formatMessage): assume if role is defined that it's a valid value * refactor(getMessagesWithinTokenLimit): remove summary logic refactor(handleContextStrategy): add usePrevSummary logic in case only summary was pruned refactor(loadHistory): initial message query will return all ordered messages but keep track of the latest summary refactor(getMessagesForConversation): use object for single param, edit jsdoc, edit all files using the method refactor(ChatGPTClient): order messages before buildPrompt is called, TODO: add convoSumBuffMemory logic * fix: undefined handling and summarizing only when shouldRefineContext is true * chore(BaseClient): fix test results omitting system role for summaries and test edge case * chore: export 
summaryBuffer from index file * refactor(OpenAIClient/BaseClient): move refineMessages to subclass, implement LLM initialization for summaryBuffer * feat: add OPENAI_SUMMARIZE to enable summarizing, refactor: rename client prop 'shouldRefineContext' to 'shouldSummarize', change contextStrategy value to 'summarize' from 'refine' * refactor: rename refineMessages method to summarizeMessages for clarity * chore: clarify summary future intent in .env.example * refactor(initializeLLM): handle case for either 'model' or 'modelName' being passed * feat(gptPlugins): enable summarization for plugins * refactor(gptPlugins): utilize new initializeLLM method and formatting methods for messages, use payload array for currentMessages and assign pastMessages sooner * refactor(agents): use ConversationSummaryBufferMemory for both agent types * refactor(formatMessage): optimize original method for langchain, add helper function for langchain messages, add JSDocs and tests * refactor(summaryBuffer): add helper to createSummaryBufferMemory, and use new formatting helpers * fix: forgot to spread formatMessages also took opportunity to pluralize filename * refactor: pass memory to tools, namely openapi specs. 
not used and may never be used by new method but added for testing * ci(formatMessages): add more exhaustive checks for langchain messages * feat: add debug env var for OpenAI * chore: delete unnecessary comments * chore: add extra note about summary feature * fix: remove tokenCount from payload instructions * fix: test fail * fix: only pass instructions to payload when defined or not empty object * refactor: fromPromptMessages is deprecated, use renamed method fromMessages * refactor: use 'includes' instead of 'startsWith' for extended OpenRouter compatibility * fix(PluginsClient.buildPromptBody): handle undefined message strings * chore: log langchain titling error * feat: getModelMaxTokens helper * feat: tokenSplit helper * feat: summary prompts updated * fix: optimize _CUT_OFF_SUMMARIZER prompt * refactor(summaryBuffer): use custom summary prompt, allow prompt to be passed, pass humanPrefix and aiPrefix to memory, along with any future variables, rename messagesToRefine to context * fix(summaryBuffer): handle edge case where messagesToRefine exceeds summary context, refactor(BaseClient): allow custom maxContextTokens to be passed to getMessagesWithinTokenLimit, add defined check before unshifting summaryMessage, update shouldSummarize based on this refactor(OpenAIClient): use getModelMaxTokens, use cut-off message method for summary if no messages were left after pruning * fix(handleContextStrategy): handle case where incoming prompt is bigger than model context * chore: rename refinedContent to splitText * chore: remove unnecessary debug log
2023-09-26 21:02:28 -04:00
});
// A model name that matches no entry (exactly or partially) in the token map
// is expected to produce `undefined` from the lookup.
test('should return undefined for no match', () => {
  const unmatchedResult = getModelMaxTokens('unknown-model');
  expect(unmatchedResult).toBeUndefined();
});
test('should return correct tokens for another exact match', () => {
  // The model name is used verbatim as the key into the OpenAI section of the map.
  const model = 'gpt-3.5-turbo-16k-0613';
  expect(getModelMaxTokens(model)).toBe(maxTokensMap[EModelEndpoint.openAI][model]);
});
test('should return correct tokens for another partial match', () => {
  // The name carries an extra `-unknown` suffix; the expected value is the
  // `gpt-3.5-turbo` entry of the OpenAI section of the map.
  expect(getModelMaxTokens('gpt-3.5-turbo-unknown')).toBe(
    maxTokensMap[EModelEndpoint.openAI]['gpt-3.5-turbo'],
  );
});
test('should return undefined for undefined input', () => {
  // `undefined` is passed in place of a model name.
  const result = getModelMaxTokens(undefined);
  expect(result).toBeUndefined();
});
test('should return undefined for null input', () => {
  // `null` is passed in place of a model name.
  const result = getModelMaxTokens(null);
  expect(result).toBeUndefined();
});
test('should return undefined for number input', () => {
  // A number is passed in place of a model name.
  const result = getModelMaxTokens(123);
  expect(result).toBeUndefined();
});
// 11/06 Update
test('should return correct tokens for gpt-3.5-turbo-1106 exact match', () => {
  // The model name doubles as the key into the OpenAI section of the map.
  const model = 'gpt-3.5-turbo-1106';
  expect(getModelMaxTokens(model)).toBe(maxTokensMap[EModelEndpoint.openAI][model]);
});
test('should return correct tokens for gpt-4-1106 exact match', () => {
  // The model name doubles as the key into the OpenAI section of the map.
  const model = 'gpt-4-1106';
  expect(getModelMaxTokens(model)).toBe(maxTokensMap[EModelEndpoint.openAI][model]);
});
test('should return correct tokens for gpt-4-vision exact match', () => {
  // The model name doubles as the key into the OpenAI section of the map.
  const model = 'gpt-4-vision';
  const expected = maxTokensMap[EModelEndpoint.openAI][model];
  expect(getModelMaxTokens(model)).toBe(expected);
});
test('should return correct tokens for gpt-3.5-turbo-1106 partial match', () => {
  // The known model name appears once as the trailing segment and once as the
  // leading segment; both inputs are expected to resolve to the same entry.
  const expected = maxTokensMap[EModelEndpoint.openAI]['gpt-3.5-turbo-1106'];
  expect(getModelMaxTokens('something-/gpt-3.5-turbo-1106')).toBe(expected);
  expect(getModelMaxTokens('gpt-3.5-turbo-1106/something-/')).toBe(expected);
});
test('should return correct tokens for gpt-4-1106 partial match', () => {
  // Every variant below is expected to resolve to the `gpt-4-1106` entry.
  const expected = maxTokensMap[EModelEndpoint.openAI]['gpt-4-1106'];
  const variants = ['gpt-4-1106/something', 'gpt-4-1106-preview', 'gpt-4-1106-vision-preview'];
  for (const model of variants) {
    expect(getModelMaxTokens(model)).toBe(expected);
  }
});
// 01/25 Update
test('should return correct tokens for gpt-4-turbo/0125 matches', () => {
  const openAIMap = maxTokensMap[EModelEndpoint.openAI];
  // Each pair is [model name passed in, map key whose value is expected].
  const cases = [
    ['gpt-4-turbo', 'gpt-4-turbo'],
    ['gpt-4-turbo-preview', 'gpt-4-turbo'],
    ['gpt-4-0125', 'gpt-4-0125'],
    ['gpt-4-0125-preview', 'gpt-4-0125'],
    ['gpt-3.5-turbo-0125', 'gpt-3.5-turbo-0125'],
  ];
  for (const [model, key] of cases) {
    expect(getModelMaxTokens(model)).toBe(openAIMap[key]);
  }
});
test('should return correct tokens for gpt-4.5 matches', () => {
  // Bare, suffixed, and provider-prefixed names all share the `gpt-4.5` entry.
  const expected = maxTokensMap[EModelEndpoint.openAI]['gpt-4.5'];
  const variants = ['gpt-4.5', 'gpt-4.5-preview', 'openai/gpt-4.5-preview'];
  for (const model of variants) {
    expect(getModelMaxTokens(model)).toBe(expected);
  }
});
test('should return correct tokens for gpt-4.1 matches', () => {
  // Bare, suffixed, provider-prefixed, and dated names all share the `gpt-4.1` entry.
  const expected = maxTokensMap[EModelEndpoint.openAI]['gpt-4.1'];
  const variants = ['gpt-4.1', 'gpt-4.1-preview', 'openai/gpt-4.1', 'gpt-4.1-2024-08-06'];
  for (const model of variants) {
    expect(getModelMaxTokens(model)).toBe(expected);
  }
});
test('should return correct tokens for gpt-4.1-mini matches', () => {
  // Bare, suffixed, and provider-prefixed names all share the `gpt-4.1-mini` entry.
  const expected = maxTokensMap[EModelEndpoint.openAI]['gpt-4.1-mini'];
  const variants = ['gpt-4.1-mini', 'gpt-4.1-mini-preview', 'openai/gpt-4.1-mini'];
  for (const model of variants) {
    expect(getModelMaxTokens(model)).toBe(expected);
  }
});
test('should return correct tokens for gpt-4.1-nano matches', () => {
  // Bare, suffixed, and provider-prefixed names all share the `gpt-4.1-nano` entry.
  const expected = maxTokensMap[EModelEndpoint.openAI]['gpt-4.1-nano'];
  const variants = ['gpt-4.1-nano', 'gpt-4.1-nano-preview', 'openai/gpt-4.1-nano'];
  for (const model of variants) {
    expect(getModelMaxTokens(model)).toBe(expected);
  }
});
test('should return correct tokens for gpt-5 matches', () => {
  // Bare, suffixed, provider-prefixed, and dated names all share the `gpt-5` entry.
  const expected = maxTokensMap[EModelEndpoint.openAI]['gpt-5'];
  const variants = ['gpt-5', 'gpt-5-preview', 'openai/gpt-5', 'gpt-5-2025-01-30'];
  for (const model of variants) {
    expect(getModelMaxTokens(model)).toBe(expected);
  }
});
test('should return correct tokens for gpt-5-mini matches', () => {
  // Bare, suffixed, and provider-prefixed names all share the `gpt-5-mini` entry.
  const expected = maxTokensMap[EModelEndpoint.openAI]['gpt-5-mini'];
  const variants = ['gpt-5-mini', 'gpt-5-mini-preview', 'openai/gpt-5-mini'];
  for (const model of variants) {
    expect(getModelMaxTokens(model)).toBe(expected);
  }
});
test('should return correct tokens for gpt-5-nano matches', () => {
  // Bare, suffixed, and provider-prefixed names all share the `gpt-5-nano` entry.
  const expected = maxTokensMap[EModelEndpoint.openAI]['gpt-5-nano'];
  const variants = ['gpt-5-nano', 'gpt-5-nano-preview', 'openai/gpt-5-nano'];
  for (const model of variants) {
    expect(getModelMaxTokens(model)).toBe(expected);
  }
});
🧮 feat: Enhance Model Pricing Coverage and Pattern Matching (#10173) * updated gpt5-pro it is here and on openrouter https://platform.openai.com/docs/models/gpt-5-pro * feat: Add gpt-5-pro pricing - Implemented handling for the new gpt-5-pro model in the getValueKey function. - Updated tests to ensure correct behavior for gpt-5-pro across various scenarios. - Adjusted token limits and multipliers for gpt-5-pro in the tokens utility files. - Enhanced model matching functionality to include gpt-5-pro variations. * refactor: optimize model pricing and validation logic - Added new model pricing entries for llama2, llama3, and qwen variants in tx.js. - Updated tokenValues to include additional models and their pricing structures. - Implemented validation tests in tx.spec.js to ensure all models resolve correctly to pricing. - Refactored getValueKey function to improve model matching and resolution efficiency. - Removed outdated model entries from tokens.ts to streamline pricing management. * fix: add missing pricing * chore: update model pricing for qwen and gemma variants * chore: update model pricing and add validation for context windows - Removed outdated model entries from tx.js and updated tokenValues with new models. - Added a test in tx.spec.js to ensure all models with pricing have corresponding context windows defined in tokens.ts. - Introduced 'command-text' model pricing in tokens.ts to maintain consistency across model definitions. * chore: update model names and pricing for AI21 and Amazon models - Refactored model names in tx.js for AI21 and Amazon models to remove versioning and improve consistency. - Updated pricing values in tokens.ts to reflect the new model names. - Added comprehensive tests in tx.spec.js to validate pricing for both short and full model names across AI21 and Amazon models. 
* feat: add pricing and validation for Claude Haiku 4.5 model * chore: increase default max context tokens to 18000 for agents * feat: add Qwen3 model pricing and validation tests * chore: reorganize and update Qwen model pricing in tx.js and tokens.ts --------- Co-authored-by: khfung <68192841+khfung@users.noreply.github.com>
2025-10-19 09:23:27 -04:00
test('should return correct tokens for gpt-5-pro matches', () => {
  // Bare, suffixed, provider-prefixed, and dated names all share the `gpt-5-pro` entry.
  const expected = maxTokensMap[EModelEndpoint.openAI]['gpt-5-pro'];
  const variants = ['gpt-5-pro', 'gpt-5-pro-preview', 'openai/gpt-5-pro', 'gpt-5-pro-2025-01-30'];
  for (const model of variants) {
    expect(getModelMaxTokens(model)).toBe(expected);
  }
});
test('should return correct tokens for Anthropic models', () => {
  const anthropicMap = maxTokensMap[EModelEndpoint.anthropic];
  const models = [
    'claude-2.1',
    'claude-2',
    'claude-1.2',
    'claude-1',
    'claude-1-100k',
    'claude-instant-1',
    'claude-instant-1-100k',
    'claude-3-haiku',
    'claude-3-sonnet',
    'claude-3-opus',
    'claude-3-5-sonnet',
    'claude-3-7-sonnet',
  ];

  // Expected value per model:
  //   - 'claude-2.1' uses its own entry,
  //   - any name starting with 'claude-3' uses the 'claude-3-sonnet' entry,
  //   - everything else uses the generic 'claude-' entry.
  const expectedFor = (model) => {
    if (model === 'claude-2.1') {
      return anthropicMap['claude-2.1'];
    }
    if (model.startsWith('claude-3')) {
      return anthropicMap['claude-3-sonnet'];
    }
    return anthropicMap['claude-'];
  };

  models.forEach((model) => {
    expect(getModelMaxTokens(model, EModelEndpoint.anthropic)).toEqual(expectedFor(model));
  });
});
feat(Google): Support all Text/Chat Models, Response streaming, `PaLM` -> `Google` 🤖 (#1316) * feat: update PaLM icons * feat: add additional google models * POC: formatting inputs for Vertex AI streaming * refactor: move endpoints services outside of /routes dir to /services/Endpoints * refactor: shorten schemas import * refactor: rename PALM to GOOGLE * feat: make Google editable endpoint * feat: reusable Ask and Edit controllers based off Anthropic * chore: organize imports/logic * fix(parseConvo): include examples in googleSchema * fix: google only allows odd number of messages to be sent * fix: pass proxy to AnthropicClient * refactor: change `google` altName to `Google` * refactor: update getModelMaxTokens and related functions to handle maxTokensMap with nested endpoint model key/values * refactor: google Icon and response sender changes (Codey and Google logo instead of PaLM in all cases) * feat: google support for maxTokensMap * feat: google updated endpoints with Ask/Edit controllers, buildOptions, and initializeClient * feat(GoogleClient): now builds prompt for text models and supports real streaming from Vertex AI through langchain * chore(GoogleClient): remove comments, left before for reference in git history * docs: update google instructions (WIP) * docs(apis_and_tokens.md): add images to google instructions * docs: remove typo apis_and_tokens.md * Update apis_and_tokens.md * feat(Google): use default settings map, fully support context for both text and chat models, fully support examples for chat models * chore: update more PaLM references to Google * chore: move playwright out of workflows to avoid failing tests
2023-12-10 14:54:13 -05:00
// Tests for Google models
test('should return correct tokens for exact match - Google models', () => {
  // Each model name is looked up in the Google map under its own key.
  const googleTokens = maxTokensMap[EModelEndpoint.google];
  for (const model of ['text-bison-32k', 'codechat-bison-32k']) {
    expect(getModelMaxTokens(model, EModelEndpoint.google)).toBe(googleTokens[model]);
  }
});
test('should return undefined for no match - Google models', () => {
  // A name the test expects to match nothing in the Google map.
  const tokens = getModelMaxTokens('unknown-google-model', EModelEndpoint.google);
  expect(tokens).toBeUndefined();
});
test('should return correct tokens for partial match - Google models', () => {
  const googleTokens = maxTokensMap[EModelEndpoint.google];

  // [model name passed in, map key whose value the lookup is expected to return]
  const cases = [
    ['gemini-2.0-flash-lite-preview-02-05', 'gemini-2.0-flash-lite'],
    ['gemini-2.0-flash-001', 'gemini-2.0-flash'],
    ['gemini-2.0-flash-exp', 'gemini-2.0-flash'],
    ['gemini-2.0-pro-exp-02-05', 'gemini-2.0'],
    ['gemini-1.5-flash-8b', 'gemini-1.5-flash-8b'],
    ['gemini-1.5-flash-thinking', 'gemini-1.5-flash'],
    ['gemini-1.5-pro-latest', 'gemini-1.5'],
    ['gemini-1.5-pro-preview-0409', 'gemini-1.5'],
    ['gemini-3', 'gemini-3'],
    ['gemini-2.5-pro', 'gemini-2.5-pro'],
    ['gemini-2.5-flash', 'gemini-2.5-flash'],
    ['gemini-2.5-flash-lite', 'gemini-2.5-flash-lite'],
    ['gemini-pro-vision', 'gemini-pro-vision'],
    ['gemini-1.0', 'gemini'],
    ['gemini-pro', 'gemini'],
    ['code-', 'code-'],
    ['chat-', 'chat-'],
  ];

  for (const [model, expectedKey] of cases) {
    expect(getModelMaxTokens(model, EModelEndpoint.google)).toBe(googleTokens[expectedKey]);
  }
});
test('should return correct tokens for partial match - Cohere models', () => {
  // Cohere names are resolved through the custom endpoint's token map.
  const customTokens = maxTokensMap[EModelEndpoint.custom];
  for (const model of ['command', 'command-r-plus']) {
    expect(getModelMaxTokens(model, EModelEndpoint.custom)).toBe(customTokens[model]);
  }
});
test('should return correct tokens when using a custom endpointTokenConfig', () => {
  // The third argument supplies a per-endpoint token table for the lookup.
  const endpointTokenConfig = { 'custom-model': 12345 };
  const tokens = getModelMaxTokens('custom-model', EModelEndpoint.openAI, endpointTokenConfig);
  expect(tokens).toBe(12345);
});
test('should prioritize endpointTokenConfig over the default configuration', () => {
  // 'gpt-4-32k' is given an override value here; the override is what must come back.
  const endpointTokenConfig = { 'gpt-4-32k': 9999 };
  const tokens = getModelMaxTokens('gpt-4-32k', EModelEndpoint.openAI, endpointTokenConfig);
  expect(tokens).toBe(9999);
});
test('should return undefined if the model is not found in custom endpointTokenConfig', () => {
  // The supplied table has no entry for the requested model.
  const endpointTokenConfig = { 'custom-model': 12345 };
  const tokens = getModelMaxTokens(
    'nonexistent-model',
    EModelEndpoint.openAI,
    endpointTokenConfig,
  );
  expect(tokens).toBeUndefined();
});
test('should return correct tokens for exact match in azureOpenAI models', () => {
  // Expected value is read straight from the azureOpenAI section of the map.
  const expected = maxTokensMap[EModelEndpoint.azureOpenAI]['gpt-4-turbo'];
  const tokens = getModelMaxTokens('gpt-4-turbo', EModelEndpoint.azureOpenAI);
  expect(tokens).toBe(expected);
});
test('should return undefined for no match in azureOpenAI models', () => {
  // A name the test expects to match nothing under the azureOpenAI endpoint.
  const tokens = getModelMaxTokens('nonexistent-azure-model', EModelEndpoint.azureOpenAI);
  expect(tokens).toBeUndefined();
});
test('should return undefined for undefined, null, or number model argument with azureOpenAI endpoint', () => {
  // None of these model arguments is a string.
  const invalidModels = [undefined, null, 1234];
  for (const model of invalidModels) {
    expect(getModelMaxTokens(model, EModelEndpoint.azureOpenAI)).toBeUndefined();
  }
});
test('should respect custom endpointTokenConfig over azureOpenAI defaults', () => {
  // The model exists only in the supplied table; its value must be returned.
  const endpointTokenConfig = { 'custom-azure-model': 4096 };
  const tokens = getModelMaxTokens(
    'custom-azure-model',
    EModelEndpoint.azureOpenAI,
    endpointTokenConfig,
  );
  expect(tokens).toBe(4096);
});
test('should return correct tokens for partial match with custom endpointTokenConfig in azureOpenAI', () => {
  // The table key 'azure-custom-' is only a fragment of the requested model name.
  const endpointTokenConfig = { 'azure-custom-': 1024 };
  const tokens = getModelMaxTokens(
    'azure-custom-gpt-3',
    EModelEndpoint.azureOpenAI,
    endpointTokenConfig,
  );
  expect(tokens).toBe(1024);
});
test('should return undefined for a model when using an unsupported endpoint', () => {
  // The endpoint string is not one of the EModelEndpoint values used elsewhere in this suite.
  const tokens = getModelMaxTokens('azure-gpt-3', 'unsupportedEndpoint');
  expect(tokens).toBeUndefined();
});
test('should return correct max context tokens for o1-series models', () => {
  // No endpoint argument is passed; expectations are read from the openAI map.
  const openAITokens = maxTokensMap[EModelEndpoint.openAI];

  // Map key -> model names expected to resolve to that key's value.
  const variations = {
    // Standard o1 variations
    o1: ['o1', 'o1-latest', 'o1-2024-12-17', 'o1-something-else', 'openai/o1-something-else'],
    // Mini variations
    'o1-mini': [
      'o1-mini',
      'o1-mini-latest',
      'o1-mini-2024-09-12',
      'o1-mini-something',
      'openai/o1-mini-something',
    ],
    // Preview variations
    'o1-preview': [
      'o1-preview',
      'o1-preview-latest',
      'o1-preview-2024-09-12',
      'o1-preview-something',
      'openai/o1-preview-something',
    ],
  };

  for (const [key, names] of Object.entries(variations)) {
    const expected = openAITokens[key];
    for (const name of names) {
      expect(getModelMaxTokens(name)).toBe(expected);
    }
  }
});
test('should return correct max context tokens for o4-mini and o3', () => {
  const openAITokens = maxTokensMap[EModelEndpoint.openAI];

  // Each key is checked both bare and with an 'openai/' provider prefix.
  for (const key of ['o4-mini', 'o3']) {
    const expected = openAITokens[key];
    expect(getModelMaxTokens(key)).toBe(expected);
    expect(getModelMaxTokens(`openai/${key}`)).toBe(expected);
  }
});
test('should return correct tokens for GPT-OSS models', () => {
  // Every listed spelling is expected to resolve to the single 'gpt-oss' entry.
  const gptOssTokens = maxTokensMap[EModelEndpoint.openAI]['gpt-oss'];
  const modelNames = [
    'gpt-oss:20b',
    'gpt-oss-20b',
    'gpt-oss-120b',
    'openai/gpt-oss-20b',
    'openai/gpt-oss-120b',
    'openai/gpt-oss:120b',
  ];
  for (const modelName of modelNames) {
    expect(getModelMaxTokens(modelName)).toBe(gptOssTokens);
  }
});
test('should return correct tokens for GLM models', () => {
  // Each name is compared against the openAI map entry stored under that same name.
  const openAITokens = maxTokensMap[EModelEndpoint.openAI];
  const models = ['glm-4.6', 'glm-4.5v', 'glm-4.5-air', 'glm-4.5', 'glm-4-32b', 'glm-4', 'glm4'];
  for (const model of models) {
    expect(getModelMaxTokens(model)).toBe(openAITokens[model]);
  }
});
test('should return correct tokens for GLM models with provider prefixes', () => {
  const openAITokens = maxTokensMap[EModelEndpoint.openAI];

  // [prefixed (and sometimes upper-cased) model name, expected openAI map key]
  const cases = [
    ['z-ai/glm-4.6', 'glm-4.6'],
    ['z-ai/glm-4.5', 'glm-4.5'],
    ['z-ai/glm-4.5-air', 'glm-4.5-air'],
    ['z-ai/glm-4.5v', 'glm-4.5v'],
    ['z-ai/glm-4-32b', 'glm-4-32b'],
    ['zai/glm-4.6', 'glm-4.6'],
    ['zai/glm-4.5-air', 'glm-4.5-air'],
    ['zai/glm-4.5v', 'glm-4.5v'],
    ['zai-org/GLM-4.6', 'glm-4.6'],
    ['zai-org/GLM-4.5', 'glm-4.5'],
    ['zai-org/GLM-4.5-Air', 'glm-4.5-air'],
    ['zai-org/GLM-4.5V', 'glm-4.5v'],
    ['zai-org/GLM-4-32B-0414', 'glm-4-32b'],
  ];

  for (const [model, expectedKey] of cases) {
    expect(getModelMaxTokens(model)).toBe(openAITokens[expectedKey]);
  }
});
test('should return correct tokens for GLM models with suffixes', () => {
  const openAITokens = maxTokensMap[EModelEndpoint.openAI];

  // [model name carrying an FP8 suffix, expected openAI map key]
  const cases = [
    ['glm-4.6-fp8', 'glm-4.6'],
    ['zai-org/GLM-4.6-FP8', 'glm-4.6'],
    ['zai-org/GLM-4.5-Air-FP8', 'glm-4.5-air'],
  ];

  for (const [model, expectedKey] of cases) {
    expect(getModelMaxTokens(model)).toBe(openAITokens[expectedKey]);
  }
});
test('should return correct max output tokens for GPT-5 models', () => {
  // Loaded inside the test, as in the original block.
  const { getModelMaxOutputTokens } = require('@librechat/api');

  // Each model is checked with no endpoint, with openAI, and with azureOpenAI.
  ['gpt-5', 'gpt-5-mini', 'gpt-5-nano', 'gpt-5-pro'].forEach((model) => {
    expect(getModelMaxOutputTokens(model)).toBe(maxOutputTokensMap[EModelEndpoint.openAI][model]);
    expect(getModelMaxOutputTokens(model, EModelEndpoint.openAI)).toBe(
      maxOutputTokensMap[EModelEndpoint.openAI][model],
    );
    expect(getModelMaxOutputTokens(model, EModelEndpoint.azureOpenAI)).toBe(
      maxOutputTokensMap[EModelEndpoint.azureOpenAI][model],
    );
  });
});
test('should return correct max output tokens for GPT-OSS models', () => {
  // Loaded inside the test, as in the original block.
  const { getModelMaxOutputTokens } = require('@librechat/api');

  // Each model is checked with no endpoint, with openAI, and with azureOpenAI.
  ['gpt-oss-20b', 'gpt-oss-120b'].forEach((model) => {
    expect(getModelMaxOutputTokens(model)).toBe(maxOutputTokensMap[EModelEndpoint.openAI][model]);
    expect(getModelMaxOutputTokens(model, EModelEndpoint.openAI)).toBe(
      maxOutputTokensMap[EModelEndpoint.openAI][model],
    );
    expect(getModelMaxOutputTokens(model, EModelEndpoint.azureOpenAI)).toBe(
      maxOutputTokensMap[EModelEndpoint.azureOpenAI][model],
    );
  });
});
feat: ConversationSummaryBufferMemory (#973) * refactor: pass model in message edit payload, use encoder in standalone util function * feat: add summaryBuffer helper * refactor(api/messages): use new countTokens helper and add auth middleware at top * wip: ConversationSummaryBufferMemory * refactor: move pre-generation helpers to prompts dir * chore: remove console log * chore: remove test as payload will no longer carry tokenCount * chore: update getMessagesWithinTokenLimit JSDoc * refactor: optimize getMessagesForConversation and also break on summary, feat(ci): getMessagesForConversation tests * refactor(getMessagesForConvo): count '00000000-0000-0000-0000-000000000000' as root message * chore: add newer model to token map * fix: condition was point to prop of array instead of message prop * refactor(BaseClient): use object for refineMessages param, rename 'summary' to 'summaryMessage', add previous_summary refactor(getMessagesWithinTokenLimit): replace text and tokenCount if should summarize, summary, and summaryTokenCount are present fix/refactor(handleContextStrategy): use the right comparison length for context diff, and replace payload first message when a summary is present * chore: log previous_summary if debugging * refactor(formatMessage): assume if role is defined that it's a valid value * refactor(getMessagesWithinTokenLimit): remove summary logic refactor(handleContextStrategy): add usePrevSummary logic in case only summary was pruned refactor(loadHistory): initial message query will return all ordered messages but keep track of the latest summary refactor(getMessagesForConversation): use object for single param, edit jsdoc, edit all files using the method refactor(ChatGPTClient): order messages before buildPrompt is called, TODO: add convoSumBuffMemory logic * fix: undefined handling and summarizing only when shouldRefineContext is true * chore(BaseClient): fix test results omitting system role for summaries and test edge case * chore: export 
summaryBuffer from index file * refactor(OpenAIClient/BaseClient): move refineMessages to subclass, implement LLM initialization for summaryBuffer * feat: add OPENAI_SUMMARIZE to enable summarizing, refactor: rename client prop 'shouldRefineContext' to 'shouldSummarize', change contextStrategy value to 'summarize' from 'refine' * refactor: rename refineMessages method to summarizeMessages for clarity * chore: clarify summary future intent in .env.example * refactor(initializeLLM): handle case for either 'model' or 'modelName' being passed * feat(gptPlugins): enable summarization for plugins * refactor(gptPlugins): utilize new initializeLLM method and formatting methods for messages, use payload array for currentMessages and assign pastMessages sooner * refactor(agents): use ConversationSummaryBufferMemory for both agent types * refactor(formatMessage): optimize original method for langchain, add helper function for langchain messages, add JSDocs and tests * refactor(summaryBuffer): add helper to createSummaryBufferMemory, and use new formatting helpers * fix: forgot to spread formatMessages also took opportunity to pluralize filename * refactor: pass memory to tools, namely openapi specs. 
not used and may never be used by new method but added for testing * ci(formatMessages): add more exhaustive checks for langchain messages * feat: add debug env var for OpenAI * chore: delete unnecessary comments * chore: add extra note about summary feature * fix: remove tokenCount from payload instructions * fix: test fail * fix: only pass instructions to payload when defined or not empty object * refactor: fromPromptMessages is deprecated, use renamed method fromMessages * refactor: use 'includes' instead of 'startsWith' for extended OpenRouter compatibility * fix(PluginsClient.buildPromptBody): handle undefined message strings * chore: log langchain titling error * feat: getModelMaxTokens helper * feat: tokenSplit helper * feat: summary prompts updated * fix: optimize _CUT_OFF_SUMMARIZER prompt * refactor(summaryBuffer): use custom summary prompt, allow prompt to be passed, pass humanPrefix and aiPrefix to memory, along with any future variables, rename messagesToRefine to context * fix(summaryBuffer): handle edge case where messagesToRefine exceeds summary context, refactor(BaseClient): allow custom maxContextTokens to be passed to getMessagesWithinTokenLimit, add defined check before unshifting summaryMessage, update shouldSummarize based on this refactor(OpenAIClient): use getModelMaxTokens, use cut-off message method for summary if no messages were left after pruning * fix(handleContextStrategy): handle case where incoming prompt is bigger than model context * chore: rename refinedContent to splitText * chore: remove unnecessary debug log
2023-09-26 21:02:28 -04:00
});
feat: Accurate Token Usage Tracking & Optional Balance (#1018) * refactor(Chains/llms): allow passing callbacks * refactor(BaseClient): accurately count completion tokens as generation only * refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM * wip: summary prompt tokens * refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end wip: draft out relevant providers and variables for token tracing * refactor(createLLM): make streaming prop false by default * chore: remove use of getTokenCountForResponse * refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace * chore: remove passing of streaming prop, also console log useful vars for tracing * feat: formatFromLangChain helper function to count tokens for ChatModelStart * refactor(initializeLLM): add role for LLM tracing * chore(formatFromLangChain): update JSDoc * feat(formatMessages): formats langChain messages into OpenAI payload format * chore: install openai-chat-tokens * refactor(formatMessage): optimize conditional langChain logic fix(formatFromLangChain): fix destructuring * feat: accurate prompt tokens for ChatModelStart before generation * refactor(handleChatModelStart): move to callbacks dir, use factory function * refactor(initializeLLM): rename 'role' to 'context' * feat(Balance/Transaction): new schema/models for tracking token spend refactor(Key): factor out model export to separate file * refactor(initializeClient): add req,res objects to client options * feat: add-balance script to add to an existing users' token balance refactor(Transaction): use multiplier map/function, return balance update * refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match * refactor(Tx): add fair fallback value multiplier incase the config result is undefined * refactor(Balance): rename 
'tokens' to 'tokenCredits' * feat: balance check, add tx.js for new tx-related methods and tests * chore(summaryPrompts): update prompt token count * refactor(callbacks): pass req, res wip: check balance * refactor(Tx): make convoId a String type, fix(calculateTokenValue) * refactor(BaseClient): add conversationId as client prop when assigned * feat(RunManager): track LLM runs with manager, track token spend from LLM, refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls * feat(spendTokens): helper to spend prompt/completion tokens * feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds refactor(Balance): static check method to return object instead of boolean now wip(OpenAIClient): implement use of checkBalance * refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large refactor(OpenAIClient): add checkBalance refactor(createStartHandler): add checkBalance * chore: remove prompt and completion token logging from route handler * chore(spendTokens): add JSDoc * feat(logTokenCost): record transactions for basic api calls * chore(ask/edit): invoke getResponseSender only once per API call * refactor(ask/edit): pass promptTokens to getIds and include in abort data * refactor(getIds -> getReqData): rename function * refactor(Tx): increase value if incomplete message * feat: record tokenUsage when message is aborted * refactor: subtract tokens when payload includes function_call * refactor: add namespace for token_balance * fix(spendTokens): only execute if corresponding token type amounts are defined * refactor(checkBalance): throws Error if not enough token credits * refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run' * fix(abortMiddleware): circular dependency, and default to empty string for completionTokens * fix: properly cancel title requests when there isn't 
enough tokens to generate * feat(predictNewSummary): custom chain for summaries to allow signal passing refactor(summaryBuffer): use new custom chain * feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError * refactor(createStartHandler): if summary, add error details to runs * fix(OpenAIClient): support aborting from summarization & showing error to user refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer * refactor(logTokenCost -> recordTokenUsage): rename * refactor(checkBalance): include promptTokens in errorMessage * refactor(checkBalance/spendTokens): move to models dir * fix(createLanguageChain): correctly pass config * refactor(initializeLLM/title): add tokenBuffer of 150 for balance check * refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called * refactor(createStartHandler): add error to run if context is plugins as well * refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run * refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic * chore: use absolute equality for addTitle condition * refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional * style: icon changes to match official * fix(BaseClient): getTokenCountForResponse -> getTokenCount * fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc * refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled * fix(e2e/cleanUp): cleanup new collections, import all model methods from index * fix(config/add-balance): add uncaughtException listener * fix: circular dependency * refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance * fix(handleResponseMessage): only record token usage in this method if not 
error and completion is not skipped * fix(createStartHandler): correct condition for generations * chore: bump postcss due to moderate severity vulnerability * chore: bump zod due to low severity vulnerability * chore: bump openai & data-provider version * feat(types): OpenAI Message types * chore: update bun lockfile * refactor(CodeBlock): add error block formatting * refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON * chore(logViolation): delete user_id after error is logged * refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex * fix(DALL-E): use latest openai SDK * chore: reorganize imports, fix type issue * feat(server): add balance route * fix(api/models): add auth * feat(data-provider): /api/balance query * feat: show balance if checking is enabled, refetch on final message or error * chore: update docs, .env.example with token_usage info, add balance script command * fix(Balance): fallback to empty obj for balance query * style: slight adjustment of balance element * docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
describe('matchModelName', () => {
it('should return the exact model name if it exists in maxTokensMap', () => {
  // The input is expected to come back unchanged.
  const model = 'gpt-4-32k-0613';
  expect(matchModelName(model)).toBe(model);
});
it('should return the closest matching key for partial matches', () => {
  // The trailing '-unknown' is expected to be dropped, leaving 'gpt-4-32k'.
  const matched = matchModelName('gpt-4-32k-unknown');
  expect(matched).toBe('gpt-4-32k');
});
it('should return the input model name if no match is found', () => {
  // With nothing to match, the original string is expected back.
  const model = 'unknown-model';
  expect(matchModelName(model)).toBe(model);
});
it('should return undefined for non-string inputs', () => {
  // One sample each of undefined, null, a number, and an object.
  const nonStrings = [undefined, null, 123, {}];
  for (const input of nonStrings) {
    expect(matchModelName(input)).toBeUndefined();
  }
});
// 11/06 Update
it('should return the exact model name for gpt-3.5-turbo-1106 if it exists in maxTokensMap', () => {
  // Exact-key lookup: the returned name must equal the requested one.
  const modelName = 'gpt-3.5-turbo-1106';
  expect(matchModelName(modelName)).toBe(modelName);
});
it('should return the exact model name for gpt-4-1106 if it exists in maxTokensMap', () => {
  // Exact-key lookup: the returned name must equal the requested one.
  const modelName = 'gpt-4-1106';
  expect(matchModelName(modelName)).toBe(modelName);
});
it('should return the closest matching key for gpt-3.5-turbo-1106 partial matches', () => {
  // The key may appear either before or after a "/"-separated extra segment.
  const candidates = ['gpt-3.5-turbo-1106/something', 'something/gpt-3.5-turbo-1106'];
  candidates.forEach((candidate) => {
    expect(matchModelName(candidate)).toBe('gpt-3.5-turbo-1106');
  });
});
it('should return the closest matching key for gpt-4-1106 partial matches', () => {
  // A trailing path segment and the "-preview" / "-vision-preview" suffixes
  // must all resolve to the same base key.
  const candidates = ['gpt-4-1106/something', 'gpt-4-1106-preview', 'gpt-4-1106-vision-preview'];
  for (const candidate of candidates) {
    expect(matchModelName(candidate)).toBe('gpt-4-1106');
  }
});
feat(Google): Support all Text/Chat Models, Response streaming, `PaLM` -> `Google` 🤖 (#1316) * feat: update PaLM icons * feat: add additional google models * POC: formatting inputs for Vertex AI streaming * refactor: move endpoints services outside of /routes dir to /services/Endpoints * refactor: shorten schemas import * refactor: rename PALM to GOOGLE * feat: make Google editable endpoint * feat: reusable Ask and Edit controllers based off Anthropic * chore: organize imports/logic * fix(parseConvo): include examples in googleSchema * fix: google only allows odd number of messages to be sent * fix: pass proxy to AnthropicClient * refactor: change `google` altName to `Google` * refactor: update getModelMaxTokens and related functions to handle maxTokensMap with nested endpoint model key/values * refactor: google Icon and response sender changes (Codey and Google logo instead of PaLM in all cases) * feat: google support for maxTokensMap * feat: google updated endpoints with Ask/Edit controllers, buildOptions, and initializeClient * feat(GoogleClient): now builds prompt for text models and supports real streaming from Vertex AI through langchain * chore(GoogleClient): remove comments, left before for reference in git history * docs: update google instructions (WIP) * docs(apis_and_tokens.md): add images to google instructions * docs: remove typo apis_and_tokens.md * Update apis_and_tokens.md * feat(Google): use default settings map, fully support context for both text and chat models, fully support examples for chat models * chore: update more PaLM references to Google * chore: move playwright out of workflows to avoid failing tests
2023-12-10 14:54:13 -05:00
// 01/25 Update
it('should return the closest matching key for gpt-4-turbo/0125 matches', () => {
  // [input, expected key] pairs, checked in the listed order.
  const expectations = [
    ['openai/gpt-4-0125', 'gpt-4-0125'],
    ['gpt-4-turbo-preview', 'gpt-4-turbo'],
    ['gpt-4-turbo-vision-preview', 'gpt-4-turbo'],
    ['gpt-4-0125', 'gpt-4-0125'],
    ['gpt-4-0125-preview', 'gpt-4-0125'],
    ['gpt-4-0125-vision-preview', 'gpt-4-0125'],
  ];
  for (const [input, expectedKey] of expectations) {
    expect(matchModelName(input)).toBe(expectedKey);
  }
});
it('should return the closest matching key for gpt-4.1 matches', () => {
  // Provider prefix, "-preview" suffix and date-stamped variants all map to the base key.
  const variants = [
    'openai/gpt-4.1',
    'gpt-4.1-preview',
    'gpt-4.1-2024-08-06',
    'gpt-4.1-2024-08-06-0718',
  ];
  variants.forEach((variant) => {
    expect(matchModelName(variant)).toBe('gpt-4.1');
  });
});
it('should return the closest matching key for gpt-4.1-mini matches', () => {
  // Each variant must resolve to 'gpt-4.1-mini' rather than the shorter 'gpt-4.1'.
  const variants = ['openai/gpt-4.1-mini', 'gpt-4.1-mini-preview', 'gpt-4.1-mini-2024-08-06'];
  for (const variant of variants) {
    expect(matchModelName(variant)).toBe('gpt-4.1-mini');
  }
});
it('should return the closest matching key for gpt-4.1-nano matches', () => {
  // Each variant must resolve to 'gpt-4.1-nano' rather than the shorter 'gpt-4.1'.
  const variants = ['openai/gpt-4.1-nano', 'gpt-4.1-nano-preview', 'gpt-4.1-nano-2024-08-06'];
  for (const variant of variants) {
    expect(matchModelName(variant)).toBe('gpt-4.1-nano');
  }
});
it('should return the closest matching key for gpt-5 matches', () => {
  // Provider prefix, "-preview" suffix and date-stamped variants all map to the base key.
  const variants = ['openai/gpt-5', 'gpt-5-preview', 'gpt-5-2025-01-30', 'gpt-5-2025-01-30-0130'];
  variants.forEach((variant) => {
    expect(matchModelName(variant)).toBe('gpt-5');
  });
});
it('should return the closest matching key for gpt-5-mini matches', () => {
  // Each variant must resolve to 'gpt-5-mini' rather than the shorter 'gpt-5'.
  const variants = ['openai/gpt-5-mini', 'gpt-5-mini-preview', 'gpt-5-mini-2025-01-30'];
  for (const variant of variants) {
    expect(matchModelName(variant)).toBe('gpt-5-mini');
  }
});
it('should return the closest matching key for gpt-5-nano matches', () => {
  // Each variant must resolve to 'gpt-5-nano' rather than the shorter 'gpt-5'.
  const variants = ['openai/gpt-5-nano', 'gpt-5-nano-preview', 'gpt-5-nano-2025-01-30'];
  for (const variant of variants) {
    expect(matchModelName(variant)).toBe('gpt-5-nano');
  }
});
🧮 feat: Enhance Model Pricing Coverage and Pattern Matching (#10173) * updated gpt5-pro it is here and on openrouter https://platform.openai.com/docs/models/gpt-5-pro * feat: Add gpt-5-pro pricing - Implemented handling for the new gpt-5-pro model in the getValueKey function. - Updated tests to ensure correct behavior for gpt-5-pro across various scenarios. - Adjusted token limits and multipliers for gpt-5-pro in the tokens utility files. - Enhanced model matching functionality to include gpt-5-pro variations. * refactor: optimize model pricing and validation logic - Added new model pricing entries for llama2, llama3, and qwen variants in tx.js. - Updated tokenValues to include additional models and their pricing structures. - Implemented validation tests in tx.spec.js to ensure all models resolve correctly to pricing. - Refactored getValueKey function to improve model matching and resolution efficiency. - Removed outdated model entries from tokens.ts to streamline pricing management. * fix: add missing pricing * chore: update model pricing for qwen and gemma variants * chore: update model pricing and add validation for context windows - Removed outdated model entries from tx.js and updated tokenValues with new models. - Added a test in tx.spec.js to ensure all models with pricing have corresponding context windows defined in tokens.ts. - Introduced 'command-text' model pricing in tokens.ts to maintain consistency across model definitions. * chore: update model names and pricing for AI21 and Amazon models - Refactored model names in tx.js for AI21 and Amazon models to remove versioning and improve consistency. - Updated pricing values in tokens.ts to reflect the new model names. - Added comprehensive tests in tx.spec.js to validate pricing for both short and full model names across AI21 and Amazon models. 
* feat: add pricing and validation for Claude Haiku 4.5 model * chore: increase default max context tokens to 18000 for agents * feat: add Qwen3 model pricing and validation tests * chore: reorganize and update Qwen model pricing in tx.js and tokens.ts --------- Co-authored-by: khfung <68192841+khfung@users.noreply.github.com>
2025-10-19 09:23:27 -04:00
it('should return the closest matching key for gpt-5-pro matches', () => {
  // Each variant must resolve to 'gpt-5-pro' rather than the shorter 'gpt-5'.
  const variants = [
    'openai/gpt-5-pro',
    'gpt-5-pro-preview',
    'gpt-5-pro-2025-01-30',
    'gpt-5-pro-2025-01-30-0130',
  ];
  variants.forEach((variant) => {
    expect(matchModelName(variant)).toBe('gpt-5-pro');
  });
});
feat(Google): Support all Text/Chat Models, Response streaming, `PaLM` -> `Google` 🤖 (#1316) * feat: update PaLM icons * feat: add additional google models * POC: formatting inputs for Vertex AI streaming * refactor: move endpoints services outside of /routes dir to /services/Endpoints * refactor: shorten schemas import * refactor: rename PALM to GOOGLE * feat: make Google editable endpoint * feat: reusable Ask and Edit controllers based off Anthropic * chore: organize imports/logic * fix(parseConvo): include examples in googleSchema * fix: google only allows odd number of messages to be sent * fix: pass proxy to AnthropicClient * refactor: change `google` altName to `Google` * refactor: update getModelMaxTokens and related functions to handle maxTokensMap with nested endpoint model key/values * refactor: google Icon and response sender changes (Codey and Google logo instead of PaLM in all cases) * feat: google support for maxTokensMap * feat: google updated endpoints with Ask/Edit controllers, buildOptions, and initializeClient * feat(GoogleClient): now builds prompt for text models and supports real streaming from Vertex AI through langchain * chore(GoogleClient): remove comments, left before for reference in git history * docs: update google instructions (WIP) * docs(apis_and_tokens.md): add images to google instructions * docs: remove typo apis_and_tokens.md * Update apis_and_tokens.md * feat(Google): use default settings map, fully support context for both text and chat models, fully support examples for chat models * chore: update more PaLM references to Google * chore: move playwright out of workflows to avoid failing tests
2023-12-10 14:54:13 -05:00
// Tests for Google models
it('should return the exact model name if it exists in maxTokensMap - Google models', () => {
  // Lookups are scoped to the Google endpoint; each name must come back unchanged.
  const googleModels = ['text-bison-32k', 'codechat-bison-32k'];
  for (const modelName of googleModels) {
    expect(matchModelName(modelName, EModelEndpoint.google)).toBe(modelName);
  }
});
it('should return the input model name if no match is found - Google models', () => {
  // An unrecognised name on the Google endpoint is expected to pass through as-is.
  const unmatchedName = 'unknown-google-model';
  const resolved = matchModelName(unmatchedName, EModelEndpoint.google);
  expect(resolved).toBe(unmatchedName);
});
it('should return the closest matching key for partial matches - Google models', () => {
  // Both prefix-style names are expected to be returned unchanged for the Google endpoint.
  const prefixNames = ['code-', 'chat-'];
  prefixNames.forEach((prefixName) => {
    expect(matchModelName(prefixName, EModelEndpoint.google)).toBe(prefixName);
  });
});
feat: Accurate Token Usage Tracking & Optional Balance (#1018) * refactor(Chains/llms): allow passing callbacks * refactor(BaseClient): accurately count completion tokens as generation only * refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM * wip: summary prompt tokens * refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end wip: draft out relevant providers and variables for token tracing * refactor(createLLM): make streaming prop false by default * chore: remove use of getTokenCountForResponse * refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace * chore: remove passing of streaming prop, also console log useful vars for tracing * feat: formatFromLangChain helper function to count tokens for ChatModelStart * refactor(initializeLLM): add role for LLM tracing * chore(formatFromLangChain): update JSDoc * feat(formatMessages): formats langChain messages into OpenAI payload format * chore: install openai-chat-tokens * refactor(formatMessage): optimize conditional langChain logic fix(formatFromLangChain): fix destructuring * feat: accurate prompt tokens for ChatModelStart before generation * refactor(handleChatModelStart): move to callbacks dir, use factory function * refactor(initializeLLM): rename 'role' to 'context' * feat(Balance/Transaction): new schema/models for tracking token spend refactor(Key): factor out model export to separate file * refactor(initializeClient): add req,res objects to client options * feat: add-balance script to add to an existing users' token balance refactor(Transaction): use multiplier map/function, return balance update * refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match * refactor(Tx): add fair fallback value multiplier incase the config result is undefined * refactor(Balance): rename 
'tokens' to 'tokenCredits' * feat: balance check, add tx.js for new tx-related methods and tests * chore(summaryPrompts): update prompt token count * refactor(callbacks): pass req, res wip: check balance * refactor(Tx): make convoId a String type, fix(calculateTokenValue) * refactor(BaseClient): add conversationId as client prop when assigned * feat(RunManager): track LLM runs with manager, track token spend from LLM, refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls * feat(spendTokens): helper to spend prompt/completion tokens * feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds refactor(Balance): static check method to return object instead of boolean now wip(OpenAIClient): implement use of checkBalance * refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large refactor(OpenAIClient): add checkBalance refactor(createStartHandler): add checkBalance * chore: remove prompt and completion token logging from route handler * chore(spendTokens): add JSDoc * feat(logTokenCost): record transactions for basic api calls * chore(ask/edit): invoke getResponseSender only once per API call * refactor(ask/edit): pass promptTokens to getIds and include in abort data * refactor(getIds -> getReqData): rename function * refactor(Tx): increase value if incomplete message * feat: record tokenUsage when message is aborted * refactor: subtract tokens when payload includes function_call * refactor: add namespace for token_balance * fix(spendTokens): only execute if corresponding token type amounts are defined * refactor(checkBalance): throws Error if not enough token credits * refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run' * fix(abortMiddleware): circular dependency, and default to empty string for completionTokens * fix: properly cancel title requests when there isn't 
enough tokens to generate * feat(predictNewSummary): custom chain for summaries to allow signal passing refactor(summaryBuffer): use new custom chain * feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError * refactor(createStartHandler): if summary, add error details to runs * fix(OpenAIClient): support aborting from summarization & showing error to user refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer * refactor(logTokenCost -> recordTokenUsage): rename * refactor(checkBalance): include promptTokens in errorMessage * refactor(checkBalance/spendTokens): move to models dir * fix(createLanguageChain): correctly pass config * refactor(initializeLLM/title): add tokenBuffer of 150 for balance check * refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called * refactor(createStartHandler): add error to run if context is plugins as well * refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run * refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic * chore: use absolute equality for addTitle condition * refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional * style: icon changes to match official * fix(BaseClient): getTokenCountForResponse -> getTokenCount * fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc * refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled * fix(e2e/cleanUp): cleanup new collections, import all model methods from index * fix(config/add-balance): add uncaughtException listener * fix: circular dependency * refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance * fix(handleResponseMessage): only record token usage in this method if not 
error and completion is not skipped * fix(createStartHandler): correct condition for generations * chore: bump postcss due to moderate severity vulnerability * chore: bump zod due to low severity vulnerability * chore: bump openai & data-provider version * feat(types): OpenAI Message types * chore: update bun lockfile * refactor(CodeBlock): add error block formatting * refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON * chore(logViolation): delete user_id after error is logged * refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex * fix(DALL-E): use latest openai SDK * chore: reorganize imports, fix type issue * feat(server): add balance route * fix(api/models): add auth * feat(data-provider): /api/balance query * feat: show balance if checking is enabled, refetch on final message or error * chore: update docs, .env.example with token_usage info, add balance script command * fix(Balance): fallback to empty obj for balance query * style: slight adjustment of balance element * docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
});
🚧 WIP: Merge Dev Build (#4611) * refactor: Agent CodeFiles, abortUpload WIP * feat: code environment file upload * refactor: useLazyEffect * refactor: - Add `watch` from `useFormContext` to check if code execution is enabled - Disable file upload button if `agent_id` is not selected or code execution is disabled * WIP: primeCodeFiles; refactor: rename sessionId to session_id for uniformity * Refactor: Rename session_id to sessionId for uniformity in AuthService.js * chore: bump @librechat/agents to version 1.7.1 * WIP: prime code files * refactor: Update code env file upload method to use read stream * feat: reupload code env file if no longer active * refactor: isAssistantTool -> isEntityTool + address type issues * feat: execute code tool hook * refactor: Rename isPluginAuthenticated to checkPluginAuth in PluginController.js * refactor: Update PluginController.js to use AuthType constant for comparison * feat: verify tool authentication (execute_code) * feat: enter librechat_code_api_key * refactor: Remove unused imports in BookmarkForm.tsx * feat: authenticate code tool * refactor: Update Action.tsx to conditionally render the key and revoke key buttons * refactor(Code/Action): prevent uncheck-able 'Run Code' capability when key is revoked * refactor(Code/Action): Update Action.tsx to conditionally render the key and revoke key buttons * fix: agent file upload edge cases * chore: bump @librechat/agents * fix: custom endpoint providerValue icon * feat: ollama meta modal token values + context * feat: ollama agents * refactor: Update token models for Ollama models * chore: Comment out CodeForm * refactor: Update token models for Ollama and Meta models
2024-11-01 18:36:39 -04:00
describe('Meta Models Tests', () => {
describe('getModelMaxTokens', () => {
test('should return correct tokens for LLaMa 2 models', () => {
  // Every LLaMa 2 spelling checked here shares the same expected limit.
  const expectedTokens = 4000;
  const llama2Names = ['llama2', 'llama2.70b', 'llama2-13b', 'llama2-70b'];
  for (const modelName of llama2Names) {
    expect(getModelMaxTokens(modelName)).toBe(expectedTokens);
  }
});
test('should return correct tokens for LLaMa 3 models', () => {
  // Every LLaMa 3 spelling checked here shares the same expected limit.
  const expectedTokens = 8000;
  const llama3Names = ['llama3', 'llama3.8b', 'llama3.70b', 'llama3-8b', 'llama3-70b'];
  for (const modelName of llama3Names) {
    expect(getModelMaxTokens(modelName)).toBe(expectedTokens);
  }
});
test('should return correct tokens for LLaMa 3.1 models', () => {
  // Covers both the "3.1:<size>" and the "3-1-<size>" spellings.
  const expectedTokens = 127500;
  const llama31Names = [
    'llama3.1:8b',
    'llama3.1:70b',
    'llama3.1:405b',
    'llama3-1-8b',
    'llama3-1-70b',
    'llama3-1-405b',
  ];
  llama31Names.forEach((modelName) => {
    expect(getModelMaxTokens(modelName)).toBe(expectedTokens);
  });
});
test('should handle partial matches for Meta models', () => {
  // [model name, expected max tokens] pairs; all names carry a "meta/" prefix.
  const expectations = [
    // Full model names
    ['meta/llama3.1:405b', 127500],
    ['meta/llama3.1:70b', 127500],
    ['meta/llama3.1:8b', 127500],
    ['meta/llama3-1-8b', 127500],
    // Base versions
    ['meta/llama3.1', 127500],
    ['meta/llama3-1', 127500],
    ['meta/llama3', 8000],
    ['meta/llama2', 4000],
  ];
  for (const [modelName, expectedTokens] of expectations) {
    expect(getModelMaxTokens(modelName)).toBe(expectedTokens);
  }
});
test('should match Deepseek model variations', () => {
  // Each DeepSeek identifier must resolve to the value stored under the listed
  // key of the OpenAI endpoint's token map.
  const openAITokens = maxTokensMap[EModelEndpoint.openAI];

  expect(getModelMaxTokens('deepseek-chat')).toBe(openAITokens['deepseek-chat']);
  // 'deepseek-coder' is compared against the generic 'deepseek' entry.
  expect(getModelMaxTokens('deepseek-coder')).toBe(openAITokens['deepseek']);
  expect(getModelMaxTokens('deepseek-reasoner')).toBe(openAITokens['deepseek-reasoner']);
  // The deepseek.r1 expectation was added with the Agent Chain change (#6374, 2025-03-17):
  // "feat: add deepseek.r1 model with token rate and context values for bedrock".
  expect(getModelMaxTokens('deepseek.r1')).toBe(openAITokens['deepseek.r1']);
});
test('should return 128000 context tokens for all DeepSeek models', () => {
  // All listed DeepSeek names share one expected context size.
  const expectedContext = 128000;
  const deepSeekNames = [
    'deepseek-chat',
    'deepseek-reasoner',
    'deepseek-r1',
    'deepseek-v3',
    'deepseek.r1',
  ];
  for (const modelName of deepSeekNames) {
    expect(getModelMaxTokens(modelName)).toBe(expectedContext);
  }
});
test('should handle DeepSeek models with provider prefixes', () => {
  // A "<provider>/" prefix must not change the resolved context size.
  const prefixedNames = [
    'deepseek/deepseek-chat',
    'openrouter/deepseek-reasoner',
    'openai/deepseek-v3',
  ];
  prefixedNames.forEach((modelName) => {
    expect(getModelMaxTokens(modelName)).toBe(128000);
  });
});
🚧 WIP: Merge Dev Build (#4611) * refactor: Agent CodeFiles, abortUpload WIP * feat: code environment file upload * refactor: useLazyEffect * refactor: - Add `watch` from `useFormContext` to check if code execution is enabled - Disable file upload button if `agent_id` is not selected or code execution is disabled * WIP: primeCodeFiles; refactor: rename sessionId to session_id for uniformity * Refactor: Rename session_id to sessionId for uniformity in AuthService.js * chore: bump @librechat/agents to version 1.7.1 * WIP: prime code files * refactor: Update code env file upload method to use read stream * feat: reupload code env file if no longer active * refactor: isAssistantTool -> isEntityTool + address type issues * feat: execute code tool hook * refactor: Rename isPluginAuthenticated to checkPluginAuth in PluginController.js * refactor: Update PluginController.js to use AuthType constant for comparison * feat: verify tool authentication (execute_code) * feat: enter librechat_code_api_key * refactor: Remove unused imports in BookmarkForm.tsx * feat: authenticate code tool * refactor: Update Action.tsx to conditionally render the key and revoke key buttons * refactor(Code/Action): prevent uncheck-able 'Run Code' capability when key is revoked * refactor(Code/Action): Update Action.tsx to conditionally render the key and revoke key buttons * fix: agent file upload edge cases * chore: bump @librechat/agents * fix: custom endpoint providerValue icon * feat: ollama meta modal token values + context * feat: ollama agents * refactor: Update token models for Ollama models * chore: Comment out CodeForm * refactor: Update token models for Ollama and Meta models
2024-11-01 18:36:39 -04:00
});
describe('matchModelName', () => {
test('should match exact LLaMa model names', () => {
  // Each name is expected to be returned unchanged.
  const exactNames = ['llama2', 'llama3', 'llama3.1:8b'];
  for (const modelName of exactNames) {
    expect(matchModelName(modelName)).toBe(modelName);
  }
});
test('should match LLaMa model variations', () => {
  // [input, expected key] pairs: the "meta/" prefix is expected to be dropped.
  const expectations = [
    // Full model names
    ['meta/llama3.1:405b', 'llama3.1:405b'],
    ['meta/llama3.1:70b', 'llama3.1:70b'],
    ['meta/llama3.1:8b', 'llama3.1:8b'],
    ['meta/llama3-1-8b', 'llama3-1-8b'],
    // Base versions
    ['meta/llama3.1', 'llama3.1'],
    ['meta/llama3-1', 'llama3-1'],
  ];
  for (const [input, expectedKey] of expectations) {
    expect(matchModelName(input)).toBe(expectedKey);
  }
});
test('should handle custom endpoint for Meta models', () => {
  // NOTE(review): the title says "custom endpoint" but the endpoint passed is
  // EModelEndpoint.bedrock - confirm which one is intended.
  const metaNames = ['llama2', 'llama3', 'llama3.1:8b'];
  metaNames.forEach((modelName) => {
    expect(matchModelName(modelName, EModelEndpoint.bedrock)).toBe(modelName);
  });
});
test('should match Deepseek model variations', () => {
  // 'deepseek-chat' resolves to itself, while 'deepseek-coder' is expected to
  // resolve to the generic 'deepseek' key.
  const expectations = [
    ['deepseek-chat', 'deepseek-chat'],
    ['deepseek-coder', 'deepseek'],
  ];
  for (const [input, expectedKey] of expectations) {
    expect(matchModelName(input)).toBe(expectedKey);
  }
});
🚧 WIP: Merge Dev Build (#4611) * refactor: Agent CodeFiles, abortUpload WIP * feat: code environment file upload * refactor: useLazyEffect * refactor: - Add `watch` from `useFormContext` to check if code execution is enabled - Disable file upload button if `agent_id` is not selected or code execution is disabled * WIP: primeCodeFiles; refactor: rename sessionId to session_id for uniformity * Refactor: Rename session_id to sessionId for uniformity in AuthService.js * chore: bump @librechat/agents to version 1.7.1 * WIP: prime code files * refactor: Update code env file upload method to use read stream * feat: reupload code env file if no longer active * refactor: isAssistantTool -> isEntityTool + address type issues * feat: execute code tool hook * refactor: Rename isPluginAuthenticated to checkPluginAuth in PluginController.js * refactor: Update PluginController.js to use AuthType constant for comparison * feat: verify tool authentication (execute_code) * feat: enter librechat_code_api_key * refactor: Remove unused imports in BookmarkForm.tsx * feat: authenticate code tool * refactor: Update Action.tsx to conditionally render the key and revoke key buttons * refactor(Code/Action): prevent uncheck-able 'Run Code' capability when key is revoked * refactor(Code/Action): Update Action.tsx to conditionally render the key and revoke key buttons * fix: agent file upload edge cases * chore: bump @librechat/agents * fix: custom endpoint providerValue icon * feat: ollama meta modal token values + context * feat: ollama agents * refactor: Update token models for Ollama models * chore: Comment out CodeForm * refactor: Update token models for Ollama and Meta models
2024-11-01 18:36:39 -04:00
});
describe('DeepSeek Max Output Tokens', () => {
  // Pulled in locally within this suite rather than from the surrounding scope.
  const { getModelMaxOutputTokens } = require('@librechat/api');

  test('should return correct max output tokens for deepseek-chat', () => {
    // Checked with no endpoint, then with the openAI and custom endpoints.
    const expectedOutput = 8000;
    expect(getModelMaxOutputTokens('deepseek-chat')).toBe(expectedOutput);
    for (const endpoint of [EModelEndpoint.openAI, EModelEndpoint.custom]) {
      expect(getModelMaxOutputTokens('deepseek-chat', endpoint)).toBe(expectedOutput);
    }
  });

  test('should return correct max output tokens for deepseek-reasoner', () => {
    // Checked with no endpoint, then with the openAI and custom endpoints.
    const expectedOutput = 64000;
    expect(getModelMaxOutputTokens('deepseek-reasoner')).toBe(expectedOutput);
    for (const endpoint of [EModelEndpoint.openAI, EModelEndpoint.custom]) {
      expect(getModelMaxOutputTokens('deepseek-reasoner', endpoint)).toBe(expectedOutput);
    }
  });

  test('should return correct max output tokens for deepseek-r1', () => {
    const withoutEndpoint = getModelMaxOutputTokens('deepseek-r1');
    expect(withoutEndpoint).toBe(64000);
    const withOpenAI = getModelMaxOutputTokens('deepseek-r1', EModelEndpoint.openAI);
    expect(withOpenAI).toBe(64000);
  });

  test('should return correct max output tokens for deepseek base pattern', () => {
    // The bare 'deepseek' name and 'deepseek-v3' share the same expected limit.
    ['deepseek', 'deepseek-v3'].forEach((modelName) => {
      expect(getModelMaxOutputTokens(modelName)).toBe(8000);
    });
  });

  test('should handle DeepSeek models with provider prefixes for max output tokens', () => {
    // [prefixed model name, expected max output tokens]
    const expectations = [
      ['deepseek/deepseek-chat', 8000],
      ['openrouter/deepseek-reasoner', 64000],
    ];
    for (const [modelName, expectedOutput] of expectations) {
      expect(getModelMaxOutputTokens(modelName)).toBe(expectedOutput);
    }
  });
});
🚧 WIP: Merge Dev Build (#4611) * refactor: Agent CodeFiles, abortUpload WIP * feat: code environment file upload * refactor: useLazyEffect * refactor: - Add `watch` from `useFormContext` to check if code execution is enabled - Disable file upload button if `agent_id` is not selected or code execution is disabled * WIP: primeCodeFiles; refactor: rename sessionId to session_id for uniformity * Refactor: Rename session_id to sessionId for uniformity in AuthService.js * chore: bump @librechat/agents to version 1.7.1 * WIP: prime code files * refactor: Update code env file upload method to use read stream * feat: reupload code env file if no longer active * refactor: isAssistantTool -> isEntityTool + address type issues * feat: execute code tool hook * refactor: Rename isPluginAuthenticated to checkPluginAuth in PluginController.js * refactor: Update PluginController.js to use AuthType constant for comparison * feat: verify tool authentication (execute_code) * feat: enter librechat_code_api_key * refactor: Remove unused imports in BookmarkForm.tsx * feat: authenticate code tool * refactor: Update Action.tsx to conditionally render the key and revoke key buttons * refactor(Code/Action): prevent uncheck-able 'Run Code' capability when key is revoked * refactor(Code/Action): Update Action.tsx to conditionally render the key and revoke key buttons * fix: agent file upload edge cases * chore: bump @librechat/agents * fix: custom endpoint providerValue icon * feat: ollama meta modal token values + context * feat: ollama agents * refactor: Update token models for Ollama models * chore: Comment out CodeForm * refactor: Update token models for Ollama and Meta models
2024-11-01 18:36:39 -04:00
describe('processModelData with Meta models', () => {
  test('should process Meta model data correctly', () => {
    // Two model entries shaped as { id, pricing: { prompt, completion }, context_length }.
    const llama2Entry = {
      id: 'llama2',
      pricing: { prompt: '0.00001', completion: '0.00003' },
      context_length: 4000,
    };
    const llama3Entry = {
      id: 'llama3',
      pricing: { prompt: '0.00002', completion: '0.00004' },
      context_length: 8000,
    };

    const result = processModelData({ data: [llama2Entry, llama3Entry] });

    // The result is keyed by model id. The expected prompt/completion numbers
    // equal the pricing strings scaled by 1e6 (e.g. '0.00001' -> 10), and
    // `context` mirrors `context_length`.
    const expectedById = {
      llama2: { prompt: 10, completion: 30, context: 4000 },
      llama3: { prompt: 20, completion: 40, context: 8000 },
    };
    expect(result.llama2).toEqual(expectedById.llama2);
    expect(result.llama3).toEqual(expectedById.llama3);
  });
});
});
describe('Grok Model Tests - Tokens', () => {
describe('getModelMaxTokens', () => {
test('should return correct tokens for Grok vision models', () => {
expect(getModelMaxTokens('grok-2-vision-1212')).toBe(32768);
expect(getModelMaxTokens('grok-2-vision')).toBe(32768);
expect(getModelMaxTokens('grok-2-vision-latest')).toBe(32768);
});
test('should return correct tokens for Grok beta models', () => {
expect(getModelMaxTokens('grok-vision-beta')).toBe(8192);
expect(getModelMaxTokens('grok-beta')).toBe(131072);
});
test('should return correct tokens for Grok text models', () => {
expect(getModelMaxTokens('grok-2-1212')).toBe(131072);
expect(getModelMaxTokens('grok-2')).toBe(131072);
expect(getModelMaxTokens('grok-2-latest')).toBe(131072);
});
🤖 refactor: Improve Agents Memory Usage, Bump Keyv, Grok 3 (#6850) * chore: remove unused redis file * chore: bump keyv dependencies, and update related imports * refactor: Implement IoRedis client for rate limiting across middleware, as node-redis via keyv not compatible * fix: Set max listeners to expected amount * WIP: memory improvements * refactor: Simplify getAbortData assignment in createAbortController * refactor: Update getAbortData to use WeakRef for content management * WIP: memory improvements in agent chat requests * refactor: Enhance memory management with finalization registry and cleanup functions * refactor: Simplify domainParser calls by removing unnecessary request parameter * refactor: Update parameter types for action tools and agent loading functions to use minimal configs * refactor: Simplify domainParser tests by removing unnecessary request parameter * refactor: Simplify domainParser call by removing unnecessary request parameter * refactor: Enhance client disposal by nullifying additional properties to improve memory management * refactor: Improve title generation by adding abort controller and timeout handling, consolidate request cleanup * refactor: Update checkIdleConnections to skip current user when checking for idle connections if passed * refactor: Update createMCPTool to derive userId from config and handle abort signals * refactor: Introduce createTokenCounter function and update tokenCounter usage; enhance disposeClient to reset Graph values * refactor: Update getMCPManager to accept userId parameter for improved idle connection handling * refactor: Extract logToolError function for improved error handling in AgentClient * refactor: Update disposeClient to clear handlerRegistry and graphRunnable references in client.run * refactor: Extract createHandleNewToken function to streamline token handling in initializeClient * chore: bump @librechat/agents * refactor: Improve timeout handling in addTitle function for better error 
management * refactor: Introduce createFetch instead of using class method * refactor: Enhance client disposal and request data handling in AskController and EditController * refactor: Update import statements for AnthropicClient and OpenAIClient to use specific paths * refactor: Use WeakRef for response handling in SplitStreamHandler to prevent memory leaks * refactor: Simplify client disposal and rename getReqData to processReqData in AskController and EditController * refactor: Improve logging structure and parameter handling in OpenAIClient * refactor: Remove unused GraphEvents and improve stream event handling in AnthropicClient and OpenAIClient * refactor: Simplify client initialization in AskController and EditController * refactor: Remove unused mock functions and implement in-memory store for KeyvMongo * chore: Update dependencies in package-lock.json to latest versions * refactor: Await token usage recording in OpenAIClient to ensure proper async handling * refactor: Remove handleAbort route from multiple endpoints and enhance client disposal logic * refactor: Enhance abort controller logic by managing abortKey more effectively * refactor: Add newConversation handling in useEventHandlers for improved conversation management * fix: dropparams * refactor: Use optional chaining for safer access to request properties in BaseClient * refactor: Move client disposal and request data processing logic to cleanup module for better organization * refactor: Remove aborted request check from addTitle function for cleaner logic * feat: Add Grok 3 model pricing and update tests for new models * chore: Remove trace warnings and inspect flags from backend start script used for debugging * refactor: Replace user identifier handling with userId for consistency across controllers, use UserId in clientRegistry * refactor: Enhance client disposal logic to prevent memory leaks by clearing additional references * chore: Update @librechat/agents to version 2.4.14 in package.json 
and package-lock.json
2025-04-12 18:46:36 -04:00
test('should return correct tokens for Grok 3 series models', () => {
expect(getModelMaxTokens('grok-3')).toBe(131072);
expect(getModelMaxTokens('grok-3-fast')).toBe(131072);
expect(getModelMaxTokens('grok-3-mini')).toBe(131072);
expect(getModelMaxTokens('grok-3-mini-fast')).toBe(131072);
});
test('should return correct tokens for Grok 4 model', () => {
expect(getModelMaxTokens('grok-4-0709')).toBe(256000);
});
test('should return correct tokens for Grok 4 Fast and Grok 4.1 Fast models', () => {
expect(getModelMaxTokens('grok-4-fast')).toBe(2000000);
expect(getModelMaxTokens('grok-4-1-fast-reasoning')).toBe(2000000);
expect(getModelMaxTokens('grok-4-1-fast-non-reasoning')).toBe(2000000);
});
test('should return correct tokens for Grok Code Fast model', () => {
expect(getModelMaxTokens('grok-code-fast-1')).toBe(256000);
});
test('should handle partial matches for Grok models with prefixes', () => {
// Vision models should match before general models
🤖 refactor: Improve Agents Memory Usage, Bump Keyv, Grok 3 (#6850) * chore: remove unused redis file * chore: bump keyv dependencies, and update related imports * refactor: Implement IoRedis client for rate limiting across middleware, as node-redis via keyv not compatible * fix: Set max listeners to expected amount * WIP: memory improvements * refactor: Simplify getAbortData assignment in createAbortController * refactor: Update getAbortData to use WeakRef for content management * WIP: memory improvements in agent chat requests * refactor: Enhance memory management with finalization registry and cleanup functions * refactor: Simplify domainParser calls by removing unnecessary request parameter * refactor: Update parameter types for action tools and agent loading functions to use minimal configs * refactor: Simplify domainParser tests by removing unnecessary request parameter * refactor: Simplify domainParser call by removing unnecessary request parameter * refactor: Enhance client disposal by nullifying additional properties to improve memory management * refactor: Improve title generation by adding abort controller and timeout handling, consolidate request cleanup * refactor: Update checkIdleConnections to skip current user when checking for idle connections if passed * refactor: Update createMCPTool to derive userId from config and handle abort signals * refactor: Introduce createTokenCounter function and update tokenCounter usage; enhance disposeClient to reset Graph values * refactor: Update getMCPManager to accept userId parameter for improved idle connection handling * refactor: Extract logToolError function for improved error handling in AgentClient * refactor: Update disposeClient to clear handlerRegistry and graphRunnable references in client.run * refactor: Extract createHandleNewToken function to streamline token handling in initializeClient * chore: bump @librechat/agents * refactor: Improve timeout handling in addTitle function for better error 
management * refactor: Introduce createFetch instead of using class method * refactor: Enhance client disposal and request data handling in AskController and EditController * refactor: Update import statements for AnthropicClient and OpenAIClient to use specific paths * refactor: Use WeakRef for response handling in SplitStreamHandler to prevent memory leaks * refactor: Simplify client disposal and rename getReqData to processReqData in AskController and EditController * refactor: Improve logging structure and parameter handling in OpenAIClient * refactor: Remove unused GraphEvents and improve stream event handling in AnthropicClient and OpenAIClient * refactor: Simplify client initialization in AskController and EditController * refactor: Remove unused mock functions and implement in-memory store for KeyvMongo * chore: Update dependencies in package-lock.json to latest versions * refactor: Await token usage recording in OpenAIClient to ensure proper async handling * refactor: Remove handleAbort route from multiple endpoints and enhance client disposal logic * refactor: Enhance abort controller logic by managing abortKey more effectively * refactor: Add newConversation handling in useEventHandlers for improved conversation management * fix: dropparams * refactor: Use optional chaining for safer access to request properties in BaseClient * refactor: Move client disposal and request data processing logic to cleanup module for better organization * refactor: Remove aborted request check from addTitle function for cleaner logic * feat: Add Grok 3 model pricing and update tests for new models * chore: Remove trace warnings and inspect flags from backend start script used for debugging * refactor: Replace user identifier handling with userId for consistency across controllers, use UserId in clientRegistry * refactor: Enhance client disposal logic to prevent memory leaks by clearing additional references * chore: Update @librechat/agents to version 2.4.14 in package.json 
and package-lock.json
2025-04-12 18:46:36 -04:00
expect(getModelMaxTokens('xai/grok-2-vision-1212')).toBe(32768);
expect(getModelMaxTokens('xai/grok-2-vision')).toBe(32768);
expect(getModelMaxTokens('xai/grok-2-vision-latest')).toBe(32768);
// Beta models
🤖 refactor: Improve Agents Memory Usage, Bump Keyv, Grok 3 (#6850) * chore: remove unused redis file * chore: bump keyv dependencies, and update related imports * refactor: Implement IoRedis client for rate limiting across middleware, as node-redis via keyv not compatible * fix: Set max listeners to expected amount * WIP: memory improvements * refactor: Simplify getAbortData assignment in createAbortController * refactor: Update getAbortData to use WeakRef for content management * WIP: memory improvements in agent chat requests * refactor: Enhance memory management with finalization registry and cleanup functions * refactor: Simplify domainParser calls by removing unnecessary request parameter * refactor: Update parameter types for action tools and agent loading functions to use minimal configs * refactor: Simplify domainParser tests by removing unnecessary request parameter * refactor: Simplify domainParser call by removing unnecessary request parameter * refactor: Enhance client disposal by nullifying additional properties to improve memory management * refactor: Improve title generation by adding abort controller and timeout handling, consolidate request cleanup * refactor: Update checkIdleConnections to skip current user when checking for idle connections if passed * refactor: Update createMCPTool to derive userId from config and handle abort signals * refactor: Introduce createTokenCounter function and update tokenCounter usage; enhance disposeClient to reset Graph values * refactor: Update getMCPManager to accept userId parameter for improved idle connection handling * refactor: Extract logToolError function for improved error handling in AgentClient * refactor: Update disposeClient to clear handlerRegistry and graphRunnable references in client.run * refactor: Extract createHandleNewToken function to streamline token handling in initializeClient * chore: bump @librechat/agents * refactor: Improve timeout handling in addTitle function for better error 
management * refactor: Introduce createFetch instead of using class method * refactor: Enhance client disposal and request data handling in AskController and EditController * refactor: Update import statements for AnthropicClient and OpenAIClient to use specific paths * refactor: Use WeakRef for response handling in SplitStreamHandler to prevent memory leaks * refactor: Simplify client disposal and rename getReqData to processReqData in AskController and EditController * refactor: Improve logging structure and parameter handling in OpenAIClient * refactor: Remove unused GraphEvents and improve stream event handling in AnthropicClient and OpenAIClient * refactor: Simplify client initialization in AskController and EditController * refactor: Remove unused mock functions and implement in-memory store for KeyvMongo * chore: Update dependencies in package-lock.json to latest versions * refactor: Await token usage recording in OpenAIClient to ensure proper async handling * refactor: Remove handleAbort route from multiple endpoints and enhance client disposal logic * refactor: Enhance abort controller logic by managing abortKey more effectively * refactor: Add newConversation handling in useEventHandlers for improved conversation management * fix: dropparams * refactor: Use optional chaining for safer access to request properties in BaseClient * refactor: Move client disposal and request data processing logic to cleanup module for better organization * refactor: Remove aborted request check from addTitle function for cleaner logic * feat: Add Grok 3 model pricing and update tests for new models * chore: Remove trace warnings and inspect flags from backend start script used for debugging * refactor: Replace user identifier handling with userId for consistency across controllers, use UserId in clientRegistry * refactor: Enhance client disposal logic to prevent memory leaks by clearing additional references * chore: Update @librechat/agents to version 2.4.14 in package.json 
and package-lock.json
2025-04-12 18:46:36 -04:00
expect(getModelMaxTokens('xai/grok-vision-beta')).toBe(8192);
expect(getModelMaxTokens('xai/grok-beta')).toBe(131072);
// Text models
🤖 refactor: Improve Agents Memory Usage, Bump Keyv, Grok 3 (#6850) * chore: remove unused redis file * chore: bump keyv dependencies, and update related imports * refactor: Implement IoRedis client for rate limiting across middleware, as node-redis via keyv not compatible * fix: Set max listeners to expected amount * WIP: memory improvements * refactor: Simplify getAbortData assignment in createAbortController * refactor: Update getAbortData to use WeakRef for content management * WIP: memory improvements in agent chat requests * refactor: Enhance memory management with finalization registry and cleanup functions * refactor: Simplify domainParser calls by removing unnecessary request parameter * refactor: Update parameter types for action tools and agent loading functions to use minimal configs * refactor: Simplify domainParser tests by removing unnecessary request parameter * refactor: Simplify domainParser call by removing unnecessary request parameter * refactor: Enhance client disposal by nullifying additional properties to improve memory management * refactor: Improve title generation by adding abort controller and timeout handling, consolidate request cleanup * refactor: Update checkIdleConnections to skip current user when checking for idle connections if passed * refactor: Update createMCPTool to derive userId from config and handle abort signals * refactor: Introduce createTokenCounter function and update tokenCounter usage; enhance disposeClient to reset Graph values * refactor: Update getMCPManager to accept userId parameter for improved idle connection handling * refactor: Extract logToolError function for improved error handling in AgentClient * refactor: Update disposeClient to clear handlerRegistry and graphRunnable references in client.run * refactor: Extract createHandleNewToken function to streamline token handling in initializeClient * chore: bump @librechat/agents * refactor: Improve timeout handling in addTitle function for better error 
management * refactor: Introduce createFetch instead of using class method * refactor: Enhance client disposal and request data handling in AskController and EditController * refactor: Update import statements for AnthropicClient and OpenAIClient to use specific paths * refactor: Use WeakRef for response handling in SplitStreamHandler to prevent memory leaks * refactor: Simplify client disposal and rename getReqData to processReqData in AskController and EditController * refactor: Improve logging structure and parameter handling in OpenAIClient * refactor: Remove unused GraphEvents and improve stream event handling in AnthropicClient and OpenAIClient * refactor: Simplify client initialization in AskController and EditController * refactor: Remove unused mock functions and implement in-memory store for KeyvMongo * chore: Update dependencies in package-lock.json to latest versions * refactor: Await token usage recording in OpenAIClient to ensure proper async handling * refactor: Remove handleAbort route from multiple endpoints and enhance client disposal logic * refactor: Enhance abort controller logic by managing abortKey more effectively * refactor: Add newConversation handling in useEventHandlers for improved conversation management * fix: dropparams * refactor: Use optional chaining for safer access to request properties in BaseClient * refactor: Move client disposal and request data processing logic to cleanup module for better organization * refactor: Remove aborted request check from addTitle function for cleaner logic * feat: Add Grok 3 model pricing and update tests for new models * chore: Remove trace warnings and inspect flags from backend start script used for debugging * refactor: Replace user identifier handling with userId for consistency across controllers, use UserId in clientRegistry * refactor: Enhance client disposal logic to prevent memory leaks by clearing additional references * chore: Update @librechat/agents to version 2.4.14 in package.json 
and package-lock.json
2025-04-12 18:46:36 -04:00
expect(getModelMaxTokens('xai/grok-2-1212')).toBe(131072);
expect(getModelMaxTokens('xai/grok-2')).toBe(131072);
expect(getModelMaxTokens('xai/grok-2-latest')).toBe(131072);
// Grok 3 models
expect(getModelMaxTokens('xai/grok-3')).toBe(131072);
expect(getModelMaxTokens('xai/grok-3-fast')).toBe(131072);
expect(getModelMaxTokens('xai/grok-3-mini')).toBe(131072);
expect(getModelMaxTokens('xai/grok-3-mini-fast')).toBe(131072);
// Grok 4 model
expect(getModelMaxTokens('xai/grok-4-0709')).toBe(256000);
// Grok 4 Fast and 4.1 Fast models
expect(getModelMaxTokens('xai/grok-4-fast')).toBe(2000000);
expect(getModelMaxTokens('xai/grok-4-1-fast-reasoning')).toBe(2000000);
expect(getModelMaxTokens('xai/grok-4-1-fast-non-reasoning')).toBe(2000000);
// Grok Code Fast model
expect(getModelMaxTokens('xai/grok-code-fast-1')).toBe(256000);
});
});
describe('matchModelName', () => {
test('should match exact Grok model names', () => {
// Vision models
expect(matchModelName('grok-2-vision-1212')).toBe('grok-2-vision-1212');
expect(matchModelName('grok-2-vision')).toBe('grok-2-vision');
expect(matchModelName('grok-2-vision-latest')).toBe('grok-2-vision-latest');
// Beta models
expect(matchModelName('grok-vision-beta')).toBe('grok-vision-beta');
expect(matchModelName('grok-beta')).toBe('grok-beta');
// Text models
expect(matchModelName('grok-2-1212')).toBe('grok-2-1212');
expect(matchModelName('grok-2')).toBe('grok-2');
expect(matchModelName('grok-2-latest')).toBe('grok-2-latest');
🤖 refactor: Improve Agents Memory Usage, Bump Keyv, Grok 3 (#6850) * chore: remove unused redis file * chore: bump keyv dependencies, and update related imports * refactor: Implement IoRedis client for rate limiting across middleware, as node-redis via keyv not compatible * fix: Set max listeners to expected amount * WIP: memory improvements * refactor: Simplify getAbortData assignment in createAbortController * refactor: Update getAbortData to use WeakRef for content management * WIP: memory improvements in agent chat requests * refactor: Enhance memory management with finalization registry and cleanup functions * refactor: Simplify domainParser calls by removing unnecessary request parameter * refactor: Update parameter types for action tools and agent loading functions to use minimal configs * refactor: Simplify domainParser tests by removing unnecessary request parameter * refactor: Simplify domainParser call by removing unnecessary request parameter * refactor: Enhance client disposal by nullifying additional properties to improve memory management * refactor: Improve title generation by adding abort controller and timeout handling, consolidate request cleanup * refactor: Update checkIdleConnections to skip current user when checking for idle connections if passed * refactor: Update createMCPTool to derive userId from config and handle abort signals * refactor: Introduce createTokenCounter function and update tokenCounter usage; enhance disposeClient to reset Graph values * refactor: Update getMCPManager to accept userId parameter for improved idle connection handling * refactor: Extract logToolError function for improved error handling in AgentClient * refactor: Update disposeClient to clear handlerRegistry and graphRunnable references in client.run * refactor: Extract createHandleNewToken function to streamline token handling in initializeClient * chore: bump @librechat/agents * refactor: Improve timeout handling in addTitle function for better error 
management * refactor: Introduce createFetch instead of using class method * refactor: Enhance client disposal and request data handling in AskController and EditController * refactor: Update import statements for AnthropicClient and OpenAIClient to use specific paths * refactor: Use WeakRef for response handling in SplitStreamHandler to prevent memory leaks * refactor: Simplify client disposal and rename getReqData to processReqData in AskController and EditController * refactor: Improve logging structure and parameter handling in OpenAIClient * refactor: Remove unused GraphEvents and improve stream event handling in AnthropicClient and OpenAIClient * refactor: Simplify client initialization in AskController and EditController * refactor: Remove unused mock functions and implement in-memory store for KeyvMongo * chore: Update dependencies in package-lock.json to latest versions * refactor: Await token usage recording in OpenAIClient to ensure proper async handling * refactor: Remove handleAbort route from multiple endpoints and enhance client disposal logic * refactor: Enhance abort controller logic by managing abortKey more effectively * refactor: Add newConversation handling in useEventHandlers for improved conversation management * fix: dropparams * refactor: Use optional chaining for safer access to request properties in BaseClient * refactor: Move client disposal and request data processing logic to cleanup module for better organization * refactor: Remove aborted request check from addTitle function for cleaner logic * feat: Add Grok 3 model pricing and update tests for new models * chore: Remove trace warnings and inspect flags from backend start script used for debugging * refactor: Replace user identifier handling with userId for consistency across controllers, use UserId in clientRegistry * refactor: Enhance client disposal logic to prevent memory leaks by clearing additional references * chore: Update @librechat/agents to version 2.4.14 in package.json 
and package-lock.json
2025-04-12 18:46:36 -04:00
// Grok 3 models
expect(matchModelName('grok-3')).toBe('grok-3');
expect(matchModelName('grok-3-fast')).toBe('grok-3-fast');
expect(matchModelName('grok-3-mini')).toBe('grok-3-mini');
expect(matchModelName('grok-3-mini-fast')).toBe('grok-3-mini-fast');
// Grok 4 model
expect(matchModelName('grok-4-0709')).toBe('grok-4');
// Grok 4 Fast and 4.1 Fast models
expect(matchModelName('grok-4-fast')).toBe('grok-4-fast');
expect(matchModelName('grok-4-1-fast-reasoning')).toBe('grok-4-1-fast');
expect(matchModelName('grok-4-1-fast-non-reasoning')).toBe('grok-4-1-fast');
// Grok Code Fast model
expect(matchModelName('grok-code-fast-1')).toBe('grok-code-fast');
});
test('should match Grok model variations with prefixes', () => {
// Vision models should match before general models
🤖 refactor: Improve Agents Memory Usage, Bump Keyv, Grok 3 (#6850) * chore: remove unused redis file * chore: bump keyv dependencies, and update related imports * refactor: Implement IoRedis client for rate limiting across middleware, as node-redis via keyv not compatible * fix: Set max listeners to expected amount * WIP: memory improvements * refactor: Simplify getAbortData assignment in createAbortController * refactor: Update getAbortData to use WeakRef for content management * WIP: memory improvements in agent chat requests * refactor: Enhance memory management with finalization registry and cleanup functions * refactor: Simplify domainParser calls by removing unnecessary request parameter * refactor: Update parameter types for action tools and agent loading functions to use minimal configs * refactor: Simplify domainParser tests by removing unnecessary request parameter * refactor: Simplify domainParser call by removing unnecessary request parameter * refactor: Enhance client disposal by nullifying additional properties to improve memory management * refactor: Improve title generation by adding abort controller and timeout handling, consolidate request cleanup * refactor: Update checkIdleConnections to skip current user when checking for idle connections if passed * refactor: Update createMCPTool to derive userId from config and handle abort signals * refactor: Introduce createTokenCounter function and update tokenCounter usage; enhance disposeClient to reset Graph values * refactor: Update getMCPManager to accept userId parameter for improved idle connection handling * refactor: Extract logToolError function for improved error handling in AgentClient * refactor: Update disposeClient to clear handlerRegistry and graphRunnable references in client.run * refactor: Extract createHandleNewToken function to streamline token handling in initializeClient * chore: bump @librechat/agents * refactor: Improve timeout handling in addTitle function for better error 
management * refactor: Introduce createFetch instead of using class method * refactor: Enhance client disposal and request data handling in AskController and EditController * refactor: Update import statements for AnthropicClient and OpenAIClient to use specific paths * refactor: Use WeakRef for response handling in SplitStreamHandler to prevent memory leaks * refactor: Simplify client disposal and rename getReqData to processReqData in AskController and EditController * refactor: Improve logging structure and parameter handling in OpenAIClient * refactor: Remove unused GraphEvents and improve stream event handling in AnthropicClient and OpenAIClient * refactor: Simplify client initialization in AskController and EditController * refactor: Remove unused mock functions and implement in-memory store for KeyvMongo * chore: Update dependencies in package-lock.json to latest versions * refactor: Await token usage recording in OpenAIClient to ensure proper async handling * refactor: Remove handleAbort route from multiple endpoints and enhance client disposal logic * refactor: Enhance abort controller logic by managing abortKey more effectively * refactor: Add newConversation handling in useEventHandlers for improved conversation management * fix: dropparams * refactor: Use optional chaining for safer access to request properties in BaseClient * refactor: Move client disposal and request data processing logic to cleanup module for better organization * refactor: Remove aborted request check from addTitle function for cleaner logic * feat: Add Grok 3 model pricing and update tests for new models * chore: Remove trace warnings and inspect flags from backend start script used for debugging * refactor: Replace user identifier handling with userId for consistency across controllers, use UserId in clientRegistry * refactor: Enhance client disposal logic to prevent memory leaks by clearing additional references * chore: Update @librechat/agents to version 2.4.14 in package.json 
and package-lock.json
2025-04-12 18:46:36 -04:00
expect(matchModelName('xai/grok-2-vision-1212')).toBe('grok-2-vision-1212');
expect(matchModelName('xai/grok-2-vision')).toBe('grok-2-vision');
expect(matchModelName('xai/grok-2-vision-latest')).toBe('grok-2-vision-latest');
// Beta models
🤖 refactor: Improve Agents Memory Usage, Bump Keyv, Grok 3 (#6850) * chore: remove unused redis file * chore: bump keyv dependencies, and update related imports * refactor: Implement IoRedis client for rate limiting across middleware, as node-redis via keyv not compatible * fix: Set max listeners to expected amount * WIP: memory improvements * refactor: Simplify getAbortData assignment in createAbortController * refactor: Update getAbortData to use WeakRef for content management * WIP: memory improvements in agent chat requests * refactor: Enhance memory management with finalization registry and cleanup functions * refactor: Simplify domainParser calls by removing unnecessary request parameter * refactor: Update parameter types for action tools and agent loading functions to use minimal configs * refactor: Simplify domainParser tests by removing unnecessary request parameter * refactor: Simplify domainParser call by removing unnecessary request parameter * refactor: Enhance client disposal by nullifying additional properties to improve memory management * refactor: Improve title generation by adding abort controller and timeout handling, consolidate request cleanup * refactor: Update checkIdleConnections to skip current user when checking for idle connections if passed * refactor: Update createMCPTool to derive userId from config and handle abort signals * refactor: Introduce createTokenCounter function and update tokenCounter usage; enhance disposeClient to reset Graph values * refactor: Update getMCPManager to accept userId parameter for improved idle connection handling * refactor: Extract logToolError function for improved error handling in AgentClient * refactor: Update disposeClient to clear handlerRegistry and graphRunnable references in client.run * refactor: Extract createHandleNewToken function to streamline token handling in initializeClient * chore: bump @librechat/agents * refactor: Improve timeout handling in addTitle function for better error 
management * refactor: Introduce createFetch instead of using class method * refactor: Enhance client disposal and request data handling in AskController and EditController * refactor: Update import statements for AnthropicClient and OpenAIClient to use specific paths * refactor: Use WeakRef for response handling in SplitStreamHandler to prevent memory leaks * refactor: Simplify client disposal and rename getReqData to processReqData in AskController and EditController * refactor: Improve logging structure and parameter handling in OpenAIClient * refactor: Remove unused GraphEvents and improve stream event handling in AnthropicClient and OpenAIClient * refactor: Simplify client initialization in AskController and EditController * refactor: Remove unused mock functions and implement in-memory store for KeyvMongo * chore: Update dependencies in package-lock.json to latest versions * refactor: Await token usage recording in OpenAIClient to ensure proper async handling * refactor: Remove handleAbort route from multiple endpoints and enhance client disposal logic * refactor: Enhance abort controller logic by managing abortKey more effectively * refactor: Add newConversation handling in useEventHandlers for improved conversation management * fix: dropparams * refactor: Use optional chaining for safer access to request properties in BaseClient * refactor: Move client disposal and request data processing logic to cleanup module for better organization * refactor: Remove aborted request check from addTitle function for cleaner logic * feat: Add Grok 3 model pricing and update tests for new models * chore: Remove trace warnings and inspect flags from backend start script used for debugging * refactor: Replace user identifier handling with userId for consistency across controllers, use UserId in clientRegistry * refactor: Enhance client disposal logic to prevent memory leaks by clearing additional references * chore: Update @librechat/agents to version 2.4.14 in package.json 
and package-lock.json
2025-04-12 18:46:36 -04:00
expect(matchModelName('xai/grok-vision-beta')).toBe('grok-vision-beta');
expect(matchModelName('xai/grok-beta')).toBe('grok-beta');
// Text models
🤖 refactor: Improve Agents Memory Usage, Bump Keyv, Grok 3 (#6850) * chore: remove unused redis file * chore: bump keyv dependencies, and update related imports * refactor: Implement IoRedis client for rate limiting across middleware, as node-redis via keyv not compatible * fix: Set max listeners to expected amount * WIP: memory improvements * refactor: Simplify getAbortData assignment in createAbortController * refactor: Update getAbortData to use WeakRef for content management * WIP: memory improvements in agent chat requests * refactor: Enhance memory management with finalization registry and cleanup functions * refactor: Simplify domainParser calls by removing unnecessary request parameter * refactor: Update parameter types for action tools and agent loading functions to use minimal configs * refactor: Simplify domainParser tests by removing unnecessary request parameter * refactor: Simplify domainParser call by removing unnecessary request parameter * refactor: Enhance client disposal by nullifying additional properties to improve memory management * refactor: Improve title generation by adding abort controller and timeout handling, consolidate request cleanup * refactor: Update checkIdleConnections to skip current user when checking for idle connections if passed * refactor: Update createMCPTool to derive userId from config and handle abort signals * refactor: Introduce createTokenCounter function and update tokenCounter usage; enhance disposeClient to reset Graph values * refactor: Update getMCPManager to accept userId parameter for improved idle connection handling * refactor: Extract logToolError function for improved error handling in AgentClient * refactor: Update disposeClient to clear handlerRegistry and graphRunnable references in client.run * refactor: Extract createHandleNewToken function to streamline token handling in initializeClient * chore: bump @librechat/agents * refactor: Improve timeout handling in addTitle function for better error 
management * refactor: Introduce createFetch instead of using class method * refactor: Enhance client disposal and request data handling in AskController and EditController * refactor: Update import statements for AnthropicClient and OpenAIClient to use specific paths * refactor: Use WeakRef for response handling in SplitStreamHandler to prevent memory leaks * refactor: Simplify client disposal and rename getReqData to processReqData in AskController and EditController * refactor: Improve logging structure and parameter handling in OpenAIClient * refactor: Remove unused GraphEvents and improve stream event handling in AnthropicClient and OpenAIClient * refactor: Simplify client initialization in AskController and EditController * refactor: Remove unused mock functions and implement in-memory store for KeyvMongo * chore: Update dependencies in package-lock.json to latest versions * refactor: Await token usage recording in OpenAIClient to ensure proper async handling * refactor: Remove handleAbort route from multiple endpoints and enhance client disposal logic * refactor: Enhance abort controller logic by managing abortKey more effectively * refactor: Add newConversation handling in useEventHandlers for improved conversation management * fix: dropparams * refactor: Use optional chaining for safer access to request properties in BaseClient * refactor: Move client disposal and request data processing logic to cleanup module for better organization * refactor: Remove aborted request check from addTitle function for cleaner logic * feat: Add Grok 3 model pricing and update tests for new models * chore: Remove trace warnings and inspect flags from backend start script used for debugging * refactor: Replace user identifier handling with userId for consistency across controllers, use UserId in clientRegistry * refactor: Enhance client disposal logic to prevent memory leaks by clearing additional references * chore: Update @librechat/agents to version 2.4.14 in package.json 
and package-lock.json
2025-04-12 18:46:36 -04:00
expect(matchModelName('xai/grok-2-1212')).toBe('grok-2-1212');
expect(matchModelName('xai/grok-2')).toBe('grok-2');
expect(matchModelName('xai/grok-2-latest')).toBe('grok-2-latest');
// Grok 3 models
expect(matchModelName('xai/grok-3')).toBe('grok-3');
expect(matchModelName('xai/grok-3-fast')).toBe('grok-3-fast');
expect(matchModelName('xai/grok-3-mini')).toBe('grok-3-mini');
expect(matchModelName('xai/grok-3-mini-fast')).toBe('grok-3-mini-fast');
// Grok 4 model
expect(matchModelName('xai/grok-4-0709')).toBe('grok-4');
// Grok 4 Fast and 4.1 Fast models
expect(matchModelName('xai/grok-4-fast')).toBe('grok-4-fast');
expect(matchModelName('xai/grok-4-1-fast-reasoning')).toBe('grok-4-1-fast');
expect(matchModelName('xai/grok-4-1-fast-non-reasoning')).toBe('grok-4-1-fast');
// Grok Code Fast model
expect(matchModelName('xai/grok-code-fast-1')).toBe('grok-code-fast');
});
});
});
describe('Claude Model Tests', () => {
it('should return correct context length for Claude 4 models', () => {
  // Each Claude 4 base name must report the limit stored under the same key
  // in the Anthropic section of maxTokensMap. No endpoint argument is passed,
  // so the lookup goes through whatever default getModelMaxTokens applies.
  const anthropicLimits = maxTokensMap[EModelEndpoint.anthropic];
  for (const model of ['claude-sonnet-4', 'claude-opus-4']) {
    expect(getModelMaxTokens(model)).toBe(anthropicLimits[model]);
  }
});
🧮 feat: Enhance Model Pricing Coverage and Pattern Matching (#10173) * updated gpt5-pro it is here and on openrouter https://platform.openai.com/docs/models/gpt-5-pro * feat: Add gpt-5-pro pricing - Implemented handling for the new gpt-5-pro model in the getValueKey function. - Updated tests to ensure correct behavior for gpt-5-pro across various scenarios. - Adjusted token limits and multipliers for gpt-5-pro in the tokens utility files. - Enhanced model matching functionality to include gpt-5-pro variations. * refactor: optimize model pricing and validation logic - Added new model pricing entries for llama2, llama3, and qwen variants in tx.js. - Updated tokenValues to include additional models and their pricing structures. - Implemented validation tests in tx.spec.js to ensure all models resolve correctly to pricing. - Refactored getValueKey function to improve model matching and resolution efficiency. - Removed outdated model entries from tokens.ts to streamline pricing management. * fix: add missing pricing * chore: update model pricing for qwen and gemma variants * chore: update model pricing and add validation for context windows - Removed outdated model entries from tx.js and updated tokenValues with new models. - Added a test in tx.spec.js to ensure all models with pricing have corresponding context windows defined in tokens.ts. - Introduced 'command-text' model pricing in tokens.ts to maintain consistency across model definitions. * chore: update model names and pricing for AI21 and Amazon models - Refactored model names in tx.js for AI21 and Amazon models to remove versioning and improve consistency. - Updated pricing values in tokens.ts to reflect the new model names. - Added comprehensive tests in tx.spec.js to validate pricing for both short and full model names across AI21 and Amazon models. 
* feat: add pricing and validation for Claude Haiku 4.5 model * chore: increase default max context tokens to 18000 for agents * feat: add Qwen3 model pricing and validation tests * chore: reorganize and update Qwen model pricing in tx.js and tokens.ts --------- Co-authored-by: khfung <68192841+khfung@users.noreply.github.com>
2025-10-19 09:23:27 -04:00
it('should return correct context length for Claude Haiku 4.5', () => {
  const haikuLimit = maxTokensMap[EModelEndpoint.anthropic]['claude-haiku-4-5'];

  // Lookup with the Anthropic endpoint passed explicitly.
  const withEndpoint = getModelMaxTokens('claude-haiku-4-5', EModelEndpoint.anthropic);
  expect(withEndpoint).toBe(haikuLimit);

  // Lookup with the endpoint argument omitted must yield the same limit.
  const withoutEndpoint = getModelMaxTokens('claude-haiku-4-5');
  expect(withoutEndpoint).toBe(haikuLimit);
});
it('should return correct context length for Claude Opus 4.5', () => {
  const opusLimit = maxTokensMap[EModelEndpoint.anthropic]['claude-opus-4-5'];
  // First call passes the Anthropic endpoint explicitly, second call omits it;
  // both are expected to resolve to the same Anthropic map entry.
  const argumentSets = [['claude-opus-4-5', EModelEndpoint.anthropic], ['claude-opus-4-5']];
  for (const callArgs of argumentSets) {
    expect(getModelMaxTokens(...callArgs)).toBe(opusLimit);
  }
});
🧮 feat: Enhance Model Pricing Coverage and Pattern Matching (#10173) * updated gpt5-pro it is here and on openrouter https://platform.openai.com/docs/models/gpt-5-pro * feat: Add gpt-5-pro pricing - Implemented handling for the new gpt-5-pro model in the getValueKey function. - Updated tests to ensure correct behavior for gpt-5-pro across various scenarios. - Adjusted token limits and multipliers for gpt-5-pro in the tokens utility files. - Enhanced model matching functionality to include gpt-5-pro variations. * refactor: optimize model pricing and validation logic - Added new model pricing entries for llama2, llama3, and qwen variants in tx.js. - Updated tokenValues to include additional models and their pricing structures. - Implemented validation tests in tx.spec.js to ensure all models resolve correctly to pricing. - Refactored getValueKey function to improve model matching and resolution efficiency. - Removed outdated model entries from tokens.ts to streamline pricing management. * fix: add missing pricing * chore: update model pricing for qwen and gemma variants * chore: update model pricing and add validation for context windows - Removed outdated model entries from tx.js and updated tokenValues with new models. - Added a test in tx.spec.js to ensure all models with pricing have corresponding context windows defined in tokens.ts. - Introduced 'command-text' model pricing in tokens.ts to maintain consistency across model definitions. * chore: update model names and pricing for AI21 and Amazon models - Refactored model names in tx.js for AI21 and Amazon models to remove versioning and improve consistency. - Updated pricing values in tokens.ts to reflect the new model names. - Added comprehensive tests in tx.spec.js to validate pricing for both short and full model names across AI21 and Amazon models. 
* feat: add pricing and validation for Claude Haiku 4.5 model * chore: increase default max context tokens to 18000 for agents * feat: add Qwen3 model pricing and validation tests * chore: reorganize and update Qwen model pricing in tx.js and tokens.ts --------- Co-authored-by: khfung <68192841+khfung@users.noreply.github.com>
2025-10-19 09:23:27 -04:00
it('should handle Claude Haiku 4.5 model name variations', () => {
  const baseKey = 'claude-haiku-4-5';
  const anthropicLimits = maxTokensMap[EModelEndpoint.anthropic];
  // Dated, "-latest", "-preview" and provider-prefixed/suffixed spellings.
  const spellings = [
    'claude-haiku-4-5',
    'claude-haiku-4-5-20250420',
    'claude-haiku-4-5-latest',
    'anthropic/claude-haiku-4-5',
    'claude-haiku-4-5/anthropic',
    'claude-haiku-4-5-preview',
  ];

  for (const spelling of spellings) {
    // The pattern matcher must collapse every spelling onto the base key...
    const matchedKey = findMatchingPattern(spelling, anthropicLimits);
    expect(matchedKey).toBe(baseKey);
    // ...and the token lookup must return that base key's limit.
    const tokens = getModelMaxTokens(spelling, EModelEndpoint.anthropic);
    expect(tokens).toBe(anthropicLimits[baseKey]);
  }
});
it('should handle Claude Opus 4.5 model name variations', () => {
  const anthropicLimits = maxTokensMap[EModelEndpoint.anthropic];
  const expectedKey = 'claude-opus-4-5';
  const expectedTokens = anthropicLimits[expectedKey];

  // Every spelling below is expected to resolve to the 'claude-opus-4-5'
  // pattern and therefore to that entry's token limit.
  const checkSpelling = (name) => {
    expect(findMatchingPattern(name, anthropicLimits)).toBe(expectedKey);
    expect(getModelMaxTokens(name, EModelEndpoint.anthropic)).toBe(expectedTokens);
  };

  [
    'claude-opus-4-5',
    'claude-opus-4-5-20250420',
    'claude-opus-4-5-latest',
    'anthropic/claude-opus-4-5',
    'claude-opus-4-5/anthropic',
    'claude-opus-4-5-preview',
  ].forEach((name) => checkSpelling(name));
});
🧮 feat: Enhance Model Pricing Coverage and Pattern Matching (#10173) * updated gpt5-pro it is here and on openrouter https://platform.openai.com/docs/models/gpt-5-pro * feat: Add gpt-5-pro pricing - Implemented handling for the new gpt-5-pro model in the getValueKey function. - Updated tests to ensure correct behavior for gpt-5-pro across various scenarios. - Adjusted token limits and multipliers for gpt-5-pro in the tokens utility files. - Enhanced model matching functionality to include gpt-5-pro variations. * refactor: optimize model pricing and validation logic - Added new model pricing entries for llama2, llama3, and qwen variants in tx.js. - Updated tokenValues to include additional models and their pricing structures. - Implemented validation tests in tx.spec.js to ensure all models resolve correctly to pricing. - Refactored getValueKey function to improve model matching and resolution efficiency. - Removed outdated model entries from tokens.ts to streamline pricing management. * fix: add missing pricing * chore: update model pricing for qwen and gemma variants * chore: update model pricing and add validation for context windows - Removed outdated model entries from tx.js and updated tokenValues with new models. - Added a test in tx.spec.js to ensure all models with pricing have corresponding context windows defined in tokens.ts. - Introduced 'command-text' model pricing in tokens.ts to maintain consistency across model definitions. * chore: update model names and pricing for AI21 and Amazon models - Refactored model names in tx.js for AI21 and Amazon models to remove versioning and improve consistency. - Updated pricing values in tokens.ts to reflect the new model names. - Added comprehensive tests in tx.spec.js to validate pricing for both short and full model names across AI21 and Amazon models. 
* feat: add pricing and validation for Claude Haiku 4.5 model * chore: increase default max context tokens to 18000 for agents * feat: add Qwen3 model pricing and validation tests * chore: reorganize and update Qwen model pricing in tx.js and tokens.ts --------- Co-authored-by: khfung <68192841+khfung@users.noreply.github.com>
2025-10-19 09:23:27 -04:00
it('should match model names correctly for Claude Haiku 4.5', () => {
  // All of these spellings must be normalized to the bare Haiku 4.5 name
  // when matched against the Anthropic endpoint.
  const spellings = [
    'claude-haiku-4-5',
    'claude-haiku-4-5-20250420',
    'claude-haiku-4-5-latest',
    'anthropic/claude-haiku-4-5',
    'claude-haiku-4-5/anthropic',
    'claude-haiku-4-5-preview',
  ];
  for (const spelling of spellings) {
    const matched = matchModelName(spelling, EModelEndpoint.anthropic);
    expect(matched).toBe('claude-haiku-4-5');
  }
});
it('should match model names correctly for Claude Opus 4.5', () => {
  const expectedName = 'claude-opus-4-5';
  // Base, dated, "-latest", "-preview" and provider-decorated spellings;
  // each one must match back to the bare Opus 4.5 name.
  const spellings = [
    'claude-opus-4-5',
    'claude-opus-4-5-20250420',
    'claude-opus-4-5-latest',
    'anthropic/claude-opus-4-5',
    'claude-opus-4-5/anthropic',
    'claude-opus-4-5-preview',
  ];
  for (const spelling of spellings) {
    expect(matchModelName(spelling, EModelEndpoint.anthropic)).toBe(expectedName);
  }
});
it('should handle Claude 4 model name variations with different prefixes and suffixes', () => {
  const anthropicLimits = maxTokensMap[EModelEndpoint.anthropic];
  const modelVariations = [
    'claude-sonnet-4',
    'claude-sonnet-4-20240229',
    'claude-sonnet-4-latest',
    'anthropic/claude-sonnet-4',
    'claude-sonnet-4/anthropic',
    'claude-sonnet-4-preview',
    'claude-sonnet-4-20240229-preview',
    'claude-opus-4',
    'claude-opus-4-20240229',
    'claude-opus-4-latest',
    'anthropic/claude-opus-4',
    'claude-opus-4/anthropic',
    'claude-opus-4-preview',
    'claude-opus-4-20240229-preview',
  ];
  modelVariations.forEach((model) => {
    const modelKey = findMatchingPattern(model, anthropicLimits);
    const expectedTokens = anthropicLimits[modelKey];
    // Guard against a vacuous pass: if no pattern matched, the expected value
    // would be `undefined`, and a lookup that also returned `undefined` would
    // satisfy `toBe` without verifying anything.
    expect(expectedTokens).toBeDefined();
    // Endpoint is intentionally omitted here, as in the original assertion.
    expect(getModelMaxTokens(model)).toBe(expectedTokens);
  });
});
it('should match model names correctly for Claude 4 models', () => {
  // Spellings grouped by the base name they must resolve to. Iteration order
  // (all Sonnet spellings, then all Opus spellings) mirrors the flat list.
  const spellingsByExpectedName = {
    'claude-sonnet-4': [
      'claude-sonnet-4',
      'claude-sonnet-4-20240229',
      'claude-sonnet-4-latest',
      'anthropic/claude-sonnet-4',
      'claude-sonnet-4/anthropic',
      'claude-sonnet-4-preview',
      'claude-sonnet-4-20240229-preview',
    ],
    'claude-opus-4': [
      'claude-opus-4',
      'claude-opus-4-20240229',
      'claude-opus-4-latest',
      'anthropic/claude-opus-4',
      'claude-opus-4/anthropic',
      'claude-opus-4-preview',
      'claude-opus-4-20240229-preview',
    ],
  };
  for (const [expectedModel, spellings] of Object.entries(spellingsByExpectedName)) {
    for (const spelling of spellings) {
      expect(matchModelName(spelling, EModelEndpoint.anthropic)).toBe(expectedModel);
    }
  }
});
});
🌙 feat: Moonshot Provider Support (#11621) * ✨ feat: Add Moonshot Provider Support - Updated the `isKnownCustomProvider` function to include `Providers.MOONSHOT` in the list of recognized custom providers. - Enhanced the `providerConfigMap` to initialize `MOONSHOT` with the custom initialization function. - Introduced `MoonshotIcon` component for visual representation in the UI, integrated into the `UnknownIcon` component. - Updated various files across the API and client to support the new `MOONSHOT` provider, including configuration and response handling. This update expands the capabilities of the application by integrating support for the Moonshot provider, enhancing both backend and frontend functionalities. * ✨ feat: Add Moonshot/Kimi Model Pricing and Tests - Introduced new pricing configurations for Moonshot and Kimi models in `tx.js`, including various model variations and their respective prompt and completion values. - Expanded unit tests in `tx.spec.js` and `tokens.spec.js` to validate pricing and token limits for the newly added Moonshot/Kimi models, ensuring accurate calculations and handling of model variations. - Updated utility functions to support the new model structures and ensure compatibility with existing functionalities. This update enhances the pricing model capabilities and improves test coverage for the Moonshot/Kimi integration. * ✨ feat: Enhance Token Pricing Documentation and Configuration - Added comprehensive documentation for token pricing configuration in `tx.js` and `tokens.ts`, emphasizing the importance of key ordering for pattern matching. - Clarified the process for defining base and specific patterns to ensure accurate pricing retrieval based on model names. - Improved code comments to guide future additions of model families, enhancing maintainability and understanding of the pricing structure. 
This update improves the clarity and usability of the token pricing configuration, facilitating better integration and future enhancements. * chore: import order * chore: linting
2026-02-04 10:53:57 +01:00
describe('Moonshot/Kimi Model Tests', () => {
describe('getModelMaxTokens', () => {
🌙 feat: Moonshot Provider Support (#11621) * ✨ feat: Add Moonshot Provider Support - Updated the `isKnownCustomProvider` function to include `Providers.MOONSHOT` in the list of recognized custom providers. - Enhanced the `providerConfigMap` to initialize `MOONSHOT` with the custom initialization function. - Introduced `MoonshotIcon` component for visual representation in the UI, integrated into the `UnknownIcon` component. - Updated various files across the API and client to support the new `MOONSHOT` provider, including configuration and response handling. This update expands the capabilities of the application by integrating support for the Moonshot provider, enhancing both backend and frontend functionalities. * ✨ feat: Add Moonshot/Kimi Model Pricing and Tests - Introduced new pricing configurations for Moonshot and Kimi models in `tx.js`, including various model variations and their respective prompt and completion values. - Expanded unit tests in `tx.spec.js` and `tokens.spec.js` to validate pricing and token limits for the newly added Moonshot/Kimi models, ensuring accurate calculations and handling of model variations. - Updated utility functions to support the new model structures and ensure compatibility with existing functionalities. This update enhances the pricing model capabilities and improves test coverage for the Moonshot/Kimi integration. * ✨ feat: Enhance Token Pricing Documentation and Configuration - Added comprehensive documentation for token pricing configuration in `tx.js` and `tokens.ts`, emphasizing the importance of key ordering for pattern matching. - Clarified the process for defining base and specific patterns to ensure accurate pricing retrieval based on model names. - Improved code comments to guide future additions of model families, enhancing maintainability and understanding of the pricing structure. 
This update improves the clarity and usability of the token pricing configuration, facilitating better integration and future enhancements. * chore: import order * chore: linting
2026-02-04 10:53:57 +01:00
test('should return correct tokens for kimi-k2.5 (multi-modal)', () => {
  // Both the bare name and its "-latest" alias are compared against the
  // single 'kimi-k2.5' entry of the openAI token map.
  const k25Limit = maxTokensMap[EModelEndpoint.openAI]['kimi-k2.5'];
  for (const model of ['kimi-k2.5', 'kimi-k2.5-latest']) {
    expect(getModelMaxTokens(model)).toBe(k25Limit);
  }
});
test('should return correct tokens for kimi-k2 series models', () => {
  const openAILimits = maxTokensMap[EModelEndpoint.openAI];
  // Each name is looked up without an endpoint and compared against the
  // openAI map entry stored under exactly the same key.
  const kimiModels = [
    'kimi',
    'kimi-k2',
    'kimi-k2-turbo',
    'kimi-k2-turbo-preview',
    'kimi-k2-0905',
    'kimi-k2-0905-preview',
    'kimi-k2-thinking',
    'kimi-k2-thinking-turbo',
  ];
  for (const model of kimiModels) {
    expect(getModelMaxTokens(model)).toBe(openAILimits[model]);
  }
});
test('should return correct tokens for kimi-k2-0711 (smaller context)', () => {
  const openAILimits = maxTokensMap[EModelEndpoint.openAI];
  // The 0711 release and its preview alias each have their own map key.
  ['kimi-k2-0711', 'kimi-k2-0711-preview'].forEach((model) => {
    const tokens = getModelMaxTokens(model);
    expect(tokens).toBe(openAILimits[model]);
  });
});
test('should return correct tokens for kimi-latest', () => {
  // 'kimi-latest' is compared against its own entry in the openAI token map.
  const model = 'kimi-latest';
  const resolvedTokens = getModelMaxTokens(model);
  expect(resolvedTokens).toBe(maxTokensMap[EModelEndpoint.openAI][model]);
});
test('should return correct tokens for moonshot-v1 series models', () => {
  const openAILimits = maxTokensMap[EModelEndpoint.openAI];
  // Base patterns first, then every size with its vision and vision-preview
  // variants. Each name is checked against the map entry under the same key.
  const moonshotModels = [
    'moonshot',
    'moonshot-v1',
    'moonshot-v1-auto',
    'moonshot-v1-8k',
    'moonshot-v1-8k-vision',
    'moonshot-v1-8k-vision-preview',
    'moonshot-v1-32k',
    'moonshot-v1-32k-vision',
    'moonshot-v1-32k-vision-preview',
    'moonshot-v1-128k',
    'moonshot-v1-128k-vision',
    'moonshot-v1-128k-vision-preview',
  ];
  for (const model of moonshotModels) {
    expect(getModelMaxTokens(model)).toBe(openAILimits[model]);
  }
});
🌙 feat: Moonshot Provider Support (#11621) * ✨ feat: Add Moonshot Provider Support - Updated the `isKnownCustomProvider` function to include `Providers.MOONSHOT` in the list of recognized custom providers. - Enhanced the `providerConfigMap` to initialize `MOONSHOT` with the custom initialization function. - Introduced `MoonshotIcon` component for visual representation in the UI, integrated into the `UnknownIcon` component. - Updated various files across the API and client to support the new `MOONSHOT` provider, including configuration and response handling. This update expands the capabilities of the application by integrating support for the Moonshot provider, enhancing both backend and frontend functionalities. * ✨ feat: Add Moonshot/Kimi Model Pricing and Tests - Introduced new pricing configurations for Moonshot and Kimi models in `tx.js`, including various model variations and their respective prompt and completion values. - Expanded unit tests in `tx.spec.js` and `tokens.spec.js` to validate pricing and token limits for the newly added Moonshot/Kimi models, ensuring accurate calculations and handling of model variations. - Updated utility functions to support the new model structures and ensure compatibility with existing functionalities. This update enhances the pricing model capabilities and improves test coverage for the Moonshot/Kimi integration. * ✨ feat: Enhance Token Pricing Documentation and Configuration - Added comprehensive documentation for token pricing configuration in `tx.js` and `tokens.ts`, emphasizing the importance of key ordering for pattern matching. - Clarified the process for defining base and specific patterns to ensure accurate pricing retrieval based on model names. - Improved code comments to guide future additions of model families, enhancing maintainability and understanding of the pricing structure. 
This update improves the clarity and usability of the token pricing configuration, facilitating better integration and future enhancements. * chore: import order * chore: linting
2026-02-04 10:53:57 +01:00
test('should return correct tokens for Bedrock moonshot models', () => {
  const bedrockLimits = maxTokensMap[EModelEndpoint.bedrock];
  // Bedrock model ids use the "moonshot." prefix; each id is resolved with the
  // Bedrock endpoint and compared against the Bedrock map entry of that id.
  const bedrockModels = [
    'moonshot.kimi',
    'moonshot.kimi-k2',
    'moonshot.kimi-k2.5',
    'moonshot.kimi-k2-thinking',
    'moonshot.kimi-k2-0711',
  ];
  for (const model of bedrockModels) {
    expect(getModelMaxTokens(model, EModelEndpoint.bedrock)).toBe(bedrockLimits[model]);
  }
});
🌙 feat: Moonshot Provider Support (#11621) * ✨ feat: Add Moonshot Provider Support - Updated the `isKnownCustomProvider` function to include `Providers.MOONSHOT` in the list of recognized custom providers. - Enhanced the `providerConfigMap` to initialize `MOONSHOT` with the custom initialization function. - Introduced `MoonshotIcon` component for visual representation in the UI, integrated into the `UnknownIcon` component. - Updated various files across the API and client to support the new `MOONSHOT` provider, including configuration and response handling. This update expands the capabilities of the application by integrating support for the Moonshot provider, enhancing both backend and frontend functionalities. * ✨ feat: Add Moonshot/Kimi Model Pricing and Tests - Introduced new pricing configurations for Moonshot and Kimi models in `tx.js`, including various model variations and their respective prompt and completion values. - Expanded unit tests in `tx.spec.js` and `tokens.spec.js` to validate pricing and token limits for the newly added Moonshot/Kimi models, ensuring accurate calculations and handling of model variations. - Updated utility functions to support the new model structures and ensure compatibility with existing functionalities. This update enhances the pricing model capabilities and improves test coverage for the Moonshot/Kimi integration. * ✨ feat: Enhance Token Pricing Documentation and Configuration - Added comprehensive documentation for token pricing configuration in `tx.js` and `tokens.ts`, emphasizing the importance of key ordering for pattern matching. - Clarified the process for defining base and specific patterns to ensure accurate pricing retrieval based on model names. - Improved code comments to guide future additions of model families, enhancing maintainability and understanding of the pricing structure. 
This update improves the clarity and usability of the token pricing configuration, facilitating better integration and future enhancements. * chore: import order * chore: linting
2026-02-04 10:53:57 +01:00
/**
 * Provider-prefixed Moonshot/Kimi model ids are expected to yield the same
 * limit that the OpenAI section of `maxTokensMap` stores under the bare key.
 */
test('should handle Moonshot/Kimi models with provider prefixes', () => {
  // [prefixed model id, key whose stored limit is expected]
  const prefixedCases = [
    ['openrouter/kimi-k2', 'kimi-k2'],
    ['openrouter/kimi-k2.5', 'kimi-k2.5'],
    ['openrouter/kimi-k2-turbo', 'kimi-k2-turbo'],
    ['openrouter/moonshot-v1-128k', 'moonshot-v1-128k'],
  ];

  for (const [prefixedModel, bareKey] of prefixedCases) {
    // The expected value is looked up after the call, exactly as an inline
    // `expect(...).toBe(map[...][...])` would evaluate it.
    expect(getModelMaxTokens(prefixedModel)).toBe(maxTokensMap[EModelEndpoint.openAI][bareKey]);
  }
});
});
/**
 * `matchModelName` expectations for the Moonshot/Kimi family.
 *
 * Covers exact names, `moonshot-*` names, provider-prefixed ids and ids that
 * carry a trailing suffix. Each assertion pins the key the input is expected
 * to resolve to.
 */
describe('matchModelName', () => {
  test('should match exact Kimi model names', () => {
    expect(matchModelName('kimi')).toBe('kimi');
    expect(matchModelName('kimi-k2')).toBe('kimi-k2');
    expect(matchModelName('kimi-k2.5')).toBe('kimi-k2.5');
    expect(matchModelName('kimi-k2-turbo')).toBe('kimi-k2-turbo');
    expect(matchModelName('kimi-k2-0711')).toBe('kimi-k2-0711');
  });

  test('should match moonshot model names', () => {
    expect(matchModelName('moonshot')).toBe('moonshot');
    expect(matchModelName('moonshot-v1-8k')).toBe('moonshot-v1-8k');
    expect(matchModelName('moonshot-v1-32k')).toBe('moonshot-v1-32k');
    expect(matchModelName('moonshot-v1-128k')).toBe('moonshot-v1-128k');
  });

  test('should match Kimi model variations with provider prefix', () => {
    // The `openrouter/` prefix is expected to be ignored by the match.
    expect(matchModelName('openrouter/kimi')).toBe('kimi');
    expect(matchModelName('openrouter/kimi-k2')).toBe('kimi-k2');
    expect(matchModelName('openrouter/kimi-k2.5')).toBe('kimi-k2.5');
  });

  test('should match Kimi model variations with suffixes', () => {
    // Suffixed ids are expected to resolve to the un-suffixed key.
    expect(matchModelName('kimi-k2-latest')).toBe('kimi-k2');
    expect(matchModelName('kimi-k2.5-preview')).toBe('kimi-k2.5');
  });
});
});
🧮 feat: Enhance Model Pricing Coverage and Pattern Matching (#10173) * updated gpt5-pro it is here and on openrouter https://platform.openai.com/docs/models/gpt-5-pro * feat: Add gpt-5-pro pricing - Implemented handling for the new gpt-5-pro model in the getValueKey function. - Updated tests to ensure correct behavior for gpt-5-pro across various scenarios. - Adjusted token limits and multipliers for gpt-5-pro in the tokens utility files. - Enhanced model matching functionality to include gpt-5-pro variations. * refactor: optimize model pricing and validation logic - Added new model pricing entries for llama2, llama3, and qwen variants in tx.js. - Updated tokenValues to include additional models and their pricing structures. - Implemented validation tests in tx.spec.js to ensure all models resolve correctly to pricing. - Refactored getValueKey function to improve model matching and resolution efficiency. - Removed outdated model entries from tokens.ts to streamline pricing management. * fix: add missing pricing * chore: update model pricing for qwen and gemma variants * chore: update model pricing and add validation for context windows - Removed outdated model entries from tx.js and updated tokenValues with new models. - Added a test in tx.spec.js to ensure all models with pricing have corresponding context windows defined in tokens.ts. - Introduced 'command-text' model pricing in tokens.ts to maintain consistency across model definitions. * chore: update model names and pricing for AI21 and Amazon models - Refactored model names in tx.js for AI21 and Amazon models to remove versioning and improve consistency. - Updated pricing values in tokens.ts to reflect the new model names. - Added comprehensive tests in tx.spec.js to validate pricing for both short and full model names across AI21 and Amazon models. 
* feat: add pricing and validation for Claude Haiku 4.5 model * chore: increase default max context tokens to 18000 for agents * feat: add Qwen3 model pricing and validation tests * chore: reorganize and update Qwen model pricing in tx.js and tokens.ts --------- Co-authored-by: khfung <68192841+khfung@users.noreply.github.com>
2025-10-19 09:23:27 -04:00
/**
 * Qwen3 family coverage for `getModelMaxTokens` and `matchModelName`.
 *
 * Token-limit expectations are read from the OpenAI section of `maxTokensMap`
 * instead of being hard-coded, so these tests pin which key a model id
 * resolves to rather than the numeric limit itself.
 */
describe('Qwen3 Model Tests', () => {
  /** Reads the expected limit lazily, i.e. only when an assertion runs. */
  const openAILimit = (key) => maxTokensMap[EModelEndpoint.openAI][key];

  /** Turns a list of names into `[name, name]` pairs for ids expected to map to themselves. */
  const toSelf = (names) => names.map((name) => [name, name]);

  /** Asserts, in order, that every `[model, key]` pair yields the limit stored under `key`. */
  const expectLimits = (pairs) => {
    for (const [model, key] of pairs) {
      expect(getModelMaxTokens(model)).toBe(openAILimit(key));
    }
  };

  /** Asserts, in order, that every `[model, key]` pair is matched to `key`. */
  const expectMatches = (pairs) => {
    for (const [model, key] of pairs) {
      expect(matchModelName(model)).toBe(key);
    }
  };

  describe('getModelMaxTokens', () => {
    test('should return correct tokens for Qwen3 base pattern', () => {
      expectLimits([['qwen3', 'qwen3']]);
    });

    test('should return correct tokens for qwen3-4b (falls back to qwen3)', () => {
      expectLimits([['qwen3-4b', 'qwen3']]);
    });

    test('should return correct tokens for Qwen3 base models', () => {
      expectLimits(toSelf(['qwen3-8b', 'qwen3-14b', 'qwen3-32b', 'qwen3-235b-a22b']));
    });

    test('should return correct tokens for Qwen3 VL (Vision-Language) models', () => {
      expectLimits(
        toSelf([
          'qwen3-vl-8b-thinking',
          'qwen3-vl-8b-instruct',
          'qwen3-vl-30b-a3b',
          'qwen3-vl-235b-a22b',
        ]),
      );
    });

    test('should return correct tokens for Qwen3 specialized models', () => {
      expectLimits(
        toSelf([
          'qwen3-max',
          'qwen3-coder',
          'qwen3-coder-30b-a3b',
          'qwen3-coder-plus',
          'qwen3-coder-flash',
          'qwen3-next-80b-a3b',
        ]),
      );
    });

    test('should handle Qwen3 models with provider prefixes', () => {
      expectLimits([
        ['alibaba/qwen3', 'qwen3'],
        ['alibaba/qwen3-4b', 'qwen3'],
        ['qwen/qwen3-8b', 'qwen3-8b'],
        ['openrouter/qwen3-max', 'qwen3-max'],
        ['alibaba/qwen3-vl-8b-instruct', 'qwen3-vl-8b-instruct'],
        ['qwen/qwen3-coder', 'qwen3-coder'],
      ]);
    });

    test('should handle Qwen3 models with suffixes', () => {
      expectLimits([
        ['qwen3-preview', 'qwen3'],
        ['qwen3-4b-preview', 'qwen3'],
        ['qwen3-8b-latest', 'qwen3-8b'],
        ['qwen3-max-2024', 'qwen3-max'],
      ]);
    });
  });

  describe('matchModelName', () => {
    test('should match exact Qwen3 model names', () => {
      expectMatches([
        ['qwen3', 'qwen3'],
        // `qwen3-4b` is expected to resolve to the base `qwen3` key.
        ['qwen3-4b', 'qwen3'],
        ['qwen3-8b', 'qwen3-8b'],
        ['qwen3-vl-8b-thinking', 'qwen3-vl-8b-thinking'],
        ['qwen3-max', 'qwen3-max'],
        ['qwen3-coder', 'qwen3-coder'],
      ]);
    });

    test('should match Qwen3 model variations with provider prefixes', () => {
      expectMatches([
        ['alibaba/qwen3', 'qwen3'],
        ['alibaba/qwen3-4b', 'qwen3'],
        ['qwen/qwen3-8b', 'qwen3-8b'],
        ['openrouter/qwen3-max', 'qwen3-max'],
        ['alibaba/qwen3-vl-8b-instruct', 'qwen3-vl-8b-instruct'],
        ['qwen/qwen3-coder', 'qwen3-coder'],
      ]);
    });

    test('should match Qwen3 model variations with suffixes', () => {
      expectMatches([
        ['qwen3-preview', 'qwen3'],
        ['qwen3-4b-preview', 'qwen3'],
        ['qwen3-8b-latest', 'qwen3-8b'],
        ['qwen3-max-2024', 'qwen3-max'],
        ['qwen3-coder-v1', 'qwen3-coder'],
      ]);
    });
  });
});
/**
 * GLM (Zhipu AI) family coverage for `getModelMaxTokens` and `matchModelName`.
 *
 * Unlike suites that read expectations from `maxTokensMap`, the token limits
 * here are asserted as literal numbers, so a change to a stored GLM limit is
 * expected to fail these tests.
 */
describe('GLM Model Tests (Zhipu AI)', () => {
  /** Asserts, in order, that every `[model, limit]` pair returns exactly `limit`. */
  const expectLimits = (pairs) => {
    for (const [model, limit] of pairs) {
      expect(getModelMaxTokens(model)).toBe(limit);
    }
  };

  /** Asserts, in order, that every `[model, key]` pair is matched to `key`. */
  const expectMatches = (pairs) => {
    for (const [model, key] of pairs) {
      expect(matchModelName(model)).toBe(key);
    }
  };

  describe('getModelMaxTokens', () => {
    test('should return correct tokens for GLM models', () => {
      expectLimits([
        ['glm-4.6', 200000],
        ['glm-4.5v', 66000],
        ['glm-4.5-air', 131000],
        ['glm-4.5', 131000],
        ['glm-4-32b', 128000],
        ['glm-4', 128000],
        ['glm4', 128000],
      ]);
    });

    test('should handle partial matches for GLM models with provider prefixes', () => {
      expectLimits([
        // `z-ai/` prefix
        ['z-ai/glm-4.6', 200000],
        ['z-ai/glm-4.5', 131000],
        ['z-ai/glm-4.5-air', 131000],
        ['z-ai/glm-4.5v', 66000],
        ['z-ai/glm-4-32b', 128000],
        // `zai/` prefix
        ['zai/glm-4.6', 200000],
        ['zai/glm-4.5', 131000],
        ['zai/glm-4.5-air', 131000],
        ['zai/glm-4.5v', 66000],
        // `zai-org/` prefix with upper/mixed-case model names
        ['zai-org/GLM-4.6', 200000],
        ['zai-org/GLM-4.5', 131000],
        ['zai-org/GLM-4.5-Air', 131000],
        ['zai-org/GLM-4.5V', 66000],
        ['zai-org/GLM-4-32B-0414', 128000],
      ]);
    });

    test('should handle GLM model variations with suffixes', () => {
      expectLimits([
        ['glm-4.6-fp8', 200000],
        ['zai-org/GLM-4.6-FP8', 200000],
        ['zai-org/GLM-4.5-Air-FP8', 131000],
      ]);
    });

    test('should prioritize more specific GLM patterns', () => {
      expectLimits([
        ['glm-4.5-air-custom', 131000],
        ['glm-4.5-custom', 131000],
        // The `v` variant carries a different limit than plain `glm-4.5`.
        ['glm-4.5v-custom', 66000],
      ]);
    });
  });

  describe('matchModelName', () => {
    test('should match exact GLM model names', () => {
      expectMatches(
        ['glm-4.6', 'glm-4.5v', 'glm-4.5-air', 'glm-4.5', 'glm-4-32b', 'glm-4', 'glm4'].map(
          (name) => [name, name],
        ),
      );
    });

    test('should match GLM model variations with provider prefixes', () => {
      expectMatches([
        ['z-ai/glm-4.6', 'glm-4.6'],
        ['z-ai/glm-4.5', 'glm-4.5'],
        ['z-ai/glm-4.5-air', 'glm-4.5-air'],
        ['z-ai/glm-4.5v', 'glm-4.5v'],
        ['z-ai/glm-4-32b', 'glm-4-32b'],
        ['zai/glm-4.6', 'glm-4.6'],
        ['zai/glm-4.5', 'glm-4.5'],
        ['zai/glm-4.5-air', 'glm-4.5-air'],
        ['zai/glm-4.5v', 'glm-4.5v'],
        ['zai-org/GLM-4.6', 'glm-4.6'],
        ['zai-org/GLM-4.5', 'glm-4.5'],
        ['zai-org/GLM-4.5-Air', 'glm-4.5-air'],
        ['zai-org/GLM-4.5V', 'glm-4.5v'],
        ['zai-org/GLM-4-32B-0414', 'glm-4-32b'],
      ]);
    });

    test('should match GLM model variations with suffixes', () => {
      expectMatches([
        ['glm-4.6-fp8', 'glm-4.6'],
        ['zai-org/GLM-4.6-FP8', 'glm-4.6'],
        ['zai-org/GLM-4.5-Air-FP8', 'glm-4.5-air'],
      ]);
    });

    test('should handle case-insensitive matching for GLM models', () => {
      // Upper-case inputs are expected to come back as lower-case keys.
      expectMatches([
        ['zai-org/GLM-4.6', 'glm-4.6'],
        ['zai-org/GLM-4.5V', 'glm-4.5v'],
        ['zai-org/GLM-4-32B-0414', 'glm-4-32b'],
      ]);
    });
  });
});